diff --git a/velox/duckdb/conversion/DuckConversion.h b/velox/duckdb/conversion/DuckConversion.h index 31f63e12fc74..1c16a44403bf 100644 --- a/velox/duckdb/conversion/DuckConversion.h +++ b/velox/duckdb/conversion/DuckConversion.h @@ -109,7 +109,8 @@ struct DuckDateConversion { return ::duckdb::Timestamp::GetDate(veloxTimestampToDuckDB(input)); } static Timestamp toVelox(const ::duckdb::date_t& input) { - return duckdbTimestampToVelox(::duckdb::Timestamp::FromDatetime(input, 0)); + return duckdbTimestampToVelox( + ::duckdb::Timestamp::FromDatetime(input, ::duckdb::dtime_t(0))); } }; diff --git a/velox/duckdb/conversion/DuckParser.cpp b/velox/duckdb/conversion/DuckParser.cpp index d48fd5d41bdc..c4f3ae0b1438 100644 --- a/velox/duckdb/conversion/DuckParser.cpp +++ b/velox/duckdb/conversion/DuckParser.cpp @@ -20,6 +20,7 @@ namespace facebook::velox::duckdb { +using ::duckdb::BetweenExpression; using ::duckdb::CaseExpression; using ::duckdb::CastExpression; using ::duckdb::ColumnRefExpression; @@ -149,6 +150,21 @@ std::shared_ptr parseComparisonExpr(ParsedExpression& expr) { std::move(params)); } +// Parse x between lower and upper +std::shared_ptr parseBetweenExpr(ParsedExpression& expr) { + const auto& betweenExpr = dynamic_cast(expr); + return callExpr( + "and", + { + callExpr( + "gte", + {parseExpr(*betweenExpr.input), parseExpr(*betweenExpr.lower)}), + callExpr( + "lte", + {parseExpr(*betweenExpr.input), parseExpr(*betweenExpr.upper)}), + }); +} + // Parse a conjunction (AND or OR). 
std::shared_ptr parseConjunctionExpr( ParsedExpression& expr) { @@ -289,6 +305,9 @@ std::shared_ptr parseExpr(ParsedExpression& expr) { case ExpressionClass::COMPARISON: return parseComparisonExpr(expr); + case ExpressionClass::BETWEEN: + return parseBetweenExpr(expr); + case ExpressionClass::CONJUNCTION: return parseConjunctionExpr(expr); diff --git a/velox/duckdb/conversion/DuckWrapper.cpp b/velox/duckdb/conversion/DuckWrapper.cpp index 893ca5a3467a..5b2133069fad 100644 --- a/velox/duckdb/conversion/DuckWrapper.cpp +++ b/velox/duckdb/conversion/DuckWrapper.cpp @@ -236,19 +236,22 @@ VectorPtr toVeloxVector( return convert>( duckVector, veloxType, size, pool); case LogicalTypeId::DECIMAL: { + uint8_t width; + uint8_t scale; + type.GetDecimalProperties(width, scale); switch (type.InternalType()) { case PhysicalType::INT16: return convertDecimalToDouble( - duckVector, size, veloxType, pool, type.scale()); + duckVector, size, veloxType, pool, scale); case PhysicalType::INT32: return convertDecimalToDouble( - duckVector, size, veloxType, pool, type.scale()); + duckVector, size, veloxType, pool, scale); case PhysicalType::INT64: return convertDecimalToDouble( - duckVector, size, veloxType, pool, type.scale()); + duckVector, size, veloxType, pool, scale); case PhysicalType::INT128: return convertDecimalToDouble( - duckVector, size, veloxType, pool, type.scale()); + duckVector, size, veloxType, pool, scale); default: throw std::runtime_error( "unrecognized internal type for decimal (this shouldn't happen"); diff --git a/velox/duckdb/conversion/tests/DuckConversionTest.cpp b/velox/duckdb/conversion/tests/DuckConversionTest.cpp index f34663c25176..41bd8baa45ef 100644 --- a/velox/duckdb/conversion/tests/DuckConversionTest.cpp +++ b/velox/duckdb/conversion/tests/DuckConversionTest.cpp @@ -69,16 +69,14 @@ TEST(DuckConversionTest, duckValueToVariant) { } TEST(DuckConversionTest, duckValueToVariantUnsupported) { - auto unsupported = { - LogicalType::TIME, + std::vector 
unsupported = { LogicalType::TIME, LogicalType::TIMESTAMP, LogicalType::INTERVAL, - - LogicalType::STRUCT, - LogicalType::LIST, - LogicalType::BLOB, - }; + LogicalType::LIST({LogicalType::INTEGER}), + LogicalType::STRUCT( + {{"a", LogicalType::INTEGER}, {"b", LogicalType::TINYINT}}), + LogicalType::BLOB}; for (const auto& i : unsupported) { EXPECT_THROW(duckValueToVariant(Value(i)), std::runtime_error); diff --git a/velox/duckdb/functions/DuckFunctions.cpp b/velox/duckdb/functions/DuckFunctions.cpp index d5b53eb20205..39fd7c45e16d 100644 --- a/velox/duckdb/functions/DuckFunctions.cpp +++ b/velox/duckdb/functions/DuckFunctions.cpp @@ -469,7 +469,7 @@ struct DuckDBFunctionData { std::vector inputTypes; // dummy stuff, necessary to call the function but not actually used BoundConstantExpression expr; - ExpressionExecutorState executor_state; + ExpressionExecutorState executor_state{"N/A"}; ExpressionState state; }; diff --git a/velox/exec/tests/QueryAssertions.cpp b/velox/exec/tests/QueryAssertions.cpp index 6cc2705c76f5..2fe4db3419da 100644 --- a/velox/exec/tests/QueryAssertions.cpp +++ b/velox/exec/tests/QueryAssertions.cpp @@ -102,7 +102,7 @@ velox::variant rowVariantAt( const std::shared_ptr& rowType) { std::vector values; for (size_t i = 0; i < vector.struct_value.size(); ++i) { - auto currChild = vector.struct_value[i].second; + auto currChild = vector.struct_value[i]; auto currType = rowType->childAt(i)->kind(); if (currChild.is_null) { values.push_back(variant(currType)); diff --git a/velox/external/duckdb/duckdb.cpp b/velox/external/duckdb/duckdb.cpp index fc9a3c25e4e4..79a947e05c42 100644 --- a/velox/external/duckdb/duckdb.cpp +++ b/velox/external/duckdb/duckdb.cpp @@ -4,6 +4,10 @@ #error header mismatch #endif +#if (!defined(DEBUG) && !defined NDEBUG) +#define NDEBUG +#endif + @@ -156,9 +160,7 @@ namespace duckdb { //! 
A macro function in the catalog class MacroCatalogEntry : public StandardEntry { public: - MacroCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, CreateMacroInfo *info) - : StandardEntry(CatalogType::MACRO_ENTRY, schema, catalog, info->name), function(move(info->function)) { - } + MacroCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, CreateMacroInfo *info); //! The macro function unique_ptr function; @@ -241,6 +243,8 @@ class SequenceCatalogEntry : public StandardEntry { uint64_t usage_count; //! The sequence counter int64_t counter; + //! The most recently returned value + int64_t last_value; //! The increment value int64_t increment; //! The minimum value of the sequence @@ -363,16 +367,18 @@ class ViewCatalogEntry : public StandardEntry { namespace duckdb { class CatalogEntry; +enum class DependencyType { DEPENDENCY_REGULAR = 0, DEPENDENCY_AUTOMATIC = 1 }; + struct Dependency { - Dependency(CatalogEntry *entry, bool requires_cascade = true) + Dependency(CatalogEntry *entry, DependencyType dependency_type = DependencyType::DEPENDENCY_REGULAR) : // NOLINT: Allow implicit conversion from `CatalogEntry` - entry(entry), requires_cascade(requires_cascade) { + entry(entry), dependency_type(dependency_type) { } //! The catalog entry this depends on CatalogEntry *entry; - //! Whether or not this dependency requires a cascade to drop - bool requires_cascade; + //! The type of dependency + DependencyType dependency_type; }; struct DependencyHashFunction { @@ -393,6 +399,8 @@ using dependency_set_t = unordered_set + namespace duckdb { class Catalog; class ClientContext; @@ -406,8 +414,9 @@ class DependencyManager { //! Erase the object from the DependencyManager; this should only happen when the object itself is destroyed void EraseObject(CatalogEntry *object); - // //! Clear all the dependencies of all entries in the catalog set - void ClearDependencies(CatalogSet &set); + + //! 
Scans all dependencies, returning pairs of (object, dependent) + void Scan(const std::function &callback); private: Catalog &catalog; @@ -447,6 +456,7 @@ class DefaultSchemaGenerator : public DefaultGenerator { public: unique_ptr CreateDefaultEntry(ClientContext &context, const string &entry_name) override; + vector GetDefaultEntries() override; }; } // namespace duckdb @@ -472,7 +482,6 @@ CatalogEntry *Catalog::CreateTable(ClientContext &context, BoundCreateTableInfo } CatalogEntry *Catalog::CreateTable(ClientContext &context, SchemaCatalogEntry *schema, BoundCreateTableInfo *info) { - ModifyCatalog(); return schema->CreateTable(context, info); } @@ -482,7 +491,6 @@ CatalogEntry *Catalog::CreateView(ClientContext &context, CreateViewInfo *info) } CatalogEntry *Catalog::CreateView(ClientContext &context, SchemaCatalogEntry *schema, CreateViewInfo *info) { - ModifyCatalog(); return schema->CreateView(context, info); } @@ -492,7 +500,6 @@ CatalogEntry *Catalog::CreateSequence(ClientContext &context, CreateSequenceInfo } CatalogEntry *Catalog::CreateSequence(ClientContext &context, SchemaCatalogEntry *schema, CreateSequenceInfo *info) { - ModifyCatalog(); return schema->CreateSequence(context, info); } @@ -503,7 +510,6 @@ CatalogEntry *Catalog::CreateTableFunction(ClientContext &context, CreateTableFu CatalogEntry *Catalog::CreateTableFunction(ClientContext &context, SchemaCatalogEntry *schema, CreateTableFunctionInfo *info) { - ModifyCatalog(); return schema->CreateTableFunction(context, info); } @@ -514,7 +520,6 @@ CatalogEntry *Catalog::CreateCopyFunction(ClientContext &context, CreateCopyFunc CatalogEntry *Catalog::CreateCopyFunction(ClientContext &context, SchemaCatalogEntry *schema, CreateCopyFunctionInfo *info) { - ModifyCatalog(); return schema->CreateCopyFunction(context, info); } @@ -525,7 +530,6 @@ CatalogEntry *Catalog::CreatePragmaFunction(ClientContext &context, CreatePragma CatalogEntry *Catalog::CreatePragmaFunction(ClientContext &context, 
SchemaCatalogEntry *schema, CreatePragmaFunctionInfo *info) { - ModifyCatalog(); return schema->CreatePragmaFunction(context, info); } @@ -535,7 +539,6 @@ CatalogEntry *Catalog::CreateFunction(ClientContext &context, CreateFunctionInfo } CatalogEntry *Catalog::CreateFunction(ClientContext &context, SchemaCatalogEntry *schema, CreateFunctionInfo *info) { - ModifyCatalog(); return schema->CreateFunction(context, info); } @@ -545,18 +548,14 @@ CatalogEntry *Catalog::CreateCollation(ClientContext &context, CreateCollationIn } CatalogEntry *Catalog::CreateCollation(ClientContext &context, SchemaCatalogEntry *schema, CreateCollationInfo *info) { - ModifyCatalog(); return schema->CreateCollation(context, info); } CatalogEntry *Catalog::CreateSchema(ClientContext &context, CreateSchemaInfo *info) { - if (info->schema.empty()) { - throw CatalogException("Schema not specified"); - } + D_ASSERT(!info->schema.empty()); if (info->schema == TEMP_SCHEMA) { throw CatalogException("Cannot create built-in schema \"%s\"", info->schema); } - ModifyCatalog(); unordered_set dependencies; auto entry = make_unique(this, info->schema, info->internal); @@ -573,9 +572,7 @@ CatalogEntry *Catalog::CreateSchema(ClientContext &context, CreateSchemaInfo *in } void Catalog::DropSchema(ClientContext &context, DropInfo *info) { - if (info->name.empty()) { - throw CatalogException("Schema not specified"); - } + D_ASSERT(!info->name.empty()); ModifyCatalog(); if (!schemas->DropEntry(context, info->name, info->cascade)) { if (!info->if_exists) { @@ -591,9 +588,7 @@ void Catalog::DropEntry(ClientContext &context, DropInfo *info) { DropSchema(context, info); } else { if (info->schema.empty()) { - // invalid schema: check if the entry is in the temp schema - auto entry = GetEntry(context, info->type, TEMP_SCHEMA, info->name, true); - info->schema = entry ? 
TEMP_SCHEMA : DEFAULT_SCHEMA; + info->schema = DEFAULT_SCHEMA; } auto schema = GetSchema(context, info->schema); schema->DropEntry(context, info); @@ -602,9 +597,7 @@ void Catalog::DropEntry(ClientContext &context, DropInfo *info) { SchemaCatalogEntry *Catalog::GetSchema(ClientContext &context, const string &schema_name, QueryErrorContext error_context) { - if (schema_name.empty()) { - throw CatalogException("Schema not specified"); - } + D_ASSERT(!schema_name.empty()); if (schema_name == TEMP_SCHEMA) { return context.temporary_objects.get(); } @@ -623,31 +616,22 @@ void Catalog::ScanSchemas(ClientContext &context, std::functionGetEntry(context, type, name, if_exists, error_context); } -template <> -ViewCatalogEntry *Catalog::GetEntry(ClientContext &context, string schema_name, const string &name, bool if_exists, - QueryErrorContext error_context) { - auto entry = GetEntry(context, CatalogType::VIEW_ENTRY, move(schema_name), name, if_exists); - if (!entry) { - return nullptr; - } - if (entry->type != CatalogType::VIEW_ENTRY) { - throw CatalogException("%s is not a view", name); - } - return (ViewCatalogEntry *)entry; -} - template <> TableCatalogEntry *Catalog::GetEntry(ClientContext &context, string schema_name, const string &name, bool if_exists, QueryErrorContext error_context) { @@ -711,14 +695,15 @@ void Catalog::Alter(ClientContext &context, AlterInfo *info) { ModifyCatalog(); if (info->schema.empty()) { auto catalog_type = info->GetCatalogType(); - // invalid schema: first search the temporary schema - auto entry = GetEntry(context, catalog_type, TEMP_SCHEMA, info->name, true); - if (entry) { - // entry exists in temp schema: alter there - info->schema = TEMP_SCHEMA; - } else { - // if the entry does not exist in the temp schema, search in the default schema - info->schema = DEFAULT_SCHEMA; + // invalid schema: search the catalog search path + info->schema = DEFAULT_SCHEMA; + for (idx_t i = 0; i < context.catalog_search_path.size(); i++) { + auto entry = 
GetEntry(context, catalog_type, context.catalog_search_path[i], info->name, true); + if (entry) { + // entry exists in this schema: alter there + info->schema = context.catalog_search_path[i]; + break; + } } } auto schema = GetSchema(context, info->schema); @@ -729,8 +714,8 @@ idx_t Catalog::GetCatalogVersion() { return catalog_version; } -void Catalog::ModifyCatalog() { - catalog_version++; +idx_t Catalog::ModifyCatalog() { + return catalog_version++; } } // namespace duckdb @@ -759,17 +744,12 @@ IndexCatalogEntry::~IndexCatalogEntry() { if (!info || !index) { return; } - for (idx_t i = 0; i < info->indexes.size(); i++) { - if (info->indexes[i].get() == index) { - info->indexes.erase(info->indexes.begin() + i); - break; - } - } + info->indexes.RemoveIndex(index); } string IndexCatalogEntry::ToSQL() { if (sql.empty()) { - throw NotImplementedException("Cannot convert INDEX to SQL because it was not created with a SQL statement"); + throw InternalException("Cannot convert INDEX to SQL because it was not created with a SQL statement"); } return sql; } @@ -781,7 +761,14 @@ string IndexCatalogEntry::ToSQL() { namespace duckdb { +MacroCatalogEntry::MacroCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, CreateMacroInfo *info) + : StandardEntry(CatalogType::MACRO_ENTRY, schema, catalog, info->name), function(move(info->function)) { + this->temporary = info->temporary; + this->internal = info->internal; +} + void MacroCatalogEntry::Serialize(Serializer &serializer) { + D_ASSERT(!internal); serializer.WriteString(schema->name); serializer.WriteString(name); function->expression->Serialize(serializer); @@ -862,6 +849,7 @@ class DefaultFunctionGenerator : public DefaultGenerator { public: unique_ptr CreateDefaultEntry(ClientContext &context, const string &entry_name) override; + vector GetDefaultEntries() override; }; } // namespace duckdb @@ -890,6 +878,7 @@ class DefaultViewGenerator : public DefaultGenerator { public: unique_ptr CreateDefaultEntry(ClientContext 
&context, const string &entry_name) override; + vector GetDefaultEntries() override; }; } // namespace duckdb @@ -1020,11 +1009,12 @@ class UndoBuffer { } // namespace duckdb -#include + namespace duckdb { class SequenceCatalogEntry; +class ColumnData; class ClientContext; class CatalogEntry; class DataTable; @@ -1059,7 +1049,7 @@ class Transaction { transaction_t highest_active_query; //! The current active query for the transaction. Set to MAXIMUM_QUERY_ID if //! no query is active. - std::atomic active_query; + atomic active_query; //! The timestamp when the transaction started timestamp_t start_timestamp; //! The catalog version when the transaction was started @@ -1127,8 +1117,8 @@ SchemaCatalogEntry::SchemaCatalogEntry(Catalog *catalog, string name_p, bool int : CatalogEntry(CatalogType::SCHEMA_ENTRY, catalog, move(name_p)), tables(*catalog, make_unique(*catalog, this)), indexes(*catalog), table_functions(*catalog), copy_functions(*catalog), pragma_functions(*catalog), - functions(*catalog, name == DEFAULT_SCHEMA ? 
make_unique(*catalog, this) : nullptr), - sequences(*catalog), collations(*catalog) { + functions(*catalog, make_unique(*catalog, this)), sequences(*catalog), + collations(*catalog) { this->internal = internal; } @@ -1236,7 +1226,7 @@ CatalogEntry *SchemaCatalogEntry::CreateFunction(ClientContext &context, CreateF (CreateAggregateFunctionInfo *)info); break; default: - throw CatalogException("Unknown function type \"%s\"", CatalogTypeToString(info->type)); + throw InternalException("Unknown function type \"%s\"", CatalogTypeToString(info->type)); } return AddEntry(context, move(function), info->on_conflict); } @@ -1339,7 +1329,7 @@ CatalogSet &SchemaCatalogEntry::GetCatalogSet(CatalogType type) { case CatalogType::COLLATION_ENTRY: return collations; default: - throw CatalogException("Unsupported catalog type in schema"); + throw InternalException("Unsupported catalog type in schema"); } } @@ -1470,7 +1460,8 @@ class UniqueConstraint : public Constraint { : Constraint(ConstraintType::UNIQUE), index(index), is_primary_key(is_primary_key) { } UniqueConstraint(vector columns, bool is_primary_key) - : Constraint(ConstraintType::UNIQUE), index(INVALID_INDEX), columns(columns), is_primary_key(is_primary_key) { + : Constraint(ConstraintType::UNIQUE), index(INVALID_INDEX), columns(move(columns)), + is_primary_key(is_primary_key) { } //! The index of the column for which this constraint holds. 
Only used when the constraint relates to a single @@ -1538,12 +1529,21 @@ namespace duckdb { class BoundUniqueConstraint : public BoundConstraint { public: - BoundUniqueConstraint(unordered_set keys, bool is_primary_key) - : BoundConstraint(ConstraintType::UNIQUE), keys(keys), is_primary_key(is_primary_key) { + BoundUniqueConstraint(vector keys, unordered_set key_set, bool is_primary_key) + : BoundConstraint(ConstraintType::UNIQUE), keys(move(keys)), key_set(move(key_set)), + is_primary_key(is_primary_key) { +#ifdef DEBUG + D_ASSERT(keys.size() == key_set.size()); + for (auto &key : keys) { + D_ASSERT(key_set.find(key) != key_set.end()); + } +#endif } - //! The same keys but represented as an unordered set - unordered_set keys; + //! The keys that define the unique constraint + vector keys; + //! The same keys but stored as an unordered set + unordered_set key_set; //! Whether or not the unique constraint is a primary key bool is_primary_key; }; @@ -1725,7 +1725,15 @@ class WriteAheadLog { void WriteInsert(DataChunk &chunk); void WriteDelete(DataChunk &chunk); - void WriteUpdate(DataChunk &chunk, column_t col_idx); + //! Write a single (sub-) column update to the WAL. Chunk must be a pair of (COL, ROW_ID). + //! The column_path vector is a *path* towards a column within the table + //! i.e. if we have a table with a single column S STRUCT(A INT, B INT) + //! and we update the validity mask of "S.B" + //! the column path is: + //! 0 (first column of table) + //! -> 1 (second subcolumn of struct) + //! -> 0 (first subcolumn of INT) + void WriteUpdate(DataChunk &chunk, const vector &column_path); //! 
Truncate the WAL to a previous size, and clear anything currently set in the writer void Truncate(int64_t size); @@ -1833,7 +1841,6 @@ class StorageManager { - //===----------------------------------------------------------------------===// // DuckDB // @@ -1905,6 +1912,7 @@ void EncodeStringDataPrefix(data_ptr_t dataptr, string_t value, idx_t prefix_len } // namespace duckdb + namespace duckdb { class Key { @@ -2005,7 +2013,7 @@ class Node { //! Get the position of the first child that is greater or equal to the specific byte, or INVALID_INDEX if there are //! no children matching the criteria virtual idx_t GetChildGreaterEqual(uint8_t k, bool &equal) { - return INVALID_INDEX; + throw InternalException("Unimplemented GetChildGreaterEqual for ARTNode"); } //! Get the position of the biggest element in node virtual idx_t GetMin(); @@ -2261,15 +2269,14 @@ struct ARTIndexScanState : public IndexScanState { class ART : public Index { public: - ART(vector column_ids, vector> unbound_expressions, bool is_unique = false); + ART(const vector &column_ids, const vector> &unbound_expressions, + bool is_unique = false, bool is_primary = false); ~ART() override; //! Root of the tree unique_ptr tree; //! True if machine is little endian bool is_little_endian; - //! Whether or not the ART is an index built to enforce a UNIQUE constraint - bool is_unique; public: //! Initialize a scan on the index with the given expression and column ids @@ -2381,14 +2388,15 @@ class ParsedExpressionIterator { namespace duckdb { +class TableCatalogEntry; + //! 
The ALTER binder is responsible for binding an expression within alter statements class AlterBinder : public ExpressionBinder { public: - AlterBinder(Binder &binder, ClientContext &context, string table, vector &columns, - vector &bound_columns, LogicalType target_type); + AlterBinder(Binder &binder, ClientContext &context, TableCatalogEntry &table, vector &bound_columns, + LogicalType target_type); - string table; - vector &columns; + TableCatalogEntry &table; vector &bound_columns; protected: @@ -2457,13 +2465,32 @@ void TableCatalogEntry::AddLowerCaseAliases(unordered_map &nam } } +idx_t TableCatalogEntry::GetColumnIndex(string &column_name, bool if_exists) { + auto entry = name_map.find(column_name); + if (entry == name_map.end()) { + // entry not found: try lower-casing the name + entry = name_map.find(StringUtil::Lower(column_name)); + if (entry == name_map.end()) { + if (if_exists) { + return INVALID_INDEX; + } + throw BinderException("Table \"%s\" does not have a column with name \"%s\"", name, column_name); + } + } + column_name = columns[entry->second].name; + return idx_t(entry->second); +} + TableCatalogEntry::TableCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, BoundCreateTableInfo *info, std::shared_ptr inherited_storage) : StandardEntry(CatalogType::TABLE_ENTRY, schema, catalog, info->Base().table), storage(move(inherited_storage)), columns(move(info->Base().columns)), constraints(move(info->Base().constraints)), - bound_constraints(move(info->bound_constraints)), name_map(info->name_map) { + bound_constraints(move(info->bound_constraints)) { this->temporary = info->Base().temporary; // add lower case aliases + for (idx_t i = 0; i < columns.size(); i++) { + name_map[columns[i].name] = i; + } AddLowerCaseAliases(name_map); // add the "rowid" alias, if there is no rowid column specified in the table if (name_map.find("rowid") == name_map.end()) { @@ -2494,7 +2521,7 @@ TableCatalogEntry::TableCatalogEntry(Catalog *catalog, SchemaCatalogEntry 
*schem column_ids.push_back(key); } // create an adaptive radix tree around the expressions - auto art = make_unique(column_ids, move(unbound_expressions), true); + auto art = make_unique(column_ids, move(unbound_expressions), true, unique.is_primary_key); storage->AddIndex(move(art), bound_expressions); } } @@ -2557,20 +2584,15 @@ static void RenameExpression(ParsedExpression &expr, RenameColumnInfo &info) { unique_ptr TableCatalogEntry::RenameColumn(ClientContext &context, RenameColumnInfo &info) { auto create_info = make_unique(schema->name, name); create_info->temporary = temporary; - bool found = false; + idx_t rename_idx = GetColumnIndex(info.old_name); for (idx_t i = 0; i < columns.size(); i++) { ColumnDefinition copy = columns[i].Copy(); create_info->columns.push_back(move(copy)); - if (info.old_name == columns[i].name) { - D_ASSERT(!found); + if (rename_idx == i) { create_info->columns[i].name = info.new_name; - found = true; } } - if (!found) { - throw CatalogException("Table does not have a column with name \"%s\"", info.name); - } for (idx_t c_idx = 0; c_idx < constraints.size(); c_idx++) { auto copy = constraints[c_idx]->Copy(); switch (copy->type) { @@ -2594,7 +2616,7 @@ unique_ptr TableCatalogEntry::RenameColumn(ClientContext &context, break; } default: - throw CatalogException("Unsupported constraint for entry!"); + throw InternalException("Unsupported constraint for entry!"); } create_info->constraints.push_back(move(copy)); } @@ -2621,23 +2643,17 @@ unique_ptr TableCatalogEntry::AddColumn(ClientContext &context, Ad } unique_ptr TableCatalogEntry::RemoveColumn(ClientContext &context, RemoveColumnInfo &info) { - idx_t removed_index = INVALID_INDEX; auto create_info = make_unique(schema->name, name); create_info->temporary = temporary; - for (idx_t i = 0; i < columns.size(); i++) { - if (columns[i].name == info.removed_column) { - D_ASSERT(removed_index == INVALID_INDEX); - removed_index = i; - continue; - } - 
create_info->columns.push_back(columns[i].Copy()); - } + idx_t removed_index = GetColumnIndex(info.removed_column, info.if_exists); if (removed_index == INVALID_INDEX) { - if (!info.if_exists) { - throw CatalogException("Table does not have a column with name \"%s\"", info.removed_column); - } return nullptr; } + for (idx_t i = 0; i < columns.size(); i++) { + if (removed_index != i) { + create_info->columns.push_back(columns[i].Copy()); + } + } if (create_info->columns.empty()) { throw CatalogException("Cannot drop column: table only has one column remaining!"); } @@ -2708,19 +2724,15 @@ unique_ptr TableCatalogEntry::RemoveColumn(ClientContext &context, unique_ptr TableCatalogEntry::SetDefault(ClientContext &context, SetDefaultInfo &info) { auto create_info = make_unique(schema->name, name); - bool found = false; + idx_t default_idx = GetColumnIndex(info.column_name); for (idx_t i = 0; i < columns.size(); i++) { auto copy = columns[i].Copy(); - if (info.column_name == copy.name) { + if (default_idx == i) { // set the default value of this column copy.default_value = info.expression ? 
info.expression->Copy() : nullptr; - found = true; } create_info->columns.push_back(move(copy)); } - if (!found) { - throw BinderException("Table \"%s\" does not have a column with name \"%s\"", info.name, info.column_name); - } for (idx_t i = 0; i < constraints.size(); i++) { auto constraint = constraints[i]->Copy(); @@ -2734,19 +2746,15 @@ unique_ptr TableCatalogEntry::SetDefault(ClientContext &context, S unique_ptr TableCatalogEntry::ChangeColumnType(ClientContext &context, ChangeColumnTypeInfo &info) { auto create_info = make_unique(schema->name, name); - idx_t change_idx = INVALID_INDEX; + idx_t change_idx = GetColumnIndex(info.column_name); for (idx_t i = 0; i < columns.size(); i++) { auto copy = columns[i].Copy(); - if (info.column_name == copy.name) { + if (change_idx == i) { // set the default value of this column - change_idx = i; copy.type = info.target_type; } create_info->columns.push_back(move(copy)); } - if (change_idx == INVALID_INDEX) { - throw BinderException("Table \"%s\" does not have a column with name \"%s\"", info.name, info.column_name); - } for (idx_t i = 0; i < constraints.size(); i++) { auto constraint = constraints[i]->Copy(); @@ -2762,7 +2770,7 @@ unique_ptr TableCatalogEntry::ChangeColumnType(ClientContext &cont break; case ConstraintType::UNIQUE: { auto &bound_unique = (BoundUniqueConstraint &)*bound_constraints[i]; - if (bound_unique.keys.find(change_idx) != bound_unique.keys.end()) { + if (bound_unique.key_set.find(change_idx) != bound_unique.key_set.end()) { throw BinderException( "Cannot change the type of a column that has a UNIQUE or PRIMARY KEY constraint specified"); } @@ -2777,7 +2785,7 @@ unique_ptr TableCatalogEntry::ChangeColumnType(ClientContext &cont auto binder = Binder::CreateBinder(context); // bind the specified expression vector bound_columns; - AlterBinder expr_binder(*binder, context, name, columns, bound_columns, info.target_type); + AlterBinder expr_binder(*binder, context, *this, bound_columns, 
info.target_type); auto expression = info.expression->Copy(); auto bound_expression = expr_binder.Bind(expression); auto bound_create_info = binder->BindCreateTableInfo(move(create_info)); @@ -2807,19 +2815,8 @@ vector TableCatalogEntry::GetTypes() { return types; } -vector TableCatalogEntry::GetTypes(const vector &column_ids) { - vector result; - for (auto &index : column_ids) { - if (index == COLUMN_IDENTIFIER_ROW_ID) { - result.push_back(LOGICAL_ROW_TYPE); - } else { - result.push_back(columns[index].type); - } - } - return result; -} - void TableCatalogEntry::Serialize(Serializer &serializer) { + D_ASSERT(!internal); serializer.WriteString(schema->name); serializer.WriteString(name); D_ASSERT(columns.size() <= NumericLimits::Maximum()); @@ -2836,7 +2833,14 @@ void TableCatalogEntry::Serialize(Serializer &serializer) { string TableCatalogEntry::ToSQL() { std::stringstream ss; - ss << "CREATE TABLE " << KeywordHelper::WriteOptionallyQuoted(name) << "("; + + ss << "CREATE TABLE "; + + if (schema->name != DEFAULT_SCHEMA) { + ss << KeywordHelper::WriteOptionallyQuoted(schema->name) << "."; + } + + ss << KeywordHelper::WriteOptionallyQuoted(name) << "("; // find all columns that have NOT NULL specified, but are NOT primary key columns unordered_set not_null_columns; @@ -2851,7 +2855,7 @@ string TableCatalogEntry::ToSQL() { } else if (constraint->type == ConstraintType::UNIQUE) { auto &pk = (UniqueConstraint &)*constraint; vector constraint_columns = pk.columns; - if (pk.columns.empty()) { + if (pk.index != INVALID_INDEX) { // no columns specified: single column constraint if (pk.is_primary_key) { pk_columns.insert(pk.index); @@ -3027,9 +3031,7 @@ ViewCatalogEntry::ViewCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, } unique_ptr ViewCatalogEntry::AlterEntry(ClientContext &context, AlterInfo *info) { - if (internal) { - throw CatalogException("Cannot use ALTER VIEW to alter a system view"); - } + D_ASSERT(!internal); if (info->type != AlterType::ALTER_VIEW) 
{ throw CatalogException("Can only modify view with ALTER VIEW statement"); } @@ -3082,7 +3084,8 @@ unique_ptr ViewCatalogEntry::Deserialize(Deserializer &source) { string ViewCatalogEntry::ToSQL() { if (sql.empty()) { - throw NotImplementedException("Cannot convert VIEW to SQL because it was not created with a SQL statement"); + //! Return empty sql with view name so pragma view_tables don't complain + return sql; } return sql + "\n;"; } @@ -3110,9 +3113,31 @@ unique_ptr ViewCatalogEntry::Copy(ClientContext &context) { namespace duckdb { +CatalogEntry::CatalogEntry(CatalogType type, Catalog *catalog_p, string name_p) + : oid(catalog_p->ModifyCatalog()), type(type), catalog(catalog_p), set(nullptr), name(move(name_p)), deleted(false), + temporary(false), internal(false), parent(nullptr) { +} + CatalogEntry::~CatalogEntry() { } +void CatalogEntry::SetAsRoot() { +} + +// LCOV_EXCL_START +unique_ptr CatalogEntry::AlterEntry(ClientContext &context, AlterInfo *info) { + throw InternalException("Unsupported alter type for catalog entry!"); +} + +unique_ptr CatalogEntry::Copy(ClientContext &context) { + throw InternalException("Unsupported copy type for catalog entry!"); +} + +string CatalogEntry::ToSQL() { + throw InternalException("Unsupported catalog type for ToSQL()"); +} +// LCOV_EXCL_STOP + } // namespace duckdb @@ -3133,7 +3158,7 @@ CatalogEntry::~CatalogEntry() { -#include + namespace duckdb { @@ -3165,12 +3190,16 @@ class TransactionManager { string CommitTransaction(ClientContext &context, Transaction *transaction); //! Rollback the given transaction void RollbackTransaction(Transaction *transaction); - //! 
Add the catalog set - void AddCatalogSet(ClientContext &context, unique_ptr catalog_set); transaction_t GetQueryNumber() { return current_query_number++; } + transaction_t LowestActiveId() { + return lowest_active_id; + } + transaction_t LowestActiveStart() { + return lowest_active_start; + } void Checkpoint(ClientContext &context, bool force = false); @@ -3186,11 +3215,15 @@ class TransactionManager { //! The database instance DatabaseInstance &db; //! The current query number - std::atomic current_query_number; + atomic current_query_number; //! The current start timestamp used by transactions transaction_t current_start_timestamp; //! The current transaction ID used by transactions transaction_t current_transaction_id; + //! The lowest active transaction id + atomic lowest_active_id; + //! The lowest active transaction timestamp + atomic lowest_active_start; //! Set of currently running transactions vector> active_transactions; //! Set of recently committed transactions @@ -3372,7 +3405,7 @@ void CatalogSet::DropEntryInternal(ClientContext &context, idx_t entry_index, Ca // add this catalog to the lock set, if it is not there yet if (lock_set.find(this) == lock_set.end()) { - lock_set.insert(make_pair(this, std::unique_lock(catalog_lock))); + lock_set.insert(make_pair(this, unique_lock(catalog_lock))); } // create a new entry and replace the currently stored one @@ -3410,9 +3443,17 @@ bool CatalogSet::DropEntry(ClientContext &context, const string &name, bool casc return true; } -idx_t CatalogSet::GetEntryIndex(CatalogEntry *entry) { - D_ASSERT(mapping.find(entry->name) != mapping.end()); - return mapping[entry->name]->index; +void CatalogSet::CleanupEntry(CatalogEntry *catalog_entry) { + // destroy the backed up entry: it is no longer required + D_ASSERT(catalog_entry->parent); + if (catalog_entry->parent->type != CatalogType::UPDATED_ENTRY) { + lock_guard lock(catalog_lock); + if (!catalog_entry->deleted) { + // delete the entry from the dependency manager, if 
it is not deleted yet + catalog_entry->catalog->dependency_manager->EraseObject(catalog_entry); + } + catalog_entry->parent->child = move(catalog_entry->child); + } } bool CatalogSet::HasConflict(ClientContext &context, transaction_t timestamp) { @@ -3495,7 +3536,7 @@ bool CatalogSet::UseTimestamp(ClientContext &context, transaction_t timestamp) { return true; } if (timestamp < transaction.start_time) { - // this version was committed before we started the transaction + // this version was commited before we started the transaction return true; } return false; @@ -3542,38 +3583,61 @@ string CatalogSet::SimilarEntry(ClientContext &context, const string &name) { return result; } -CatalogEntry *CatalogSet::GetEntry(ClientContext &context, const string &name) { - lock_guard lock(catalog_lock); +CatalogEntry *CatalogSet::CreateEntryInternal(ClientContext &context, unique_ptr entry) { + if (mapping.find(entry->name) != mapping.end()) { + return nullptr; + } + auto &name = entry->name; + auto entry_index = current_entry++; + auto catalog_entry = entry.get(); - auto mapping_value = GetMapping(context, name, true); - if (mapping_value == nullptr || mapping_value->deleted) { - // no entry found with this name - if (defaults) { - // ... but this catalog set has a default map defined - // check if there is a default entry that we can create with this name - auto entry = defaults->CreateDefaultEntry(context, name); - if (entry) { - // there is a default entry! 
- auto entry_index = current_entry++; - auto catalog_entry = entry.get(); + entry->timestamp = 0; - entry->timestamp = 0; + PutMapping(context, name, entry_index); + mapping[name]->timestamp = 0; + entries[entry_index] = move(entry); + return catalog_entry; +} - PutMapping(context, name, entry_index); - mapping[name]->timestamp = 0; - entries[entry_index] = move(entry); - return catalog_entry; - } +CatalogEntry *CatalogSet::GetEntry(ClientContext &context, const string &name) { + unique_lock lock(catalog_lock); + auto mapping_value = GetMapping(context, name, true); + if (mapping_value != nullptr && !mapping_value->deleted) { + // we found an entry for this name + // check the version numbers + + auto catalog_entry = entries[mapping_value->index].get(); + CatalogEntry *current = GetEntryForTransaction(context, catalog_entry); + if (current->deleted || (current->name != name && !UseTimestamp(context, mapping_value->timestamp))) { + return nullptr; } + return current; + } + // no entry found with this name, check for defaults + if (!defaults || defaults->created_all_entries) { + // no defaults either: return null return nullptr; } - auto catalog_entry = entries[mapping_value->index].get(); - // if it does, we have to check version numbers - CatalogEntry *current = GetEntryForTransaction(context, catalog_entry); - if (current->deleted || (current->name != name && !UseTimestamp(context, mapping_value->timestamp))) { + // this catalog set has a default map defined + // check if there is a default entry that we can create with this name + lock.unlock(); + auto entry = defaults->CreateDefaultEntry(context, name); + + lock.lock(); + if (!entry) { + // no default entry return nullptr; } - return current; + // there is a default entry! create it + auto result = CreateEntryInternal(context, move(entry)); + if (result) { + return result; + } + // we found a default entry, but failed + // this means somebody else created the entry first + // just retry? 
+ lock.unlock(); + return GetEntry(context, name); } void CatalogSet::UpdateTimestamp(CatalogEntry *entry, transaction_t timestamp) { @@ -3581,13 +3645,9 @@ void CatalogSet::UpdateTimestamp(CatalogEntry *entry, transaction_t timestamp) { mapping[entry->name]->timestamp = timestamp; } -CatalogEntry *CatalogSet::GetRootEntry(const string &name) { - lock_guard lock(catalog_lock); - auto entry = mapping.find(name); - return entry == mapping.end() || entry->second->deleted ? nullptr : entries[entry->second->index].get(); -} - void CatalogSet::Undo(CatalogEntry *entry) { + lock_guard write_lock(catalog.write_lock); + lock_guard lock(catalog_lock); // entry has to be restored @@ -3636,6 +3696,47 @@ void CatalogSet::Undo(CatalogEntry *entry) { entry->catalog->ModifyCatalog(); } +void CatalogSet::Scan(ClientContext &context, const std::function &callback) { + // lock the catalog set + unique_lock lock(catalog_lock); + if (defaults && !defaults->created_all_entries) { + // this catalog set has a default set defined: + auto default_entries = defaults->GetDefaultEntries(); + for (auto &default_entry : default_entries) { + auto map_entry = mapping.find(default_entry); + if (map_entry == mapping.end()) { + // we unlock during the CreateEntry, since it might reference other catalog sets... 
+ // specifically for views this can happen since the view will be bound + lock.unlock(); + auto entry = defaults->CreateDefaultEntry(context, default_entry); + + lock.lock(); + CreateEntryInternal(context, move(entry)); + } + } + defaults->created_all_entries = true; + } + for (auto &kv : entries) { + auto entry = kv.second.get(); + entry = GetEntryForTransaction(context, entry); + if (!entry->deleted) { + callback(entry); + } + } +} + +void CatalogSet::Scan(const std::function &callback) { + // lock the catalog set + lock_guard lock(catalog_lock); + for (auto &kv : entries) { + auto entry = kv.second.get(); + entry = GetCommittedEntry(entry); + if (!entry->deleted) { + callback(entry); + } + } +} + } // namespace duckdb @@ -3646,17 +3747,88 @@ void CatalogSet::Undo(CatalogEntry *entry) { namespace duckdb { struct DefaultMacro { + const char *schema; const char *name; const char *parameters[8]; const char *macro; }; -static DefaultMacro internal_macros[] = {{"nullif", {"a", "b", nullptr}, "CASE WHEN a=b THEN NULL ELSE a END"}, - {nullptr, {nullptr}, nullptr}}; +static DefaultMacro internal_macros[] = { + {DEFAULT_SCHEMA, "current_user", {nullptr}, "'duckdb'"}, // user name of current execution context + {DEFAULT_SCHEMA, "current_catalog", {nullptr}, "'duckdb'"}, // name of current database (called "catalog" in the SQL standard) + {DEFAULT_SCHEMA, "current_database", {nullptr}, "'duckdb'"}, // name of current database + {DEFAULT_SCHEMA, "user", {nullptr}, "current_user"}, // equivalent to current_user + {DEFAULT_SCHEMA, "session_user", {nullptr}, "'duckdb'"}, // session user name + {"pg_catalog", "inet_client_addr", {nullptr}, "NULL"}, // address of the remote connection + {"pg_catalog", "inet_client_port", {nullptr}, "NULL"}, // port of the remote connection + {"pg_catalog", "inet_server_addr", {nullptr}, "NULL"}, // address of the local connection + {"pg_catalog", "inet_server_port", {nullptr}, "NULL"}, // port of the local connection + {"pg_catalog", 
"pg_my_temp_schema", {nullptr}, "0"}, // OID of session's temporary schema, or 0 if none + {"pg_catalog", "pg_is_other_temp_schema", {"schema_id", nullptr}, "false"}, // is schema another session's temporary schema? + + {"pg_catalog", "pg_conf_load_time", {nullptr}, "current_timestamp"}, // configuration load time + {"pg_catalog", "pg_postmaster_start_time", {nullptr}, "current_timestamp"}, // server start time + + {"pg_catalog", "pg_typeof", {"expression", nullptr}, "lower(typeof(expression))"}, // get the data type of any value + + // privilege functions + // {"has_any_column_privilege", {"user", "table", "privilege", nullptr}, "true"}, //boolean //does user have privilege for any column of table + {"pg_catalog", "has_any_column_privilege", {"table", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for any column of table + // {"has_column_privilege", {"user", "table", "column", "privilege", nullptr}, "true"}, //boolean //does user have privilege for column + {"pg_catalog", "has_column_privilege", {"table", "column", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for column + // {"has_database_privilege", {"user", "database", "privilege", nullptr}, "true"}, //boolean //does user have privilege for database + {"pg_catalog", "has_database_privilege", {"database", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for database + // {"has_foreign_data_wrapper_privilege", {"user", "fdw", "privilege", nullptr}, "true"}, //boolean //does user have privilege for foreign-data wrapper + {"pg_catalog", "has_foreign_data_wrapper_privilege", {"fdw", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for foreign-data wrapper + // {"has_function_privilege", {"user", "function", "privilege", nullptr}, "true"}, //boolean //does user have privilege for function + {"pg_catalog", "has_function_privilege", {"function", "privilege", nullptr}, "true"}, //boolean //does current 
user have privilege for function + // {"has_language_privilege", {"user", "language", "privilege", nullptr}, "true"}, //boolean //does user have privilege for language + {"pg_catalog", "has_language_privilege", {"language", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for language + // {"has_schema_privilege", {"user", "schema, privilege", nullptr}, "true"}, //boolean //does user have privilege for schema + {"pg_catalog", "has_schema_privilege", {"schema", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for schema + // {"has_sequence_privilege", {"user", "sequence", "privilege", nullptr}, "true"}, //boolean //does user have privilege for sequence + {"pg_catalog", "has_sequence_privilege", {"sequence", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for sequence + // {"has_server_privilege", {"user", "server", "privilege", nullptr}, "true"}, //boolean //does user have privilege for foreign server + {"pg_catalog", "has_server_privilege", {"server", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for foreign server + // {"has_table_privilege", {"user", "table", "privilege", nullptr}, "true"}, //boolean //does user have privilege for table + {"pg_catalog", "has_table_privilege", {"table", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for table + // {"has_tablespace_privilege", {"user", "tablespace", "privilege", nullptr}, "true"}, //boolean //does user have privilege for tablespace + {"pg_catalog", "has_tablespace_privilege", {"tablespace", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for tablespace + + // various postgres system functions + {"pg_catalog", "pg_get_viewdef", {"oid", nullptr}, "(select sql from duckdb_views() v where v.view_oid=oid)"}, + {"pg_catalog", "pg_get_constraintdef", {"constraint_oid", "pretty_bool", nullptr}, "(select constraint_text from duckdb_constraints() 
d_constraint where d_constraint.table_oid=constraint_oid/1000000 and d_constraint.constraint_index=constraint_oid%1000000)"}, + {"pg_catalog", "pg_get_expr", {"pg_node_tree", "relation_oid", nullptr}, "pg_node_tree"}, + {"pg_catalog", "format_pg_type", {"type_name", nullptr}, "case when type_name='FLOAT' then 'real' when type_name='DOUBLE' then 'double precision' when type_name='DECIMAL' then 'numeric' when type_name='VARCHAR' then 'character varying' when type_name='BLOB' then 'bytea' when type_name='TIMESTAMP' then 'timestamp without time zone' when type_name='TIME' then 'time without time zone' else lower(type_name) end"}, + {"pg_catalog", "format_type", {"type_oid", "typemod", nullptr}, "(select format_pg_type(type_name) from duckdb_types() t where t.type_oid=type_oid) || case when typemod>0 then concat('(', typemod/1000, ',', typemod%1000, ')') else '' end"}, + + {"pg_catalog", "pg_has_role", {"user", "role", "privilege", nullptr}, "true"}, //boolean //does user have privilege for role + {"pg_catalog", "pg_has_role", {"role", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for role + + {"pg_catalog", "col_description", {"table_oid", "column_number", nullptr}, "NULL"}, // get comment for a table column + {"pg_catalog", "obj_description", {"object_oid", "catalog_name", nullptr}, "NULL"}, // get comment for a database object + {"pg_catalog", "shobj_description", {"object_oid", "catalog_name", nullptr}, "NULL"}, // get comment for a shared database object + + // visibility functions + {"pg_catalog", "pg_collation_is_visible", {"collation_oid", nullptr}, "true"}, + {"pg_catalog", "pg_conversion_is_visible", {"conversion_oid", nullptr}, "true"}, + {"pg_catalog", "pg_function_is_visible", {"function_oid", nullptr}, "true"}, + {"pg_catalog", "pg_opclass_is_visible", {"opclass_oid", nullptr}, "true"}, + {"pg_catalog", "pg_operator_is_visible", {"operator_oid", nullptr}, "true"}, + {"pg_catalog", "pg_opfamily_is_visible", {"opclass_oid", 
nullptr}, "true"}, + {"pg_catalog", "pg_table_is_visible", {"table_oid", nullptr}, "true"}, + {"pg_catalog", "pg_ts_config_is_visible", {"config_oid", nullptr}, "true"}, + {"pg_catalog", "pg_ts_dict_is_visible", {"dict_oid", nullptr}, "true"}, + {"pg_catalog", "pg_ts_parser_is_visible", {"parser_oid", nullptr}, "true"}, + {"pg_catalog", "pg_ts_template_is_visible", {"template_oid", nullptr}, "true"}, + {"pg_catalog", "pg_type_is_visible", {"type_oid", nullptr}, "true"}, + + {DEFAULT_SCHEMA, "nullif", {"a", "b", nullptr}, "CASE WHEN a=b THEN NULL ELSE a END"}, + {nullptr, nullptr, {nullptr}, nullptr}}; static unique_ptr GetDefaultFunction(const string &schema, const string &name) { for (idx_t index = 0; internal_macros[index].name != nullptr; index++) { - if (internal_macros[index].name == name) { + if (internal_macros[index].schema == schema && internal_macros[index].name == name) { // parse the expression auto expressions = Parser::ParseExpressionList(internal_macros[index].macro); D_ASSERT(expressions.size() == 1); @@ -3668,7 +3840,7 @@ static unique_ptr GetDefaultFunction(const string &schema, c } auto bind_info = make_unique(); - bind_info->schema = DEFAULT_SCHEMA; + bind_info->schema = schema; bind_info->name = internal_macros[index].name; bind_info->temporary = true; bind_info->internal = true; @@ -3692,6 +3864,16 @@ unique_ptr DefaultFunctionGenerator::CreateDefaultEntry(ClientCont return nullptr; } +vector DefaultFunctionGenerator::GetDefaultEntries() { + vector result; + for (idx_t index = 0; internal_macros[index].name != nullptr; index++) { + if (internal_macros[index].schema == schema->name) { + result.emplace_back(internal_macros[index].name); + } + } + return result; +} + } // namespace duckdb @@ -3702,7 +3884,7 @@ struct DefaultSchema { const char *name; }; -static DefaultSchema internal_schemas[] = {{"information_schema"}, {nullptr}}; +static DefaultSchema internal_schemas[] = {{"information_schema"}, {"pg_catalog"}, {nullptr}}; static bool 
GetDefaultSchema(const string &schema) { for (idx_t index = 0; internal_schemas[index].name != nullptr; index++) { @@ -3723,6 +3905,14 @@ unique_ptr DefaultSchemaGenerator::CreateDefaultEntry(ClientContex return nullptr; } +vector DefaultSchemaGenerator::GetDefaultEntries() { + vector result; + for (idx_t index = 0; internal_schemas[index].name != nullptr; index++) { + result.emplace_back(internal_schemas[index].name); + } + return result; +} + } // namespace duckdb @@ -3741,13 +3931,37 @@ struct DefaultView { static DefaultView internal_views[] = { {DEFAULT_SCHEMA, "pragma_database_list", "SELECT * FROM pragma_database_list()"}, - {DEFAULT_SCHEMA, "sqlite_master", "SELECT * FROM sqlite_master()"}, - {DEFAULT_SCHEMA, "sqlite_schema", "SELECT * FROM sqlite_master()"}, - {DEFAULT_SCHEMA, "sqlite_temp_master", "SELECT * FROM sqlite_master()"}, - {DEFAULT_SCHEMA, "sqlite_temp_schema", "SELECT * FROM sqlite_master()"}, - {"information_schema", "columns", "SELECT * FROM information_schema_columns()"}, - {"information_schema", "schemata", "SELECT * FROM information_schema_schemata()"}, - {"information_schema", "tables", "SELECT * FROM information_schema_tables()"}, + {DEFAULT_SCHEMA, "sqlite_master", "select 'table' \"type\", table_name \"name\", table_name \"tbl_name\", 0 rootpage, sql from duckdb_tables union all select 'view' \"type\", view_name \"name\", view_name \"tbl_name\", 0 rootpage, sql from duckdb_views union all select 'index' \"type\", index_name \"name\", table_name \"tbl_name\", 0 rootpage, sql from duckdb_indexes;"}, + {DEFAULT_SCHEMA, "sqlite_schema", "SELECT * FROM sqlite_master"}, + {DEFAULT_SCHEMA, "sqlite_temp_master", "SELECT * FROM sqlite_master"}, + {DEFAULT_SCHEMA, "sqlite_temp_schema", "SELECT * FROM sqlite_master"}, + {DEFAULT_SCHEMA, "duckdb_constraints", "SELECT * FROM duckdb_constraints()"}, + {DEFAULT_SCHEMA, "duckdb_columns", "SELECT * FROM duckdb_columns() WHERE NOT internal"}, + {DEFAULT_SCHEMA, "duckdb_indexes", "SELECT * FROM 
duckdb_indexes()"}, + {DEFAULT_SCHEMA, "duckdb_schemas", "SELECT * FROM duckdb_schemas() WHERE NOT internal"}, + {DEFAULT_SCHEMA, "duckdb_tables", "SELECT * FROM duckdb_tables() WHERE NOT internal"}, + {DEFAULT_SCHEMA, "duckdb_types", "SELECT * FROM duckdb_types()"}, + {DEFAULT_SCHEMA, "duckdb_views", "SELECT * FROM duckdb_views() WHERE NOT internal"}, + {"pg_catalog", "pg_am", "SELECT 0 oid, 'art' amname, NULL amhandler, 'i' amtype"}, + {"pg_catalog", "pg_attribute", "SELECT table_oid attrelid, column_name attname, data_type_id atttypid, 0 attstattarget, NULL attlen, column_index attnum, 0 attndims, -1 attcacheoff, case when data_type ilike '%decimal%' then numeric_precision*1000+numeric_scale else -1 end atttypmod, false attbyval, NULL attstorage, NULL attalign, NOT is_nullable attnotnull, column_default IS NOT NULL atthasdef, false atthasmissing, '' attidentity, '' attgenerated, false attisdropped, true attislocal, 0 attinhcount, 0 attcollation, NULL attcompression, NULL attacl, NULL attoptions, NULL attfdwoptions, NULL attmissingval FROM duckdb_columns()"}, + {"pg_catalog", "pg_attrdef", "SELECT column_index oid, table_oid adrelid, column_index adnum, column_default adbin from duckdb_columns() where column_default is not null;"}, + {"pg_catalog", "pg_class", "SELECT table_oid oid, table_name relname, schema_oid relnamespace, 0 reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, estimated_size::real reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, index_count > 0 relhasindex, false relisshared, case when temporary then 't' else 'p' end relpersistence, 'r' relkind, column_count relnatts, check_constraint_count relchecks, false relhasoids, has_primary_key relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM 
duckdb_tables() UNION ALL SELECT view_oid oid, view_name relname, schema_oid relnamespace, 0 reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, 0 reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, false relhasindex, false relisshared, case when temporary then 't' else 'p' end relpersistence, 'v' relkind, column_count relnatts, 0 relchecks, false relhasoids, false relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM duckdb_views() UNION ALL SELECT sequence_oid oid, sequence_name relname, schema_oid relnamespace, 0 reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, 0 reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, false relhasindex, false relisshared, case when temporary then 't' else 'p' end relpersistence, 'S' relkind, 0 relnatts, 0 relchecks, false relhasoids, false relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM duckdb_sequences() UNION ALL SELECT index_oid oid, index_name relname, schema_oid relnamespace, 0 reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, 0 reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, false relhasindex, false relisshared, 't' relpersistence, 'i' relkind, NULL relnatts, 0 relchecks, false relhasoids, false relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM duckdb_indexes()"}, + 
{"pg_catalog", "pg_constraint", "SELECT table_oid*1000000+constraint_index oid, constraint_text conname, schema_oid connamespace, CASE WHEN constraint_type='CHECK' then 'c' WHEN constraint_type='UNIQUE' then 'u' WHEN constraint_type='PRIMARY KEY' THEN 'p' ELSE 'x' END contype, false condeferrable, false condeferred, true convalidated, table_oid conrelid, 0 contypid, 0 conindid, 0 conparentid, 0 confrelid, NULL confupdtype, NULL confdeltype, NULL confmatchtype, true conislocal, 0 coninhcount, false connoinherit, constraint_column_indexes conkey, NULL confkey, NULL conpfeqop, NULL conppeqop, NULL conffeqop, NULL conexclop, expression conbin FROM duckdb_constraints()"}, + {"pg_catalog", "pg_depend", "SELECT * FROM duckdb_dependencies()"}, + {"pg_catalog", "pg_description", "SELECT NULL objoid, NULL classoid, NULL objsubid, NULL description WHERE 1=0"}, + {"pg_catalog", "pg_enum", "SELECT NULL oid, NULL enumtypid, NULL enumsortorder, NULL enumlabel WHERE 1=0"}, + {"pg_catalog", "pg_index", "SELECT index_oid indexrelid, table_oid indrelid, 0 indnatts, 0 indnkeyatts, is_unique indisunique, is_primary indisprimary, false indisexclusion, true indimmediate, false indisclustered, true indisvalid, false indcheckxmin, true indisready, true indislive, false indisreplident, NULL::INT[] indkey, NULL::OID[] indcollation, NULL::OID[] indclass, NULL::INT[] indoption, expressions indexprs, NULL indpred FROM duckdb_indexes()"}, + {"pg_catalog", "pg_indexes", "SELECT schema_name schemaname, table_name tablename, index_name indexname, NULL \"tablespace\", sql indexdef FROM duckdb_indexes()"}, + {"pg_catalog", "pg_namespace", "SELECT oid, schema_name nspname, 0 nspowner, NULL nspacl FROM duckdb_schemas()"}, + {"pg_catalog", "pg_sequence", "SELECT sequence_oid seqrelid, 0 seqtypid, start_value seqstart, increment_by seqincrement, max_value seqmax, min_value seqmin, 0 seqcache, cycle seqcycle FROM duckdb_sequences()"}, + {"pg_catalog", "pg_sequences", "SELECT schema_name schemaname, 
sequence_name sequencename, 'duckdb' sequenceowner, 0 data_type, start_value, min_value, max_value, increment_by, cycle, 0 cache_size, last_value FROM duckdb_sequences()"}, + {"pg_catalog", "pg_tables", "SELECT schema_name schemaname, table_name tablename, 'duckdb' tableowner, NULL \"tablespace\", index_count > 0 hasindexes, false hasrules, false hastriggers FROM duckdb_tables()"}, + {"pg_catalog", "pg_tablespace", "SELECT 0 oid, 'pg_default' spcname, 0 spcowner, NULL spcacl, NULL spcoptions"}, + {"pg_catalog", "pg_type", "SELECT type_oid oid, format_pg_type(type_name) typname, schema_oid typnamespace, 0 typowner, type_size typlen, false typbyval, 'b' typtype, CASE WHEN type_category='NUMERIC' THEN 'N' WHEN type_category='STRING' THEN 'S' WHEN type_category='DATETIME' THEN 'D' WHEN type_category='BOOLEAN' THEN 'B' WHEN type_category='COMPOSITE' THEN 'C' WHEN type_category='USER' THEN 'U' ELSE 'X' END typcategory, false typispreferred, true typisdefined, NULL typdelim, NULL typrelid, NULL typsubscript, NULL typelem, NULL typarray, NULL typinput, NULL typoutput, NULL typreceive, NULL typsend, NULL typmodin, NULL typmodout, NULL typanalyze, 'd' typalign, 'p' typstorage, NULL typnotnull, NULL typbasetype, NULL typtypmod, NULL typndims, NULL typcollation, NULL typdefaultbin, NULL typdefault, NULL typacl FROM duckdb_types();"}, + {"pg_catalog", "pg_views", "SELECT schema_name schemaname, view_name viewname, 'duckdb' viewowner, sql definition FROM duckdb_views()"}, + {"information_schema", "columns", "SELECT NULL table_catalog, schema_name table_schema, table_name, column_name, column_index ordinal_position, column_default, CASE WHEN is_nullable THEN 'YES' ELSE 'NO' END is_nullable, data_type, character_maximum_length, NULL character_octet_length, numeric_precision, numeric_precision_radix, numeric_scale, NULL datetime_precision, NULL interval_type, NULL interval_precision, NULL character_set_catalog, NULL character_set_schema, NULL character_set_name, NULL 
collation_catalog, NULL collation_schema, NULL collation_name, NULL domain_catalog, NULL domain_schema, NULL domain_name, NULL udt_catalog, NULL udt_schema, NULL udt_name, NULL scope_catalog, NULL scope_schema, NULL scope_name, NULL maximum_cardinality, NULL dtd_identifier, NULL is_self_referencing, NULL is_identity, NULL identity_generation, NULL identity_start, NULL identity_increment, NULL identity_maximum, NULL identity_minimum, NULL identity_cycle, NULL is_generated, NULL generation_expression, NULL is_updatable FROM duckdb_columns;"}, + {"information_schema", "schemata", "SELECT NULL catalog_name, schema_name, 'duckdb' schema_owner, NULL default_character_set_catalog, NULL default_character_set_schema, NULL default_character_set_name, sql sql_path FROM duckdb_schemas()"}, + {"information_schema", "tables", "SELECT NULL table_catalog, schema_name table_schema, table_name, CASE WHEN temporary THEN 'LOCAL TEMPORARY' ELSE 'BASE TABLE' END table_type, NULL self_referencing_column_name, NULL reference_generation, NULL user_defined_type_catalog, NULL user_defined_type_schema, NULL user_defined_type_name, 'YES' is_insertable_into, 'NO' is_typed, CASE WHEN temporary THEN 'PRESERVE' ELSE NULL END commit_action FROM duckdb_tables() UNION ALL SELECT NULL table_catalog, schema_name table_schema, view_name table_name, 'VIEW' table_type, NULL self_referencing_column_name, NULL reference_generation, NULL user_defined_type_catalog, NULL user_defined_type_schema, NULL user_defined_type_name, 'NO' is_insertable_into, 'NO' is_typed, NULL commit_action FROM duckdb_views;"}, {nullptr, nullptr, nullptr}}; static unique_ptr GetDefaultView(const string &schema, const string &name) { @@ -3785,6 +3999,17 @@ unique_ptr DefaultViewGenerator::CreateDefaultEntry(ClientContext return nullptr; } +vector DefaultViewGenerator::GetDefaultEntries() { + vector result; + for (idx_t index = 0; internal_views[index].name != nullptr; index++) { + if (internal_views[index].schema == schema->name) { + 
result.emplace_back(internal_views[index].name); + } + } + return result; + +} + } // namespace duckdb @@ -3806,10 +4031,11 @@ void DependencyManager::AddObject(ClientContext &context, CatalogEntry *object, } } // indexes do not require CASCADE to be dropped, they are simply always dropped along with the table - bool requires_cascade = object->type != CatalogType::INDEX_ENTRY; + auto dependency_type = object->type == CatalogType::INDEX_ENTRY ? DependencyType::DEPENDENCY_AUTOMATIC + : DependencyType::DEPENDENCY_REGULAR; // add the object to the dependents_map of each object that it depends on for (auto &dependency : dependencies) { - dependents_map[dependency].insert(Dependency(object, requires_cascade)); + dependents_map[dependency].insert(Dependency(object, dependency_type)); } // create the dependents map for this object: it starts out empty dependents_map[object] = dependency_set_t(); @@ -3837,7 +4063,7 @@ void DependencyManager::DropObject(ClientContext &context, CatalogEntry *object, continue; } // conflict: attempting to delete this object but the dependent object still exists - if (cascade || !dep.requires_cascade) { + if (cascade || dep.dependency_type == DependencyType::DEPENDENCY_AUTOMATIC) { // cascade: drop the dependent object catalog_set.DropEntryInternal(context, entry_index, *dependency_entry, cascade, lock_set); } else { @@ -3882,7 +4108,6 @@ void DependencyManager::AlterObject(ClientContext &context, CatalogEntry *old_ob void DependencyManager::EraseObject(CatalogEntry *object) { // obtain the writing lock - lock_guard write_lock(catalog.write_lock); EraseObjectInternal(object); } @@ -3906,16 +4131,11 @@ void DependencyManager::EraseObjectInternal(CatalogEntry *object) { dependencies_map.erase(object); } -void DependencyManager::ClearDependencies(CatalogSet &set) { - // obtain the writing lock +void DependencyManager::Scan(const std::function &callback) { lock_guard write_lock(catalog.write_lock); - - // iterate over the objects in the CatalogSet 
- for (auto &entry : set.entries) { - CatalogEntry *centry = entry.second.get(); - while (centry) { - EraseObjectInternal(centry); - centry = centry->child.get(); + for (auto &entry : dependents_map) { + for (auto &dependent : entry.second) { + callback(entry.first, dependent.entry, dependent.dependency_type); } } } @@ -3923,172 +4143,274 @@ void DependencyManager::ClearDependencies(CatalogSet &set) { } // namespace duckdb - namespace duckdb { -void DuckDBAssertInternal(bool condition, const char *condition_name, const char *file, int linenr) { - if (condition) { +AllocatedData::AllocatedData(Allocator &allocator, data_ptr_t pointer, idx_t allocated_size) + : allocator(allocator), pointer(pointer), allocated_size(allocated_size) { +} +AllocatedData::~AllocatedData() { + Reset(); +} + +void AllocatedData::Reset() { + if (!pointer) { return; } - throw InternalException("Assertion triggered in file \"%s\" on line %d: %s", file, linenr, condition_name); + allocator.FreeData(pointer, allocated_size); + pointer = nullptr; } -} // namespace duckdb - +Allocator::Allocator() + : allocate_function(Allocator::DefaultAllocate), free_function(Allocator::DefaultFree), + reallocate_function(Allocator::DefaultReallocate) { +} -#include -#include -#include // strlen() on Solaris +Allocator::Allocator(allocate_function_ptr_t allocate_function_p, free_function_ptr_t free_function_p, + reallocate_function_ptr_t reallocate_function_p, unique_ptr private_data) + : allocate_function(allocate_function_p), free_function(free_function_p), + reallocate_function(reallocate_function_p), private_data(move(private_data)) { +} -namespace duckdb { +data_ptr_t Allocator::AllocateData(idx_t size) { + return allocate_function(private_data.get(), size); +} -bool IsLittleEndian() { - int n = 1; - if (*(char *)&n == 1) { - return true; - } else { - return false; +void Allocator::FreeData(data_ptr_t pointer, idx_t size) { + if (!pointer) { + return; } + return free_function(private_data.get(), pointer, 
size); } -uint8_t FlipSign(uint8_t key_byte) { - return key_byte ^ 128; +data_ptr_t Allocator::ReallocateData(data_ptr_t pointer, idx_t size) { + if (!pointer) { + return pointer; + } + return reallocate_function(private_data.get(), pointer, size); } -uint32_t EncodeFloat(float x) { - uint64_t buff; +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/arrow_wrapper.hpp +// +// +//===----------------------------------------------------------------------===// - //! zero - if (x == 0) { - buff = 0; - buff |= (1u << 31); - return buff; - } - //! infinity - if (x > FLT_MAX) { - return UINT_MAX; - } - //! -infinity - if (x < -FLT_MAX) { - return 0; - } - buff = Load((const_data_ptr_t)&x); - if ((buff & (1u << 31)) == 0) { //! +0 and positive numbers - buff |= (1u << 31); - } else { //! negative numbers - buff = ~buff; //! complement 1 - } - return buff; -} -uint64_t EncodeDouble(double x) { - uint64_t buff; - //! zero - if (x == 0) { - buff = 0; - buff += (1ull << 63); - return buff; + + + +//! Here we have the internal duckdb classes that interact with Arrow's Internal Header (i.e., duckdb/commons/arrow.hpp) +namespace duckdb { +class ArrowSchemaWrapper { +public: + ArrowSchema arrow_schema; + + ArrowSchemaWrapper() { + arrow_schema.release = nullptr; } - //! infinity - if (x > DBL_MAX) { - return ULLONG_MAX; + + ~ArrowSchemaWrapper(); +}; +class ArrowArrayWrapper { +public: + ArrowArray arrow_array; + ArrowArrayWrapper() { + arrow_array.length = 0; + arrow_array.release = nullptr; } - //! 
-infinity - if (x < -DBL_MAX) { - return 0; + ~ArrowArrayWrapper(); +}; + +class ArrowArrayStreamWrapper { +public: + ArrowArrayStream arrow_array_stream; + int64_t number_of_rows; + void GetSchema(ArrowSchemaWrapper &schema); + + unique_ptr GetNextChunk(); + + const char *GetError(); + + ~ArrowArrayStreamWrapper(); + ArrowArrayStreamWrapper() { + arrow_array_stream.release = nullptr; } - buff = Load((const_data_ptr_t)&x); - if (buff < (1ull << 63)) { //! +0 and positive numbers - buff += (1ull << 63); - } else { //! negative numbers - buff = ~buff; //! complement 1 +}; + +class ResultArrowArrayStreamWrapper { +public: + explicit ResultArrowArrayStreamWrapper(unique_ptr result); + ArrowArrayStream stream; + unique_ptr result; + std::string last_error; + +private: + static int MyStreamGetSchema(struct ArrowArrayStream *stream, struct ArrowSchema *out); + static int MyStreamGetNext(struct ArrowArrayStream *stream, struct ArrowArray *out); + static void MyStreamRelease(struct ArrowArrayStream *stream); + static const char *MyStreamGetLastError(struct ArrowArrayStream *stream); +}; + +} // namespace duckdb + + + + + + +namespace duckdb { + +ArrowSchemaWrapper::~ArrowSchemaWrapper() { + if (arrow_schema.release) { + for (int64_t child_idx = 0; child_idx < arrow_schema.n_children; child_idx++) { + auto &child = *arrow_schema.children[child_idx]; + if (child.release) { + child.release(&child); + } + } + arrow_schema.release(&arrow_schema); + arrow_schema.release = nullptr; } - return buff; } -template <> -void EncodeData(data_ptr_t dataptr, bool value, bool is_little_endian) { - Store(value ? 
1 : 0, dataptr); +ArrowArrayWrapper::~ArrowArrayWrapper() { + if (arrow_array.release) { + for (int64_t child_idx = 0; child_idx < arrow_array.n_children; child_idx++) { + auto &child = *arrow_array.children[child_idx]; + if (child.release) { + child.release(&child); + } + } + arrow_array.release(&arrow_array); + arrow_array.release = nullptr; + } } -template <> -void EncodeData(data_ptr_t dataptr, int8_t value, bool is_little_endian) { - Store(value, dataptr); - dataptr[0] = FlipSign(dataptr[0]); +ArrowArrayStreamWrapper::~ArrowArrayStreamWrapper() { + if (arrow_array_stream.release) { + arrow_array_stream.release(&arrow_array_stream); + arrow_array_stream.release = nullptr; + } } -template <> -void EncodeData(data_ptr_t dataptr, int16_t value, bool is_little_endian) { - Store(is_little_endian ? BSWAP16(value) : value, dataptr); - dataptr[0] = FlipSign(dataptr[0]); +void ArrowArrayStreamWrapper::GetSchema(ArrowSchemaWrapper &schema) { + D_ASSERT(arrow_array_stream.get_schema); + // LCOV_EXCL_START + if (arrow_array_stream.get_schema(&arrow_array_stream, &schema.arrow_schema)) { + throw InvalidInputException("arrow_scan: get_schema failed(): %s", string(GetError())); + } + if (!schema.arrow_schema.release) { + throw InvalidInputException("arrow_scan: released schema passed"); + } + if (schema.arrow_schema.n_children < 1) { + throw InvalidInputException("arrow_scan: empty schema passed"); + } + // LCOV_EXCL_STOP } -template <> -void EncodeData(data_ptr_t dataptr, int32_t value, bool is_little_endian) { - Store(is_little_endian ? 
BSWAP32(value) : value, dataptr); - dataptr[0] = FlipSign(dataptr[0]); -} +unique_ptr ArrowArrayStreamWrapper::GetNextChunk() { + auto current_chunk = make_unique(); + if (arrow_array_stream.get_next(&arrow_array_stream, ¤t_chunk->arrow_array)) { // LCOV_EXCL_START + throw InvalidInputException("arrow_scan: get_next failed(): %s", string(GetError())); + } // LCOV_EXCL_STOP -template <> -void EncodeData(data_ptr_t dataptr, int64_t value, bool is_little_endian) { - Store(is_little_endian ? BSWAP64(value) : value, dataptr); - dataptr[0] = FlipSign(dataptr[0]); + return current_chunk; } -template <> -void EncodeData(data_ptr_t dataptr, uint8_t value, bool is_little_endian) { - Store(value, dataptr); -} +const char *ArrowArrayStreamWrapper::GetError() { // LCOV_EXCL_START + return arrow_array_stream.get_last_error(&arrow_array_stream); +} // LCOV_EXCL_STOP -template <> -void EncodeData(data_ptr_t dataptr, uint16_t value, bool is_little_endian) { - Store(is_little_endian ? BSWAP16(value) : value, dataptr); +int ResultArrowArrayStreamWrapper::MyStreamGetSchema(struct ArrowArrayStream *stream, struct ArrowSchema *out) { + if (!stream->release) { + return -1; + } + auto my_stream = (ResultArrowArrayStreamWrapper *)stream->private_data; + auto &result = *my_stream->result; + if (!result.success) { + my_stream->last_error = "Query Failed"; + return -1; + } + if (result.type == QueryResultType::STREAM_RESULT) { + auto &stream_result = (StreamQueryResult &)result; + if (!stream_result.is_open) { + my_stream->last_error = "Query Stream is closed"; + return -1; + } + } + result.ToArrowSchema(out); + return 0; } -template <> -void EncodeData(data_ptr_t dataptr, uint32_t value, bool is_little_endian) { - Store(is_little_endian ? 
BSWAP32(value) : value, dataptr); +int ResultArrowArrayStreamWrapper::MyStreamGetNext(struct ArrowArrayStream *stream, struct ArrowArray *out) { + if (!stream->release) { + return -1; + } + auto my_stream = (ResultArrowArrayStreamWrapper *)stream->private_data; + auto &result = *my_stream->result; + if (!result.success) { + my_stream->last_error = "Query Failed"; + return -1; + } + if (result.type == QueryResultType::STREAM_RESULT) { + auto &stream_result = (StreamQueryResult &)result; + if (!stream_result.is_open) { + my_stream->last_error = "Query Stream is closed"; + return -1; + } + } + auto data_chunk = result.Fetch(); + if (!data_chunk) { + //! Nothing to output + out->release = nullptr; + return 0; + } + data_chunk->ToArrowArray(out); + return 0; } -template <> -void EncodeData(data_ptr_t dataptr, uint64_t value, bool is_little_endian) { - Store(is_little_endian ? BSWAP64(value) : value, dataptr); +void ResultArrowArrayStreamWrapper::MyStreamRelease(struct ArrowArrayStream *stream) { + if (!stream->release) { + return; + } + stream->release = nullptr; + delete (ResultArrowArrayStreamWrapper *)stream->private_data; } -template <> -void EncodeData(data_ptr_t dataptr, hugeint_t value, bool is_little_endian) { - EncodeData(dataptr, value.upper, is_little_endian); - EncodeData(dataptr + sizeof(value.upper), value.lower, is_little_endian); +const char *ResultArrowArrayStreamWrapper::MyStreamGetLastError(struct ArrowArrayStream *stream) { + if (!stream->release) { + return "stream was released"; + } + D_ASSERT(stream->private_data); + auto my_stream = (ResultArrowArrayStreamWrapper *)stream->private_data; + return my_stream->last_error.c_str(); } +ResultArrowArrayStreamWrapper::ResultArrowArrayStreamWrapper(unique_ptr result_p) + : result(move(result_p)) { + //! 
We first initialize the private data of the stream + stream.private_data = this; -template <> -void EncodeData(data_ptr_t dataptr, float value, bool is_little_endian) { - uint32_t converted_value = EncodeFloat(value); - Store(is_little_endian ? BSWAP32(converted_value) : converted_value, dataptr); + //! We initialize the stream functions + stream.get_schema = ResultArrowArrayStreamWrapper::MyStreamGetSchema; + stream.get_next = ResultArrowArrayStreamWrapper::MyStreamGetNext; + stream.release = ResultArrowArrayStreamWrapper::MyStreamRelease; + stream.get_last_error = ResultArrowArrayStreamWrapper::MyStreamGetLastError; } -template <> -void EncodeData(data_ptr_t dataptr, double value, bool is_little_endian) { - uint64_t converted_value = EncodeDouble(value); - Store(is_little_endian ? BSWAP64(converted_value) : converted_value, dataptr); -} +} // namespace duckdb -template <> -void EncodeData(data_ptr_t dataptr, interval_t value, bool is_little_endian) { - EncodeData(dataptr, value.months, is_little_endian); - dataptr += sizeof(value.months); - EncodeData(dataptr, value.days, is_little_endian); - dataptr += sizeof(value.days); - EncodeData(dataptr, value.micros, is_little_endian); -} -void EncodeStringDataPrefix(data_ptr_t dataptr, string_t value, idx_t prefix_len) { - auto len = value.GetSize(); - memcpy(dataptr, value.GetDataUnsafe(), MinValue(len, prefix_len)); - if (len < prefix_len) { - memset(dataptr + len, '\0', prefix_len - len); +namespace duckdb { + +void DuckDBAssertInternal(bool condition, const char *condition_name, const char *file, int linenr) { + if (condition) { + return; } + throw InternalException("Assertion triggered in file \"%s\" on line %d: %s", file, linenr, condition_name); } } // namespace duckdb @@ -4144,6 +4466,7 @@ const sel_t ZERO_VECTOR[STANDARD_VECTOR_SIZE] = {0}; const double PI = 3.141592653589793; const transaction_t TRANSACTION_ID_START = 4611686018427388000ULL; // 2^62 +const transaction_t MAX_TRANSACTION_ID = 
NumericLimits::Maximum(); // 2^63 const transaction_t NOT_DELETED_ID = NumericLimits::Maximum() - 1; // 2^64 - 1 const transaction_t MAXIMUM_QUERY_ID = NumericLimits::Maximum(); // 2^64 @@ -4487,8 +4810,8 @@ void MD5Context::Add(const char *data) { namespace duckdb { inline uint64_t ChronoNow() { - return std::chrono::duration_cast( - std::chrono::time_point_cast(std::chrono::high_resolution_clock::now()) + return std::chrono::duration_cast( + std::chrono::time_point_cast(std::chrono::high_resolution_clock::now()) .time_since_epoch()) .count(); } @@ -4558,6 +4881,7 @@ uint64_t CycleCounter::Tick() const { namespace duckdb { +// LCOV_EXCL_START string CatalogTypeToString(CatalogType type) { switch (type) { case CatalogType::COLLATION_ENTRY: @@ -4589,10 +4913,60 @@ string CatalogTypeToString(CatalogType type) { case CatalogType::INVALID: case CatalogType::DELETED_ENTRY: case CatalogType::UPDATED_ENTRY: - return "Invalid"; + break; + } + return "INVALID"; +} +// LCOV_EXCL_STOP + +} // namespace duckdb + + + + +namespace duckdb { + +// LCOV_EXCL_START +CompressionType CompressionTypeFromString(const string &str) { + auto compression = StringUtil::Lower(str); + if (compression == "uncompressed") { + return CompressionType::COMPRESSION_UNCOMPRESSED; + } else if (compression == "rle") { + return CompressionType::COMPRESSION_RLE; + } else if (compression == "dictionary") { + return CompressionType::COMPRESSION_DICTIONARY; + } else if (compression == "pfor") { + return CompressionType::COMPRESSION_PFOR_DELTA; + } else if (compression == "bitpacking") { + return CompressionType::COMPRESSION_BITPACKING; + } else if (compression == "fsst") { + return CompressionType::COMPRESSION_FSST; + } else { + return CompressionType::COMPRESSION_INVALID; + } +} + +string CompressionTypeToString(CompressionType type) { + switch (type) { + case CompressionType::COMPRESSION_UNCOMPRESSED: + return "Uncompressed"; + case CompressionType::COMPRESSION_CONSTANT: + return "Constant"; + case 
CompressionType::COMPRESSION_RLE: + return "RLE"; + case CompressionType::COMPRESSION_DICTIONARY: + return "Dictionary"; + case CompressionType::COMPRESSION_PFOR_DELTA: + return "PFOR"; + case CompressionType::COMPRESSION_BITPACKING: + return "BitPacking"; + case CompressionType::COMPRESSION_FSST: + return "FSST"; + default: + throw InternalException("Unrecognized compression type!"); } - return "Unknown"; } +// LCOV_EXCL_STOP } // namespace duckdb @@ -4601,6 +4975,7 @@ string CatalogTypeToString(CatalogType type) { namespace duckdb { +// LCOV_EXCL_START string ExpressionTypeToString(ExpressionType type) { switch (type) { case ExpressionType::OPERATOR_CAST: @@ -4663,6 +5038,8 @@ string ExpressionTypeToString(ExpressionType type) { return "FIRST_VALUE"; case ExpressionType::WINDOW_LAST_VALUE: return "LAST_VALUE"; + case ExpressionType::WINDOW_NTH_VALUE: + return "NTH_VALUE"; case ExpressionType::WINDOW_CUME_DIST: return "CUME_DIST"; case ExpressionType::WINDOW_LEAD: @@ -4715,11 +5092,24 @@ string ExpressionTypeToString(ExpressionType type) { return "BOUND_FUNCTION"; case ExpressionType::BOUND_AGGREGATE: return "BOUND_AGGREGATE"; + case ExpressionType::ARRAY_CONSTRUCTOR: + return "ARRAY_CONSTRUCTOR"; + case ExpressionType::TABLE_STAR: + return "TABLE_STAR"; + case ExpressionType::BOUND_UNNEST: + return "BOUND_UNNEST"; + case ExpressionType::COLLATE: + return "COLLATE"; + case ExpressionType::POSITIONAL_REFERENCE: + return "POSITIONAL_REFERENCE"; + case ExpressionType::LAMBDA: + return "LAMBDA"; case ExpressionType::INVALID: - default: - return "INVALID"; + break; } + return "INVALID"; } +// LCOV_EXCL_STOP string ExpressionTypeToOperator(ExpressionType type) { switch (type) { @@ -4741,8 +5131,6 @@ string ExpressionTypeToOperator(ExpressionType type) { return "AND"; case ExpressionType::CONJUNCTION_OR: return "OR"; - case ExpressionType::STAR: - return "*"; default: return ""; } @@ -4769,9 +5157,8 @@ ExpressionType NegateComparisionExpression(ExpressionType type) { 
case ExpressionType::COMPARE_GREATERTHANOREQUALTO: negated_type = ExpressionType::COMPARE_LESSTHAN; break; - default: - throw Exception("Unsupported comparison type in negation"); + throw InternalException("Unsupported comparison type in negation"); } return negated_type; } @@ -4797,9 +5184,8 @@ ExpressionType FlipComparisionExpression(ExpressionType type) { case ExpressionType::COMPARE_GREATERTHANOREQUALTO: flipped_type = ExpressionType::COMPARE_LESSTHANOREQUALTO; break; - default: - throw Exception("Unsupported comparison type in flip"); + throw InternalException("Unsupported comparison type in flip"); } return flipped_type; } @@ -4827,15 +5213,11 @@ string JoinTypeToString(JoinType type) { return "SINGLE"; case JoinType::MARK: return "MARK"; - case JoinType::INVALID: - default: - return "INVALID"; + case JoinType::INVALID: // LCOV_EXCL_START + break; } -} - -bool IsOuterJoin(JoinType type) { - return type == JoinType::LEFT || type == JoinType::OUTER || type == JoinType::RIGHT; -} + return "INVALID"; +} // LCOV_EXCL_STOP bool IsLeftOuterJoin(JoinType type) { return type == JoinType::LEFT || type == JoinType::OUTER; @@ -4853,6 +5235,7 @@ namespace duckdb { //===--------------------------------------------------------------------===// // Value <--> String Utilities //===--------------------------------------------------------------------===// +// LCOV_EXCL_START string LogicalOperatorToString(LogicalOperatorType type) { switch (type) { case LogicalOperatorType::LOGICAL_GET: @@ -4931,8 +5314,6 @@ string LogicalOperatorToString(LogicalOperatorType type) { return "CTE_SCAN"; case LogicalOperatorType::LOGICAL_SHOW: return "SHOW"; - case LogicalOperatorType::LOGICAL_INVALID: - return "INVALID"; case LogicalOperatorType::LOGICAL_ALTER: return "ALTER"; case LogicalOperatorType::LOGICAL_CREATE_SEQUENCE: @@ -4953,19 +5334,64 @@ string LogicalOperatorToString(LogicalOperatorType type) { return "SET"; case LogicalOperatorType::LOGICAL_LOAD: return "LOAD"; + case 
LogicalOperatorType::LOGICAL_INVALID: + break; } - return "UNDEFINED"; + return "INVALID"; } +// LCOV_EXCL_STOP } // namespace duckdb + + namespace duckdb { +string OptimizerTypeToString(OptimizerType type) { + switch (type) { + case OptimizerType::EXPRESSION_REWRITER: + return "expression_rewriter"; + case OptimizerType::FILTER_PULLUP: + return "filter_pullup"; + case OptimizerType::FILTER_PUSHDOWN: + return "filter_pushdown"; + case OptimizerType::REGEX_RANGE: + return "regex_range"; + case OptimizerType::IN_CLAUSE: + return "in_clause"; + case OptimizerType::JOIN_ORDER: + return "join_order"; + case OptimizerType::DELIMINATOR: + return "deliminator"; + case OptimizerType::UNUSED_COLUMNS: + return "unused_columns"; + case OptimizerType::STATISTICS_PROPAGATION: + return "statistics_propagation"; + case OptimizerType::COMMON_SUBEXPRESSIONS: + return "common_subexpressions"; + case OptimizerType::COMMON_AGGREGATE: + return "common_aggregate"; + case OptimizerType::COLUMN_LIFETIME: + return "column_lifetime"; + case OptimizerType::TOP_N: + return "top_n"; + case OptimizerType::REORDER_FILTER: + return "reorder_filter"; + case OptimizerType::INVALID: // LCOV_EXCL_START + break; + } + return "INVALID"; // LCOV_EXCL_STOP +} + +} // namespace duckdb + + +namespace duckdb { + +// LCOV_EXCL_START string PhysicalOperatorToString(PhysicalOperatorType type) { switch (type) { - case PhysicalOperatorType::LEAF: - return "LEAF"; case PhysicalOperatorType::TABLE_SCAN: return "TABLE_SCAN"; case PhysicalOperatorType::DUMMY_SCAN: @@ -4974,10 +5400,6 @@ string PhysicalOperatorToString(PhysicalOperatorType type) { return "CHUNK_SCAN"; case PhysicalOperatorType::DELIM_SCAN: return "DELIM_SCAN"; - case PhysicalOperatorType::EXTERNAL_FILE_SCAN: - return "EXTERNAL_FILE_SCAN"; - case PhysicalOperatorType::QUERY_DERIVED_SCAN: - return "QUERY_DERIVED_SCAN"; case PhysicalOperatorType::ORDER_BY: return "ORDER_BY"; case PhysicalOperatorType::LIMIT: @@ -4988,8 +5410,6 @@ string 
PhysicalOperatorToString(PhysicalOperatorType type) { return "STREAMING_SAMPLE"; case PhysicalOperatorType::TOP_N: return "TOP_N"; - case PhysicalOperatorType::AGGREGATE: - return "AGGREGATE"; case PhysicalOperatorType::WINDOW: return "WINDOW"; case PhysicalOperatorType::UNNEST: @@ -5000,8 +5420,6 @@ string PhysicalOperatorToString(PhysicalOperatorType type) { return "HASH_GROUP_BY"; case PhysicalOperatorType::PERFECT_HASH_GROUP_BY: return "PERFECT_HASH_GROUP_BY"; - case PhysicalOperatorType::SORT_GROUP_BY: - return "SORT_GROUP_BY"; case PhysicalOperatorType::FILTER: return "FILTER"; case PhysicalOperatorType::PROJECTION: @@ -5026,14 +5444,10 @@ string PhysicalOperatorToString(PhysicalOperatorType type) { return "UNION"; case PhysicalOperatorType::INSERT: return "INSERT"; - case PhysicalOperatorType::INSERT_SELECT: - return "INSERT_SELECT"; case PhysicalOperatorType::DELETE_OPERATOR: return "DELETE"; case PhysicalOperatorType::UPDATE: return "UPDATE"; - case PhysicalOperatorType::EXPORT_EXTERNAL_FILE: - return "EXPORT_EXTERNAL_FILE"; case PhysicalOperatorType::EMPTY_RESULT: return "EMPTY_RESULT"; case PhysicalOperatorType::CREATE_TABLE: @@ -5052,8 +5466,6 @@ string PhysicalOperatorToString(PhysicalOperatorType type) { return "REC_CTE"; case PhysicalOperatorType::RECURSIVE_CTE_SCAN: return "REC_CTE_SCAN"; - case PhysicalOperatorType::INVALID: - return "INVALID"; case PhysicalOperatorType::EXPRESSION_SCAN: return "EXPRESSION_SCAN"; case PhysicalOperatorType::ALTER: @@ -5082,9 +5494,12 @@ string PhysicalOperatorToString(PhysicalOperatorType type) { return "LOAD"; case PhysicalOperatorType::INOUT_FUNCTION: return "INOUT_FUNCTION"; + case PhysicalOperatorType::INVALID: + break; } - return "UNDEFINED"; + return "INVALID"; } +// LCOV_EXCL_STOP } // namespace duckdb @@ -5093,6 +5508,7 @@ string PhysicalOperatorToString(PhysicalOperatorType type) { namespace duckdb { +// LCOV_EXCL_START string RelationTypeToString(RelationType type) { switch (type) { case 
RelationType::TABLE_RELATION: @@ -5139,17 +5555,21 @@ string RelationTypeToString(RelationType type) { return "TABLE_FUNCTION_RELATION"; case RelationType::VIEW_RELATION: return "VIEW_RELATION"; + case RelationType::QUERY_RELATION: + return "QUERY_RELATION"; case RelationType::INVALID_RELATION: - default: - return "INVALID_RELATION"; + break; } + return "INVALID_RELATION"; } +// LCOV_EXCL_STOP } // namespace duckdb namespace duckdb { +// LCOV_EXCL_START string StatementTypeToString(StatementType type) { switch (type) { case StatementType::SELECT_STATEMENT: @@ -5199,10 +5619,22 @@ string StatementTypeToString(StatementType type) { case StatementType::LOAD_STATEMENT: return "LOAD"; case StatementType::INVALID_STATEMENT: - return "INVALID"; + break; } return "INVALID"; } +// LCOV_EXCL_STOP + +bool StatementTypeReturnChanges(StatementType type) { + switch (type) { + case StatementType::INSERT_STATEMENT: + case StatementType::UPDATE_STATEMENT: + case StatementType::DELETE_STATEMENT: + return true; + default: + return false; + } +} } // namespace duckdb @@ -5210,6 +5642,7 @@ string StatementTypeToString(StatementType type) { + namespace duckdb { Exception::Exception(const string &msg) : std::exception(), type(ExceptionType::INVALID) { @@ -5294,6 +5727,8 @@ string Exception::ExceptionTypeToString(ExceptionType type) { return "INTERNAL"; case ExceptionType::INVALID_INPUT: return "Invalid Input"; + case ExceptionType::OUT_OF_MEMORY: + return "Out of Memory"; default: return "Unknown"; } @@ -5405,6 +5840,9 @@ InternalException::InternalException(const string &msg) : Exception(ExceptionTyp InvalidInputException::InvalidInputException(const string &msg) : Exception(ExceptionType::INVALID_INPUT, msg) { } +OutOfMemoryException::OutOfMemoryException(const string &msg) : Exception(ExceptionType::OUT_OF_MEMORY, msg) { +} + } // namespace duckdb @@ -11168,46 +11606,100 @@ string ExceptionFormatValue::Format(const string &msg, vector namespace duckdb { 
-FileBuffer::FileBuffer(FileBufferType type, uint64_t bufsiz) : type(type) { - const int sector_size = Storage::SECTOR_SIZE; - // round up to the nearest sector_size, this is only really necessary if the file buffer will be used for Direct IO - if (bufsiz % sector_size != 0) { - bufsiz += sector_size - (bufsiz % sector_size); +FileBuffer::FileBuffer(Allocator &allocator, FileBufferType type, uint64_t bufsiz) + : allocator(allocator), type(type), malloced_buffer(nullptr) { + SetMallocedSize(bufsiz); + malloced_buffer = allocator.AllocateData(malloced_size); + Construct(bufsiz); +} + +FileBuffer::FileBuffer(FileBuffer &source, FileBufferType type_p) : allocator(source.allocator), type(type_p) { + // take over the structures of the source buffer + buffer = source.buffer; + size = source.size; + internal_buffer = source.internal_buffer; + internal_size = source.internal_size; + malloced_buffer = source.malloced_buffer; + malloced_size = source.malloced_size; + + source.buffer = nullptr; + source.size = 0; + source.internal_buffer = nullptr; + source.internal_size = 0; + source.malloced_buffer = nullptr; + source.malloced_size = 0; +} + +FileBuffer::~FileBuffer() { + allocator.FreeData(malloced_buffer, malloced_size); +} + +void FileBuffer::SetMallocedSize(uint64_t &bufsiz) { + // make room for the block header (if this is not the db file header) + if (type == FileBufferType::MANAGED_BUFFER && bufsiz != Storage::FILE_HEADER_SIZE) { + bufsiz += Storage::BLOCK_HEADER_SIZE; + } + if (type == FileBufferType::BLOCK) { + const int sector_size = Storage::SECTOR_SIZE; + // round up to the nearest sector_size + if (bufsiz % sector_size != 0) { + bufsiz += sector_size - (bufsiz % sector_size); + } + D_ASSERT(bufsiz % sector_size == 0); + D_ASSERT(bufsiz >= sector_size); + // we add (sector_size - 1) to ensure that we can align the buffer to sector_size + malloced_size = bufsiz + (sector_size - 1); + } else { + malloced_size = bufsiz; } - D_ASSERT(bufsiz % sector_size == 0); - 
D_ASSERT(bufsiz >= sector_size); - // we add (sector_size - 1) to ensure that we can align the buffer to sector_size - malloced_buffer = (data_ptr_t)malloc(bufsiz + (sector_size - 1)); +} + +void FileBuffer::Construct(uint64_t bufsiz) { if (!malloced_buffer) { throw std::bad_alloc(); } - // round to multiple of sector_size - uint64_t num = (uint64_t)malloced_buffer; - uint64_t remainder = num % sector_size; - if (remainder != 0) { - num = num + sector_size - remainder; - } - D_ASSERT(num % sector_size == 0); - D_ASSERT(num + bufsiz <= ((uint64_t)malloced_buffer + bufsiz + (sector_size - 1))); - D_ASSERT(num >= (uint64_t)malloced_buffer); - // construct the FileBuffer object - internal_buffer = (data_ptr_t)num; - internal_size = bufsiz; + if (type == FileBufferType::BLOCK) { + const int sector_size = Storage::SECTOR_SIZE; + // round to multiple of sector_size + uint64_t num = (uint64_t)malloced_buffer; + uint64_t remainder = num % sector_size; + if (remainder != 0) { + num = num + sector_size - remainder; + } + D_ASSERT(num % sector_size == 0); + D_ASSERT(num + bufsiz <= ((uint64_t)malloced_buffer + bufsiz + (sector_size - 1))); + D_ASSERT(num >= (uint64_t)malloced_buffer); + // construct the FileBuffer object + internal_buffer = (data_ptr_t)num; + internal_size = bufsiz; + } else { + internal_buffer = malloced_buffer; + internal_size = malloced_size; + } buffer = internal_buffer + Storage::BLOCK_HEADER_SIZE; size = internal_size - Storage::BLOCK_HEADER_SIZE; } -FileBuffer::~FileBuffer() { - free(malloced_buffer); +void FileBuffer::Resize(uint64_t bufsiz) { + D_ASSERT(type == FileBufferType::MANAGED_BUFFER); + SetMallocedSize(bufsiz); + malloced_buffer = allocator.ReallocateData(malloced_buffer, malloced_size); + Construct(bufsiz); } void FileBuffer::Read(FileHandle &handle, uint64_t location) { - // read the buffer from disk handle.Read(internal_buffer, internal_size, location); +} + +void FileBuffer::ReadAndChecksum(FileHandle &handle, uint64_t location) { + // 
read the buffer from disk + Read(handle, location); // compute the checksum auto stored_checksum = Load(internal_buffer); uint64_t computed_checksum = Checksum(buffer, size); @@ -11219,21 +11711,30 @@ void FileBuffer::Read(FileHandle &handle, uint64_t location) { } void FileBuffer::Write(FileHandle &handle, uint64_t location) { - // compute the checksum and write it to the start of the buffer + handle.Write(internal_buffer, internal_size, location); +} + +void FileBuffer::ChecksumAndWrite(FileHandle &handle, uint64_t location) { + // compute the checksum and write it to the start of the buffer (if not temp buffer) uint64_t checksum = Checksum(buffer, size); Store(checksum, internal_buffer); // now write the buffer - handle.Write(internal_buffer, internal_size, location); + Write(handle, location); } void FileBuffer::Clear() { memset(internal_buffer, 0, internal_size); } + } // namespace duckdb + + + + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/fstream_util.hpp +// duckdb/common/gzip_file_system.hpp // // //===----------------------------------------------------------------------===// @@ -11242,95 +11743,95 @@ void FileBuffer::Clear() { +namespace duckdb { + +class GZipFileSystem : public FileSystem { +public: + static unique_ptr OpenCompressedFile(unique_ptr handle); + + void Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) override; + int64_t Read(FileHandle &handle, void *buffer, int64_t nr_bytes) override; + + // unsupported operations + void Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) override; + int64_t Write(FileHandle &handle, void *buffer, int64_t nr_bytes) override; + void Truncate(FileHandle &handle, int64_t new_size) override; + void FileSync(FileHandle &handle) override; + void Seek(FileHandle &handle, idx_t location) override; + void Reset(FileHandle &handle) override; + + int64_t GetFileSize(FileHandle &handle) override; + + bool 
OnDiskFile(FileHandle &handle) override; + bool CanSeek() override { + return false; + } +}; + +} // namespace duckdb + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/fstream.hpp +// duckdb/common/pipe_file_system.hpp // // //===----------------------------------------------------------------------===// -#include -#include - -namespace duckdb { -using std::endl; -using std::fstream; -using std::ifstream; -using std::ios; -using std::ios_base; -using std::ofstream; -} // namespace duckdb namespace duckdb { -/** - * Fstream Utility Functions - */ -class FstreamUtil { + +class PipeFileSystem : public FileSystem { public: - /** - * Opens a file for the given name and returns it (default mode : ios_base::in | ios_base::out) - */ - static void OpenFile(const string &, fstream &, - ios_base::openmode mode = ios_base::in | ios_base::out | ios::binary); + static unique_ptr OpenPipe(unique_ptr handle); - /** - * Closes the given file or throws an exception otherwise - */ - static void CloseFile(fstream &); + void Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) override; + int64_t Read(FileHandle &handle, void *buffer, int64_t nr_bytes) override; + void Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) override; + int64_t Write(FileHandle &handle, void *buffer, int64_t nr_bytes) override; - /** - * Returns the size in bytes of the given file - */ - static idx_t GetFileSize(fstream &); + // unsupported operations + void Truncate(FileHandle &handle, int64_t new_size) override; + void FileSync(FileHandle &handle) override; + void Seek(FileHandle &handle, idx_t location) override; + void Reset(FileHandle &handle) override; - /** - * Reads the given file as a binary - */ - static data_ptr ReadBinary(fstream &); + int64_t GetFileSize(FileHandle &handle) override; + + bool OnDiskFile(FileHandle &handle) override { + return false; + }; + bool CanSeek() override { + return 
false; + } }; + } // namespace duckdb -namespace duckdb { -void FstreamUtil::OpenFile(const string &file_path, fstream &new_file, ios_base::openmode mode) { - new_file.open(file_path, mode); - if (!new_file.good()) { - throw IOException("Could not open File!" + file_path); - } -} -void FstreamUtil::CloseFile(fstream &file) { - file.close(); - // check the success of the write - if (file.fail()) { - throw IOException("Failed to close the file!"); - } -} -idx_t FstreamUtil::GetFileSize(fstream &file) { - file.seekg(0, ios::end); - return file.tellg(); -} +#ifdef _WIN32 -data_ptr FstreamUtil::ReadBinary(fstream &file) { - auto file_size = GetFileSize(file); - file.seekg(0, ios::beg); - auto result = data_ptr(new char[file_size]); - file.read(result.get(), file_size); +#ifndef NOMINMAX +#define NOMINMAX +#endif - return result; -} +#include -} // namespace duckdb +#undef CreateDirectory +#undef MoveFile +#undef RemoveDirectory + +#endif //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/gzip_stream.hpp +// duckdb/function/scalar/string_functions.hpp // // //===----------------------------------------------------------------------===// @@ -11339,222 +11840,2192 @@ data_ptr FstreamUtil::ReadBinary(fstream &file) { -#include -#include - -namespace duckdb { -class GzipStreamBuf : public std::streambuf { -public: - explicit GzipStreamBuf(std::string filename) : filename(filename) { - } +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 +// See the end of this file for a list - GzipStreamBuf(const GzipStreamBuf &) = delete; - GzipStreamBuf(GzipStreamBuf &&) = default; - GzipStreamBuf &operator=(const GzipStreamBuf &) = delete; - GzipStreamBuf &operator=(GzipStreamBuf &&) = default; +/* + * Copyright (c) 2014-2019 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors. 
+ * Copyright (c) 2009 Public Software Group e. V., Berlin, Germany + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ - ~GzipStreamBuf() override; - std::streambuf::int_type underflow() override; +/** + * @mainpage + * + * utf8proc is a free/open-source (MIT/expat licensed) C library + * providing Unicode normalization, case-folding, and other operations + * for strings in the UTF-8 encoding, supporting up-to-date Unicode versions. + * See the utf8proc home page (http://julialang.org/utf8proc/) + * for downloads and other information, or the source code on github + * (https://github.com/JuliaLang/utf8proc). 
+ * + * For the utf8proc API documentation, see: @ref utf8proc.h + * + * The features of utf8proc include: + * + * - Transformation of strings (@ref utf8proc_map) to: + * - decompose (@ref UTF8PROC_DECOMPOSE) or compose (@ref UTF8PROC_COMPOSE) Unicode combining characters (http://en.wikipedia.org/wiki/Combining_character) + * - canonicalize Unicode compatibility characters (@ref UTF8PROC_COMPAT) + * - strip "ignorable" (@ref UTF8PROC_IGNORE) characters, control characters (@ref UTF8PROC_STRIPCC), or combining characters such as accents (@ref UTF8PROC_STRIPMARK) + * - case-folding (@ref UTF8PROC_CASEFOLD) + * - Unicode normalization: @ref utf8proc_NFD, @ref utf8proc_NFC, @ref utf8proc_NFKD, @ref utf8proc_NFKC + * - Detecting grapheme boundaries (@ref utf8proc_grapheme_break and @ref UTF8PROC_CHARBOUND) + * - Character-width computation: @ref utf8proc_charwidth + * - Classification of characters by Unicode category: @ref utf8proc_category and @ref utf8proc_category_string + * - Encode (@ref utf8proc_encode_char) and decode (@ref utf8proc_iterate) Unicode codepoints to/from UTF-8. 
+ */ -private: - void Initialize(); +/** @file */ - std::fstream input; - idx_t data_start = 0; - void *mz_stream_ptr = nullptr; // void* so we don't have to include the header - data_ptr_t in_buff = nullptr, in_buff_start, in_buff_end, out_buff = nullptr; // various buffers & pointers - bool is_initialized = false; - std::string filename; - idx_t BUFFER_SIZE = 1024; -}; +#ifndef UTF8PROC_H +#define UTF8PROC_H -class GzipStream : public std::istream { -public: - explicit GzipStream(std::string filename) : std::istream(new GzipStreamBuf(filename)) { - exceptions(std::ios_base::badbit); - } - ~GzipStream() override { - if (rdbuf()) { - delete rdbuf(); - } - } -}; // class istream +// DuckDB change: +#define UTF8PROC_STATIC -} // namespace duckdb +/** @name API version + * + * The utf8proc API version MAJOR.MINOR.PATCH, following + * semantic-versioning rules (http://semver.org) based on API + * compatibility. + * + * This is also returned at runtime by @ref utf8proc_version; however, the + * runtime version may append a string like "-dev" to the version number + * for prerelease versions. + * + * @note The shared-library version number in the Makefile + * (and CMakeLists.txt, and MANIFEST) may be different, + * being based on ABI compatibility rather than API compatibility. + */ +/** @{ */ +/** The MAJOR version number (increased when backwards API compatibility is broken). */ +#define UTF8PROC_VERSION_MAJOR 2 +/** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). */ +#define UTF8PROC_VERSION_MINOR 4 +/** The PATCH version (increased for fixes that do not change the API). 
*/ +#define UTF8PROC_VERSION_PATCH 0 +/** @} */ +#include +#if defined(_MSC_VER) && _MSC_VER < 1800 +// MSVC prior to 2013 lacked stdbool.h and inttypes.h +typedef signed char utf8proc_int8_t; +typedef unsigned char utf8proc_uint8_t; +typedef short utf8proc_int16_t; +typedef unsigned short utf8proc_uint16_t; +typedef int utf8proc_int32_t; +typedef unsigned int utf8proc_uint32_t; +# ifdef _WIN64 +typedef __int64 utf8proc_ssize_t; +typedef unsigned __int64 utf8proc_size_t; +# else +typedef int utf8proc_ssize_t; +typedef unsigned int utf8proc_size_t; +# endif +# ifndef __cplusplus +// emulate C99 bool +typedef unsigned char utf8proc_bool; +# ifndef __bool_true_false_are_defined +# define false 0 +# define true 1 +# define __bool_true_false_are_defined 1 +# endif +# else +typedef bool utf8proc_bool; +# endif +#else +# include +# include +# include +#endif +#include +#define UTF8PROC_DLLEXPORT +// #ifdef UTF8PROC_STATIC +// # define UTF8PROC_DLLEXPORT +// #else +// # ifdef _WIN32 +// # ifdef UTF8PROC_EXPORTS +// # define UTF8PROC_DLLEXPORT __declspec(dllexport) +// # else +// # define UTF8PROC_DLLEXPORT __declspec(dllimport) +// # endif +// # elif __GNUC__ >= 4 +// # define UTF8PROC_DLLEXPORT __attribute__ ((visibility("default"))) +// # else +// # define UTF8PROC_DLLEXPORT +// # endif +// #endif +namespace duckdb { +typedef int8_t utf8proc_int8_t; +typedef uint8_t utf8proc_uint8_t; +typedef int16_t utf8proc_int16_t; +typedef uint16_t utf8proc_uint16_t; +typedef int32_t utf8proc_int32_t; +typedef uint32_t utf8proc_uint32_t; +typedef size_t utf8proc_size_t; +typedef ptrdiff_t utf8proc_ssize_t; +typedef bool utf8proc_bool; +//#ifdef __cplusplus +//extern "C" { +//#endif +/** + * Option flags used by several functions in the library. + */ +typedef enum { + /** The given UTF-8 input is NULL terminated. */ + UTF8PROC_NULLTERM = (1<<0), + /** Unicode Versioning Stability has to be respected. */ + UTF8PROC_STABLE = (1<<1), + /** Compatibility decomposition (i.e. 
formatting information is lost). */ + UTF8PROC_COMPAT = (1<<2), + /** Return a result with composed characters. */ + UTF8PROC_COMPOSE = (1<<3), + /** Return a result with decomposed characters. */ + UTF8PROC_DECOMPOSE = (1<<4), + /** Strip "default ignorable characters" such as SOFT-HYPHEN or ZERO-WIDTH-SPACE. */ + UTF8PROC_IGNORE = (1<<5), + /** Return an error, if the input contains unassigned codepoints. */ + UTF8PROC_REJECTNA = (1<<6), + /** + * Indicating that NLF-sequences (LF, CRLF, CR, NEL) are representing a + * line break, and should be converted to the codepoint for line + * separation (LS). + */ + UTF8PROC_NLF2LS = (1<<7), + /** + * Indicating that NLF-sequences are representing a paragraph break, and + * should be converted to the codepoint for paragraph separation + * (PS). + */ + UTF8PROC_NLF2PS = (1<<8), + /** Indicating that the meaning of NLF-sequences is unknown. */ + UTF8PROC_NLF2LF = (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS), + /** Strips and/or converts control characters. + * + * NLF-sequences are transformed into space, except if one of the + * NLF2LS/PS/LF options is given. HorizontalTab (HT) and FormFeed (FF) + * are treated as a NLF-sequence in this case. All other control + * characters are simply removed. + */ + UTF8PROC_STRIPCC = (1<<9), + /** + * Performs unicode case folding, to be able to do a case-insensitive + * string comparison. + */ + UTF8PROC_CASEFOLD = (1<<10), + /** + * Inserts 0xFF bytes at the beginning of each sequence which is + * representing a single grapheme cluster (see UAX#29). + */ + UTF8PROC_CHARBOUND = (1<<11), + /** Lumps certain characters together. + * + * E.g. HYPHEN U+2010 and MINUS U+2212 to ASCII "-". See lump.md for details. + * + * If NLF2LF is set, this includes a transformation of paragraph and + * line separators to ASCII line-feed (LF). + */ + UTF8PROC_LUMP = (1<<12), + /** Strips all character markings. + * + * This includes non-spacing, spacing and enclosing (i.e. accents).
+ * @note This option works only with @ref UTF8PROC_COMPOSE or + * @ref UTF8PROC_DECOMPOSE + */ + UTF8PROC_STRIPMARK = (1<<13), + /** + * Strip unassigned codepoints. + */ + UTF8PROC_STRIPNA = (1<<14), +} utf8proc_option_t; -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list +/** @name Error codes + * Error codes being returned by almost all functions. + */ +/** @{ */ +/** Memory could not be allocated. */ +#define UTF8PROC_ERROR_NOMEM -1 +/** The given string is too long to be processed. */ +#define UTF8PROC_ERROR_OVERFLOW -2 +/** The given string is not a legal UTF-8 string. */ +#define UTF8PROC_ERROR_INVALIDUTF8 -3 +/** The @ref UTF8PROC_REJECTNA flag was set and an unassigned codepoint was found. */ +#define UTF8PROC_ERROR_NOTASSIGNED -4 +/** Invalid options have been used. */ +#define UTF8PROC_ERROR_INVALIDOPTS -5 +/** @} */ -/* miniz.c 2.0.8 - public domain deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing - See "unlicense" statement at the end of this file. - Rich Geldreich , last updated Oct. 13, 2013 - Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt +/* @name Types */ - Most API's defined in miniz.c are optional. For example, to disable the archive related functions just define - MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO (see the list below for more macros). +/** Holds the value of a property. */ +typedef utf8proc_int16_t utf8proc_propval_t; - * Low-level Deflate/Inflate implementation notes: +/** Struct containing information about a codepoint. */ +typedef struct utf8proc_property_struct { + /** + * Unicode category. + * @see utf8proc_category_t. + */ + utf8proc_propval_t category; + utf8proc_propval_t combining_class; + /** + * Bidirectional class. + * @see utf8proc_bidi_class_t. 
+ */ + utf8proc_propval_t bidi_class; + /** + * @anchor Decomposition type. + * @see utf8proc_decomp_type_t. + */ + utf8proc_propval_t decomp_type; + utf8proc_uint16_t decomp_seqindex; + utf8proc_uint16_t casefold_seqindex; + utf8proc_uint16_t uppercase_seqindex; + utf8proc_uint16_t lowercase_seqindex; + utf8proc_uint16_t titlecase_seqindex; + utf8proc_uint16_t comb_index; + unsigned bidi_mirrored:1; + unsigned comp_exclusion:1; + /** + * Can this codepoint be ignored? + * + * Used by @ref utf8proc_decompose_char when @ref UTF8PROC_IGNORE is + * passed as an option. + */ + unsigned ignorable:1; + unsigned control_boundary:1; + /** The width of the codepoint. */ + unsigned charwidth:2; + unsigned pad:2; + /** + * Boundclass. + * @see utf8proc_boundclass_t. + */ + unsigned boundclass:8; +} utf8proc_property_t; - Compression: Use the "tdefl" API's. The compressor supports raw, static, and dynamic blocks, lazy or - greedy parsing, match length filtering, RLE-only, and Huffman-only streams. It performs and compresses - approximately as well as zlib. +/** Unicode categories. 
*/ +typedef enum { + UTF8PROC_CATEGORY_CN = 0, /**< Other, not assigned */ + UTF8PROC_CATEGORY_LU = 1, /**< Letter, uppercase */ + UTF8PROC_CATEGORY_LL = 2, /**< Letter, lowercase */ + UTF8PROC_CATEGORY_LT = 3, /**< Letter, titlecase */ + UTF8PROC_CATEGORY_LM = 4, /**< Letter, modifier */ + UTF8PROC_CATEGORY_LO = 5, /**< Letter, other */ + UTF8PROC_CATEGORY_MN = 6, /**< Mark, nonspacing */ + UTF8PROC_CATEGORY_MC = 7, /**< Mark, spacing combining */ + UTF8PROC_CATEGORY_ME = 8, /**< Mark, enclosing */ + UTF8PROC_CATEGORY_ND = 9, /**< Number, decimal digit */ + UTF8PROC_CATEGORY_NL = 10, /**< Number, letter */ + UTF8PROC_CATEGORY_NO = 11, /**< Number, other */ + UTF8PROC_CATEGORY_PC = 12, /**< Punctuation, connector */ + UTF8PROC_CATEGORY_PD = 13, /**< Punctuation, dash */ + UTF8PROC_CATEGORY_PS = 14, /**< Punctuation, open */ + UTF8PROC_CATEGORY_PE = 15, /**< Punctuation, close */ + UTF8PROC_CATEGORY_PI = 16, /**< Punctuation, initial quote */ + UTF8PROC_CATEGORY_PF = 17, /**< Punctuation, final quote */ + UTF8PROC_CATEGORY_PO = 18, /**< Punctuation, other */ + UTF8PROC_CATEGORY_SM = 19, /**< Symbol, math */ + UTF8PROC_CATEGORY_SC = 20, /**< Symbol, currency */ + UTF8PROC_CATEGORY_SK = 21, /**< Symbol, modifier */ + UTF8PROC_CATEGORY_SO = 22, /**< Symbol, other */ + UTF8PROC_CATEGORY_ZS = 23, /**< Separator, space */ + UTF8PROC_CATEGORY_ZL = 24, /**< Separator, line */ + UTF8PROC_CATEGORY_ZP = 25, /**< Separator, paragraph */ + UTF8PROC_CATEGORY_CC = 26, /**< Other, control */ + UTF8PROC_CATEGORY_CF = 27, /**< Other, format */ + UTF8PROC_CATEGORY_CS = 28, /**< Other, surrogate */ + UTF8PROC_CATEGORY_CO = 29, /**< Other, private use */ +} utf8proc_category_t; - Decompression: Use the "tinfl" API's. The entire decompressor is implemented as a single function - coroutine: see tinfl_decompress(). It supports decompression into a 32KB (or larger power of 2) wrapping buffer, or into a memory - block large enough to hold the entire file. 
+/** Bidirectional character classes. */ +typedef enum { + UTF8PROC_BIDI_CLASS_L = 1, /**< Left-to-Right */ + UTF8PROC_BIDI_CLASS_LRE = 2, /**< Left-to-Right Embedding */ + UTF8PROC_BIDI_CLASS_LRO = 3, /**< Left-to-Right Override */ + UTF8PROC_BIDI_CLASS_R = 4, /**< Right-to-Left */ + UTF8PROC_BIDI_CLASS_AL = 5, /**< Right-to-Left Arabic */ + UTF8PROC_BIDI_CLASS_RLE = 6, /**< Right-to-Left Embedding */ + UTF8PROC_BIDI_CLASS_RLO = 7, /**< Right-to-Left Override */ + UTF8PROC_BIDI_CLASS_PDF = 8, /**< Pop Directional Format */ + UTF8PROC_BIDI_CLASS_EN = 9, /**< European Number */ + UTF8PROC_BIDI_CLASS_ES = 10, /**< European Separator */ + UTF8PROC_BIDI_CLASS_ET = 11, /**< European Number Terminator */ + UTF8PROC_BIDI_CLASS_AN = 12, /**< Arabic Number */ + UTF8PROC_BIDI_CLASS_CS = 13, /**< Common Number Separator */ + UTF8PROC_BIDI_CLASS_NSM = 14, /**< Nonspacing Mark */ + UTF8PROC_BIDI_CLASS_BN = 15, /**< Boundary Neutral */ + UTF8PROC_BIDI_CLASS_B = 16, /**< Paragraph Separator */ + UTF8PROC_BIDI_CLASS_S = 17, /**< Segment Separator */ + UTF8PROC_BIDI_CLASS_WS = 18, /**< Whitespace */ + UTF8PROC_BIDI_CLASS_ON = 19, /**< Other Neutrals */ + UTF8PROC_BIDI_CLASS_LRI = 20, /**< Left-to-Right Isolate */ + UTF8PROC_BIDI_CLASS_RLI = 21, /**< Right-to-Left Isolate */ + UTF8PROC_BIDI_CLASS_FSI = 22, /**< First Strong Isolate */ + UTF8PROC_BIDI_CLASS_PDI = 23, /**< Pop Directional Isolate */ +} utf8proc_bidi_class_t; - The low-level tdefl/tinfl API's do not make any use of dynamic memory allocation. +/** Decomposition type. 
*/ +typedef enum { + UTF8PROC_DECOMP_TYPE_FONT = 1, /**< Font */ + UTF8PROC_DECOMP_TYPE_NOBREAK = 2, /**< Nobreak */ + UTF8PROC_DECOMP_TYPE_INITIAL = 3, /**< Initial */ + UTF8PROC_DECOMP_TYPE_MEDIAL = 4, /**< Medial */ + UTF8PROC_DECOMP_TYPE_FINAL = 5, /**< Final */ + UTF8PROC_DECOMP_TYPE_ISOLATED = 6, /**< Isolated */ + UTF8PROC_DECOMP_TYPE_CIRCLE = 7, /**< Circle */ + UTF8PROC_DECOMP_TYPE_SUPER = 8, /**< Super */ + UTF8PROC_DECOMP_TYPE_SUB = 9, /**< Sub */ + UTF8PROC_DECOMP_TYPE_VERTICAL = 10, /**< Vertical */ + UTF8PROC_DECOMP_TYPE_WIDE = 11, /**< Wide */ + UTF8PROC_DECOMP_TYPE_NARROW = 12, /**< Narrow */ + UTF8PROC_DECOMP_TYPE_SMALL = 13, /**< Small */ + UTF8PROC_DECOMP_TYPE_SQUARE = 14, /**< Square */ + UTF8PROC_DECOMP_TYPE_FRACTION = 15, /**< Fraction */ + UTF8PROC_DECOMP_TYPE_COMPAT = 16, /**< Compat */ +} utf8proc_decomp_type_t; - * zlib-style API notes: +/** Boundclass property. (TR29) */ +typedef enum { + UTF8PROC_BOUNDCLASS_START = 0, /**< Start */ + UTF8PROC_BOUNDCLASS_OTHER = 1, /**< Other */ + UTF8PROC_BOUNDCLASS_CR = 2, /**< Cr */ + UTF8PROC_BOUNDCLASS_LF = 3, /**< Lf */ + UTF8PROC_BOUNDCLASS_CONTROL = 4, /**< Control */ + UTF8PROC_BOUNDCLASS_EXTEND = 5, /**< Extend */ + UTF8PROC_BOUNDCLASS_L = 6, /**< L */ + UTF8PROC_BOUNDCLASS_V = 7, /**< V */ + UTF8PROC_BOUNDCLASS_T = 8, /**< T */ + UTF8PROC_BOUNDCLASS_LV = 9, /**< Lv */ + UTF8PROC_BOUNDCLASS_LVT = 10, /**< Lvt */ + UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR = 11, /**< Regional indicator */ + UTF8PROC_BOUNDCLASS_SPACINGMARK = 12, /**< Spacingmark */ + UTF8PROC_BOUNDCLASS_PREPEND = 13, /**< Prepend */ + UTF8PROC_BOUNDCLASS_ZWJ = 14, /**< Zero Width Joiner */ - miniz.c implements a fairly large subset of zlib. 
There's enough functionality present for it to be a drop-in - zlib replacement in many apps: - The z_stream struct, optional memory allocation callbacks - deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound - inflateInit/inflateInit2/inflate/inflateEnd - compress, compress2, compressBound, uncompress - CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly routines. - Supports raw deflate streams or standard zlib streams with adler-32 checking. + /* the following are no longer used in Unicode 11, but we keep + the constants here for backward compatibility */ + UTF8PROC_BOUNDCLASS_E_BASE = 15, /**< Emoji Base */ + UTF8PROC_BOUNDCLASS_E_MODIFIER = 16, /**< Emoji Modifier */ + UTF8PROC_BOUNDCLASS_GLUE_AFTER_ZWJ = 17, /**< Glue_After_ZWJ */ + UTF8PROC_BOUNDCLASS_E_BASE_GAZ = 18, /**< E_BASE + GLUE_AFTER_ZJW */ - Limitations: - The callback API's are not implemented yet. No support for gzip headers or zlib static dictionaries. - I've tried to closely emulate zlib's various flavors of stream flushing and return status codes, but - there are no guarantees that miniz.c pulls this off perfectly. + /* the Extended_Pictographic property is used in the Unicode 11 + grapheme-boundary rules, so we store it in the boundclass field */ + UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC = 19, + UTF8PROC_BOUNDCLASS_E_ZWG = 20, /* UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC + ZWJ */ +} utf8proc_boundclass_t; - * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, originally written by - Alex Evans. Supports 1-4 bytes/pixel images. +/** + * Function pointer type passed to @ref utf8proc_map_custom and + * @ref utf8proc_decompose_custom, which is used to specify a user-defined + * mapping of codepoints to be applied in conjunction with other mappings. 
+ */ +typedef utf8proc_int32_t (*utf8proc_custom_func)(utf8proc_int32_t codepoint, void *data); - * ZIP archive API notes: +/** + * Array containing the byte lengths of a UTF-8 encoded codepoint based + * on the first byte. + */ +// UTF8PROC_DLLEXPORT extern const utf8proc_int8_t utf8proc_utf8class[256]; - The ZIP archive API's where designed with simplicity and efficiency in mind, with just enough abstraction to - get the job done with minimal fuss. There are simple API's to retrieve file information, read files from - existing archives, create new archives, append new files to existing archives, or clone archive data from - one archive to another. It supports archives located in memory or the heap, on disk (using stdio.h), - or you can specify custom file read/write callbacks. +/** + * Returns the utf8proc API version as a string MAJOR.MINOR.PATCH + * (http://semver.org format), possibly with a "-dev" suffix for + * development versions. + */ +UTF8PROC_DLLEXPORT const char *utf8proc_version(void); - - Archive reading: Just call this function to read a single file from a disk archive: +/** + * Returns the utf8proc supported Unicode version as a string MAJOR.MINOR.PATCH. + */ +UTF8PROC_DLLEXPORT const char *utf8proc_unicode_version(void); - void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, - size_t *pSize, mz_uint zip_flags); +/** + * Returns an informative error string for the given utf8proc error code + * (e.g. the error codes returned by @ref utf8proc_map). + */ +UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode); - For more complex cases, use the "mz_zip_reader" functions. Upon opening an archive, the entire central - directory is located and read as-is into memory, and subsequent file access only occurs when reading individual files. +/** + * Reads a single codepoint from the UTF-8 sequence being pointed to by `str`. 
+ * The maximum number of bytes read is `strlen`, unless `strlen` is + * negative (in which case up to 4 bytes are read). + * + * If a valid codepoint could be read, it is stored in the variable + * pointed to by `codepoint_ref`, otherwise that variable will be set to -1. + * In case of success, the number of bytes read is returned; otherwise, a + * negative error code is returned. + */ +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *codepoint_ref); - - Archives file scanning: The simple way is to use this function to scan a loaded archive for a specific file: +/** + * Check if a codepoint is valid (regardless of whether it has been + * assigned a value by the current Unicode standard). + * + * @return 1 if the given `codepoint` is valid and otherwise return 0. + */ +UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t codepoint); - int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags); +/** + * Encodes the codepoint as an UTF-8 string in the byte array pointed + * to by `dst`. This array must be at least 4 bytes long. + * + * In case of success the number of bytes written is returned, and + * otherwise 0 is returned. + * + * This function does not check whether `codepoint` is valid Unicode. + */ +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t codepoint, utf8proc_uint8_t *dst); - The locate operation can optionally check file comments too, which (as one example) can be used to identify - multiple versions of the same file in an archive. This function uses a simple linear search through the central - directory, so it's not very fast. +/** + * Look up the properties for a given codepoint. + * + * @param codepoint The Unicode codepoint. + * + * @returns + * A pointer to a (constant) struct containing information about + * the codepoint. 
+ * @par + * If the codepoint is unassigned or invalid, a pointer to a special struct is + * returned in which `category` is 0 (@ref UTF8PROC_CATEGORY_CN). + */ +UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int32_t codepoint); - Alternately, you can iterate through all the files in an archive (using mz_zip_reader_get_num_files()) and - retrieve detailed info on each file by calling mz_zip_reader_file_stat(). +/** Decompose a codepoint into an array of codepoints. + * + * @param codepoint the codepoint. + * @param dst the destination buffer. + * @param bufsize the size of the destination buffer. + * @param options one or more of the following flags: + * - @ref UTF8PROC_REJECTNA - return an error if `codepoint` is unassigned + * - @ref UTF8PROC_IGNORE - strip "default ignorable" codepoints + * - @ref UTF8PROC_CASEFOLD - apply Unicode casefolding + * - @ref UTF8PROC_COMPAT - replace certain codepoints with their + * compatibility decomposition + * - @ref UTF8PROC_CHARBOUND - insert 0xFF bytes before each grapheme cluster + * - @ref UTF8PROC_LUMP - lump certain different codepoints together + * - @ref UTF8PROC_STRIPMARK - remove all character marks + * - @ref UTF8PROC_STRIPNA - remove unassigned codepoints + * @param last_boundclass + * Pointer to an integer variable containing + * the previous codepoint's boundary class if the @ref UTF8PROC_CHARBOUND + * option is used. Otherwise, this parameter is ignored. + * + * @return + * In case of success, the number of codepoints written is returned; in case + * of an error, a negative error code is returned (@ref utf8proc_errmsg). + * @par + * If the number of written codepoints would be bigger than `bufsize`, the + * required buffer size is returned, while the buffer will be overwritten with + * undefined data.
+ */ +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char( + utf8proc_int32_t codepoint, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, + utf8proc_option_t options, int *last_boundclass +); - - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer immediately writes compressed file data - to disk and builds an exact image of the central directory in memory. The central directory image is written - all at once at the end of the archive file when the archive is finalized. +/** + * The same as @ref utf8proc_decompose_char, but acts on a whole UTF-8 + * string and orders the decomposed sequences correctly. + * + * If the @ref UTF8PROC_NULLTERM flag in `options` is set, processing + * will be stopped, when a NULL byte is encountered, otherwise `strlen` + * bytes are processed. The result (in the form of 32-bit unicode + * codepoints) is written into the buffer being pointed to by + * `buffer` (which must contain at least `bufsize` entries). In case of + * success, the number of codepoints written is returned; in case of an + * error, a negative error code is returned (@ref utf8proc_errmsg). + * See @ref utf8proc_decompose_custom to supply additional transformations. + * + * If the number of written codepoints would be bigger than `bufsize`, the + * required buffer size is returned, while the buffer will be overwritten with + * undefined data. + */ +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose( + const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, + utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options +); - The archive writer can optionally align each file's local header and file data to any power of 2 alignment, - which can be useful when the archive will be read from optical media. Also, the writer supports placing - arbitrary data blobs at the very beginning of ZIP archives. Archives written using either feature are still - readable by any ZIP tool.
+/** + * The same as @ref utf8proc_decompose, but also takes a `custom_func` mapping function + * that is called on each codepoint in `str` before any other transformations + * (along with a `custom_data` pointer that is passed through to `custom_func`). + * The `custom_func` argument is ignored if it is `NULL`. See also @ref utf8proc_map_custom. + */ +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_custom( + const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, + utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options, + utf8proc_custom_func custom_func, void *custom_data +); - - Archive appending: The simple way to add a single file to an archive is to call this function: +/** + * Normalizes the sequence of `length` codepoints pointed to by `buffer` + * in-place (i.e., the result is also stored in `buffer`). + * + * @param buffer the (native-endian UTF-32) unicode codepoints to re-encode. + * @param length the length (in codepoints) of the buffer. + * @param options a bitwise or (`|`) of one or more of the following flags: + * - @ref UTF8PROC_NLF2LS - convert LF, CRLF, CR and NEL into LS + * - @ref UTF8PROC_NLF2PS - convert LF, CRLF, CR and NEL into PS + * - @ref UTF8PROC_NLF2LF - convert LF, CRLF, CR and NEL into LF + * - @ref UTF8PROC_STRIPCC - strip or convert all non-affected control characters + * - @ref UTF8PROC_COMPOSE - try to combine decomposed codepoints into composite + * codepoints + * - @ref UTF8PROC_STABLE - prohibit combining characters that would violate + * the unicode versioning stability + * + * @return + * In case of success, the length (in codepoints) of the normalized UTF-32 string is + * returned; otherwise, a negative error code is returned (@ref utf8proc_errmsg). + * + * @warning The entries of the array pointed to by `buffer` have to be in the + * range `0x0000` to `0x10FFFF`. Otherwise, the program might crash!
+ */ +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options); - mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, - const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); +/** + * Reencodes the sequence of `length` codepoints pointed to by `buffer` + * as UTF-8 data in-place (i.e., the result is also stored in `buffer`). + * Can optionally normalize the UTF-32 sequence prior to UTF-8 conversion. + * + * @param buffer the (native-endian UTF-32) unicode codepoints to re-encode. + * @param length the length (in codepoints) of the buffer. + * @param options a bitwise or (`|`) of one or more of the following flags: + * - @ref UTF8PROC_NLF2LS - convert LF, CRLF, CR and NEL into LS + * - @ref UTF8PROC_NLF2PS - convert LF, CRLF, CR and NEL into PS + * - @ref UTF8PROC_NLF2LF - convert LF, CRLF, CR and NEL into LF + * - @ref UTF8PROC_STRIPCC - strip or convert all non-affected control characters + * - @ref UTF8PROC_COMPOSE - try to combine decomposed codepoints into composite + * codepoints + * - @ref UTF8PROC_STABLE - prohibit combining characters that would violate + * the unicode versioning stability + * - @ref UTF8PROC_CHARBOUND - insert 0xFF bytes before each grapheme cluster + * + * @return + * In case of success, the length (in bytes) of the resulting nul-terminated + * UTF-8 string is returned; otherwise, a negative error code is returned + * (@ref utf8proc_errmsg). + * + * @warning The amount of free space pointed to by `buffer` must + * exceed the amount of the input data by one byte, and the + * entries of the array pointed to by `buffer` have to be in the + * range `0x0000` to `0x10FFFF`. Otherwise, the program might crash!
+ */ +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options); - The archive will be created if it doesn't already exist, otherwise it'll be appended to. - Note the appending is done in-place and is not an atomic operation, so if something goes wrong - during the operation it's possible the archive could be left without a central directory (although the local - file headers and file data will be fine, so the archive will be recoverable). +/** + * Given a pair of consecutive codepoints, return whether a grapheme break is + * permitted between them (as defined by the extended grapheme clusters in UAX#29). + * + * @param codepoint1 The first codepoint. + * @param codepoint2 The second codepoint, occurring consecutively after `codepoint1`. + * @param state Beginning with Version 29 (Unicode 9.0.0), this algorithm requires + * state to break graphemes. This state can be passed in as a pointer + * in the `state` argument and should initially be set to 0. If the + * state is not passed in (i.e. a null pointer is passed), UAX#29 rules + * GB10/12/13 which require this state will not be applied, essentially + * matching the rules in Unicode 8.0.0. + * + * @warning If the state parameter is used, `utf8proc_grapheme_break_stateful` must + * be called IN ORDER on ALL potential breaks in a string. However, it + * is safe to reset the state to zero after a grapheme break. + */ +UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break_stateful( + utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2, utf8proc_int32_t *state); - For more complex archive modification scenarios: - 1. The safest way is to use a mz_zip_reader to read the existing archive, cloning only those bits you want to - preserve into a new archive using using the mz_zip_writer_add_from_zip_reader() function (which compiles the - compressed file data as-is). 
When you're done, delete the old archive and rename the newly written archive, and - you're done. This is safe but requires a bunch of temporary disk space or heap memory. +/** + * Same as @ref utf8proc_grapheme_break_stateful, except without support for the + * Unicode 9 additions to the algorithm. Supported for legacy reasons. + */ +UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break( + utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2); - 2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using mz_zip_writer_init_from_reader(), - append new files as needed, then finalize the archive which will write an updated central directory to the - original archive. (This is basically what mz_zip_add_mem_to_archive_file_in_place() does.) There's a - possibility that the archive's central directory could be lost with this method if anything goes wrong, though. +//! Returns the current UTF8 codepoint in a UTF8 string. Assumes the string is valid UTF8. +UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_codepoint(const char *u_input, int &sz); +UTF8PROC_DLLEXPORT utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t *state); +UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_codepoint(const char *u_input, int &sz); +UTF8PROC_DLLEXPORT bool utf8proc_codepoint_to_utf8(int cp, int &sz, char *c); +UTF8PROC_DLLEXPORT int utf8proc_codepoint_length(int cp); +UTF8PROC_DLLEXPORT size_t utf8proc_next_grapheme(const char *s, size_t len, size_t cpos); +UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_remove_accents(const utf8proc_uint8_t *str, utf8proc_ssize_t len); +template +void utf8proc_grapheme_callback(const char *s, size_t len, T &&fun) { + int sz; + int boundclass = UTF8PROC_BOUNDCLASS_START; + int initial = utf8proc_get_property(utf8proc_codepoint(s, sz))->boundclass; + grapheme_break_extended(boundclass, initial, &boundclass); + size_t start = 0; + size_t cpos = 0; + while(true) { + cpos += sz; + if (cpos >= len) { + fun(start, cpos); + return; 
+ } + int next = utf8proc_get_property(utf8proc_codepoint(s + cpos, sz))->boundclass; + if (grapheme_break_extended(boundclass, next, &boundclass)) { + if (!fun(start, cpos)) { + return; + } + start = cpos; + } + } +} - - ZIP archive support limitations: - No zip64 or spanning support. Extraction functions can only handle unencrypted, stored or deflated files. - Requires streams capable of seeking. +/** + * Given a codepoint `c`, return the codepoint of the corresponding + * lower-case character, if any; otherwise (if there is no lower-case + * variant, or if `c` is not a valid codepoint) return `c`. + */ +UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c); - * This is a header file library, like stb_image.c. To get only a header file, either cut and paste the - below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then include miniz.c from it. +/** + * Given a codepoint `c`, return the codepoint of the corresponding + * upper-case character, if any; otherwise (if there is no upper-case + * variant, or if `c` is not a valid codepoint) return `c`. + */ +UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c); - * Important: For best perf. be sure to customize the below macros for your target platform: - #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 - #define MINIZ_LITTLE_ENDIAN 1 - #define MINIZ_HAS_64BIT_REGISTERS 1 +/** + * Given a codepoint `c`, return the codepoint of the corresponding + * title-case character, if any; otherwise (if there is no title-case + * variant, or if `c` is not a valid codepoint) return `c`. + */ +UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c); - * On platforms using glibc, Be sure to "#define _LARGEFILE64_SOURCE 1" before including miniz.c to ensure miniz - uses the 64-bit variants: fopen64(), stat64(), etc. Otherwise you won't be able to process large files - (i.e. 32-bit stat() fails for me on files > 0x7FFFFFFF bytes). 
-*/ +/** + * Given a codepoint, return a character width analogous to `wcwidth(codepoint)`, + * except that a width of 0 is returned for non-printable codepoints + * instead of -1 as in `wcwidth`. + * + * @note + * If you want to check for particular types of non-printable characters, + * (analogous to `isprint` or `iscntrl`), use @ref utf8proc_category. */ + UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t codepoint); +/** + * Return the Unicode category for the codepoint (one of the + * @ref utf8proc_category_t constants.) + */ +UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(utf8proc_int32_t codepoint); +/** + * Return the two-letter (nul-terminated) Unicode category string for + * the codepoint (e.g. `"Lu"` or `"Co"`). + */ +UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoint); +/** + * Maps the given UTF-8 string pointed to by `str` to a new UTF-8 + * string, allocated dynamically by `malloc` and returned via `dstptr`. + * + * If the @ref UTF8PROC_NULLTERM flag in the `options` field is set, + * the length is determined by a NULL terminator, otherwise the + * parameter `strlen` is evaluated to determine the string length, but + * in any case the result will be NULL terminated (though it might + * contain NULL characters with the string if `str` contained NULL + * characters). Other flags in the `options` field are passed to the + * functions defined above, and regarded as described. See also + * @ref utf8proc_map_custom to supply a custom codepoint transformation. + * + * In case of success the length of the new string is returned, + * otherwise a negative error code is returned. + * + * @note The memory of the new UTF-8 string will have been allocated + * with `malloc`, and should therefore be deallocated with `free`. 
+ */ +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map( + const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options +); +/** + * Like @ref utf8proc_map, but also takes a `custom_func` mapping function + * that is called on each codepoint in `str` before any other transformations + * (along with a `custom_data` pointer that is passed through to `custom_func`). + * The `custom_func` argument is ignored if it is `NULL`. + */ +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom( + const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options, + utf8proc_custom_func custom_func, void *custom_data +); -/* Defines to completely disable specific portions of miniz.c: - If all macros here are defined the only functionality remaining will be CRC-32, adler-32, tinfl, and tdefl. */ +/** @name Unicode normalization + * + * Returns a pointer to newly allocated memory of a NFD, NFC, NFKD, NFKC or + * NFKC_Casefold normalized version of the null-terminated string `str`. These + * are shortcuts to calling @ref utf8proc_map with @ref UTF8PROC_NULLTERM + * combined with @ref UTF8PROC_STABLE and flags indicating the normalization. + */ +/** @{ */ +/** NFD normalization (@ref UTF8PROC_DECOMPOSE). */ +UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str, utf8proc_ssize_t len); +/** NFC normalization (@ref UTF8PROC_COMPOSE). */ +UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str, utf8proc_ssize_t len); +/** NFKD normalization (@ref UTF8PROC_DECOMPOSE and @ref UTF8PROC_COMPAT). */ +UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str, utf8proc_ssize_t len); +/** NFKC normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). 
*/ +UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str, utf8proc_ssize_t len); +/** + * NFKC_Casefold normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT + * and @ref UTF8PROC_CASEFOLD and @ref UTF8PROC_IGNORE). + **/ +UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8_t *str, utf8proc_ssize_t len); +/** @} */ -/* Define MINIZ_NO_STDIO to disable all usage and any functions which rely on stdio for file I/O. */ -#define MINIZ_NO_STDIO +//#ifdef __cplusplus +//} +//#endif +} +#endif -/* If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able to get the current time, or */ -/* get/set file times, and the C run-time funcs that get/set times won't be called. */ -/* The current downside is the times written to your archives will be from 1979. */ -#define MINIZ_NO_TIME -/* Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. */ -/* #define MINIZ_NO_ARCHIVE_APIS */ +// LICENSE_CHANGE_END -/* Define MINIZ_NO_ARCHIVE_WRITING_APIS to disable all writing related ZIP archive API's. */ -/* #define MINIZ_NO_ARCHIVE_WRITING_APIS */ -/* Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression API's. */ -/*#define MINIZ_NO_ZLIB_APIS */ +namespace re2 { +class RE2; +} -/* Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent conflicts against stock zlib. */ -#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES +namespace duckdb { -/* Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. - Note if MINIZ_NO_MALLOC is defined then the user must always provide custom user alloc/free/realloc - callbacks to the zlib and archive API's, and a few stand-alone helper API's which don't provide custom user - functions (such as tdefl_compress_mem_to_heap() and tinfl_decompress_mem_to_heap()) won't work. 
*/ -/*#define MINIZ_NO_MALLOC */ +struct ReverseFun { + static void RegisterFunction(BuiltinFunctions &set); +}; -#if defined(__TINYC__) && (defined(__linux) || defined(__linux__)) -/* TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc on Linux */ -#define MINIZ_NO_TIME -#endif +struct LowerFun { + static uint8_t ascii_to_lower_map[]; -#include + //! Returns the length of the result string obtained from lowercasing the given input (in bytes) + static idx_t LowerLength(const char *input_data, idx_t input_length); + //! Lowercases the string to the target output location, result_data must have space for at least LowerLength bytes + static void LowerCase(const char *input_data, idx_t input_length, char *result_data); + static ScalarFunction GetFunction(); + static void RegisterFunction(BuiltinFunctions &set); +}; +struct UpperFun { + static uint8_t ascii_to_upper_map[]; -#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) -#include -#endif + static void RegisterFunction(BuiltinFunctions &set); +}; -#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__) +struct StripAccentsFun { + static bool IsAscii(const char *input, idx_t n); + static ScalarFunction GetFunction(); + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct ConcatFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct ConcatWSFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct LengthFun { + static void RegisterFunction(BuiltinFunctions &set); + template + static inline TR Length(TA input) { + auto input_data = input.GetDataUnsafe(); + auto input_length = input.GetSize(); + for (idx_t i = 0; i < input_length; i++) { + if (input_data[i] & 0x80) { + int64_t length = 0; + // non-ascii character: use grapheme iterator on remainder of string + utf8proc_grapheme_callback(input_data, input_length, 
[&](size_t start, size_t end) { + length++; + return true; + }); + return length; + } + } + return input_length; + } +}; + +struct LikeFun { + static void RegisterFunction(BuiltinFunctions &set); + static bool Glob(const char *s, idx_t slen, const char *pattern, idx_t plen); +}; + +struct LikeEscapeFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct LpadFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct LeftFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct MD5Fun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct NFCNormalizeFun { + static ScalarFunction GetFunction(); + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct RightFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct RegexpFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct SubstringFun { + static void RegisterFunction(BuiltinFunctions &set); + static string_t SubstringScalarFunction(Vector &result, string_t input, int32_t offset, int32_t length); +}; + +struct PrintfFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct InstrFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct PrefixFun { + static ScalarFunction GetFunction(); + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct RepeatFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct ReplaceFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct RpadFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct SuffixFun { + static ScalarFunction GetFunction(); + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct TrimFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct ContainsFun { + static ScalarFunction GetFunction(); + static void RegisterFunction(BuiltinFunctions &set); + static idx_t Find(const string_t &haystack, const string_t &needle); + static 
idx_t Find(const unsigned char *haystack, idx_t haystack_size, const unsigned char *needle, + idx_t needle_size); +}; + +struct UnicodeFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct StringSplitFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct ASCII { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct CHR { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct MismatchesFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct LevenshteinFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct JaccardFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +} // namespace duckdb + + + + +#include +#include + +#ifndef _WIN32 +#include +#include +#include +#include +#include +#include +#else +#include + +#ifdef __MINGW32__ +// need to manually define this for mingw +extern "C" WINBASEAPI BOOL WINAPI GetPhysicallyInstalledSystemMemory(PULONGLONG); +#endif + +#undef FILE_CREATE // woo mingw +#endif + +namespace duckdb { + +FileSystem &FileSystem::GetFileSystem(ClientContext &context) { + return *context.db->config.file_system; +} + +static void AssertValidFileFlags(uint8_t flags) { + // cannot combine Read and Write flags + D_ASSERT(!(flags & FileFlags::FILE_FLAGS_READ && flags & FileFlags::FILE_FLAGS_WRITE)); + // cannot combine Read and CREATE/Append flags + D_ASSERT(!(flags & FileFlags::FILE_FLAGS_READ && flags & FileFlags::FILE_FLAGS_APPEND)); + D_ASSERT(!(flags & FileFlags::FILE_FLAGS_READ && flags & FileFlags::FILE_FLAGS_FILE_CREATE)); + D_ASSERT(!(flags & FileFlags::FILE_FLAGS_READ && flags & FileFlags::FILE_FLAGS_FILE_CREATE_NEW)); + // cannot combine CREATE and CREATE_NEW flags + D_ASSERT(!(flags & FileFlags::FILE_FLAGS_FILE_CREATE && flags & FileFlags::FILE_FLAGS_FILE_CREATE_NEW)); +} + +#ifndef _WIN32 +// somehow sometimes this is missing +#ifndef O_CLOEXEC +#define O_CLOEXEC 0 +#endif + +// Solaris +#ifndef O_DIRECT +#define 
O_DIRECT 0
+#endif
+
+//! FileHandle implementation that wraps a POSIX file descriptor
+struct UnixFileHandle : public FileHandle {
+public:
+	UnixFileHandle(FileSystem &file_system, string path, int fd) : FileHandle(file_system, move(path)), fd(fd) {
+	}
+	~UnixFileHandle() override {
+		Close();
+	}
+
+protected:
+	//! Closes the descriptor if it is still open. The descriptor is reset to -1 afterwards so that a second call
+	//! (the destructor always calls Close) cannot close a descriptor number that has been re-used in the meantime.
+	void Close() override {
+		if (fd != -1) {
+			close(fd);
+			fd = -1;
+		}
+	}
+
+public:
+	//! The wrapped file descriptor; -1 once the handle has been closed
+	int fd;
+};
+
+//! Opens "path" with open(2), translating the FileFlags bitmask into O_* flags, and optionally places an
+//! fcntl(F_SETLK) read or write lock on the file (l_start = 0, l_len = 0).
+//! Throws NotImplementedException for any compression type other than UNCOMPRESSED, and IOException when the
+//! file cannot be opened or the lock cannot be acquired.
+unique_ptr<FileHandle> FileSystem::OpenFile(const string &path, uint8_t flags, FileLockType lock_type,
+                                            FileCompressionType compression) {
+	if (compression != FileCompressionType::UNCOMPRESSED) {
+		throw NotImplementedException("Unsupported compression type for default file system");
+	}
+
+	AssertValidFileFlags(flags);
+
+	int open_flags = 0;
+	int rc;
+	if (flags & FileFlags::FILE_FLAGS_READ) {
+		open_flags = O_RDONLY;
+	} else {
+		// need Read or Write
+		D_ASSERT(flags & FileFlags::FILE_FLAGS_WRITE);
+		open_flags = O_RDWR | O_CLOEXEC;
+		if (flags & FileFlags::FILE_FLAGS_FILE_CREATE) {
+			open_flags |= O_CREAT;
+		} else if (flags & FileFlags::FILE_FLAGS_FILE_CREATE_NEW) {
+			open_flags |= O_CREAT | O_TRUNC;
+		}
+		if (flags & FileFlags::FILE_FLAGS_APPEND) {
+			open_flags |= O_APPEND;
+		}
+	}
+	if (flags & FileFlags::FILE_FLAGS_DIRECT_IO) {
+#if defined(__sun) && defined(__SVR4)
+		throw Exception("DIRECT_IO not supported on Solaris");
+#endif
+#if defined(__DARWIN__) || defined(__APPLE__) || defined(__OpenBSD__)
+		// OSX does not have O_DIRECT, instead we need to use fcntl afterwards to support direct IO
+		open_flags |= O_SYNC;
+#else
+		open_flags |= O_DIRECT | O_SYNC;
+#endif
+	}
+	int fd = open(path.c_str(), open_flags, 0666);
+	if (fd == -1) {
+		throw IOException("Cannot open file \"%s\": %s", path, strerror(errno));
+	}
+	// #if defined(__DARWIN__) || defined(__APPLE__)
+	// if (flags & FileFlags::FILE_FLAGS_DIRECT_IO) {
+	// 	// OSX requires fcntl for Direct IO
+	// 	rc = fcntl(fd, F_NOCACHE, 1);
+	// 	if (fd == -1) {
+	// 		throw IOException("Could not enable direct IO for file \"%s\": %s", path, strerror(errno));
+	// 	}
+	// }
+	// #endif
+	if (lock_type != FileLockType::NO_LOCK) {
+		// set lock on file
+		struct flock fl;
+		memset(&fl, 0, sizeof fl);
+		fl.l_type = lock_type == FileLockType::READ_LOCK ? F_RDLCK : F_WRLCK;
+		fl.l_whence = SEEK_SET;
+		fl.l_start = 0;
+		fl.l_len = 0;
+		rc = fcntl(fd, F_SETLK, &fl);
+		if (rc == -1) {
+			// capture the fcntl error first: close() below may overwrite errno
+			string lock_error = strerror(errno);
+			// no handle owns the descriptor yet, so it has to be released here or it leaks
+			close(fd);
+			throw IOException("Could not set lock on file \"%s\": %s", path, lock_error);
+		}
+	}
+	return make_unique<UnixFileHandle>(*this, path, fd);
+}
+
+//! Moves the file offset of the handle to the absolute position "location"; throws IOException if lseek fails
+void FileSystem::SetFilePointer(FileHandle &handle, idx_t location) {
+	int fd = ((UnixFileHandle &)handle).fd;
+	off_t offset = lseek(fd, location, SEEK_SET);
+	if (offset == (off_t)-1) {
+		throw IOException("Could not seek to location %lld for file \"%s\": %s", location, handle.path,
+		                  strerror(errno));
+	}
+}
+
+//! Returns the current file offset of the handle; throws IOException if lseek fails
+idx_t FileSystem::GetFilePointer(FileHandle &handle) {
+	int fd = ((UnixFileHandle &)handle).fd;
+	off_t position = lseek(fd, 0, SEEK_CUR);
+	if (position == (off_t)-1) {
+		throw IOException("Could not get file position file \"%s\": %s", handle.path, strerror(errno));
+	}
+	return position;
+}
+
+//! Reads exactly nr_bytes from absolute offset "location" into buffer using pread.
+//! Throws IOException if pread fails or returns fewer bytes than requested.
+void FileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) {
+	int fd = ((UnixFileHandle &)handle).fd;
+	int64_t bytes_read = pread(fd, buffer, nr_bytes, location);
+	if (bytes_read == -1) {
+		throw IOException("Could not read from file \"%s\": %s", handle.path, strerror(errno));
+	}
+	if (bytes_read != nr_bytes) {
+		throw IOException("Could not read all bytes from file \"%s\": wanted=%lld read=%lld", handle.path, nr_bytes,
+		                  bytes_read);
+	}
+}
+
+//! Reads up to nr_bytes from the current file offset and returns the number of bytes actually read
+int64_t FileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes) {
+	int fd = ((UnixFileHandle &)handle).fd;
+	int64_t bytes_read = read(fd, buffer, nr_bytes);
+	if (bytes_read == -1) {
+		throw IOException("Could not read from file \"%s\": %s", handle.path, strerror(errno));
+	}
+	return bytes_read;
+}
+
+void FileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) {
+	int fd = ((UnixFileHandle &)handle).fd;
+	int64_t
bytes_written = pwrite(fd, buffer, nr_bytes, location);
+	if (bytes_written == -1) {
+		throw IOException("Could not write file \"%s\": %s", handle.path, strerror(errno));
+	}
+	if (bytes_written != nr_bytes) {
+		throw IOException("Could not write all bytes to file \"%s\": wanted=%lld wrote=%lld", handle.path, nr_bytes,
+		                  bytes_written);
+	}
+}
+
+//! Writes up to nr_bytes at the current file offset and returns the number of bytes actually written
+int64_t FileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes) {
+	int fd = ((UnixFileHandle &)handle).fd;
+	int64_t bytes_written = write(fd, buffer, nr_bytes);
+	if (bytes_written == -1) {
+		throw IOException("Could not write file \"%s\": %s", handle.path, strerror(errno));
+	}
+	return bytes_written;
+}
+
+//! Returns the size of the file in bytes, or -1 if fstat fails
+int64_t FileSystem::GetFileSize(FileHandle &handle) {
+	int fd = ((UnixFileHandle &)handle).fd;
+	struct stat s;
+	if (fstat(fd, &s) == -1) {
+		return -1;
+	}
+	return s.st_size;
+}
+
+//! Returns the modification time (st_mtime) of the file, or -1 if fstat fails
+time_t FileSystem::GetLastModifiedTime(FileHandle &handle) {
+	int fd = ((UnixFileHandle &)handle).fd;
+	struct stat s;
+	if (fstat(fd, &s) == -1) {
+		return -1;
+	}
+	return s.st_mtime;
+}
+
+//! Maps the S_IFMT bits reported by fstat to a FileType; FILE_TYPE_INVALID if fstat fails or the type is unknown
+FileType FileSystem::GetFileType(FileHandle &handle) {
+	int fd = ((UnixFileHandle &)handle).fd;
+	struct stat s;
+	if (fstat(fd, &s) == -1) {
+		return FileType::FILE_TYPE_INVALID;
+	}
+	switch (s.st_mode & S_IFMT) {
+	case S_IFBLK:
+		return FileType::FILE_TYPE_BLOCKDEV;
+	case S_IFCHR:
+		return FileType::FILE_TYPE_CHARDEV;
+	case S_IFIFO:
+		return FileType::FILE_TYPE_FIFO;
+	case S_IFDIR:
+		return FileType::FILE_TYPE_DIR;
+	case S_IFLNK:
+		return FileType::FILE_TYPE_LINK;
+	case S_IFREG:
+		return FileType::FILE_TYPE_REGULAR;
+	case S_IFSOCK:
+		return FileType::FILE_TYPE_SOCKET;
+	default:
+		return FileType::FILE_TYPE_INVALID;
+	}
+}
+
+//! Truncates or extends the file to exactly new_size bytes; throws IOException if ftruncate fails
+void FileSystem::Truncate(FileHandle &handle, int64_t new_size) {
+	int fd = ((UnixFileHandle &)handle).fd;
+	if (ftruncate(fd, new_size) != 0) {
+		throw IOException("Could not truncate file \"%s\": %s", handle.path, strerror(errno));
+	}
+}
+
+//! Returns true if "directory" names an existing directory.
+//! An empty path, or a path that cannot be stat-ed, yields false.
+bool FileSystem::DirectoryExists(const string &directory) {
+	if (directory.empty()) {
+		return false;
+	}
+	struct stat status;
+	if (stat(directory.c_str(), &status) != 0) {
+		// stat failed: "status" is not filled in and must not be inspected
+		return false;
+	}
+	// S_ISDIR compares the complete file-type field; a plain "& S_IFDIR" bit test
+	// would also be true for block devices and sockets
+	return S_ISDIR(status.st_mode);
+}
+
+//! Returns true if "filename" names an existing file system entry that is not a directory.
+//! An empty path, or a path that cannot be stat-ed, yields false.
+bool FileSystem::FileExists(const string &filename) {
+	if (filename.empty()) {
+		return false;
+	}
+	struct stat status;
+	if (stat(filename.c_str(), &status) != 0) {
+		// stat failed: "status" is not filled in and must not be inspected
+		return false;
+	}
+	return !S_ISDIR(status.st_mode);
+}
+
+//! Creates "directory" (mode 0755) if nothing exists at that path yet.
+//! Throws IOException if mkdir fails or if the path exists but is not a directory.
+void FileSystem::CreateDirectory(const string &directory) {
+	struct stat st;
+
+	if (stat(directory.c_str(), &st) != 0) {
+		/* Directory does not exist. EEXIST for race condition */
+		if (mkdir(directory.c_str(), 0755) != 0 && errno != EEXIST) {
+			throw IOException("Failed to create directory \"%s\"!", directory);
+		}
+	} else if (!S_ISDIR(st.st_mode)) {
+		throw IOException("Failed to create directory \"%s\": path exists but is not a directory!", directory);
+	}
+}
+
+int RemoveDirectoryRecursive(const char *path) {
+	DIR *d = opendir(path);
+	idx_t path_len = (idx_t)strlen(path);
+	int r = -1;
+
+	if (d) {
+		struct dirent *p;
+		r = 0;
+		while (!r && (p = readdir(d))) {
+			int r2 = -1;
+			char *buf;
+			idx_t len;
+			/* Skip the names "." and ".." as we don't want to recurse on them.
*/
+			if (!strcmp(p->d_name, ".") || !strcmp(p->d_name, "..")) {
+				continue;
+			}
+			len = path_len + (idx_t)strlen(p->d_name) + 2;
+			buf = new char[len];
+			if (buf) {
+				struct stat statbuf;
+				snprintf(buf, len, "%s/%s", path, p->d_name);
+				// lstat instead of stat: a symbolic link must be unlinked itself, we must never
+				// descend through it and delete the contents of the directory it points to
+				if (!lstat(buf, &statbuf)) {
+					if (S_ISDIR(statbuf.st_mode)) {
+						r2 = RemoveDirectoryRecursive(buf);
+					} else {
+						r2 = unlink(buf);
+					}
+				}
+				delete[] buf;
+			}
+			r = r2;
+		}
+		closedir(d);
+	}
+	if (!r) {
+		r = rmdir(path);
+	}
+	return r;
+}
+
+//! Recursively removes "directory"; the result of the removal is not reported to the caller
+void FileSystem::RemoveDirectory(const string &directory) {
+	RemoveDirectoryRecursive(directory.c_str());
+}
+
+//! Removes "filename"; throws IOException if the file could not be removed
+void FileSystem::RemoveFile(const string &filename) {
+	if (std::remove(filename.c_str()) != 0) {
+		throw IOException("Could not remove file \"%s\": %s", filename, strerror(errno));
+	}
+}
+
+//! Invokes callback(name, is_directory) for every regular file and directory directly inside "directory".
+//! Entries named "." and "..", entries that cannot be stat-ed, and entries of any other type are skipped.
+//! Returns false if "directory" does not exist or cannot be opened, true otherwise.
+bool FileSystem::ListFiles(const string &directory, const std::function<void(string, bool)> &callback) {
+	if (!DirectoryExists(directory)) {
+		return false;
+	}
+	DIR *dir = opendir(directory.c_str());
+	if (!dir) {
+		return false;
+	}
+	struct dirent *ent;
+	// loop over all files in the directory
+	while ((ent = readdir(dir)) != nullptr) {
+		string name = string(ent->d_name);
+		// skip . .. and empty files
+		if (name.empty() || name == "." || name == "..") {
+			continue;
+		}
+		// now stat the file to figure out if it is a regular file or directory
+		string full_path = JoinPath(directory, name);
+		struct stat status;
+		if (stat(full_path.c_str(), &status) != 0) {
+			// entry disappeared or is not accessible: "status" is not filled in, skip it
+			continue;
+		}
+		// S_ISREG/S_ISDIR compare the complete file-type field; single-bit tests against
+		// S_IFREG/S_IFDIR also match sockets and block devices
+		bool is_directory = S_ISDIR(status.st_mode);
+		if (!S_ISREG(status.st_mode) && !is_directory) {
+			// not a file or directory: skip
+			continue;
+		}
+		// invoke callback
+		callback(name, is_directory);
+	}
+	closedir(dir);
+	return true;
+}
+
+string FileSystem::PathSeparator() {
+	return "/";
+}
+
+//! Flushes the file to disk with fsync; throws FatalException on failure
+void FileSystem::FileSync(FileHandle &handle) {
+	int fd = ((UnixFileHandle &)handle).fd;
+	if (fsync(fd) != 0) {
+		throw FatalException("fsync failed!");
+	}
+}
+
+void FileSystem::MoveFile(const string &source, const string &target) {
+	//! FIXME: rename does not guarantee atomicity or overwriting target file if it exists
+	if (rename(source.c_str(), target.c_str()) != 0) {
+		throw IOException("Could not rename file!");
+	}
+}
+
+void FileSystem::SetWorkingDirectory(const string &path) {
+	if (chdir(path.c_str()) != 0) {
+		throw IOException("Could not change working directory!");
+	}
+}
+
+//! Returns physical pages * page size as reported by sysconf, capped at UINTPTR_MAX
+idx_t FileSystem::GetAvailableMemory() {
+	errno = 0;
+	idx_t max_memory = MinValue<idx_t>((idx_t)sysconf(_SC_PHYS_PAGES) * (idx_t)sysconf(_SC_PAGESIZE), UINTPTR_MAX);
+	if (errno != 0) {
+		throw IOException("Could not fetch available system memory!");
+	}
+	return max_memory;
+}
+
+string FileSystem::GetWorkingDirectory() {
+	auto buffer = unique_ptr<char[]>(new char[PATH_MAX]);
+	char *ret = getcwd(buffer.get(), PATH_MAX);
+	if (!ret) {
+		throw IOException("Could not get working directory!");
+	}
+	return string(buffer.get());
+}
+#else
+
+constexpr char PIPE_PREFIX[] = "\\\\.\\pipe\\";
+
+// Returns the last Win32 error, in string format. Returns an empty string if there is no error.
+std::string GetLastErrorAsString() {
+	// Get the error message, if any.
+	DWORD errorMessageID = GetLastError();
+	if (errorMessageID == 0)
+		return std::string(); // No error message has been recorded
+
+	LPSTR messageBuffer = nullptr;
+	idx_t size =
+	    FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
+	                   NULL, errorMessageID, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL);
+
+	std::string message(messageBuffer, size);
+
+	// Free the buffer.
+	LocalFree(messageBuffer);
+
+	return message;
+}
+
+//! FileHandle implementation that wraps a Win32 HANDLE together with an emulated file position
+struct WindowsFileHandle : public FileHandle {
+public:
+	WindowsFileHandle(FileSystem &file_system, string path, HANDLE fd)
+	    : FileHandle(file_system, path), position(0), fd(fd) {
+	}
+	virtual ~WindowsFileHandle() {
+		Close();
+	}
+
+protected:
+	//! Closes the handle if it is still open. The handle is reset to INVALID_HANDLE_VALUE afterwards so that a
+	//! second call (the destructor always calls Close) does not pass an already closed handle to CloseHandle.
+	void Close() override {
+		if (fd != INVALID_HANDLE_VALUE) {
+			CloseHandle(fd);
+			fd = INVALID_HANDLE_VALUE;
+		}
+	}
+
+public:
+	//! Offset used by the Read/Write overloads that take no explicit location
+	idx_t position;
+	//! The wrapped Win32 handle; INVALID_HANDLE_VALUE once the handle has been closed
+	HANDLE fd;
+};
+
+unique_ptr<FileHandle> FileSystem::OpenFile(const string &path, uint8_t flags, FileLockType lock_type,
+                                            FileCompressionType compression) {
+	if (compression != FileCompressionType::UNCOMPRESSED) {
+		throw NotImplementedException("Unsupported compression type for default file system");
+	}
+	AssertValidFileFlags(flags);
+
+	DWORD desired_access;
+	DWORD share_mode;
+	DWORD creation_disposition = OPEN_EXISTING;
+	DWORD flags_and_attributes = FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED;
+	if (flags & FileFlags::FILE_FLAGS_READ) {
+		desired_access = GENERIC_READ;
+		share_mode = FILE_SHARE_READ;
+	} else {
+		// need Read or Write
+		D_ASSERT(flags & FileFlags::FILE_FLAGS_WRITE);
+		desired_access = GENERIC_READ | GENERIC_WRITE;
+		share_mode = 0;
+		if (flags & FileFlags::FILE_FLAGS_FILE_CREATE) {
+			creation_disposition = OPEN_ALWAYS;
+		} else if (flags & FileFlags::FILE_FLAGS_FILE_CREATE_NEW) {
+			creation_disposition = CREATE_ALWAYS;
+		}
+		if (flags & FileFlags::FILE_FLAGS_DIRECT_IO) {
+			flags_and_attributes |= FILE_FLAG_WRITE_THROUGH;
+		}
+	}
+	if (flags & FileFlags::FILE_FLAGS_DIRECT_IO) {
+		flags_and_attributes |=
FILE_FLAG_NO_BUFFERING; + } + HANDLE hFile = + CreateFileA(path.c_str(), desired_access, share_mode, NULL, creation_disposition, flags_and_attributes, NULL); + if (hFile == INVALID_HANDLE_VALUE) { + auto error = GetLastErrorAsString(); + throw IOException("Cannot open file \"%s\": %s", path.c_str(), error); + } + auto handle = make_unique(*this, path.c_str(), hFile); + if (flags & FileFlags::FILE_FLAGS_APPEND) { + auto file_size = GetFileSize(*handle); + SetFilePointer(*handle, file_size); + } + return move(handle); +} + +void FileSystem::SetFilePointer(FileHandle &handle, idx_t location) { + ((WindowsFileHandle &)handle).position = location; +} + +idx_t FileSystem::GetFilePointer(FileHandle &handle) { + return ((WindowsFileHandle &)handle).position; +} + +void FileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) { + HANDLE hFile = ((WindowsFileHandle &)handle).fd; + DWORD bytes_read; + OVERLAPPED ov = {}; + ov.Internal = 0; + ov.InternalHigh = 0; + ov.Offset = location & 0xFFFFFFFF; + ov.OffsetHigh = location >> 32; + ov.hEvent = 0; + ReadFile(hFile, buffer, (DWORD)nr_bytes, NULL, &ov); + auto rc = GetOverlappedResult(hFile, &ov, &bytes_read, true); + if (rc == 0) { + auto error = GetLastErrorAsString(); + throw IOException("Could not read file \"%s\": %s", handle.path, error); + } + if (bytes_read != nr_bytes) { + throw IOException("Could not read all bytes from file \"%s\": wanted=%lld read=%lld", handle.path, nr_bytes, + bytes_read); + } +} + +int64_t FileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes) { + HANDLE hFile = ((WindowsFileHandle &)handle).fd; + DWORD bytes_read; + auto &pos = ((WindowsFileHandle &)handle).position; + OVERLAPPED ov = {}; + ov.Internal = 0; + ov.InternalHigh = 0; + ov.Offset = pos & 0xFFFFFFFF; + ov.OffsetHigh = pos >> 32; + ov.hEvent = 0; + auto n = std::min(std::max(GetFileSize(handle), pos) - pos, nr_bytes); + ReadFile(hFile, buffer, (DWORD)n, NULL, &ov); + auto rc = 
GetOverlappedResult(hFile, &ov, &bytes_read, true); + if (rc == 0) { + auto error = GetLastErrorAsString(); + throw IOException("Could not read file \"%s\": %s", handle.path, error); + } + pos += bytes_read; + return bytes_read; +} + +void FileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) { + HANDLE hFile = ((WindowsFileHandle &)handle).fd; + DWORD bytes_written; + OVERLAPPED ov = {}; + ov.Internal = 0; + ov.InternalHigh = 0; + ov.Offset = location & 0xFFFFFFFF; + ov.OffsetHigh = location >> 32; + ov.hEvent = 0; + WriteFile(hFile, buffer, (DWORD)nr_bytes, NULL, &ov); + auto rc = GetOverlappedResult(hFile, &ov, &bytes_written, true); + if (rc == 0) { + auto error = GetLastErrorAsString(); + throw IOException("Could not write file \"%s\": %s", handle.path, error); + } + if (bytes_written != nr_bytes) { + throw IOException("Could not write all bytes from file \"%s\": wanted=%lld wrote=%lld", handle.path, nr_bytes, + bytes_written); + } +} + +int64_t FileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes) { + HANDLE hFile = ((WindowsFileHandle &)handle).fd; + DWORD bytes_written; + auto &pos = ((WindowsFileHandle &)handle).position; + OVERLAPPED ov = {}; + ov.Internal = 0; + ov.InternalHigh = 0; + ov.Offset = pos & 0xFFFFFFFF; + ov.OffsetHigh = pos >> 32; + ov.hEvent = 0; + WriteFile(hFile, buffer, (DWORD)nr_bytes, NULL, &ov); + auto rc = GetOverlappedResult(hFile, &ov, &bytes_written, true); + if (rc == 0) { + auto error = GetLastErrorAsString(); + throw IOException("Could not write file \"%s\": %s", handle.path, error); + } + pos += bytes_written; + return bytes_written; +} + +int64_t FileSystem::GetFileSize(FileHandle &handle) { + HANDLE hFile = ((WindowsFileHandle &)handle).fd; + LARGE_INTEGER result; + if (!GetFileSizeEx(hFile, &result)) { + return -1; + } + return result.QuadPart; +} + +time_t FileSystem::GetLastModifiedTime(FileHandle &handle) { + HANDLE hFile = ((WindowsFileHandle &)handle).fd; + + // 
https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfiletime
+	FILETIME last_write;
+	if (GetFileTime(hFile, nullptr, nullptr, &last_write) == 0) {
+		return -1;
+	}
+
+	// https://stackoverflow.com/questions/29266743/what-is-dwlowdatetime-and-dwhighdatetime
+	ULARGE_INTEGER ul;
+	ul.LowPart = last_write.dwLowDateTime;
+	ul.HighPart = last_write.dwHighDateTime;
+	int64_t fileTime64 = ul.QuadPart;
+
+	// fileTime64 contains a 64-bit value representing the number of
+	// 100-nanosecond intervals since January 1, 1601 (UTC).
+	// https://docs.microsoft.com/en-us/windows/win32/api/minwinbase/ns-minwinbase-filetime
+
+	// Adapted from: https://stackoverflow.com/questions/6161776/convert-windows-filetime-to-second-in-unix-linux
+	const auto WINDOWS_TICK = 10000000;
+	const auto SEC_TO_UNIX_EPOCH = 11644473600LL;
+	time_t result = (fileTime64 / WINDOWS_TICK - SEC_TO_UNIX_EPOCH);
+	return result;
+}
+
+//! Sets the size of the file to exactly new_size bytes.
+//! Throws IOException if the file pointer cannot be moved or SetEndOfFile fails.
+void FileSystem::Truncate(FileHandle &handle, int64_t new_size) {
+	HANDLE hFile = ((WindowsFileHandle &)handle).fd;
+	// SetEndOfFile cuts the file at the OS-level file pointer. FileSystem::SetFilePointer only stores the
+	// emulated position inside the WindowsFileHandle, so the real pointer has to be moved explicitly first.
+	LARGE_INTEGER target;
+	target.QuadPart = new_size;
+	if (!SetFilePointerEx(hFile, target, NULL, FILE_BEGIN)) {
+		auto error = GetLastErrorAsString();
+		throw IOException("Failure in SetFilePointerEx call on file \"%s\": %s", handle.path, error);
+	}
+	// keep the emulated position in sync with the new end of the file, as before
+	SetFilePointer(handle, new_size);
+	// now set the end of file position
+	if (!SetEndOfFile(hFile)) {
+		auto error = GetLastErrorAsString();
+		throw IOException("Failure in SetEndOfFile call on file \"%s\": %s", handle.path, error);
+	}
+}
+
+//! Returns true if "directory" exists and has the FILE_ATTRIBUTE_DIRECTORY attribute
+bool FileSystem::DirectoryExists(const string &directory) {
+	DWORD attrs = GetFileAttributesA(directory.c_str());
+	return (attrs != INVALID_FILE_ATTRIBUTES && (attrs & FILE_ATTRIBUTE_DIRECTORY));
+}
+
+//! Returns true if "filename" exists and does not have the FILE_ATTRIBUTE_DIRECTORY attribute
+bool FileSystem::FileExists(const string &filename) {
+	DWORD attrs = GetFileAttributesA(filename.c_str());
+	return (attrs != INVALID_FILE_ATTRIBUTES && !(attrs & FILE_ATTRIBUTE_DIRECTORY));
+}
+
+//! Creates "directory" unless it already exists; throws IOException if the path is empty or creation fails
+void FileSystem::CreateDirectory(const string &directory) {
+	if (DirectoryExists(directory)) {
+		return;
+	}
+	if (directory.empty() || !CreateDirectoryA(directory.c_str(), NULL) || !DirectoryExists(directory)) {
+		throw IOException("Could not create directory!");
+	}
+}
+
+static void delete_dir_special_snowflake_windows(string directory) { + if (directory.size() + 3 > MAX_PATH) { + throw IOException("Pathname too long"); + } + // create search pattern + TCHAR szDir[MAX_PATH]; + snprintf(szDir, MAX_PATH, "%s\\*", directory.c_str()); + + WIN32_FIND_DATA ffd; + HANDLE hFind = FindFirstFile(szDir, &ffd); + if (hFind == INVALID_HANDLE_VALUE) { + return; + } + + do { + if (string(ffd.cFileName) == "." || string(ffd.cFileName) == "..") { + continue; + } + if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { + // recurse to zap directory contents + FileSystem fs; + delete_dir_special_snowflake_windows(fs.JoinPath(directory, ffd.cFileName)); + } else { + if (strlen(ffd.cFileName) + directory.size() + 1 > MAX_PATH) { + throw IOException("Pathname too long"); + } + // create search pattern + TCHAR del_path[MAX_PATH]; + snprintf(del_path, MAX_PATH, "%s\\%s", directory.c_str(), ffd.cFileName); + if (!DeleteFileA(del_path)) { + throw IOException("Failed to delete directory entry"); + } + } + } while (FindNextFile(hFind, &ffd) != 0); + + DWORD dwError = GetLastError(); + if (dwError != ERROR_NO_MORE_FILES) { + throw IOException("Something went wrong"); + } + FindClose(hFind); + + if (!RemoveDirectoryA(directory.c_str())) { + throw IOException("Failed to delete directory"); + } +} + +void FileSystem::RemoveDirectory(const string &directory) { + delete_dir_special_snowflake_windows(directory.c_str()); +} + +void FileSystem::RemoveFile(const string &filename) { + DeleteFileA(filename.c_str()); +} + +bool FileSystem::ListFiles(const string &directory, const std::function &callback) { + string search_dir = JoinPath(directory, "*"); + + WIN32_FIND_DATA ffd; + HANDLE hFind = FindFirstFile(search_dir.c_str(), &ffd); + if (hFind == INVALID_HANDLE_VALUE) { + return false; + } + do { + string cFileName = string(ffd.cFileName); + if (cFileName == "." 
|| cFileName == "..") { + continue; + } + callback(cFileName, ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY); + } while (FindNextFile(hFind, &ffd) != 0); + + DWORD dwError = GetLastError(); + if (dwError != ERROR_NO_MORE_FILES) { + FindClose(hFind); + return false; + } + + FindClose(hFind); + return true; +} + +string FileSystem::PathSeparator() { + return "\\"; +} + +void FileSystem::FileSync(FileHandle &handle) { + HANDLE hFile = ((WindowsFileHandle &)handle).fd; + if (FlushFileBuffers(hFile) == 0) { + throw IOException("Could not flush file handle to disk!"); + } +} + +void FileSystem::MoveFile(const string &source, const string &target) { + if (!MoveFileA(source.c_str(), target.c_str())) { + throw IOException("Could not move file"); + } +} + +void FileSystem::SetWorkingDirectory(const string &path) { + if (!SetCurrentDirectory(path.c_str())) { + throw IOException("Could not change working directory!"); + } +} + +idx_t FileSystem::GetAvailableMemory() { + ULONGLONG available_memory_kb; + if (!GetPhysicallyInstalledSystemMemory(&available_memory_kb)) { + throw IOException("Could not fetch available system memory!"); + } + return MinValue(available_memory_kb * 1024, UINTPTR_MAX); +} + +string FileSystem::GetWorkingDirectory() { + idx_t count = GetCurrentDirectory(0, nullptr); + if (count == 0) { + throw IOException("Could not get working directory!"); + } + auto buffer = unique_ptr(new char[count]); + idx_t ret = GetCurrentDirectory(count, buffer.get()); + if (count != ret + 1) { + throw IOException("Could not get working directory!"); + } + return string(buffer.get(), ret); +} + +FileType FileSystem::GetFileType(FileHandle &handle) { + auto path = ((WindowsFileHandle &)handle).path; + // pipes in windows are just files in '\\.\pipe\' folder + if (strncmp(path.c_str(), PIPE_PREFIX, strlen(PIPE_PREFIX)) == 0) { + return FileType::FILE_TYPE_FIFO; + } + DWORD attrs = GetFileAttributesA(path.c_str()); + if (attrs != INVALID_FILE_ATTRIBUTES) { + if (attrs & 
FILE_ATTRIBUTE_DIRECTORY) { + return FileType::FILE_TYPE_DIR; + } else { + return FileType::FILE_TYPE_REGULAR; + } + } + return FileType::FILE_TYPE_INVALID; +} +#endif + +string FileSystem::GetHomeDirectory() { + const char *homedir = getenv("HOME"); + if (!homedir) { + return string(); + } + return homedir; +} + +bool FileSystem::CanSeek() { + return true; +} + +bool FileSystem::OnDiskFile(FileHandle &handle) { + return true; +} + +void FileSystem::Seek(FileHandle &handle, idx_t location) { + if (!CanSeek()) { + throw IOException("Cannot seek in files of this type"); + } + SetFilePointer(handle, location); +} + +void FileSystem::Reset(FileHandle &handle) { + Seek(handle, 0); +} + +idx_t FileSystem::SeekPosition(FileHandle &handle) { + if (!CanSeek()) { + throw IOException("Cannot seek in files of this type"); + } + return GetFilePointer(handle); +} + +string FileSystem::JoinPath(const string &a, const string &b) { + // FIXME: sanitize paths + return a + PathSeparator() + b; +} + +string FileSystem::ConvertSeparators(const string &path) { + auto separator_str = PathSeparator(); + char separator = separator_str[0]; + if (separator == '/') { + // on unix-based systems we only accept / as a separator + return path; + } + // on windows-based systems we accept both + string result = path; + for (idx_t i = 0; i < result.size(); i++) { + if (result[i] == '/') { + result[i] = separator; + } + } + return result; +} + +string FileSystem::ExtractBaseName(const string &path) { + auto sep = PathSeparator(); + auto vec = StringUtil::Split(StringUtil::Split(path, sep).back(), "."); + return vec[0]; +} + +int64_t FileHandle::Read(void *buffer, idx_t nr_bytes) { + return file_system.Read(*this, buffer, nr_bytes); +} + +int64_t FileHandle::Write(void *buffer, idx_t nr_bytes) { + return file_system.Write(*this, buffer, nr_bytes); +} + +void FileHandle::Read(void *buffer, idx_t nr_bytes, idx_t location) { + file_system.Read(*this, buffer, nr_bytes, location); +} + +void 
FileHandle::Write(void *buffer, idx_t nr_bytes, idx_t location) { + file_system.Write(*this, buffer, nr_bytes, location); +} + +void FileHandle::Seek(idx_t location) { + file_system.Seek(*this, location); +} + +void FileHandle::Reset() { + file_system.Reset(*this); +} + +idx_t FileHandle::SeekPosition() { + return file_system.SeekPosition(*this); +} + +bool FileHandle::CanSeek() { + return file_system.CanSeek(); +} + +string FileHandle::ReadLine() { + string result; + char buffer[1]; + while (true) { + idx_t tuples_read = Read(buffer, 1); + if (tuples_read == 0 || buffer[0] == '\n') { + return result; + } + if (buffer[0] != '\r') { + result += buffer[0]; + } + } +} + +bool FileHandle::OnDiskFile() { + return file_system.OnDiskFile(*this); +} + +idx_t FileHandle::GetFileSize() { + return file_system.GetFileSize(*this); +} + +void FileHandle::Sync() { + file_system.FileSync(*this); +} + +void FileHandle::Truncate(int64_t new_size) { + file_system.Truncate(*this, new_size); +} + +FileType FileHandle::GetType() { + return file_system.GetFileType(*this); +} + +static bool HasGlob(const string &str) { + for (idx_t i = 0; i < str.size(); i++) { + switch (str[i]) { + case '*': + case '?': + case '[': + return true; + default: + break; + } + } + return false; +} + +static void GlobFiles(FileSystem &fs, const string &path, const string &glob, bool match_directory, + vector &result, bool join_path) { + fs.ListFiles(path, [&](const string &fname, bool is_directory) { + if (is_directory != match_directory) { + return; + } + if (LikeFun::Glob(fname.c_str(), fname.size(), glob.c_str(), glob.size())) { + if (join_path) { + result.push_back(fs.JoinPath(path, fname)); + } else { + result.push_back(fname); + } + } + }); +} + +vector FileSystem::Glob(const string &path) { + if (path.empty()) { + return vector(); + } + // first check if the path has a glob at all + if (!HasGlob(path)) { + // no glob: return only the file (if it exists) + vector result; + if (FileExists(path)) { + 
result.push_back(path); + } + return result; + } + // split up the path into separate chunks + vector splits; + idx_t last_pos = 0; + for (idx_t i = 0; i < path.size(); i++) { + if (path[i] == '\\' || path[i] == '/') { + if (i == last_pos) { + // empty: skip this position + last_pos = i + 1; + continue; + } + if (splits.empty()) { + splits.push_back(path.substr(0, i)); + } else { + splits.push_back(path.substr(last_pos, i - last_pos)); + } + last_pos = i + 1; + } + } + splits.push_back(path.substr(last_pos, path.size() - last_pos)); + // handle absolute paths + bool absolute_path = false; + if (path[0] == '/') { + // first character is a slash - unix absolute path + absolute_path = true; + } else if (StringUtil::Contains(splits[0], ":")) { + // first split has a colon - windows absolute path + absolute_path = true; + } else if (splits[0] == "~") { + // starts with home directory + auto home_directory = GetHomeDirectory(); + if (!home_directory.empty()) { + absolute_path = true; + splits[0] = home_directory; + } + } + vector previous_directories; + if (absolute_path) { + // for absolute paths, we don't start by scanning the current directory + previous_directories.push_back(splits[0]); + } + for (idx_t i = absolute_path ? 
1 : 0; i < splits.size(); i++) { + bool is_last_chunk = i + 1 == splits.size(); + bool has_glob = HasGlob(splits[i]); + // if it's the last chunk we need to find files, otherwise we find directories + // not the last chunk: gather a list of all directories that match the glob pattern + vector result; + if (!has_glob) { + // no glob, just append as-is + if (previous_directories.empty()) { + result.push_back(splits[i]); + } else { + for (auto &prev_directory : previous_directories) { + result.push_back(JoinPath(prev_directory, splits[i])); + } + } + } else { + if (previous_directories.empty()) { + // no previous directories: list in the current path + GlobFiles(*this, ".", splits[i], !is_last_chunk, result, false); + } else { + // previous directories + // we iterate over each of the previous directories, and apply the glob of the current directory + for (auto &prev_directory : previous_directories) { + GlobFiles(*this, prev_directory, splits[i], !is_last_chunk, result, true); + } + } + } + if (is_last_chunk || result.empty()) { + return result; + } + previous_directories = move(result); + } + return vector(); +} + +unique_ptr VirtualFileSystem::OpenFile(const string &path, uint8_t flags, FileLockType lock, + FileCompressionType compression) { + if (compression == FileCompressionType::AUTO_DETECT) { + // auto detect compression settings based on file name + auto lower_path = StringUtil::Lower(path); + if (StringUtil::EndsWith(lower_path, ".gz")) { + compression = FileCompressionType::GZIP; + } else { + compression = FileCompressionType::UNCOMPRESSED; + } + } + // open the base file handle + auto file_handle = FindFileSystem(path)->OpenFile(path, flags, lock, FileCompressionType::UNCOMPRESSED); + if (file_handle->GetType() == FileType::FILE_TYPE_FIFO) { + file_handle = PipeFileSystem::OpenPipe(move(file_handle)); + } else if (compression != FileCompressionType::UNCOMPRESSED) { + switch (compression) { + case FileCompressionType::GZIP: + file_handle = 
GZipFileSystem::OpenCompressedFile(move(file_handle)); + break; + default: + throw NotImplementedException("Unimplemented compression type"); + } + } + return file_handle; +} + +} // namespace duckdb + + + + + + +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list + +/* miniz.c 2.0.8 - public domain deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing + See "unlicense" statement at the end of this file. + Rich Geldreich , last updated Oct. 13, 2013 + Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt + + Most API's defined in miniz.c are optional. For example, to disable the archive related functions just define + MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO (see the list below for more macros). + + * Low-level Deflate/Inflate implementation notes: + + Compression: Use the "tdefl" API's. The compressor supports raw, static, and dynamic blocks, lazy or + greedy parsing, match length filtering, RLE-only, and Huffman-only streams. It performs and compresses + approximately as well as zlib. + + Decompression: Use the "tinfl" API's. The entire decompressor is implemented as a single function + coroutine: see tinfl_decompress(). It supports decompression into a 32KB (or larger power of 2) wrapping buffer, or into a memory + block large enough to hold the entire file. + + The low-level tdefl/tinfl API's do not make any use of dynamic memory allocation. + + * zlib-style API notes: + + miniz.c implements a fairly large subset of zlib. 
There's enough functionality present for it to be a drop-in + zlib replacement in many apps: + The z_stream struct, optional memory allocation callbacks + deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound + inflateInit/inflateInit2/inflate/inflateEnd + compress, compress2, compressBound, uncompress + CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly routines. + Supports raw deflate streams or standard zlib streams with adler-32 checking. + + Limitations: + The callback API's are not implemented yet. No support for gzip headers or zlib static dictionaries. + I've tried to closely emulate zlib's various flavors of stream flushing and return status codes, but + there are no guarantees that miniz.c pulls this off perfectly. + + * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, originally written by + Alex Evans. Supports 1-4 bytes/pixel images. + + * ZIP archive API notes: + + The ZIP archive API's were designed with simplicity and efficiency in mind, with just enough abstraction to + get the job done with minimal fuss. There are simple API's to retrieve file information, read files from + existing archives, create new archives, append new files to existing archives, or clone archive data from + one archive to another. It supports archives located in memory or the heap, on disk (using stdio.h), + or you can specify custom file read/write callbacks. + + - Archive reading: Just call this function to read a single file from a disk archive: + + void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, + size_t *pSize, mz_uint zip_flags); + + For more complex cases, use the "mz_zip_reader" functions. Upon opening an archive, the entire central + directory is located and read as-is into memory, and subsequent file access only occurs when reading individual files. 
+ + - Archive file scanning: The simple way is to use this function to scan a loaded archive for a specific file: + + int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags); + + The locate operation can optionally check file comments too, which (as one example) can be used to identify + multiple versions of the same file in an archive. This function uses a simple linear search through the central + directory, so it's not very fast. + + Alternately, you can iterate through all the files in an archive (using mz_zip_reader_get_num_files()) and + retrieve detailed info on each file by calling mz_zip_reader_file_stat(). + + - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer immediately writes compressed file data + to disk and builds an exact image of the central directory in memory. The central directory image is written + all at once at the end of the archive file when the archive is finalized. + + The archive writer can optionally align each file's local header and file data to any power of 2 alignment, + which can be useful when the archive will be read from optical media. Also, the writer supports placing + arbitrary data blobs at the very beginning of ZIP archives. Archives written using either feature are still + readable by any ZIP tool. + + - Archive appending: The simple way to add a single file to an archive is to call this function: + + mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, + const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); + + The archive will be created if it doesn't already exist, otherwise it'll be appended to. 
+ Note the appending is done in-place and is not an atomic operation, so if something goes wrong + during the operation it's possible the archive could be left without a central directory (although the local + file headers and file data will be fine, so the archive will be recoverable). + + For more complex archive modification scenarios: + 1. The safest way is to use an mz_zip_reader to read the existing archive, cloning only those bits you want to + preserve into a new archive using the mz_zip_writer_add_from_zip_reader() function (which copies the + compressed file data as-is). When you're done, delete the old archive and rename the newly written archive, and + you're done. This is safe but requires a bunch of temporary disk space or heap memory. + + 2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using mz_zip_writer_init_from_reader(), + append new files as needed, then finalize the archive which will write an updated central directory to the + original archive. (This is basically what mz_zip_add_mem_to_archive_file_in_place() does.) There's a + possibility that the archive's central directory could be lost with this method if anything goes wrong, though. + + - ZIP archive support limitations: + No zip64 or spanning support. Extraction functions can only handle unencrypted, stored or deflated files. + Requires streams capable of seeking. + + * This is a header file library, like stb_image.c. To get only a header file, either cut and paste the + below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then include miniz.c from it. + + * Important: For best perf. be sure to customize the below macros for your target platform: + #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 + #define MINIZ_LITTLE_ENDIAN 1 + #define MINIZ_HAS_64BIT_REGISTERS 1 + + * On platforms using glibc, be sure to "#define _LARGEFILE64_SOURCE 1" before including miniz.c to ensure miniz + uses the 64-bit variants: fopen64(), stat64(), etc. 
Otherwise you won't be able to process large files + (i.e. 32-bit stat() fails for me on files > 0x7FFFFFFF bytes). +*/ + + + + + +/* Defines to completely disable specific portions of miniz.c: + If all macros here are defined the only functionality remaining will be CRC-32, adler-32, tinfl, and tdefl. */ + +/* Define MINIZ_NO_STDIO to disable all usage and any functions which rely on stdio for file I/O. */ +#define MINIZ_NO_STDIO + +/* If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able to get the current time, or */ +/* get/set file times, and the C run-time funcs that get/set times won't be called. */ +/* The current downside is the times written to your archives will be from 1979. */ +#define MINIZ_NO_TIME + +/* Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. */ +/* #define MINIZ_NO_ARCHIVE_APIS */ + +/* Define MINIZ_NO_ARCHIVE_WRITING_APIS to disable all writing related ZIP archive API's. */ +/* #define MINIZ_NO_ARCHIVE_WRITING_APIS */ + +/* Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression API's. */ +/*#define MINIZ_NO_ZLIB_APIS */ + +/* Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent conflicts against stock zlib. */ +#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES + +/* Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. + Note if MINIZ_NO_MALLOC is defined then the user must always provide custom user alloc/free/realloc + callbacks to the zlib and archive API's, and a few stand-alone helper API's which don't provide custom user + functions (such as tdefl_compress_mem_to_heap() and tinfl_decompress_mem_to_heap()) won't work. 
*/ +/*#define MINIZ_NO_MALLOC */ + +#if defined(__TINYC__) && (defined(__linux) || defined(__linux__)) +/* TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc on Linux */ +#define MINIZ_NO_TIME +#endif + +#include + + + +#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) +#include +#endif + +#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__) /* MINIZ_X86_OR_X64_CPU is only used to help set the below macros. */ #define MINIZ_X86_OR_X64_CPU 1 #else @@ -12735,9 +15206,13 @@ namespace duckdb { */ -static idx_t GZipConsumeString(fstream &input) { +static idx_t GZipConsumeString(FileHandle &input) { idx_t size = 1; // terminator - while (input.get() != '\0') { + char buffer[1]; + while (input.Read(buffer, 1) == 1) { + if (buffer[0] == '\0') { + break; + } size++; } return size; @@ -12757,26 +15232,92 @@ static constexpr const uint8_t GZIP_HEADER_MINSIZE = 10; static constexpr const unsigned char GZIP_FLAG_UNSUPPORTED = GZIP_FLAG_ASCII | GZIP_FLAG_MULTIPART | GZIP_FLAG_EXTRA | GZIP_FLAG_COMMENT | GZIP_FLAG_ENCRYPT; -void GzipStreamBuf::Initialize() { - if (is_initialized) { - return; +struct MiniZStreamWrapper { + ~MiniZStreamWrapper() { + Close(); + } + + duckdb_miniz::mz_stream *mz_stream_ptr = nullptr; + +public: + void Initialize() { + Close(); + mz_stream_ptr = new duckdb_miniz::mz_stream(); + memset(mz_stream_ptr, 0, sizeof(duckdb_miniz::mz_stream)); + } + + void Close() { + if (!mz_stream_ptr) { + return; + } + duckdb_miniz::mz_inflateEnd(mz_stream_ptr); + delete mz_stream_ptr; + mz_stream_ptr = nullptr; + } +}; + +class GZipFile : public FileHandle { + static constexpr const idx_t BUFFER_SIZE = 1024; + +public: + GZipFile(unique_ptr child_handle_p, const string &path) + : FileHandle(gzip_fs, path), child_handle(move(child_handle_p)) { + Initialize(); + } + ~GZipFile() override { + Close(); + } + + 
void Initialize(); + int64_t ReadData(void *buffer, int64_t nr_bytes); + + GZipFileSystem gzip_fs; + unique_ptr child_handle; + +protected: + void Close() override { + miniz_stream.reset(); + in_buff.reset(); + out_buff.reset(); } + +private: + idx_t data_start = 0; + unique_ptr miniz_stream; + // various buffers & pointers + unique_ptr in_buff; + unique_ptr out_buff; + data_ptr_t out_buff_start = nullptr; + data_ptr_t out_buff_end = nullptr; + data_ptr_t in_buff_start = nullptr; + data_ptr_t in_buff_end = nullptr; +}; + +void GZipFile::Initialize() { + Close(); + D_ASSERT(BUFFER_SIZE >= 3); // found to work fine with 3 uint8_t gzip_hdr[10]; data_start = GZIP_HEADER_MINSIZE; - in_buff = new data_t[BUFFER_SIZE]; - in_buff_start = in_buff; - in_buff_end = in_buff; - out_buff = new data_t[BUFFER_SIZE]; + in_buff = unique_ptr(new data_t[BUFFER_SIZE]); + in_buff_start = in_buff.get(); + in_buff_end = in_buff.get(); + out_buff = unique_ptr(new data_t[BUFFER_SIZE]); + out_buff_start = out_buff.get(); + out_buff_end = out_buff.get(); - mz_stream_ptr = new duckdb_miniz::mz_stream(); - // TODO use custom alloc/free methods in miniz to throw exceptions on OOM + miniz_stream = make_unique(); + miniz_stream->Initialize(); - FstreamUtil::OpenFile(filename, input, ios::in | ios::binary); + auto &mz_stream_ptr = miniz_stream->mz_stream_ptr; - input.read((char *)gzip_hdr, GZIP_HEADER_MINSIZE); - if (!input) { + // TODO use custom alloc/free methods in miniz to throw exceptions on OOM + auto read_count = child_handle->Read(gzip_hdr, GZIP_HEADER_MINSIZE); + + // check for incorrectly formatted files + // LCOV_EXCL_START + if (read_count != GZIP_HEADER_MINSIZE) { throw Exception("Input is not a GZIP stream"); } if (gzip_hdr[0] != 0x1F || gzip_hdr[1] != 0x8B) { // magic header @@ -12788,105 +15329,135 @@ void GzipStreamBuf::Initialize() { if (gzip_hdr[3] & GZIP_FLAG_UNSUPPORTED) { throw Exception("Unsupported GZIP archive"); } + // LCOV_EXCL_STOP if (gzip_hdr[3] & GZIP_FLAG_NAME) { - 
input.seekg(data_start, input.beg); - data_start += GZipConsumeString(input); + child_handle->Seek(data_start); + data_start += GZipConsumeString(*child_handle); } - input.seekg(data_start, input.beg); + child_handle->Seek(data_start); // stream is now set to beginning of payload data - auto ret = duckdb_miniz::mz_inflateInit2((duckdb_miniz::mz_streamp)mz_stream_ptr, -MZ_DEFAULT_WINDOW_BITS); if (ret != duckdb_miniz::MZ_OK) { - throw Exception("Failed to initialize miniz"); + throw InternalException("Failed to initialize miniz"); } - // initialize eback, gptr, egptr - setg((char *)out_buff, (char *)out_buff, (char *)out_buff); - is_initialized = true; } -std::streambuf::int_type GzipStreamBuf::underflow() { - if (!is_initialized) { - Initialize(); - } - - // adapted from https://github.com/mateidavid/zstr - auto zstrm_p = (duckdb_miniz::mz_streamp)mz_stream_ptr; - if (!zstrm_p) { - return traits_type::eof(); - } +unique_ptr GZipFileSystem::OpenCompressedFile(unique_ptr handle) { + auto path = handle->path; + return make_unique(move(handle), path); +} - if (gptr() == egptr()) { - // pointers for free region in output buffer - auto out_buff_free_start = out_buff; - do { - D_ASSERT(in_buff_start <= in_buff_end); - D_ASSERT(in_buff_end <= in_buff_start + BUFFER_SIZE); - - // read more input if none available - if (in_buff_start == in_buff_end) { - // empty input buffer: refill from the start - in_buff_start = in_buff; - std::streamsize sz = input.rdbuf()->sgetn((char *)in_buff, BUFFER_SIZE); - if (sz == 0) { - break; // end of input - } - in_buff_end = in_buff + sz; - } - - // actually decompress - D_ASSERT(zstrm_p); - zstrm_p->next_in = (data_ptr_t)in_buff_start; - D_ASSERT(in_buff_end - in_buff_start < NumericLimits::Maximum()); - zstrm_p->avail_in = (uint32_t)(in_buff_end - in_buff_start); - zstrm_p->next_out = (data_ptr_t)out_buff_free_start; - D_ASSERT((out_buff + BUFFER_SIZE) - out_buff_free_start < NumericLimits::Maximum()); - zstrm_p->avail_out = 
(uint32_t)((out_buff + BUFFER_SIZE) - out_buff_free_start); - auto ret = duckdb_miniz::mz_inflate(zstrm_p, duckdb_miniz::MZ_NO_FLUSH); - if (ret != duckdb_miniz::MZ_OK && ret != duckdb_miniz::MZ_STREAM_END) { - throw Exception(duckdb_miniz::mz_error(ret)); - } - // update pointers following inflate() - in_buff_start = (data_ptr_t)zstrm_p->next_in; - in_buff_end = in_buff_start + zstrm_p->avail_in; - out_buff_free_start = (data_ptr_t)zstrm_p->next_out; - D_ASSERT(out_buff_free_start + zstrm_p->avail_out == out_buff + BUFFER_SIZE); - // if stream ended, deallocate inflator - if (ret == duckdb_miniz::MZ_STREAM_END) { - duckdb_miniz::mz_inflateEnd(zstrm_p); - delete zstrm_p; - mz_stream_ptr = nullptr; +int64_t GZipFile::ReadData(void *buffer, int64_t remaining) { + auto &mz_stream_ptr = miniz_stream->mz_stream_ptr; + idx_t total_read = 0; + while (true) { + // first check if there are input bytes available in the output buffers + if (out_buff_start != out_buff_end) { + // there is! copy it into the output buffer + idx_t available = MinValue(remaining, out_buff_end - out_buff_start); + memcpy(data_ptr_t(buffer) + total_read, out_buff_start, available); + + // increment the total read variables as required + out_buff_start += available; + total_read += available; + remaining -= available; + if (remaining == 0) { + // done! 
read enough + return total_read; + } + } + if (!mz_stream_ptr) { + return total_read; + } + + // ran out of buffer: read more data from the child stream + out_buff_start = out_buff.get(); + out_buff_end = out_buff.get(); + D_ASSERT(in_buff_start <= in_buff_end); + D_ASSERT(in_buff_end <= in_buff_start + GZipFile::BUFFER_SIZE); + + // read more input if none available + if (in_buff_start == in_buff_end) { + // empty input buffer: refill from the start + in_buff_start = in_buff.get(); + auto sz = child_handle->Read(in_buff.get(), BUFFER_SIZE); + if (sz <= 0) { break; } + in_buff_end = in_buff_start + sz; + } - } while (out_buff_free_start == out_buff); - // 2 exit conditions: - // - end of input: there might or might not be output available - // - out_buff_free_start != out_buff: output available - setg((char *)out_buff, (char *)out_buff, (char *)out_buff_free_start); + // actually decompress + mz_stream_ptr->next_in = (data_ptr_t)in_buff_start; + D_ASSERT(in_buff_end - in_buff_start < NumericLimits::Maximum()); + mz_stream_ptr->avail_in = (uint32_t)(in_buff_end - in_buff_start); + mz_stream_ptr->next_out = (data_ptr_t)out_buff_end; + D_ASSERT((out_buff.get() + BUFFER_SIZE) - out_buff_end < NumericLimits::Maximum()); + mz_stream_ptr->avail_out = (uint32_t)((out_buff.get() + BUFFER_SIZE) - out_buff_end); + auto ret = duckdb_miniz::mz_inflate(mz_stream_ptr, duckdb_miniz::MZ_NO_FLUSH); + if (ret != duckdb_miniz::MZ_OK && ret != duckdb_miniz::MZ_STREAM_END) { + throw Exception(duckdb_miniz::mz_error(ret)); + } + // update pointers following inflate() + in_buff_start = (data_ptr_t)mz_stream_ptr->next_in; + in_buff_end = in_buff_start + mz_stream_ptr->avail_in; + out_buff_end = (data_ptr_t)mz_stream_ptr->next_out; + D_ASSERT(out_buff_end + mz_stream_ptr->avail_out == out_buff.get() + BUFFER_SIZE); + // if stream ended, deallocate inflator + if (ret == duckdb_miniz::MZ_STREAM_END) { + miniz_stream->Close(); + } } + return total_read; +} + +int64_t 
GZipFileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes) { + auto &gzip_file = (GZipFile &)handle; + return gzip_file.ReadData(buffer, nr_bytes); +} - // ensure all those pointers point at something sane - D_ASSERT(out_buff); - D_ASSERT(gptr() <= egptr()); - D_ASSERT(eback() == (char *)out_buff); - D_ASSERT(gptr() >= (char *)out_buff); - D_ASSERT(gptr() <= (char *)out_buff + BUFFER_SIZE); - D_ASSERT(egptr() >= (char *)out_buff); - D_ASSERT(egptr() <= (char *)out_buff + BUFFER_SIZE); - D_ASSERT(gptr() <= egptr()); +// unsupported operations +// LCOV_EXCL_START +void GZipFileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) { + throw NotImplementedException("Unsupported: Random read in gzip file not supported"); +} - return this->gptr() == this->egptr() ? traits_type::eof() : traits_type::to_int_type(*this->gptr()); +void GZipFileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) { + throw NotImplementedException("Unsupported: Write to gzip file"); } -GzipStreamBuf::~GzipStreamBuf() { - delete[] in_buff; - delete[] out_buff; - auto zstrm_p = (duckdb_miniz::mz_streamp)mz_stream_ptr; - if (zstrm_p) { - duckdb_miniz::mz_inflateEnd(zstrm_p); - } - delete zstrm_p; +int64_t GZipFileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes) { + throw NotImplementedException("Unsupported: Write to gzip file"); +} + +void GZipFileSystem::Truncate(FileHandle &handle, int64_t new_size) { + throw NotImplementedException("Unsupported: Truncate gzip file"); +} + +void GZipFileSystem::FileSync(FileHandle &handle) { + throw NotImplementedException("Unsupported: Sync gzip file"); +} + +void GZipFileSystem::Seek(FileHandle &handle, idx_t location) { + throw NotImplementedException("Unsupported: Seek within gzip file"); +} +// LCOV_EXCL_STOP + +void GZipFileSystem::Reset(FileHandle &handle) { + auto &gzip_file = (GZipFile &)handle; + gzip_file.child_handle->Reset(); + gzip_file.Initialize(); +} + 
+int64_t GZipFileSystem::GetFileSize(FileHandle &handle) { + auto &gzip_file = (GZipFile &)handle; + return gzip_file.child_handle->GetFileSize(); +} + +bool GZipFileSystem::OnDiskFile(FileHandle &handle) { + auto &gzip_file = (GZipFile &)handle; + return gzip_file.child_handle->OnDiskFile(); } } // namespace duckdb @@ -12901,7 +15472,7 @@ namespace duckdb { using std::numeric_limits; int8_t NumericLimits::Minimum() { - return numeric_limits::lowest() + 1; + return numeric_limits::lowest(); } int8_t NumericLimits::Maximum() { @@ -12909,7 +15480,7 @@ int8_t NumericLimits::Maximum() { } int16_t NumericLimits::Minimum() { - return numeric_limits::lowest() + 1; + return numeric_limits::lowest(); } int16_t NumericLimits::Maximum() { @@ -12917,7 +15488,7 @@ int16_t NumericLimits::Maximum() { } int32_t NumericLimits::Minimum() { - return numeric_limits::lowest() + 1; + return numeric_limits::lowest(); } int32_t NumericLimits::Maximum() { @@ -12925,7 +15496,7 @@ int32_t NumericLimits::Maximum() { } int64_t NumericLimits::Minimum() { - return numeric_limits::lowest() + 1; + return numeric_limits::lowest(); } int64_t NumericLimits::Maximum() { @@ -12983,7 +15554,7 @@ uint64_t NumericLimits::Maximum() { hugeint_t NumericLimits::Minimum() { hugeint_t result; result.lower = 1; - result.upper = numeric_limits::lowest() + 1; + result.upper = numeric_limits::lowest(); return result; } @@ -12994,58 +15565,86 @@ hugeint_t NumericLimits::Maximum() { return result; } -// we offset the minimum value by 1 to account for the NULL value in the -// hashtables -static int64_t MinimumValue(PhysicalType type) { - switch (type) { - case PhysicalType::INT8: - return NumericLimits::Minimum(); - case PhysicalType::INT16: - return NumericLimits::Minimum(); - case PhysicalType::INT32: - return NumericLimits::Minimum(); - case PhysicalType::INT64: - case PhysicalType::INT128: - return NumericLimits::Minimum(); - default: - throw InvalidTypeException(type, "MinimumValue requires integral type"); - } 
-} +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/operator/cast_operators.hpp +// +// +//===----------------------------------------------------------------------===// -static uint64_t MaximumValue(PhysicalType type) { - switch (type) { - case PhysicalType::INT8: - return NumericLimits::Maximum(); - case PhysicalType::INT16: - return NumericLimits::Maximum(); - case PhysicalType::INT32: - return NumericLimits::Maximum(); - case PhysicalType::INT64: - case PhysicalType::INT128: - return NumericLimits::Maximum(); - default: - throw InvalidTypeException(type, "MaximumValue requires integral type"); - } -} -PhysicalType MinimalType(int64_t value) { - if (value >= MinimumValue(PhysicalType::INT8) && (uint64_t)value <= MaximumValue(PhysicalType::INT8)) { - return PhysicalType::INT8; - } - if (value >= MinimumValue(PhysicalType::INT16) && (uint64_t)value <= MaximumValue(PhysicalType::INT16)) { - return PhysicalType::INT16; - } - if (value >= MinimumValue(PhysicalType::INT32) && (uint64_t)value <= MaximumValue(PhysicalType::INT32)) { - return PhysicalType::INT32; + + + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/operator/convert_to_string.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + +namespace duckdb { + +struct ConvertToString { + template + static inline string Operation(SRC input) { + throw InternalException("Unrecognized type for ConvertToString %s", GetTypeId()); } - return PhysicalType::INT64; -} +}; + +template <> +string ConvertToString::Operation(bool input); +template <> +string ConvertToString::Operation(int8_t input); +template <> +string ConvertToString::Operation(int16_t input); +template <> +string ConvertToString::Operation(int32_t input); +template <> +string ConvertToString::Operation(int64_t input); +template <> +string 
ConvertToString::Operation(uint8_t input); +template <> +string ConvertToString::Operation(uint16_t input); +template <> +string ConvertToString::Operation(uint32_t input); +template <> +string ConvertToString::Operation(uint64_t input); +template <> +string ConvertToString::Operation(hugeint_t input); +template <> +string ConvertToString::Operation(float input); +template <> +string ConvertToString::Operation(double input); +template <> +string ConvertToString::Operation(interval_t input); +template <> +string ConvertToString::Operation(date_t input); +template <> +string ConvertToString::Operation(dtime_t input); +template <> +string ConvertToString::Operation(timestamp_t input); +template <> +string ConvertToString::Operation(string_t input); } // namespace duckdb + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/operator/cast_operators.hpp +// duckdb/common/types/null_value.hpp // // //===----------------------------------------------------------------------===// @@ -13057,293 +15656,463 @@ PhysicalType MinimalType(int64_t value) { + +#include +#include +#include + namespace duckdb { -struct Cast { +//! 
Placeholder to insert in Vectors or to use for hashing NULLs +template +inline T NullValue() { + return std::numeric_limits::min(); +} + +constexpr const char str_nil[2] = {'\200', '\0'}; + +template <> +inline const char *NullValue() { + D_ASSERT(str_nil[0] == '\200' && str_nil[1] == '\0'); + return str_nil; +} + +template <> +inline string_t NullValue() { + return string_t(NullValue()); +} + +template <> +inline char *NullValue() { + return (char *)NullValue(); +} + +template <> +inline string NullValue() { + return string(NullValue()); +} + +template <> +inline interval_t NullValue() { + interval_t null_value; + null_value.days = NullValue(); + null_value.months = NullValue(); + null_value.micros = NullValue(); + return null_value; +} + +template <> +inline hugeint_t NullValue() { + hugeint_t min; + min.lower = 0; + min.upper = std::numeric_limits::min(); + return min; +} + +template <> +inline float NullValue() { + return NAN; +} + +template <> +inline double NullValue() { + return NAN; +} + +} // namespace duckdb + + +namespace duckdb { +struct ValidityMask; +class Vector; + +struct TryCast { template - static inline DST Operation(SRC input) { - return (DST)input; + static inline bool Operation(SRC input, DST &result, bool strict = false) { + throw NotImplementedException("Unimplemented type for cast (%s -> %s)", GetTypeId(), GetTypeId()); } }; -struct TryCast { +struct TryCastErrorMessage { template - static inline bool Operation(SRC input, DST &target, bool strict = false) { - target = Cast::Operation(input); - return true; + static inline bool Operation(SRC input, DST &result, string *error_message, bool strict = false) { + throw NotImplementedException("Unimplemented type for cast (%s -> %s)", GetTypeId(), GetTypeId()); } }; -struct StrictCast { +template +static string CastExceptionText(SRC input) { + if (std::is_same()) { + return "Could not convert string '" + ConvertToString::Operation(input) + "' to " + + TypeIdToString(GetTypeId()); + } + if 
(TypeIsNumber() && TypeIsNumber()) { + return "Type " + TypeIdToString(GetTypeId()) + " with value " + ConvertToString::Operation(input) + + " can't be cast because the value is out of range for the destination type " + + TypeIdToString(GetTypeId()); + } + return "Type " + TypeIdToString(GetTypeId()) + " with value " + ConvertToString::Operation(input) + + " can't be cast to the destination type " + TypeIdToString(GetTypeId()); +} + +struct Cast { template static inline DST Operation(SRC input) { - return (DST)input; + DST result; + if (!TryCast::Operation(input, result)) { + throw InvalidInputException(CastExceptionText(input)); + } + return result; + } +}; + +struct HandleCastError { + static void AssignError(string error_message, string *error_message_ptr) { + if (!error_message_ptr) { + throw ConversionException(error_message); + } + if (error_message_ptr->empty()) { + *error_message_ptr = error_message; + } } }; //===--------------------------------------------------------------------===// -// Numeric -> int8_t casts +// Cast bool -> Numeric //===--------------------------------------------------------------------===// template <> -bool TryCast::Operation(uint8_t input, int8_t &result, bool strict); +bool TryCast::Operation(bool input, bool &result, bool strict); template <> -bool TryCast::Operation(uint16_t input, int8_t &result, bool strict); +bool TryCast::Operation(bool input, int8_t &result, bool strict); template <> -bool TryCast::Operation(uint32_t input, int8_t &result, bool strict); +bool TryCast::Operation(bool input, int16_t &result, bool strict); template <> -bool TryCast::Operation(uint64_t input, int8_t &result, bool strict); +bool TryCast::Operation(bool input, int32_t &result, bool strict); template <> -bool TryCast::Operation(int16_t input, int8_t &result, bool strict); +bool TryCast::Operation(bool input, int64_t &result, bool strict); template <> -bool TryCast::Operation(int32_t input, int8_t &result, bool strict); +bool 
TryCast::Operation(bool input, hugeint_t &result, bool strict); template <> -bool TryCast::Operation(int64_t input, int8_t &result, bool strict); +bool TryCast::Operation(bool input, uint8_t &result, bool strict); template <> -bool TryCast::Operation(float input, int8_t &result, bool strict); +bool TryCast::Operation(bool input, uint16_t &result, bool strict); template <> -bool TryCast::Operation(double input, int8_t &result, bool strict); +bool TryCast::Operation(bool input, uint32_t &result, bool strict); +template <> +bool TryCast::Operation(bool input, uint64_t &result, bool strict); +template <> +bool TryCast::Operation(bool input, float &result, bool strict); +template <> +bool TryCast::Operation(bool input, double &result, bool strict); +//===--------------------------------------------------------------------===// +// Cast int8_t -> Numeric +//===--------------------------------------------------------------------===// +template <> +bool TryCast::Operation(int8_t input, bool &result, bool strict); +template <> +bool TryCast::Operation(int8_t input, int8_t &result, bool strict); template <> -int8_t Cast::Operation(uint8_t input); +bool TryCast::Operation(int8_t input, int16_t &result, bool strict); template <> -int8_t Cast::Operation(uint16_t input); +bool TryCast::Operation(int8_t input, int32_t &result, bool strict); template <> -int8_t Cast::Operation(uint32_t input); +bool TryCast::Operation(int8_t input, int64_t &result, bool strict); template <> -int8_t Cast::Operation(uint64_t input); +bool TryCast::Operation(int8_t input, hugeint_t &result, bool strict); +template <> +bool TryCast::Operation(int8_t input, uint8_t &result, bool strict); template <> -int8_t Cast::Operation(int16_t input); +bool TryCast::Operation(int8_t input, uint16_t &result, bool strict); template <> -int8_t Cast::Operation(int32_t input); +bool TryCast::Operation(int8_t input, uint32_t &result, bool strict); template <> -int8_t Cast::Operation(int64_t input); +bool 
TryCast::Operation(int8_t input, uint64_t &result, bool strict); template <> -int8_t Cast::Operation(float input); +bool TryCast::Operation(int8_t input, float &result, bool strict); template <> -int8_t Cast::Operation(double input); +bool TryCast::Operation(int8_t input, double &result, bool strict); //===--------------------------------------------------------------------===// -// Numeric -> uint8_t casts +// Cast int16_t -> Numeric //===--------------------------------------------------------------------===// - template <> -bool TryCast::Operation(uint16_t input, uint8_t &result, bool strict); +bool TryCast::Operation(int16_t input, bool &result, bool strict); template <> -bool TryCast::Operation(uint32_t input, uint8_t &result, bool strict); +bool TryCast::Operation(int16_t input, int8_t &result, bool strict); template <> -bool TryCast::Operation(uint64_t input, uint8_t &result, bool strict); +bool TryCast::Operation(int16_t input, int16_t &result, bool strict); template <> -bool TryCast::Operation(int8_t input, uint8_t &result, bool strict); +bool TryCast::Operation(int16_t input, int32_t &result, bool strict); +template <> +bool TryCast::Operation(int16_t input, int64_t &result, bool strict); +template <> +bool TryCast::Operation(int16_t input, hugeint_t &result, bool strict); template <> bool TryCast::Operation(int16_t input, uint8_t &result, bool strict); template <> -bool TryCast::Operation(int32_t input, uint8_t &result, bool strict); +bool TryCast::Operation(int16_t input, uint16_t &result, bool strict); template <> -bool TryCast::Operation(int64_t input, uint8_t &result, bool strict); +bool TryCast::Operation(int16_t input, uint32_t &result, bool strict); template <> -bool TryCast::Operation(float input, uint8_t &result, bool strict); +bool TryCast::Operation(int16_t input, uint64_t &result, bool strict); template <> -bool TryCast::Operation(double input, uint8_t &result, bool strict); +bool TryCast::Operation(int16_t input, float &result, bool strict); 
+template <> +bool TryCast::Operation(int16_t input, double &result, bool strict); +//===--------------------------------------------------------------------===// +// Cast int32_t -> Numeric +//===--------------------------------------------------------------------===// template <> -uint8_t Cast::Operation(uint16_t input); +bool TryCast::Operation(int32_t input, bool &result, bool strict); template <> -uint8_t Cast::Operation(uint32_t input); +bool TryCast::Operation(int32_t input, int8_t &result, bool strict); template <> -uint8_t Cast::Operation(uint64_t input); +bool TryCast::Operation(int32_t input, int16_t &result, bool strict); template <> -uint8_t Cast::Operation(int8_t input); +bool TryCast::Operation(int32_t input, int32_t &result, bool strict); template <> -uint8_t Cast::Operation(int16_t input); +bool TryCast::Operation(int32_t input, int64_t &result, bool strict); template <> -uint8_t Cast::Operation(int32_t input); +bool TryCast::Operation(int32_t input, hugeint_t &result, bool strict); template <> -uint8_t Cast::Operation(int64_t input); +bool TryCast::Operation(int32_t input, uint8_t &result, bool strict); template <> -uint8_t Cast::Operation(float input); +bool TryCast::Operation(int32_t input, uint16_t &result, bool strict); template <> -uint8_t Cast::Operation(double input); -//===--------------------------------------------------------------------===// -// Numeric -> int16_t casts -//===--------------------------------------------------------------------===// +bool TryCast::Operation(int32_t input, uint32_t &result, bool strict); template <> -bool TryCast::Operation(uint16_t input, int16_t &result, bool strict); +bool TryCast::Operation(int32_t input, uint64_t &result, bool strict); template <> -bool TryCast::Operation(uint32_t input, int16_t &result, bool strict); +bool TryCast::Operation(int32_t input, float &result, bool strict); template <> -bool TryCast::Operation(uint64_t input, int16_t &result, bool strict); +bool 
TryCast::Operation(int32_t input, double &result, bool strict); + +//===--------------------------------------------------------------------===// +// Cast int64_t -> Numeric +//===--------------------------------------------------------------------===// template <> -bool TryCast::Operation(int32_t input, int16_t &result, bool strict); +bool TryCast::Operation(int64_t input, bool &result, bool strict); +template <> +bool TryCast::Operation(int64_t input, int8_t &result, bool strict); template <> bool TryCast::Operation(int64_t input, int16_t &result, bool strict); template <> -bool TryCast::Operation(float input, int16_t &result, bool strict); +bool TryCast::Operation(int64_t input, int32_t &result, bool strict); template <> -bool TryCast::Operation(double input, int16_t &result, bool strict); - +bool TryCast::Operation(int64_t input, int64_t &result, bool strict); template <> -int16_t Cast::Operation(uint16_t input); +bool TryCast::Operation(int64_t input, hugeint_t &result, bool strict); template <> -int16_t Cast::Operation(uint32_t input); +bool TryCast::Operation(int64_t input, uint8_t &result, bool strict); template <> -int16_t Cast::Operation(uint64_t input); +bool TryCast::Operation(int64_t input, uint16_t &result, bool strict); template <> -int16_t Cast::Operation(int32_t input); +bool TryCast::Operation(int64_t input, uint32_t &result, bool strict); template <> -int16_t Cast::Operation(int64_t input); +bool TryCast::Operation(int64_t input, uint64_t &result, bool strict); template <> -int16_t Cast::Operation(float input); +bool TryCast::Operation(int64_t input, float &result, bool strict); template <> -int16_t Cast::Operation(double input); +bool TryCast::Operation(int64_t input, double &result, bool strict); //===--------------------------------------------------------------------===// -// Numeric -> uint16_t casts +// Cast hugeint_t -> Numeric //===--------------------------------------------------------------------===// template <> -bool 
TryCast::Operation(uint32_t input, uint16_t &result, bool strict); +bool TryCast::Operation(hugeint_t input, bool &result, bool strict); template <> -bool TryCast::Operation(uint64_t input, uint16_t &result, bool strict); +bool TryCast::Operation(hugeint_t input, int8_t &result, bool strict); template <> -bool TryCast::Operation(int8_t input, uint16_t &result, bool strict); +bool TryCast::Operation(hugeint_t input, int16_t &result, bool strict); template <> -bool TryCast::Operation(int16_t input, uint16_t &result, bool strict); +bool TryCast::Operation(hugeint_t input, int32_t &result, bool strict); template <> -bool TryCast::Operation(int32_t input, uint16_t &result, bool strict); +bool TryCast::Operation(hugeint_t input, int64_t &result, bool strict); template <> -bool TryCast::Operation(int64_t input, uint16_t &result, bool strict); +bool TryCast::Operation(hugeint_t input, hugeint_t &result, bool strict); template <> -bool TryCast::Operation(float input, uint16_t &result, bool strict); +bool TryCast::Operation(hugeint_t input, uint8_t &result, bool strict); template <> -bool TryCast::Operation(double input, uint16_t &result, bool strict); +bool TryCast::Operation(hugeint_t input, uint16_t &result, bool strict); +template <> +bool TryCast::Operation(hugeint_t input, uint32_t &result, bool strict); +template <> +bool TryCast::Operation(hugeint_t input, uint64_t &result, bool strict); +template <> +bool TryCast::Operation(hugeint_t input, float &result, bool strict); +template <> +bool TryCast::Operation(hugeint_t input, double &result, bool strict); +//===--------------------------------------------------------------------===// +// Cast uint8_t -> Numeric +//===--------------------------------------------------------------------===// +template <> +bool TryCast::Operation(uint8_t input, bool &result, bool strict); +template <> +bool TryCast::Operation(uint8_t input, int8_t &result, bool strict); +template <> +bool TryCast::Operation(uint8_t input, int16_t &result, 
bool strict); template <> -uint16_t Cast::Operation(uint32_t input); +bool TryCast::Operation(uint8_t input, int32_t &result, bool strict); template <> -uint16_t Cast::Operation(uint64_t input); +bool TryCast::Operation(uint8_t input, int64_t &result, bool strict); +template <> +bool TryCast::Operation(uint8_t input, hugeint_t &result, bool strict); template <> -uint16_t Cast::Operation(int8_t input); +bool TryCast::Operation(uint8_t input, uint8_t &result, bool strict); template <> -uint16_t Cast::Operation(int16_t input); +bool TryCast::Operation(uint8_t input, uint16_t &result, bool strict); template <> -uint16_t Cast::Operation(int32_t input); +bool TryCast::Operation(uint8_t input, uint32_t &result, bool strict); template <> -uint16_t Cast::Operation(int64_t input); +bool TryCast::Operation(uint8_t input, uint64_t &result, bool strict); template <> -uint16_t Cast::Operation(float input); +bool TryCast::Operation(uint8_t input, float &result, bool strict); template <> -uint16_t Cast::Operation(double input); +bool TryCast::Operation(uint8_t input, double &result, bool strict); + //===--------------------------------------------------------------------===// -// Numeric -> int32_t casts +// Cast uint16_t -> Numeric //===--------------------------------------------------------------------===// template <> -bool TryCast::Operation(uint32_t input, int32_t &result, bool strict); +bool TryCast::Operation(uint16_t input, bool &result, bool strict); template <> -bool TryCast::Operation(uint64_t input, int32_t &result, bool strict); +bool TryCast::Operation(uint16_t input, int8_t &result, bool strict); template <> -bool TryCast::Operation(int64_t input, int32_t &result, bool strict); +bool TryCast::Operation(uint16_t input, int16_t &result, bool strict); template <> -bool TryCast::Operation(float input, int32_t &result, bool strict); +bool TryCast::Operation(uint16_t input, int32_t &result, bool strict); template <> -bool TryCast::Operation(double input, int32_t &result, 
bool strict); - +bool TryCast::Operation(uint16_t input, int64_t &result, bool strict); +template <> +bool TryCast::Operation(uint16_t input, hugeint_t &result, bool strict); +template <> +bool TryCast::Operation(uint16_t input, uint8_t &result, bool strict); template <> -int32_t Cast::Operation(uint32_t input); +bool TryCast::Operation(uint16_t input, uint16_t &result, bool strict); template <> -int32_t Cast::Operation(uint64_t input); +bool TryCast::Operation(uint16_t input, uint32_t &result, bool strict); template <> -int32_t Cast::Operation(int64_t input); +bool TryCast::Operation(uint16_t input, uint64_t &result, bool strict); template <> -int32_t Cast::Operation(float input); +bool TryCast::Operation(uint16_t input, float &result, bool strict); template <> -int32_t Cast::Operation(double input); +bool TryCast::Operation(uint16_t input, double &result, bool strict); //===--------------------------------------------------------------------===// -// Numeric -> uint32_t casts +// Cast uint32_t -> Numeric //===--------------------------------------------------------------------===// template <> -bool TryCast::Operation(uint64_t input, uint32_t &result, bool strict); -template <> -bool TryCast::Operation(int8_t input, uint32_t &result, bool strict); -template <> -bool TryCast::Operation(int16_t input, uint32_t &result, bool strict); +bool TryCast::Operation(uint32_t input, bool &result, bool strict); template <> -bool TryCast::Operation(int32_t input, uint32_t &result, bool strict); +bool TryCast::Operation(uint32_t input, int8_t &result, bool strict); template <> -bool TryCast::Operation(int64_t input, uint32_t &result, bool strict); +bool TryCast::Operation(uint32_t input, int16_t &result, bool strict); template <> -bool TryCast::Operation(float input, uint32_t &result, bool strict); +bool TryCast::Operation(uint32_t input, int32_t &result, bool strict); template <> -bool TryCast::Operation(double input, uint32_t &result, bool strict); - +bool 
TryCast::Operation(uint32_t input, int64_t &result, bool strict); template <> -uint32_t Cast::Operation(uint64_t input); +bool TryCast::Operation(uint32_t input, hugeint_t &result, bool strict); template <> -uint32_t Cast::Operation(int8_t input); +bool TryCast::Operation(uint32_t input, uint8_t &result, bool strict); template <> -uint32_t Cast::Operation(int16_t input); +bool TryCast::Operation(uint32_t input, uint16_t &result, bool strict); template <> -uint32_t Cast::Operation(int32_t input); +bool TryCast::Operation(uint32_t input, uint32_t &result, bool strict); template <> -uint32_t Cast::Operation(int64_t input); +bool TryCast::Operation(uint32_t input, uint64_t &result, bool strict); template <> -uint32_t Cast::Operation(float input); +bool TryCast::Operation(uint32_t input, float &result, bool strict); template <> -uint32_t Cast::Operation(double input); +bool TryCast::Operation(uint32_t input, double &result, bool strict); //===--------------------------------------------------------------------===// -// Numeric -> int64_t casts +// Cast uint64_t -> Numeric //===--------------------------------------------------------------------===// template <> +bool TryCast::Operation(uint64_t input, bool &result, bool strict); +template <> +bool TryCast::Operation(uint64_t input, int8_t &result, bool strict); +template <> +bool TryCast::Operation(uint64_t input, int16_t &result, bool strict); +template <> +bool TryCast::Operation(uint64_t input, int32_t &result, bool strict); +template <> bool TryCast::Operation(uint64_t input, int64_t &result, bool strict); template <> -bool TryCast::Operation(float input, int64_t &result, bool strict); +bool TryCast::Operation(uint64_t input, hugeint_t &result, bool strict); template <> -bool TryCast::Operation(double input, int64_t &result, bool strict); - +bool TryCast::Operation(uint64_t input, uint8_t &result, bool strict); +template <> +bool TryCast::Operation(uint64_t input, uint16_t &result, bool strict); +template <> +bool 
TryCast::Operation(uint64_t input, uint32_t &result, bool strict); template <> -int64_t Cast::Operation(uint64_t input); +bool TryCast::Operation(uint64_t input, uint64_t &result, bool strict); template <> -int64_t Cast::Operation(float input); +bool TryCast::Operation(uint64_t input, float &result, bool strict); template <> -int64_t Cast::Operation(double input); +bool TryCast::Operation(uint64_t input, double &result, bool strict); //===--------------------------------------------------------------------===// -// Numeric -> uint64_t casts +// Cast float -> Numeric //===--------------------------------------------------------------------===// template <> -bool TryCast::Operation(int8_t input, uint64_t &result, bool strict); +bool TryCast::Operation(float input, bool &result, bool strict); template <> -bool TryCast::Operation(int16_t input, uint64_t &result, bool strict); +bool TryCast::Operation(float input, int8_t &result, bool strict); template <> -bool TryCast::Operation(int32_t input, uint64_t &result, bool strict); +bool TryCast::Operation(float input, int16_t &result, bool strict); template <> -bool TryCast::Operation(int64_t input, uint64_t &result, bool strict); +bool TryCast::Operation(float input, int32_t &result, bool strict); template <> -bool TryCast::Operation(float input, uint64_t &result, bool strict); +bool TryCast::Operation(float input, int64_t &result, bool strict); template <> -bool TryCast::Operation(double input, uint64_t &result, bool strict); - +bool TryCast::Operation(float input, hugeint_t &result, bool strict); template <> -uint64_t Cast::Operation(int8_t input); +bool TryCast::Operation(float input, uint8_t &result, bool strict); template <> -uint64_t Cast::Operation(int16_t input); +bool TryCast::Operation(float input, uint16_t &result, bool strict); template <> -uint64_t Cast::Operation(int32_t input); +bool TryCast::Operation(float input, uint32_t &result, bool strict); template <> -uint64_t Cast::Operation(int64_t input); +bool 
TryCast::Operation(float input, uint64_t &result, bool strict); template <> -uint64_t Cast::Operation(float input); +bool TryCast::Operation(float input, float &result, bool strict); template <> -uint64_t Cast::Operation(double input); +bool TryCast::Operation(float input, double &result, bool strict); //===--------------------------------------------------------------------===// -// Double -> float casts +// Cast double -> Numeric //===--------------------------------------------------------------------===// template <> +bool TryCast::Operation(double input, bool &result, bool strict); +template <> +bool TryCast::Operation(double input, int8_t &result, bool strict); +template <> +bool TryCast::Operation(double input, int16_t &result, bool strict); +template <> +bool TryCast::Operation(double input, int32_t &result, bool strict); +template <> +bool TryCast::Operation(double input, int64_t &result, bool strict); +template <> +bool TryCast::Operation(double input, hugeint_t &result, bool strict); +template <> +bool TryCast::Operation(double input, uint8_t &result, bool strict); +template <> +bool TryCast::Operation(double input, uint16_t &result, bool strict); +template <> +bool TryCast::Operation(double input, uint32_t &result, bool strict); +template <> +bool TryCast::Operation(double input, uint64_t &result, bool strict); +template <> bool TryCast::Operation(double input, float &result, bool strict); - template <> -float Cast::Operation(double input); +bool TryCast::Operation(double input, double &result, bool strict); + //===--------------------------------------------------------------------===// // String -> Numeric Casts //===--------------------------------------------------------------------===// @@ -13372,491 +16141,974 @@ bool TryCast::Operation(string_t input, float &result, bool strict); template <> bool TryCast::Operation(string_t input, double &result, bool strict); +//===--------------------------------------------------------------------===// +// 
Date Casts +//===--------------------------------------------------------------------===// template <> -bool Cast::Operation(string_t input); -template <> -int8_t Cast::Operation(string_t input); -template <> -int16_t Cast::Operation(string_t input); -template <> -int32_t Cast::Operation(string_t input); -template <> -int64_t Cast::Operation(string_t input); -template <> -uint8_t Cast::Operation(string_t input); -template <> -uint16_t Cast::Operation(string_t input); +bool TryCast::Operation(date_t input, date_t &result, bool strict); template <> -uint32_t Cast::Operation(string_t input); +bool TryCast::Operation(date_t input, timestamp_t &result, bool strict); + +//===--------------------------------------------------------------------===// +// Time Casts +//===--------------------------------------------------------------------===// template <> -uint64_t Cast::Operation(string_t input); +bool TryCast::Operation(dtime_t input, dtime_t &result, bool strict); + +//===--------------------------------------------------------------------===// +// Timestamp Casts +//===--------------------------------------------------------------------===// template <> -hugeint_t Cast::Operation(string_t input); +bool TryCast::Operation(timestamp_t input, date_t &result, bool strict); template <> -float Cast::Operation(string_t input); +bool TryCast::Operation(timestamp_t input, dtime_t &result, bool strict); template <> -double Cast::Operation(string_t input); +bool TryCast::Operation(timestamp_t input, timestamp_t &result, bool strict); + +//===--------------------------------------------------------------------===// +// Interval Casts +//===--------------------------------------------------------------------===// template <> -string Cast::Operation(string_t input); +bool TryCast::Operation(interval_t input, interval_t &result, bool strict); +//===--------------------------------------------------------------------===// +// String -> Date Casts 
+//===--------------------------------------------------------------------===// template <> -bool StrictCast::Operation(string_t input); +bool TryCastErrorMessage::Operation(string_t input, date_t &result, string *error_message, bool strict); template <> -int8_t StrictCast::Operation(string_t input); +bool TryCast::Operation(string_t input, date_t &result, bool strict); template <> -int16_t StrictCast::Operation(string_t input); +date_t Cast::Operation(string_t input); +//===--------------------------------------------------------------------===// +// String -> Time Casts +//===--------------------------------------------------------------------===// template <> -int32_t StrictCast::Operation(string_t input); +bool TryCastErrorMessage::Operation(string_t input, dtime_t &result, string *error_message, bool strict); template <> -int64_t StrictCast::Operation(string_t input); +bool TryCast::Operation(string_t input, dtime_t &result, bool strict); template <> -uint8_t StrictCast::Operation(string_t input); +dtime_t Cast::Operation(string_t input); +//===--------------------------------------------------------------------===// +// String -> Timestamp Casts +//===--------------------------------------------------------------------===// template <> -uint16_t StrictCast::Operation(string_t input); +bool TryCastErrorMessage::Operation(string_t input, timestamp_t &result, string *error_message, bool strict); template <> -uint32_t StrictCast::Operation(string_t input); +bool TryCast::Operation(string_t input, timestamp_t &result, bool strict); template <> -uint64_t StrictCast::Operation(string_t input); +timestamp_t Cast::Operation(string_t input); +//===--------------------------------------------------------------------===// +// String -> Interval Casts +//===--------------------------------------------------------------------===// template <> -hugeint_t StrictCast::Operation(string_t input); +bool TryCastErrorMessage::Operation(string_t input, interval_t &result, string 
*error_message, bool strict); + +//===--------------------------------------------------------------------===// +// string -> Non-Standard Timestamps +//===--------------------------------------------------------------------===// +struct TryCastToTimestampNS { + template + static inline bool Operation(SRC input, DST &result, bool strict = false) { + throw InternalException("Unsupported type for try cast to timestamp (ns)"); + } +}; + +struct TryCastToTimestampMS { + template + static inline bool Operation(SRC input, DST &result, bool strict = false) { + throw InternalException("Unsupported type for try cast to timestamp (ms)"); + } +}; + +struct TryCastToTimestampSec { + template + static inline bool Operation(SRC input, DST &result, bool strict = false) { + throw InternalException("Unsupported type for try cast to timestamp (s)"); + } +}; + template <> -float StrictCast::Operation(string_t input); +bool TryCastToTimestampNS::Operation(string_t input, timestamp_t &result, bool strict); template <> -double StrictCast::Operation(string_t input); +bool TryCastToTimestampMS::Operation(string_t input, timestamp_t &result, bool strict); template <> -string StrictCast::Operation(string_t input); +bool TryCastToTimestampSec::Operation(string_t input, timestamp_t &result, bool strict); //===--------------------------------------------------------------------===// -// Hugeint casts +// Non-Standard Timestamps -> string/standard timestamp //===--------------------------------------------------------------------===// -// Numeric -> Hugeint casts -template <> -bool TryCast::Operation(bool input, hugeint_t &result, bool strict); -template <> -bool TryCast::Operation(int8_t input, hugeint_t &result, bool strict); + +struct CastFromTimestampNS { + template + static inline string_t Operation(SRC input, Vector &result) { + throw duckdb::NotImplementedException("Cast to timestamp could not be performed!"); + } +}; + +struct CastFromTimestampMS { + template + static inline string_t 
Operation(SRC input, Vector &result) { + throw duckdb::NotImplementedException("Cast to timestamp could not be performed!"); + } +}; + +struct CastFromTimestampSec { + template + static inline string_t Operation(SRC input, Vector &result) { + throw duckdb::NotImplementedException("Cast to timestamp could not be performed!"); + } +}; + +struct CastTimestampUsToMs { + template + static inline DST Operation(SRC input) { + throw duckdb::NotImplementedException("Cast to timestamp could not be performed!"); + } +}; + +struct CastTimestampUsToNs { + template + static inline DST Operation(SRC input) { + throw duckdb::NotImplementedException("Cast to timestamp could not be performed!"); + } +}; + +struct CastTimestampUsToSec { + template + static inline DST Operation(SRC input) { + throw duckdb::NotImplementedException("Cast to timestamp could not be performed!"); + } +}; + +struct CastTimestampMsToUs { + template + static inline DST Operation(SRC input) { + throw duckdb::NotImplementedException("Cast to timestamp could not be performed!"); + } +}; + +struct CastTimestampNsToUs { + template + static inline DST Operation(SRC input) { + throw duckdb::NotImplementedException("Cast to timestamp could not be performed!"); + } +}; + +struct CastTimestampSecToUs { + template + static inline DST Operation(SRC input) { + throw duckdb::NotImplementedException("Cast to timestamp could not be performed!"); + } +}; + template <> -bool TryCast::Operation(int16_t input, hugeint_t &result, bool strict); +duckdb::timestamp_t CastTimestampUsToMs::Operation(duckdb::timestamp_t input); template <> -bool TryCast::Operation(int32_t input, hugeint_t &result, bool strict); +duckdb::timestamp_t CastTimestampUsToNs::Operation(duckdb::timestamp_t input); template <> -bool TryCast::Operation(int64_t input, hugeint_t &result, bool strict); +duckdb::timestamp_t CastTimestampUsToSec::Operation(duckdb::timestamp_t input); template <> -bool TryCast::Operation(uint8_t input, hugeint_t &result, bool strict); 
+duckdb::timestamp_t CastTimestampMsToUs::Operation(duckdb::timestamp_t input); template <> -bool TryCast::Operation(uint16_t input, hugeint_t &result, bool strict); +duckdb::timestamp_t CastTimestampNsToUs::Operation(duckdb::timestamp_t input); template <> -bool TryCast::Operation(uint32_t input, hugeint_t &result, bool strict); +duckdb::timestamp_t CastTimestampSecToUs::Operation(duckdb::timestamp_t input); + template <> -bool TryCast::Operation(uint64_t input, hugeint_t &result, bool strict); +duckdb::string_t CastFromTimestampNS::Operation(duckdb::timestamp_t input, Vector &result); template <> -bool TryCast::Operation(float input, hugeint_t &result, bool strict); +duckdb::string_t CastFromTimestampMS::Operation(duckdb::timestamp_t input, Vector &result); template <> -bool TryCast::Operation(double input, hugeint_t &result, bool strict); +duckdb::string_t CastFromTimestampSec::Operation(duckdb::timestamp_t input, Vector &result); +//===--------------------------------------------------------------------===// +// Blobs +//===--------------------------------------------------------------------===// +struct CastFromBlob { + template + static inline string_t Operation(SRC input, Vector &result) { + throw duckdb::NotImplementedException("Cast from blob could not be performed!"); + } +}; template <> -hugeint_t Cast::Operation(bool input); +duckdb::string_t CastFromBlob::Operation(duckdb::string_t input, Vector &vector); + +struct TryCastToBlob { + template + static inline bool Operation(SRC input, DST &result, Vector &result_vector, string *error_message, + bool strict = false) { + throw InternalException("Unsupported type for try cast to blob"); + } +}; + template <> -hugeint_t Cast::Operation(int8_t input); +bool TryCastToBlob::Operation(string_t input, string_t &result, Vector &result_vector, string *error_message, + bool strict); + +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// 
duckdb/common/operator/string_cast.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + + +namespace duckdb { + +//! StringCast +class Vector; + +struct StringCast { + template + static inline string_t Operation(SRC input, Vector &result) { + throw NotImplementedException("Unimplemented type for string cast!"); + } +}; + template <> -hugeint_t Cast::Operation(int16_t input); +duckdb::string_t StringCast::Operation(bool input, Vector &result); template <> -hugeint_t Cast::Operation(int32_t input); +duckdb::string_t StringCast::Operation(int8_t input, Vector &result); template <> -hugeint_t Cast::Operation(int64_t input); +duckdb::string_t StringCast::Operation(int16_t input, Vector &result); template <> -hugeint_t Cast::Operation(uint8_t input); +duckdb::string_t StringCast::Operation(int32_t input, Vector &result); template <> -hugeint_t Cast::Operation(uint16_t input); +duckdb::string_t StringCast::Operation(int64_t input, Vector &result); template <> -hugeint_t Cast::Operation(uint32_t input); +duckdb::string_t StringCast::Operation(uint8_t input, Vector &result); template <> -hugeint_t Cast::Operation(uint64_t input); +duckdb::string_t StringCast::Operation(uint16_t input, Vector &result); template <> -hugeint_t Cast::Operation(float input); +duckdb::string_t StringCast::Operation(uint32_t input, Vector &result); template <> -hugeint_t Cast::Operation(double input); -// Hugeint -> numeric casts +duckdb::string_t StringCast::Operation(uint64_t input, Vector &result); template <> -bool TryCast::Operation(hugeint_t input, bool &result, bool strict); +duckdb::string_t StringCast::Operation(hugeint_t input, Vector &result); template <> -bool TryCast::Operation(hugeint_t input, int8_t &result, bool strict); +duckdb::string_t StringCast::Operation(float input, Vector &result); template <> -bool TryCast::Operation(hugeint_t input, int16_t &result, bool strict); +duckdb::string_t StringCast::Operation(double input, 
Vector &result); template <> -bool TryCast::Operation(hugeint_t input, int32_t &result, bool strict); +duckdb::string_t StringCast::Operation(interval_t input, Vector &result); template <> -bool TryCast::Operation(hugeint_t input, int64_t &result, bool strict); +duckdb::string_t StringCast::Operation(duckdb::string_t input, Vector &result); template <> -bool TryCast::Operation(hugeint_t input, uint8_t &result, bool strict); +duckdb::string_t StringCast::Operation(date_t input, Vector &result); template <> -bool TryCast::Operation(hugeint_t input, uint16_t &result, bool strict); +duckdb::string_t StringCast::Operation(dtime_t input, Vector &result); template <> -bool TryCast::Operation(hugeint_t input, uint32_t &result, bool strict); +duckdb::string_t StringCast::Operation(timestamp_t input, Vector &result); + +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/operator/numeric_cast.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + +namespace duckdb { + +template +static bool TryCastWithOverflowCheck(SRC value, DST &result) { + if (NumericLimits::IsSigned() != NumericLimits::IsSigned()) { + if (NumericLimits::IsSigned()) { + // signed to unsigned conversion + if (NumericLimits::Digits() > NumericLimits::Digits()) { + if (value < 0 || value > (SRC)NumericLimits::Maximum()) { + return false; + } + } else { + if (value < 0) { + return false; + } + } + result = (DST)value; + return true; + } else { + // unsigned to signed conversion + if (NumericLimits::Digits() >= NumericLimits::Digits()) { + if (value <= (SRC)NumericLimits::Maximum()) { + result = (DST)value; + return true; + } + return false; + } else { + result = (DST)value; + return true; + } + } + } else { + // same sign conversion + if (NumericLimits::Digits() >= NumericLimits::Digits()) { + result = (DST)value; + return true; + } else { + if (value < 
SRC(NumericLimits::Minimum()) || value > SRC(NumericLimits::Maximum())) { + return false; + } + result = (DST)value; + return true; + } + } +} + template <> -bool TryCast::Operation(hugeint_t input, uint64_t &result, bool strict); +bool TryCastWithOverflowCheck(float value, int32_t &result) { + if (!(value >= -2147483648.0f && value < 2147483648.0f)) { + return false; + } + result = int32_t(value); + return true; +} + template <> -bool TryCast::Operation(hugeint_t input, float &result, bool strict); +bool TryCastWithOverflowCheck(float value, int64_t &result) { + if (!(value >= -9223372036854775808.0f && value < 9223372036854775808.0f)) { + return false; + } + result = int64_t(value); + return true; +} + template <> -bool TryCast::Operation(hugeint_t input, double &result, bool strict); +bool TryCastWithOverflowCheck(double value, int64_t &result) { + if (!(value >= -9223372036854775808.0 && value < 9223372036854775808.0)) { + return false; + } + result = int64_t(value); + return true; +} template <> -bool Cast::Operation(hugeint_t input); +bool TryCastWithOverflowCheck(double input, float &result) { + if (input < (double)NumericLimits::Minimum() || input > (double)NumericLimits::Maximum()) { + return false; + } + auto res = (float)input; + if (std::isnan(res) || std::isinf(res)) { + return false; + } + result = res; + return true; +} + +//===--------------------------------------------------------------------===// +// Cast Numeric -> bool +//===--------------------------------------------------------------------===// template <> -int8_t Cast::Operation(hugeint_t input); +bool TryCastWithOverflowCheck(bool value, bool &result) { + result = bool(value); + return true; +} + template <> -int16_t Cast::Operation(hugeint_t input); +bool TryCastWithOverflowCheck(int8_t value, bool &result) { + result = bool(value); + return true; +} + template <> -int32_t Cast::Operation(hugeint_t input); +bool TryCastWithOverflowCheck(int16_t value, bool &result) { + result = 
bool(value); + return true; +} + template <> -int64_t Cast::Operation(hugeint_t input); +bool TryCastWithOverflowCheck(int32_t value, bool &result) { + result = bool(value); + return true; +} + template <> -uint8_t Cast::Operation(hugeint_t input); +bool TryCastWithOverflowCheck(int64_t value, bool &result) { + result = bool(value); + return true; +} + template <> -uint16_t Cast::Operation(hugeint_t input); +bool TryCastWithOverflowCheck(uint8_t value, bool &result) { + result = bool(value); + return true; +} + template <> -uint32_t Cast::Operation(hugeint_t input); +bool TryCastWithOverflowCheck(uint16_t value, bool &result) { + result = bool(value); + return true; +} + template <> -uint64_t Cast::Operation(hugeint_t input); +bool TryCastWithOverflowCheck(uint32_t value, bool &result) { + result = bool(value); + return true; +} + template <> -float Cast::Operation(hugeint_t input); +bool TryCastWithOverflowCheck(uint64_t value, bool &result) { + result = bool(value); + return true; +} + template <> -double Cast::Operation(hugeint_t input); -// nop cast +bool TryCastWithOverflowCheck(float value, bool &result) { + result = bool(value); + return true; +} + template <> -bool TryCast::Operation(hugeint_t input, hugeint_t &result, bool strict); +bool TryCastWithOverflowCheck(double value, bool &result) { + result = bool(value); + return true; +} + template <> -hugeint_t Cast::Operation(hugeint_t input); +bool TryCastWithOverflowCheck(hugeint_t input, bool &result) { + result = input.upper != 0 || input.lower != 0; + return true; +} //===--------------------------------------------------------------------===// -// Interval -> String Casts +// Cast bool -> Numeric //===--------------------------------------------------------------------===// template <> -bool TryCast::Operation(string_t input, interval_t &result, bool strict); +bool TryCastWithOverflowCheck(bool value, int8_t &result) { + result = int8_t(value); + return true; +} + +template <> +bool 
TryCastWithOverflowCheck(bool value, int16_t &result) { + result = int16_t(value); + return true; +} + +template <> +bool TryCastWithOverflowCheck(bool value, int32_t &result) { + result = int32_t(value); + return true; +} + template <> -interval_t StrictCast::Operation(string_t input); +bool TryCastWithOverflowCheck(bool value, int64_t &result) { + result = int64_t(value); + return true; +} + template <> -interval_t Cast::Operation(string_t input); -//===--------------------------------------------------------------------===// -// Numeric -> String Casts -//===--------------------------------------------------------------------===// -// these functions are convenience functions that cast a value to a std::string, they are very slow -// for performance sensitive casting StringCast::Operation should be used +bool TryCastWithOverflowCheck(bool value, uint8_t &result) { + result = uint8_t(value); + return true; +} + template <> -string Cast::Operation(bool input); +bool TryCastWithOverflowCheck(bool value, uint16_t &result) { + result = uint16_t(value); + return true; +} + template <> -string Cast::Operation(int8_t input); +bool TryCastWithOverflowCheck(bool value, uint32_t &result) { + result = uint32_t(value); + return true; +} + template <> -string Cast::Operation(int16_t input); +bool TryCastWithOverflowCheck(bool value, uint64_t &result) { + result = uint64_t(value); + return true; +} + template <> -string Cast::Operation(int32_t input); +bool TryCastWithOverflowCheck(bool value, float &result) { + result = float(value); + return true; +} + template <> -string Cast::Operation(int64_t input); +bool TryCastWithOverflowCheck(bool value, double &result) { + result = double(value); + return true; +} + template <> -string Cast::Operation(uint8_t input); +bool TryCastWithOverflowCheck(bool input, hugeint_t &result) { + result.upper = 0; + result.lower = input ? 
1 : 0; + return true; +} + +//===--------------------------------------------------------------------===// +// Cast Numeric -> hugeint +//===--------------------------------------------------------------------===// template <> -string Cast::Operation(uint16_t input); +bool TryCastWithOverflowCheck(int8_t value, hugeint_t &result) { + return Hugeint::TryConvert(value, result); +} + template <> -string Cast::Operation(uint32_t input); +bool TryCastWithOverflowCheck(int16_t value, hugeint_t &result) { + return Hugeint::TryConvert(value, result); +} + template <> -string Cast::Operation(uint64_t input); +bool TryCastWithOverflowCheck(int32_t value, hugeint_t &result) { + return Hugeint::TryConvert(value, result); +} + template <> -string Cast::Operation(hugeint_t input); +bool TryCastWithOverflowCheck(int64_t value, hugeint_t &result) { + return Hugeint::TryConvert(value, result); +} + template <> -string Cast::Operation(float input); +bool TryCastWithOverflowCheck(uint8_t value, hugeint_t &result) { + return Hugeint::TryConvert(value, result); +} + template <> -string Cast::Operation(double input); +bool TryCastWithOverflowCheck(uint16_t value, hugeint_t &result) { + return Hugeint::TryConvert(value, result); +} + template <> -string Cast::Operation(string_t input); +bool TryCastWithOverflowCheck(uint32_t value, hugeint_t &result) { + return Hugeint::TryConvert(value, result); +} -class Vector; -struct StringCast { - template - static inline string_t Operation(SRC input, Vector &result) { - throw NotImplementedException("Unimplemented type for string cast!"); - } -}; +template <> +bool TryCastWithOverflowCheck(uint64_t value, hugeint_t &result) { + return Hugeint::TryConvert(value, result); +} template <> -duckdb::string_t StringCast::Operation(bool input, Vector &result); +bool TryCastWithOverflowCheck(float value, hugeint_t &result) { + return Hugeint::TryConvert(value, result); +} + template <> -duckdb::string_t StringCast::Operation(int8_t input, Vector &result); 
+bool TryCastWithOverflowCheck(double value, hugeint_t &result) { + return Hugeint::TryConvert(value, result); +} + template <> -duckdb::string_t StringCast::Operation(int16_t input, Vector &result); +bool TryCastWithOverflowCheck(hugeint_t value, hugeint_t &result) { + result = value; + return true; +} + +//===--------------------------------------------------------------------===// +// Cast Hugeint -> Numeric +//===--------------------------------------------------------------------===// template <> -duckdb::string_t StringCast::Operation(int32_t input, Vector &result); +bool TryCastWithOverflowCheck(hugeint_t value, int8_t &result) { + return Hugeint::TryCast(value, result); +} + template <> -duckdb::string_t StringCast::Operation(int64_t input, Vector &result); +bool TryCastWithOverflowCheck(hugeint_t value, int16_t &result) { + return Hugeint::TryCast(value, result); +} + template <> -duckdb::string_t StringCast::Operation(uint8_t input, Vector &result); +bool TryCastWithOverflowCheck(hugeint_t value, int32_t &result) { + return Hugeint::TryCast(value, result); +} + template <> -duckdb::string_t StringCast::Operation(uint16_t input, Vector &result); +bool TryCastWithOverflowCheck(hugeint_t value, int64_t &result) { + return Hugeint::TryCast(value, result); +} + template <> -duckdb::string_t StringCast::Operation(uint32_t input, Vector &result); +bool TryCastWithOverflowCheck(hugeint_t value, uint8_t &result) { + return Hugeint::TryCast(value, result); +} + template <> -duckdb::string_t StringCast::Operation(uint64_t input, Vector &result); +bool TryCastWithOverflowCheck(hugeint_t value, uint16_t &result) { + return Hugeint::TryCast(value, result); +} + template <> -duckdb::string_t StringCast::Operation(hugeint_t input, Vector &result); +bool TryCastWithOverflowCheck(hugeint_t value, uint32_t &result) { + return Hugeint::TryCast(value, result); +} + template <> -duckdb::string_t StringCast::Operation(float input, Vector &result); +bool 
TryCastWithOverflowCheck(hugeint_t value, uint64_t &result) { + return Hugeint::TryCast(value, result); +} + template <> -duckdb::string_t StringCast::Operation(double input, Vector &result); +bool TryCastWithOverflowCheck(hugeint_t value, float &result) { + return Hugeint::TryCast(value, result); +} + template <> -duckdb::string_t StringCast::Operation(interval_t input, Vector &result); +bool TryCastWithOverflowCheck(hugeint_t value, double &result) { + return Hugeint::TryCast(value, result); +} + +struct NumericTryCast { + template + static inline bool Operation(SRC input, DST &result, bool strict = false) { + return TryCastWithOverflowCheck(input, result); + } +}; + +struct NumericCast { + template + static inline DST Operation(SRC input) { + DST result; + if (!NumericTryCast::Operation(input, result)) { + throw InvalidInputException(CastExceptionText(input)); + } + return result; + } +}; + +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/operator/decimal_cast_operators.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { //===--------------------------------------------------------------------===// // Decimal Casts //===--------------------------------------------------------------------===// -struct CastToDecimal { +struct TryCastToDecimal { template - static inline DST Operation(SRC input, uint8_t width, uint8_t scale) { - throw NotImplementedException("Unimplemented type for CastToDecimal!"); + static inline bool Operation(SRC input, DST &result, string *error_message, uint8_t width, uint8_t scale) { + throw NotImplementedException("Unimplemented type for TryCastToDecimal!"); } }; -struct CastFromDecimal { +struct TryCastFromDecimal { template - static inline DST Operation(SRC input, uint8_t width, uint8_t scale) { - throw NotImplementedException("Unimplemented type for CastFromDecimal!"); + static 
inline bool Operation(SRC input, DST &result, string *error_message, uint8_t width, uint8_t scale) { + throw NotImplementedException("Unimplemented type for TryCastFromDecimal!"); } }; -// BOOLEAN +//===--------------------------------------------------------------------===// +// Cast Decimal <-> bool +//===--------------------------------------------------------------------===// template <> -int16_t CastToDecimal::Operation(bool input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(bool input, int16_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int32_t CastToDecimal::Operation(bool input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(bool input, int32_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int64_t CastToDecimal::Operation(bool input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(bool input, int64_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -hugeint_t CastToDecimal::Operation(bool input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(bool input, hugeint_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -bool CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int16_t input, bool &result, string *error_message, uint8_t width, uint8_t scale); template <> -bool CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int32_t input, bool &result, string *error_message, uint8_t width, uint8_t scale); template <> -bool CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int64_t input, bool &result, string *error_message, uint8_t width, uint8_t scale); template <> -bool CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(hugeint_t input, bool 
&result, string *error_message, uint8_t width, uint8_t scale); -// TINYINT +//===--------------------------------------------------------------------===// +// Cast Decimal <-> int8_t +//===--------------------------------------------------------------------===// template <> -int16_t CastToDecimal::Operation(int8_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(int8_t input, int16_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int32_t CastToDecimal::Operation(int8_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(int8_t input, int32_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int64_t CastToDecimal::Operation(int8_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(int8_t input, int64_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -hugeint_t CastToDecimal::Operation(int8_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(int8_t input, hugeint_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int8_t CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int16_t input, int8_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int8_t CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int32_t input, int8_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int8_t CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int64_t input, int8_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int8_t CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(hugeint_t input, int8_t &result, string *error_message, uint8_t width, + uint8_t scale); -// SMALLINT 
+//===--------------------------------------------------------------------===// +// Cast Decimal <-> int16_t +//===--------------------------------------------------------------------===// template <> -int16_t CastToDecimal::Operation(int16_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(int16_t input, int16_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int32_t CastToDecimal::Operation(int16_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(int16_t input, int32_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int64_t CastToDecimal::Operation(int16_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(int16_t input, int64_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -hugeint_t CastToDecimal::Operation(int16_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(int16_t input, hugeint_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int16_t CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int16_t input, int16_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int16_t CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int32_t input, int16_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int16_t CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int64_t input, int16_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int16_t CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(hugeint_t input, int16_t &result, string *error_message, uint8_t width, + uint8_t scale); -// INTEGER 
+//===--------------------------------------------------------------------===// +// Cast Decimal <-> int32_t +//===--------------------------------------------------------------------===// template <> -int16_t CastToDecimal::Operation(int32_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(int32_t input, int16_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int32_t CastToDecimal::Operation(int32_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(int32_t input, int32_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int64_t CastToDecimal::Operation(int32_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(int32_t input, int64_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -hugeint_t CastToDecimal::Operation(int32_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(int32_t input, hugeint_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int32_t CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int16_t input, int32_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int32_t CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int32_t input, int32_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int32_t CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int64_t input, int32_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int32_t CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(hugeint_t input, int32_t &result, string *error_message, uint8_t width, + uint8_t scale); -// BIGINT 
+//===--------------------------------------------------------------------===// +// Cast Decimal <-> int64_t +//===--------------------------------------------------------------------===// template <> -int16_t CastToDecimal::Operation(int64_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(int64_t input, int16_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int32_t CastToDecimal::Operation(int64_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(int64_t input, int32_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int64_t CastToDecimal::Operation(int64_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(int64_t input, int64_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -hugeint_t CastToDecimal::Operation(int64_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(int64_t input, hugeint_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int64_t CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int16_t input, int64_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int64_t CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int32_t input, int64_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int64_t CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int64_t input, int64_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int64_t CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(hugeint_t input, int64_t &result, string *error_message, uint8_t width, + uint8_t scale); -// UTINYINT 
+//===--------------------------------------------------------------------===// +// Cast Decimal <-> hugeint_t +//===--------------------------------------------------------------------===// template <> -int16_t CastToDecimal::Operation(uint8_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(hugeint_t input, int16_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int32_t CastToDecimal::Operation(uint8_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(hugeint_t input, int32_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int64_t CastToDecimal::Operation(uint8_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(hugeint_t input, int64_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -hugeint_t CastToDecimal::Operation(uint8_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(hugeint_t input, hugeint_t &result, string *error_message, uint8_t width, + uint8_t scale); template <> -uint8_t CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int16_t input, hugeint_t &result, string *error_message, uint8_t width, + uint8_t scale); template <> -uint8_t CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int32_t input, hugeint_t &result, string *error_message, uint8_t width, + uint8_t scale); template <> -uint8_t CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int64_t input, hugeint_t &result, string *error_message, uint8_t width, + uint8_t scale); template <> -uint8_t CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(hugeint_t input, hugeint_t &result, string *error_message, uint8_t width, + uint8_t scale); -// USMALLINT 
+//===--------------------------------------------------------------------===// +// Cast Decimal <-> uint8_t +//===--------------------------------------------------------------------===// template <> -int16_t CastToDecimal::Operation(uint16_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(uint8_t input, int16_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int32_t CastToDecimal::Operation(uint16_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(uint8_t input, int32_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int64_t CastToDecimal::Operation(uint16_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(uint8_t input, int64_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -hugeint_t CastToDecimal::Operation(uint16_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(uint8_t input, hugeint_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -uint16_t CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int16_t input, uint8_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -uint16_t CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int32_t input, uint8_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -uint16_t CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int64_t input, uint8_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -uint16_t CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(hugeint_t input, uint8_t &result, string *error_message, uint8_t width, + uint8_t scale); -// UINTEGER 
+//===--------------------------------------------------------------------===// +// Cast Decimal <-> uint16_t +//===--------------------------------------------------------------------===// template <> -int16_t CastToDecimal::Operation(uint32_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(uint16_t input, int16_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int32_t CastToDecimal::Operation(uint32_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(uint16_t input, int32_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int64_t CastToDecimal::Operation(uint32_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(uint16_t input, int64_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -hugeint_t CastToDecimal::Operation(uint32_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(uint16_t input, hugeint_t &result, string *error_message, uint8_t width, + uint8_t scale); template <> -uint32_t CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int16_t input, uint16_t &result, string *error_message, uint8_t width, + uint8_t scale); template <> -uint32_t CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int32_t input, uint16_t &result, string *error_message, uint8_t width, + uint8_t scale); template <> -uint32_t CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int64_t input, uint16_t &result, string *error_message, uint8_t width, + uint8_t scale); template <> -uint32_t CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(hugeint_t input, uint16_t &result, string *error_message, uint8_t width, + uint8_t scale); -// BIGINT 
+//===--------------------------------------------------------------------===// +// Cast Decimal <-> uint32_t +//===--------------------------------------------------------------------===// template <> -int16_t CastToDecimal::Operation(uint64_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(uint32_t input, int16_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int32_t CastToDecimal::Operation(uint64_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(uint32_t input, int32_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int64_t CastToDecimal::Operation(uint64_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(uint32_t input, int64_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -hugeint_t CastToDecimal::Operation(uint64_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(uint32_t input, hugeint_t &result, string *error_message, uint8_t width, + uint8_t scale); template <> -uint64_t CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int16_t input, uint32_t &result, string *error_message, uint8_t width, + uint8_t scale); template <> -uint64_t CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int32_t input, uint32_t &result, string *error_message, uint8_t width, + uint8_t scale); template <> -uint64_t CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int64_t input, uint32_t &result, string *error_message, uint8_t width, + uint8_t scale); template <> -uint64_t CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(hugeint_t input, uint32_t &result, string *error_message, uint8_t width, + uint8_t scale); -// HUGEINT 
+//===--------------------------------------------------------------------===// +// Cast Decimal <-> uint64_t +//===--------------------------------------------------------------------===// template <> -int16_t CastToDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(uint64_t input, int16_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int32_t CastToDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(uint64_t input, int32_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int64_t CastToDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(uint64_t input, int64_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -hugeint_t CastToDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(uint64_t input, hugeint_t &result, string *error_message, uint8_t width, + uint8_t scale); template <> -hugeint_t CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int16_t input, uint64_t &result, string *error_message, uint8_t width, + uint8_t scale); template <> -hugeint_t CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int32_t input, uint64_t &result, string *error_message, uint8_t width, + uint8_t scale); template <> -hugeint_t CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int64_t input, uint64_t &result, string *error_message, uint8_t width, + uint8_t scale); template <> -hugeint_t CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(hugeint_t input, uint64_t &result, string *error_message, uint8_t width, + uint8_t scale); -// FLOAT 
+//===--------------------------------------------------------------------===// +// Cast Decimal <-> float +//===--------------------------------------------------------------------===// template <> -int16_t CastToDecimal::Operation(float input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(float input, int16_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int32_t CastToDecimal::Operation(float input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(float input, int32_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int64_t CastToDecimal::Operation(float input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(float input, int64_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -hugeint_t CastToDecimal::Operation(float input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(float input, hugeint_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -float CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int16_t input, float &result, string *error_message, uint8_t width, uint8_t scale); template <> -float CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int32_t input, float &result, string *error_message, uint8_t width, uint8_t scale); template <> -float CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int64_t input, float &result, string *error_message, uint8_t width, uint8_t scale); template <> -float CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(hugeint_t input, float &result, string *error_message, uint8_t width, uint8_t scale); -// DOUBLE +//===--------------------------------------------------------------------===// +// Cast Decimal <-> double 
+//===--------------------------------------------------------------------===// template <> -int16_t CastToDecimal::Operation(double input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(double input, int16_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int32_t CastToDecimal::Operation(double input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(double input, int32_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int64_t CastToDecimal::Operation(double input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(double input, int64_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -hugeint_t CastToDecimal::Operation(double input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(double input, hugeint_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -double CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int16_t input, double &result, string *error_message, uint8_t width, uint8_t scale); template <> -double CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int32_t input, double &result, string *error_message, uint8_t width, uint8_t scale); template <> -double CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(int64_t input, double &result, string *error_message, uint8_t width, uint8_t scale); template <> -double CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale); +bool TryCastFromDecimal::Operation(hugeint_t input, double &result, string *error_message, uint8_t width, + uint8_t scale); -// VARCHAR +//===--------------------------------------------------------------------===// +// Cast Decimal -> VARCHAR +//===--------------------------------------------------------------------===// template <> 
-int16_t CastToDecimal::Operation(string_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(string_t input, int16_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int32_t CastToDecimal::Operation(string_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(string_t input, int32_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -int64_t CastToDecimal::Operation(string_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(string_t input, int64_t &result, string *error_message, uint8_t width, uint8_t scale); template <> -hugeint_t CastToDecimal::Operation(string_t input, uint8_t width, uint8_t scale); +bool TryCastToDecimal::Operation(string_t input, hugeint_t &result, string *error_message, uint8_t width, + uint8_t scale); struct StringCastFromDecimal { template @@ -13874,187 +17126,12 @@ string_t StringCastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t template <> string_t StringCastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale, Vector &result); -//===--------------------------------------------------------------------===// -// Date Casts -//===--------------------------------------------------------------------===// -struct CastFromDate { - template - static inline string_t Operation(SRC input, Vector &result) { - throw duckdb::NotImplementedException("Cast from date could not be performed!"); - } -}; - -struct CastToDate { - template - static inline DST Operation(SRC input) { - throw duckdb::NotImplementedException("Cast to date could not be performed!"); - } -}; - -struct StrictCastToDate { - template - static inline DST Operation(SRC input) { - throw duckdb::NotImplementedException("Cast to date could not be performed!"); - } -}; - -struct CastDateToTimestamp { - template - static inline DST Operation(SRC input) { - throw duckdb::NotImplementedException("Cast to timestamp could not be performed!"); - } -}; 
-template <> -duckdb::string_t CastFromDate::Operation(duckdb::date_t input, Vector &result); -template <> -duckdb::date_t CastToDate::Operation(string_t input); -template <> -duckdb::date_t StrictCastToDate::Operation(string_t input); -template <> -duckdb::timestamp_t CastDateToTimestamp::Operation(duckdb::date_t input); - -struct CastFromTime { - template - static inline string_t Operation(SRC input, Vector &result) { - throw duckdb::NotImplementedException("Cast from time could not be performed!"); - } -}; -struct CastToTime { - template - static inline DST Operation(SRC input) { - throw duckdb::NotImplementedException("Cast to time could not be performed!"); - } -}; -struct StrictCastToTime { - template - static inline DST Operation(SRC input) { - throw duckdb::NotImplementedException("Cast to time could not be performed!"); - } -}; -template <> -duckdb::string_t CastFromTime::Operation(duckdb::dtime_t input, Vector &result); -template <> -duckdb::dtime_t CastToTime::Operation(string_t input); -template <> -duckdb::dtime_t StrictCastToTime::Operation(string_t input); - -struct CastToTimestamp { - template - static inline DST Operation(SRC input) { - throw duckdb::NotImplementedException("Cast to timestamp could not be performed!"); - } -}; - -struct CastFromTimestamp { - template - static inline string_t Operation(SRC input, Vector &result) { - throw duckdb::NotImplementedException("Cast to timestamp could not be performed!"); - } -}; - -struct CastTimestampToDate { - template - static inline DST Operation(SRC input) { - throw duckdb::NotImplementedException("Cast to timestamp could not be performed!"); - } -}; - -struct CastTimestampToTime { - template - static inline DST Operation(SRC input) { - throw duckdb::NotImplementedException("Cast to timestamp could not be performed!"); - } -}; - -template <> -duckdb::date_t CastTimestampToDate::Operation(duckdb::timestamp_t input); -template <> -duckdb::dtime_t CastTimestampToTime::Operation(duckdb::timestamp_t 
input); -template <> -duckdb::string_t CastFromTimestamp::Operation(duckdb::timestamp_t input, Vector &result); -template <> -duckdb::timestamp_t CastToTimestamp::Operation(string_t input); - -struct CastFromBlob { - template - static inline string_t Operation(SRC input, Vector &result) { - throw duckdb::NotImplementedException("Cast from blob could not be performed!"); - } -}; -template <> -duckdb::string_t CastFromBlob::Operation(duckdb::string_t input, Vector &vector); - -struct CastToBlob { - template - static inline string_t Operation(SRC input, Vector &result) { - throw duckdb::NotImplementedException("Cast to blob could not be performed!"); - } -}; -template <> -duckdb::string_t CastToBlob::Operation(duckdb::string_t input, Vector &vector); - } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/types/blob.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - -namespace duckdb { - -//! The Blob class is a static class that holds helper functions for the Blob type. -class Blob { -public: - // map of integer -> hex value - static constexpr const char *HEX_TABLE = "0123456789ABCDEF"; - // reverse map of byte -> integer value, or -1 for invalid hex values - static const int HEX_MAP[256]; - //! map of index -> base64 character - static constexpr const char *BASE64_MAP = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - //! padding character used in base64 encoding - static constexpr const char BASE64_PADDING = '='; - -public: - //! Returns the string size of a blob -> string conversion - static idx_t GetStringSize(string_t blob); - //! Converts a blob to a string, writing the output to the designated output string. - //! The string needs to have space for at least GetStringSize(blob) bytes. - static void ToString(string_t blob, char *output); - //! 
Convert a blob object to a string - static string ToString(string_t blob); - - //! Returns the blob size of a string -> blob conversion - static idx_t GetBlobSize(string_t str); - //! Convert a string to a blob. This function should ONLY be called after calling GetBlobSize, since it does NOT - //! perform data validation. - static void ToBlob(string_t str, data_ptr_t output); - //! Convert a string object to a blob - static string ToBlob(string_t str); - - // base 64 conversion functions - //! Returns the string size of a blob -> base64 conversion - static idx_t ToBase64Size(string_t blob); - //! Converts a blob to a base64 string, output should have space for at least ToBase64Size(blob) bytes - static void ToBase64(string_t blob, char *output); - - //! Returns the string size of a base64 string -> blob conversion - static idx_t FromBase64Size(string_t str); - //! Converts a base64 string to a blob, output should have space for at least FromBase64Size(blob) bytes - static void FromBase64(string_t str, data_ptr_t output, idx_t output_size); -}; -} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB @@ -14117,7 +17194,7 @@ class NumericHelper { template static string_t FormatSigned(SIGNED value, Vector &vector) { int sign = -(value < 0); - UNSIGNED unsigned_value = (value ^ sign) - sign; + UNSIGNED unsigned_value = UNSIGNED(value ^ sign) - sign; int length = UnsignedLength(unsigned_value) - sign; string_t result = StringVector::EmptyString(vector, length); auto dataptr = result.GetDataWriteable(); @@ -14564,11 +17641,11 @@ struct IntervalToStringCast { micros = -micros; } int64_t hour = micros / Interval::MICROS_PER_HOUR; - micros -= dtime_t(hour) * Interval::MICROS_PER_HOUR; + micros -= hour * Interval::MICROS_PER_HOUR; int64_t min = micros / Interval::MICROS_PER_MINUTE; - micros -= dtime_t(min) * Interval::MICROS_PER_MINUTE; + micros -= min * Interval::MICROS_PER_MINUTE; int64_t sec = micros / 
Interval::MICROS_PER_SEC; - micros -= dtime_t(sec) * Interval::MICROS_PER_SEC; + micros -= sec * Interval::MICROS_PER_SEC; if (hour < 10) { buffer[length++] = '0'; @@ -14592,628 +17669,782 @@ struct IntervalToStringCast { } }; -} // namespace duckdb +} // namespace duckdb + + + + + + + + + + +#include +#include +#include + +namespace duckdb { + +//===--------------------------------------------------------------------===// +// Cast bool -> Numeric +//===--------------------------------------------------------------------===// +template <> +bool TryCast::Operation(bool input, bool &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} + +template <> +bool TryCast::Operation(bool input, int8_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} + +template <> +bool TryCast::Operation(bool input, int16_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} +template <> +bool TryCast::Operation(bool input, int32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} +template <> +bool TryCast::Operation(bool input, int64_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} +template <> +bool TryCast::Operation(bool input, hugeint_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} +template <> +bool TryCast::Operation(bool input, uint8_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} +template <> +bool TryCast::Operation(bool input, uint16_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} +template <> +bool TryCast::Operation(bool input, uint32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} +template <> +bool TryCast::Operation(bool input, uint64_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} +template <> +bool 
TryCast::Operation(bool input, float &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} +template <> +bool TryCast::Operation(bool input, double &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} -#include -#include -#include +//===--------------------------------------------------------------------===// +// Cast int8_t -> Numeric +//===--------------------------------------------------------------------===// +template <> +bool TryCast::Operation(int8_t input, bool &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} -namespace duckdb { +template <> +bool TryCast::Operation(int8_t input, int8_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} -template -static bool TryCastWithOverflowCheck(SRC value, DST &result) { - if (std::numeric_limits::is_signed != std::numeric_limits::is_signed) { - if (std::numeric_limits::is_signed) { - // signed to unsigned conversion - if (std::numeric_limits::digits > std::numeric_limits::digits) { - if (value < 0 || value > (SRC)NumericLimits::Maximum()) { - return false; - } - } else { - if (value < 0 || (DST)value > NumericLimits::Maximum()) { - return false; - } - } +template <> +bool TryCast::Operation(int8_t input, int16_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} - result = (DST)value; - return true; - } else { - // unsigned to signed conversion - if (std::numeric_limits::digits > std::numeric_limits::digits) { - if (value <= (SRC)NumericLimits::Maximum()) { - result = (DST)value; - return true; - } - } else { - if ((DST)value <= NumericLimits::Maximum()) { - result = (DST)value; - return true; - } - } +template <> +bool TryCast::Operation(int8_t input, int32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} - return false; - } - } else { - if (value < NumericLimits::Minimum() || value > 
NumericLimits::Maximum()) { - return false; - } - result = (DST)value; - return true; - } +template <> +bool TryCast::Operation(int8_t input, int64_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } template <> -bool TryCastWithOverflowCheck(float value, int32_t &result) { - if (!(value >= -2147483648.0f && value < 2147483648.0f)) { - return false; - } - result = int32_t(value); - return true; +bool TryCast::Operation(int8_t input, hugeint_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } template <> -bool TryCastWithOverflowCheck(float value, int64_t &result) { - if (!(value >= -9223372036854775808.0f && value < 9223372036854775808.0f)) { - return false; - } - result = int64_t(value); - return true; +bool TryCast::Operation(int8_t input, uint8_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } template <> -bool TryCastWithOverflowCheck(double value, int64_t &result) { - if (!(value >= -9223372036854775808.0 && value < 9223372036854775808.0)) { - return false; - } - result = int64_t(value); - return true; +bool TryCast::Operation(int8_t input, uint16_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } -template -static DST CastWithOverflowCheck(SRC value) { - DST result; - if (!TryCastWithOverflowCheck(value, result)) { - throw ValueOutOfRangeException((double)value, GetTypeId(), GetTypeId()); - } - return result; +template <> +bool TryCast::Operation(int8_t input, uint32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} + +template <> +bool TryCast::Operation(int8_t input, uint64_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} + +template <> +bool TryCast::Operation(int8_t input, float &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} + +template <> +bool TryCast::Operation(int8_t input, double &result, bool 
strict) { + return NumericTryCast::Operation(input, result, strict); } //===--------------------------------------------------------------------===// -// Numeric -> int8_t casts +// Cast int16_t -> Numeric //===--------------------------------------------------------------------===// template <> -bool TryCast::Operation(uint8_t input, int8_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(int16_t input, bool &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(uint16_t input, int8_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(int16_t input, int8_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(uint32_t input, int8_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(int16_t input, int16_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(uint64_t input, int8_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(int16_t input, int32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} + +template <> +bool TryCast::Operation(int16_t input, int64_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(int16_t input, int8_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(int16_t input, hugeint_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(int32_t input, int8_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(int16_t input, uint8_t &result, bool strict) { + return 
NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(int64_t input, int8_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(int16_t input, uint16_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(float input, int8_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(int16_t input, uint32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(double input, int8_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(int16_t input, uint64_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} + +template <> +bool TryCast::Operation(int16_t input, float &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} + +template <> +bool TryCast::Operation(int16_t input, double &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} + +//===--------------------------------------------------------------------===// +// Cast int32_t -> Numeric +//===--------------------------------------------------------------------===// +template <> +bool TryCast::Operation(int32_t input, bool &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} + +template <> +bool TryCast::Operation(int32_t input, int8_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } template <> -int8_t Cast::Operation(uint8_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(int32_t input, int16_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} + +template <> +bool TryCast::Operation(int32_t input, int32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, 
strict); } + template <> -int8_t Cast::Operation(uint16_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(int32_t input, int64_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -int8_t Cast::Operation(uint32_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(int32_t input, hugeint_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -int8_t Cast::Operation(uint64_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(int32_t input, uint8_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -int8_t Cast::Operation(int16_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(int32_t input, uint16_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -int8_t Cast::Operation(int32_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(int32_t input, uint32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -int8_t Cast::Operation(int64_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(int32_t input, uint64_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -int8_t Cast::Operation(float input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(int32_t input, float &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -int8_t Cast::Operation(double input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(int32_t input, double &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } //===--------------------------------------------------------------------===// -// Numeric -> uint8_t casts +// Cast int64_t -> Numeric 
//===--------------------------------------------------------------------===// template <> -bool TryCast::Operation(uint16_t input, uint8_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(int64_t input, bool &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(uint32_t input, uint8_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(int64_t input, int8_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(uint64_t input, uint8_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(int64_t input, int16_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(int8_t input, uint8_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(int64_t input, int32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(int16_t input, uint8_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(int64_t input, int64_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(int32_t input, uint8_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(int64_t input, hugeint_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> bool TryCast::Operation(int64_t input, uint8_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(float input, uint8_t &result, bool strict) { - return 
TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(int64_t input, uint16_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(double input, uint8_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(int64_t input, uint32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -uint8_t Cast::Operation(uint16_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(int64_t input, uint64_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -uint8_t Cast::Operation(uint32_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(int64_t input, float &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -uint8_t Cast::Operation(uint64_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(int64_t input, double &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + +//===--------------------------------------------------------------------===// +// Cast hugeint_t -> Numeric +//===--------------------------------------------------------------------===// template <> -uint8_t Cast::Operation(int8_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(hugeint_t input, bool &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -uint8_t Cast::Operation(int16_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(hugeint_t input, int8_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -uint8_t Cast::Operation(int32_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(hugeint_t input, int16_t &result, bool strict) { + return NumericTryCast::Operation(input, result, 
strict); } + template <> -uint8_t Cast::Operation(int64_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(hugeint_t input, int32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -uint8_t Cast::Operation(float input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(hugeint_t input, int64_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -uint8_t Cast::Operation(double input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(hugeint_t input, hugeint_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } -//===--------------------------------------------------------------------===// -// Numeric -> int16_t casts -//===--------------------------------------------------------------------===// template <> -bool TryCast::Operation(uint16_t input, int16_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(hugeint_t input, uint8_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(uint32_t input, int16_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(hugeint_t input, uint16_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(uint64_t input, int16_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(hugeint_t input, uint32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(int32_t input, int16_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(hugeint_t input, uint64_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool 
TryCast::Operation(int64_t input, int16_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(hugeint_t input, float &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(float input, int16_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(hugeint_t input, double &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + +//===--------------------------------------------------------------------===// +// Cast uint8_t -> Numeric +//===--------------------------------------------------------------------===// template <> -bool TryCast::Operation(double input, int16_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(uint8_t input, bool &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } template <> -int16_t Cast::Operation(uint16_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(uint8_t input, int8_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -int16_t Cast::Operation(uint32_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(uint8_t input, int16_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -int16_t Cast::Operation(uint64_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(uint8_t input, int32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } template <> -int16_t Cast::Operation(int32_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(uint8_t input, int64_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -int16_t Cast::Operation(int64_t input) { - return CastWithOverflowCheck(input); +bool 
TryCast::Operation(uint8_t input, hugeint_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -int16_t Cast::Operation(float input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(uint8_t input, uint8_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -int16_t Cast::Operation(double input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(uint8_t input, uint16_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } -//===--------------------------------------------------------------------===// -// Numeric -> uint16_t casts -//===--------------------------------------------------------------------===// - template <> -bool TryCast::Operation(uint32_t input, uint16_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(uint8_t input, uint32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(uint64_t input, uint16_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(uint8_t input, uint64_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(int8_t input, uint16_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(uint8_t input, float &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } template <> -bool TryCast::Operation(int16_t input, uint16_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(uint8_t input, double &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + +//===--------------------------------------------------------------------===// +// Cast uint16_t -> Numeric 
+//===--------------------------------------------------------------------===// template <> -bool TryCast::Operation(int32_t input, uint16_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(uint16_t input, bool &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(int64_t input, uint16_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(uint16_t input, int8_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(float input, uint16_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(uint16_t input, int16_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(double input, uint16_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(uint16_t input, int32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } template <> -uint16_t Cast::Operation(uint32_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(uint16_t input, int64_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -uint16_t Cast::Operation(uint64_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(uint16_t input, hugeint_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } template <> -uint16_t Cast::Operation(int8_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(uint16_t input, uint8_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } template <> -uint16_t Cast::Operation(int16_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(uint16_t input, uint16_t &result, 
bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -uint16_t Cast::Operation(int32_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(uint16_t input, uint32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -uint16_t Cast::Operation(int64_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(uint16_t input, uint64_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -uint16_t Cast::Operation(float input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(uint16_t input, float &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -uint16_t Cast::Operation(double input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(uint16_t input, double &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + //===--------------------------------------------------------------------===// -// Numeric -> int32_t casts +// Cast uint32_t -> Numeric //===--------------------------------------------------------------------===// template <> -bool TryCast::Operation(uint32_t input, int32_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(uint32_t input, bool &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(uint64_t input, int32_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(uint32_t input, int8_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(int64_t input, int32_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(uint32_t input, int16_t &result, bool strict) { + return NumericTryCast::Operation(input, 
result, strict); } + template <> -bool TryCast::Operation(float input, int32_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(uint32_t input, int32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(double input, int32_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(uint32_t input, int64_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } template <> -int32_t Cast::Operation(uint32_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(uint32_t input, hugeint_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} + +template <> +bool TryCast::Operation(uint32_t input, uint8_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} + +template <> +bool TryCast::Operation(uint32_t input, uint16_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -int32_t Cast::Operation(uint64_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(uint32_t input, uint32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } template <> -int32_t Cast::Operation(int64_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(uint32_t input, uint64_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -int32_t Cast::Operation(float input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(uint32_t input, float &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -int32_t Cast::Operation(double input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(uint32_t input, double &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } 
//===--------------------------------------------------------------------===// -// Numeric -> uint32_t casts +// Cast uint64_t -> Numeric //===--------------------------------------------------------------------===// +template <> +bool TryCast::Operation(uint64_t input, bool &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} template <> -bool TryCast::Operation(uint64_t input, uint32_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(uint64_t input, int8_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(int8_t input, uint32_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(uint64_t input, int16_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(int16_t input, uint32_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(uint64_t input, int32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(int32_t input, uint32_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(uint64_t input, int64_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(int64_t input, uint32_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(uint64_t input, hugeint_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(float input, uint32_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(uint64_t input, uint8_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> 
-bool TryCast::Operation(double input, uint32_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(uint64_t input, uint16_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } template <> -uint32_t Cast::Operation(uint64_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(uint64_t input, uint32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -uint32_t Cast::Operation(int8_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(uint64_t input, uint64_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -uint32_t Cast::Operation(int16_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(uint64_t input, float &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } template <> -uint32_t Cast::Operation(int32_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(uint64_t input, double &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + +//===--------------------------------------------------------------------===// +// Cast float -> Numeric +//===--------------------------------------------------------------------===// template <> -uint32_t Cast::Operation(int64_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(float input, bool &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -uint32_t Cast::Operation(float input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(float input, int8_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -uint32_t Cast::Operation(double input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(float input, int16_t &result, bool strict) { + return 
NumericTryCast::Operation(input, result, strict); } -//===--------------------------------------------------------------------===// -// Numeric -> int64_t casts -//===--------------------------------------------------------------------===// + template <> -bool TryCast::Operation(uint64_t input, int64_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(float input, int32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> bool TryCast::Operation(float input, int64_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(double input, int64_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(float input, hugeint_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } template <> -int64_t Cast::Operation(uint64_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(float input, uint8_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -int64_t Cast::Operation(float input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(float input, uint16_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -int64_t Cast::Operation(double input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(float input, uint32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } -//===--------------------------------------------------------------------===// -// Numeric -> uint64_t casts -//===--------------------------------------------------------------------===// template <> -bool TryCast::Operation(int8_t input, uint64_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool 
TryCast::Operation(float input, uint64_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(int16_t input, uint64_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(float input, float &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(int32_t input, uint64_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(float input, double &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } +//===--------------------------------------------------------------------===// +// Cast double -> Numeric +//===--------------------------------------------------------------------===// template <> -bool TryCast::Operation(int64_t input, uint64_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(double input, bool &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(float input, uint64_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(double input, int8_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -bool TryCast::Operation(double input, uint64_t &result, bool strict) { - return TryCastWithOverflowCheck(input, result); +bool TryCast::Operation(double input, int16_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); +} + +template <> +bool TryCast::Operation(double input, int32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -uint64_t Cast::Operation(int8_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(double input, int64_t &result, bool strict) { + return NumericTryCast::Operation(input, 
result, strict); } + template <> -uint64_t Cast::Operation(int16_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(double input, hugeint_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -uint64_t Cast::Operation(int32_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(double input, uint8_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } template <> -uint64_t Cast::Operation(int64_t input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(double input, uint16_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -uint64_t Cast::Operation(float input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(double input, uint32_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } + template <> -uint64_t Cast::Operation(double input) { - return CastWithOverflowCheck(input); +bool TryCast::Operation(double input, uint64_t &result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } -//===--------------------------------------------------------------------===// -// Double -> float casts -//===--------------------------------------------------------------------===// template <> bool TryCast::Operation(double input, float &result, bool strict) { - if (input < (double)NumericLimits::Minimum() || input > (double)NumericLimits::Maximum()) { - return false; - } - auto res = (float)input; - if (std::isnan(res) || std::isinf(res)) { - return false; - } - result = res; - return true; + return NumericTryCast::Operation(input, result, strict); } template <> -float Cast::Operation(double input) { - float result; - bool strict = false; - if (!TryCast::Operation(input, result, strict)) { - throw ValueOutOfRangeException(input, GetTypeId(), GetTypeId()); - } - return result; +bool TryCast::Operation(double input, double 
&result, bool strict) { + return NumericTryCast::Operation(input, result, strict); } //===--------------------------------------------------------------------===// // Cast String -> Numeric //===--------------------------------------------------------------------===// -template -static T TryCastString(string_t input) { - T result; - if (!TryCast::Operation(input, result)) { - throw ConversionException("Could not convert string '%s' to %s", input.GetString(), - TypeIdToString(GetTypeId())); - } - return result; -} - -template -static T TryStrictCastString(string_t input) { - T result; - if (!TryCast::Operation(input, result, true)) { - throw ConversionException("Could not convert string '%s' to %s", input.GetString(), - TypeIdToString(GetTypeId())); - } - return result; -} - struct IntegerCastOperation { template static bool HandleDigit(T &result, uint8_t digit) { @@ -15231,7 +18462,7 @@ struct IntegerCastOperation { return true; } - template + template static bool HandleExponent(T &result, int64_t exponent) { double dbl_res = result * std::pow(10.0L, exponent); if (dbl_res < NumericLimits::Minimum() || dbl_res > NumericLimits::Maximum()) { @@ -15271,19 +18502,21 @@ static bool IntegerCastLoop(const char *buf, idx_t len, T &result, bool strict) idx_t start_digit = pos; while (pos < len) { if (!StringUtil::CharacterIsDigit(buf[pos])) { - return false; + break; } if (!OP::template HandleDecimal(result, buf[pos] - '0')) { return false; } pos++; } - if (!OP::template Finalize(result)) { - return false; - } // make sure there is either (1) one number after the period, or (2) one number before the period // i.e. we accept "1." and ".1" as valid numbers, but not "." 
- return number_before_period || pos > start_digit; + if (!(number_before_period || pos > start_digit)) { + return false; + } + if (pos >= len) { + break; + } } if (StringUtil::CharacterIsSpace(buf[pos])) { // skip any trailing spaces @@ -15311,7 +18544,7 @@ static bool IntegerCastLoop(const char *buf, idx_t len, T &result, bool strict) return false; } } - return OP::template HandleExponent(result, exponent); + return OP::template HandleExponent(result, exponent); } } return false; @@ -15327,8 +18560,9 @@ static bool IntegerCastLoop(const char *buf, idx_t len, T &result, bool strict) return pos > start_pos; } -template -static bool TryIntegerCast(const char *buf, idx_t len, T &result, bool strict, bool unsigned_int = false) { +template +static bool TryIntegerCast(const char *buf, idx_t len, T &result, bool strict) { // skip any spaces at the start while (len > 0 && StringUtil::CharacterIsSpace(*buf)) { buf++; @@ -15345,7 +18579,7 @@ static bool TryIntegerCast(const char *buf, idx_t len, T &result, bool strict, b if (!negative) { return IntegerCastLoop(buf, len, result, strict); } else { - if (unsigned_int) { + if (!IS_SIGNED) { // Need to check if its not -0 idx_t pos = 1; while (pos < len) { @@ -15421,19 +18655,19 @@ bool TryCast::Operation(string_t input, int64_t &result, bool strict) { template <> bool TryCast::Operation(string_t input, uint8_t &result, bool strict) { - return TryIntegerCast(input.GetDataUnsafe(), input.GetSize(), result, strict, true); + return TryIntegerCast(input.GetDataUnsafe(), input.GetSize(), result, strict); } template <> bool TryCast::Operation(string_t input, uint16_t &result, bool strict) { - return TryIntegerCast(input.GetDataUnsafe(), input.GetSize(), result, strict, true); + return TryIntegerCast(input.GetDataUnsafe(), input.GetSize(), result, strict); } template <> bool TryCast::Operation(string_t input, uint32_t &result, bool strict) { - return TryIntegerCast(input.GetDataUnsafe(), input.GetSize(), result, strict, true); + return 
TryIntegerCast(input.GetDataUnsafe(), input.GetSize(), result, strict); } template <> bool TryCast::Operation(string_t input, uint64_t &result, bool strict) { - return TryIntegerCast(input.GetDataUnsafe(), input.GetSize(), result, strict, true); + return TryIntegerCast(input.GetDataUnsafe(), input.GetSize(), result, strict); } template @@ -15482,7 +18716,7 @@ static bool DoubleCastLoop(const char *buf, idx_t len, T &result, bool strict) { // parse an integer, this time not allowing another exponent pos++; int64_t exponent; - if (!TryIntegerCast(buf + pos, len - pos, exponent, strict)) { + if (!TryIntegerCast(buf + pos, len - pos, exponent, strict)) { return false; } ComputeDoubleResult(result, decimal, decimal_factor); @@ -15562,391 +18796,240 @@ bool TryCast::Operation(string_t input, double &result, bool strict) { return TryDoubleCast(input.GetDataUnsafe(), input.GetSize(), result, strict); } +//===--------------------------------------------------------------------===// +// Cast From Date +//===--------------------------------------------------------------------===// template <> -bool Cast::Operation(string_t input) { - return TryCastString(input); -} -template <> -int8_t Cast::Operation(string_t input) { - return TryCastString(input); -} -template <> -int16_t Cast::Operation(string_t input) { - return TryCastString(input); -} -template <> -int32_t Cast::Operation(string_t input) { - return TryCastString(input); -} -template <> -int64_t Cast::Operation(string_t input) { - return TryCastString(input); -} -template <> -uint8_t Cast::Operation(string_t input) { - return TryCastString(input); -} -template <> -uint16_t Cast::Operation(string_t input) { - return TryCastString(input); -} -template <> -uint32_t Cast::Operation(string_t input) { - return TryCastString(input); -} -template <> -uint64_t Cast::Operation(string_t input) { - return TryCastString(input); +bool TryCast::Operation(date_t input, date_t &result, bool strict) { + result = input; + return true; } 
template <> -float Cast::Operation(string_t input) { - return TryCastString(input); -} -template <> -double Cast::Operation(string_t input) { - return TryCastString(input); +bool TryCast::Operation(date_t input, timestamp_t &result, bool strict) { + return Timestamp::TryFromDatetime(input, Time::FromTime(0, 0, 0), result); } +//===--------------------------------------------------------------------===// +// Cast From Time +//===--------------------------------------------------------------------===// template <> -bool StrictCast::Operation(string_t input) { - return TryStrictCastString(input); -} -template <> -int8_t StrictCast::Operation(string_t input) { - return TryStrictCastString(input); -} -template <> -int16_t StrictCast::Operation(string_t input) { - return TryStrictCastString(input); -} -template <> -int32_t StrictCast::Operation(string_t input) { - return TryStrictCastString(input); -} -template <> -int64_t StrictCast::Operation(string_t input) { - return TryStrictCastString(input); -} -template <> -uint8_t StrictCast::Operation(string_t input) { - return TryStrictCastString(input); -} -template <> -uint16_t StrictCast::Operation(string_t input) { - return TryStrictCastString(input); -} -template <> -uint32_t StrictCast::Operation(string_t input) { - return TryStrictCastString(input); -} -template <> -uint64_t StrictCast::Operation(string_t input) { - return TryStrictCastString(input); -} -template <> -float StrictCast::Operation(string_t input) { - return TryStrictCastString(input); -} -template <> -double StrictCast::Operation(string_t input) { - return TryStrictCastString(input); +bool TryCast::Operation(dtime_t input, dtime_t &result, bool strict) { + result = input; + return true; } //===--------------------------------------------------------------------===// -// Cast Numeric -> String +// Cast From Timestamps //===--------------------------------------------------------------------===// -template -string CastToStandardString(T input) { - Vector 
v(LogicalType::VARCHAR); - return StringCast::Operation(input, v).GetString(); -} - -template <> -string Cast::Operation(bool input) { - return CastToStandardString(input); -} -template <> -string Cast::Operation(int8_t input) { - return CastToStandardString(input); -} -template <> -string Cast::Operation(int16_t input) { - return CastToStandardString(input); -} -template <> -string Cast::Operation(int32_t input) { - return CastToStandardString(input); -} -template <> -string Cast::Operation(int64_t input) { - return CastToStandardString(input); -} -template <> -string Cast::Operation(uint8_t input) { - return CastToStandardString(input); -} template <> -string Cast::Operation(uint16_t input) { - return CastToStandardString(input); +bool TryCast::Operation(timestamp_t input, date_t &result, bool strict) { + result = Timestamp::GetDate(input); + return true; } + template <> -string Cast::Operation(uint32_t input) { - return CastToStandardString(input); +bool TryCast::Operation(timestamp_t input, dtime_t &result, bool strict) { + result = Timestamp::GetTime(input); + return true; } + template <> -string Cast::Operation(uint64_t input) { - return CastToStandardString(input); +bool TryCast::Operation(timestamp_t input, timestamp_t &result, bool strict) { + result = input; + return true; } + +//===--------------------------------------------------------------------===// +// Cast from Interval +//===--------------------------------------------------------------------===// template <> -string Cast::Operation(hugeint_t input) { - return Hugeint::ToString(input); +bool TryCast::Operation(interval_t input, interval_t &result, bool strict) { + result = input; + return true; } + +//===--------------------------------------------------------------------===// +// Non-Standard Timestamps +//===--------------------------------------------------------------------===// template <> -string Cast::Operation(float input) { - return CastToStandardString(input); +duckdb::string_t 
CastFromTimestampNS::Operation(duckdb::timestamp_t input, Vector &result) { + return StringCast::Operation(Timestamp::FromEpochNanoSeconds(input.value), result); } template <> -string Cast::Operation(double input) { - return CastToStandardString(input); +duckdb::string_t CastFromTimestampMS::Operation(duckdb::timestamp_t input, Vector &result) { + return StringCast::Operation(Timestamp::FromEpochMs(input.value), result); } template <> -string Cast::Operation(string_t input) { - return input.GetString(); +duckdb::string_t CastFromTimestampSec::Operation(duckdb::timestamp_t input, Vector &result) { + return StringCast::Operation(Timestamp::FromEpochSeconds(input.value), result); } template <> -string_t StringCast::Operation(bool input, Vector &vector) { - if (input) { - return StringVector::AddString(vector, "true", 4); - } else { - return StringVector::AddString(vector, "false", 5); - } +timestamp_t CastTimestampUsToMs::Operation(timestamp_t input) { + timestamp_t cast_timestamp(Timestamp::GetEpochMs(input)); + return cast_timestamp; } template <> -string_t StringCast::Operation(int8_t input, Vector &vector) { - return NumericHelper::FormatSigned(input, vector); +timestamp_t CastTimestampUsToNs::Operation(timestamp_t input) { + timestamp_t cast_timestamp(Timestamp::GetEpochNanoSeconds(input)); + return cast_timestamp; } template <> -string_t StringCast::Operation(int16_t input, Vector &vector) { - return NumericHelper::FormatSigned(input, vector); +timestamp_t CastTimestampUsToSec::Operation(timestamp_t input) { + timestamp_t cast_timestamp(Timestamp::GetEpochSeconds(input)); + return cast_timestamp; } template <> -string_t StringCast::Operation(int32_t input, Vector &vector) { - return NumericHelper::FormatSigned(input, vector); +timestamp_t CastTimestampMsToUs::Operation(timestamp_t input) { + return Timestamp::FromEpochMs(input.value); } template <> -string_t StringCast::Operation(int64_t input, Vector &vector) { - return NumericHelper::FormatSigned(input, 
vector); -} -template <> -duckdb::string_t StringCast::Operation(uint8_t input, Vector &vector) { - return NumericHelper::FormatSigned(input, vector); -} -template <> -duckdb::string_t StringCast::Operation(uint16_t input, Vector &vector) { - return NumericHelper::FormatSigned(input, vector); -} -template <> -duckdb::string_t StringCast::Operation(uint32_t input, Vector &vector) { - return NumericHelper::FormatSigned(input, vector); -} -template <> -duckdb::string_t StringCast::Operation(uint64_t input, Vector &vector) { - return NumericHelper::FormatSigned(input, vector); +timestamp_t CastTimestampNsToUs::Operation(timestamp_t input) { + return Timestamp::FromEpochNanoSeconds(input.value); } template <> -string_t StringCast::Operation(float input, Vector &vector) { - std::string s = duckdb_fmt::format("{}", input); - return StringVector::AddString(vector, s); +timestamp_t CastTimestampSecToUs::Operation(timestamp_t input) { + return Timestamp::FromEpochSeconds(input.value); } +//===--------------------------------------------------------------------===// +// Cast To Timestamp +//===--------------------------------------------------------------------===// template <> -string_t StringCast::Operation(double input, Vector &vector) { - std::string s = duckdb_fmt::format("{}", input); - return StringVector::AddString(vector, s); +bool TryCastToTimestampNS::Operation(string_t input, timestamp_t &result, bool strict) { + if (!TryCast::Operation(input, result, strict)) { + return false; + } + result = Timestamp::GetEpochNanoSeconds(result); + return true; } template <> -string_t StringCast::Operation(interval_t input, Vector &vector) { - char buffer[70]; - idx_t length = IntervalToStringCast::Format(input, buffer); - return StringVector::AddString(vector, buffer, length); +bool TryCastToTimestampMS::Operation(string_t input, timestamp_t &result, bool strict) { + if (!TryCast::Operation(input, result, strict)) { + return false; + } + result = Timestamp::GetEpochMs(result); 
+ return true; } template <> -duckdb::string_t StringCast::Operation(hugeint_t input, Vector &vector) { - return HugeintToStringCast::FormatSigned(input, vector); +bool TryCastToTimestampSec::Operation(string_t input, timestamp_t &result, bool strict) { + if (!TryCast::Operation(input, result, strict)) { + return false; + } + result = Timestamp::GetEpochSeconds(result); + return true; } - //===--------------------------------------------------------------------===// -// Cast From Date +// Cast From Blob //===--------------------------------------------------------------------===// template <> -string_t CastFromDate::Operation(date_t input, Vector &vector) { - int32_t date[3]; - Date::Convert(input, date[0], date[1], date[2]); - - idx_t year_length; - bool add_bc; - idx_t length = DateToStringCast::Length(date, year_length, add_bc); - - string_t result = StringVector::EmptyString(vector, length); - auto data = result.GetDataWriteable(); - - DateToStringCast::Format(data, date, year_length, add_bc); +string_t CastFromBlob::Operation(string_t input, Vector &vector) { + idx_t result_size = Blob::GetStringSize(input); + string_t result = StringVector::EmptyString(vector, result_size); + Blob::ToString(input, result.GetDataWriteable()); result.Finalize(); return result; } //===--------------------------------------------------------------------===// -// Cast To Date -//===--------------------------------------------------------------------===// -template <> -date_t CastToDate::Operation(string_t input) { - return Date::FromCString(input.GetDataUnsafe(), input.GetSize()); -} - -template <> -date_t StrictCastToDate::Operation(string_t input) { - return Date::FromCString(input.GetDataUnsafe(), input.GetSize(), true); -} - -//===--------------------------------------------------------------------===// -// Cast From Time +// Cast To Blob //===--------------------------------------------------------------------===// template <> -string_t CastFromTime::Operation(dtime_t input, 
Vector &vector) { - int32_t time[4]; - Time::Convert(input, time[0], time[1], time[2], time[3]); - - char micro_buffer[10]; - idx_t length = TimeToStringCast::Length(time, micro_buffer); - - string_t result = StringVector::EmptyString(vector, length); - auto data = result.GetDataWriteable(); - - TimeToStringCast::Format(data, length, time, micro_buffer); +bool TryCastToBlob::Operation(string_t input, string_t &result, Vector &result_vector, string *error_message, + bool strict) { + idx_t result_size; + if (!Blob::TryGetBlobSize(input, result_size, error_message)) { + return false; + } + result = StringVector::EmptyString(result_vector, result_size); + Blob::ToBlob(input, (data_ptr_t)result.GetDataWriteable()); result.Finalize(); - return result; + return true; } //===--------------------------------------------------------------------===// -// Cast To Time +// Cast To Date //===--------------------------------------------------------------------===// template <> -dtime_t CastToTime::Operation(string_t input) { - return Time::FromCString(input.GetDataUnsafe(), input.GetSize()); +bool TryCastErrorMessage::Operation(string_t input, date_t &result, string *error_message, bool strict) { + if (!TryCast::Operation(input, result, strict)) { + HandleCastError::AssignError(Date::ConversionError(input), error_message); + return false; + } + return true; } template <> -dtime_t StrictCastToTime::Operation(string_t input) { - return Time::FromCString(input.GetDataUnsafe(), input.GetSize(), true); +bool TryCast::Operation(string_t input, date_t &result, bool strict) { + idx_t pos; + return Date::TryConvertDate(input.GetDataUnsafe(), input.GetSize(), pos, result, strict); } template <> -timestamp_t CastDateToTimestamp::Operation(date_t input) { - return Timestamp::FromDatetime(input, Time::FromTime(0, 0, 0, 0)); +date_t Cast::Operation(string_t input) { + return Date::FromCString(input.GetDataUnsafe(), input.GetSize()); } 
//===--------------------------------------------------------------------===// -// Cast From Timestamps +// Cast To Time //===--------------------------------------------------------------------===// template <> -string_t CastFromTimestamp::Operation(timestamp_t input, Vector &vector) { - date_t date_entry; - dtime_t time_entry; - Timestamp::Convert(input, date_entry, time_entry); - - int32_t date[3], time[4]; - Date::Convert(date_entry, date[0], date[1], date[2]); - Time::Convert(time_entry, time[0], time[1], time[2], time[3]); - - // format for timestamp is DATE TIME (separated by space) - idx_t year_length; - bool add_bc; - char micro_buffer[6]; - idx_t date_length = DateToStringCast::Length(date, year_length, add_bc); - idx_t time_length = TimeToStringCast::Length(time, micro_buffer); - idx_t length = date_length + time_length + 1; - - string_t result = StringVector::EmptyString(vector, length); - auto data = result.GetDataWriteable(); - - DateToStringCast::Format(data, date, year_length, add_bc); - data[date_length] = ' '; - TimeToStringCast::Format(data + date_length + 1, time_length, time, micro_buffer); - - result.Finalize(); - return result; +bool TryCastErrorMessage::Operation(string_t input, dtime_t &result, string *error_message, bool strict) { + if (!TryCast::Operation(input, result, strict)) { + HandleCastError::AssignError(Time::ConversionError(input), error_message); + return false; + } + return true; } template <> -date_t CastTimestampToDate::Operation(timestamp_t input) { - return Timestamp::GetDate(input); +bool TryCast::Operation(string_t input, dtime_t &result, bool strict) { + idx_t pos; + return Time::TryConvertTime(input.GetDataUnsafe(), input.GetSize(), pos, result, strict); } template <> -dtime_t CastTimestampToTime::Operation(timestamp_t input) { - return Timestamp::GetTime(input); +dtime_t Cast::Operation(string_t input) { + return Time::FromCString(input.GetDataUnsafe(), input.GetSize()); } 
//===--------------------------------------------------------------------===// // Cast To Timestamp //===--------------------------------------------------------------------===// template <> -timestamp_t CastToTimestamp::Operation(string_t input) { - return Timestamp::FromCString(input.GetDataUnsafe(), input.GetSize()); +bool TryCastErrorMessage::Operation(string_t input, timestamp_t &result, string *error_message, bool strict) { + if (!TryCast::Operation(input, result, strict)) { + HandleCastError::AssignError(Timestamp::ConversionError(input), error_message); + return false; + } + return true; } -//===--------------------------------------------------------------------===// -// Cast From Blob -//===--------------------------------------------------------------------===// template <> -string_t CastFromBlob::Operation(string_t input, Vector &vector) { - idx_t result_size = Blob::GetStringSize(input); - - string_t result = StringVector::EmptyString(vector, result_size); - Blob::ToString(input, result.GetDataWriteable()); - result.Finalize(); - return result; +bool TryCast::Operation(string_t input, timestamp_t &result, bool strict) { + return Timestamp::TryConvertTimestamp(input.GetDataUnsafe(), input.GetSize(), result); } -//===--------------------------------------------------------------------===// -// Cast To Blob -//===--------------------------------------------------------------------===// template <> -string_t CastToBlob::Operation(string_t input, Vector &vector) { - idx_t result_size = Blob::GetBlobSize(input); - - string_t result = StringVector::EmptyString(vector, result_size); - Blob::ToBlob(input, (data_ptr_t)result.GetDataWriteable()); - result.Finalize(); - return result; +timestamp_t Cast::Operation(string_t input) { + return Timestamp::FromCString(input.GetDataUnsafe(), input.GetSize()); } //===--------------------------------------------------------------------===// // Cast From Interval 
//===--------------------------------------------------------------------===// template <> -bool TryCast::Operation(string_t input, interval_t &result, bool strict) { - return Interval::FromCString(input.GetDataUnsafe(), input.GetSize(), result); -} - -template <> -interval_t StrictCast::Operation(string_t input) { - return TryStrictCastString(input); -} - -template <> -interval_t Cast::Operation(string_t input) { - return TryCastString(input); +bool TryCastErrorMessage::Operation(string_t input, interval_t &result, string *error_message, bool strict) { + return Interval::FromCString(input.GetDataUnsafe(), input.GetSize(), result, error_message, strict); } //===--------------------------------------------------------------------===// @@ -16008,9 +19091,11 @@ struct HugeIntegerCastOperation { return true; } - template + template static bool HandleExponent(T &result, int64_t exponent) { - result.Flush(); + if (!result.Flush()) { + return false; + } if (exponent < -38 || exponent > 38) { // out of range for exact exponent: use double and convert double dbl_res = Hugeint::Cast(result.hugeint) * std::pow(10.0L, exponent); @@ -16045,939 +19130,1131 @@ struct HugeIntegerCastOperation { template <> bool TryCast::Operation(string_t input, hugeint_t &result, bool strict) { HugeIntCastData data; - if (!TryIntegerCast(input.GetDataUnsafe(), input.GetSize(), data, - strict)) { + if (!TryIntegerCast(input.GetDataUnsafe(), input.GetSize(), + data, strict)) { return false; } result = data.hugeint; return true; } -template <> -hugeint_t Cast::Operation(string_t input) { - return TryCastString(input); +//===--------------------------------------------------------------------===// +// Decimal String Cast +//===--------------------------------------------------------------------===// +template +struct DecimalCastData { + T result; + uint8_t width; + uint8_t scale; + uint8_t digit_count; + uint8_t decimal_count; +}; + +struct DecimalCastOperation { + template + static bool 
HandleDigit(T &state, uint8_t digit) { + if (state.result == 0 && digit == 0) { + // leading zero's don't count towards the digit count + return true; + } + if (state.digit_count == state.width - state.scale) { + // width of decimal type is exceeded! + return false; + } + state.digit_count++; + if (NEGATIVE) { + state.result = state.result * 10 - digit; + } else { + state.result = state.result * 10 + digit; + } + return true; + } + + template + static bool HandleExponent(T &state, int64_t exponent) { + Finalize(state); + if (exponent < 0) { + for (idx_t i = 0; i < idx_t(-exponent); i++) { + state.result /= 10; + if (state.result == 0) { + break; + } + } + return true; + } else { + // positive exponent: append 0's + for (idx_t i = 0; i < idx_t(exponent); i++) { + if (!HandleDigit(state, 0)) { + return false; + } + } + return true; + } + } + + template + static bool HandleDecimal(T &state, uint8_t digit) { + if (state.decimal_count == state.scale) { + // we exceeded the amount of supported decimals + // however, we don't throw an error here + // we just truncate the decimal + return true; + } + state.decimal_count++; + if (NEGATIVE) { + state.result = state.result * 10 - digit; + } else { + state.result = state.result * 10 + digit; + } + return true; + } + + template + static bool Finalize(T &state) { + // if we have not gotten exactly "scale" decimals, we need to multiply the result + // e.g. 
if we have a string "1.0" that is cast to a DECIMAL(9,3), the value needs to be 1000 + // but we have only gotten the value "10" so far, so we multiply by 1000 + for (uint8_t i = state.decimal_count; i < state.scale; i++) { + state.result *= 10; + } + return true; + } +}; + +template +bool TryDecimalStringCast(string_t input, T &result, string *error_message, uint8_t width, uint8_t scale) { + DecimalCastData state; + state.result = 0; + state.width = width; + state.scale = scale; + state.digit_count = 0; + state.decimal_count = 0; + if (!TryIntegerCast, true, true, DecimalCastOperation, false>(input.GetDataUnsafe(), + input.GetSize(), state, false)) { + string error = StringUtil::Format("Could not convert string \"%s\" to DECIMAL(%d,%d)", input.GetString(), + (int)width, (int)scale); + HandleCastError::AssignError(error, error_message); + return false; + } + result = state.result; + return true; } template <> -hugeint_t StrictCast::Operation(string_t input) { - return TryStrictCastString(input); +bool TryCastToDecimal::Operation(string_t input, int16_t &result, string *error_message, uint8_t width, uint8_t scale) { + return TryDecimalStringCast(input, result, error_message, width, scale); } -//===--------------------------------------------------------------------===// -// Numeric -> Hugeint -//===--------------------------------------------------------------------===// template <> -bool TryCast::Operation(bool input, hugeint_t &result, bool strict) { - result = Cast::Operation(input); - return true; +bool TryCastToDecimal::Operation(string_t input, int32_t &result, string *error_message, uint8_t width, uint8_t scale) { + return TryDecimalStringCast(input, result, error_message, width, scale); } template <> -bool TryCast::Operation(int8_t input, hugeint_t &result, bool strict) { - result = Cast::Operation(input); - return true; +bool TryCastToDecimal::Operation(string_t input, int64_t &result, string *error_message, uint8_t width, uint8_t scale) { + return 
TryDecimalStringCast(input, result, error_message, width, scale); } template <> -bool TryCast::Operation(int16_t input, hugeint_t &result, bool strict) { - result = Cast::Operation(input); - return true; +bool TryCastToDecimal::Operation(string_t input, hugeint_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryDecimalStringCast(input, result, error_message, width, scale); } template <> -bool TryCast::Operation(int32_t input, hugeint_t &result, bool strict) { - result = Cast::Operation(input); - return true; +string_t StringCastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale, Vector &result) { + return DecimalToString::Format(input, scale, result); } template <> -bool TryCast::Operation(int64_t input, hugeint_t &result, bool strict) { - result = Cast::Operation(input); - return true; +string_t StringCastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale, Vector &result) { + return DecimalToString::Format(input, scale, result); } template <> -bool TryCast::Operation(uint8_t input, hugeint_t &result, bool strict) { - result = Cast::Operation(input); - return true; +string_t StringCastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale, Vector &result) { + return DecimalToString::Format(input, scale, result); } + template <> -bool TryCast::Operation(uint16_t input, hugeint_t &result, bool strict) { - result = Cast::Operation(input); - return true; +string_t StringCastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale, Vector &result) { + return HugeintToStringCast::FormatDecimal(input, scale, result); } -template <> -bool TryCast::Operation(uint32_t input, hugeint_t &result, bool strict) { - result = Cast::Operation(input); - return true; + +//===--------------------------------------------------------------------===// +// Decimal Casts +//===--------------------------------------------------------------------===// +// Decimal <-> Bool 
+//===--------------------------------------------------------------------===// +template +bool TryCastBoolToDecimal(bool input, T &result, string *error_message, uint8_t width, uint8_t scale) { + if (width > scale) { + result = input ? OP::POWERS_OF_TEN[scale] : 0; + return true; + } else { + return TryCast::Operation(input, result); + } } + template <> -bool TryCast::Operation(uint64_t input, hugeint_t &result, bool strict) { - result = Cast::Operation(input); - return true; +bool TryCastToDecimal::Operation(bool input, int16_t &result, string *error_message, uint8_t width, uint8_t scale) { + return TryCastBoolToDecimal(input, result, error_message, width, scale); } template <> -bool TryCast::Operation(float input, hugeint_t &result, bool strict) { - result = Cast::Operation(input); - return true; +bool TryCastToDecimal::Operation(bool input, int32_t &result, string *error_message, uint8_t width, uint8_t scale) { + return TryCastBoolToDecimal(input, result, error_message, width, scale); } template <> -bool TryCast::Operation(double input, hugeint_t &result, bool strict) { - result = Cast::Operation(input); - return true; +bool TryCastToDecimal::Operation(bool input, int64_t &result, string *error_message, uint8_t width, uint8_t scale) { + return TryCastBoolToDecimal(input, result, error_message, width, scale); } template <> -hugeint_t Cast::Operation(bool input) { - hugeint_t result; - result.upper = 0; - result.lower = input ? 
1 : 0; - return result; +bool TryCastToDecimal::Operation(bool input, hugeint_t &result, string *error_message, uint8_t width, uint8_t scale) { + return TryCastBoolToDecimal(input, result, error_message, width, scale); } template <> -hugeint_t Cast::Operation(uint8_t input) { - return Hugeint::Convert(input); +bool TryCastFromDecimal::Operation(int16_t input, bool &result, string *error_message, uint8_t width, uint8_t scale) { + return TryCast::Operation(input, result); } + template <> -hugeint_t Cast::Operation(uint16_t input) { - return Hugeint::Convert(input); +bool TryCastFromDecimal::Operation(int32_t input, bool &result, string *error_message, uint8_t width, uint8_t scale) { + return TryCast::Operation(input, result); } + template <> -hugeint_t Cast::Operation(uint32_t input) { - return Hugeint::Convert(input); +bool TryCastFromDecimal::Operation(int64_t input, bool &result, string *error_message, uint8_t width, uint8_t scale) { + return TryCast::Operation(input, result); } + template <> -hugeint_t Cast::Operation(uint64_t input) { - return Hugeint::Convert(input); +bool TryCastFromDecimal::Operation(hugeint_t input, bool &result, string *error_message, uint8_t width, uint8_t scale) { + return TryCast::Operation(input, result); } -template <> -hugeint_t Cast::Operation(int8_t input) { - return Hugeint::Convert(input); +//===--------------------------------------------------------------------===// +// Numeric -> Decimal Cast +//===--------------------------------------------------------------------===// +struct SignedToDecimalOperator { + template + static bool Operation(SRC input, DST max_width) { + return int64_t(input) >= int64_t(max_width) || int64_t(input) <= int64_t(-max_width); + } +}; + +struct UnsignedToDecimalOperator { + template + static bool Operation(SRC input, DST max_width) { + return uint64_t(input) >= uint64_t(max_width); + } +}; + +template +bool StandardNumericToDecimalCast(SRC input, DST &result, string *error_message, uint8_t width, 
uint8_t scale) { + // check for overflow + DST max_width = NumericHelper::POWERS_OF_TEN[width - scale]; + if (OP::template Operation(input, max_width)) { + string error = StringUtil::Format("Could not cast value %d to DECIMAL(%d,%d)", input, width, scale); + HandleCastError::AssignError(error, error_message); + return false; + } + result = DST(input) * NumericHelper::POWERS_OF_TEN[scale]; + return true; } -template <> -hugeint_t Cast::Operation(int16_t input) { - return Hugeint::Convert(input); + +template +bool NumericToHugeDecimalCast(SRC input, hugeint_t &result, string *error_message, uint8_t width, uint8_t scale) { + // check for overflow + hugeint_t max_width = Hugeint::POWERS_OF_TEN[width - scale]; + hugeint_t hinput = Hugeint::Convert(input); + if (hinput >= max_width || hinput <= -max_width) { + string error = StringUtil::Format("Could not cast value %s to DECIMAL(%d,%d)", hinput.ToString(), width, scale); + HandleCastError::AssignError(error, error_message); + return false; + } + result = hinput * Hugeint::POWERS_OF_TEN[scale]; + return true; } + +//===--------------------------------------------------------------------===// +// Cast int8_t -> Decimal +//===--------------------------------------------------------------------===// template <> -hugeint_t Cast::Operation(int32_t input) { - return Hugeint::Convert(input); +bool TryCastToDecimal::Operation(int8_t input, int16_t &result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, width, scale); } template <> -hugeint_t Cast::Operation(int64_t input) { - return Hugeint::Convert(input); +bool TryCastToDecimal::Operation(int8_t input, int32_t &result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, width, scale); } template <> -hugeint_t Cast::Operation(float input) { - return Hugeint::Convert(input); +bool TryCastToDecimal::Operation(int8_t input, int64_t 
&result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, width, scale); } template <> -hugeint_t Cast::Operation(double input) { - return Hugeint::Convert(input); +bool TryCastToDecimal::Operation(int8_t input, hugeint_t &result, string *error_message, uint8_t width, uint8_t scale) { + return NumericToHugeDecimalCast(input, result, error_message, width, scale); } //===--------------------------------------------------------------------===// -// Hugeint -> Numeric +// Cast int16_t -> Decimal //===--------------------------------------------------------------------===// template <> -bool TryCast::Operation(hugeint_t input, bool &result, bool strict) { - // any positive number converts to true - result = input.upper > 0 || (input.upper == 0 && input.lower > 0); - return true; -} - -template <> -bool TryCast::Operation(hugeint_t input, int8_t &result, bool strict) { - return Hugeint::TryCast(input, result); +bool TryCastToDecimal::Operation(int16_t input, int16_t &result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, width, scale); } - template <> -bool TryCast::Operation(hugeint_t input, int16_t &result, bool strict) { - return Hugeint::TryCast(input, result); +bool TryCastToDecimal::Operation(int16_t input, int32_t &result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, width, scale); } - template <> -bool TryCast::Operation(hugeint_t input, int32_t &result, bool strict) { - return Hugeint::TryCast(input, result); +bool TryCastToDecimal::Operation(int16_t input, int64_t &result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, width, scale); } - template <> -bool TryCast::Operation(hugeint_t input, int64_t &result, bool strict) { - return Hugeint::TryCast(input, result); 
+bool TryCastToDecimal::Operation(int16_t input, hugeint_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return NumericToHugeDecimalCast(input, result, error_message, width, scale); } +//===--------------------------------------------------------------------===// +// Cast int32_t -> Decimal +//===--------------------------------------------------------------------===// template <> -bool TryCast::Operation(hugeint_t input, uint8_t &result, bool strict) { - return Hugeint::TryCast(input, result); +bool TryCastToDecimal::Operation(int32_t input, int16_t &result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, width, scale); } template <> -bool TryCast::Operation(hugeint_t input, uint16_t &result, bool strict) { - return Hugeint::TryCast(input, result); +bool TryCastToDecimal::Operation(int32_t input, int32_t &result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, width, scale); } template <> -bool TryCast::Operation(hugeint_t input, uint32_t &result, bool strict) { - return Hugeint::TryCast(input, result); +bool TryCastToDecimal::Operation(int32_t input, int64_t &result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, width, scale); } template <> -bool TryCast::Operation(hugeint_t input, uint64_t &result, bool strict) { - return Hugeint::TryCast(input, result); +bool TryCastToDecimal::Operation(int32_t input, hugeint_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return NumericToHugeDecimalCast(input, result, error_message, width, scale); } +//===--------------------------------------------------------------------===// +// Cast int64_t -> Decimal +//===--------------------------------------------------------------------===// template <> -bool TryCast::Operation(hugeint_t input, float &result, bool 
strict) { - return Hugeint::TryCast(input, result); +bool TryCastToDecimal::Operation(int64_t input, int16_t &result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, width, scale); } - template <> -bool TryCast::Operation(hugeint_t input, double &result, bool strict) { - return Hugeint::TryCast(input, result); +bool TryCastToDecimal::Operation(int64_t input, int32_t &result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, width, scale); } - template <> -bool Cast::Operation(hugeint_t input) { - bool result; - TryCast::Operation(input, result); - return result; +bool TryCastToDecimal::Operation(int64_t input, int64_t &result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, width, scale); } - -template -static T HugeintCastToNumeric(hugeint_t input) { - T result; - if (!TryCast::Operation(input, result)) { - throw ValueOutOfRangeException(input, PhysicalType::INT128, GetTypeId()); - } - return result; +template <> +bool TryCastToDecimal::Operation(int64_t input, hugeint_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return NumericToHugeDecimalCast(input, result, error_message, width, scale); } +//===--------------------------------------------------------------------===// +// Cast uint8_t -> Decimal +//===--------------------------------------------------------------------===// template <> -int8_t Cast::Operation(hugeint_t input) { - return HugeintCastToNumeric(input); +bool TryCastToDecimal::Operation(uint8_t input, int16_t &result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, + width, scale); } - template <> -int16_t Cast::Operation(hugeint_t input) { - return HugeintCastToNumeric(input); +bool TryCastToDecimal::Operation(uint8_t input, int32_t 
&result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, + width, scale); } - template <> -int32_t Cast::Operation(hugeint_t input) { - return HugeintCastToNumeric(input); +bool TryCastToDecimal::Operation(uint8_t input, int64_t &result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, + width, scale); } - template <> -int64_t Cast::Operation(hugeint_t input) { - return HugeintCastToNumeric(input); +bool TryCastToDecimal::Operation(uint8_t input, hugeint_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return NumericToHugeDecimalCast(input, result, error_message, width, scale); } +//===--------------------------------------------------------------------===// +// Cast uint16_t -> Decimal +//===--------------------------------------------------------------------===// template <> -uint8_t Cast::Operation(hugeint_t input) { - return HugeintCastToNumeric(input); +bool TryCastToDecimal::Operation(uint16_t input, int16_t &result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, + width, scale); } template <> -uint16_t Cast::Operation(hugeint_t input) { - return HugeintCastToNumeric(input); +bool TryCastToDecimal::Operation(uint16_t input, int32_t &result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, + width, scale); } template <> -uint32_t Cast::Operation(hugeint_t input) { - return HugeintCastToNumeric(input); +bool TryCastToDecimal::Operation(uint16_t input, int64_t &result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, + width, scale); } template <> -uint64_t Cast::Operation(hugeint_t input) { - return HugeintCastToNumeric(input); +bool TryCastToDecimal::Operation(uint16_t 
input, hugeint_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return NumericToHugeDecimalCast(input, result, error_message, width, scale); } +//===--------------------------------------------------------------------===// +// Cast uint32_t -> Decimal +//===--------------------------------------------------------------------===// template <> -float Cast::Operation(hugeint_t input) { - return HugeintCastToNumeric(input); +bool TryCastToDecimal::Operation(uint32_t input, int16_t &result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, + width, scale); } - template <> -double Cast::Operation(hugeint_t input) { - return HugeintCastToNumeric(input); +bool TryCastToDecimal::Operation(uint32_t input, int32_t &result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, + width, scale); } - template <> -bool TryCast::Operation(hugeint_t input, hugeint_t &result, bool strict) { - result = input; - return true; +bool TryCastToDecimal::Operation(uint32_t input, int64_t &result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, + width, scale); } - template <> -hugeint_t Cast::Operation(hugeint_t input) { - return input; +bool TryCastToDecimal::Operation(uint32_t input, hugeint_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return NumericToHugeDecimalCast(input, result, error_message, width, scale); } //===--------------------------------------------------------------------===// -// Decimal String Cast +// Cast uint64_t -> Decimal //===--------------------------------------------------------------------===// -template -struct DecimalCastData { - T result; - uint8_t width; - uint8_t scale; - uint8_t digit_count; - uint8_t decimal_count; -}; - -struct DecimalCastOperation { - template - static bool HandleDigit(T &state, 
uint8_t digit) { - if (state.result == 0 && digit == 0) { - // leading zero's don't count towards the digit count - return true; - } - if (state.digit_count == state.width - state.scale) { - // width of decimal type is exceeded! - return false; - } - state.digit_count++; - if (NEGATIVE) { - state.result = state.result * 10 - digit; - } else { - state.result = state.result * 10 + digit; - } - return true; - } - - template - static bool HandleExponent(T &state, int64_t exponent) { - return false; - } - - template - static bool HandleDecimal(T &state, uint8_t digit) { - if (state.decimal_count == state.scale) { - // we exceeded the amount of supported decimals - // however, we don't throw an error here - // we just truncate the decimal - return true; - } - state.decimal_count++; - if (NEGATIVE) { - state.result = state.result * 10 - digit; - } else { - state.result = state.result * 10 + digit; - } - return true; - } - - template - static bool Finalize(T &state) { - // if we have not gotten exactly "scale" decimals, we need to multiply the result - // e.g. 
if we have a string "1.0" that is cast to a DECIMAL(9,3), the value needs to be 1000 - // but we have only gotten the value "10" so far, so we multiply by 1000 - for (uint8_t i = state.decimal_count; i < state.scale; i++) { - state.result *= 10; - } - return true; - } -}; - -template -T DecimalStringCast(string_t input, uint8_t width, uint8_t scale) { - DecimalCastData state; - state.result = 0; - state.width = width; - state.scale = scale; - state.digit_count = 0; - state.decimal_count = 0; - if (!TryIntegerCast, false, DecimalCastOperation, false>(input.GetDataUnsafe(), input.GetSize(), - state, false)) { - throw ConversionException("Could not convert string \"%s\" to DECIMAL(%d,%d)", input.GetString(), (int)width, - (int)scale); - } - return state.result; +template <> +bool TryCastToDecimal::Operation(uint64_t input, int16_t &result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, + width, scale); } - template <> -int16_t CastToDecimal::Operation(string_t input, uint8_t width, uint8_t scale) { - return DecimalStringCast(input, width, scale); +bool TryCastToDecimal::Operation(uint64_t input, int32_t &result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, + width, scale); } - template <> -int32_t CastToDecimal::Operation(string_t input, uint8_t width, uint8_t scale) { - return DecimalStringCast(input, width, scale); +bool TryCastToDecimal::Operation(uint64_t input, int64_t &result, string *error_message, uint8_t width, uint8_t scale) { + return StandardNumericToDecimalCast(input, result, error_message, + width, scale); } - template <> -int64_t CastToDecimal::Operation(string_t input, uint8_t width, uint8_t scale) { - return DecimalStringCast(input, width, scale); +bool TryCastToDecimal::Operation(uint64_t input, hugeint_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return 
NumericToHugeDecimalCast(input, result, error_message, width, scale); } -template <> -hugeint_t CastToDecimal::Operation(string_t input, uint8_t width, uint8_t scale) { - return DecimalStringCast(input, width, scale); +//===--------------------------------------------------------------------===// +// Hugeint -> Decimal Cast +//===--------------------------------------------------------------------===// +template +bool HugeintToDecimalCast(hugeint_t input, DST &result, string *error_message, uint8_t width, uint8_t scale) { + // check for overflow + hugeint_t max_width = Hugeint::POWERS_OF_TEN[width - scale]; + if (input >= max_width || input <= -max_width) { + string error = StringUtil::Format("Could not cast value %s to DECIMAL(%d,%d)", input.ToString(), width, scale); + HandleCastError::AssignError(error, error_message); + return false; + } + result = Hugeint::Cast(input * Hugeint::POWERS_OF_TEN[scale]); + return true; } template <> -string_t StringCastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale, Vector &result) { - return DecimalToString::Format(input, scale, result); +bool TryCastToDecimal::Operation(hugeint_t input, int16_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return HugeintToDecimalCast(input, result, error_message, width, scale); } template <> -string_t StringCastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale, Vector &result) { - return DecimalToString::Format(input, scale, result); +bool TryCastToDecimal::Operation(hugeint_t input, int32_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return HugeintToDecimalCast(input, result, error_message, width, scale); } template <> -string_t StringCastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale, Vector &result) { - return DecimalToString::Format(input, scale, result); +bool TryCastToDecimal::Operation(hugeint_t input, int64_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return 
HugeintToDecimalCast(input, result, error_message, width, scale); } template <> -string_t StringCastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale, Vector &result) { - return HugeintToStringCast::FormatDecimal(input, scale, result); +bool TryCastToDecimal::Operation(hugeint_t input, hugeint_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return HugeintToDecimalCast(input, result, error_message, width, scale); +} + +//===--------------------------------------------------------------------===// +// Float/Double -> Decimal Cast +//===--------------------------------------------------------------------===// +template +bool DoubleToDecimalCast(SRC input, DST &result, string *error_message, uint8_t width, uint8_t scale) { + double value = input * NumericHelper::DOUBLE_POWERS_OF_TEN[scale]; + if (value <= -NumericHelper::DOUBLE_POWERS_OF_TEN[width] || value >= NumericHelper::DOUBLE_POWERS_OF_TEN[width]) { + string error = StringUtil::Format("Could not cast value %f to DECIMAL(%d,%d)", value, width, scale); + ; + HandleCastError::AssignError(error, error_message); + return false; + } + result = Cast::Operation(value); + return true; } template <> -int16_t CastToDecimal::Operation(bool input, uint8_t width, uint8_t scale) { - return input ? NumericHelper::POWERS_OF_TEN[scale] : 0; +bool TryCastToDecimal::Operation(float input, int16_t &result, string *error_message, uint8_t width, uint8_t scale) { + return DoubleToDecimalCast(input, result, error_message, width, scale); } template <> -int32_t CastToDecimal::Operation(bool input, uint8_t width, uint8_t scale) { - return input ? NumericHelper::POWERS_OF_TEN[scale] : 0; +bool TryCastToDecimal::Operation(float input, int32_t &result, string *error_message, uint8_t width, uint8_t scale) { + return DoubleToDecimalCast(input, result, error_message, width, scale); } template <> -int64_t CastToDecimal::Operation(bool input, uint8_t width, uint8_t scale) { - return input ? 
NumericHelper::POWERS_OF_TEN[scale] : 0; +bool TryCastToDecimal::Operation(float input, int64_t &result, string *error_message, uint8_t width, uint8_t scale) { + return DoubleToDecimalCast(input, result, error_message, width, scale); } template <> -hugeint_t CastToDecimal::Operation(bool input, uint8_t width, uint8_t scale) { - return input ? Hugeint::POWERS_OF_TEN[scale] : 0; +bool TryCastToDecimal::Operation(float input, hugeint_t &result, string *error_message, uint8_t width, uint8_t scale) { + return DoubleToDecimalCast(input, result, error_message, width, scale); } template <> -bool CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale) { - return Cast::Operation(input); +bool TryCastToDecimal::Operation(double input, int16_t &result, string *error_message, uint8_t width, uint8_t scale) { + return DoubleToDecimalCast(input, result, error_message, width, scale); } template <> -bool CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale) { - return Cast::Operation(input); +bool TryCastToDecimal::Operation(double input, int32_t &result, string *error_message, uint8_t width, uint8_t scale) { + return DoubleToDecimalCast(input, result, error_message, width, scale); } template <> -bool CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale) { - return Cast::Operation(input); +bool TryCastToDecimal::Operation(double input, int64_t &result, string *error_message, uint8_t width, uint8_t scale) { + return DoubleToDecimalCast(input, result, error_message, width, scale); } template <> -bool CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale) { - return Cast::Operation(input); +bool TryCastToDecimal::Operation(double input, hugeint_t &result, string *error_message, uint8_t width, uint8_t scale) { + return DoubleToDecimalCast(input, result, error_message, width, scale); } //===--------------------------------------------------------------------===// -// Numeric -> Decimal Cast +// Decimal -> Numeric Cast 
//===--------------------------------------------------------------------===// template -DST StandardNumericToDecimalCast(SRC input, uint8_t width, uint8_t scale) { - // check for overflow - DST max_width = NumericHelper::POWERS_OF_TEN[width - scale]; - if (int64_t(input) >= max_width || int64_t(input) <= -max_width) { - throw OutOfRangeException("Could not cast value %d to DECIMAL(%d,%d)", input, width, scale); +bool TryCastDecimalToNumeric(SRC input, DST &result, string *error_message, uint8_t scale) { + auto scaled_value = input / NumericHelper::POWERS_OF_TEN[scale]; + if (!TryCast::Operation(scaled_value, result)) { + string error = StringUtil::Format("Failed to cast decimal value %d to type %s", scaled_value, GetTypeId()); + HandleCastError::AssignError(error, error_message); + return false; } - return DST(input) * NumericHelper::POWERS_OF_TEN[scale]; + return true; } -template -hugeint_t NumericToHugeDecimalCast(SRC input, uint8_t width, uint8_t scale) { - // check for overflow - hugeint_t max_width = Hugeint::POWERS_OF_TEN[width - scale]; - hugeint_t hinput = hugeint_t(input); - if (hinput >= max_width || hinput <= -max_width) { - throw OutOfRangeException("Could not cast value %s to DECIMAL(%d,%d)", hinput.ToString(), width, scale); +template +bool TryCastHugeDecimalToNumeric(hugeint_t input, DST &result, string *error_message, uint8_t scale) { + auto scaled_value = input / Hugeint::POWERS_OF_TEN[scale]; + if (!TryCast::Operation(scaled_value, result)) { + string error = StringUtil::Format("Failed to cast decimal value %s to type %s", + ConvertToString::Operation(scaled_value), GetTypeId()); + HandleCastError::AssignError(error, error_message); + return false; } - return hinput * Hugeint::POWERS_OF_TEN[scale]; + return true; } -// TINYINT -> DECIMAL +//===--------------------------------------------------------------------===// +// Cast Decimal -> int8_t +//===--------------------------------------------------------------------===// template <> -int16_t 
CastToDecimal::Operation(int8_t input, uint8_t width, uint8_t scale) { - return StandardNumericToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int16_t input, int8_t &result, string *error_message, uint8_t width, uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } - template <> -int32_t CastToDecimal::Operation(int8_t input, uint8_t width, uint8_t scale) { - return StandardNumericToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int32_t input, int8_t &result, string *error_message, uint8_t width, uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } - template <> -int64_t CastToDecimal::Operation(int8_t input, uint8_t width, uint8_t scale) { - return StandardNumericToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int64_t input, int8_t &result, string *error_message, uint8_t width, uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } - template <> -hugeint_t CastToDecimal::Operation(int8_t input, uint8_t width, uint8_t scale) { - return NumericToHugeDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(hugeint_t input, int8_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastHugeDecimalToNumeric(input, result, error_message, scale); } -// UTINYINT -> DECIMAL +//===--------------------------------------------------------------------===// +// Cast Decimal -> int16_t +//===--------------------------------------------------------------------===// template <> -int16_t CastToDecimal::Operation(uint8_t input, uint8_t width, uint8_t scale) { - return StandardNumericToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int16_t input, int16_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } template <> -int32_t CastToDecimal::Operation(uint8_t 
input, uint8_t width, uint8_t scale) { - return StandardNumericToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int32_t input, int16_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } template <> -int64_t CastToDecimal::Operation(uint8_t input, uint8_t width, uint8_t scale) { - return StandardNumericToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int64_t input, int16_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } template <> -hugeint_t CastToDecimal::Operation(uint8_t input, uint8_t width, uint8_t scale) { - return NumericToHugeDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(hugeint_t input, int16_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastHugeDecimalToNumeric(input, result, error_message, scale); } -// SMALLINT -> DECIMAL +//===--------------------------------------------------------------------===// +// Cast Decimal -> int32_t +//===--------------------------------------------------------------------===// template <> -int16_t CastToDecimal::Operation(int16_t input, uint8_t width, uint8_t scale) { - return StandardNumericToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int16_t input, int32_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } - template <> -int32_t CastToDecimal::Operation(int16_t input, uint8_t width, uint8_t scale) { - return StandardNumericToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int32_t input, int32_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } - template <> -int64_t CastToDecimal::Operation(int16_t input, uint8_t width, 
uint8_t scale) { - return StandardNumericToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int64_t input, int32_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } - template <> -hugeint_t CastToDecimal::Operation(int16_t input, uint8_t width, uint8_t scale) { - return NumericToHugeDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(hugeint_t input, int32_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastHugeDecimalToNumeric(input, result, error_message, scale); } -// USMALLINT -> DECIMAL +//===--------------------------------------------------------------------===// +// Cast Decimal -> int64_t +//===--------------------------------------------------------------------===// template <> -int16_t CastToDecimal::Operation(uint16_t input, uint8_t width, uint8_t scale) { - return StandardNumericToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int16_t input, int64_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } template <> -int32_t CastToDecimal::Operation(uint16_t input, uint8_t width, uint8_t scale) { - return StandardNumericToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int32_t input, int64_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } - template <> -int64_t CastToDecimal::Operation(uint16_t input, uint8_t width, uint8_t scale) { - return StandardNumericToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int64_t input, int64_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } template <> -hugeint_t CastToDecimal::Operation(uint16_t input, uint8_t width, uint8_t scale) { 
- return NumericToHugeDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(hugeint_t input, int64_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastHugeDecimalToNumeric(input, result, error_message, scale); } -// INTEGER -> DECIMAL +//===--------------------------------------------------------------------===// +// Cast Decimal -> uint8_t +//===--------------------------------------------------------------------===// template <> -int16_t CastToDecimal::Operation(int32_t input, uint8_t width, uint8_t scale) { - return StandardNumericToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int16_t input, uint8_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } - template <> -int32_t CastToDecimal::Operation(int32_t input, uint8_t width, uint8_t scale) { - return StandardNumericToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int32_t input, uint8_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } - template <> -int64_t CastToDecimal::Operation(int32_t input, uint8_t width, uint8_t scale) { - return StandardNumericToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int64_t input, uint8_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } - template <> -hugeint_t CastToDecimal::Operation(int32_t input, uint8_t width, uint8_t scale) { - return NumericToHugeDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(hugeint_t input, uint8_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastHugeDecimalToNumeric(input, result, error_message, scale); } -// UINTEGER -> DECIMAL - -template <> -int16_t CastToDecimal::Operation(uint32_t input, uint8_t width, uint8_t 
scale) { - return StandardNumericToDecimalCast(input, width, scale); -} +//===--------------------------------------------------------------------===// +// Cast Decimal -> uint16_t +//===--------------------------------------------------------------------===// template <> -int32_t CastToDecimal::Operation(uint32_t input, uint8_t width, uint8_t scale) { - return StandardNumericToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int16_t input, uint16_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } template <> -int64_t CastToDecimal::Operation(uint32_t input, uint8_t width, uint8_t scale) { - return StandardNumericToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int32_t input, uint16_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } template <> -hugeint_t CastToDecimal::Operation(uint32_t input, uint8_t width, uint8_t scale) { - return NumericToHugeDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int64_t input, uint16_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } - -// BIGINT -> DECIMAL template <> -int16_t CastToDecimal::Operation(int64_t input, uint8_t width, uint8_t scale) { - return StandardNumericToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(hugeint_t input, uint16_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastHugeDecimalToNumeric(input, result, error_message, scale); } +//===--------------------------------------------------------------------===// +// Cast Decimal -> uint32_t +//===--------------------------------------------------------------------===// template <> -int32_t CastToDecimal::Operation(int64_t input, uint8_t width, uint8_t scale) { - return 
StandardNumericToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int16_t input, uint32_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } - template <> -int64_t CastToDecimal::Operation(int64_t input, uint8_t width, uint8_t scale) { - return StandardNumericToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int32_t input, uint32_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } - template <> -hugeint_t CastToDecimal::Operation(int64_t input, uint8_t width, uint8_t scale) { - return NumericToHugeDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int64_t input, uint32_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } - -// BIGINT -> DECIMAL - template <> -int16_t CastToDecimal::Operation(uint64_t input, uint8_t width, uint8_t scale) { - return StandardNumericToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(hugeint_t input, uint32_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastHugeDecimalToNumeric(input, result, error_message, scale); } +//===--------------------------------------------------------------------===// +// Cast Decimal -> uint64_t +//===--------------------------------------------------------------------===// template <> -int32_t CastToDecimal::Operation(uint64_t input, uint8_t width, uint8_t scale) { - return StandardNumericToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int16_t input, uint64_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } - template <> -int64_t CastToDecimal::Operation(uint64_t input, uint8_t width, uint8_t scale) { - return 
StandardNumericToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int32_t input, uint64_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } - template <> -hugeint_t CastToDecimal::Operation(uint64_t input, uint8_t width, uint8_t scale) { - return NumericToHugeDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int64_t input, uint64_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } - -template -DST HugeintToDecimalCast(hugeint_t input, uint8_t width, uint8_t scale) { - // check for overflow - hugeint_t max_width = Hugeint::POWERS_OF_TEN[width - scale]; - if (input >= max_width || input <= -max_width) { - throw OutOfRangeException("Could not cast value %s to DECIMAL(%d,%d)", input.ToString(), width, scale); - } - return Hugeint::Cast(input * Hugeint::POWERS_OF_TEN[scale]); +template <> +bool TryCastFromDecimal::Operation(hugeint_t input, uint64_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastHugeDecimalToNumeric(input, result, error_message, scale); } -// HUGEINT -> DECIMAL +//===--------------------------------------------------------------------===// +// Cast Decimal -> hugeint_t +//===--------------------------------------------------------------------===// template <> -int16_t CastToDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale) { - return HugeintToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int16_t input, hugeint_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } - template <> -int32_t CastToDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale) { - return HugeintToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int32_t input, hugeint_t &result, 
string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } - template <> -int64_t CastToDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale) { - return HugeintToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int64_t input, hugeint_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToNumeric(input, result, error_message, scale); } - template <> -hugeint_t CastToDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale) { - return HugeintToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(hugeint_t input, hugeint_t &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastHugeDecimalToNumeric(input, result, error_message, scale); } +//===--------------------------------------------------------------------===// +// Decimal -> Float/Double Cast +//===--------------------------------------------------------------------===// template -DST DoubleToDecimalCast(SRC input, uint8_t width, uint8_t scale) { - double value = input * NumericHelper::DOUBLE_POWERS_OF_TEN[scale]; - if (value <= -NumericHelper::DOUBLE_POWERS_OF_TEN[width] || value >= NumericHelper::DOUBLE_POWERS_OF_TEN[width]) { - throw OutOfRangeException("Could not cast value %f to DECIMAL(%d,%d)", value, width, scale); - } - return Cast::Operation(value); +bool TryCastDecimalToFloatingPoint(SRC input, DST &result, uint8_t scale) { + result = Cast::Operation(input) / DST(NumericHelper::DOUBLE_POWERS_OF_TEN[scale]); + return true; } -// FLOAT -> DECIMAL +// DECIMAL -> FLOAT template <> -int16_t CastToDecimal::Operation(float input, uint8_t width, uint8_t scale) { - return DoubleToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int16_t input, float &result, string *error_message, uint8_t width, uint8_t scale) { + return TryCastDecimalToFloatingPoint(input, result, scale); } template <> -int32_t 
CastToDecimal::Operation(float input, uint8_t width, uint8_t scale) { - return DoubleToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int32_t input, float &result, string *error_message, uint8_t width, uint8_t scale) { + return TryCastDecimalToFloatingPoint(input, result, scale); } template <> -int64_t CastToDecimal::Operation(float input, uint8_t width, uint8_t scale) { - return DoubleToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int64_t input, float &result, string *error_message, uint8_t width, uint8_t scale) { + return TryCastDecimalToFloatingPoint(input, result, scale); } template <> -hugeint_t CastToDecimal::Operation(float input, uint8_t width, uint8_t scale) { - return DoubleToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(hugeint_t input, float &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToFloatingPoint(input, result, scale); } -// DOUBLE -> DECIMAL +// DECIMAL -> DOUBLE template <> -int16_t CastToDecimal::Operation(double input, uint8_t width, uint8_t scale) { - return DoubleToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int16_t input, double &result, string *error_message, uint8_t width, uint8_t scale) { + return TryCastDecimalToFloatingPoint(input, result, scale); } template <> -int32_t CastToDecimal::Operation(double input, uint8_t width, uint8_t scale) { - return DoubleToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int32_t input, double &result, string *error_message, uint8_t width, uint8_t scale) { + return TryCastDecimalToFloatingPoint(input, result, scale); } template <> -int64_t CastToDecimal::Operation(double input, uint8_t width, uint8_t scale) { - return DoubleToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(int64_t input, double &result, string *error_message, uint8_t width, uint8_t scale) { + return TryCastDecimalToFloatingPoint(input, result, scale); } 
template <> -hugeint_t CastToDecimal::Operation(double input, uint8_t width, uint8_t scale) { - return DoubleToDecimalCast(input, width, scale); +bool TryCastFromDecimal::Operation(hugeint_t input, double &result, string *error_message, uint8_t width, + uint8_t scale) { + return TryCastDecimalToFloatingPoint(input, result, scale); } -//===--------------------------------------------------------------------===// -// Decimal -> Numeric Cast -//===--------------------------------------------------------------------===// -template -DST CastDecimalToNumeric(SRC input, uint8_t scale) { - return Cast::Operation(input / NumericHelper::POWERS_OF_TEN[scale]); -} +} // namespace duckdb -template -DST CastHugeDecimalToNumeric(hugeint_t input, uint8_t scale) { - return Cast::Operation(input / Hugeint::POWERS_OF_TEN[scale]); -} -// DECIMAL -> TINYINT -template <> -int8_t CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); + + +namespace duckdb { + +template +string StandardStringCast(T input) { + Vector v(LogicalType::VARCHAR); + return StringCast::Operation(input, v).GetString(); } template <> -int8_t CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +string ConvertToString::Operation(bool input) { + return StandardStringCast(input); } - template <> -int8_t CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +string ConvertToString::Operation(int8_t input) { + return StandardStringCast(input); } - template <> -int8_t CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale) { - return CastHugeDecimalToNumeric(input, scale); +string ConvertToString::Operation(int16_t input) { + return StandardStringCast(input); } - -// DECIMAL -> UTINYINT template <> -uint8_t CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale) { - return 
CastDecimalToNumeric(input, scale); +string ConvertToString::Operation(int32_t input) { + return StandardStringCast(input); } template <> -uint8_t CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +string ConvertToString::Operation(int64_t input) { + return StandardStringCast(input); } template <> -uint8_t CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +string ConvertToString::Operation(uint8_t input) { + return StandardStringCast(input); } template <> -uint8_t CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale) { - return CastHugeDecimalToNumeric(input, scale); +string ConvertToString::Operation(uint16_t input) { + return StandardStringCast(input); } - -// DECIMAL -> SMALLINT template <> -int16_t CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +string ConvertToString::Operation(uint32_t input) { + return StandardStringCast(input); } - template <> -int16_t CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +string ConvertToString::Operation(uint64_t input) { + return StandardStringCast(input); } - template <> -int16_t CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +string ConvertToString::Operation(hugeint_t input) { + return StandardStringCast(input); } - template <> -int16_t CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale) { - return CastHugeDecimalToNumeric(input, scale); +string ConvertToString::Operation(float input) { + return StandardStringCast(input); } - -// DECIMAL -> USMALLINT template <> -uint16_t CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +string ConvertToString::Operation(double input) 
{ + return StandardStringCast(input); } - template <> -uint16_t CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +string ConvertToString::Operation(interval_t input) { + return StandardStringCast(input); } - template <> -uint16_t CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +string ConvertToString::Operation(date_t input) { + return StandardStringCast(input); } - template <> -uint16_t CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale) { - return CastHugeDecimalToNumeric(input, scale); +string ConvertToString::Operation(dtime_t input) { + return StandardStringCast(input); } - -// DECIMAL -> INTEGER template <> -int32_t CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +string ConvertToString::Operation(timestamp_t input) { + return StandardStringCast(input); } - template <> -int32_t CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +string ConvertToString::Operation(string_t input) { + return input.GetString(); } +} // namespace duckdb + + + + + + + + + + + +namespace duckdb { + +//===--------------------------------------------------------------------===// +// Cast Numeric -> String +//===--------------------------------------------------------------------===// template <> -int32_t CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +string_t StringCast::Operation(bool input, Vector &vector) { + if (input) { + return StringVector::AddString(vector, "true", 4); + } else { + return StringVector::AddString(vector, "false", 5); + } } template <> -int32_t CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale) { - return CastHugeDecimalToNumeric(input, scale); +string_t 
StringCast::Operation(int8_t input, Vector &vector) { + return NumericHelper::FormatSigned(input, vector); } -// DECIMAL -> UINTEGER template <> -uint32_t CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +string_t StringCast::Operation(int16_t input, Vector &vector) { + return NumericHelper::FormatSigned(input, vector); } template <> -uint32_t CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +string_t StringCast::Operation(int32_t input, Vector &vector) { + return NumericHelper::FormatSigned(input, vector); } + template <> -uint32_t CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +string_t StringCast::Operation(int64_t input, Vector &vector) { + return NumericHelper::FormatSigned(input, vector); } template <> -uint32_t CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale) { - return CastHugeDecimalToNumeric(input, scale); +duckdb::string_t StringCast::Operation(uint8_t input, Vector &vector) { + return NumericHelper::FormatSigned(input, vector); } - -// DECIMAL -> BIGINT template <> -int64_t CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +duckdb::string_t StringCast::Operation(uint16_t input, Vector &vector) { + return NumericHelper::FormatSigned(input, vector); } - template <> -int64_t CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +duckdb::string_t StringCast::Operation(uint32_t input, Vector &vector) { + return NumericHelper::FormatSigned(input, vector); } - template <> -int64_t CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +duckdb::string_t StringCast::Operation(uint64_t input, Vector &vector) { + return 
NumericHelper::FormatSigned(input, vector); } template <> -int64_t CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale) { - return CastHugeDecimalToNumeric(input, scale); +string_t StringCast::Operation(float input, Vector &vector) { + std::string s = duckdb_fmt::format("{}", input); + return StringVector::AddString(vector, s); } -// DECIMAL -> UBIGINT template <> -uint64_t CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +string_t StringCast::Operation(double input, Vector &vector) { + std::string s = duckdb_fmt::format("{}", input); + return StringVector::AddString(vector, s); } + template <> -uint64_t CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +string_t StringCast::Operation(interval_t input, Vector &vector) { + char buffer[70]; + idx_t length = IntervalToStringCast::Format(input, buffer); + return StringVector::AddString(vector, buffer, length); } + template <> -uint64_t CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +duckdb::string_t StringCast::Operation(hugeint_t input, Vector &vector) { + return HugeintToStringCast::FormatSigned(input, vector); } + template <> -uint64_t CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale) { - return CastHugeDecimalToNumeric(input, scale); +duckdb::string_t StringCast::Operation(date_t input, Vector &vector) { + int32_t date[3]; + Date::Convert(input, date[0], date[1], date[2]); + + idx_t year_length; + bool add_bc; + idx_t length = DateToStringCast::Length(date, year_length, add_bc); + + string_t result = StringVector::EmptyString(vector, length); + auto data = result.GetDataWriteable(); + + DateToStringCast::Format(data, date, year_length, add_bc); + + result.Finalize(); + return result; } -// DECIMAL -> HUGEINT template <> -hugeint_t 
CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +duckdb::string_t StringCast::Operation(dtime_t input, Vector &vector) { + int32_t time[4]; + Time::Convert(input, time[0], time[1], time[2], time[3]); + + char micro_buffer[10]; + idx_t length = TimeToStringCast::Length(time, micro_buffer); + + string_t result = StringVector::EmptyString(vector, length); + auto data = result.GetDataWriteable(); + + TimeToStringCast::Format(data, length, time, micro_buffer); + + result.Finalize(); + return result; } template <> -hugeint_t CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +duckdb::string_t StringCast::Operation(timestamp_t input, Vector &vector) { + date_t date_entry; + dtime_t time_entry; + Timestamp::Convert(input, date_entry, time_entry); + + int32_t date[3], time[4]; + Date::Convert(date_entry, date[0], date[1], date[2]); + Time::Convert(time_entry, time[0], time[1], time[2], time[3]); + + // format for timestamp is DATE TIME (separated by space) + idx_t year_length; + bool add_bc; + char micro_buffer[6]; + idx_t date_length = DateToStringCast::Length(date, year_length, add_bc); + idx_t time_length = TimeToStringCast::Length(time, micro_buffer); + idx_t length = date_length + time_length + 1; + + string_t result = StringVector::EmptyString(vector, length); + auto data = result.GetDataWriteable(); + + DateToStringCast::Format(data, date, year_length, add_bc); + data[date_length] = ' '; + TimeToStringCast::Format(data + date_length + 1, time_length, time, micro_buffer); + + result.Finalize(); + return result; } template <> -hugeint_t CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale) { - return CastDecimalToNumeric(input, scale); +duckdb::string_t StringCast::Operation(duckdb::string_t input, Vector &result) { + return StringVector::AddStringOrBlob(result, input); } -template <> -hugeint_t 
CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale) { - return CastHugeDecimalToNumeric(input, scale); +} // namespace duckdb + + + + +namespace duckdb { +class PipeFile : public FileHandle { +public: + PipeFile(unique_ptr child_handle_p, const string &path) + : FileHandle(pipe_fs, path), child_handle(move(child_handle_p)) { + } + + int64_t ReadChunk(void *buffer, int64_t nr_bytes); + int64_t WriteChunk(void *buffer, int64_t nr_bytes); + + PipeFileSystem pipe_fs; + unique_ptr child_handle; + +protected: + void Close() override { + } +}; + +int64_t PipeFile::ReadChunk(void *buffer, int64_t nr_bytes) { + return child_handle->Read(buffer, nr_bytes); +} +int64_t PipeFile::WriteChunk(void *buffer, int64_t nr_bytes) { + return child_handle->Write(buffer, nr_bytes); } -template -DST CastDecimalToFloatingPoint(SRC input, uint8_t scale) { - return Cast::Operation(input) / DST(NumericHelper::DOUBLE_POWERS_OF_TEN[scale]); +int64_t PipeFileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes) { + auto &pipe = (PipeFile &)handle; + return pipe.ReadChunk(buffer, nr_bytes); } -// DECIMAL -> FLOAT -template <> -float CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale) { - return CastDecimalToFloatingPoint(input, scale); +int64_t PipeFileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes) { + auto &pipe = (PipeFile &)handle; + return pipe.WriteChunk(buffer, nr_bytes); } -template <> -float CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale) { - return CastDecimalToFloatingPoint(input, scale); +// LCOV_EXCL_START +void PipeFileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) { + throw NotImplementedException("Unsupported: Random read from pipe/stream"); } -template <> -float CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale) { - return CastDecimalToFloatingPoint(input, scale); +void PipeFileSystem::Write(FileHandle &handle, void *buffer, int64_t 
nr_bytes, idx_t location) { + throw NotImplementedException("Unsupported: Random write to pipe/stream"); +} +void PipeFileSystem::Truncate(FileHandle &handle, int64_t new_size) { + throw NotImplementedException("Unsupported: Truncate pipe/stream"); } -template <> -float CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale) { - return CastDecimalToFloatingPoint(input, scale); +void PipeFileSystem::FileSync(FileHandle &handle) { + throw NotImplementedException("Unsupported: Sync pipe/stream"); } -// DECIMAL -> DOUBLE -template <> -double CastFromDecimal::Operation(int16_t input, uint8_t width, uint8_t scale) { - return CastDecimalToFloatingPoint(input, scale); +void PipeFileSystem::Seek(FileHandle &handle, idx_t location) { + throw NotImplementedException("Unsupported: Seek within pipe/stream"); } -template <> -double CastFromDecimal::Operation(int32_t input, uint8_t width, uint8_t scale) { - return CastDecimalToFloatingPoint(input, scale); +void PipeFileSystem::Reset(FileHandle &handle) { + throw NotImplementedException("Unsupported: Reset pipe/stream"); } +// LCOV_EXCL_STOP -template <> -double CastFromDecimal::Operation(int64_t input, uint8_t width, uint8_t scale) { - return CastDecimalToFloatingPoint(input, scale); +int64_t PipeFileSystem::GetFileSize(FileHandle &handle) { + return 0; } -template <> -double CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale) { - return CastDecimalToFloatingPoint(input, scale); +unique_ptr PipeFileSystem::OpenPipe(unique_ptr handle) { + auto path = handle->path; + return make_unique(move(handle), path); } } // namespace duckdb @@ -16987,6 +20264,7 @@ double CastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_t scale) namespace duckdb { +// LCOV_EXCL_START void Printer::Print(const string &str) { #ifndef DUCKDB_DISABLE_PRINT fprintf(stderr, "%s\n", str.c_str()); @@ -17008,6 +20286,7 @@ void Printer::FinishProgressBarPrint(const char *pbstr, int pbwidth) { fflush(stdout); #endif 
} +// LCOV_EXCL_STOP } // namespace duckdb @@ -17037,18 +20316,20 @@ int ProgressBar::GetCurrentPercentage() { void ProgressBar::Start() { #ifndef DUCKDB_NO_THREADS + stop = false; current_percentage = 0; - progress_bar_thread = std::thread(&ProgressBar::ProgressBarThread, this); + progress_bar_thread = thread(&ProgressBar::ProgressBarThread, this); #endif } +ProgressBar::~ProgressBar() { + Stop(); +} + void ProgressBar::Stop() { #ifndef DUCKDB_NO_THREADS if (progress_bar_thread.joinable()) { - { - std::lock_guard l(m); - stop = true; - } + stop = true; c.notify_one(); progress_bar_thread.join(); if (supported && current_percentage > 0 && executor->context.print_progress_bar) { @@ -17058,10 +20339,175 @@ void ProgressBar::Stop() { #endif } } // namespace duckdb + + +#include +#include // strlen() on Solaris +#include + +namespace duckdb { + +bool IsLittleEndian() { + int n = 1; + if (*(char *)&n == 1) { + return true; + } else { + return false; + } +} + +uint8_t FlipSign(uint8_t key_byte) { + return key_byte ^ 128; +} + +uint32_t EncodeFloat(float x) { + uint64_t buff; + + //! zero + if (x == 0) { + buff = 0; + buff |= (1u << 31); + return buff; + } + //! infinity + if (x > FLT_MAX) { + return UINT_MAX; + } + //! -infinity + if (x < -FLT_MAX) { + return 0; + } + buff = Load((const_data_ptr_t)&x); + if ((buff & (1u << 31)) == 0) { //! +0 and positive numbers + buff |= (1u << 31); + } else { //! negative numbers + buff = ~buff; //! complement 1 + } + + return buff; +} + +uint64_t EncodeDouble(double x) { + uint64_t buff; + //! zero + if (x == 0) { + buff = 0; + buff += (1ull << 63); + return buff; + } + //! infinity + if (x > DBL_MAX) { + return ULLONG_MAX; + } + //! -infinity + if (x < -DBL_MAX) { + return 0; + } + buff = Load((const_data_ptr_t)&x); + if (buff < (1ull << 63)) { //! +0 and positive numbers + buff += (1ull << 63); + } else { //! negative numbers + buff = ~buff; //! 
complement 1 + } + return buff; +} + +template <> +void EncodeData(data_ptr_t dataptr, bool value, bool is_little_endian) { + Store(value ? 1 : 0, dataptr); +} + +template <> +void EncodeData(data_ptr_t dataptr, int8_t value, bool is_little_endian) { + Store(value, dataptr); + dataptr[0] = FlipSign(dataptr[0]); +} + +template <> +void EncodeData(data_ptr_t dataptr, int16_t value, bool is_little_endian) { + Store(is_little_endian ? BSWAP16(value) : value, dataptr); + dataptr[0] = FlipSign(dataptr[0]); +} + +template <> +void EncodeData(data_ptr_t dataptr, int32_t value, bool is_little_endian) { + Store(is_little_endian ? BSWAP32(value) : value, dataptr); + dataptr[0] = FlipSign(dataptr[0]); +} + +template <> +void EncodeData(data_ptr_t dataptr, int64_t value, bool is_little_endian) { + Store(is_little_endian ? BSWAP64(value) : value, dataptr); + dataptr[0] = FlipSign(dataptr[0]); +} + +template <> +void EncodeData(data_ptr_t dataptr, uint8_t value, bool is_little_endian) { + Store(value, dataptr); +} + +template <> +void EncodeData(data_ptr_t dataptr, uint16_t value, bool is_little_endian) { + Store(is_little_endian ? BSWAP16(value) : value, dataptr); +} + +template <> +void EncodeData(data_ptr_t dataptr, uint32_t value, bool is_little_endian) { + Store(is_little_endian ? BSWAP32(value) : value, dataptr); +} + +template <> +void EncodeData(data_ptr_t dataptr, uint64_t value, bool is_little_endian) { + Store(is_little_endian ? BSWAP64(value) : value, dataptr); +} + +template <> +void EncodeData(data_ptr_t dataptr, hugeint_t value, bool is_little_endian) { + EncodeData(dataptr, value.upper, is_little_endian); + EncodeData(dataptr + sizeof(value.upper), value.lower, is_little_endian); +} + +template <> +void EncodeData(data_ptr_t dataptr, float value, bool is_little_endian) { + uint32_t converted_value = EncodeFloat(value); + Store(is_little_endian ? 
BSWAP32(converted_value) : converted_value, dataptr); +} + +template <> +void EncodeData(data_ptr_t dataptr, double value, bool is_little_endian) { + uint64_t converted_value = EncodeDouble(value); + Store(is_little_endian ? BSWAP64(converted_value) : converted_value, dataptr); +} + +template <> +void EncodeData(data_ptr_t dataptr, interval_t value, bool is_little_endian) { + EncodeData(dataptr, value.months, is_little_endian); + dataptr += sizeof(value.months); + EncodeData(dataptr, value.days, is_little_endian); + dataptr += sizeof(value.days); + EncodeData(dataptr, value.micros, is_little_endian); +} + +void EncodeStringDataPrefix(data_ptr_t dataptr, string_t value, idx_t prefix_len) { + auto len = value.GetSize(); + memcpy(dataptr, value.GetDataUnsafe(), MinValue(len, prefix_len)); + if (len < prefix_len) { + memset(dataptr + len, '\0', prefix_len - len); + } +} + +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/serializer/buffered_deserializer.hpp +// duckdb/common/types/row_operations/row_aggregate.cpp +// +// +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/row_operations/row_operations.hpp // // //===----------------------------------------------------------------------===// @@ -17073,45 +20519,105 @@ void ProgressBar::Stop() { namespace duckdb { -class BufferedDeserializer : public Deserializer { -public: - BufferedDeserializer(data_ptr_t ptr, idx_t data_size); - explicit BufferedDeserializer(BufferedSerializer &serializer); +struct AggregateObject; +class DataChunk; +class RowLayout; +class RowDataCollection; +struct SelectionVector; +class StringHeap; +class Vector; +struct VectorData; - data_ptr_t ptr; - data_ptr_t endptr; +// RowOperations contains a set of operations that operate on data using a RowLayout +struct 
RowOperations { + //===--------------------------------------------------------------------===// + // Aggregation Operators + //===--------------------------------------------------------------------===// + //! initialize - unaligned addresses + static void InitializeStates(RowLayout &layout, Vector &addresses, const SelectionVector &sel, idx_t count); + //! destructor - unaligned addresses, updated + static void DestroyStates(RowLayout &layout, Vector &addresses, idx_t count); + //! update - aligned addresses + static void UpdateStates(AggregateObject &aggr, Vector &addresses, DataChunk &payload, idx_t arg_idx, idx_t count); + //! filtered update - aligned addresses + static void UpdateFilteredStates(AggregateObject &aggr, Vector &addresses, DataChunk &payload, idx_t arg_idx); + //! combine - unaligned addresses, updated + static void CombineStates(RowLayout &layout, Vector &sources, Vector &targets, idx_t count); + //! finalize - unaligned addresses, updated + static void FinalizeStates(RowLayout &layout, Vector &addresses, DataChunk &result, idx_t aggr_idx); -public: - void ReadData(data_ptr_t buffer, uint64_t read_size) override; -}; + //===--------------------------------------------------------------------===// + // Read/Write Operators + //===--------------------------------------------------------------------===// + //! Scatter group data to the rows. Initialises the ValidityMask. + static void Scatter(DataChunk &columns, VectorData col_data[], const RowLayout &layout, Vector &rows, + RowDataCollection &string_heap, const SelectionVector &sel, idx_t count); + //! Gather a single column. 
+ static void Gather(Vector &rows, const SelectionVector &row_sel, Vector &col, const SelectionVector &col_sel, + const idx_t count, const idx_t col_offset, const idx_t col_no); -} // namespace duckdb + //===--------------------------------------------------------------------===// + // Comparison Operators + //===--------------------------------------------------------------------===// + //! Compare a block of key data against the row values to produce an updated selection that matches + //! and a second (optional) selection of non-matching values. + //! Returns the number of matches remaining in the selection. + using Predicates = vector; + static idx_t Match(DataChunk &columns, VectorData col_data[], const RowLayout &layout, Vector &rows, + const Predicates &predicates, SelectionVector &sel, idx_t count, SelectionVector *no_match, + idx_t &no_match_count); -#include + //===--------------------------------------------------------------------===// + // Heap Operators + //===--------------------------------------------------------------------===// + //! Compute the entry sizes of a vector with variable size type (used before building heap buffer space). + static void ComputeEntrySizes(Vector &v, idx_t entry_sizes[], idx_t vcount, idx_t ser_count, + const SelectionVector &sel, idx_t offset = 0); + //! Compute the entry sizes of vector data with variable size type (used before building heap buffer space). + static void ComputeEntrySizes(Vector &v, VectorData &vdata, idx_t entry_sizes[], idx_t vcount, idx_t ser_count, + const SelectionVector &sel, idx_t offset = 0); + //! Scatter vector with variable size type to the heap. + static void HeapScatter(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, idx_t col_idx, + data_ptr_t *key_locations, data_ptr_t *validitymask_locations, idx_t offset = 0); + //! Scatter vector data with variable size type to the heap. 
+ static void HeapScatterVData(VectorData &vdata, PhysicalType type, const SelectionVector &sel, idx_t ser_count, + idx_t col_idx, data_ptr_t *key_locations, data_ptr_t *validitymask_locations, + idx_t offset = 0); + //! Gather a single column with variable size type from the heap. + static void HeapGather(Vector &v, const idx_t &vcount, const SelectionVector &sel, const idx_t &col_idx, + data_ptr_t key_locations[], data_ptr_t validitymask_locations[]); -namespace duckdb { + //===--------------------------------------------------------------------===// + // Sorting Operators + //===--------------------------------------------------------------------===// + //! Scatter vector data to the rows in radix-sortable format. + static void RadixScatter(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, + data_ptr_t key_locations[], bool desc, bool has_null, bool nulls_first, idx_t prefix_len, + idx_t width, idx_t offset = 0); -BufferedDeserializer::BufferedDeserializer(data_ptr_t ptr, idx_t data_size) : ptr(ptr), endptr(ptr + data_size) { -} + //===--------------------------------------------------------------------===// + // Out-of-Core Operators + //===--------------------------------------------------------------------===// + //! Swizzles blob pointers to offset within heap row + static void SwizzleColumns(const RowLayout &layout, const data_ptr_t base_row_ptr, const idx_t count); + //! Swizzles the base pointer of each row to offset within heap block + static void SwizzleHeapPointer(const RowLayout &layout, data_ptr_t row_ptr, const data_ptr_t heap_base_ptr, + const idx_t count); + //! Swizzles the base offset of each row back to a pointer + static void UnswizzleHeapPointer(const RowLayout &layout, data_ptr_t row_ptr, const data_ptr_t heap_base_ptr, + const idx_t count); + //! 
Unswizzles offsets back to pointers to blobs + static void UnswizzleColumns(const RowLayout &layout, const data_ptr_t base_row_ptr, const idx_t count); +}; -BufferedDeserializer::BufferedDeserializer(BufferedSerializer &serializer) - : BufferedDeserializer(serializer.data, serializer.maximum_size) { -} +} // namespace duckdb -void BufferedDeserializer::ReadData(data_ptr_t buffer, idx_t read_size) { - if (ptr + read_size > endptr) { - throw SerializationException("Failed to deserialize: not enough data in buffer to fulfill read request"); - } - memcpy(buffer, ptr, read_size); - ptr += read_size; -} -} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/serializer/buffered_file_reader.hpp +// duckdb/common/types/row_layout.hpp // // //===----------------------------------------------------------------------===// @@ -17120,30 +20626,116 @@ void BufferedDeserializer::ReadData(data_ptr_t buffer, idx_t read_size) { + + + namespace duckdb { +class BoundAggregateExpression; -class BufferedFileReader : public Deserializer { -public: - BufferedFileReader(FileSystem &fs, const char *path); +struct AggregateObject { + AggregateObject(AggregateFunction function, FunctionData *bind_data, idx_t child_count, idx_t payload_size, + bool distinct, PhysicalType return_type, Expression *filter = nullptr) + : function(move(function)), bind_data(bind_data), child_count(child_count), payload_size(payload_size), + distinct(distinct), return_type(return_type), filter(filter) { + } - FileSystem &fs; - unique_ptr data; - idx_t offset; - idx_t read_data; - unique_ptr handle; + AggregateFunction function; + FunctionData *bind_data; + idx_t child_count; + idx_t payload_size; + bool distinct; + PhysicalType return_type; + Expression *filter = nullptr; + + static vector CreateAggregateObjects(const vector &bindings); +}; +class RowLayout { public: - void ReadData(data_ptr_t buffer, uint64_t read_size) override; - //! 
Returns true if the reader has finished reading the entire file - bool Finished(); + using Aggregates = vector; + using ValidityBytes = TemplatedValidityMask; - idx_t FileSize() { - return file_size; + static inline idx_t Align(idx_t n) { + return ((n + 7) / 8) * 8; + } + + //! Creates an empty RowLayout + RowLayout(); + +public: + //! Initializes the RowLayout with the specified types and aggregates to an empty RowLayout + void Initialize(vector types_p, Aggregates aggregates_p, bool align = true); + //! Initializes the RowLayout with the specified types to an empty RowLayout + void Initialize(vector types, bool align = true); + //! Initializes the RowLayout with the specified aggregates to an empty RowLayout + void Initialize(Aggregates aggregates_p, bool align = true); + //! Returns the number of data columns + inline idx_t ColumnCount() const { + return types.size(); + } + //! Returns a list of the column types for this data chunk + inline const vector &GetTypes() const { + return types; + } + //! Returns the number of aggregates + inline idx_t AggregateCount() const { + return aggregates.size(); + } + //! Returns a list of the aggregates for this data chunk + inline Aggregates &GetAggregates() { + return aggregates; + } + //! Returns the total width required for each row, including padding + inline idx_t GetRowWidth() const { + return row_width; + } + //! Returns the offset to the start of the data + inline idx_t GetDataOffset() const { + return flag_width; + } + //! Returns the total width required for the data, including padding + inline idx_t GetDataWidth() const { + return data_width; + } + //! Returns the offset to the start of the aggregates + inline idx_t GetAggrOffset() const { + return flag_width + data_width; + } + //! Returns the total width required for the aggregates, including padding + inline idx_t GetAggrWidth() const { + return aggr_width; + } + //! 
Returns the column offsets into each row + inline const vector &GetOffsets() const { + return offsets; + } + //! Returns whether all columns in this layout are constant size + inline bool AllConstant() const { + return all_constant; + } + inline idx_t GetHeapPointerOffset() const { + return heap_pointer_offset; } private: - idx_t file_size; - idx_t total_read; + //! The types of the data columns + vector types; + //! The aggregate functions + Aggregates aggregates; + //! The width of the validity header + idx_t flag_width; + //! The width of the data portion + idx_t data_width; + //! The width of the aggregate state portion + idx_t aggr_width; + //! The width of the entire row + idx_t row_width; + //! The offsets to the columns and aggregate data in each row + vector offsets; + //! Whether all columns in this layout are constant size + bool all_constant; + //! Offset to the pointer to the heap for each row + idx_t heap_pointer_offset; }; } // namespace duckdb @@ -17151,1419 +20743,2161 @@ class BufferedFileReader : public Deserializer { -#include -#include - namespace duckdb { -BufferedFileReader::BufferedFileReader(FileSystem &fs, const char *path) - : fs(fs), data(unique_ptr(new data_t[FILE_BUFFER_SIZE])), offset(0), read_data(0), total_read(0) { - handle = fs.OpenFile(path, FileFlags::FILE_FLAGS_READ, FileLockType::READ_LOCK); - file_size = fs.GetFileSize(*handle); -} +void RowOperations::InitializeStates(RowLayout &layout, Vector &addresses, const SelectionVector &sel, idx_t count) { + if (count == 0) { + return; + } + auto pointers = FlatVector::GetData(addresses); + auto &offsets = layout.GetOffsets(); + auto aggr_idx = layout.ColumnCount(); -void BufferedFileReader::ReadData(data_ptr_t target_buffer, uint64_t read_size) { - // first copy anything we can from the buffer - data_ptr_t end_ptr = target_buffer + read_size; - while (true) { - idx_t to_read = MinValue(end_ptr - target_buffer, read_data - offset); - if (to_read > 0) { - memcpy(target_buffer, 
data.get() + offset, to_read); - offset += to_read; - target_buffer += to_read; + for (auto &aggr : layout.GetAggregates()) { + for (idx_t i = 0; i < count; ++i) { + auto row_idx = sel.get_index(i); + auto row = pointers[row_idx]; + aggr.function.initialize(row + offsets[aggr_idx]); } - if (target_buffer < end_ptr) { - D_ASSERT(offset == read_data); - total_read += read_data; - // did not finish reading yet but exhausted buffer - // read data into buffer - offset = 0; - read_data = fs.Read(*handle, data.get(), FILE_BUFFER_SIZE); - if (read_data == 0) { - throw SerializationException("not enough data in file to deserialize result"); - } - } else { - return; + ++aggr_idx; + } +} + +void RowOperations::DestroyStates(RowLayout &layout, Vector &addresses, idx_t count) { + if (count == 0) { + return; + } + // Move to the first aggregate state + VectorOperations::AddInPlace(addresses, layout.GetAggrOffset(), count); + for (auto &aggr : layout.GetAggregates()) { + if (aggr.function.destructor) { + aggr.function.destructor(addresses, count); } + // Move to the next aggregate state + VectorOperations::AddInPlace(addresses, aggr.payload_size, count); } } -bool BufferedFileReader::Finished() { - return total_read + offset == file_size; +void RowOperations::UpdateStates(AggregateObject &aggr, Vector &addresses, DataChunk &payload, idx_t arg_idx, + idx_t count) { + aggr.function.update(aggr.child_count == 0 ? 
nullptr : &payload.data[arg_idx], aggr.bind_data, aggr.child_count, + addresses, count); } -} // namespace duckdb +void RowOperations::UpdateFilteredStates(AggregateObject &aggr, Vector &addresses, DataChunk &payload, idx_t arg_idx) { + ExpressionExecutor filter_execution(aggr.filter); + SelectionVector true_sel(STANDARD_VECTOR_SIZE); + auto count = filter_execution.SelectExpression(payload, true_sel); + + DataChunk filtered_payload; + auto pay_types = payload.GetTypes(); + filtered_payload.Initialize(pay_types); + filtered_payload.Slice(payload, true_sel, count); + + Vector filtered_addresses(addresses, true_sel, count); + filtered_addresses.Normalify(count); + + UpdateStates(aggr, filtered_addresses, filtered_payload, arg_idx, filtered_payload.size()); +} + +void RowOperations::CombineStates(RowLayout &layout, Vector &sources, Vector &targets, idx_t count) { + if (count == 0) { + return; + } + + // Move to the first aggregate states + VectorOperations::AddInPlace(sources, layout.GetAggrOffset(), count); + VectorOperations::AddInPlace(targets, layout.GetAggrOffset(), count); + for (auto &aggr : layout.GetAggregates()) { + D_ASSERT(aggr.function.combine); + aggr.function.combine(sources, targets, count); + + // Move to the next aggregate states + VectorOperations::AddInPlace(sources, aggr.payload_size, count); + VectorOperations::AddInPlace(targets, aggr.payload_size, count); + } +} + +void RowOperations::FinalizeStates(RowLayout &layout, Vector &addresses, DataChunk &result, idx_t aggr_idx) { + // Move to the first aggregate state + VectorOperations::AddInPlace(addresses, layout.GetAggrOffset(), result.size()); + + auto &aggregates = layout.GetAggregates(); + for (idx_t i = 0; i < aggregates.size(); i++) { + auto &target = result.data[aggr_idx + i]; + auto &aggr = aggregates[i]; + aggr.function.finalize(addresses, aggr.bind_data, target, result.size(), 0); + // Move to the next aggregate state + VectorOperations::AddInPlace(addresses, aggr.payload_size, 
result.size()); + } +} +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/algorithm.hpp +// duckdb/common/types/row_operations/row_external.cpp // // //===----------------------------------------------------------------------===// -#include - -#include - namespace duckdb { -BufferedFileWriter::BufferedFileWriter(FileSystem &fs, string path, uint8_t open_flags) - : fs(fs), data(unique_ptr(new data_t[FILE_BUFFER_SIZE])), offset(0), total_written(0) { - handle = fs.OpenFile(path, open_flags, FileLockType::WRITE_LOCK); +void RowOperations::SwizzleColumns(const RowLayout &layout, const data_ptr_t base_row_ptr, const idx_t count) { + const idx_t row_width = layout.GetRowWidth(); + const idx_t heap_pointer_offset = layout.GetHeapPointerOffset(); + // Swizzle the blob columns one by one + for (idx_t col_idx = 0; col_idx < layout.ColumnCount(); col_idx++) { + auto physical_type = layout.GetTypes()[col_idx].InternalType(); + if (TypeIsConstantSize(physical_type)) { + continue; + } + data_ptr_t row_ptr = base_row_ptr; + const idx_t &col_offset = layout.GetOffsets()[col_idx]; + if (physical_type == PhysicalType::VARCHAR) { + // Replace the pointer with the computed offset (if non-inlined) + const idx_t string_pointer_offset = sizeof(uint32_t) + string_t::PREFIX_LENGTH; + for (idx_t i = 0; i < count; i++) { + const string_t str = Load(row_ptr + col_offset); + if (!str.IsInlined()) { + // Load the pointer to the start of the row in the heap + data_ptr_t heap_row_ptr = Load(row_ptr + heap_pointer_offset); + // This is where the pointer that points to the heap is stored in the RowLayout + data_ptr_t col_ptr = row_ptr + col_offset + string_pointer_offset; + // Load the pointer to the data of this column in the same heap row + data_ptr_t heap_col_ptr = Load(col_ptr); + // Overwrite the column data pointer with the within-row offset (pointer arithmetic) + Store(heap_col_ptr - heap_row_ptr, col_ptr); + 
} + row_ptr += row_width; + } + } else { + // Replace the pointer with the computed offset + for (idx_t i = 0; i < count; i++) { + // Load the pointer to the start of the row in the heap + data_ptr_t heap_row_ptr = Load(row_ptr + heap_pointer_offset); + // This is where the pointer that points to the heap is stored in the RowLayout + data_ptr_t col_ptr = row_ptr + col_offset; + // Load the pointer to the data of this column in the same heap row + data_ptr_t heap_col_ptr = Load(col_ptr); + // Overwrite the column data pointer with the within-row offset (pointer arithmetic) + Store(heap_col_ptr - heap_row_ptr, col_ptr); + row_ptr += row_width; + } + } + } } -int64_t BufferedFileWriter::GetFileSize() { - return fs.GetFileSize(*handle); +void RowOperations::SwizzleHeapPointer(const RowLayout &layout, data_ptr_t row_ptr, const data_ptr_t heap_base_ptr, + const idx_t count) { + const idx_t row_width = layout.GetRowWidth(); + const idx_t heap_pointer_offset = layout.GetHeapPointerOffset(); + idx_t cumulative_offset = 0; + for (idx_t i = 0; i < count; i++) { + Store(cumulative_offset, row_ptr + heap_pointer_offset); + cumulative_offset += Load(heap_base_ptr + cumulative_offset); + row_ptr += row_width; + } } -idx_t BufferedFileWriter::GetTotalWritten() { - return total_written + offset; +void RowOperations::UnswizzleHeapPointer(const RowLayout &layout, data_ptr_t row_ptr, const data_ptr_t heap_base_ptr, + const idx_t count) { + const idx_t row_width = layout.GetRowWidth(); + const idx_t heap_pointer_offset = layout.GetHeapPointerOffset(); + for (idx_t i = 0; i < count; i++) { + data_ptr_t heap_pointer_location = row_ptr + heap_pointer_offset; + idx_t heap_row_offset = Load(heap_pointer_location); + Store(heap_base_ptr + heap_row_offset, heap_pointer_location); + row_ptr += row_width; + } } -void BufferedFileWriter::WriteData(const_data_ptr_t buffer, uint64_t write_size) { - // first copy anything we can from the buffer - const_data_ptr_t end_ptr = buffer + write_size; - 
while (buffer < end_ptr) { - idx_t to_write = MinValue((end_ptr - buffer), FILE_BUFFER_SIZE - offset); - D_ASSERT(to_write > 0); - memcpy(data.get() + offset, buffer, to_write); - offset += to_write; - buffer += to_write; - if (offset == FILE_BUFFER_SIZE) { - Flush(); +void RowOperations::UnswizzleColumns(const RowLayout &layout, const data_ptr_t base_row_ptr, const idx_t count) { + const idx_t row_width = layout.GetRowWidth(); + const idx_t heap_pointer_offset = layout.GetHeapPointerOffset(); + // Unswizzle the columns one by one + for (idx_t col_idx = 0; col_idx < layout.ColumnCount(); col_idx++) { + auto physical_type = layout.GetTypes()[col_idx].InternalType(); + if (TypeIsConstantSize(physical_type)) { + continue; + } + const idx_t col_offset = layout.GetOffsets()[col_idx]; + data_ptr_t row_ptr = base_row_ptr; + if (physical_type == PhysicalType::VARCHAR) { + // Replace offset with the pointer (if non-inlined) + const idx_t string_pointer_offset = sizeof(uint32_t) + string_t::PREFIX_LENGTH; + for (idx_t i = 0; i < count; i++) { + const string_t str = Load(row_ptr + col_offset); + if (!str.IsInlined()) { + // Load the pointer to the start of the row in the heap + data_ptr_t heap_row_ptr = Load(row_ptr + heap_pointer_offset); + // This is where the pointer that points to the heap is stored in the RowLayout + data_ptr_t col_ptr = row_ptr + col_offset + string_pointer_offset; + // Load the offset to the data of this column in the same heap row + idx_t heap_col_offset = Load(col_ptr); + // Overwrite the column data offset with the pointer + Store(heap_row_ptr + heap_col_offset, col_ptr); + } + row_ptr += row_width; + } + } else { + // Replace the offset with the pointer + for (idx_t i = 0; i < count; i++) { + // Load the pointer to the start of the row in the heap + data_ptr_t heap_row_ptr = Load(row_ptr + heap_pointer_offset); + // This is where the pointer that points to the heap is stored in the RowLayout + data_ptr_t col_ptr = row_ptr + col_offset; + // Load 
the offset to the data of this column in the same heap row + idx_t heap_col_offset = Load(col_ptr); + // Overwrite the column data offset with the pointer + Store(heap_row_ptr + heap_col_offset, col_ptr); + row_ptr += row_width; + } } } } -void BufferedFileWriter::Flush() { - if (offset == 0) { - return; +} // namespace duckdb +//===--------------------------------------------------------------------===// +// row_gather.cpp +// Description: This file contains the implementation of the gather operators +//===--------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/operator/constant_operators.hpp +// +// +//===----------------------------------------------------------------------===// + + + +namespace duckdb { + +struct PickLeft { + template + static inline T Operation(T left, T right) { + return left; } - fs.Write(*handle, data.get(), offset); - total_written += offset; - offset = 0; -} +}; -void BufferedFileWriter::Sync() { - Flush(); - handle->Sync(); -} +struct PickRight { + template + static inline T Operation(T left, T right) { + return right; + } +}; -void BufferedFileWriter::Truncate(int64_t size) { - // truncate the physical file on disk - handle->Truncate(size); - // reset anything written in the buffer - offset = 0; -} +struct NOP { + template + static inline T Operation(T left) { + return left; + } +}; + +struct ConstantZero { + template + static inline T Operation(T left, T right) { + return 0; + } +}; + +struct ConstantOne { + template + static inline T Operation(T left, T right) { + return 1; + } +}; + +struct AddOne { + template + static inline T Operation(T left, T right) { + return right + 1; + } +}; } // namespace duckdb -#include +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/types/row_data_collection.hpp +// +// 
+//===----------------------------------------------------------------------===// + + + + + + namespace duckdb { -BufferedSerializer::BufferedSerializer(idx_t maximum_size) - : BufferedSerializer(unique_ptr(new data_t[maximum_size]), maximum_size) { -} +struct RowDataBlock { + RowDataBlock(BufferManager &buffer_manager, idx_t capacity, idx_t entry_size) + : capacity(capacity), entry_size(entry_size), count(0), byte_offset(0) { + block = buffer_manager.RegisterMemory(capacity * entry_size, false); + } + //! The buffer block handle + shared_ptr block; + //! Capacity (number of entries) and entry size that fit in this block + idx_t capacity; + const idx_t entry_size; + //! Number of entries currently in this block + idx_t count; + //! Write offset (if variable size entries) + idx_t byte_offset; +}; -BufferedSerializer::BufferedSerializer(unique_ptr data, idx_t size) : maximum_size(size), data(data.get()) { - blob.size = 0; - blob.data = move(data); -} +struct BlockAppendEntry { + BlockAppendEntry(data_ptr_t baseptr, idx_t count) : baseptr(baseptr), count(count) { + } + data_ptr_t baseptr; + idx_t count; +}; -BufferedSerializer::BufferedSerializer(data_ptr_t data, idx_t size) : maximum_size(size), data(data) { - blob.size = 0; -} +class RowDataCollection { +public: + RowDataCollection(BufferManager &buffer_manager, idx_t block_capacity, idx_t entry_size, bool keep_pinned = false); -void BufferedSerializer::WriteData(const_data_ptr_t buffer, idx_t write_size) { - if (blob.size + write_size >= maximum_size) { - do { - maximum_size *= 2; - } while (blob.size + write_size > maximum_size); - auto new_data = new data_t[maximum_size]; - memcpy(new_data, data, blob.size); - data = new_data; - blob.data = unique_ptr(new_data); + //! BufferManager + BufferManager &buffer_manager; + //! The total number of stored entries + idx_t count; + //! The number of entries per block + idx_t block_capacity; + //! Size of entries in the blocks + idx_t entry_size; + //! 
The blocks holding the main data + vector blocks; + //! The blocks that this collection currently has pinned + vector> pinned_blocks; + +public: + idx_t AppendToBlock(RowDataBlock &block, BufferHandle &handle, vector &append_entries, + idx_t remaining, idx_t entry_sizes[]); + vector> Build(idx_t added_count, data_ptr_t key_locations[], idx_t entry_sizes[], + const SelectionVector *sel = &FlatVector::INCREMENTAL_SELECTION_VECTOR); + + void Merge(RowDataCollection &other); + + //! The size (in bytes) of this RowDataCollection if it were stored in a single block + idx_t SizeInBytes() const { + idx_t bytes = 0; + if (entry_size == 1) { + for (auto &block : blocks) { + bytes += block.byte_offset; + } + } else { + bytes = count * entry_size; + } + return MaxValue(bytes, (idx_t)Storage::BLOCK_SIZE); } - memcpy(data + blob.size, buffer, write_size); - blob.size += write_size; -} +private: + mutex rdc_lock; + + //! Whether the blocks should stay pinned (necessary for e.g. a heap) + const bool keep_pinned; +}; } // namespace duckdb + namespace duckdb { -template <> -string Deserializer::Read() { - uint32_t size = Read(); - auto buffer = unique_ptr(new data_t[size]); - ReadData(buffer.get(), size); - return string((char *)buffer.get(), size); +using ValidityBytes = RowLayout::ValidityBytes; + +template +static void TemplatedGatherLoop(Vector &rows, const SelectionVector &row_sel, Vector &col, + const SelectionVector &col_sel, idx_t count, idx_t col_offset, idx_t col_no) { + // Precompute mask indexes + idx_t entry_idx; + idx_t idx_in_entry; + ValidityBytes::GetEntryIndex(col_no, entry_idx, idx_in_entry); + + auto ptrs = FlatVector::GetData(rows); + auto data = FlatVector::GetData(col); + auto &col_mask = FlatVector::Validity(col); + + for (idx_t i = 0; i < count; i++) { + auto row_idx = row_sel.get_index(i); + auto row = ptrs[row_idx]; + auto col_idx = col_sel.get_index(i); + data[col_idx] = Load(row + col_offset); + ValidityBytes row_mask(row); + if 
(!row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) { + col_mask.SetInvalid(col_idx); + } + } } -void Deserializer::ReadStringVector(vector &list) { - uint32_t sz = Read(); - list.resize(sz); - for (idx_t i = 0; i < sz; i++) { - list[i] = Read(); +static void GatherNestedVector(Vector &rows, const SelectionVector &row_sel, Vector &col, + const SelectionVector &col_sel, idx_t count, idx_t col_offset, idx_t col_no) { + auto ptrs = FlatVector::GetData(rows); + + // Build the gather locations + data_ptr_t data_locations[STANDARD_VECTOR_SIZE]; + data_ptr_t mask_locations[STANDARD_VECTOR_SIZE]; + for (idx_t i = 0; i < count; i++) { + auto row_idx = row_sel.get_index(i); + mask_locations[i] = ptrs[row_idx]; + data_locations[i] = Load(ptrs[row_idx] + col_offset); } + + // Deserialise into the selected locations + RowOperations::HeapGather(col, count, col_sel, col_no, data_locations, mask_locations); } -} // namespace duckdb +void RowOperations::Gather(Vector &rows, const SelectionVector &row_sel, Vector &col, const SelectionVector &col_sel, + const idx_t count, const idx_t col_offset, const idx_t col_no) { + D_ASSERT(rows.GetVectorType() == VectorType::FLAT_VECTOR); + D_ASSERT(rows.GetType().id() == LogicalTypeId::POINTER); // "Cannot gather from non-pointer type!" 
+ + col.SetVectorType(VectorType::FLAT_VECTOR); + switch (col.GetType().InternalType()) { + case PhysicalType::UINT8: + TemplatedGatherLoop(rows, row_sel, col, col_sel, count, col_offset, col_no); + break; + case PhysicalType::UINT16: + TemplatedGatherLoop(rows, row_sel, col, col_sel, count, col_offset, col_no); + break; + case PhysicalType::UINT32: + TemplatedGatherLoop(rows, row_sel, col, col_sel, count, col_offset, col_no); + break; + case PhysicalType::UINT64: + TemplatedGatherLoop(rows, row_sel, col, col_sel, count, col_offset, col_no); + break; + case PhysicalType::BOOL: + case PhysicalType::INT8: + TemplatedGatherLoop(rows, row_sel, col, col_sel, count, col_offset, col_no); + break; + case PhysicalType::INT16: + TemplatedGatherLoop(rows, row_sel, col, col_sel, count, col_offset, col_no); + break; + case PhysicalType::INT32: + TemplatedGatherLoop(rows, row_sel, col, col_sel, count, col_offset, col_no); + break; + case PhysicalType::INT64: + TemplatedGatherLoop(rows, row_sel, col, col_sel, count, col_offset, col_no); + break; + case PhysicalType::INT128: + TemplatedGatherLoop(rows, row_sel, col, col_sel, count, col_offset, col_no); + break; + case PhysicalType::FLOAT: + TemplatedGatherLoop(rows, row_sel, col, col_sel, count, col_offset, col_no); + break; + case PhysicalType::DOUBLE: + TemplatedGatherLoop(rows, row_sel, col, col_sel, count, col_offset, col_no); + break; + case PhysicalType::INTERVAL: + TemplatedGatherLoop(rows, row_sel, col, col_sel, count, col_offset, col_no); + break; + case PhysicalType::VARCHAR: + TemplatedGatherLoop(rows, row_sel, col, col_sel, count, col_offset, col_no); + break; + case PhysicalType::LIST: + case PhysicalType::MAP: + case PhysicalType::STRUCT: + GatherNestedVector(rows, row_sel, col, col_sel, count, col_offset, col_no); + break; + default: + throw InternalException("Unimplemented type for RowOperations::Gather"); + } +} +} // namespace duckdb -#include -#include -#include -#include -#include -#include -#include namespace 
duckdb { -bool StringUtil::Contains(const string &haystack, const string &needle) { - return (haystack.find(needle) != string::npos); -} +using ValidityBytes = TemplatedValidityMask; -void StringUtil::LTrim(string &str) { - auto it = str.begin(); - while (CharacterIsSpace(*it)) { - it++; +template +static void TemplatedHeapGather(Vector &v, const idx_t count, const SelectionVector &sel, data_ptr_t *key_locations) { + auto target = FlatVector::GetData(v); + + for (idx_t i = 0; i < count; ++i) { + const auto col_idx = sel.get_index(i); + target[col_idx] = Load(key_locations[i]); + key_locations[i] += sizeof(T); } - str.erase(str.begin(), it); } -// Remove trailing ' ', '\f', '\n', '\r', '\t', '\v' -void StringUtil::RTrim(string &str) { - str.erase(find_if(str.rbegin(), str.rend(), [](int ch) { return ch > 0 && !CharacterIsSpace(ch); }).base(), - str.end()); -} +static void HeapGatherStringVector(Vector &v, const idx_t vcount, const SelectionVector &sel, + data_ptr_t *key_locations) { + const auto &validity = FlatVector::Validity(v); + auto target = FlatVector::GetData(v); -void StringUtil::Trim(string &str) { - StringUtil::LTrim(str); - StringUtil::RTrim(str); + for (idx_t i = 0; i < vcount; i++) { + const auto col_idx = sel.get_index(i); + if (!validity.RowIsValid(col_idx)) { + continue; + } + auto len = Load(key_locations[i]); + key_locations[i] += sizeof(uint32_t); + target[col_idx] = StringVector::AddStringOrBlob(v, string_t((const char *)key_locations[i], len)); + key_locations[i] += len; + } } -bool StringUtil::StartsWith(string str, string prefix) { - if (prefix.size() > str.size()) { - return false; +static void HeapGatherStructVector(Vector &v, const idx_t vcount, const SelectionVector &sel, + data_ptr_t *key_locations) { + // struct must have a validitymask for its fields + auto &child_types = StructType::GetChildTypes(v.GetType()); + const idx_t struct_validitymask_size = (child_types.size() + 7) / 8; + data_ptr_t 
struct_validitymask_locations[STANDARD_VECTOR_SIZE]; + for (idx_t i = 0; i < vcount; i++) { + // use key_locations as the validitymask, and create struct_key_locations + struct_validitymask_locations[i] = key_locations[i]; + key_locations[i] += struct_validitymask_size; } - return equal(prefix.begin(), prefix.end(), str.begin()); -} -bool StringUtil::EndsWith(const string &str, const string &suffix) { - if (suffix.size() > str.size()) { - return false; + // now deserialize into the struct vectors + auto &children = StructVector::GetEntries(v); + for (idx_t i = 0; i < child_types.size(); i++) { + RowOperations::HeapGather(*children[i], vcount, sel, i, key_locations, struct_validitymask_locations); } - return equal(suffix.rbegin(), suffix.rend(), str.rbegin()); } -string StringUtil::Repeat(const string &str, idx_t n) { - std::ostringstream os; - if (n == 0 || str.empty()) { - return (os.str()); - } - for (int i = 0; i < static_cast(n); i++) { - os << str; +static void HeapGatherListVector(Vector &v, const idx_t vcount, const SelectionVector &sel, data_ptr_t *key_locations) { + const auto &validity = FlatVector::Validity(v); + + auto child_type = ListType::GetChildType(v.GetType()); + auto list_data = ListVector::GetData(v); + data_ptr_t list_entry_locations[STANDARD_VECTOR_SIZE]; + + uint64_t entry_offset = ListVector::GetListSize(v); + for (idx_t i = 0; i < vcount; i++) { + const auto col_idx = sel.get_index(i); + if (!validity.RowIsValid(col_idx)) { + continue; + } + // read list length + auto entry_remaining = Load(key_locations[i]); + key_locations[i] += sizeof(uint64_t); + // set list entry attributes + list_data[col_idx].length = entry_remaining; + list_data[col_idx].offset = entry_offset; + // skip over the validity mask + data_ptr_t validitymask_location = key_locations[i]; + idx_t offset_in_byte = 0; + key_locations[i] += (entry_remaining + 7) / 8; + // entry sizes + data_ptr_t var_entry_size_ptr = nullptr; + if 
(!TypeIsConstantSize(child_type.InternalType())) { + var_entry_size_ptr = key_locations[i]; + key_locations[i] += entry_remaining * sizeof(idx_t); + } + + // now read the list data + while (entry_remaining > 0) { + auto next = MinValue(entry_remaining, (idx_t)STANDARD_VECTOR_SIZE); + + // initialize a new vector to append + Vector append_vector(v.GetType()); + append_vector.SetVectorType(v.GetVectorType()); + + auto &list_vec_to_append = ListVector::GetEntry(append_vector); + + // set validity + //! Since we are constructing the vector, this will always be a flat vector. + auto &append_validity = FlatVector::Validity(list_vec_to_append); + for (idx_t entry_idx = 0; entry_idx < next; entry_idx++) { + append_validity.Set(entry_idx, *(validitymask_location) & (1 << offset_in_byte)); + if (++offset_in_byte == 8) { + validitymask_location++; + offset_in_byte = 0; + } + } + + // compute entry sizes and set locations where the list entries are + if (TypeIsConstantSize(child_type.InternalType())) { + // constant size list entries + const idx_t type_size = GetTypeIdSize(child_type.InternalType()); + for (idx_t entry_idx = 0; entry_idx < next; entry_idx++) { + list_entry_locations[entry_idx] = key_locations[i]; + key_locations[i] += type_size; + } + } else { + // variable size list entries + for (idx_t entry_idx = 0; entry_idx < next; entry_idx++) { + list_entry_locations[entry_idx] = key_locations[i]; + key_locations[i] += Load(var_entry_size_ptr); + var_entry_size_ptr += sizeof(idx_t); + } + } + + // now deserialize and add to listvector + RowOperations::HeapGather(list_vec_to_append, next, FlatVector::INCREMENTAL_SELECTION_VECTOR, 0, + list_entry_locations, nullptr); + ListVector::Append(v, list_vec_to_append, next); + + // update for next iteration + entry_remaining -= next; + entry_offset += next; + } } - return (os.str()); } -vector StringUtil::Split(const string &str, char delimiter) { - std::stringstream ss(str); - vector lines; - string temp; - while (getline(ss, 
temp, delimiter)) { - lines.push_back(temp); +void RowOperations::HeapGather(Vector &v, const idx_t &vcount, const SelectionVector &sel, const idx_t &col_no, + data_ptr_t *key_locations, data_ptr_t *validitymask_locations) { + v.SetVectorType(VectorType::FLAT_VECTOR); + + auto &validity = FlatVector::Validity(v); + if (validitymask_locations) { + // Precompute mask indexes + idx_t entry_idx; + idx_t idx_in_entry; + ValidityBytes::GetEntryIndex(col_no, entry_idx, idx_in_entry); + + for (idx_t i = 0; i < vcount; i++) { + ValidityBytes row_mask(validitymask_locations[i]); + const auto valid = row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry); + const auto col_idx = sel.get_index(i); + validity.Set(col_idx, valid); + } } - return (lines); -} -string StringUtil::Join(const vector &input, const string &separator) { - return StringUtil::Join(input, input.size(), separator, [](const string &s) { return s; }); + auto type = v.GetType().InternalType(); + switch (type) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + TemplatedHeapGather(v, vcount, sel, key_locations); + break; + case PhysicalType::INT16: + TemplatedHeapGather(v, vcount, sel, key_locations); + break; + case PhysicalType::INT32: + TemplatedHeapGather(v, vcount, sel, key_locations); + break; + case PhysicalType::INT64: + TemplatedHeapGather(v, vcount, sel, key_locations); + break; + case PhysicalType::UINT8: + TemplatedHeapGather(v, vcount, sel, key_locations); + break; + case PhysicalType::UINT16: + TemplatedHeapGather(v, vcount, sel, key_locations); + break; + case PhysicalType::UINT32: + TemplatedHeapGather(v, vcount, sel, key_locations); + break; + case PhysicalType::UINT64: + TemplatedHeapGather(v, vcount, sel, key_locations); + break; + case PhysicalType::INT128: + TemplatedHeapGather(v, vcount, sel, key_locations); + break; + case PhysicalType::FLOAT: + TemplatedHeapGather(v, vcount, sel, key_locations); + break; + case PhysicalType::DOUBLE: + TemplatedHeapGather(v, vcount, 
sel, key_locations); + break; + case PhysicalType::INTERVAL: + TemplatedHeapGather(v, vcount, sel, key_locations); + break; + case PhysicalType::VARCHAR: + HeapGatherStringVector(v, vcount, sel, key_locations); + break; + case PhysicalType::STRUCT: + HeapGatherStructVector(v, vcount, sel, key_locations); + break; + case PhysicalType::LIST: + HeapGatherListVector(v, vcount, sel, key_locations); + break; + default: + throw NotImplementedException("Unimplemented deserialize from row-format"); + } } -string StringUtil::Prefix(const string &str, const string &prefix) { - vector lines = StringUtil::Split(str, '\n'); - if (lines.empty()) { - return (""); +} // namespace duckdb + + + + +namespace duckdb { + +using ValidityBytes = TemplatedValidityMask; + +static void ComputeStringEntrySizes(VectorData &vdata, idx_t entry_sizes[], const idx_t ser_count, + const SelectionVector &sel, const idx_t offset) { + auto strings = (string_t *)vdata.data; + for (idx_t i = 0; i < ser_count; i++) { + auto idx = sel.get_index(i); + auto str_idx = vdata.sel->get_index(idx) + offset; + if (vdata.validity.RowIsValid(str_idx)) { + entry_sizes[i] += sizeof(uint32_t) + strings[str_idx].GetSize(); + } } +} - std::ostringstream os; - for (idx_t i = 0, cnt = lines.size(); i < cnt; i++) { - if (i > 0) { - os << std::endl; +static void ComputeStructEntrySizes(Vector &v, idx_t entry_sizes[], idx_t vcount, idx_t ser_count, + const SelectionVector &sel, idx_t offset) { + // obtain child vectors + idx_t num_children; + vector struct_vectors; + if (v.GetVectorType() == VectorType::DICTIONARY_VECTOR) { + auto &child = DictionaryVector::Child(v); + auto &dict_sel = DictionaryVector::SelVector(v); + auto &children = StructVector::GetEntries(child); + num_children = children.size(); + for (auto &struct_child : children) { + Vector struct_vector(*struct_child, dict_sel, vcount); + struct_vectors.push_back(move(struct_vector)); + } + } else { + auto &children = StructVector::GetEntries(v); + num_children = 
children.size(); + for (auto &struct_child : children) { + Vector struct_vector(*struct_child); + struct_vectors.push_back(move(struct_vector)); } - os << prefix << lines[i]; } - return (os.str()); + // add struct validitymask size + const idx_t struct_validitymask_size = (num_children + 7) / 8; + for (idx_t i = 0; i < ser_count; i++) { + entry_sizes[i] += struct_validitymask_size; + } + // compute size of child vectors + for (auto &struct_vector : struct_vectors) { + RowOperations::ComputeEntrySizes(struct_vector, entry_sizes, vcount, ser_count, sel, offset); + } } -// http://ubuntuforums.org/showpost.php?p=10215516&postcount=5 -string StringUtil::FormatSize(idx_t bytes) { - double multiplier = 1024; - double kilobytes = multiplier; - double megabytes = multiplier * kilobytes; - double gigabytes = multiplier * megabytes; +static void ComputeListEntrySizes(Vector &v, VectorData &vdata, idx_t entry_sizes[], idx_t ser_count, + const SelectionVector &sel, idx_t offset) { + auto list_data = ListVector::GetData(v); + auto &child_vector = ListVector::GetEntry(v); + idx_t list_entry_sizes[STANDARD_VECTOR_SIZE]; + for (idx_t i = 0; i < ser_count; i++) { + auto idx = sel.get_index(i); + auto source_idx = vdata.sel->get_index(idx) + offset; + if (vdata.validity.RowIsValid(source_idx)) { + auto list_entry = list_data[source_idx]; - std::ostringstream os; + // make room for list length, list validitymask + entry_sizes[i] += sizeof(list_entry.length); + entry_sizes[i] += (list_entry.length + 7) / 8; - if (bytes >= gigabytes) { - os << std::fixed << std::setprecision(2) << (bytes / gigabytes) << " GB"; - } else if (bytes >= megabytes) { - os << std::fixed << std::setprecision(2) << (bytes / megabytes) << " MB"; - } else if (bytes >= kilobytes) { - os << std::fixed << std::setprecision(2) << (bytes / kilobytes) << " KB"; - } else { - os << to_string(bytes) + " bytes"; + // serialize size of each entry (if non-constant size) + if 
(!TypeIsConstantSize(ListType::GetChildType(v.GetType()).InternalType())) { + entry_sizes[i] += list_entry.length * sizeof(list_entry.length); + } + + // compute size of each the elements in list_entry and sum them + auto entry_remaining = list_entry.length; + auto entry_offset = list_entry.offset; + while (entry_remaining > 0) { + // the list entry can span multiple vectors + auto next = MinValue((idx_t)STANDARD_VECTOR_SIZE, entry_remaining); + + // compute and add to the total + std::fill_n(list_entry_sizes, next, 0); + RowOperations::ComputeEntrySizes(child_vector, list_entry_sizes, next, next, + FlatVector::INCREMENTAL_SELECTION_VECTOR, entry_offset); + for (idx_t list_idx = 0; list_idx < next; list_idx++) { + entry_sizes[i] += list_entry_sizes[list_idx]; + } + + // update for next iteration + entry_remaining -= next; + entry_offset += next; + } + } } - return (os.str()); } -string StringUtil::Upper(const string &str) { - string copy(str); - transform(copy.begin(), copy.end(), copy.begin(), [](unsigned char c) { return std::toupper(c); }); - return (copy); +void RowOperations::ComputeEntrySizes(Vector &v, VectorData &vdata, idx_t entry_sizes[], idx_t vcount, idx_t ser_count, + const SelectionVector &sel, idx_t offset) { + const auto physical_type = v.GetType().InternalType(); + if (TypeIsConstantSize(physical_type)) { + const auto type_size = GetTypeIdSize(physical_type); + for (idx_t i = 0; i < ser_count; i++) { + entry_sizes[i] += type_size; + } + } else { + switch (physical_type) { + case PhysicalType::VARCHAR: + ComputeStringEntrySizes(vdata, entry_sizes, ser_count, sel, offset); + break; + case PhysicalType::STRUCT: + ComputeStructEntrySizes(v, entry_sizes, vcount, ser_count, sel, offset); + break; + case PhysicalType::LIST: + ComputeListEntrySizes(v, vdata, entry_sizes, ser_count, sel, offset); + break; + default: + // LCOV_EXCL_START + throw NotImplementedException("Column with variable size type %s cannot be serialized to row-format", + 
v.GetType().ToString()); + // LCOV_EXCL_STOP + } + } } -string StringUtil::Lower(const string &str) { - string copy(str); - transform(copy.begin(), copy.end(), copy.begin(), [](unsigned char c) { return std::tolower(c); }); - return (copy); +void RowOperations::ComputeEntrySizes(Vector &v, idx_t entry_sizes[], idx_t vcount, idx_t ser_count, + const SelectionVector &sel, idx_t offset) { + VectorData vdata; + v.Orrify(vcount, vdata); + ComputeEntrySizes(v, vdata, entry_sizes, vcount, ser_count, sel, offset); } -vector StringUtil::Split(const string &input, const string &split) { - vector splits; +template +static void TemplatedHeapScatter(VectorData &vdata, const SelectionVector &sel, idx_t count, idx_t col_idx, + data_ptr_t *key_locations, data_ptr_t *validitymask_locations, idx_t offset) { + auto source = (T *)vdata.data; + if (!validitymask_locations) { + for (idx_t i = 0; i < count; i++) { + auto idx = sel.get_index(i); + auto source_idx = vdata.sel->get_index(idx) + offset; - idx_t last = 0; - idx_t input_len = input.size(); - idx_t split_len = split.size(); - while (last <= input_len) { - idx_t next = input.find(split, last); - if (next == string::npos) { - next = input_len; + auto target = (T *)key_locations[i]; + Store(source[source_idx], (data_ptr_t)target); + key_locations[i] += sizeof(T); } + } else { + idx_t entry_idx; + idx_t idx_in_entry; + ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry); + const auto bit = ~(1UL << idx_in_entry); + for (idx_t i = 0; i < count; i++) { + auto idx = sel.get_index(i); + auto source_idx = vdata.sel->get_index(idx) + offset; - // Push the substring [last, next) on to splits - string substr = input.substr(last, next - last); - if (substr.empty() == false) { - splits.push_back(substr); + auto target = (T *)key_locations[i]; + Store(source[source_idx], (data_ptr_t)target); + key_locations[i] += sizeof(T); + + // set the validitymask + if (!vdata.validity.RowIsValid(source_idx)) { + *(validitymask_locations[i] + 
entry_idx) &= bit; + } } - last = next + split_len; } - return splits; } -string StringUtil::Replace(string source, const string &from, const string &to) { - if (from.empty()) { - return source; - } - idx_t start_pos = 0; - while ((start_pos = source.find(from, start_pos)) != string::npos) { - source.replace(start_pos, from.length(), to); - start_pos += to.length(); // In case 'to' contains 'from', like - // replacing 'x' with 'yx' - } - return source; -} +static void HeapScatterStringVector(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, idx_t col_idx, + data_ptr_t *key_locations, data_ptr_t *validitymask_locations, idx_t offset) { + VectorData vdata; + v.Orrify(vcount, vdata); -vector StringUtil::TopNStrings(vector> scores, idx_t n, idx_t threshold) { - if (scores.empty()) { - return vector(); - } - sort(scores.begin(), scores.end(), - [](const pair &a, const pair &b) -> bool { return a.second < b.second; }); - vector result; - result.push_back(scores[0].first); - for (idx_t i = 1; i < MinValue(scores.size(), n); i++) { - if (scores[i].second > threshold) { - break; + auto strings = (string_t *)vdata.data; + if (!validitymask_locations) { + for (idx_t i = 0; i < ser_count; i++) { + auto idx = sel.get_index(i); + auto source_idx = vdata.sel->get_index(idx) + offset; + if (vdata.validity.RowIsValid(source_idx)) { + auto &string_entry = strings[source_idx]; + // store string size + Store(string_entry.GetSize(), key_locations[i]); + key_locations[i] += sizeof(uint32_t); + // store the string + memcpy(key_locations[i], string_entry.GetDataUnsafe(), string_entry.GetSize()); + key_locations[i] += string_entry.GetSize(); + } + } + } else { + idx_t entry_idx; + idx_t idx_in_entry; + ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry); + const auto bit = ~(1UL << idx_in_entry); + for (idx_t i = 0; i < ser_count; i++) { + auto idx = sel.get_index(i); + auto source_idx = vdata.sel->get_index(idx) + offset; + if 
(vdata.validity.RowIsValid(source_idx)) { + auto &string_entry = strings[source_idx]; + // store string size + Store(string_entry.GetSize(), key_locations[i]); + key_locations[i] += sizeof(uint32_t); + // store the string + memcpy(key_locations[i], string_entry.GetDataUnsafe(), string_entry.GetSize()); + key_locations[i] += string_entry.GetSize(); + } else { + // set the validitymask + *(validitymask_locations[i] + entry_idx) &= bit; + } } - result.push_back(scores[i].first); } - return result; } -struct LevenshteinArray { - LevenshteinArray(idx_t len1, idx_t len2) : len1(len1) { - dist = unique_ptr(new idx_t[len1 * len2]); - } +static void HeapScatterStructVector(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, idx_t col_idx, + data_ptr_t *key_locations, data_ptr_t *validitymask_locations, idx_t offset) { + VectorData vdata; + v.Orrify(vcount, vdata); - idx_t &Score(idx_t i, idx_t j) { - return dist[GetIndex(i, j)]; + idx_t num_children; + vector struct_vectors; + if (v.GetVectorType() == VectorType::DICTIONARY_VECTOR) { + auto &child = DictionaryVector::Child(v); + auto &dict_sel = DictionaryVector::SelVector(v); + auto &children = StructVector::GetEntries(child); + num_children = children.size(); + for (auto &struct_child : children) { + Vector struct_vector(*struct_child, dict_sel, vcount); + struct_vectors.push_back(move(struct_vector)); + } + } else { + auto &children = StructVector::GetEntries(v); + num_children = children.size(); + for (auto &struct_child : children) { + Vector struct_vector(*struct_child); + struct_vectors.push_back(move(struct_vector)); + } } -private: - idx_t len1; - unique_ptr dist; - - idx_t GetIndex(idx_t i, idx_t j) { - return j * len1 + i; - } -}; + // the whole struct itself can be NULL + idx_t entry_idx; + idx_t idx_in_entry; + ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry); + const auto bit = ~(1UL << idx_in_entry); -// adapted from 
https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#C++ -idx_t StringUtil::LevenshteinDistance(const string &s1, const string &s2) { - idx_t len1 = s1.size(); - idx_t len2 = s2.size(); - if (len1 == 0) { - return len2; - } - if (len2 == 0) { - return len1; - } - LevenshteinArray array(len1 + 1, len2 + 1); - array.Score(0, 0) = 0; - for (idx_t i = 0; i <= len1; i++) { - array.Score(i, 0) = i; - } - for (idx_t j = 0; j <= len2; j++) { - array.Score(0, j) = j; - } - for (idx_t i = 1; i <= len1; i++) { - for (idx_t j = 1; j <= len2; j++) { - // d[i][j] = std::min({ d[i - 1][j] + 1, - // d[i][j - 1] + 1, - // d[i - 1][j - 1] + (s1[i - 1] == s2[j - 1] ? 0 : 1) }); - int equal = s1[i - 1] == s2[j - 1] ? 0 : 1; - idx_t adjacent_score1 = array.Score(i - 1, j) + 1; - idx_t adjacent_score2 = array.Score(i, j - 1) + 1; - idx_t adjacent_score3 = array.Score(i - 1, j - 1) + equal; + // struct must have a validitymask for its fields + const idx_t struct_validitymask_size = (num_children + 7) / 8; + data_ptr_t struct_validitymask_locations[STANDARD_VECTOR_SIZE]; + for (idx_t i = 0; i < ser_count; i++) { + // initialize the struct validity mask + struct_validitymask_locations[i] = key_locations[i]; + memset(struct_validitymask_locations[i], -1, struct_validitymask_size); + key_locations[i] += struct_validitymask_size; - idx_t t = MinValue(adjacent_score1, adjacent_score2); - array.Score(i, j) = MinValue(t, adjacent_score3); + // set whether the whole struct is null + auto idx = sel.get_index(i); + auto source_idx = vdata.sel->get_index(idx) + offset; + if (validitymask_locations && !vdata.validity.RowIsValid(source_idx)) { + *(validitymask_locations[i] + entry_idx) &= bit; } } - return array.Score(len1, len2); -} - -vector StringUtil::TopNLevenshtein(const vector &strings, const string &target, idx_t n, - idx_t threshold) { - vector> scores; - scores.reserve(strings.size()); - for (auto &str : strings) { - scores.emplace_back(str, 
LevenshteinDistance(str, target)); - } - return TopNStrings(scores, n, threshold); -} -string StringUtil::CandidatesMessage(const vector &candidates, const string &candidate) { - string result_str; - if (!candidates.empty()) { - result_str = "\n" + candidate + ": "; - for (idx_t i = 0; i < candidates.size(); i++) { - if (i > 0) { - result_str += ", "; - } - result_str += "\"" + candidates[i] + "\""; - } + // now serialize the struct vectors + for (idx_t i = 0; i < struct_vectors.size(); i++) { + auto &struct_vector = struct_vectors[i]; + RowOperations::HeapScatter(struct_vector, vcount, sel, ser_count, i, key_locations, + struct_validitymask_locations, offset); } - return result_str; } -} // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/tree_renderer.hpp -// -// -//===----------------------------------------------------------------------===// +static void HeapScatterListVector(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, idx_t col_no, + data_ptr_t *key_locations, data_ptr_t *validitymask_locations, idx_t offset) { + VectorData vdata; + v.Orrify(vcount, vdata); + idx_t entry_idx; + idx_t idx_in_entry; + ValidityBytes::GetEntryIndex(col_no, entry_idx, idx_in_entry); + auto list_data = ListVector::GetData(v); + auto &child_vector = ListVector::GetEntry(v); + VectorData list_vdata; + child_vector.Orrify(ListVector::GetListSize(v), list_vdata); + auto child_type = ListType::GetChildType(v.GetType()).InternalType(); + idx_t list_entry_sizes[STANDARD_VECTOR_SIZE]; + data_ptr_t list_entry_locations[STANDARD_VECTOR_SIZE]; + for (idx_t i = 0; i < ser_count; i++) { + auto idx = sel.get_index(i); + auto source_idx = vdata.sel->get_index(idx) + offset; + if (!vdata.validity.RowIsValid(source_idx)) { + if (validitymask_locations) { + // set the row validitymask for this column to invalid + ValidityBytes row_mask(validitymask_locations[i]); + 
row_mask.SetInvalidUnsafe(entry_idx, idx_in_entry); + } + continue; + } + auto list_entry = list_data[source_idx]; -namespace duckdb { -class LogicalOperator; -class PhysicalOperator; + // store list length + Store(list_entry.length, key_locations[i]); + key_locations[i] += sizeof(list_entry.length); -struct RenderTreeNode { - string name; - string extra_text; -}; + // make room for the validitymask + data_ptr_t list_validitymask_location = key_locations[i]; + idx_t entry_offset_in_byte = 0; + idx_t validitymask_size = (list_entry.length + 7) / 8; + memset(list_validitymask_location, -1, validitymask_size); + key_locations[i] += validitymask_size; -struct RenderTree { - RenderTree(idx_t width, idx_t height); + // serialize size of each entry (if non-constant size) + data_ptr_t var_entry_size_ptr = nullptr; + if (!TypeIsConstantSize(child_type)) { + var_entry_size_ptr = key_locations[i]; + key_locations[i] += list_entry.length * sizeof(idx_t); + } - unique_ptr[]> nodes; - idx_t width; - idx_t height; + auto entry_remaining = list_entry.length; + auto entry_offset = list_entry.offset; + while (entry_remaining > 0) { + // the list entry can span multiple vectors + auto next = MinValue((idx_t)STANDARD_VECTOR_SIZE, entry_remaining); -public: - RenderTreeNode *GetNode(idx_t x, idx_t y); - void SetNode(idx_t x, idx_t y, unique_ptr node); - bool HasNode(idx_t x, idx_t y); + // serialize list validity + for (idx_t entry_idx = 0; entry_idx < next; entry_idx++) { + auto list_idx = list_vdata.sel->get_index(entry_idx) + entry_offset; + if (!list_vdata.validity.RowIsValid(list_idx)) { + *(list_validitymask_location) &= ~(1UL << entry_offset_in_byte); + } + if (++entry_offset_in_byte == 8) { + list_validitymask_location++; + entry_offset_in_byte = 0; + } + } - idx_t GetPosition(idx_t x, idx_t y); -}; + if (TypeIsConstantSize(child_type)) { + // constant size list entries: set list entry locations + const idx_t type_size = GetTypeIdSize(child_type); + for (idx_t entry_idx = 0; 
entry_idx < next; entry_idx++) { + list_entry_locations[entry_idx] = key_locations[i]; + key_locations[i] += type_size; + } + } else { + // variable size list entries: compute entry sizes and set list entry locations + std::fill_n(list_entry_sizes, next, 0); + RowOperations::ComputeEntrySizes(child_vector, list_entry_sizes, next, next, + FlatVector::INCREMENTAL_SELECTION_VECTOR, entry_offset); + for (idx_t entry_idx = 0; entry_idx < next; entry_idx++) { + list_entry_locations[entry_idx] = key_locations[i]; + key_locations[i] += list_entry_sizes[entry_idx]; + Store(list_entry_sizes[entry_idx], var_entry_size_ptr); + var_entry_size_ptr += sizeof(idx_t); + } + } -struct TreeRendererConfig { + // now serialize to the locations + RowOperations::HeapScatter(child_vector, ListVector::GetListSize(v), + FlatVector::INCREMENTAL_SELECTION_VECTOR, next, 0, list_entry_locations, nullptr, + entry_offset); - void enable_detailed() { - MAX_EXTRA_LINES = 1000; - detailed = true; + // update for next iteration + entry_remaining -= next; + entry_offset += next; + } } +} - void enable_standard() { - MAX_EXTRA_LINES = 30; - detailed = false; +void RowOperations::HeapScatter(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, idx_t col_idx, + data_ptr_t *key_locations, data_ptr_t *validitymask_locations, idx_t offset) { + if (TypeIsConstantSize(v.GetType().InternalType())) { + VectorData vdata; + v.Orrify(vcount, vdata); + RowOperations::HeapScatterVData(vdata, v.GetType().InternalType(), sel, ser_count, col_idx, key_locations, + validitymask_locations, offset); + } else { + switch (v.GetType().InternalType()) { + case PhysicalType::VARCHAR: + HeapScatterStringVector(v, vcount, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); + break; + case PhysicalType::STRUCT: + HeapScatterStructVector(v, vcount, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); + break; + case PhysicalType::LIST: + HeapScatterListVector(v, vcount, sel, 
ser_count, col_idx, key_locations, validitymask_locations, offset); + break; + default: + // LCOV_EXCL_START + throw NotImplementedException("Serialization of variable length vector with type %s", + v.GetType().ToString()); + // LCOV_EXCL_STOP + } } +} - idx_t MAXIMUM_RENDER_WIDTH = 240; - idx_t NODE_RENDER_WIDTH = 29; - idx_t MINIMUM_RENDER_WIDTH = 15; - idx_t MAX_EXTRA_LINES = 30; - bool detailed = false; - - const char *LTCORNER = "┌"; - const char *RTCORNER = "┐"; - const char *LDCORNER = "└"; - const char *RDCORNER = "┘"; - - const char *MIDDLE = "┼"; - const char *TMIDDLE = "┬"; - const char *LMIDDLE = "├"; - const char *RMIDDLE = "┤"; - const char *DMIDDLE = "┴"; - - const char *VERTICAL = "│"; - const char *HORIZONTAL = "─"; - - // ASCII version? - // static constexpr const char* LTCORNER = "<"; - // static constexpr const char* RTCORNER = ">"; - // static constexpr const char* LDCORNER = "<"; - // static constexpr const char* RDCORNER = ">"; - - // static constexpr const char* MIDDLE = "+"; - // static constexpr const char* TMIDDLE = "+"; - // static constexpr const char* LMIDDLE = "+"; - // static constexpr const char* RMIDDLE = "+"; - // static constexpr const char* DMIDDLE = "+"; - - // static constexpr const char* VERTICAL = "|"; - // static constexpr const char* HORIZONTAL = "-"; -}; - -class TreeRenderer { -public: - explicit TreeRenderer(TreeRendererConfig config_p = TreeRendererConfig()) : config(move(config_p)) { +void RowOperations::HeapScatterVData(VectorData &vdata, PhysicalType type, const SelectionVector &sel, idx_t ser_count, + idx_t col_idx, data_ptr_t *key_locations, data_ptr_t *validitymask_locations, + idx_t offset) { + switch (type) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + TemplatedHeapScatter(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); + break; + case PhysicalType::INT16: + TemplatedHeapScatter(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); + break; + 
case PhysicalType::INT32: + TemplatedHeapScatter(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); + break; + case PhysicalType::INT64: + TemplatedHeapScatter(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); + break; + case PhysicalType::UINT8: + TemplatedHeapScatter(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); + break; + case PhysicalType::UINT16: + TemplatedHeapScatter(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); + break; + case PhysicalType::UINT32: + TemplatedHeapScatter(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); + break; + case PhysicalType::UINT64: + TemplatedHeapScatter(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); + break; + case PhysicalType::INT128: + TemplatedHeapScatter(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); + break; + case PhysicalType::FLOAT: + TemplatedHeapScatter(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); + break; + case PhysicalType::DOUBLE: + TemplatedHeapScatter(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); + break; + case PhysicalType::INTERVAL: + TemplatedHeapScatter(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); + break; + default: + throw NotImplementedException("FIXME: Serialize to of constant type column to row-format"); } +} - string ToString(const LogicalOperator &op); - string ToString(const PhysicalOperator &op); - string ToString(const QueryProfiler::TreeNode &op); - - void Render(const LogicalOperator &op, std::ostream &ss); - void Render(const PhysicalOperator &op, std::ostream &ss); - void Render(const QueryProfiler::TreeNode &op, std::ostream &ss); - - void ToStream(RenderTree &root, std::ostream &ss); +} // namespace duckdb 
+//===--------------------------------------------------------------------===// +// row_match.cpp +// Description: This file contains the implementation of the match operators +//===--------------------------------------------------------------------===// - void EnableDetailed() { - config.enable_detailed(); - } - void EnableStandard() { - config.enable_standard(); - } -private: - unique_ptr CreateTree(const LogicalOperator &op); - unique_ptr CreateTree(const PhysicalOperator &op); - unique_ptr CreateTree(const QueryProfiler::TreeNode &op); - string ExtraInfoSeparator(); - unique_ptr CreateRenderNode(string name, string extra_info); - unique_ptr CreateNode(const LogicalOperator &op); - unique_ptr CreateNode(const PhysicalOperator &op); - unique_ptr CreateNode(const QueryProfiler::TreeNode &op); -private: - //! The configuration used for rendering - TreeRendererConfig config; -private: - void RenderTopLayer(RenderTree &root, std::ostream &ss, idx_t y); - void RenderBoxContent(RenderTree &root, std::ostream &ss, idx_t y); - void RenderBottomLayer(RenderTree &root, std::ostream &ss, idx_t y); - bool CanSplitOnThisChar(char l); - bool IsPadding(char l); - string RemovePadding(string l); - void SplitUpExtraInfo(const string &extra_info, vector &result); - void SplitStringBuffer(const string &source, vector &result); - template - idx_t CreateRenderTreeRecursive(RenderTree &result, const T &op, idx_t x, idx_t y); +namespace duckdb { - template - unique_ptr CreateRenderTree(const T &op); - string ExtractExpressionsRecursive(ExpressionInformation &states); -}; +using ValidityBytes = RowLayout::ValidityBytes; +using Predicates = RowOperations::Predicates; -} // namespace duckdb +template +static idx_t SelectComparison(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + throw NotImplementedException("Unsupported nested comparison operand for RowOperations::Match"); +} +template <> 
+idx_t SelectComparison(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + return VectorOperations::NestedEquals(left, right, vcount, sel, count, true_sel, false_sel); +} +template <> +idx_t SelectComparison(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + return VectorOperations::NestedNotEquals(left, right, vcount, sel, count, true_sel, false_sel); +} +template <> +idx_t SelectComparison(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + return VectorOperations::NestedGreaterThan(left, right, vcount, sel, count, true_sel, false_sel); +} +template <> +idx_t SelectComparison(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, + idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { + return VectorOperations::NestedGreaterThanEquals(left, right, vcount, sel, count, true_sel, false_sel); +} +template <> +idx_t SelectComparison(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + return VectorOperations::NestedLessThan(left, right, vcount, sel, count, true_sel, false_sel); +} +template <> +idx_t SelectComparison(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, + idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { + return VectorOperations::NestedLessThanEquals(left, right, vcount, sel, count, true_sel, false_sel); +} +template +static void TemplatedMatchType(VectorData &col, Vector &rows, SelectionVector &sel, idx_t &count, idx_t col_offset, + idx_t col_no, SelectionVector *no_match, idx_t &no_match_count) { + // Precompute row_mask indexes + idx_t entry_idx; + idx_t idx_in_entry; + ValidityBytes::GetEntryIndex(col_no, 
entry_idx, idx_in_entry); -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 -// See the end of this file for a list + auto data = (T *)col.data; + auto ptrs = FlatVector::GetData(rows); + idx_t match_count = 0; + if (!col.validity.AllValid()) { + for (idx_t i = 0; i < count; i++) { + auto idx = sel.get_index(i); + auto row = ptrs[idx]; + ValidityBytes row_mask(row); + auto isnull = !row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry); + auto col_idx = col.sel->get_index(idx); + if (!col.validity.RowIsValid(col_idx)) { + if (isnull) { + // match: move to next value to compare + sel.set_index(match_count++, idx); + } else { + if (NO_MATCH_SEL) { + no_match->set_index(no_match_count++, idx); + } + } + } else { + auto value = Load(row + col_offset); + if (!isnull && OP::template Operation(data[col_idx], value)) { + sel.set_index(match_count++, idx); + } else { + if (NO_MATCH_SEL) { + no_match->set_index(no_match_count++, idx); + } + } + } + } + } else { + for (idx_t i = 0; i < count; i++) { + auto idx = sel.get_index(i); -#include -#include -#include + auto row = ptrs[idx]; + ValidityBytes row_mask(row); + auto isnull = !row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry); -namespace duckdb { + auto col_idx = col.sel->get_index(idx); + auto value = Load(row + col_offset); + if (!isnull && OP::template Operation(data[col_idx], value)) { + sel.set_index(match_count++, idx); + } else { + if (NO_MATCH_SEL) { + no_match->set_index(no_match_count++, idx); + } + } + } + } + count = match_count; +} -enum class UnicodeType {INVALID, ASCII, UNICODE}; +template +static void TemplatedMatchNested(Vector &col, Vector &rows, const idx_t vcount, SelectionVector &sel, idx_t &count, + const idx_t col_offset, const idx_t col_no, SelectionVector *no_match, + idx_t &no_match_count) { + // Gather a scattered Vector containing the column values being matched + Vector key(col.GetType()); + 
RowOperations::Gather(rows, sel, key, sel, count, col_offset, col_no); + if (NO_MATCH_SEL) { + SelectionVector no_match_sel_offset(no_match->data() + no_match_count); + auto match_count = SelectComparison(col, key, vcount, sel, count, &sel, &no_match_sel_offset); + no_match_count += count - match_count; + count = match_count; + } else { + count = SelectComparison(col, key, vcount, sel, count, &sel, nullptr); + } +} -class Utf8Proc { -public: - //! Distinguishes ASCII, Valid UTF8 and Invalid UTF8 strings - static UnicodeType Analyze(const char *s, size_t len); - //! Performs UTF NFC normalization of string, return value needs to be free'd - static char* Normalize(const char* s, size_t len); - //! Returns whether or not the UTF8 string is valid - static bool IsValid(const char *s, size_t len); - //! Returns the position (in bytes) of the next grapheme cluster - static size_t NextGraphemeCluster(const char *s, size_t len, size_t pos); - //! Returns the position (in bytes) of the previous grapheme cluster - static size_t PreviousGraphemeCluster(const char *s, size_t len, size_t pos); +template +static void TemplatedMatchOp(Vector &vec, VectorData &col, const idx_t vcount, const RowLayout &layout, Vector &rows, + SelectionVector &sel, idx_t &count, idx_t col_no, SelectionVector *no_match, + idx_t &no_match_count) { + if (count == 0) { + return; + } + auto col_offset = layout.GetOffsets()[col_no]; + switch (layout.GetTypes()[col_no].InternalType()) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + TemplatedMatchType(col, rows, sel, count, col_offset, col_no, no_match, + no_match_count); + break; + case PhysicalType::INT16: + TemplatedMatchType(col, rows, sel, count, col_offset, col_no, no_match, + no_match_count); + break; + case PhysicalType::INT32: + TemplatedMatchType(col, rows, sel, count, col_offset, col_no, no_match, + no_match_count); + break; + case PhysicalType::INT64: + TemplatedMatchType(col, rows, sel, count, col_offset, col_no, no_match, + 
no_match_count); + break; + case PhysicalType::UINT8: + TemplatedMatchType(col, rows, sel, count, col_offset, col_no, no_match, + no_match_count); + break; + case PhysicalType::UINT16: + TemplatedMatchType(col, rows, sel, count, col_offset, col_no, no_match, + no_match_count); + break; + case PhysicalType::UINT32: + TemplatedMatchType(col, rows, sel, count, col_offset, col_no, no_match, + no_match_count); + break; + case PhysicalType::UINT64: + TemplatedMatchType(col, rows, sel, count, col_offset, col_no, no_match, + no_match_count); + break; + case PhysicalType::INT128: + TemplatedMatchType(col, rows, sel, count, col_offset, col_no, no_match, + no_match_count); + break; + case PhysicalType::FLOAT: + TemplatedMatchType(col, rows, sel, count, col_offset, col_no, no_match, + no_match_count); + break; + case PhysicalType::DOUBLE: + TemplatedMatchType(col, rows, sel, count, col_offset, col_no, no_match, + no_match_count); + break; + case PhysicalType::INTERVAL: + TemplatedMatchType(col, rows, sel, count, col_offset, col_no, no_match, + no_match_count); + break; + case PhysicalType::VARCHAR: + TemplatedMatchType(col, rows, sel, count, col_offset, col_no, no_match, + no_match_count); + break; + case PhysicalType::LIST: + case PhysicalType::MAP: + case PhysicalType::STRUCT: + TemplatedMatchNested(vec, rows, vcount, sel, count, col_offset, col_no, no_match, + no_match_count); + break; + default: + throw InternalException("Unsupported column type for RowOperations::Match"); + } +} - //! Transform a codepoint to utf8 and writes it to "c", sets "sz" to the size of the codepoint - static bool CodepointToUtf8(int cp, int &sz, char *c); - //! Returns the codepoint length in bytes when encoded in UTF8 - static int CodepointLength(int cp); - //! 
Transform a UTF8 string to a codepoint; returns the codepoint and writes the length of the codepoint (in UTF8) to sz - static int32_t UTF8ToCodepoint(const char *c, int &sz); - static size_t RenderWidth(const char *s, size_t len, size_t pos); +template +static void TemplatedMatch(DataChunk &columns, VectorData col_data[], const RowLayout &layout, Vector &rows, + const Predicates &predicates, SelectionVector &sel, idx_t &count, SelectionVector *no_match, + idx_t &no_match_count) { + const idx_t vcount = columns.size(); + for (idx_t col_no = 0; col_no < predicates.size(); ++col_no) { + auto &vec = columns.data[col_no]; + auto &col = col_data[col_no]; + switch (predicates[col_no]) { + case ExpressionType::COMPARE_EQUAL: + TemplatedMatchOp(vec, col, vcount, layout, rows, sel, count, col_no, no_match, + no_match_count); + break; + case ExpressionType::COMPARE_NOTEQUAL: + TemplatedMatchOp(vec, col, vcount, layout, rows, sel, count, col_no, no_match, + no_match_count); + break; + case ExpressionType::COMPARE_GREATERTHAN: + TemplatedMatchOp(vec, col, vcount, layout, rows, sel, count, col_no, no_match, + no_match_count); + break; + case ExpressionType::COMPARE_GREATERTHANOREQUALTO: + TemplatedMatchOp(vec, col, vcount, layout, rows, sel, count, col_no, + no_match, no_match_count); + break; + case ExpressionType::COMPARE_LESSTHAN: + TemplatedMatchOp(vec, col, vcount, layout, rows, sel, count, col_no, no_match, + no_match_count); + break; + case ExpressionType::COMPARE_LESSTHANOREQUALTO: + TemplatedMatchOp(vec, col, vcount, layout, rows, sel, count, col_no, no_match, + no_match_count); + break; + default: + throw InternalException("Unsupported comparison type for RowOperations::Match"); + } + } +} -}; +idx_t RowOperations::Match(DataChunk &columns, VectorData col_data[], const RowLayout &layout, Vector &rows, + const Predicates &predicates, SelectionVector &sel, idx_t count, SelectionVector *no_match, + idx_t &no_match_count) { + if (no_match) { + TemplatedMatch(columns, 
col_data, layout, rows, predicates, sel, count, no_match, no_match_count); + } else { + TemplatedMatch(columns, col_data, layout, rows, predicates, sel, count, no_match, no_match_count); + } + return count; } +} // namespace duckdb -// LICENSE_CHANGE_END -#include -namespace duckdb { -RenderTree::RenderTree(idx_t width_p, idx_t height_p) : width(width_p), height(height_p) { - nodes = unique_ptr[]>(new unique_ptr[(width + 1) * (height + 1)]); -} +namespace duckdb { -RenderTreeNode *RenderTree::GetNode(idx_t x, idx_t y) { - if (x >= width || y >= height) { - return nullptr; - } - return nodes[GetPosition(x, y)].get(); -} +template +void TemplatedRadixScatter(VectorData &vdata, const SelectionVector &sel, idx_t add_count, data_ptr_t *key_locations, + const bool desc, const bool has_null, const bool nulls_first, const bool is_little_endian, + const idx_t offset) { + auto source = (T *)vdata.data; + if (has_null) { + auto &validity = vdata.validity; + const data_t valid = nulls_first ? 1 : 0; + const data_t invalid = 1 - valid; -bool RenderTree::HasNode(idx_t x, idx_t y) { - if (x >= width || y >= height) { - return false; + for (idx_t i = 0; i < add_count; i++) { + auto idx = sel.get_index(i); + auto source_idx = vdata.sel->get_index(idx) + offset; + // write validity and according value + if (validity.RowIsValid(source_idx)) { + key_locations[i][0] = valid; + EncodeData(key_locations[i] + 1, source[source_idx], is_little_endian); + // invert bits if desc + if (desc) { + for (idx_t s = 1; s < sizeof(T) + 1; s++) { + *(key_locations[i] + s) = ~*(key_locations[i] + s); + } + } + } else { + key_locations[i][0] = invalid; + memset(key_locations[i] + 1, '\0', sizeof(T)); + } + key_locations[i] += sizeof(T) + 1; + } + } else { + for (idx_t i = 0; i < add_count; i++) { + auto idx = sel.get_index(i); + auto source_idx = vdata.sel->get_index(idx) + offset; + // write value + EncodeData(key_locations[i], source[source_idx], is_little_endian); + // invert bits if desc + if (desc) 
{ + for (idx_t s = 0; s < sizeof(T); s++) { + *(key_locations[i] + s) = ~*(key_locations[i] + s); + } + } + key_locations[i] += sizeof(T); + } } - return nodes[GetPosition(x, y)].get() != nullptr; -} - -idx_t RenderTree::GetPosition(idx_t x, idx_t y) { - return y * width + x; } -void RenderTree::SetNode(idx_t x, idx_t y, unique_ptr node) { - nodes[GetPosition(x, y)] = move(node); -} +void RadixScatterStringVector(VectorData &vdata, const SelectionVector &sel, idx_t add_count, data_ptr_t *key_locations, + const bool desc, const bool has_null, const bool nulls_first, const idx_t prefix_len, + idx_t offset) { + auto source = (string_t *)vdata.data; + if (has_null) { + auto &validity = vdata.validity; + const data_t valid = nulls_first ? 1 : 0; + const data_t invalid = 1 - valid; -void TreeRenderer::RenderTopLayer(RenderTree &root, std::ostream &ss, idx_t y) { - for (idx_t x = 0; x < root.width; x++) { - if (x * config.NODE_RENDER_WIDTH >= config.MAXIMUM_RENDER_WIDTH) { - break; - } - if (root.HasNode(x, y)) { - ss << config.LTCORNER; - ss << StringUtil::Repeat(config.HORIZONTAL, config.NODE_RENDER_WIDTH / 2 - 1); - if (y == 0) { - // top level node: no node above this one - ss << config.HORIZONTAL; + for (idx_t i = 0; i < add_count; i++) { + auto idx = sel.get_index(i); + auto source_idx = vdata.sel->get_index(idx) + offset; + // write validity and according value + if (validity.RowIsValid(source_idx)) { + key_locations[i][0] = valid; + EncodeStringDataPrefix(key_locations[i] + 1, source[source_idx], prefix_len); + // invert bits if desc + if (desc) { + for (idx_t s = 1; s < prefix_len + 1; s++) { + *(key_locations[i] + s) = ~*(key_locations[i] + s); + } + } } else { - // render connection to node above this one - ss << config.DMIDDLE; + key_locations[i][0] = invalid; + memset(key_locations[i] + 1, '\0', prefix_len); } - ss << StringUtil::Repeat(config.HORIZONTAL, config.NODE_RENDER_WIDTH / 2 - 1); - ss << config.RTCORNER; - } else { - ss << StringUtil::Repeat(" ", 
config.NODE_RENDER_WIDTH); + key_locations[i] += prefix_len + 1; + } + } else { + for (idx_t i = 0; i < add_count; i++) { + auto idx = sel.get_index(i); + auto source_idx = vdata.sel->get_index(idx) + offset; + // write value + EncodeStringDataPrefix(key_locations[i], source[source_idx], prefix_len); + // invert bits if desc + if (desc) { + for (idx_t s = 0; s < prefix_len; s++) { + *(key_locations[i] + s) = ~*(key_locations[i] + s); + } + } + key_locations[i] += prefix_len; } } - ss << std::endl; } -void TreeRenderer::RenderBottomLayer(RenderTree &root, std::ostream &ss, idx_t y) { - for (idx_t x = 0; x <= root.width; x++) { - if (x * config.NODE_RENDER_WIDTH >= config.MAXIMUM_RENDER_WIDTH) { - break; +void RadixScatterListVector(Vector &v, VectorData &vdata, const SelectionVector &sel, idx_t add_count, + data_ptr_t *key_locations, const bool desc, const bool has_null, const bool nulls_first, + const idx_t prefix_len, const idx_t width, const idx_t offset) { + auto list_data = ListVector::GetData(v); + auto &child_vector = ListVector::GetEntry(v); + auto list_size = ListVector::GetListSize(v); + + // serialize null values + if (has_null) { + auto &validity = vdata.validity; + const data_t valid = nulls_first ? 
1 : 0; + const data_t invalid = 1 - valid; + + for (idx_t i = 0; i < add_count; i++) { + auto idx = sel.get_index(i); + auto source_idx = vdata.sel->get_index(idx) + offset; + data_ptr_t key_location = key_locations[i] + 1; + // write validity and according value + if (validity.RowIsValid(source_idx)) { + key_locations[i][0] = valid; + key_locations[i]++; + auto &list_entry = list_data[source_idx]; + if (list_entry.length > 0) { + // denote that the list is not empty with a 1 + key_locations[i][0] = 1; + key_locations[i]++; + RowOperations::RadixScatter(child_vector, list_size, FlatVector::INCREMENTAL_SELECTION_VECTOR, 1, + key_locations + i, false, true, false, prefix_len, width - 1, + list_entry.offset); + } else { + // denote that the list is empty with a 0 + key_locations[i][0] = 0; + key_locations[i]++; + memset(key_locations[i], '\0', width - 2); + } + // invert bits if desc + if (desc) { + for (idx_t s = 0; s < width - 1; s++) { + *(key_location + s) = ~*(key_location + s); + } + } + } else { + key_locations[i][0] = invalid; + memset(key_locations[i] + 1, '\0', width - 1); + key_locations[i] += width; + } } - if (root.HasNode(x, y)) { - ss << config.LDCORNER; - ss << StringUtil::Repeat(config.HORIZONTAL, config.NODE_RENDER_WIDTH / 2 - 1); - if (root.HasNode(x, y + 1)) { - // node below this one: connect to that one - ss << config.TMIDDLE; + } else { + for (idx_t i = 0; i < add_count; i++) { + auto idx = sel.get_index(i); + auto source_idx = vdata.sel->get_index(idx) + offset; + auto &list_entry = list_data[source_idx]; + data_ptr_t key_location = key_locations[i]; + if (list_entry.length > 0) { + // denote that the list is not empty with a 1 + key_locations[i][0] = 1; + key_locations[i]++; + RowOperations::RadixScatter(child_vector, list_size, FlatVector::INCREMENTAL_SELECTION_VECTOR, 1, + key_locations + i, false, true, false, prefix_len, width - 1, + list_entry.offset); } else { - // no node below this one: end the box - ss << config.HORIZONTAL; + // 
denote that the list is empty with a 0 + key_locations[i][0] = 0; + key_locations[i]++; + memset(key_locations[i], '\0', width - 1); + } + // invert bits if desc + if (desc) { + for (idx_t s = 0; s < width; s++) { + *(key_location + s) = ~*(key_location + s); + } } - ss << StringUtil::Repeat(config.HORIZONTAL, config.NODE_RENDER_WIDTH / 2 - 1); - ss << config.RDCORNER; - } else if (root.HasNode(x, y + 1)) { - ss << StringUtil::Repeat(" ", config.NODE_RENDER_WIDTH / 2); - ss << config.VERTICAL; - ss << StringUtil::Repeat(" ", config.NODE_RENDER_WIDTH / 2); - } else { - ss << StringUtil::Repeat(" ", config.NODE_RENDER_WIDTH); } } - ss << std::endl; } -string AdjustTextForRendering(string source, idx_t max_render_width) { - idx_t cpos = 0; - idx_t render_width = 0; - vector> render_widths; - while (cpos < source.size()) { - idx_t char_render_width = Utf8Proc::RenderWidth(source.c_str(), source.size(), cpos); - cpos = Utf8Proc::NextGraphemeCluster(source.c_str(), source.size(), cpos); - render_width += char_render_width; - render_widths.emplace_back(cpos, render_width); - if (render_width > max_render_width) { - break; +void RadixScatterStructVector(Vector &v, VectorData &vdata, idx_t vcount, const SelectionVector &sel, idx_t add_count, + data_ptr_t *key_locations, const bool desc, const bool has_null, const bool nulls_first, + const idx_t prefix_len, idx_t width, const idx_t offset) { + // serialize null values + if (has_null) { + auto &validity = vdata.validity; + const data_t valid = nulls_first ? 
1 : 0; + const data_t invalid = 1 - valid; + + for (idx_t i = 0; i < add_count; i++) { + auto idx = sel.get_index(i); + auto source_idx = vdata.sel->get_index(idx) + offset; + // write validity and according value + if (validity.RowIsValid(source_idx)) { + key_locations[i][0] = valid; + } else { + key_locations[i][0] = invalid; + } + key_locations[i]++; } + width--; } - if (render_width > max_render_width) { - // need to find a position to truncate - for (idx_t pos = render_widths.size(); pos > 0; pos--) { - if (render_widths[pos - 1].second < max_render_width - 4) { - return source.substr(0, render_widths[pos - 1].first) + "..." + - string(max_render_width - render_widths[pos - 1].second - 3, ' '); + // serialize the struct + auto &child_vector = *StructVector::GetEntries(v)[0]; + RowOperations::RadixScatter(child_vector, vcount, FlatVector::INCREMENTAL_SELECTION_VECTOR, add_count, + key_locations, false, true, false, prefix_len, width, offset); + // invert bits if desc + if (desc) { + for (idx_t i = 0; i < add_count; i++) { + for (idx_t s = 0; s < width; s++) { + *(key_locations[i] - width + s) = ~*(key_locations[i] - width + s); } } - source = "..."; } - // need to pad with spaces - idx_t total_spaces = max_render_width - render_width; - idx_t half_spaces = total_spaces / 2; - idx_t extra_left_space = total_spaces % 2 == 0 ? 
0 : 1; - return string(half_spaces + extra_left_space, ' ') + source + string(half_spaces, ' '); } -static bool NodeHasMultipleChildren(RenderTree &root, idx_t x, idx_t y) { - for (; x < root.width && !root.HasNode(x + 1, y); x++) { - if (root.HasNode(x + 1, y + 1)) { - return true; - } +void RowOperations::RadixScatter(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, + data_ptr_t *key_locations, bool desc, bool has_null, bool nulls_first, + idx_t prefix_len, idx_t width, idx_t offset) { + auto is_little_endian = IsLittleEndian(); + + VectorData vdata; + v.Orrify(vcount, vdata); + switch (v.GetType().InternalType()) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + TemplatedRadixScatter(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first, + is_little_endian, offset); + break; + case PhysicalType::INT16: + TemplatedRadixScatter(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first, + is_little_endian, offset); + break; + case PhysicalType::INT32: + TemplatedRadixScatter(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first, + is_little_endian, offset); + break; + case PhysicalType::INT64: + TemplatedRadixScatter(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first, + is_little_endian, offset); + break; + case PhysicalType::UINT8: + TemplatedRadixScatter(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first, + is_little_endian, offset); + break; + case PhysicalType::UINT16: + TemplatedRadixScatter(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first, + is_little_endian, offset); + break; + case PhysicalType::UINT32: + TemplatedRadixScatter(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first, + is_little_endian, offset); + break; + case PhysicalType::UINT64: + TemplatedRadixScatter(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first, + is_little_endian, offset); + break; + case PhysicalType::INT128: + TemplatedRadixScatter(vdata, sel, 
ser_count, key_locations, desc, has_null, nulls_first, + is_little_endian, offset); + break; + case PhysicalType::FLOAT: + TemplatedRadixScatter(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first, + is_little_endian, offset); + break; + case PhysicalType::DOUBLE: + TemplatedRadixScatter(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first, + is_little_endian, offset); + break; + case PhysicalType::INTERVAL: + TemplatedRadixScatter(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first, + is_little_endian, offset); + break; + case PhysicalType::VARCHAR: + RadixScatterStringVector(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first, prefix_len, offset); + break; + case PhysicalType::LIST: + RadixScatterListVector(v, vdata, sel, ser_count, key_locations, desc, has_null, nulls_first, prefix_len, width, + offset); + break; + case PhysicalType::STRUCT: + RadixScatterStructVector(v, vdata, vcount, sel, ser_count, key_locations, desc, has_null, nulls_first, + prefix_len, width, offset); + break; + default: + throw NotImplementedException("Cannot ORDER BY column with type %s", v.GetType().ToString()); } - return false; } -void TreeRenderer::RenderBoxContent(RenderTree &root, std::ostream &ss, idx_t y) { - // we first need to figure out how high our boxes are going to be - vector> extra_info; - idx_t extra_height = 0; - extra_info.resize(root.width); - for (idx_t x = 0; x < root.width; x++) { - auto node = root.GetNode(x, y); - if (node) { - SplitUpExtraInfo(node->extra_text, extra_info[x]); - if (extra_info[x].size() > extra_height) { - extra_height = extra_info[x].size(); - } - } - } - extra_height = MinValue(extra_height, config.MAX_EXTRA_LINES); - idx_t halfway_point = (extra_height + 1) / 2; - // now we render the actual node - for (idx_t render_y = 0; render_y <= extra_height; render_y++) { - for (idx_t x = 0; x < root.width; x++) { - if (x * config.NODE_RENDER_WIDTH >= config.MAXIMUM_RENDER_WIDTH) { - break; - } - 
auto node = root.GetNode(x, y); - if (!node) { - if (render_y == halfway_point) { - bool has_child_to_the_right = NodeHasMultipleChildren(root, x, y); - if (root.HasNode(x, y + 1)) { - // node right below this one - ss << StringUtil::Repeat(config.HORIZONTAL, config.NODE_RENDER_WIDTH / 2); - ss << config.RTCORNER; - if (has_child_to_the_right) { - // but we have another child to the right! keep rendering the line - ss << StringUtil::Repeat(config.HORIZONTAL, config.NODE_RENDER_WIDTH / 2); - } else { - // only a child below this one: fill the rest with spaces - ss << StringUtil::Repeat(" ", config.NODE_RENDER_WIDTH / 2); - } - } else if (has_child_to_the_right) { - // child to the right, but no child right below this one: render a full line - ss << StringUtil::Repeat(config.HORIZONTAL, config.NODE_RENDER_WIDTH); - } else { - // empty spot: render spaces - ss << StringUtil::Repeat(" ", config.NODE_RENDER_WIDTH); - } - } else if (render_y >= halfway_point) { - if (root.HasNode(x, y + 1)) { - // we have a node below this empty spot: render a vertical line - ss << StringUtil::Repeat(" ", config.NODE_RENDER_WIDTH / 2); - ss << config.VERTICAL; - ss << StringUtil::Repeat(" ", config.NODE_RENDER_WIDTH / 2); - } else { - // empty spot: render spaces - ss << StringUtil::Repeat(" ", config.NODE_RENDER_WIDTH); - } - } else { - // empty spot: render spaces - ss << StringUtil::Repeat(" ", config.NODE_RENDER_WIDTH); - } - } else { - ss << config.VERTICAL; - // figure out what to render - string render_text; - if (render_y == 0) { - render_text = node->name; - } else { - if (render_y <= extra_info[x].size()) { - render_text = extra_info[x][render_y - 1]; - } - } - render_text = AdjustTextForRendering(render_text, config.NODE_RENDER_WIDTH - 2); - ss << render_text; +} // namespace duckdb +//===--------------------------------------------------------------------===// +// row_scatter.cpp +// Description: This file contains the implementation of the row scattering +// operators 
+//===--------------------------------------------------------------------===// - if (render_y == halfway_point && NodeHasMultipleChildren(root, x, y)) { - ss << config.LMIDDLE; - } else { - ss << config.VERTICAL; - } - } - } - ss << std::endl; - } -} -string TreeRenderer::ToString(const LogicalOperator &op) { - std::stringstream ss; - Render(op, ss); - return ss.str(); -} -string TreeRenderer::ToString(const PhysicalOperator &op) { - std::stringstream ss; - Render(op, ss); - return ss.str(); -} -string TreeRenderer::ToString(const QueryProfiler::TreeNode &op) { - std::stringstream ss; - Render(op, ss); - return ss.str(); -} -void TreeRenderer::Render(const LogicalOperator &op, std::ostream &ss) { - auto tree = CreateTree(op); - ToStream(*tree, ss); -} -void TreeRenderer::Render(const PhysicalOperator &op, std::ostream &ss) { - auto tree = CreateTree(op); - ToStream(*tree, ss); -} -void TreeRenderer::Render(const QueryProfiler::TreeNode &op, std::ostream &ss) { - auto tree = CreateTree(op); - ToStream(*tree, ss); -} -void TreeRenderer::ToStream(RenderTree &root, std::ostream &ss) { - while (root.width * config.NODE_RENDER_WIDTH > config.MAXIMUM_RENDER_WIDTH) { - if (config.NODE_RENDER_WIDTH - 2 < config.MINIMUM_RENDER_WIDTH) { - break; - } - config.NODE_RENDER_WIDTH -= 2; - } - for (idx_t y = 0; y < root.height; y++) { - // start by rendering the top layer - RenderTopLayer(root, ss, y); - // now we render the content of the boxes - RenderBoxContent(root, ss, y); - // render the bottom layer of each of the boxes - RenderBottomLayer(root, ss, y); - } -} -bool TreeRenderer::CanSplitOnThisChar(char l) { - return (l < '0' || (l > '9' && l < 'A') || (l > 'Z' && l < 'a')) && l != '_'; -} +namespace duckdb { -bool TreeRenderer::IsPadding(char l) { - return l == ' ' || l == '\t' || l == '\n' || l == '\r'; -} +using ValidityBytes = RowLayout::ValidityBytes; -string TreeRenderer::RemovePadding(string l) { - idx_t start = 0, end = l.size(); - while (start < l.size() && 
IsPadding(l[start])) { - start++; - } - while (end > 0 && IsPadding(l[end - 1])) { - end--; - } - return l.substr(start, end - start); -} +template +static void TemplatedScatter(VectorData &col, Vector &rows, const SelectionVector &sel, const idx_t count, + const idx_t col_offset, const idx_t col_no) { + auto data = (T *)col.data; + auto ptrs = FlatVector::GetData(rows); -void TreeRenderer::SplitStringBuffer(const string &source, vector &result) { - idx_t max_line_render_size = config.NODE_RENDER_WIDTH - 2; - // utf8 in prompt, get render width - idx_t cpos = 0; - idx_t start_pos = 0; - idx_t render_width = 0; - idx_t last_possible_split = 0; - while (cpos < source.size()) { - // check if we can split on this character - if (CanSplitOnThisChar(source[cpos])) { - last_possible_split = cpos; - } - size_t char_render_width = Utf8Proc::RenderWidth(source.c_str(), source.size(), cpos); - idx_t next_cpos = Utf8Proc::NextGraphemeCluster(source.c_str(), source.size(), cpos); - if (render_width + char_render_width > max_line_render_size) { - if (last_possible_split <= start_pos + 8) { - last_possible_split = cpos; + if (!col.validity.AllValid()) { + for (idx_t i = 0; i < count; i++) { + auto idx = sel.get_index(i); + auto col_idx = col.sel->get_index(idx); + auto row = ptrs[idx]; + + auto isnull = !col.validity.RowIsValid(col_idx); + T store_value = isnull ? 
NullValue() : data[col_idx]; + Store(store_value, row + col_offset); + if (isnull) { + ValidityBytes col_mask(ptrs[idx]); + col_mask.SetInvalidUnsafe(col_no); } - result.push_back(source.substr(start_pos, last_possible_split - start_pos)); - start_pos = last_possible_split; - cpos = last_possible_split; - render_width = 0; } - cpos = next_cpos; - render_width += char_render_width; - } - if (source.size() > start_pos) { - result.push_back(source.substr(start_pos, source.size() - start_pos)); - } -} - -void TreeRenderer::SplitUpExtraInfo(const string &extra_info, vector &result) { - if (extra_info.empty()) { - return; - } - auto splits = StringUtil::Split(extra_info, "\n"); - if (!splits.empty() && splits[0] != "[INFOSEPARATOR]") { - result.push_back(ExtraInfoSeparator()); - } - for (auto &split : splits) { - if (split == "[INFOSEPARATOR]") { - result.push_back(ExtraInfoSeparator()); - continue; + } else { + for (idx_t i = 0; i < count; i++) { + auto idx = sel.get_index(i); + auto col_idx = col.sel->get_index(idx); + auto row = ptrs[idx]; + + Store(data[col_idx], row + col_offset); } - string str = RemovePadding(split); - if (str.empty()) { - continue; + } +} + +static void ComputeStringEntrySizes(const VectorData &col, idx_t entry_sizes[], const SelectionVector &sel, + const idx_t count, const idx_t offset = 0) { + auto data = (const string_t *)col.data; + for (idx_t i = 0; i < count; i++) { + auto idx = sel.get_index(i); + auto col_idx = col.sel->get_index(idx) + offset; + const auto &str = data[col_idx]; + if (col.validity.RowIsValid(col_idx) && !str.IsInlined()) { + entry_sizes[i] += str.GetSize(); } - SplitStringBuffer(str, result); } } -string TreeRenderer::ExtraInfoSeparator() { - return StringUtil::Repeat(string(config.HORIZONTAL) + " ", (config.NODE_RENDER_WIDTH - 7) / 2); +static void ScatterStringVector(VectorData &col, Vector &rows, data_ptr_t str_locations[], const SelectionVector &sel, + const idx_t count, const idx_t col_offset, const idx_t col_no) { + 
auto string_data = (string_t *)col.data; + auto ptrs = FlatVector::GetData(rows); + + for (idx_t i = 0; i < count; i++) { + auto idx = sel.get_index(i); + auto col_idx = col.sel->get_index(idx); + auto row = ptrs[idx]; + if (!col.validity.RowIsValid(col_idx)) { + ValidityBytes col_mask(row); + col_mask.SetInvalidUnsafe(col_no); + Store(NullValue(), row + col_offset); + } else if (string_data[col_idx].IsInlined()) { + Store(string_data[col_idx], row + col_offset); + } else { + const auto &str = string_data[col_idx]; + string_t inserted((const char *)str_locations[i], str.GetSize()); + memcpy(inserted.GetDataWriteable(), str.GetDataUnsafe(), str.GetSize()); + str_locations[i] += str.GetSize(); + inserted.Finalize(); + Store(inserted, row + col_offset); + } + } } -unique_ptr TreeRenderer::CreateRenderNode(string name, string extra_info) { - auto result = make_unique(); - result->name = move(name); - result->extra_text = move(extra_info); - return result; +static void ScatterNestedVector(Vector &vec, VectorData &col, Vector &rows, data_ptr_t data_locations[], + const SelectionVector &sel, const idx_t count, const idx_t col_offset, + const idx_t col_no, const idx_t vcount) { + // Store pointers to the data in the row + // Do this first because SerializeVector destroys the locations + auto ptrs = FlatVector::GetData(rows); + for (idx_t i = 0; i < count; i++) { + auto idx = sel.get_index(i); + auto row = ptrs[idx]; + + Store(data_locations[i], row + col_offset); + } + + // Serialise the data + RowOperations::HeapScatter(vec, vcount, sel, count, col_no, data_locations, ptrs); } -template -static void GetTreeWidthHeight(const T &op, idx_t &width, idx_t &height) { - if (op.children.empty()) { - width = 1; - height = 1; +void RowOperations::Scatter(DataChunk &columns, VectorData col_data[], const RowLayout &layout, Vector &rows, + RowDataCollection &string_heap, const SelectionVector &sel, idx_t count) { + if (count == 0) { return; } - width = 0; - height = 0; - for (auto 
&child : op.children) { - idx_t child_width, child_height; - GetTreeWidthHeight(*child, child_width, child_height); - width += child_width; - height = MaxValue(height, child_height); + // Set the validity mask for each row before inserting data + auto ptrs = FlatVector::GetData(rows); + for (idx_t i = 0; i < count; ++i) { + auto row_idx = sel.get_index(i); + auto row = ptrs[row_idx]; + ValidityBytes(row).SetAllValid(layout.ColumnCount()); } - height++; -} -template -idx_t TreeRenderer::CreateRenderTreeRecursive(RenderTree &result, const T &op, idx_t x, idx_t y) { - auto node = TreeRenderer::CreateNode(op); - result.SetNode(x, y, move(node)); + const auto vcount = columns.size(); + auto &offsets = layout.GetOffsets(); + auto &types = layout.GetTypes(); - if (op.children.empty()) { - return 1; + // Compute the entry size of the variable size columns + vector> handles; + data_ptr_t data_locations[STANDARD_VECTOR_SIZE]; + if (!layout.AllConstant()) { + idx_t entry_sizes[STANDARD_VECTOR_SIZE]; + std::fill_n(entry_sizes, count, sizeof(idx_t)); + for (idx_t col_no = 0; col_no < types.size(); col_no++) { + if (TypeIsConstantSize(types[col_no].InternalType())) { + continue; + } + + auto &vec = columns.data[col_no]; + auto &col = col_data[col_no]; + switch (types[col_no].InternalType()) { + case PhysicalType::VARCHAR: + ComputeStringEntrySizes(col, entry_sizes, sel, count); + break; + case PhysicalType::LIST: + case PhysicalType::MAP: + case PhysicalType::STRUCT: + RowOperations::ComputeEntrySizes(vec, col, entry_sizes, vcount, count, sel); + break; + default: + throw InternalException("Unsupported type for RowOperations::Scatter"); + } + } + + // Build out the buffer space + string_heap.Build(count, data_locations, entry_sizes); + + // Serialize information that is needed for swizzling if the computation goes out-of-core + const idx_t heap_pointer_offset = layout.GetHeapPointerOffset(); + for (idx_t i = 0; i < count; i++) { + auto row_idx = sel.get_index(i); + auto row = 
ptrs[row_idx]; + // Pointer to this row in the heap block + Store(data_locations[i], row + heap_pointer_offset); + // Row size is stored in the heap in front of each row + Store(entry_sizes[i], data_locations[i]); + data_locations[i] += sizeof(idx_t); + } } - idx_t width = 0; - // render the children of this node - for (auto &child : op.children) { - width += CreateRenderTreeRecursive(result, *child, x + width, y + 1); + + for (idx_t col_no = 0; col_no < types.size(); col_no++) { + auto &vec = columns.data[col_no]; + auto &col = col_data[col_no]; + auto col_offset = offsets[col_no]; + + switch (types[col_no].InternalType()) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + TemplatedScatter(col, rows, sel, count, col_offset, col_no); + break; + case PhysicalType::INT16: + TemplatedScatter(col, rows, sel, count, col_offset, col_no); + break; + case PhysicalType::INT32: + TemplatedScatter(col, rows, sel, count, col_offset, col_no); + break; + case PhysicalType::INT64: + TemplatedScatter(col, rows, sel, count, col_offset, col_no); + break; + case PhysicalType::UINT8: + TemplatedScatter(col, rows, sel, count, col_offset, col_no); + break; + case PhysicalType::UINT16: + TemplatedScatter(col, rows, sel, count, col_offset, col_no); + break; + case PhysicalType::UINT32: + TemplatedScatter(col, rows, sel, count, col_offset, col_no); + break; + case PhysicalType::UINT64: + TemplatedScatter(col, rows, sel, count, col_offset, col_no); + break; + case PhysicalType::INT128: + TemplatedScatter(col, rows, sel, count, col_offset, col_no); + break; + case PhysicalType::FLOAT: + TemplatedScatter(col, rows, sel, count, col_offset, col_no); + break; + case PhysicalType::DOUBLE: + TemplatedScatter(col, rows, sel, count, col_offset, col_no); + break; + case PhysicalType::INTERVAL: + TemplatedScatter(col, rows, sel, count, col_offset, col_no); + break; + case PhysicalType::VARCHAR: + ScatterStringVector(col, rows, data_locations, sel, count, col_offset, col_no); + break; + case 
PhysicalType::LIST: + case PhysicalType::MAP: + case PhysicalType::STRUCT: + ScatterNestedVector(vec, col, rows, data_locations, sel, count, col_offset, col_no, vcount); + break; + default: + throw InternalException("Unsupported type for RowOperations::Scatter"); + } } - return width; } -template -unique_ptr TreeRenderer::CreateRenderTree(const T &op) { - idx_t width, height; - GetTreeWidthHeight(op, width, height); +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/serializer/buffered_deserializer.hpp +// +// +//===----------------------------------------------------------------------===// - auto result = make_unique(width, height); - // now fill in the tree - CreateRenderTreeRecursive(*result, op, 0, 0); - return result; -} -unique_ptr TreeRenderer::CreateNode(const LogicalOperator &op) { - return CreateRenderNode(op.GetName(), op.ParamsToString()); -} -unique_ptr TreeRenderer::CreateNode(const PhysicalOperator &op) { - return CreateRenderNode(op.GetName(), op.ParamsToString()); -} -string TreeRenderer::ExtractExpressionsRecursive(ExpressionInformation &state) { - string result = "\n[INFOSEPARATOR]"; - result += "\n" + state.name; - result += "\n" + StringUtil::Format("%.9f", double(state.time)); - if (state.children.empty()) { - return result; - } - // render the children of this node - for (auto &child : state.children) { - result += ExtractExpressionsRecursive(*child); - } - return result; -} -unique_ptr TreeRenderer::CreateNode(const QueryProfiler::TreeNode &op) { - auto result = TreeRenderer::CreateRenderNode(op.name, op.extra_info); - result->extra_text += "\n[INFOSEPARATOR]"; - result->extra_text += "\n" + to_string(op.info.elements); - string timing = StringUtil::Format("%.2f", op.info.time); - result->extra_text += "\n(" + timing + "s)"; - if (op.info.has_executor && config.detailed) { - string sample_count = to_string(op.info.executors_info->sample_count); - 
result->extra_text += "\n[INFOSEPARATOR]"; - result->extra_text += "\nsample_count: " + sample_count; - string sample_tuples_count = to_string(op.info.executors_info->sample_tuples_count); - result->extra_text += "\n[INFOSEPARATOR]"; - result->extra_text += "\nsample_tuples_count: " + sample_tuples_count; - string total_count = to_string(op.info.executors_info->total_count); - result->extra_text += "\n[INFOSEPARATOR]"; - result->extra_text += "\ntotal_count: " + total_count; - for (auto &state : op.info.executors_info->roots) { - result->extra_text += ExtractExpressionsRecursive(*state); - } - } +namespace duckdb { - return result; -} +class BufferedDeserializer : public Deserializer { +public: + BufferedDeserializer(data_ptr_t ptr, idx_t data_size); + explicit BufferedDeserializer(BufferedSerializer &serializer); -unique_ptr TreeRenderer::CreateTree(const LogicalOperator &op) { - return CreateRenderTree(op); + data_ptr_t ptr; + data_ptr_t endptr; + +public: + void ReadData(data_ptr_t buffer, uint64_t read_size) override; +}; + +} // namespace duckdb + + +#include + +namespace duckdb { + +BufferedDeserializer::BufferedDeserializer(data_ptr_t ptr, idx_t data_size) : ptr(ptr), endptr(ptr + data_size) { } -unique_ptr TreeRenderer::CreateTree(const PhysicalOperator &op) { - return CreateRenderTree(op); +BufferedDeserializer::BufferedDeserializer(BufferedSerializer &serializer) + : BufferedDeserializer(serializer.data, serializer.maximum_size) { } -unique_ptr TreeRenderer::CreateTree(const QueryProfiler::TreeNode &op) { - return CreateRenderTree(op); +void BufferedDeserializer::ReadData(data_ptr_t buffer, idx_t read_size) { + if (ptr + read_size > endptr) { + throw SerializationException("Failed to deserialize: not enough data in buffer to fulfill read request"); + } + memcpy(buffer, ptr, read_size); + ptr += read_size; } + } // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// 
duckdb/common/serializer/buffered_file_reader.hpp +// +// +//===----------------------------------------------------------------------===// + namespace duckdb { -constexpr const char *Blob::HEX_TABLE; -const int Blob::HEX_MAP[256] = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; +class BufferedFileReader : public Deserializer { +public: + BufferedFileReader(FileSystem &fs, const char *path); -idx_t Blob::GetStringSize(string_t blob) { - auto data = (const_data_ptr_t)blob.GetDataUnsafe(); - auto len = blob.GetSize(); - idx_t str_len = 0; - for (idx_t i = 0; i < len; i++) { - if (data[i] >= 32 && data[i] <= 127 && data[i] != '\\') { - // ascii characters are rendered as-is - str_len++; - } else { - // non-ascii characters are rendered as hexadecimal (e.g. 
\x00) - str_len += 4; - } - } - return str_len; -} + FileSystem &fs; + unique_ptr data; + idx_t offset; + idx_t read_data; + unique_ptr handle; -void Blob::ToString(string_t blob, char *output) { - auto data = (const_data_ptr_t)blob.GetDataUnsafe(); - auto len = blob.GetSize(); - idx_t str_idx = 0; - for (idx_t i = 0; i < len; i++) { - if (data[i] >= 32 && data[i] <= 127 && data[i] != '\\') { - // ascii characters are rendered as-is - output[str_idx++] = data[i]; - } else { - auto byte_a = data[i] >> 4; - auto byte_b = data[i] & 0x0F; - D_ASSERT(byte_a >= 0 && byte_a < 16); - D_ASSERT(byte_b >= 0 && byte_b < 16); - // non-ascii characters are rendered as hexadecimal (e.g. \x00) - output[str_idx++] = '\\'; - output[str_idx++] = 'x'; - output[str_idx++] = Blob::HEX_TABLE[byte_a]; - output[str_idx++] = Blob::HEX_TABLE[byte_b]; - } +public: + void ReadData(data_ptr_t buffer, uint64_t read_size) override; + //! Returns true if the reader has finished reading the entire file + bool Finished(); + + idx_t FileSize() { + return file_size; } - D_ASSERT(str_idx == GetStringSize(blob)); -} -string Blob::ToString(string_t blob) { - auto str_len = GetStringSize(blob); - auto buffer = std::unique_ptr(new char[str_len]); - Blob::ToString(blob, buffer.get()); - return string(buffer.get(), str_len); +private: + idx_t file_size; + idx_t total_read; +}; + +} // namespace duckdb + + + + +#include +#include + +namespace duckdb { + +BufferedFileReader::BufferedFileReader(FileSystem &fs, const char *path) + : fs(fs), data(unique_ptr(new data_t[FILE_BUFFER_SIZE])), offset(0), read_data(0), total_read(0) { + handle = fs.OpenFile(path, FileFlags::FILE_FLAGS_READ, FileLockType::READ_LOCK); + file_size = fs.GetFileSize(*handle); } -idx_t Blob::GetBlobSize(string_t str) { - auto data = (const_data_ptr_t)str.GetDataUnsafe(); - auto len = str.GetSize(); - idx_t str_len = 0; - for (idx_t i = 0; i < len; i++) { - if (data[i] == '\\') { - if (i + 3 >= len) { - throw ConversionException("Invalid hex 
escape code encountered in string -> blob conversion: " - "unterminated escape code at end of blob"); - } - if (data[i + 1] != 'x' || Blob::HEX_MAP[data[i + 2]] < 0 || Blob::HEX_MAP[data[i + 3]] < 0) { - throw ConversionException("Invalid hex escape code encountered in string -> blob conversion: %s", - string((char *)data + i, 4)); +void BufferedFileReader::ReadData(data_ptr_t target_buffer, uint64_t read_size) { + // first copy anything we can from the buffer + data_ptr_t end_ptr = target_buffer + read_size; + while (true) { + idx_t to_read = MinValue(end_ptr - target_buffer, read_data - offset); + if (to_read > 0) { + memcpy(target_buffer, data.get() + offset, to_read); + offset += to_read; + target_buffer += to_read; + } + if (target_buffer < end_ptr) { + D_ASSERT(offset == read_data); + total_read += read_data; + // did not finish reading yet but exhausted buffer + // read data into buffer + offset = 0; + read_data = fs.Read(*handle, data.get(), FILE_BUFFER_SIZE); + if (read_data == 0) { + throw SerializationException("not enough data in file to deserialize result"); } - str_len++; - i += 3; - } else if (data[i] >= 32 && data[i] <= 127) { - str_len++; } else { - throw ConversionException("Invalid byte encountered in STRING -> BLOB conversion. All non-ascii characters " - "must be escaped with hex codes (e.g. 
\\xAA)"); + return; } } - return str_len; } -void Blob::ToBlob(string_t str, data_ptr_t output) { - auto data = (const_data_ptr_t)str.GetDataUnsafe(); - auto len = str.GetSize(); - idx_t blob_idx = 0; - for (idx_t i = 0; i < len; i++) { - if (data[i] == '\\') { - int byte_a = Blob::HEX_MAP[data[i + 2]]; - int byte_b = Blob::HEX_MAP[data[i + 3]]; - D_ASSERT(i + 3 < len); - D_ASSERT(byte_a >= 0 && byte_b >= 0); - D_ASSERT(data[i + 1] == 'x'); - output[blob_idx++] = (byte_a << 4) + byte_b; - i += 3; - } else if (data[i] >= 32 && data[i] <= 127) { - output[blob_idx++] = data_t(data[i]); - } else { - throw ConversionException("Invalid byte encountered in STRING -> BLOB conversion. All non-ascii characters " - "must be escaped with hex codes (e.g. \\xAA)"); - } - } - D_ASSERT(blob_idx == GetBlobSize(str)); +bool BufferedFileReader::Finished() { + return total_read + offset == file_size; } -string Blob::ToBlob(string_t str) { - auto blob_len = GetBlobSize(str); - auto buffer = std::unique_ptr(new char[blob_len]); - Blob::ToBlob(str, (data_ptr_t)buffer.get()); - return string(buffer.get(), blob_len); -} +} // namespace duckdb -// base64 functions are adapted from https://gist.github.com/tomykaira/f0fd86b6c73063283afe550bc5d77594 -idx_t Blob::ToBase64Size(string_t blob) { - // every 4 characters in base64 encode 3 bytes, plus (potential) padding at the end - auto input_size = blob.GetSize(); - return ((input_size + 2) / 3) * 4; -} -void Blob::ToBase64(string_t blob, char *output) { - auto input_data = (const_data_ptr_t)blob.GetDataUnsafe(); - auto input_size = blob.GetSize(); - idx_t out_idx = 0; - idx_t i; - // convert the bulk of the string to base64 - // this happens in steps of 3 bytes -> 4 output bytes - for (i = 0; i + 2 < input_size; i += 3) { - output[out_idx++] = Blob::BASE64_MAP[(input_data[i] >> 2) & 0x3F]; - output[out_idx++] = Blob::BASE64_MAP[((input_data[i] & 0x3) << 4) | ((input_data[i + 1] & 0xF0) >> 4)]; - output[out_idx++] = 
Blob::BASE64_MAP[((input_data[i + 1] & 0xF) << 2) | ((input_data[i + 2] & 0xC0) >> 6)]; - output[out_idx++] = Blob::BASE64_MAP[input_data[i + 2] & 0x3F]; - } +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/algorithm.hpp +// +// +//===----------------------------------------------------------------------===// - if (i < input_size) { - // there are one or two bytes left over: we have to insert padding - // first write the first 6 bits of the first byte - output[out_idx++] = Blob::BASE64_MAP[(input_data[i] >> 2) & 0x3F]; - // now check the character count - if (i == input_size - 1) { - // single byte left over: convert the remainder of that byte and insert padding - output[out_idx++] = Blob::BASE64_MAP[((input_data[i] & 0x3) << 4)]; - output[out_idx++] = Blob::BASE64_PADDING; - } else { - // two bytes left over: convert the second byte as well - output[out_idx++] = Blob::BASE64_MAP[((input_data[i] & 0x3) << 4) | ((input_data[i + 1] & 0xF0) >> 4)]; - output[out_idx++] = Blob::BASE64_MAP[((input_data[i + 1] & 0xF) << 2)]; - } - output[out_idx++] = Blob::BASE64_PADDING; - } + + +#include + +#include + +namespace duckdb { + +BufferedFileWriter::BufferedFileWriter(FileSystem &fs, const string &path, uint8_t open_flags) + : fs(fs), data(unique_ptr(new data_t[FILE_BUFFER_SIZE])), offset(0), total_written(0) { + handle = fs.OpenFile(path, open_flags, FileLockType::WRITE_LOCK); } -static constexpr int BASE64_DECODING_TABLE[256] = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, - -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, - 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 
51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; +int64_t BufferedFileWriter::GetFileSize() { + return fs.GetFileSize(*handle); +} -idx_t Blob::FromBase64Size(string_t str) { - auto input_data = str.GetDataUnsafe(); - auto input_size = str.GetSize(); - if (input_size % 4 != 0) { - // valid base64 needs to always be cleanly divisible by 4 - throw ConversionException("Could not decode string \"%s\" as base64: length must be a multiple of 4", - str.GetString()); - } - if (input_size < 4) { - // empty string - return 0; - } - auto base_size = input_size / 4 * 3; - // check for padding to figure out the length - if (input_data[input_size - 2] == Blob::BASE64_PADDING) { - // two bytes of padding - return base_size - 2; - } - if (input_data[input_size - 1] == Blob::BASE64_PADDING) { - // one byte of padding - return base_size - 1; - } - // no padding - return base_size; +idx_t BufferedFileWriter::GetTotalWritten() { + return total_written + offset; } -template -uint32_t DecodeBase64Bytes(const string_t &str, const_data_ptr_t input_data, idx_t base_idx) { - int decoded_bytes[4]; - for (idx_t decode_idx = 0; decode_idx < 4; decode_idx++) { - if (ALLOW_PADDING && decode_idx >= 2 && input_data[base_idx + decode_idx] == Blob::BASE64_PADDING) { - // the last two bytes of a base64 string can have padding: in this case we set the byte to 0 - decoded_bytes[decode_idx] = 0; - } else { - decoded_bytes[decode_idx] = BASE64_DECODING_TABLE[input_data[base_idx + decode_idx]]; - } - if 
(decoded_bytes[decode_idx] < 0) { - throw ConversionException( - "Could not decode string \"%s\" as base64: invalid byte value '%d' at position %d", str.GetString(), - input_data[base_idx + decode_idx], base_idx + decode_idx); +void BufferedFileWriter::WriteData(const_data_ptr_t buffer, uint64_t write_size) { + // first copy anything we can from the buffer + const_data_ptr_t end_ptr = buffer + write_size; + while (buffer < end_ptr) { + idx_t to_write = MinValue((end_ptr - buffer), FILE_BUFFER_SIZE - offset); + D_ASSERT(to_write > 0); + memcpy(data.get() + offset, buffer, to_write); + offset += to_write; + buffer += to_write; + if (offset == FILE_BUFFER_SIZE) { + Flush(); } } - return (decoded_bytes[0] << 3 * 6) + (decoded_bytes[1] << 2 * 6) + (decoded_bytes[2] << 1 * 6) + - (decoded_bytes[3] << 0 * 6); } -void Blob::FromBase64(string_t str, data_ptr_t output, idx_t output_size) { - D_ASSERT(output_size == FromBase64Size(str)); - auto input_data = (const_data_ptr_t)str.GetDataUnsafe(); - auto input_size = str.GetSize(); - if (input_size == 0) { +void BufferedFileWriter::Flush() { + if (offset == 0) { return; } - idx_t out_idx = 0; - idx_t i = 0; - for (i = 0; i + 4 < input_size; i += 4) { - auto combined = DecodeBase64Bytes(str, input_data, i); - output[out_idx++] = (combined >> 2 * 8) & 0xFF; - output[out_idx++] = (combined >> 1 * 8) & 0xFF; - output[out_idx++] = (combined >> 0 * 8) & 0xFF; - } - // decode the final four bytes: padding is allowed here - auto combined = DecodeBase64Bytes(str, input_data, i); - output[out_idx++] = (combined >> 2 * 8) & 0xFF; - if (out_idx < output_size) { - output[out_idx++] = (combined >> 1 * 8) & 0xFF; - } - if (out_idx < output_size) { - output[out_idx++] = (combined >> 0 * 8) & 0xFF; - } + fs.Write(*handle, data.get(), offset); + total_written += offset; + offset = 0; } -} // namespace duckdb +void BufferedFileWriter::Sync() { + Flush(); + handle->Sync(); +} +void BufferedFileWriter::Truncate(int64_t size) { + // truncate the 
physical file on disk + handle->Truncate(size); + // reset anything written in the buffer + offset = 0; +} -namespace duckdb { +} // namespace duckdb -const int64_t NumericHelper::POWERS_OF_TEN[] {1, - 10, - 100, - 1000, - 10000, - 100000, - 1000000, - 10000000, - 100000000, - 1000000000, - 10000000000, - 100000000000, - 1000000000000, - 10000000000000, - 100000000000000, - 1000000000000000, - 10000000000000000, - 100000000000000000, - 1000000000000000000}; -const double NumericHelper::DOUBLE_POWERS_OF_TEN[] {1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, - 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, - 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, - 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39}; +#include -template <> -int NumericHelper::UnsignedLength(uint8_t value) { - int length = 1; - length += value >= 10; - length += value >= 100; - return length; +namespace duckdb { + +BufferedSerializer::BufferedSerializer(idx_t maximum_size) + : BufferedSerializer(unique_ptr(new data_t[maximum_size]), maximum_size) { } -template <> -int NumericHelper::UnsignedLength(uint16_t value) { - int length = 1; - length += value >= 10; - length += value >= 100; - length += value >= 1000; - length += value >= 10000; - return length; +BufferedSerializer::BufferedSerializer(unique_ptr data, idx_t size) : maximum_size(size), data(data.get()) { + blob.size = 0; + blob.data = move(data); } -template <> -int NumericHelper::UnsignedLength(uint32_t value) { - if (value >= 10000) { - int length = 5; - length += value >= 100000; - length += value >= 1000000; - length += value >= 10000000; - length += value >= 100000000; - length += value >= 1000000000; - return length; - } else { - int length = 1; - length += value >= 10; - length += value >= 100; - length += value >= 1000; - return length; - } +BufferedSerializer::BufferedSerializer(data_ptr_t data, idx_t size) : maximum_size(size), data(data) { + blob.size = 0; } -template <> -int 
NumericHelper::UnsignedLength(uint64_t value) { - if (value >= 10000000000ULL) { - if (value >= 1000000000000000ULL) { - int length = 16; - length += value >= 10000000000000000ULL; - length += value >= 100000000000000000ULL; - length += value >= 1000000000000000000ULL; - length += value >= 10000000000000000000ULL; - return length; - } else { - int length = 11; - length += value >= 100000000000ULL; - length += value >= 1000000000000ULL; - length += value >= 10000000000000ULL; - length += value >= 100000000000000ULL; - return length; - } - } else { - if (value >= 100000ULL) { - int length = 6; - length += value >= 1000000ULL; - length += value >= 10000000ULL; - length += value >= 100000000ULL; - length += value >= 1000000000ULL; - return length; - } else { - int length = 1; - length += value >= 10ULL; - length += value >= 100ULL; - length += value >= 1000ULL; - length += value >= 10000ULL; - return length; - } +void BufferedSerializer::WriteData(const_data_ptr_t buffer, idx_t write_size) { + if (blob.size + write_size >= maximum_size) { + do { + maximum_size *= 2; + } while (blob.size + write_size > maximum_size); + auto new_data = new data_t[maximum_size]; + memcpy(new_data, data, blob.size); + data = new_data; + blob.data = unique_ptr(new_data); } + + memcpy(data + blob.size, buffer, write_size); + blob.size += write_size; } } // namespace duckdb +namespace duckdb { + +template <> +string Deserializer::Read() { + uint32_t size = Read(); + auto buffer = unique_ptr(new data_t[size]); + ReadData(buffer.get(), size); + return string((char *)buffer.get(), size); +} +void Deserializer::ReadStringVector(vector &list) { + uint32_t sz = Read(); + list.resize(sz); + for (idx_t i = 0; i < sz; i++) { + list[i] = Read(); + } +} +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/value_operations/value_operations.hpp +// duckdb/common/sort/comparators.hpp // // 
//===----------------------------------------------------------------------===// @@ -18572,842 +22906,2330 @@ int NumericHelper::UnsignedLength(uint64_t value) { + namespace duckdb { -struct ValueOperations { - //===--------------------------------------------------------------------===// - // Numeric Operations - //===--------------------------------------------------------------------===// - // A + B - static Value Add(const Value &left, const Value &right); - // A - B - static Value Subtract(const Value &left, const Value &right); - // A * B - static Value Multiply(const Value &left, const Value &right); - // A / B - static Value Divide(const Value &left, const Value &right); - // A % B - static Value Modulo(const Value &left, const Value &right); - // // MIN(A, B) - // static Value Min(const Value &left, const Value &right); - // // MAX(A, B) - // static Value Max(const Value &left, const Value &right); - //===--------------------------------------------------------------------===// - // Comparison Operations - //===--------------------------------------------------------------------===// - // A == B - static bool Equals(const Value &left, const Value &right); - // A != B - static bool NotEquals(const Value &left, const Value &right); - // A > B - static bool GreaterThan(const Value &left, const Value &right); - // A >= B - static bool GreaterThanEquals(const Value &left, const Value &right); - // A < B - static bool LessThan(const Value &left, const Value &right); - // A <= B - static bool LessThanEquals(const Value &left, const Value &right); - //===--------------------------------------------------------------------===// - // Hash functions - //===--------------------------------------------------------------------===// - // result = HASH(A) - static hash_t Hash(const Value &left); +struct SortLayout; +struct SortedBlock; +struct SortedData; + +using ValidityBytes = RowLayout::ValidityBytes; + +struct Comparators { +public: + //! 
Whether a tie between two blobs can be broken + static bool TieIsBreakable(const idx_t &col_idx, const data_ptr_t row_ptr, const RowLayout &row_layout); + //! Compares the tuples that a being read from in the 'left' and 'right blocks during merge sort + //! (only in case we cannot simply 'memcmp' - if there are blob columns) + static int CompareTuple(const SortedBlock &left, const SortedBlock &right, const data_ptr_t &l_ptr, + const data_ptr_t &r_ptr, const SortLayout &sort_layout, const bool &external_sort); + //! Compare two blob values + static int CompareVal(const data_ptr_t l_ptr, const data_ptr_t r_ptr, const LogicalType &type); + +private: + //! Compares two blob values that were initially tied by their prefix + static int BreakBlobTie(const idx_t &tie_col, const SortedData &left, const SortedData &right, + const SortLayout &sort_layout, const bool &external); + //! Compare two fixed-size values + template + static int TemplatedCompareVal(const data_ptr_t &left_ptr, const data_ptr_t &right_ptr); + + //! Compare two values at the pointers (can be recursive if nested type) + static int CompareValAndAdvance(data_ptr_t &l_ptr, data_ptr_t &r_ptr, const LogicalType &type); + //! Compares two fixed-size values at the given pointers + template + static int TemplatedCompareAndAdvance(data_ptr_t &left_ptr, data_ptr_t &right_ptr); + //! Compares two string values at the given pointers + static int CompareStringAndAdvance(data_ptr_t &left_ptr, data_ptr_t &right_ptr); + //! Compares two struct values at the given pointers (recursive) + static int CompareStructAndAdvance(data_ptr_t &left_ptr, data_ptr_t &right_ptr, + const child_list_t &types); + //! Compare two list values at the pointers (can be recursive if nested type) + static int CompareListAndAdvance(data_ptr_t &left_ptr, data_ptr_t &right_ptr, const LogicalType &type); + //! 
Compares a list of fixed-size values + template + static int TemplatedCompareListLoop(data_ptr_t &left_ptr, data_ptr_t &right_ptr, const ValidityBytes &left_validity, + const ValidityBytes &right_validity, const idx_t &count); + + //! Unwizzles an offset into a pointer + static void UnswizzleSingleValue(data_ptr_t data_ptr, const data_ptr_t &heap_ptr, const LogicalType &type); + //! Swizzles a pointer into an offset + static void SwizzleSingleValue(data_ptr_t data_ptr, const data_ptr_t &heap_ptr, const LogicalType &type); }; + } // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/sort/sort.hpp +// +// +//===----------------------------------------------------------------------===// -#include -#include -#include -namespace duckdb { +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/sort/sorted_block.hpp +// +// +//===----------------------------------------------------------------------===// -void ChunkCollection::Verify() { -#ifdef DEBUG - for (auto &chunk : chunks) { - chunk->Verify(); - } -#endif -} -void ChunkCollection::Append(ChunkCollection &other) { - for (auto &chunk : other.chunks) { - Append(*chunk); - } -} -void ChunkCollection::Merge(ChunkCollection &other) { - if (other.count == 0) { - return; - } - if (count == 0) { - chunks = move(other.chunks); - types = move(other.types); - count = other.count; - return; - } - unique_ptr old_back; - if (!chunks.empty() && chunks.back()->size() != STANDARD_VECTOR_SIZE) { - old_back = move(chunks.back()); - chunks.pop_back(); - count -= old_back->size(); - } - for (auto &chunk : other.chunks) { - chunks.push_back(move(chunk)); - } - count += other.count; - if (old_back) { - Append(*old_back); - } - Verify(); -} -void ChunkCollection::Append(DataChunk &new_chunk) { - if (new_chunk.size() == 0) { - return; - } - new_chunk.Verify(); +namespace duckdb { - // we have to ensure 
that every chunk in the ChunkCollection is completely - // filled, otherwise our O(1) lookup in GetValue and SetValue does not work - // first fill the latest chunk, if it exists - count += new_chunk.size(); +class BufferManager; +struct RowDataBlock; +struct SortLayout; +struct GlobalSortState; - idx_t remaining_data = new_chunk.size(); - idx_t offset = 0; - if (chunks.empty()) { - // first chunk - types = new_chunk.GetTypes(); - } else { - // the types of the new chunk should match the types of the previous one - D_ASSERT(types.size() == new_chunk.ColumnCount()); - auto new_types = new_chunk.GetTypes(); - for (idx_t i = 0; i < types.size(); i++) { - if (new_types[i] != types[i]) { - throw TypeMismatchException(new_types[i], types[i], "Type mismatch when combining rows"); - } - if (types[i].InternalType() == PhysicalType::LIST) { - for (auto &chunk : - chunks) { // need to check all the chunks because they can have only-null list entries - auto &chunk_vec = chunk->data[i]; - auto &new_vec = new_chunk.data[i]; - if (ListVector::HasEntry(chunk_vec) && ListVector::HasEntry(new_vec)) { - auto &chunk_type = chunk_vec.GetType(); - auto &new_type = new_vec.GetType(); - if (chunk_type != new_type) { - throw TypeMismatchException(chunk_type, new_type, "Type mismatch when combining lists"); - } - } - } - } - // TODO check structs, too - } +//! Object that holds sorted rows, and an accompanying heap if there are blobs +struct SortedData { +public: + SortedData(const RowLayout &layout, BufferManager &buffer_manager, GlobalSortState &state); + //! Number of rows that this object holds + idx_t Count(); + //! Pin the current block so it can be read + void Pin(); + //! Pointer to the row that is currently being read from + data_ptr_t DataPtr() const; + //! Pointer to the heap row that corresponds to the current row + data_ptr_t HeapPtr() const; + //! Advance one row + void Advance(const bool &adv); + //! Initialize new block to write to + void CreateBlock(); + //! 
Reset read indices to the given indices + void ResetIndices(idx_t block_idx_to, idx_t entry_idx_to); + //! Create a slice that holds the rows between the start and end indices + unique_ptr CreateSlice(idx_t start_block_index, idx_t start_entry_index, idx_t end_block_index, + idx_t end_entry_index); + //! Unswizzles all + void Unswizzle(); + +public: + //! Layout of this data + const RowLayout layout; + //! Data and heap blocks + vector data_blocks; + vector heap_blocks; + //! Buffer handles to the data being currently read + unique_ptr data_handle; + unique_ptr heap_handle; + //! Read indices + idx_t block_idx; + idx_t entry_idx; - // first append data to the current chunk - DataChunk &last_chunk = *chunks.back(); - idx_t added_data = MinValue(remaining_data, STANDARD_VECTOR_SIZE - last_chunk.size()); - if (added_data > 0) { - // copy elements to the last chunk - new_chunk.Normalify(); - // have to be careful here: setting the cardinality without calling normalify can cause incorrect partial - // decompression - idx_t old_count = new_chunk.size(); - new_chunk.SetCardinality(added_data); +private: + //! Pin fixed-size row data + void PinData(); + //! Pin the accompanying heap data (if any) + void PinHeap(); - last_chunk.Append(new_chunk); - remaining_data -= added_data; - // reset the chunk to the old data - new_chunk.SetCardinality(old_count); - offset = added_data; - } - } +private: + //! The buffer manager + BufferManager &buffer_manager; + //! The global state + GlobalSortState &state; + //! Pointers into the buffers being currently read + data_ptr_t data_ptr; + data_ptr_t heap_ptr; +}; + +//! Block that holds sorted rows: radix, blob and payload data +struct SortedBlock { +public: + SortedBlock(BufferManager &buffer_manager, GlobalSortState &gstate); + //! Number of rows that this object holds + idx_t Count() const; + //! The remaining number of rows to be read from this object + idx_t Remaining() const; + //! 
Initialize this block to write data to + void InitializeWrite(); + //! Init new block to write to + void CreateBlock(); + //! Pins radix block with given index + void PinRadix(idx_t pin_block_idx); + //! Fill this sorted block by appending the blocks held by a vector of sorted blocks + void AppendSortedBlocks(vector> &sorted_blocks); + //! Locate the block and entry index of a row in this block, + //! given an index between 0 and the total number of rows in this block + void GlobalToLocalIndex(const idx_t &global_idx, idx_t &local_block_index, idx_t &local_entry_index); + //! Create a slice that holds the rows between the start and end indices + unique_ptr CreateSlice(const idx_t start, const idx_t end); + + //! Size (in bytes) of the heap of this block + idx_t HeapSize() const; + //! Total size (in bytes) of this block + idx_t SizeInBytes() const; + +public: + //! Radix/memcmp sortable data + vector radix_sorting_data; + unique_ptr radix_handle; + idx_t block_idx; + idx_t entry_idx; + //! Variable sized sorting data + unique_ptr blob_sorting_data; + //! Payload data + unique_ptr payload_data; - if (remaining_data > 0) { - // create a new chunk and fill it with the remainder - auto chunk = make_unique(); - chunk->Initialize(types); - new_chunk.Copy(*chunk, offset); - chunks.push_back(move(chunk)); - } -} +private: + //! 
Buffer manager, and sorting layout constants + BufferManager &buffer_manager; + GlobalSortState &state; + const SortLayout &sort_layout; + const RowLayout &payload_layout; +}; -// returns an int similar to a C comparator: -// -1 if left < right -// 0 if left == right -// 1 if left > right +struct SortedDataScanner { +public: + SortedDataScanner(SortedData &sorted_data, GlobalSortState &global_sort_state); -template -static int8_t TemplatedCompareValue(Vector &left_vec, Vector &right_vec, idx_t left_idx, idx_t right_idx) { - D_ASSERT(left_vec.GetType() == right_vec.GetType()); - auto left_val = FlatVector::GetData(left_vec)[left_idx]; - auto right_val = FlatVector::GetData(right_vec)[right_idx]; - if (Equals::Operation(left_val, right_val)) { - return 0; - } - if (LessThan::Operation(left_val, right_val)) { - return -1; - } - return 1; -} + //! Scans the next data chunk from the sorted data + void Scan(DataChunk &chunk); -// return type here is int32 because strcmp() on some platforms returns rather large values -static int32_t CompareValue(Vector &left_vec, Vector &right_vec, idx_t vector_idx_left, idx_t vector_idx_right, - OrderByNullType null_order) { - auto left_null = FlatVector::IsNull(left_vec, vector_idx_left); - auto right_null = FlatVector::IsNull(right_vec, vector_idx_right); +private: + //! The sorted data being scanned + SortedData &sorted_data; + //! The total count of sorted_data + const idx_t total_count; + //! The global sort state + GlobalSortState &global_sort_state; + //! Addresses used to gather from the sorted data + Vector addresses = Vector(LogicalType::POINTER); + //! The number of rows scanned so far + idx_t total_scanned; +}; - if (left_null && right_null) { - return 0; - } else if (right_null) { - return null_order == OrderByNullType::NULLS_FIRST ? 1 : -1; - } else if (left_null) { - return null_order == OrderByNullType::NULLS_FIRST ? 
-1 : 1; - } +} // namespace duckdb - switch (left_vec.GetType().InternalType()) { - case PhysicalType::BOOL: - case PhysicalType::INT8: - return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); - case PhysicalType::INT16: - return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); - case PhysicalType::INT32: - return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); - case PhysicalType::INT64: - return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); - case PhysicalType::UINT8: - return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); - case PhysicalType::UINT16: - return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); - case PhysicalType::UINT32: - return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); - case PhysicalType::UINT64: - return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); - case PhysicalType::INT128: - return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); - case PhysicalType::FLOAT: - return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); - case PhysicalType::DOUBLE: - return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); - case PhysicalType::VARCHAR: - return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); - case PhysicalType::INTERVAL: - return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); - default: - throw NotImplementedException("Type for comparison"); - } -} -static int CompareTuple(ChunkCollection *sort_by, vector &desc, vector &null_order, - idx_t left, idx_t right) { - D_ASSERT(sort_by); - idx_t chunk_idx_left = left / STANDARD_VECTOR_SIZE; - idx_t chunk_idx_right = right / STANDARD_VECTOR_SIZE; - idx_t vector_idx_left = left % STANDARD_VECTOR_SIZE; 
- idx_t vector_idx_right = right % STANDARD_VECTOR_SIZE; - auto &left_chunk = sort_by->GetChunk(chunk_idx_left); - auto &right_chunk = sort_by->GetChunk(chunk_idx_right); +namespace duckdb { - for (idx_t col_idx = 0; col_idx < desc.size(); col_idx++) { - auto order_type = desc[col_idx]; +class RowLayout; +struct LocalSortState; - auto &left_vec = left_chunk.data[col_idx]; - auto &right_vec = right_chunk.data[col_idx]; +struct SortLayout { +public: + SortLayout(const vector &orders, const vector> &statistics); - D_ASSERT(left_vec.GetVectorType() == VectorType::FLAT_VECTOR); - D_ASSERT(right_vec.GetVectorType() == VectorType::FLAT_VECTOR); - D_ASSERT(left_vec.GetType() == right_vec.GetType()); +public: + idx_t column_count; + vector order_types; + vector order_by_null_types; + vector logical_types; - auto comp_res = CompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right, null_order[col_idx]); + bool all_constant; + vector constant_size; + vector column_sizes; + vector stats; + vector has_null; - if (comp_res == 0) { - continue; - } - return comp_res < 0 ? (order_type == OrderType::ASCENDING ? -1 : 1) - : (order_type == OrderType::ASCENDING ? 
1 : -1); - } - return 0; -} + idx_t comparison_size; + idx_t entry_size; -static int64_t QuicksortInitial(ChunkCollection *sort_by, vector &desc, vector &null_order, - idx_t *result) { - // select pivot - int64_t pivot = 0; - int64_t low = 0, high = sort_by->Count() - 1; - // now insert elements - for (idx_t i = 1; i < sort_by->Count(); i++) { - if (CompareTuple(sort_by, desc, null_order, i, pivot) <= 0) { - result[low++] = i; - } else { - result[high--] = i; - } - } - D_ASSERT(low == high); - result[low] = pivot; - return low; -} + RowLayout blob_layout; + unordered_map sorting_to_blob_col; +}; -struct QuicksortInfo { - QuicksortInfo(int64_t left_p, int64_t right_p) : left(left_p), right(right_p) { - } +struct GlobalSortState { +public: + GlobalSortState(BufferManager &buffer_manager, vector &orders, + vector> &statistics, RowLayout &payload_layout); - int64_t left; - int64_t right; -}; + //! Add local state sorted data to this global state + void AddLocalState(LocalSortState &local_sort_state); + //! Prepares the GlobalSortState for the merge sort phase (after completing radix sort phase) + void PrepareMergePhase(); + //! Initializes the global sort state for another round of merging + void InitializeMergeRound(); + //! Completes the cascaded merge sort round + void CompleteMergeRound(); -struct QuicksortStack { - std::queue info_queue; +public: + //! The lock for updating the order global state + mutex lock; + //! The buffer manager + BufferManager &buffer_manager; - QuicksortInfo Pop() { - auto element = info_queue.front(); - info_queue.pop(); - return element; - } + //! Sorting and payload layouts + const SortLayout sort_layout; + const RowLayout payload_layout; - bool IsEmpty() { - return info_queue.empty(); - } + //! Sorted data + vector> sorted_blocks; + vector>> sorted_blocks_temp; + unique_ptr odd_one_out = nullptr; - void Enqueue(int64_t left, int64_t right) { - if (left >= right) { - return; - } - info_queue.emplace(left, right); - } -}; + //! 
Pinned heap data (if sorting in memory) + vector heap_blocks; + vector> pinned_blocks; -static void QuicksortInPlace(ChunkCollection *sort_by, vector &desc, vector &null_order, - idx_t *result, QuicksortInfo info, QuicksortStack &stack) { - auto left = info.left; - auto right = info.right; + //! Capacity (number of rows) used to initialize blocks + idx_t block_capacity; + //! Whether we are doing an external sort + bool external; - D_ASSERT(left < right); + //! Progress in merge path stage + idx_t pair_idx; + idx_t num_pairs; + idx_t l_start; + idx_t r_start; +}; - int64_t middle = left + (right - left) / 2; - int64_t pivot = result[middle]; - // move the mid point value to the front. - int64_t i = left + 1; - int64_t j = right; +struct LocalSortState { +public: + LocalSortState(); - std::swap(result[middle], result[left]); - bool all_equal = true; - while (i <= j) { - if (result) { - while (i <= j) { - int cmp = CompareTuple(sort_by, desc, null_order, result[i], pivot); - if (cmp < 0) { - all_equal = false; - } else if (cmp > 0) { - all_equal = false; - break; - } - i++; - } - } + //! Initialize the layouts and RowDataCollections + void Initialize(GlobalSortState &global_sort_state, BufferManager &buffer_manager_p); + //! Sink one DataChunk into the local sort state + void SinkChunk(DataChunk &sort, DataChunk &payload); + //! Size of accumulated data in bytes + idx_t SizeInBytes() const; + //! Sort the data accumulated so far + void Sort(GlobalSortState &global_sort_state); - while (i <= j && CompareTuple(sort_by, desc, null_order, result[j], pivot) > 0) { - j--; - } +private: + //! Concatenate the blocks held by a RowDataCollection into a single block + RowDataBlock ConcatenateBlocks(RowDataCollection &row_data); + //! Sorts the data in the newly created SortedBlock + void SortInMemory(); + //! Re-order the local state after sorting + void ReOrder(GlobalSortState &gstate); + //! 
Re-order a SortedData object after sorting + void ReOrder(SortedData &sd, data_ptr_t sorting_ptr, RowDataCollection &heap, GlobalSortState &gstate); - if (i < j) { - std::swap(result[i], result[j]); - } - } - std::swap(result[i - 1], result[left]); - int64_t part = i - 1; +public: + //! Whether this local state has been initialized + bool initialized; + //! The buffer manager + BufferManager *buffer_manager; + //! The sorting and payload layouts + const SortLayout *sort_layout; + const RowLayout *payload_layout; + //! Radix/memcmp sortable data + unique_ptr radix_sorting_data; + //! Variable sized sorting data and accompanying heap + unique_ptr blob_sorting_data; + unique_ptr blob_sorting_heap; + //! Payload data and accompanying heap + unique_ptr payload_data; + unique_ptr payload_heap; + //! Sorted data + vector> sorted_blocks; - if (all_equal) { - return; - } +private: + //! Selection vector and addresses for scattering the data to rows + const SelectionVector &sel_ptr = FlatVector::INCREMENTAL_SELECTION_VECTOR; + Vector addresses = Vector(LogicalType::POINTER); +}; - stack.Enqueue(left, part - 1); - stack.Enqueue(part + 1, right); -} +struct MergeSorter { +public: + MergeSorter(GlobalSortState &state, BufferManager &buffer_manager); -void ChunkCollection::Sort(vector &desc, vector &null_order, idx_t result[]) { - D_ASSERT(result); - if (count == 0) { - return; - } - // start off with an initial quicksort - int64_t part = QuicksortInitial(this, desc, null_order, result); + //! Finds and merges partitions until the current cascaded merge round is finished + void PerformInMergeRound(); - // now continuously perform - QuicksortStack stack; - stack.Enqueue(0, part); - stack.Enqueue(part + 1, count - 1); - while (!stack.IsEmpty()) { - auto element = stack.Pop(); - QuicksortInPlace(this, desc, null_order, result, element, stack); - } -} +private: + //! Computes the left and right block that will be merged next (Merge Path partition) + void GetNextPartition(); + //! 
Finds the boundary of the next partition using binary search + void GetIntersection(SortedBlock &l, SortedBlock &r, const idx_t diagonal, idx_t &l_idx, idx_t &r_idx); + //! Compare values within SortedBlocks using a global index + int CompareUsingGlobalIndex(SortedBlock &l, SortedBlock &r, const idx_t l_idx, const idx_t r_idx); + + //! Finds the next partition and merges it + void MergePartition(); + + //! Computes how the next 'count' tuples should be merged by setting the 'left_smaller' array + void ComputeMerge(const idx_t &count, bool left_smaller[]); + + //! Merges the radix sorting blocks according to the 'left_smaller' array + void MergeRadix(const idx_t &count, const bool left_smaller[]); + //! Merges SortedData according to the 'left_smaller' array + void MergeData(SortedData &result_data, SortedData &l_data, SortedData &r_data, const idx_t &count, + const bool left_smaller[], idx_t next_entry_sizes[]); + //! Merges constant size rows according to the 'left_smaller' array + void MergeRows(data_ptr_t &l_ptr, idx_t &l_entry_idx, const idx_t &l_count, data_ptr_t &r_ptr, idx_t &r_entry_idx, + const idx_t &r_count, RowDataBlock *target_block, data_ptr_t &target_ptr, const idx_t &entry_size, + const bool left_smaller[], idx_t &copied, const idx_t &count); + //! Flushes constant size rows into the result + void FlushRows(data_ptr_t &source_ptr, idx_t &source_entry_idx, const idx_t &source_count, + RowDataBlock *target_block, data_ptr_t &target_ptr, const idx_t &entry_size, idx_t &copied, + const idx_t &count); + //! 
Flushes blob rows and accompanying heap + void FlushBlobs(const RowLayout &layout, const idx_t &source_count, data_ptr_t &source_data_ptr, + idx_t &source_entry_idx, data_ptr_t &source_heap_ptr, RowDataBlock *target_data_block, + data_ptr_t &target_data_ptr, RowDataBlock *target_heap_block, BufferHandle &target_heap_handle, + data_ptr_t &target_heap_ptr, idx_t &copied, const idx_t &count); -// FIXME make this more efficient by not using the Value API -// just use memcpy in the vectors -// assert that there is no selection list -void ChunkCollection::Reorder(idx_t order_org[]) { - auto order = unique_ptr(new idx_t[count]); - memcpy(order.get(), order_org, sizeof(idx_t) * count); +private: + //! The global sorting state + GlobalSortState &state; + //! The buffer manager and the sorting layout + BufferManager &buffer_manager; + const SortLayout &sort_layout; + + //! The left, right and result blocks of the current partition + unique_ptr left_block; + unique_ptr right_block; + SortedBlock *result; +}; - // adapted from https://stackoverflow.com/a/7366196/2652376 +} // namespace duckdb - auto val_buf = vector(); - val_buf.resize(ColumnCount()); - idx_t j, k; - for (idx_t i = 0; i < count; i++) { - for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) { - val_buf[col_idx] = GetValue(col_idx, i); +namespace duckdb { + +bool Comparators::TieIsBreakable(const idx_t &col_idx, const data_ptr_t row_ptr, const RowLayout &row_layout) { + // Check if the blob is NULL + ValidityBytes row_mask(row_ptr); + idx_t entry_idx; + idx_t idx_in_entry; + ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry); + if (!row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) { + // Can't break a NULL tie + return false; + } + if (row_layout.GetTypes()[col_idx].InternalType() == PhysicalType::VARCHAR) { + const auto &tie_col_offset = row_layout.GetOffsets()[col_idx]; + string_t tie_string = Load(row_ptr + tie_col_offset); + if (tie_string.GetSize() < string_t::INLINE_LENGTH)
{ + // No need to break the tie - we already compared the full string + return false; } - j = i; - while (true) { - k = order[j]; - order[j] = j; - if (k == i) { - break; - } - for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) { - SetValue(col_idx, j, GetValue(col_idx, k)); - } - j = k; + } + return true; +} + +int Comparators::CompareTuple(const SortedBlock &left, const SortedBlock &right, const data_ptr_t &l_ptr, + const data_ptr_t &r_ptr, const SortLayout &sort_layout, const bool &external_sort) { + // Compare the sorting columns one by one + int comp_res = 0; + data_ptr_t l_ptr_offset = l_ptr; + data_ptr_t r_ptr_offset = r_ptr; + for (idx_t col_idx = 0; col_idx < sort_layout.column_count; col_idx++) { + comp_res = memcmp(l_ptr_offset, r_ptr_offset, sort_layout.column_sizes[col_idx]); + if (comp_res == 0 && !sort_layout.constant_size[col_idx]) { + comp_res = + BreakBlobTie(col_idx, *left.blob_sorting_data, *right.blob_sorting_data, sort_layout, external_sort); } - for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) { - SetValue(col_idx, j, val_buf[col_idx]); + if (comp_res != 0) { + break; } + l_ptr_offset += sort_layout.column_sizes[col_idx]; + r_ptr_offset += sort_layout.column_sizes[col_idx]; } + return comp_res; } -template -static void TemplatedSetValues(ChunkCollection *src_coll, Vector &tgt_vec, idx_t order[], idx_t col_idx, - idx_t start_offset, idx_t remaining_data) { - D_ASSERT(src_coll); +int Comparators::CompareVal(const data_ptr_t l_ptr, const data_ptr_t r_ptr, const LogicalType &type) { + switch (type.InternalType()) { + case PhysicalType::VARCHAR: + return TemplatedCompareVal(l_ptr, r_ptr); + case PhysicalType::LIST: + case PhysicalType::STRUCT: { + auto l_nested_ptr = Load(l_ptr); + auto r_nested_ptr = Load(r_ptr); + return CompareValAndAdvance(l_nested_ptr, r_nested_ptr, type); + } + default: + throw NotImplementedException("Unimplemented CompareVal for type %s", type.ToString()); + } +} - for (idx_t row_idx = 0; row_idx < 
remaining_data; row_idx++) { - idx_t chunk_idx_src = order[start_offset + row_idx] / STANDARD_VECTOR_SIZE; - idx_t vector_idx_src = order[start_offset + row_idx] % STANDARD_VECTOR_SIZE; +int Comparators::BreakBlobTie(const idx_t &tie_col, const SortedData &left, const SortedData &right, + const SortLayout &sort_layout, const bool &external) { + const idx_t &col_idx = sort_layout.sorting_to_blob_col.at(tie_col); + data_ptr_t l_data_ptr = left.DataPtr(); + data_ptr_t r_data_ptr = right.DataPtr(); + if (!TieIsBreakable(col_idx, l_data_ptr, sort_layout.blob_layout)) { + // Quick check to see if ties can be broken + return 0; + } + // Align the pointers + const auto &tie_col_offset = sort_layout.blob_layout.GetOffsets()[col_idx]; + l_data_ptr += tie_col_offset; + r_data_ptr += tie_col_offset; + // Do the comparison + const int order = sort_layout.order_types[tie_col] == OrderType::DESCENDING ? -1 : 1; + const auto &type = left.layout.GetTypes()[col_idx]; + int result; + if (external) { + // Store heap pointers + data_ptr_t l_heap_ptr = left.HeapPtr(); + data_ptr_t r_heap_ptr = right.HeapPtr(); + // Unswizzle offset to pointer + UnswizzleSingleValue(l_data_ptr, l_heap_ptr, type); + UnswizzleSingleValue(r_data_ptr, r_heap_ptr, type); + // Compare + result = CompareVal(l_data_ptr, r_data_ptr, type); + // Swizzle the pointers back to offsets + SwizzleSingleValue(l_data_ptr, l_heap_ptr, type); + SwizzleSingleValue(r_data_ptr, r_heap_ptr, type); + } else { + result = CompareVal(l_data_ptr, r_data_ptr, type); + } + return order * result; +} - auto &src_chunk = src_coll->GetChunk(chunk_idx_src); - Vector &src_vec = src_chunk.data[col_idx]; - auto source_data = FlatVector::GetData(src_vec); - auto target_data = FlatVector::GetData(tgt_vec); +template +int Comparators::TemplatedCompareVal(const data_ptr_t &left_ptr, const data_ptr_t &right_ptr) { + const auto left_val = Load(left_ptr); + const auto right_val = Load(right_ptr); + if (Equals::Operation(left_val, right_val)) { + 
return 0; + } else if (LessThan::Operation(left_val, right_val)) { + return -1; + } else { + return 1; + } +} - if (FlatVector::IsNull(src_vec, vector_idx_src)) { - FlatVector::SetNull(tgt_vec, row_idx, true); - } else { - target_data[row_idx] = source_data[vector_idx_src]; - } +int Comparators::CompareValAndAdvance(data_ptr_t &l_ptr, data_ptr_t &r_ptr, const LogicalType &type) { + switch (type.InternalType()) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + return TemplatedCompareAndAdvance(l_ptr, r_ptr); + case PhysicalType::INT16: + return TemplatedCompareAndAdvance(l_ptr, r_ptr); + case PhysicalType::INT32: + return TemplatedCompareAndAdvance(l_ptr, r_ptr); + case PhysicalType::INT64: + return TemplatedCompareAndAdvance(l_ptr, r_ptr); + case PhysicalType::UINT8: + return TemplatedCompareAndAdvance(l_ptr, r_ptr); + case PhysicalType::UINT16: + return TemplatedCompareAndAdvance(l_ptr, r_ptr); + case PhysicalType::UINT32: + return TemplatedCompareAndAdvance(l_ptr, r_ptr); + case PhysicalType::UINT64: + return TemplatedCompareAndAdvance(l_ptr, r_ptr); + case PhysicalType::INT128: + return TemplatedCompareAndAdvance(l_ptr, r_ptr); + case PhysicalType::FLOAT: + return TemplatedCompareAndAdvance(l_ptr, r_ptr); + case PhysicalType::DOUBLE: + return TemplatedCompareAndAdvance(l_ptr, r_ptr); + case PhysicalType::INTERVAL: + return TemplatedCompareAndAdvance(l_ptr, r_ptr); + case PhysicalType::VARCHAR: + return CompareStringAndAdvance(l_ptr, r_ptr); + case PhysicalType::LIST: + return CompareListAndAdvance(l_ptr, r_ptr, ListType::GetChildType(type)); + case PhysicalType::STRUCT: + return CompareStructAndAdvance(l_ptr, r_ptr, StructType::GetChildTypes(type)); + default: + throw NotImplementedException("Unimplemented CompareValAndAdvance for type %s", type.ToString()); } } -// TODO: reorder functionality is similar, perhaps merge -void ChunkCollection::MaterializeSortedChunk(DataChunk &target, idx_t order[], idx_t start_offset) { - idx_t remaining_data = 
MinValue(STANDARD_VECTOR_SIZE, count - start_offset); - D_ASSERT(target.GetTypes() == types); +template +int Comparators::TemplatedCompareAndAdvance(data_ptr_t &left_ptr, data_ptr_t &right_ptr) { + auto result = TemplatedCompareVal(left_ptr, right_ptr); + left_ptr += sizeof(T); + right_ptr += sizeof(T); + return result; +} - target.SetCardinality(remaining_data); - for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) { - switch (types[col_idx].InternalType()) { +int Comparators::CompareStringAndAdvance(data_ptr_t &left_ptr, data_ptr_t &right_ptr) { + // Construct the string_t values; each side must use its own length prefix + uint32_t left_string_size = Load(left_ptr); + uint32_t right_string_size = Load(right_ptr); + left_ptr += sizeof(uint32_t); + right_ptr += sizeof(uint32_t); + string_t left_val((const char *)left_ptr, left_string_size); + string_t right_val((const char *)right_ptr, right_string_size); + left_ptr += left_string_size; + right_ptr += right_string_size; + // Compare + return TemplatedCompareVal((data_ptr_t)&left_val, (data_ptr_t)&right_val); +} + +int Comparators::CompareStructAndAdvance(data_ptr_t &left_ptr, data_ptr_t &right_ptr, + const child_list_t &types) { + idx_t count = types.size(); + // Load validity masks + ValidityBytes left_validity(left_ptr); + ValidityBytes right_validity(right_ptr); + left_ptr += (count + 7) / 8; + right_ptr += (count + 7) / 8; + // Initialize variables + bool left_valid; + bool right_valid; + idx_t entry_idx; + idx_t idx_in_entry; + // Compare + int comp_res = 0; + for (idx_t i = 0; i < count; i++) { + ValidityBytes::GetEntryIndex(i, entry_idx, idx_in_entry); + left_valid = left_validity.RowIsValid(left_validity.GetValidityEntry(entry_idx), idx_in_entry); + right_valid = right_validity.RowIsValid(right_validity.GetValidityEntry(entry_idx), idx_in_entry); + auto &type = types[i].second; + if ((left_valid && right_valid) || TypeIsConstantSize(type.InternalType())) { + comp_res = CompareValAndAdvance(left_ptr, right_ptr, types[i].second); + } + if
(!left_valid && !right_valid) { + comp_res = 0; + } else if (!left_valid) { + comp_res = 1; + } else if (!right_valid) { + comp_res = -1; + } + if (comp_res != 0) { + break; + } + } + return comp_res; +} + +int Comparators::CompareListAndAdvance(data_ptr_t &left_ptr, data_ptr_t &right_ptr, const LogicalType &type) { + // Load list lengths + auto left_len = Load(left_ptr); + auto right_len = Load(right_ptr); + left_ptr += sizeof(idx_t); + right_ptr += sizeof(idx_t); + // Load list validity masks + ValidityBytes left_validity(left_ptr); + ValidityBytes right_validity(right_ptr); + left_ptr += (left_len + 7) / 8; + right_ptr += (right_len + 7) / 8; + // Compare + int comp_res = 0; + idx_t count = MinValue(left_len, right_len); + if (TypeIsConstantSize(type.InternalType())) { + // Templated code for fixed-size types + switch (type.InternalType()) { case PhysicalType::BOOL: case PhysicalType::INT8: - TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); + comp_res = TemplatedCompareListLoop(left_ptr, right_ptr, left_validity, right_validity, count); break; case PhysicalType::INT16: - TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); + comp_res = TemplatedCompareListLoop(left_ptr, right_ptr, left_validity, right_validity, count); break; case PhysicalType::INT32: - TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); + comp_res = TemplatedCompareListLoop(left_ptr, right_ptr, left_validity, right_validity, count); break; case PhysicalType::INT64: - TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); + comp_res = TemplatedCompareListLoop(left_ptr, right_ptr, left_validity, right_validity, count); break; case PhysicalType::UINT8: - TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); + comp_res = TemplatedCompareListLoop(left_ptr, right_ptr, left_validity, 
right_validity, count); break; case PhysicalType::UINT16: - TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); + comp_res = TemplatedCompareListLoop(left_ptr, right_ptr, left_validity, right_validity, count); break; case PhysicalType::UINT32: - TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); + comp_res = TemplatedCompareListLoop(left_ptr, right_ptr, left_validity, right_validity, count); break; case PhysicalType::UINT64: - TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); + comp_res = TemplatedCompareListLoop(left_ptr, right_ptr, left_validity, right_validity, count); break; case PhysicalType::INT128: - TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); + comp_res = TemplatedCompareListLoop(left_ptr, right_ptr, left_validity, right_validity, count); break; case PhysicalType::FLOAT: - TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); + comp_res = TemplatedCompareListLoop(left_ptr, right_ptr, left_validity, right_validity, count); break; case PhysicalType::DOUBLE: - TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); - break; - case PhysicalType::VARCHAR: - TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); + comp_res = TemplatedCompareListLoop(left_ptr, right_ptr, left_validity, right_validity, count); break; case PhysicalType::INTERVAL: - TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); + comp_res = TemplatedCompareListLoop(left_ptr, right_ptr, left_validity, right_validity, count); break; - - case PhysicalType::LIST: - case PhysicalType::STRUCT: { - for (idx_t row_idx = 0; row_idx < remaining_data; row_idx++) { - idx_t chunk_idx_src = order[start_offset + row_idx] / STANDARD_VECTOR_SIZE; - idx_t vector_idx_src = 
order[start_offset + row_idx] % STANDARD_VECTOR_SIZE; - - auto &src_chunk = chunks[chunk_idx_src]; - Vector &src_vec = src_chunk->data[col_idx]; - auto &tgt_vec = target.data[col_idx]; - if (FlatVector::IsNull(src_vec, vector_idx_src)) { - FlatVector::SetNull(tgt_vec, row_idx, true); - } else { - tgt_vec.SetValue(row_idx, src_vec.GetValue(vector_idx_src)); + default: + throw NotImplementedException("CompareListAndAdvance for fixed-size type %s", type.ToString()); + } + } else { + // Variable-sized list entries + bool left_valid; + bool right_valid; + idx_t entry_idx; + idx_t idx_in_entry; + // Size (in bytes) of all variable-sizes entries is stored before the entries begin, + // to make deserialization easier. We need to skip over them + left_ptr += left_len * sizeof(idx_t); + right_ptr += right_len * sizeof(idx_t); + for (idx_t i = 0; i < count; i++) { + ValidityBytes::GetEntryIndex(i, entry_idx, idx_in_entry); + left_valid = left_validity.RowIsValid(left_validity.GetValidityEntry(entry_idx), idx_in_entry); + right_valid = right_validity.RowIsValid(right_validity.GetValidityEntry(entry_idx), idx_in_entry); + if (left_valid && right_valid) { + switch (type.InternalType()) { + case PhysicalType::LIST: + comp_res = CompareListAndAdvance(left_ptr, right_ptr, ListType::GetChildType(type)); + break; + case PhysicalType::VARCHAR: + comp_res = CompareStringAndAdvance(left_ptr, right_ptr); + break; + case PhysicalType::STRUCT: + comp_res = CompareStructAndAdvance(left_ptr, right_ptr, StructType::GetChildTypes(type)); + break; + default: + throw NotImplementedException("CompareListAndAdvance for variable-size type %s", type.ToString()); } + } else if (!left_valid && !right_valid) { + comp_res = 0; + } else if (left_valid) { + comp_res = -1; + } else { + comp_res = 1; + } + if (comp_res != 0) { + break; } - } break; - default: - throw NotImplementedException("Type is unsupported in MaterializeSortedChunk()"); } } - target.Verify(); + // All values that we looped over were 
equal + if (comp_res == 0 && left_len != right_len) { + // Smaller lists first + if (left_len < right_len) { + comp_res = -1; + } else { + comp_res = 1; + } + } + return comp_res; } -Value ChunkCollection::GetValue(idx_t column, idx_t index) { - return chunks[LocateChunk(index)]->GetValue(column, index % STANDARD_VECTOR_SIZE); +template +int Comparators::TemplatedCompareListLoop(data_ptr_t &left_ptr, data_ptr_t &right_ptr, + const ValidityBytes &left_validity, const ValidityBytes &right_validity, + const idx_t &count) { + int comp_res = 0; + bool left_valid; + bool right_valid; + idx_t entry_idx; + idx_t idx_in_entry; + for (idx_t i = 0; i < count; i++) { + ValidityBytes::GetEntryIndex(i, entry_idx, idx_in_entry); + left_valid = left_validity.RowIsValid(left_validity.GetValidityEntry(entry_idx), idx_in_entry); + right_valid = right_validity.RowIsValid(right_validity.GetValidityEntry(entry_idx), idx_in_entry); + comp_res = TemplatedCompareAndAdvance(left_ptr, right_ptr); + if (!left_valid && !right_valid) { + comp_res = 0; + } else if (!left_valid) { + comp_res = 1; + } else if (!right_valid) { + comp_res = -1; + } + if (comp_res != 0) { + break; + } + } + return comp_res; } -vector ChunkCollection::GetRow(idx_t index) { - vector values; - values.resize(ColumnCount()); +void Comparators::UnswizzleSingleValue(data_ptr_t data_ptr, const data_ptr_t &heap_ptr, const LogicalType &type) { + if (type.InternalType() == PhysicalType::VARCHAR) { + data_ptr += sizeof(uint32_t) + string_t::PREFIX_LENGTH; + } + Store(heap_ptr + Load(data_ptr), data_ptr); +} - for (idx_t p_idx = 0; p_idx < ColumnCount(); p_idx++) { - values[p_idx] = GetValue(p_idx, index); +void Comparators::SwizzleSingleValue(data_ptr_t data_ptr, const data_ptr_t &heap_ptr, const LogicalType &type) { + if (type.InternalType() == PhysicalType::VARCHAR) { + data_ptr += sizeof(uint32_t) + string_t::PREFIX_LENGTH; } - return values; + Store(Load(data_ptr) - heap_ptr, data_ptr); } -void 
ChunkCollection::SetValue(idx_t column, idx_t index, const Value &value) { - chunks[LocateChunk(index)]->SetValue(column, index % STANDARD_VECTOR_SIZE, value); +} // namespace duckdb + + + +namespace duckdb { + +MergeSorter::MergeSorter(GlobalSortState &state, BufferManager &buffer_manager) + : state(state), buffer_manager(buffer_manager), sort_layout(state.sort_layout) { } -void ChunkCollection::Print() { - Printer::Print(ToString()); +void MergeSorter::PerformInMergeRound() { + while (true) { + { + lock_guard pair_guard(state.lock); + if (state.pair_idx == state.num_pairs) { + break; + } + GetNextPartition(); + } + MergePartition(); + } } -bool ChunkCollection::Equals(ChunkCollection &other) { - if (count != other.count) { - return false; +void MergeSorter::MergePartition() { + auto &left = *left_block; + auto &right = *right_block; +#ifdef DEBUG + D_ASSERT(left.radix_sorting_data.size() == left.payload_data->data_blocks.size()); + D_ASSERT(right.radix_sorting_data.size() == right.payload_data->data_blocks.size()); + if (!state.payload_layout.AllConstant() && state.external) { + D_ASSERT(left.payload_data->data_blocks.size() == left.payload_data->heap_blocks.size()); + D_ASSERT(right.payload_data->data_blocks.size() == right.payload_data->heap_blocks.size()); } - if (ColumnCount() != other.ColumnCount()) { - return false; + if (!sort_layout.all_constant) { + D_ASSERT(left.radix_sorting_data.size() == left.blob_sorting_data->data_blocks.size()); + D_ASSERT(right.radix_sorting_data.size() == right.blob_sorting_data->data_blocks.size()); + if (state.external) { + D_ASSERT(left.blob_sorting_data->data_blocks.size() == left.blob_sorting_data->heap_blocks.size()); + D_ASSERT(right.blob_sorting_data->data_blocks.size() == right.blob_sorting_data->heap_blocks.size()); + } } - if (types != other.types) { - return false; +#endif + // Set up the write block + // Each merge task produces a SortedBlock with exactly state.block_capacity rows or less + 
result->InitializeWrite(); + // Initialize arrays to store merge data + bool left_smaller[STANDARD_VECTOR_SIZE]; + idx_t next_entry_sizes[STANDARD_VECTOR_SIZE]; + // Merge loop +#ifdef DEBUG + auto l_count = left.Remaining(); + auto r_count = right.Remaining(); +#endif + while (true) { + auto l_remaining = left.Remaining(); + auto r_remaining = right.Remaining(); + if (l_remaining + r_remaining == 0) { + // Done + break; + } + const idx_t next = MinValue(l_remaining + r_remaining, (idx_t)STANDARD_VECTOR_SIZE); + if (l_remaining != 0 && r_remaining != 0) { + // Compute the merge (not needed if one side is exhausted) + ComputeMerge(next, left_smaller); + } + // Actually merge the data (radix, blob, and payload) + MergeRadix(next, left_smaller); + if (!sort_layout.all_constant) { + MergeData(*result->blob_sorting_data, *left.blob_sorting_data, *right.blob_sorting_data, next, left_smaller, + next_entry_sizes); + D_ASSERT(left.block_idx == left.blob_sorting_data->block_idx && + left.entry_idx == left.blob_sorting_data->entry_idx); + D_ASSERT(right.block_idx == right.blob_sorting_data->block_idx && + right.entry_idx == right.blob_sorting_data->entry_idx); + D_ASSERT(result->radix_sorting_data.size() == result->blob_sorting_data->data_blocks.size()); + } + MergeData(*result->payload_data, *left.payload_data, *right.payload_data, next, left_smaller, next_entry_sizes); + D_ASSERT(left.block_idx == left.payload_data->block_idx && left.entry_idx == left.payload_data->entry_idx); + D_ASSERT(right.block_idx == right.payload_data->block_idx && right.entry_idx == right.payload_data->entry_idx); + D_ASSERT(result->radix_sorting_data.size() == result->payload_data->data_blocks.size()); } - // if count is equal amount of chunks should be equal - for (idx_t row_idx = 0; row_idx < count; row_idx++) { - for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) { - auto lvalue = GetValue(col_idx, row_idx); - auto rvalue = other.GetValue(col_idx, row_idx); - if 
(!Value::ValuesAreEqual(lvalue, rvalue)) { - return false; +#ifdef DEBUG + D_ASSERT(result->Count() == l_count + r_count); +#endif +} + +void MergeSorter::GetNextPartition() { + // Create result block + state.sorted_blocks_temp[state.pair_idx].push_back(make_unique(buffer_manager, state)); + result = state.sorted_blocks_temp[state.pair_idx].back().get(); + // Determine which blocks must be merged + auto &left = *state.sorted_blocks[state.pair_idx * 2]; + auto &right = *state.sorted_blocks[state.pair_idx * 2 + 1]; + const idx_t l_count = left.Count(); + const idx_t r_count = right.Count(); + // Compute the work that this thread must do using Merge Path + idx_t l_end; + idx_t r_end; + if (state.l_start + state.r_start + state.block_capacity < l_count + r_count) { + const idx_t intersection = state.l_start + state.r_start + state.block_capacity; + GetIntersection(left, right, intersection, l_end, r_end); + D_ASSERT(l_end <= l_count); + D_ASSERT(r_end <= r_count); + D_ASSERT(intersection == l_end + r_end); + // Unpin after finding the intersection + if (!sort_layout.blob_layout.AllConstant()) { + left.blob_sorting_data->ResetIndices(0, 0); + right.blob_sorting_data->ResetIndices(0, 0); + } + } else { + l_end = l_count; + r_end = r_count; + } + // Create slices of the data that this thread must merge + left_block = left.CreateSlice(state.l_start, l_end); + right_block = right.CreateSlice(state.r_start, r_end); + // Update global state + state.l_start = l_end; + state.r_start = r_end; + if (state.l_start == l_count && state.r_start == r_count) { + // Delete references to previous pair + state.sorted_blocks[state.pair_idx * 2] = nullptr; + state.sorted_blocks[state.pair_idx * 2 + 1] = nullptr; + // Advance pair + state.pair_idx++; + state.l_start = 0; + state.r_start = 0; + } +} + +int MergeSorter::CompareUsingGlobalIndex(SortedBlock &l, SortedBlock &r, const idx_t l_idx, const idx_t r_idx) { + D_ASSERT(l_idx < l.Count()); + D_ASSERT(r_idx < r.Count()); + + // Easy 
comparison using the previous result (intersections must increase monotonically) + if (l_idx < state.l_start) { + return -1; + } + if (r_idx < state.r_start) { + return 1; + } + + idx_t l_block_idx; + idx_t l_entry_idx; + l.GlobalToLocalIndex(l_idx, l_block_idx, l_entry_idx); + + idx_t r_block_idx; + idx_t r_entry_idx; + r.GlobalToLocalIndex(r_idx, r_block_idx, r_entry_idx); + + l.PinRadix(l_block_idx); + r.PinRadix(r_block_idx); + data_ptr_t l_ptr = l.radix_handle->Ptr() + l_entry_idx * sort_layout.entry_size; + data_ptr_t r_ptr = r.radix_handle->Ptr() + r_entry_idx * sort_layout.entry_size; + + int comp_res; + if (sort_layout.all_constant) { + comp_res = memcmp(l_ptr, r_ptr, sort_layout.comparison_size); + } else { + l.blob_sorting_data->block_idx = l_block_idx; + l.blob_sorting_data->entry_idx = l_entry_idx; + l.blob_sorting_data->Pin(); + r.blob_sorting_data->block_idx = r_block_idx; + r.blob_sorting_data->entry_idx = r_entry_idx; + r.blob_sorting_data->Pin(); + comp_res = Comparators::CompareTuple(l, r, l_ptr, r_ptr, sort_layout, state.external); + } + return comp_res; +} + +void MergeSorter::GetIntersection(SortedBlock &l, SortedBlock &r, const idx_t diagonal, idx_t &l_idx, idx_t &r_idx) { + const idx_t l_count = l.Count(); + const idx_t r_count = r.Count(); + // Cover some edge cases + // Code coverage off because these edge cases cannot happen unless other code changes + // Edge cases have been tested extensively while developing Merge Path in a script + // LCOV_EXCL_START + if (diagonal >= l_count + r_count) { + l_idx = l_count; + r_idx = r_count; + return; + } else if (diagonal == 0) { + l_idx = 0; + r_idx = 0; + return; + } else if (l_count == 0) { + l_idx = 0; + r_idx = diagonal; + return; + } else if (r_count == 0) { + r_idx = 0; + l_idx = diagonal; + return; + } + // LCOV_EXCL_STOP + // Determine offsets for the binary search + const idx_t l_offset = MinValue(l_count, diagonal); + const idx_t r_offset = diagonal > l_count ? 
diagonal - l_count : 0; + D_ASSERT(l_offset + r_offset == diagonal); + const idx_t search_space = diagonal > MaxValue(l_count, r_count) ? l_count + r_count - diagonal + : MinValue(diagonal, MinValue(l_count, r_count)); + // Double binary search + idx_t left = 0; + idx_t right = search_space - 1; + idx_t middle; + int comp_res; + while (left <= right) { + middle = (left + right) / 2; + l_idx = l_offset - middle; + r_idx = r_offset + middle; + if (l_idx == l_count || r_idx == 0) { + comp_res = CompareUsingGlobalIndex(l, r, l_idx - 1, r_idx); + if (comp_res > 0) { + l_idx--; + r_idx++; + } else { + return; + } + if (l_idx == 0 || r_idx == r_count) { + // This case is incredibly difficult to cover as it is dependent on parallelism randomness + // But it has been tested extensively during development in a script + // LCOV_EXCL_START + return; + // LCOV_EXCL_STOP + } else { + break; } } + comp_res = CompareUsingGlobalIndex(l, r, l_idx, r_idx); + if (comp_res > 0) { + left = middle + 1; + } else { + right = middle - 1; + } } - return true; -} -static void Heapify(ChunkCollection *input, vector &desc, vector &null_order, idx_t *heap, - idx_t heap_size, idx_t current_index) { - if (current_index >= heap_size) { + int l_r_min1 = CompareUsingGlobalIndex(l, r, l_idx, r_idx - 1); + int l_min1_r = CompareUsingGlobalIndex(l, r, l_idx - 1, r_idx); + if (l_r_min1 > 0 && l_min1_r < 0) { return; + } else if (l_r_min1 > 0) { + l_idx--; + r_idx++; + } else if (l_min1_r < 0) { + l_idx++; + r_idx--; + } +} + +void MergeSorter::ComputeMerge(const idx_t &count, bool left_smaller[]) { + auto &left = *left_block; + auto &right = *right_block; + // Store indices to restore after computing the merge + idx_t l_block_idx = left.block_idx; + idx_t r_block_idx = right.block_idx; + idx_t l_entry_idx = left.entry_idx; + idx_t r_entry_idx = right.entry_idx; + // Data pointers for both sides + data_ptr_t l_radix_ptr; + data_ptr_t r_radix_ptr; + // Compute the merge of the next 'count' tuples + idx_t 
compared = 0; + while (compared < count) { + // Move to the next block (if needed) + if (l_block_idx < left.radix_sorting_data.size() && l_entry_idx == left.radix_sorting_data[l_block_idx].count) { + l_block_idx++; + l_entry_idx = 0; + if (!sort_layout.all_constant) { + left.blob_sorting_data->block_idx = l_block_idx; + left.blob_sorting_data->entry_idx = l_entry_idx; + } + } + if (r_block_idx < right.radix_sorting_data.size() && + r_entry_idx == right.radix_sorting_data[r_block_idx].count) { + r_block_idx++; + r_entry_idx = 0; + if (!sort_layout.all_constant) { + right.blob_sorting_data->block_idx = r_block_idx; + right.blob_sorting_data->entry_idx = r_entry_idx; + } + } + const bool l_done = l_block_idx == left.radix_sorting_data.size(); + const bool r_done = r_block_idx == right.radix_sorting_data.size(); + if (l_done || r_done) { + // One of the sides is exhausted, no need to compare + break; + } + // Pin the radix sorting data + if (!l_done) { + left.PinRadix(l_block_idx); + l_radix_ptr = left.radix_handle->Ptr() + l_entry_idx * sort_layout.entry_size; + } + if (!r_done) { + right.PinRadix(r_block_idx); + r_radix_ptr = right.radix_handle->Ptr() + r_entry_idx * sort_layout.entry_size; + } + const idx_t &l_count = !l_done ? left.radix_sorting_data[l_block_idx].count : 0; + const idx_t &r_count = !r_done ? 
right.radix_sorting_data[r_block_idx].count : 0; + // Compute the merge + if (sort_layout.all_constant) { + // All sorting columns are constant size + for (; compared < count && l_entry_idx < l_count && r_entry_idx < r_count; compared++) { + left_smaller[compared] = memcmp(l_radix_ptr, r_radix_ptr, sort_layout.comparison_size) < 0; + const bool &l_smaller = left_smaller[compared]; + const bool r_smaller = !l_smaller; + // Use comparison bool (0 or 1) to increment entries and pointers + l_entry_idx += l_smaller; + r_entry_idx += r_smaller; + l_radix_ptr += l_smaller * sort_layout.entry_size; + r_radix_ptr += r_smaller * sort_layout.entry_size; + } + } else { + // Pin the blob data + if (!l_done) { + left.blob_sorting_data->Pin(); + } + if (!r_done) { + right.blob_sorting_data->Pin(); + } + // Merge with variable size sorting columns + for (; compared < count && l_entry_idx < l_count && r_entry_idx < r_count; compared++) { + D_ASSERT(l_block_idx == left.blob_sorting_data->block_idx && + l_entry_idx == left.blob_sorting_data->entry_idx); + D_ASSERT(r_block_idx == right.blob_sorting_data->block_idx && + r_entry_idx == right.blob_sorting_data->entry_idx); + left_smaller[compared] = + Comparators::CompareTuple(left, right, l_radix_ptr, r_radix_ptr, sort_layout, state.external) < 0; + const bool &l_smaller = left_smaller[compared]; + const bool r_smaller = !l_smaller; + // Use comparison bool (0 or 1) to increment entries and pointers + l_entry_idx += l_smaller; + r_entry_idx += r_smaller; + l_radix_ptr += l_smaller * sort_layout.entry_size; + r_radix_ptr += r_smaller * sort_layout.entry_size; + left.blob_sorting_data->Advance(l_smaller); + right.blob_sorting_data->Advance(r_smaller); + } + } + } + // Reset block indices before the actual merge + if (!sort_layout.all_constant) { + left.blob_sorting_data->ResetIndices(left.block_idx, left.entry_idx); + right.blob_sorting_data->ResetIndices(right.block_idx, right.entry_idx); + } +} + +void MergeSorter::MergeRadix(const 
idx_t &count, const bool left_smaller[]) { + auto &left = *left_block; + auto &right = *right_block; + RowDataBlock *l_block; + RowDataBlock *r_block; + + data_ptr_t l_ptr; + data_ptr_t r_ptr; + + RowDataBlock *result_block = &result->radix_sorting_data.back(); + auto result_handle = buffer_manager.Pin(result_block->block); + data_ptr_t result_ptr = result_handle->Ptr() + result_block->count * sort_layout.entry_size; + + idx_t copied = 0; + while (copied < count) { + // Move to the next block (if needed) + if (left.block_idx < left.radix_sorting_data.size() && + left.entry_idx == left.radix_sorting_data[left.block_idx].count) { + // Delete reference to previous block + left.radix_sorting_data[left.block_idx].block = nullptr; + // Advance block + left.block_idx++; + left.entry_idx = 0; + } + if (right.block_idx < right.radix_sorting_data.size() && + right.entry_idx == right.radix_sorting_data[right.block_idx].count) { + // Delete reference to previous block + right.radix_sorting_data[right.block_idx].block = nullptr; + // Advance block + right.block_idx++; + right.entry_idx = 0; + } + const bool l_done = left.block_idx == left.radix_sorting_data.size(); + const bool r_done = right.block_idx == right.radix_sorting_data.size(); + // Pin the radix sortable blocks + if (!l_done) { + l_block = &left.radix_sorting_data[left.block_idx]; + left.PinRadix(left.block_idx); + l_ptr = left.radix_handle->Ptr() + left.entry_idx * sort_layout.entry_size; + } + if (!r_done) { + r_block = &right.radix_sorting_data[right.block_idx]; + right.PinRadix(right.block_idx); + r_ptr = right.radix_handle->Ptr() + right.entry_idx * sort_layout.entry_size; + } + const idx_t &l_count = !l_done ? l_block->count : 0; + const idx_t &r_count = !r_done ? 
r_block->count : 0; + // Copy using computed merge + if (!l_done && !r_done) { + // Both sides have data - merge + MergeRows(l_ptr, left.entry_idx, l_count, r_ptr, right.entry_idx, r_count, result_block, result_ptr, + sort_layout.entry_size, left_smaller, copied, count); + } else if (r_done) { + // Right side is exhausted + FlushRows(l_ptr, left.entry_idx, l_count, result_block, result_ptr, sort_layout.entry_size, copied, count); + } else { + // Left side is exhausted + FlushRows(r_ptr, right.entry_idx, r_count, result_block, result_ptr, sort_layout.entry_size, copied, count); + } + } +} + +//! Merges 'count' rows of l_data and r_data into the last data block of result_data, picking the side of row i from left_smaller[i]. When the layout is not all-constant and the sort is external, the heap entries of the rows are copied into the last heap block of result_data as well, and next_entry_sizes[] is used as scratch space for the sizes of those heap entries +void MergeSorter::MergeData(SortedData &result_data, SortedData &l_data, SortedData &r_data, const idx_t &count, + const bool left_smaller[], idx_t next_entry_sizes[]) { + const auto &layout = result_data.layout; + const idx_t row_width = layout.GetRowWidth(); + const idx_t heap_pointer_offset = layout.GetHeapPointerOffset(); + + // Left and right row data to merge + data_ptr_t l_ptr; + data_ptr_t r_ptr; + // Accompanying left and right heap data (if needed) + data_ptr_t l_heap_ptr; + data_ptr_t r_heap_ptr; + + // Result rows to write to + RowDataBlock *result_data_block = &result_data.data_blocks.back(); + auto result_data_handle = buffer_manager.Pin(result_data_block->block); + data_ptr_t result_data_ptr = result_data_handle->Ptr() + result_data_block->count * row_width; + // Result heap to write to (if needed) + RowDataBlock *result_heap_block; + unique_ptr result_heap_handle; + data_ptr_t result_heap_ptr; + if (!layout.AllConstant() && state.external) { + result_heap_block = &result_data.heap_blocks.back(); + result_heap_handle = buffer_manager.Pin(result_heap_block->block); + result_heap_ptr = result_heap_handle->Ptr() + result_heap_block->byte_offset; + } + + idx_t copied = 0; + while (copied < count) { + // Move to new data blocks (if needed) + if (l_data.block_idx < l_data.data_blocks.size() && + l_data.entry_idx == l_data.data_blocks[l_data.block_idx].count) { + //
Delete reference to previous block + l_data.data_blocks[l_data.block_idx].block = nullptr; + if (!layout.AllConstant() && state.external) { + l_data.heap_blocks[l_data.block_idx].block = nullptr; + } + // Advance block + l_data.block_idx++; + l_data.entry_idx = 0; + } + if (r_data.block_idx < r_data.data_blocks.size() && + r_data.entry_idx == r_data.data_blocks[r_data.block_idx].count) { + // Delete reference to previous block + r_data.data_blocks[r_data.block_idx].block = nullptr; + if (!layout.AllConstant() && state.external) { + r_data.heap_blocks[r_data.block_idx].block = nullptr; + } + // Advance block + r_data.block_idx++; + r_data.entry_idx = 0; + } + const bool l_done = l_data.block_idx == l_data.data_blocks.size(); + const bool r_done = r_data.block_idx == r_data.data_blocks.size(); + // Pin the row data blocks + if (!l_done) { + l_data.Pin(); + l_ptr = l_data.data_handle->Ptr() + l_data.entry_idx * row_width; + } + if (!r_done) { + r_data.Pin(); + r_ptr = r_data.data_handle->Ptr() + r_data.entry_idx * row_width; + } + const idx_t &l_count = !l_done ? l_data.data_blocks[l_data.block_idx].count : 0; + const idx_t &r_count = !r_done ? r_data.data_blocks[r_data.block_idx].count : 0; + // Perform the merge + if (layout.AllConstant() || !state.external) { + // If all constant size, or if we are doing an in-memory sort, we do not need to touch the heap + if (!l_done && !r_done) { + // Both sides have data - merge + MergeRows(l_ptr, l_data.entry_idx, l_count, r_ptr, r_data.entry_idx, r_count, result_data_block, + result_data_ptr, row_width, left_smaller, copied, count); + } else if (r_done) { + // Right side is exhausted + FlushRows(l_ptr, l_data.entry_idx, l_count, result_data_block, result_data_ptr, row_width, copied, + count); + } else { + // Left side is exhausted + FlushRows(r_ptr, r_data.entry_idx, r_count, result_data_block, result_data_ptr, row_width, copied, + count); + } + } else { + // External sorting with variable size data. 
Pin the heap blocks too + if (!l_done) { + l_heap_ptr = l_data.heap_handle->Ptr() + Load(l_ptr + heap_pointer_offset); + D_ASSERT(l_heap_ptr - l_data.heap_handle->Ptr() >= 0); + D_ASSERT((idx_t)(l_heap_ptr - l_data.heap_handle->Ptr()) < + l_data.heap_blocks[l_data.block_idx].byte_offset); + } + if (!r_done) { + r_heap_ptr = r_data.heap_handle->Ptr() + Load(r_ptr + heap_pointer_offset); + D_ASSERT(r_heap_ptr - r_data.heap_handle->Ptr() >= 0); + D_ASSERT((idx_t)(r_heap_ptr - r_data.heap_handle->Ptr()) < + r_data.heap_blocks[r_data.block_idx].byte_offset); + } + // Both the row and heap data need to be dealt with + if (!l_done && !r_done) { + // Both sides have data - merge + idx_t l_idx_copy = l_data.entry_idx; + idx_t r_idx_copy = r_data.entry_idx; + data_ptr_t result_data_ptr_copy = result_data_ptr; + idx_t copied_copy = copied; + // Merge row data + MergeRows(l_ptr, l_idx_copy, l_count, r_ptr, r_idx_copy, r_count, result_data_block, + result_data_ptr_copy, row_width, left_smaller, copied_copy, count); + const idx_t merged = copied_copy - copied; + // Compute the entry sizes and number of heap bytes that will be copied + idx_t copy_bytes = 0; + data_ptr_t l_heap_ptr_copy = l_heap_ptr; + data_ptr_t r_heap_ptr_copy = r_heap_ptr; + for (idx_t i = 0; i < merged; i++) { + // Store base heap offset in the row data + Store(result_heap_block->byte_offset + copy_bytes, result_data_ptr + heap_pointer_offset); + result_data_ptr += row_width; + // Compute entry size and add to total + const bool &l_smaller = left_smaller[copied + i]; + const bool r_smaller = !l_smaller; + auto &entry_size = next_entry_sizes[copied + i]; + entry_size = l_smaller * Load(l_heap_ptr_copy) + r_smaller * Load(r_heap_ptr_copy); + D_ASSERT(entry_size >= sizeof(idx_t)); + D_ASSERT(l_heap_ptr_copy - l_data.heap_handle->Ptr() + l_smaller * entry_size <= + l_data.heap_blocks[l_data.block_idx].byte_offset); + D_ASSERT(r_heap_ptr_copy - r_data.heap_handle->Ptr() + r_smaller * entry_size <= + 
r_data.heap_blocks[r_data.block_idx].byte_offset); + l_heap_ptr_copy += l_smaller * entry_size; + r_heap_ptr_copy += r_smaller * entry_size; + copy_bytes += entry_size; + } + // Reallocate result heap block size (if needed) + if (result_heap_block->byte_offset + copy_bytes > result_heap_block->capacity) { + idx_t new_capacity = result_heap_block->byte_offset + copy_bytes; + buffer_manager.ReAllocate(result_heap_block->block, new_capacity); + result_heap_block->capacity = new_capacity; + result_heap_ptr = result_heap_handle->Ptr() + result_heap_block->byte_offset; + } + D_ASSERT(result_heap_block->byte_offset + copy_bytes <= result_heap_block->capacity); + // Now copy the heap data + for (idx_t i = 0; i < merged; i++) { + const bool &l_smaller = left_smaller[copied + i]; + const bool r_smaller = !l_smaller; + const auto &entry_size = next_entry_sizes[copied + i]; + memcpy(result_heap_ptr, l_heap_ptr, l_smaller * entry_size); + memcpy(result_heap_ptr, r_heap_ptr, r_smaller * entry_size); + D_ASSERT(Load(result_heap_ptr) == entry_size); + result_heap_ptr += entry_size; + l_heap_ptr += l_smaller * entry_size; + r_heap_ptr += r_smaller * entry_size; + l_data.entry_idx += l_smaller; + r_data.entry_idx += r_smaller; + } + // Update result indices and pointers + result_heap_block->count += merged; + result_heap_block->byte_offset += copy_bytes; + copied += merged; + } else if (r_done) { + // Right side is exhausted - flush left + FlushBlobs(layout, l_count, l_ptr, l_data.entry_idx, l_heap_ptr, result_data_block, result_data_ptr, + result_heap_block, *result_heap_handle, result_heap_ptr, copied, count); + } else { + // Left side is exhausted - flush right + FlushBlobs(layout, r_count, r_ptr, r_data.entry_idx, r_heap_ptr, result_data_block, result_data_ptr, + result_heap_block, *result_heap_handle, result_heap_ptr, copied, count); + } + D_ASSERT(result_data_block->count == result_heap_block->count); + } } - idx_t left_child_index = current_index * 2 + 1; - idx_t 
right_child_index = current_index * 2 + 2; - idx_t swap_index = current_index; +} - if (left_child_index < heap_size) { - swap_index = CompareTuple(input, desc, null_order, heap[swap_index], heap[left_child_index]) <= 0 - ? left_child_index - : swap_index; +void MergeSorter::MergeRows(data_ptr_t &l_ptr, idx_t &l_entry_idx, const idx_t &l_count, data_ptr_t &r_ptr, + idx_t &r_entry_idx, const idx_t &r_count, RowDataBlock *target_block, + data_ptr_t &target_ptr, const idx_t &entry_size, const bool left_smaller[], idx_t &copied, + const idx_t &count) { + const idx_t next = MinValue(count - copied, target_block->capacity - target_block->count); + idx_t i; + for (i = 0; i < next && l_entry_idx < l_count && r_entry_idx < r_count; i++) { + const bool &l_smaller = left_smaller[copied + i]; + const bool r_smaller = !l_smaller; + // Use comparison bool (0 or 1) to copy an entry from either side: the memcpy for the side that was not chosen has size 0 and copies nothing + memcpy(target_ptr, l_ptr, l_smaller * entry_size); + memcpy(target_ptr, r_ptr, r_smaller * entry_size); + target_ptr += entry_size; + // Use the comparison bool to increment entries and pointers + l_entry_idx += l_smaller; + r_entry_idx += r_smaller; + l_ptr += l_smaller * entry_size; + r_ptr += r_smaller * entry_size; + } + // Update counts: i entries were written, the loop stops early when either source block or the target block runs out + target_block->count += i; + copied += i; +} + +//! Copies consecutive entries from a single source block into the target block; the number of entries is limited by the remaining 'count - copied', the free capacity of the target block and the entries left in the source block. Source/target pointers, source_entry_idx, target_block->count and copied are advanced accordingly +void MergeSorter::FlushRows(data_ptr_t &source_ptr, idx_t &source_entry_idx, const idx_t &source_count, + RowDataBlock *target_block, data_ptr_t &target_ptr, const idx_t &entry_size, idx_t &copied, + const idx_t &count) { + // Compute how many entries we can fit + idx_t next = MinValue(count - copied, target_block->capacity - target_block->count); + next = MinValue(next, source_count - source_entry_idx); + // Copy them all in a single memcpy + const idx_t copy_bytes = next * entry_size; + memcpy(target_ptr, source_ptr, copy_bytes); + target_ptr += copy_bytes; + source_ptr += copy_bytes; + // Update counts + source_entry_idx += next; + target_block->count += next; + copied += next; +} + +//! Flushes rows of one side together with their heap data: the row data is copied with FlushRows, the heap offset stored in each copied row is rewritten relative to the target heap block, and the heap bytes are copied in one memcpy (the target heap block is re-allocated first if it is too small) +void
MergeSorter::FlushBlobs(const RowLayout &layout, const idx_t &source_count, data_ptr_t &source_data_ptr, + idx_t &source_entry_idx, data_ptr_t &source_heap_ptr, RowDataBlock *target_data_block, + data_ptr_t &target_data_ptr, RowDataBlock *target_heap_block, + BufferHandle &target_heap_handle, data_ptr_t &target_heap_ptr, idx_t &copied, + const idx_t &count) { + const idx_t row_width = layout.GetRowWidth(); + const idx_t heap_pointer_offset = layout.GetHeapPointerOffset(); + idx_t source_entry_idx_copy = source_entry_idx; + data_ptr_t target_data_ptr_copy = target_data_ptr; + idx_t copied_copy = copied; + // Flush row data + FlushRows(source_data_ptr, source_entry_idx_copy, source_count, target_data_block, target_data_ptr_copy, row_width, + copied_copy, count); + const idx_t flushed = copied_copy - copied; + // Compute the entry sizes and number of heap bytes that will be copied + idx_t copy_bytes = 0; + data_ptr_t source_heap_ptr_copy = source_heap_ptr; + for (idx_t i = 0; i < flushed; i++) { + // Store base heap offset in the row data + Store(target_heap_block->byte_offset + copy_bytes, target_data_ptr + heap_pointer_offset); + target_data_ptr += row_width; + // Compute entry size and add to total + auto entry_size = Load(source_heap_ptr_copy); + D_ASSERT(entry_size >= sizeof(idx_t)); + source_heap_ptr_copy += entry_size; + copy_bytes += entry_size; + } + // Reallocate result heap block size (if needed) + if (target_heap_block->byte_offset + copy_bytes > target_heap_block->capacity) { + idx_t new_capacity = target_heap_block->byte_offset + copy_bytes; + buffer_manager.ReAllocate(target_heap_block->block, new_capacity); + target_heap_block->capacity = new_capacity; + target_heap_ptr = target_heap_handle.Ptr() + target_heap_block->byte_offset; + } + D_ASSERT(target_heap_block->byte_offset + copy_bytes <= target_heap_block->capacity); + // Copy the heap data in one go + memcpy(target_heap_ptr, source_heap_ptr, copy_bytes); + target_heap_ptr += copy_bytes; + 
source_heap_ptr += copy_bytes; + source_entry_idx += flushed; + copied += flushed; + // Update result indices and pointers + target_heap_block->count += flushed; + target_heap_block->byte_offset += copy_bytes; + D_ASSERT(target_heap_block->byte_offset <= target_heap_block->capacity); +} + +} // namespace duckdb + + + +namespace duckdb { + +//! Calls std::sort on strings that are tied by their prefix after the radix sort +static void SortTiedBlobs(BufferManager &buffer_manager, const data_ptr_t dataptr, const idx_t &start, const idx_t &end, + const idx_t &tie_col, bool *ties, const data_ptr_t blob_ptr, const SortLayout &sort_layout) { + const auto row_width = sort_layout.blob_layout.GetRowWidth(); + const idx_t &col_idx = sort_layout.sorting_to_blob_col.at(tie_col); + // Locate the first blob row in question + data_ptr_t row_ptr = dataptr + start * sort_layout.entry_size; + data_ptr_t blob_row_ptr = blob_ptr + Load(row_ptr + sort_layout.comparison_size) * row_width; + if (!Comparators::TieIsBreakable(col_idx, blob_row_ptr, sort_layout.blob_layout)) { + // Quick check to see if ties can be broken + return; } - - if (right_child_index < heap_size) { - swap_index = CompareTuple(input, desc, null_order, heap[swap_index], heap[right_child_index]) <= 0 - ? right_child_index - : swap_index; + // Fill pointer array for sorting + auto ptr_block = unique_ptr(new data_ptr_t[end - start]); + auto entry_ptrs = (data_ptr_t *)ptr_block.get(); + for (idx_t i = start; i < end; i++) { + entry_ptrs[i - start] = row_ptr; + row_ptr += sort_layout.entry_size; + } + // Slow pointer-based sorting + const int order = sort_layout.order_types[tie_col] == OrderType::DESCENDING ? 
-1 : 1; + const auto &tie_col_offset = sort_layout.blob_layout.GetOffsets()[col_idx]; + auto logical_type = sort_layout.blob_layout.GetTypes()[col_idx]; + std::sort(entry_ptrs, entry_ptrs + end - start, + [&blob_ptr, &order, &sort_layout, &tie_col_offset, &row_width, &logical_type](const data_ptr_t l, + const data_ptr_t r) { + idx_t left_idx = Load(l + sort_layout.comparison_size); + idx_t right_idx = Load(r + sort_layout.comparison_size); + data_ptr_t left_ptr = blob_ptr + left_idx * row_width + tie_col_offset; + data_ptr_t right_ptr = blob_ptr + right_idx * row_width + tie_col_offset; + return order * Comparators::CompareVal(left_ptr, right_ptr, logical_type) < 0; + }); + // Re-order + auto temp_block = + buffer_manager.Allocate(MaxValue((end - start) * sort_layout.entry_size, (idx_t)Storage::BLOCK_SIZE)); + data_ptr_t temp_ptr = temp_block->Ptr(); + for (idx_t i = 0; i < end - start; i++) { + memcpy(temp_ptr, entry_ptrs[i], sort_layout.entry_size); + temp_ptr += sort_layout.entry_size; + } + memcpy(dataptr + start * sort_layout.entry_size, temp_block->Ptr(), (end - start) * sort_layout.entry_size); + // Determine if there are still ties (if this is not the last column) + if (tie_col < sort_layout.column_count - 1) { + data_ptr_t idx_ptr = dataptr + start * sort_layout.entry_size + sort_layout.comparison_size; + // Load current entry + data_ptr_t current_ptr = blob_ptr + Load(idx_ptr) * row_width + tie_col_offset; + for (idx_t i = 0; i < end - start - 1; i++) { + // Load next entry and compare + idx_ptr += sort_layout.entry_size; + data_ptr_t next_ptr = blob_ptr + Load(idx_ptr) * row_width + tie_col_offset; + ties[start + i] = Comparators::CompareVal(current_ptr, next_ptr, logical_type) == 0; + current_ptr = next_ptr; + } } +} - if (swap_index != current_index) { - std::swap(heap[current_index], heap[swap_index]); - Heapify(input, desc, null_order, heap, heap_size, swap_index); +//! 
Identifies sequences of rows that are tied by the prefix of a blob column, and sorts them +static void SortTiedBlobs(BufferManager &buffer_manager, SortedBlock &sb, bool *ties, data_ptr_t dataptr, + const idx_t &count, const idx_t &tie_col, const SortLayout &sort_layout) { + D_ASSERT(!ties[count - 1]); + auto &blob_block = sb.blob_sorting_data->data_blocks.back(); + auto blob_handle = buffer_manager.Pin(blob_block.block); + const data_ptr_t blob_ptr = blob_handle->Ptr(); + + for (idx_t i = 0; i < count; i++) { + if (!ties[i]) { + continue; + } + idx_t j; + for (j = i; j < count; j++) { + if (!ties[j]) { + break; + } + } + SortTiedBlobs(buffer_manager, dataptr, i, j + 1, tie_col, ties, blob_ptr, sort_layout); + i = j; } } -static void HeapCreate(ChunkCollection *input, vector &desc, vector &null_order, - idx_t *heap, idx_t heap_size) { - for (idx_t i = 0; i < heap_size; i++) { - heap[i] = i; +//! Returns whether there are any 'true' values in the ties[] array (only the first count - 1 entries are inspected; ties[count - 1] is asserted to be false) +static bool AnyTies(bool ties[], const idx_t &count) { + D_ASSERT(!ties[count - 1]); + bool any_ties = false; + for (idx_t i = 0; i < count - 1; i++) { + any_ties = any_ties || ties[i]; } + return any_ties; +} - // build heap - for (int64_t i = heap_size / 2 - 1; i >= 0; i--) { - Heapify(input, desc, null_order, heap, heap_size, i); +//! Compares subsequent rows to check for ties: ties[i] stays true only if it was already true and the tie_size bytes at col_offset of rows i and i + 1 are equal +static void ComputeTies(data_ptr_t dataptr, const idx_t &count, const idx_t &col_offset, const idx_t &tie_size, + bool ties[], const SortLayout &sort_layout) { + D_ASSERT(!ties[count - 1]); + D_ASSERT(col_offset + tie_size <= sort_layout.comparison_size); + // Align dataptr to the first byte of the compared column range + dataptr += col_offset; + for (idx_t i = 0; i < count - 1; i++) { + ties[i] = ties[i] && memcmp(dataptr, dataptr + sort_layout.entry_size, tie_size) == 0; + dataptr += sort_layout.entry_size; } +} - // Run through all the rows.
- for (idx_t i = heap_size; i < input->Count(); i++) { - if (CompareTuple(input, desc, null_order, i, heap[0]) <= 0) { - heap[0] = i; - Heapify(input, desc, null_order, heap, heap_size, 0); +//! Textbook LSD radix sort on the sorting_size bytes starting at col_offset of each entry, processed from the last byte to the first; every pass is a counting sort that moves whole entries (entry_size bytes) between the data buffer and a temporary buffer +static void RadixSort(BufferManager &buffer_manager, data_ptr_t dataptr, const idx_t &count, const idx_t &col_offset, + const idx_t &sorting_size, const SortLayout &sort_layout) { + auto temp_block = buffer_manager.Allocate(MaxValue(count * sort_layout.entry_size, (idx_t)Storage::BLOCK_SIZE)); + data_ptr_t temp = temp_block->Ptr(); + bool swap = false; + + idx_t counts[256]; + uint8_t byte; + for (idx_t offset = col_offset + sorting_size - 1; offset + 1 > col_offset; offset--) { + // Init counts to 0 + memset(counts, 0, sizeof(counts)); + // Collect counts + for (idx_t i = 0; i < count; i++) { + byte = *(dataptr + i * sort_layout.entry_size + offset); + counts[byte]++; + } + // Compute offsets from counts + for (idx_t val = 1; val < 256; val++) { + counts[val] = counts[val] + counts[val - 1]; + } + // Re-order the data in temporary array (iterating backwards keeps entries with an equal byte in their previous relative order) + for (idx_t i = count; i > 0; i--) { + byte = *(dataptr + (i - 1) * sort_layout.entry_size + offset); + memcpy(temp + (counts[byte] - 1) * sort_layout.entry_size, dataptr + (i - 1) * sort_layout.entry_size, + sort_layout.entry_size); + counts[byte]--; } + std::swap(dataptr, temp); + swap = !swap; + } + // Move data back to original buffer (if it was swapped): after an odd number of passes 'dataptr' points at the temporary buffer holding the sorted data and 'temp' at the original buffer + if (swap) { + memcpy(temp, dataptr, count * sort_layout.entry_size); } } -void ChunkCollection::Heap(vector &desc, vector &null_order, idx_t heap[], - idx_t heap_size) { - D_ASSERT(heap); - if (count == 0) { - return; +//!
Identifies sequences of rows that are tied, and calls radix sort on these +static void SubSortTiedTuples(BufferManager &buffer_manager, const data_ptr_t dataptr, const idx_t &count, + const idx_t &col_offset, const idx_t &sorting_size, bool ties[], + const SortLayout &sort_layout) { + D_ASSERT(!ties[count - 1]); + for (idx_t i = 0; i < count; i++) { + if (!ties[i]) { + continue; + } + idx_t j; + for (j = i + 1; j < count; j++) { + if (!ties[j]) { + break; + } + } + RadixSort(buffer_manager, dataptr + i * sort_layout.entry_size, j - i + 1, col_offset, sorting_size, + sort_layout); + i = j; } +} - HeapCreate(this, desc, null_order, heap, heap_size); - - // Heap is ready. Now do a heapsort - for (int64_t i = heap_size - 1; i >= 0; i--) { - std::swap(heap[i], heap[0]); - Heapify(this, desc, null_order, heap, i, 0); +//! Sorts the last radix sorting block of the most recently added SortedBlock in place: radix sorts on groups of sorting columns, then computes and breaks ties group by group until no ties remain or all columns are sorted +void LocalSortState::SortInMemory() { + auto &sb = *sorted_blocks.back(); + auto &block = sb.radix_sorting_data.back(); + const auto &count = block.count; + auto handle = buffer_manager->Pin(block.block); + const auto dataptr = handle->Ptr(); + // Assign an index to each row (stored directly after the comparison_size key bytes of each entry) + data_ptr_t idx_dataptr = dataptr + sort_layout->comparison_size; + for (idx_t i = 0; i < count; i++) { + Store(i, idx_dataptr); + idx_dataptr += sort_layout->entry_size; } + // Radix sort and break ties until no more ties, or until all columns are sorted + idx_t sorting_size = 0; + idx_t col_offset = 0; + unique_ptr ties_ptr; + unique_ptr ties_handle; + bool *ties = nullptr; + for (idx_t i = 0; i < sort_layout->column_count; i++) { + sorting_size += sort_layout->column_sizes[i]; + if (sort_layout->constant_size[i] && i < sort_layout->column_count - 1 && sorting_size < 32) { + // Add columns to the sorting size until we reach a variable size column, or the last column (or until 32 or more bytes have been accumulated) + continue; + } -idx_t ChunkCollection::MaterializeHeapChunk(DataChunk &target, idx_t order[], idx_t start_offset, idx_t heap_size) { - idx_t remaining_data = MinValue(STANDARD_VECTOR_SIZE, heap_size - start_offset); -
D_ASSERT(target.GetTypes() == types); + if (!ties) { + // This is the first sort + RadixSort(*buffer_manager, dataptr, count, col_offset, sorting_size, *sort_layout); + ties_ptr = unique_ptr(new bool[count]); + ties = ties_ptr.get(); + std::fill_n(ties, count - 1, true); + ties[count - 1] = false; + } else { + // For subsequent sorts, we only have to subsort the tied tuples + SubSortTiedTuples(*buffer_manager, dataptr, count, col_offset, sorting_size, ties, *sort_layout); + } - target.SetCardinality(remaining_data); - for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) { - switch (types[col_idx].InternalType()) { - case PhysicalType::BOOL: - case PhysicalType::INT8: - TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); - break; - case PhysicalType::INT16: - TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); - break; - case PhysicalType::INT32: - TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); - break; - case PhysicalType::INT64: - TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); - break; - case PhysicalType::INT128: - TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); + if (sort_layout->constant_size[i] && i == sort_layout->column_count - 1) { + // All columns are sorted, no ties to break because last column is constant size break; - case PhysicalType::FLOAT: - TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); - break; - case PhysicalType::DOUBLE: - TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); - break; - case PhysicalType::VARCHAR: - TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); + } + + ComputeTies(dataptr, count, col_offset, sorting_size, ties, *sort_layout); + if (!AnyTies(ties, count)) { + // No ties, 
stop sorting break; - // TODO this is ugly and sloooow! - case PhysicalType::STRUCT: - case PhysicalType::LIST: { - for (idx_t row_idx = 0; row_idx < remaining_data; row_idx++) { - idx_t chunk_idx_src = order[start_offset + row_idx] / STANDARD_VECTOR_SIZE; - idx_t vector_idx_src = order[start_offset + row_idx] % STANDARD_VECTOR_SIZE; + } - auto &src_chunk = chunks[chunk_idx_src]; - Vector &src_vec = src_chunk->data[col_idx]; - auto &tgt_vec = target.data[col_idx]; - if (FlatVector::IsNull(src_vec, vector_idx_src)) { - FlatVector::SetNull(tgt_vec, row_idx, true); - } else { - tgt_vec.SetValue(row_idx, src_vec.GetValue(vector_idx_src)); - } + if (!sort_layout->constant_size[i]) { + SortTiedBlobs(*buffer_manager, sb, ties, dataptr, count, i, *sort_layout); + if (!AnyTies(ties, count)) { + // No more ties after tie-breaking, stop + break; } - } break; - - default: - throw NotImplementedException("Type is unsupported in MaterializeHeapChunk()"); } + + col_offset += sorting_size; + sorting_size = 0; } - target.Verify(); - return start_offset + remaining_data; } } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/array.hpp -// -// -//===----------------------------------------------------------------------===// - -#include +#include namespace duckdb { -using std::array; -} +static idx_t GetSortingColSize(const LogicalType &type) { + auto physical_type = type.InternalType(); + if (TypeIsConstantSize(physical_type)) { + return GetTypeIdSize(physical_type); + } else { + switch (physical_type) { + case PhysicalType::VARCHAR: + // TODO: make use of statistics + return string_t::INLINE_LENGTH; + case PhysicalType::LIST: + // Lists get another byte to denote the empty list + return 2 + GetSortingColSize(ListType::GetChildType(type)); + case PhysicalType::MAP: + case PhysicalType::STRUCT: + return 1 + GetSortingColSize(StructType::GetChildType(type, 0)); + default: + throw 
NotImplementedException("Unable to order column with type %s", type.ToString()); + } + } +} +SortLayout::SortLayout(const vector &orders, const vector> &statistics) + : column_count(orders.size()), all_constant(true), comparison_size(0), entry_size(0) { + vector blob_layout_types; + for (idx_t i = 0; i < orders.size(); i++) { + const auto &order = orders[i]; + order_types.push_back(order.type); + order_by_null_types.push_back(order.null_order); + auto &expr = *order.expression; + logical_types.push_back(expr.return_type); + auto physical_type = expr.return_type.InternalType(); + all_constant = all_constant && TypeIsConstantSize(physical_type); + constant_size.push_back(TypeIsConstantSize(physical_type)); + column_sizes.push_back(0); + auto &col_size = column_sizes.back(); -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/types/null_value.hpp -// -// -//===----------------------------------------------------------------------===// + if (!statistics.empty() && statistics[i]) { + stats.push_back(statistics[i].get()); + has_null.push_back(stats.back()->CanHaveNull()); + } else { + stats.push_back(nullptr); + has_null.push_back(true); + } + col_size += has_null.back() ? 
1 : 0; + if (TypeIsConstantSize(physical_type)) { + col_size += GetTypeIdSize(physical_type); + } else { + col_size += GetSortingColSize(expr.return_type); + sorting_to_blob_col[i] = blob_layout_types.size(); + blob_layout_types.push_back(expr.return_type); + } + comparison_size += col_size; + } + entry_size = comparison_size + sizeof(idx_t); + blob_layout.Initialize(blob_layout_types, false); +} +LocalSortState::LocalSortState() : initialized(false) { +} +static idx_t EntriesPerBlock(idx_t width) { + return (Storage::BLOCK_SIZE + width * STANDARD_VECTOR_SIZE - 1) / width; +} +void LocalSortState::Initialize(GlobalSortState &global_sort_state, BufferManager &buffer_manager_p) { + sort_layout = &global_sort_state.sort_layout; + payload_layout = &global_sort_state.payload_layout; + buffer_manager = &buffer_manager_p; + // Radix sorting data + radix_sorting_data = make_unique(*buffer_manager, EntriesPerBlock(sort_layout->entry_size), + sort_layout->entry_size); + // Blob sorting data + if (!sort_layout->all_constant) { + auto blob_row_width = sort_layout->blob_layout.GetRowWidth(); + blob_sorting_data = + make_unique(*buffer_manager, EntriesPerBlock(blob_row_width), blob_row_width); + blob_sorting_heap = make_unique(*buffer_manager, (idx_t)Storage::BLOCK_SIZE, 1, true); + } + // Payload data + auto payload_row_width = payload_layout->GetRowWidth(); + payload_data = + make_unique(*buffer_manager, EntriesPerBlock(payload_row_width), payload_row_width); + payload_heap = make_unique(*buffer_manager, (idx_t)Storage::BLOCK_SIZE, 1, true); + // Init done + initialized = true; +} +void LocalSortState::SinkChunk(DataChunk &sort, DataChunk &payload) { + // Build and serialize sorting data to radix sortable rows + auto data_pointers = FlatVector::GetData(addresses); + auto handles = radix_sorting_data->Build(sort.size(), data_pointers, nullptr); + for (idx_t sort_col = 0; sort_col < sort.ColumnCount(); sort_col++) { + bool has_null = sort_layout->has_null[sort_col]; + bool 
nulls_first = sort_layout->order_by_null_types[sort_col] == OrderByNullType::NULLS_FIRST; + bool desc = sort_layout->order_types[sort_col] == OrderType::DESCENDING; + // TODO: use actual string statistics + RowOperations::RadixScatter(sort.data[sort_col], sort.size(), sel_ptr, sort.size(), data_pointers, desc, + has_null, nulls_first, string_t::INLINE_LENGTH, + sort_layout->column_sizes[sort_col]); + } + // Also fully serialize blob sorting columns (to be able to break ties) + if (!sort_layout->all_constant) { + DataChunk blob_chunk; + blob_chunk.SetCardinality(sort.size()); + for (idx_t sort_col = 0; sort_col < sort.ColumnCount(); sort_col++) { + if (!TypeIsConstantSize(sort.data[sort_col].GetType().InternalType())) { + blob_chunk.data.emplace_back(sort.data[sort_col]); + } + } + handles = blob_sorting_data->Build(blob_chunk.size(), data_pointers, nullptr); + auto blob_data = blob_chunk.Orrify(); + RowOperations::Scatter(blob_chunk, blob_data.get(), sort_layout->blob_layout, addresses, *blob_sorting_heap, + sel_ptr, blob_chunk.size()); + } + // Finally, serialize payload data + handles = payload_data->Build(payload.size(), data_pointers, nullptr); + auto input_data = payload.Orrify(); + RowOperations::Scatter(payload, input_data.get(), *payload_layout, addresses, *payload_heap, sel_ptr, + payload.size()); +} +//! Returns the combined size in bytes of the radix sorting data and the payload data, plus the blob sorting data and heap when not all sorting columns are constant size, plus the payload heap when the payload layout is not all-constant +idx_t LocalSortState::SizeInBytes() const { + idx_t size_in_bytes = radix_sorting_data->SizeInBytes() + payload_data->SizeInBytes(); + if (!sort_layout->all_constant) { + size_in_bytes += blob_sorting_data->SizeInBytes() + blob_sorting_heap->SizeInBytes(); + } + if (!payload_layout->AllConstant()) { + size_in_bytes += payload_heap->SizeInBytes(); + } + return size_in_bytes; +} -#include -#include -#include +//! Sorts everything sunk so far: moves the radix sorting, blob sorting and payload data into a single new SortedBlock, sorts it in memory and calls ReOrder before the merge sort; returns immediately when no rows were sunk +void LocalSortState::Sort(GlobalSortState &global_sort_state) { + D_ASSERT(radix_sorting_data->count == payload_data->count); + if (radix_sorting_data->count == 0) { + return; + } + // Move all data to a single SortedBlock +
sorted_blocks.emplace_back(make_unique(*buffer_manager, global_sort_state)); + auto &sb = *sorted_blocks.back(); + // Fixed-size sorting data + auto sorting_block = ConcatenateBlocks(*radix_sorting_data); + sb.radix_sorting_data.push_back(move(sorting_block)); + // Variable-size sorting data + if (!sort_layout->all_constant) { + auto &blob_data = *blob_sorting_data; + auto new_block = ConcatenateBlocks(blob_data); + sb.blob_sorting_data->data_blocks.push_back(move(new_block)); + } + // Payload data + auto payload_block = ConcatenateBlocks(*payload_data); + sb.payload_data->data_blocks.push_back(move(payload_block)); + // Now perform the actual sort + SortInMemory(); + // Re-order before the merge sort + ReOrder(global_sort_state); +} + +//! Copies the contents of all blocks of row_data back to back into one newly created block and returns that block; afterwards row_data is left empty (its blocks are cleared and its count is reset to 0) +RowDataBlock LocalSortState::ConcatenateBlocks(RowDataCollection &row_data) { + // Create block with the correct capacity: room for row_data.count entries, but never fewer entries than fit in Storage::BLOCK_SIZE bytes (rounded up) + const idx_t &entry_size = row_data.entry_size; + idx_t capacity = MaxValue(((idx_t)Storage::BLOCK_SIZE + entry_size - 1) / entry_size, row_data.count); + RowDataBlock new_block(*buffer_manager, capacity, entry_size); + new_block.count = row_data.count; + auto new_block_handle = buffer_manager->Pin(new_block.block); + data_ptr_t new_block_ptr = new_block_handle->Ptr(); + // Copy the data of the blocks into a single block + for (auto &block : row_data.blocks) { + auto block_handle = buffer_manager->Pin(block.block); + memcpy(new_block_ptr, block_handle->Ptr(), block.count * entry_size); + new_block_ptr += block.count * entry_size; + } + row_data.blocks.clear(); + row_data.count = 0; + return new_block; +} + +void LocalSortState::ReOrder(SortedData &sd, data_ptr_t sorting_ptr, RowDataCollection &heap, GlobalSortState &gstate) { + auto &unordered_data_block = sd.data_blocks.back(); + const idx_t &count = unordered_data_block.count; + auto unordered_data_handle = buffer_manager->Pin(unordered_data_block.block); + const data_ptr_t unordered_data_ptr = unordered_data_handle->Ptr(); + // Create new block that will hold
re-ordered row data + RowDataBlock ordered_data_block(*buffer_manager, unordered_data_block.capacity, unordered_data_block.entry_size); + ordered_data_block.count = count; + auto ordered_data_handle = buffer_manager->Pin(ordered_data_block.block); + data_ptr_t ordered_data_ptr = ordered_data_handle->Ptr(); + // Re-order fixed-size row layout + const idx_t row_width = sd.layout.GetRowWidth(); + const idx_t sorting_entry_size = gstate.sort_layout.entry_size; + for (idx_t i = 0; i < count; i++) { + idx_t index = Load(sorting_ptr); + memcpy(ordered_data_ptr, unordered_data_ptr + index * row_width, row_width); + ordered_data_ptr += row_width; + sorting_ptr += sorting_entry_size; + } + // Replace the unordered data block with the re-ordered data block + sd.data_blocks.clear(); + sd.data_blocks.push_back(move(ordered_data_block)); + // Deal with the heap (if necessary) + if (!sd.layout.AllConstant()) { + // Swizzle the column pointers to offsets + RowOperations::SwizzleColumns(sd.layout, ordered_data_handle->Ptr(), count); + // Create a single heap block to store the ordered heap + idx_t total_byte_offset = std::accumulate(heap.blocks.begin(), heap.blocks.end(), 0, + [](idx_t a, const RowDataBlock &b) { return a + b.byte_offset; }); + idx_t heap_block_size = MaxValue(total_byte_offset, (idx_t)Storage::BLOCK_SIZE); + RowDataBlock ordered_heap_block(*buffer_manager, heap_block_size, 1); + ordered_heap_block.count = count; + ordered_heap_block.byte_offset = total_byte_offset; + auto ordered_heap_handle = buffer_manager->Pin(ordered_heap_block.block); + data_ptr_t ordered_heap_ptr = ordered_heap_handle->Ptr(); + // Fill the heap in order + ordered_data_ptr = ordered_data_handle->Ptr(); + const idx_t heap_pointer_offset = sd.layout.GetHeapPointerOffset(); + for (idx_t i = 0; i < count; i++) { + auto heap_row_ptr = Load(ordered_data_ptr + heap_pointer_offset); + auto heap_row_size = Load(heap_row_ptr); + memcpy(ordered_heap_ptr, heap_row_ptr, heap_row_size); + ordered_heap_ptr 
+= heap_row_size; + ordered_data_ptr += row_width; + } + // Swizzle the base pointer to the offset of each row in the heap + RowOperations::SwizzleHeapPointer(sd.layout, ordered_data_handle->Ptr(), ordered_heap_handle->Ptr(), count); + // Move the re-ordered heap to the SortedData, and clear the local heap + sd.heap_blocks.push_back(move(ordered_heap_block)); + heap.pinned_blocks.clear(); + heap.blocks.clear(); + heap.count = 0; + } +} -namespace duckdb { +void LocalSortState::ReOrder(GlobalSortState &gstate) { + auto &sb = *sorted_blocks.back(); + auto sorting_handle = buffer_manager->Pin(sb.radix_sorting_data.back().block); + const data_ptr_t sorting_ptr = sorting_handle->Ptr() + gstate.sort_layout.comparison_size; + // Re-order variable size sorting columns + if (!gstate.sort_layout.all_constant) { + ReOrder(*sb.blob_sorting_data, sorting_ptr, *blob_sorting_heap, gstate); + } + // And the payload + ReOrder(*sb.payload_data, sorting_ptr, *payload_heap, gstate); +} -//! This is no longer used in regular vectors, however, hash tables use this -//! 
value to store a NULL -template -inline T NullValue() { - return std::numeric_limits::min(); +GlobalSortState::GlobalSortState(BufferManager &buffer_manager, vector &orders, + vector> &statistics, RowLayout &payload_layout) + : buffer_manager(buffer_manager), sort_layout(SortLayout(orders, statistics)), payload_layout(payload_layout), + block_capacity(0), external(false) { } -constexpr const char str_nil[2] = {'\200', '\0'}; +void GlobalSortState::AddLocalState(LocalSortState &local_sort_state) { + if (!local_sort_state.radix_sorting_data) { + return; + } -template <> -inline const char *NullValue() { - D_ASSERT(str_nil[0] == '\200' && str_nil[1] == '\0'); - return str_nil; -} + // Sort accumulated data + local_sort_state.Sort(*this); -template <> -inline string_t NullValue() { - return string_t(NullValue()); + // Append local state sorted data to this global state + lock_guard append_guard(lock); + for (auto &sb : local_sort_state.sorted_blocks) { + sorted_blocks.push_back(move(sb)); + } } -template <> -inline char *NullValue() { - return (char *)NullValue(); +void GlobalSortState::PrepareMergePhase() { + // Determine if we need to use do an external sort + idx_t total_heap_size = + std::accumulate(sorted_blocks.begin(), sorted_blocks.end(), (idx_t)0, + [](idx_t a, const unique_ptr &b) { return a + b->HeapSize(); }); + if (external || total_heap_size > 0.25 * buffer_manager.GetMaxMemory()) { + external = true; + } + // Use the data that we have to determine which block size to use during the merge + if (total_heap_size > 0) { + // If we have variable size data we need to be conservative, as there might be skew + idx_t max_block_size = 0; + for (auto &sb : sorted_blocks) { + idx_t size_in_bytes = sb->SizeInBytes(); + if (size_in_bytes > max_block_size) { + max_block_size = size_in_bytes; + block_capacity = sb->Count(); + } + } + } else { + for (auto &sb : sorted_blocks) { + block_capacity = MaxValue(block_capacity, sb->Count()); + } + } + // Unswizzle and pin heap 
blocks if we can fit everything in memory + if (!external) { + for (auto &sb : sorted_blocks) { + sb->blob_sorting_data->Unswizzle(); + sb->payload_data->Unswizzle(); + } + } } -template <> -inline string NullValue() { - return string(NullValue()); +void GlobalSortState::InitializeMergeRound() { + D_ASSERT(sorted_blocks_temp.empty()); + // Uneven number of blocks - keep one on the side + if (sorted_blocks.size() % 2 == 1) { + odd_one_out = move(sorted_blocks.back()); + sorted_blocks.pop_back(); + } + // Init merge path path indices + pair_idx = 0; + num_pairs = sorted_blocks.size() / 2; + l_start = 0; + r_start = 0; + // Allocate room for merge results + for (idx_t p_idx = 0; p_idx < num_pairs; p_idx++) { + sorted_blocks_temp.emplace_back(); + } } -template <> -inline interval_t NullValue() { - interval_t null_value; - null_value.days = NullValue(); - null_value.months = NullValue(); - null_value.micros = NullValue(); - return null_value; +void GlobalSortState::CompleteMergeRound() { + sorted_blocks.clear(); + if (odd_one_out) { + sorted_blocks.push_back(move(odd_one_out)); + odd_one_out = nullptr; + } + for (auto &sorted_block_vector : sorted_blocks_temp) { + sorted_blocks.push_back(make_unique(buffer_manager, *this)); + sorted_blocks.back()->AppendSortedBlocks(sorted_block_vector); + } + sorted_blocks_temp.clear(); + // Only one block left: Done! 
+ if (sorted_blocks.size() == 1) { + sorted_blocks[0]->radix_sorting_data.clear(); + sorted_blocks[0]->blob_sorting_data = nullptr; + } } -template <> -inline hugeint_t NullValue() { - hugeint_t min; - min.lower = 0; - min.upper = std::numeric_limits::min(); - return min; -} +} // namespace duckdb -template <> -inline float NullValue() { - return NAN; + + + + + +#include + +namespace duckdb { + +SortedData::SortedData(const RowLayout &layout, BufferManager &buffer_manager, GlobalSortState &state) + : layout(layout), block_idx(0), entry_idx(0), buffer_manager(buffer_manager), state(state) { } -template <> -inline double NullValue() { - return NAN; +idx_t SortedData::Count() { + idx_t count = std::accumulate(data_blocks.begin(), data_blocks.end(), (idx_t)0, + [](idx_t a, const RowDataBlock &b) { return a + b.count; }); + if (!layout.AllConstant() && state.external) { + D_ASSERT(count == std::accumulate(heap_blocks.begin(), heap_blocks.end(), (idx_t)0, + [](idx_t a, const RowDataBlock &b) { return a + b.count; })); + } + return count; } -template -inline bool IsNullValue(T value) { - return value == NullValue(); +void SortedData::Pin() { + PinData(); + if (!layout.AllConstant() && state.external) { + PinHeap(); + } } -template <> -inline bool IsNullValue(const char *value) { - return *value == str_nil[0]; +data_ptr_t SortedData::DataPtr() const { + D_ASSERT(data_blocks[block_idx].block->Readers() != 0 && + data_handle->handle->BlockId() == data_blocks[block_idx].block->BlockId()); + return data_ptr + entry_idx * layout.GetRowWidth(); } -template <> -inline bool IsNullValue(string_t value) { - return value.GetDataUnsafe()[0] == str_nil[0]; +data_ptr_t SortedData::HeapPtr() const { + D_ASSERT(!layout.AllConstant() && state.external); + D_ASSERT(heap_blocks[block_idx].block->Readers() != 0 && + heap_handle->handle->BlockId() == heap_blocks[block_idx].block->BlockId()); + return heap_ptr + Load(DataPtr() + layout.GetHeapPointerOffset()); } -template <> -inline bool 
IsNullValue(interval_t value) { - return value.days == NullValue() && value.months == NullValue() && - value.micros == NullValue(); +void SortedData::Advance(const bool &adv) { + entry_idx += adv; + if (entry_idx == data_blocks[block_idx].count) { + block_idx++; + entry_idx = 0; + if (block_idx < data_blocks.size()) { + Pin(); + } + } } -template <> -inline bool IsNullValue(char *value) { - return IsNullValue(value); +void SortedData::CreateBlock() { + auto capacity = + MaxValue(((idx_t)Storage::BLOCK_SIZE + layout.GetRowWidth() - 1) / layout.GetRowWidth(), state.block_capacity); + data_blocks.emplace_back(buffer_manager, capacity, layout.GetRowWidth()); + if (!layout.AllConstant() && state.external) { + heap_blocks.emplace_back(buffer_manager, (idx_t)Storage::BLOCK_SIZE, 1); + D_ASSERT(data_blocks.size() == heap_blocks.size()); + } } -template <> -inline bool IsNullValue(float value) { - return std::isnan(value); +void SortedData::ResetIndices(idx_t block_idx_to, idx_t entry_idx_to) { + block_idx = block_idx_to; + entry_idx = entry_idx_to; } -template <> -inline bool IsNullValue(double value) { - return std::isnan(value); +unique_ptr SortedData::CreateSlice(idx_t start_block_index, idx_t start_entry_index, idx_t end_block_index, + idx_t end_entry_index) { + // Add the corresponding blocks to the result + auto result = make_unique(layout, buffer_manager, state); + for (idx_t i = start_block_index; i <= end_block_index; i++) { + result->data_blocks.push_back(data_blocks[i]); + if (!layout.AllConstant() && state.external) { + result->heap_blocks.push_back(heap_blocks[i]); + } + } + // All of the blocks that come before block with idx = start_block_idx can be reset (other references exist) + for (idx_t i = 0; i < start_block_index; i++) { + data_blocks[i].block = nullptr; + if (!layout.AllConstant() && state.external) { + heap_blocks[i].block = nullptr; + } + } + // Use start and end entry indices to set the boundaries + result->entry_idx = start_entry_index; + 
D_ASSERT(end_entry_index <= result->data_blocks.back().count); + result->data_blocks.back().count = end_entry_index; + if (!layout.AllConstant() && state.external) { + result->heap_blocks.back().count = end_entry_index; + } + return result; } -//! Compares a specific memory region against the types NULL value -bool IsNullValue(data_ptr_t ptr, PhysicalType type); +void SortedData::Unswizzle() { + if (layout.AllConstant()) { + return; + } + for (idx_t i = 0; i < data_blocks.size(); i++) { + auto &data_block = data_blocks[i]; + auto &heap_block = heap_blocks[i]; + auto data_handle_p = buffer_manager.Pin(data_block.block); + auto heap_handle_p = buffer_manager.Pin(heap_block.block); + RowOperations::UnswizzleHeapPointer(layout, data_handle_p->Ptr(), heap_handle_p->Ptr(), data_block.count); + RowOperations::UnswizzleColumns(layout, data_handle_p->Ptr(), data_block.count); + state.heap_blocks.push_back(move(heap_block)); + state.pinned_blocks.push_back(move(heap_handle_p)); + } + heap_blocks.clear(); +} -//! 
Writes NullValue value of a specific type to a memory address -void SetNullValue(data_ptr_t ptr, PhysicalType type); +void SortedData::PinData() { + D_ASSERT(block_idx < data_blocks.size()); + auto &block = data_blocks[block_idx]; + if (!data_handle || data_handle->handle->BlockId() != block.block->BlockId()) { + data_handle = buffer_manager.Pin(data_blocks[block_idx].block); + } + data_ptr = data_handle->Ptr(); +} -} // namespace duckdb +void SortedData::PinHeap() { + D_ASSERT(!layout.AllConstant() && state.external); + auto &block = heap_blocks[block_idx]; + if (!heap_handle || heap_handle->handle->BlockId() != block.block->BlockId()) { + heap_handle = buffer_manager.Pin(heap_blocks[block_idx].block); + } + heap_ptr = heap_handle->Ptr(); +} +SortedBlock::SortedBlock(BufferManager &buffer_manager, GlobalSortState &state) + : block_idx(0), entry_idx(0), buffer_manager(buffer_manager), state(state), sort_layout(state.sort_layout), + payload_layout(state.payload_layout) { + blob_sorting_data = make_unique(sort_layout.blob_layout, buffer_manager, state); + payload_data = make_unique(payload_layout, buffer_manager, state); +} +idx_t SortedBlock::Count() const { + idx_t count = std::accumulate(radix_sorting_data.begin(), radix_sorting_data.end(), 0, + [](idx_t a, const RowDataBlock &b) { return a + b.count; }); + if (!sort_layout.all_constant) { + D_ASSERT(count == blob_sorting_data->Count()); + } + D_ASSERT(count == payload_data->Count()); + return count; +} +idx_t SortedBlock::Remaining() const { + idx_t remaining = 0; + if (block_idx < radix_sorting_data.size()) { + remaining += radix_sorting_data[block_idx].count - entry_idx; + for (idx_t i = block_idx + 1; i < radix_sorting_data.size(); i++) { + remaining += radix_sorting_data[i].count; + } + } + return remaining; +} +void SortedBlock::InitializeWrite() { + CreateBlock(); + if (!sort_layout.all_constant) { + blob_sorting_data->CreateBlock(); + } + payload_data->CreateBlock(); +} 
-//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/types/sel_cache.hpp -// -// -//===----------------------------------------------------------------------===// +void SortedBlock::CreateBlock() { + auto capacity = MaxValue(((idx_t)Storage::BLOCK_SIZE + sort_layout.entry_size - 1) / sort_layout.entry_size, + state.block_capacity); + radix_sorting_data.emplace_back(buffer_manager, capacity, sort_layout.entry_size); +} +void SortedBlock::PinRadix(idx_t pin_block_idx) { + D_ASSERT(pin_block_idx < radix_sorting_data.size()); + auto &block = radix_sorting_data[pin_block_idx]; + if (!radix_handle || radix_handle->handle->BlockId() != block.block->BlockId()) { + radix_handle = buffer_manager.Pin(block.block); + } +} +void SortedBlock::AppendSortedBlocks(vector> &sorted_blocks) { + D_ASSERT(Count() == 0); + for (auto &sb : sorted_blocks) { + for (auto &radix_block : sb->radix_sorting_data) { + radix_sorting_data.push_back(move(radix_block)); + } + if (!sort_layout.all_constant) { + for (auto &blob_block : sb->blob_sorting_data->data_blocks) { + blob_sorting_data->data_blocks.push_back(move(blob_block)); + } + for (auto &heap_block : sb->blob_sorting_data->heap_blocks) { + blob_sorting_data->heap_blocks.push_back(move(heap_block)); + } + } + for (auto &payload_data_block : sb->payload_data->data_blocks) { + payload_data->data_blocks.push_back(move(payload_data_block)); + } + if (!payload_data->layout.AllConstant()) { + for (auto &payload_heap_block : sb->payload_data->heap_blocks) { + payload_data->heap_blocks.push_back(move(payload_heap_block)); + } + } + } +} +void SortedBlock::GlobalToLocalIndex(const idx_t &global_idx, idx_t &local_block_index, idx_t &local_entry_index) { + if (global_idx == Count()) { + local_block_index = radix_sorting_data.size() - 1; + local_entry_index = radix_sorting_data.back().count; + return; + } + D_ASSERT(global_idx < Count()); + local_entry_index = global_idx; + for 
(local_block_index = 0; local_block_index < radix_sorting_data.size(); local_block_index++) { + const idx_t &block_count = radix_sorting_data[local_block_index].count; + if (local_entry_index >= block_count) { + local_entry_index -= block_count; + } else { + break; + } + } + D_ASSERT(local_entry_index < radix_sorting_data[local_block_index].count); +} + +unique_ptr SortedBlock::CreateSlice(const idx_t start, const idx_t end) { + // Identify blocks/entry indices of this slice + idx_t start_block_index; + idx_t start_entry_index; + GlobalToLocalIndex(start, start_block_index, start_entry_index); + idx_t end_block_index; + idx_t end_entry_index; + GlobalToLocalIndex(end, end_block_index, end_entry_index); + // Add the corresponding blocks to the result + auto result = make_unique(buffer_manager, state); + for (idx_t i = start_block_index; i <= end_block_index; i++) { + result->radix_sorting_data.push_back(radix_sorting_data[i]); + } + // Reset all blocks that come before block with idx = start_block_idx (slice holds new reference) + for (idx_t i = 0; i < start_block_index; i++) { + radix_sorting_data[i].block = nullptr; + } + // Use start and end entry indices to set the boundaries + result->entry_idx = start_entry_index; + D_ASSERT(end_entry_index <= result->radix_sorting_data.back().count); + result->radix_sorting_data.back().count = end_entry_index; + // Same for the var size sorting data + if (!sort_layout.all_constant) { + result->blob_sorting_data = + blob_sorting_data->CreateSlice(start_block_index, start_entry_index, end_block_index, end_entry_index); + } + // And the payload data + result->payload_data = + payload_data->CreateSlice(start_block_index, start_entry_index, end_block_index, end_entry_index); + D_ASSERT(result->Remaining() == end - start); + return result; +} +idx_t SortedBlock::HeapSize() const { + idx_t result = 0; + if (!sort_layout.all_constant) { + for (auto &block : blob_sorting_data->heap_blocks) { + result += block.capacity; + } + } + if 
(!payload_layout.AllConstant()) { + for (auto &block : payload_data->heap_blocks) { + result += block.capacity; + } + } + return result; +} +idx_t SortedBlock::SizeInBytes() const { + idx_t bytes = 0; + for (idx_t i = 0; i < radix_sorting_data.size(); i++) { + bytes += radix_sorting_data[i].capacity * sort_layout.entry_size; + if (!sort_layout.all_constant) { + bytes += blob_sorting_data->data_blocks[i].capacity * sort_layout.blob_layout.GetRowWidth(); + bytes += blob_sorting_data->heap_blocks[i].capacity; + } + bytes += payload_data->data_blocks[i].capacity * payload_layout.GetRowWidth(); + if (!payload_layout.AllConstant()) { + bytes += payload_data->heap_blocks[i].capacity; + } + } + return bytes; +} -namespace duckdb { +SortedDataScanner::SortedDataScanner(SortedData &sorted_data, GlobalSortState &global_sort_state) + : sorted_data(sorted_data), total_count(sorted_data.Count()), global_sort_state(global_sort_state), + total_scanned(0) { +} -//! Selection vector cache used for caching vector slices -struct SelCache { - unordered_map> cache; -}; +void SortedDataScanner::Scan(DataChunk &chunk) { + auto count = MinValue((idx_t)STANDARD_VECTOR_SIZE, total_count - total_scanned); + if (count == 0) { + D_ASSERT(sorted_data.block_idx == sorted_data.data_blocks.size()); + return; + } + // Eagerly delete references to blocks that we've passed + for (idx_t i = 0; i < sorted_data.block_idx; i++) { + sorted_data.data_blocks[i].block = nullptr; + if (!sorted_data.layout.AllConstant() && global_sort_state.external) { + sorted_data.heap_blocks[i].block = nullptr; + } + } + const idx_t &row_width = sorted_data.layout.GetRowWidth(); + // Set up a batch of pointers to scan data from + idx_t scanned = 0; + auto data_pointers = FlatVector::GetData(addresses); + while (scanned < count) { + sorted_data.Pin(); + auto &data_block = sorted_data.data_blocks[sorted_data.block_idx]; + idx_t next = MinValue(data_block.count - sorted_data.entry_idx, count - scanned); + const data_ptr_t 
data_ptr = sorted_data.data_handle->Ptr() + sorted_data.entry_idx * row_width; + // Set up the next pointers + data_ptr_t row_ptr = data_ptr; + for (idx_t i = 0; i < next; i++) { + data_pointers[scanned + i] = row_ptr; + row_ptr += row_width; + } + // Unswizzle the offsets back to pointers (if needed) + if (!sorted_data.layout.AllConstant() && global_sort_state.external) { + RowOperations::UnswizzleHeapPointer(sorted_data.layout, data_ptr, sorted_data.heap_handle->Ptr(), next); + RowOperations::UnswizzleColumns(sorted_data.layout, data_ptr, next); + } + // Update state indices + sorted_data.entry_idx += next; + if (sorted_data.entry_idx == data_block.count) { + sorted_data.block_idx++; + sorted_data.entry_idx = 0; + } + scanned += next; + } + D_ASSERT(scanned == count); + // Deserialize the payload data + for (idx_t col_idx = 0; col_idx < sorted_data.layout.ColumnCount(); col_idx++) { + const auto col_offset = sorted_data.layout.GetOffsets()[col_idx]; + RowOperations::Gather(addresses, FlatVector::INCREMENTAL_SELECTION_VECTOR, chunk.data[col_idx], + FlatVector::INCREMENTAL_SELECTION_VECTOR, count, col_offset, col_idx); + } + chunk.SetCardinality(count); + chunk.Verify(); + total_scanned += scanned; +} } // namespace duckdb @@ -19415,370 +25237,306 @@ struct SelCache { -namespace duckdb { +#include +#include +#include +#include +#include +#include +#include -DataChunk::DataChunk() : count(0) { -} +namespace duckdb { -void DataChunk::InitializeEmpty(const vector &types) { - D_ASSERT(types.size() > 0); - for (idx_t i = 0; i < types.size(); i++) { - data.emplace_back(Vector(types[i], nullptr)); - } +bool StringUtil::Contains(const string &haystack, const string &needle) { + return (haystack.find(needle) != string::npos); } -void DataChunk::Initialize(const vector &types) { - D_ASSERT(types.size() > 0); - InitializeEmpty(types); - for (idx_t i = 0; i < types.size(); i++) { - data[i].Initialize(); +void StringUtil::LTrim(string &str) { + auto it = str.begin(); + while 
(CharacterIsSpace(*it)) { + it++; } + str.erase(str.begin(), it); } -void DataChunk::Reset() { - for (idx_t i = 0; i < ColumnCount(); i++) { - data[i].Initialize(); - } - SetCardinality(0); +// Remove trailing ' ', '\f', '\n', '\r', '\t', '\v' +void StringUtil::RTrim(string &str) { + str.erase(find_if(str.rbegin(), str.rend(), [](int ch) { return ch > 0 && !CharacterIsSpace(ch); }).base(), + str.end()); } -void DataChunk::Destroy() { - data.clear(); - SetCardinality(0); +void StringUtil::Trim(string &str) { + StringUtil::LTrim(str); + StringUtil::RTrim(str); } -Value DataChunk::GetValue(idx_t col_idx, idx_t index) const { - D_ASSERT(index < size()); - return data[col_idx].GetValue(index); +bool StringUtil::StartsWith(string str, string prefix) { + if (prefix.size() > str.size()) { + return false; + } + return equal(prefix.begin(), prefix.end(), str.begin()); } -void DataChunk::SetValue(idx_t col_idx, idx_t index, const Value &val) { - data[col_idx].SetValue(index, val); +bool StringUtil::EndsWith(const string &str, const string &suffix) { + if (suffix.size() > str.size()) { + return false; + } + return equal(suffix.rbegin(), suffix.rend(), str.rbegin()); } -void DataChunk::Reference(DataChunk &chunk) { - D_ASSERT(chunk.ColumnCount() <= ColumnCount()); - SetCardinality(chunk); - for (idx_t i = 0; i < chunk.ColumnCount(); i++) { - data[i].Reference(chunk.data[i]); +string StringUtil::Repeat(const string &str, idx_t n) { + std::ostringstream os; + for (idx_t i = 0; i < n; i++) { + os << str; } + return (os.str()); } -void DataChunk::Copy(DataChunk &other, idx_t offset) const { - D_ASSERT(ColumnCount() == other.ColumnCount()); - D_ASSERT(other.size() == 0); - - for (idx_t i = 0; i < ColumnCount(); i++) { - D_ASSERT(other.data[i].GetVectorType() == VectorType::FLAT_VECTOR); - VectorOperations::Copy(data[i], other.data[i], size(), offset, 0); +vector StringUtil::Split(const string &str, char delimiter) { + std::stringstream ss(str); + vector lines; + string temp; + while 
(getline(ss, temp, delimiter)) { + lines.push_back(temp); } - other.SetCardinality(size() - offset); + return (lines); } -void DataChunk::Copy(DataChunk &other, const SelectionVector &sel, const idx_t source_count, const idx_t offset) const { - D_ASSERT(ColumnCount() == other.ColumnCount()); - D_ASSERT(other.size() == 0); - D_ASSERT((offset + source_count) <= size()); +string StringUtil::Join(const vector &input, const string &separator) { + return StringUtil::Join(input, input.size(), separator, [](const string &s) { return s; }); +} - for (idx_t i = 0; i < ColumnCount(); i++) { - D_ASSERT(other.data[i].GetVectorType() == VectorType::FLAT_VECTOR); - VectorOperations::Copy(data[i], other.data[i], sel, source_count, offset, 0); +string StringUtil::BytesToHumanReadableString(idx_t bytes) { + string db_size; + auto kilobytes = bytes / 1000; + auto megabytes = kilobytes / 1000; + kilobytes -= megabytes * 1000; + auto gigabytes = megabytes / 1000; + megabytes -= gigabytes * 1000; + auto terabytes = gigabytes / 1000; + gigabytes -= terabytes * 1000; + if (terabytes > 0) { + return to_string(terabytes) + "." + to_string(gigabytes / 100) + "TB"; + } else if (gigabytes > 0) { + return to_string(gigabytes) + "." + to_string(megabytes / 100) + "GB"; + } else if (megabytes > 0) { + return to_string(megabytes) + "." 
+ to_string(kilobytes / 100) + "MB"; + } else if (kilobytes > 0) { + return to_string(kilobytes) + "KB"; + } else { + return to_string(bytes) + " bytes"; } - other.SetCardinality(source_count - offset); } -void DataChunk::Append(const DataChunk &other) { - if (other.size() == 0) { - return; - } - if (ColumnCount() != other.ColumnCount()) { - throw OutOfRangeException("Column counts of appending chunk doesn't match!"); - } - for (idx_t i = 0; i < ColumnCount(); i++) { - D_ASSERT(data[i].GetVectorType() == VectorType::FLAT_VECTOR); - VectorOperations::Copy(other.data[i], data[i], other.size(), 0, size()); - } - SetCardinality(size() + other.size()); +string StringUtil::Upper(const string &str) { + string copy(str); + transform(copy.begin(), copy.end(), copy.begin(), [](unsigned char c) { return std::toupper(c); }); + return (copy); } -void DataChunk::Normalify() { - for (idx_t i = 0; i < ColumnCount(); i++) { - data[i].Normalify(size()); - } +string StringUtil::Lower(const string &str) { + string copy(str); + transform(copy.begin(), copy.end(), copy.begin(), [](unsigned char c) { return std::tolower(c); }); + return (copy); } -vector DataChunk::GetTypes() { - vector types; - for (idx_t i = 0; i < ColumnCount(); i++) { - types.push_back(data[i].GetType()); +vector StringUtil::Split(const string &input, const string &split) { + vector splits; + + idx_t last = 0; + idx_t input_len = input.size(); + idx_t split_len = split.size(); + while (last <= input_len) { + idx_t next = input.find(split, last); + if (next == string::npos) { + next = input_len; + } + + // Push the substring [last, next) on to splits + string substr = input.substr(last, next - last); + if (substr.empty() == false) { + splits.push_back(substr); + } + last = next + split_len; } - return types; + return splits; } -string DataChunk::ToString() const { - string retval = "Chunk - [" + to_string(ColumnCount()) + " Columns]\n"; - for (idx_t i = 0; i < ColumnCount(); i++) { - retval += "- " + 
data[i].ToString(size()) + "\n"; +string StringUtil::Replace(string source, const string &from, const string &to) { + idx_t start_pos = 0; + while ((start_pos = source.find(from, start_pos)) != string::npos) { + source.replace(start_pos, from.length(), to); + start_pos += to.length(); // In case 'to' contains 'from', like + // replacing 'x' with 'yx' } - return retval; + return source; } -void DataChunk::Serialize(Serializer &serializer) { - // write the count - serializer.Write(size()); - serializer.Write(ColumnCount()); - for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) { - // write the types - data[col_idx].GetType().Serialize(serializer); +vector StringUtil::TopNStrings(vector> scores, idx_t n, idx_t threshold) { + if (scores.empty()) { + return vector(); } - // write the data - for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) { - data[col_idx].Serialize(size(), serializer); + sort(scores.begin(), scores.end(), + [](const pair &a, const pair &b) -> bool { return a.second < b.second; }); + vector result; + result.push_back(scores[0].first); + for (idx_t i = 1; i < MinValue(scores.size(), n); i++) { + if (scores[i].second > threshold) { + break; + } + result.push_back(scores[i].first); } + return result; } -void DataChunk::Deserialize(Deserializer &source) { - auto rows = source.Read(); - idx_t column_count = source.Read(); +struct LevenshteinArray { + LevenshteinArray(idx_t len1, idx_t len2) : len1(len1) { + dist = unique_ptr(new idx_t[len1 * len2]); + } - vector types; - for (idx_t i = 0; i < column_count; i++) { - types.push_back(LogicalType::Deserialize(source)); + idx_t &Score(idx_t i, idx_t j) { + return dist[GetIndex(i, j)]; } - Initialize(types); - // now load the column data - SetCardinality(rows); - for (idx_t i = 0; i < column_count; i++) { - data[i].Deserialize(rows, source); + +private: + idx_t len1; + unique_ptr dist; + + idx_t GetIndex(idx_t i, idx_t j) { + return j * len1 + i; } - Verify(); -} +}; -void DataChunk::Slice(const 
SelectionVector &sel_vector, idx_t count) { - this->count = count; - SelCache merge_cache; - for (idx_t c = 0; c < ColumnCount(); c++) { - data[c].Slice(sel_vector, count, merge_cache); +// adapted from https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#C++ +idx_t StringUtil::LevenshteinDistance(const string &s1, const string &s2) { + idx_t len1 = s1.size(); + idx_t len2 = s2.size(); + if (len1 == 0) { + return len2; } -} + if (len2 == 0) { + return len1; + } + LevenshteinArray array(len1 + 1, len2 + 1); + array.Score(0, 0) = 0; + for (idx_t i = 0; i <= len1; i++) { + array.Score(i, 0) = i; + } + for (idx_t j = 0; j <= len2; j++) { + array.Score(0, j) = j; + } + for (idx_t i = 1; i <= len1; i++) { + for (idx_t j = 1; j <= len2; j++) { + // d[i][j] = std::min({ d[i - 1][j] + 1, + // d[i][j - 1] + 1, + // d[i - 1][j - 1] + (s1[i - 1] == s2[j - 1] ? 0 : 1) }); + int equal = s1[i - 1] == s2[j - 1] ? 0 : 1; + idx_t adjacent_score1 = array.Score(i - 1, j) + 1; + idx_t adjacent_score2 = array.Score(i, j - 1) + 1; + idx_t adjacent_score3 = array.Score(i - 1, j - 1) + equal; -void DataChunk::Slice(DataChunk &other, const SelectionVector &sel, idx_t count, idx_t col_offset) { - D_ASSERT(other.ColumnCount() <= col_offset + ColumnCount()); - this->count = count; - SelCache merge_cache; - for (idx_t c = 0; c < other.ColumnCount(); c++) { - if (other.data[c].GetVectorType() == VectorType::DICTIONARY_VECTOR) { - // already a dictionary! 
merge the dictionaries - data[col_offset + c].Reference(other.data[c]); - data[col_offset + c].Slice(sel, count, merge_cache); - } else { - data[col_offset + c].Slice(other.data[c], sel, count); + idx_t t = MinValue(adjacent_score1, adjacent_score2); + array.Score(i, j) = MinValue(t, adjacent_score3); } } + return array.Score(len1, len2); } -unique_ptr DataChunk::Orrify() { - auto orrified_data = unique_ptr(new VectorData[ColumnCount()]); - for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) { - data[col_idx].Orrify(size(), orrified_data[col_idx]); +vector StringUtil::TopNLevenshtein(const vector &strings, const string &target, idx_t n, + idx_t threshold) { + vector> scores; + scores.reserve(strings.size()); + for (auto &str : strings) { + scores.emplace_back(str, LevenshteinDistance(str, target)); } - return orrified_data; + return TopNStrings(scores, n, threshold); } -void DataChunk::Hash(Vector &result) { - D_ASSERT(result.GetType().id() == LogicalTypeId::HASH); - VectorOperations::Hash(data[0], result, size()); - for (idx_t i = 1; i < ColumnCount(); i++) { - VectorOperations::CombineHash(result, data[i], size()); +string StringUtil::CandidatesMessage(const vector &candidates, const string &candidate) { + string result_str; + if (!candidates.empty()) { + result_str = "\n" + candidate + ": "; + for (idx_t i = 0; i < candidates.size(); i++) { + if (i > 0) { + result_str += ", "; + } + result_str += "\"" + candidates[i] + "\""; + } } + return result_str; } -void DataChunk::Verify() { -#ifdef DEBUG - D_ASSERT(size() <= STANDARD_VECTOR_SIZE); - // verify that all vectors in this chunk have the chunk selection vector - for (idx_t i = 0; i < ColumnCount(); i++) { - data[i].Verify(size()); - } -#endif -} +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/tree_renderer.hpp +// +// +//===----------------------------------------------------------------------===// -void 
DataChunk::Print() { - Printer::Print(ToString()); -} -struct DuckDBArrowArrayChildHolder { - ArrowArray array; - // need max three pointers for strings - duckdb::array buffers = {{nullptr, nullptr, nullptr}}; - Vector vector = {}; - unique_ptr string_offsets = nullptr; - unique_ptr string_data = nullptr; -}; -struct DuckDBArrowArrayHolder { - vector children = {}; - vector children_ptrs = {}; - array buffers = {{nullptr}}; -}; -static void ReleaseDuckDBArrowArray(ArrowArray *array) { - if (!array || !array->release) { - return; - } - array->release = nullptr; - auto holder = static_cast(array->private_data); - delete holder; -} -void DataChunk::ToArrowArray(ArrowArray *out_array) { - Normalify(); - D_ASSERT(out_array); +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/main/query_profiler.hpp +// +// +//===----------------------------------------------------------------------===// - // Allocate as unique_ptr first to cleanup properly on error - auto root_holder = make_unique(); - // Allocate the children - root_holder->children.resize(ColumnCount()); - root_holder->children_ptrs.resize(ColumnCount(), nullptr); - for (size_t i = 0; i < ColumnCount(); ++i) { - root_holder->children_ptrs[i] = &root_holder->children[i].array; - } - out_array->children = root_holder->children_ptrs.data(); - out_array->n_children = ColumnCount(); - // Configure root array - out_array->length = size(); - out_array->n_children = ColumnCount(); - out_array->n_buffers = 1; - out_array->buffers = root_holder->buffers.data(); // there is no actual buffer there since we don't have NULLs - out_array->offset = 0; - out_array->null_count = 0; // needs to be 0 - out_array->dictionary = nullptr; - // Configure child arrays - for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) { - auto &child_holder = root_holder->children[col_idx]; - auto &child = child_holder.array; - auto &vector = child_holder.vector; - 
vector.Reference(data[col_idx]); +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/profiler.hpp +// +// +//===----------------------------------------------------------------------===// - child.private_data = nullptr; - child.release = ReleaseDuckDBArrowArray; - child.n_children = 0; - child.null_count = -1; // unknown - child.offset = 0; - child.dictionary = nullptr; - child.buffers = child_holder.buffers.data(); - child.length = size(); - switch (vector.GetVectorType()) { - // TODO support other vector types - case VectorType::FLAT_VECTOR: { - switch (GetTypes()[col_idx].id()) { - // TODO support other data types - case LogicalTypeId::BOOLEAN: - case LogicalTypeId::TINYINT: - case LogicalTypeId::SMALLINT: - case LogicalTypeId::INTEGER: - case LogicalTypeId::BIGINT: - case LogicalTypeId::UTINYINT: - case LogicalTypeId::USMALLINT: - case LogicalTypeId::UINTEGER: - case LogicalTypeId::UBIGINT: - case LogicalTypeId::FLOAT: - case LogicalTypeId::DOUBLE: - case LogicalTypeId::HUGEINT: - case LogicalTypeId::DATE: - child.n_buffers = 2; - child.buffers[1] = (void *)FlatVector::GetData(vector); - break; - case LogicalTypeId::TIME: { - // convert time from microseconds to miliseconds - child.n_buffers = 2; - child_holder.string_data = unique_ptr(new data_t[sizeof(uint32_t) * (size() + 1)]); - child.buffers[1] = child_holder.string_data.get(); - auto source_ptr = FlatVector::GetData(vector); - auto target_ptr = (uint32_t *)child.buffers[1]; - for (idx_t row_idx = 0; row_idx < size(); row_idx++) { - target_ptr[row_idx] = uint32_t(source_ptr[row_idx] / 1000); - } - break; - } - case LogicalTypeId::TIMESTAMP: { - // convert timestamp from microseconds to nanoseconds - child.n_buffers = 2; - child.buffers[1] = (void *)FlatVector::GetData(vector); - auto target_ptr = (timestamp_t *)child.buffers[1]; - for (idx_t row_idx = 0; row_idx < size(); row_idx++) { - target_ptr[row_idx] = 
Timestamp::GetEpochNanoSeconds(target_ptr[row_idx]); - } - break; - } - case LogicalTypeId::VARCHAR: { - child.n_buffers = 3; - child_holder.string_offsets = unique_ptr(new data_t[sizeof(uint32_t) * (size() + 1)]); - child.buffers[1] = child_holder.string_offsets.get(); - D_ASSERT(child.buffers[1]); - // step 1: figure out total string length: - idx_t total_string_length = 0; - auto string_t_ptr = FlatVector::GetData(vector); - auto &mask = FlatVector::Validity(vector); - for (idx_t row_idx = 0; row_idx < size(); row_idx++) { - if (!mask.RowIsValid(row_idx)) { - continue; - } - total_string_length += string_t_ptr[row_idx].GetSize(); - } - // step 2: allocate this much - child_holder.string_data = unique_ptr(new data_t[total_string_length]); - child.buffers[2] = child_holder.string_data.get(); - D_ASSERT(child.buffers[2]); - // step 3: assign buffers - idx_t current_heap_offset = 0; - auto target_ptr = (uint32_t *)child.buffers[1]; - - for (idx_t row_idx = 0; row_idx < size(); row_idx++) { - target_ptr[row_idx] = current_heap_offset; - if (!mask.RowIsValid(row_idx)) { - continue; - } - auto &str = string_t_ptr[row_idx]; - memcpy((void *)((uint8_t *)child.buffers[2] + current_heap_offset), str.GetDataUnsafe(), - str.GetSize()); - current_heap_offset += str.GetSize(); - } - target_ptr[size()] = current_heap_offset; // need to terminate last string! - break; - } - default: - throw std::runtime_error("Unsupported type " + GetTypes()[col_idx].ToString()); - } - auto &mask = FlatVector::Validity(vector); - if (!mask.AllValid()) { - // any bits are set: might have nulls - child.null_count = -1; - } else { - // no bits are set; we know there are no nulls - child.null_count = 0; - } - child.buffers[0] = (void *)mask.GetData(); - break; - } - default: - throw NotImplementedException(VectorTypeToString(vector.GetVectorType())); - } - out_array->children[col_idx] = &child; + +namespace duckdb { + +//! 
The profiler can be used to measure elapsed time +template +class Profiler { +public: + //! Starts the timer + void Start() { + finished = false; + start = Tick(); + } + //! Finishes timing + void End() { + end = Tick(); + finished = true; + } + + //! Returns the elapsed time in seconds. If End() has been called, returns + //! the total elapsed time. Otherwise returns how far along the timer is + //! right now. + double Elapsed() const { + auto _end = finished ? end : Tick(); + return std::chrono::duration_cast>(_end - start).count(); } - // Release ownership to caller - out_array->private_data = root_holder.release(); - out_array->release = ReleaseDuckDBArrowArray; -} +private: + time_point Tick() const { + return T::now(); + } + time_point start; + time_point end; + bool finished = false; +}; } // namespace duckdb @@ -19788,1619 +25546,1929 @@ void DataChunk::ToArrowArray(ArrowArray *out_array) { +#include -#include -#include -#include namespace duckdb { +class ExpressionExecutor; +class PhysicalOperator; +class SQLStatement; -const string_t Date::MONTH_NAMES_ABBREVIATED[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; -const string_t Date::MONTH_NAMES[] = {"January", "February", "March", "April", "May", "June", - "July", "August", "September", "October", "November", "December"}; -const string_t Date::DAY_NAMES[] = {"Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"}; -const string_t Date::DAY_NAMES_ABBREVIATED[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"}; +//! The ExpressionInfo keeps information related to an expression +struct ExpressionInfo { + explicit ExpressionInfo() : hasfunction(false) { + } + // A vector of children + vector> children; + // Extract ExpressionInformation from a given expression state + void ExtractExpressionsRecursive(unique_ptr &state); + //! Whether or not expression has function + bool hasfunction; + //! The function Name + string function_name; + //! 
The function time + uint64_t function_time; + //! Count the number of ALL tuples + uint64_t tuples_count = 0; + //! Count the number of tuples sampled + uint64_t sample_tuples_count = 0; +}; + +//! The ExpressionRootInfo keeps information related to the root of an expression tree +struct ExpressionRootInfo { + ExpressionRootInfo(ExpressionExecutorState &executor, string name); + //! Count the number of time the executor called + uint64_t total_count = 0; + //! Count the number of time the executor called since last sampling + uint64_t current_count = 0; + //! Count the number of samples + uint64_t sample_count = 0; + //! Count the number of tuples in all samples + uint64_t sample_tuples_count = 0; + //! Count the number of tuples processed by this executor + uint64_t tuples_count = 0; + //! A vector which contain the pointer to root of each expression tree + unique_ptr root; + //! Name + string name; + //! Elapsed time + double time; + //! Extra Info + string extra_info; +}; -const int32_t Date::NORMAL_DAYS[] = {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; -const int32_t Date::CUMULATIVE_DAYS[] = {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}; -const int32_t Date::LEAP_DAYS[] = {0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; -const int32_t Date::CUMULATIVE_LEAP_DAYS[] = {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}; -const int8_t Date::MONTH_PER_DAY_OF_YEAR[] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12}; -const int8_t Date::LEAP_MONTH_PER_DAY_OF_YEAR[] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12}; -const int32_t Date::CUMULATIVE_YEAR_DAYS[] = { - 0, 365, 730, 1096, 1461, 1826, 2191, 
2557, 2922, 3287, 3652, 4018, 4383, 4748, - 5113, 5479, 5844, 6209, 6574, 6940, 7305, 7670, 8035, 8401, 8766, 9131, 9496, 9862, - 10227, 10592, 10957, 11323, 11688, 12053, 12418, 12784, 13149, 13514, 13879, 14245, 14610, 14975, - 15340, 15706, 16071, 16436, 16801, 17167, 17532, 17897, 18262, 18628, 18993, 19358, 19723, 20089, - 20454, 20819, 21184, 21550, 21915, 22280, 22645, 23011, 23376, 23741, 24106, 24472, 24837, 25202, - 25567, 25933, 26298, 26663, 27028, 27394, 27759, 28124, 28489, 28855, 29220, 29585, 29950, 30316, - 30681, 31046, 31411, 31777, 32142, 32507, 32872, 33238, 33603, 33968, 34333, 34699, 35064, 35429, - 35794, 36160, 36525, 36890, 37255, 37621, 37986, 38351, 38716, 39082, 39447, 39812, 40177, 40543, - 40908, 41273, 41638, 42004, 42369, 42734, 43099, 43465, 43830, 44195, 44560, 44926, 45291, 45656, - 46021, 46387, 46752, 47117, 47482, 47847, 48212, 48577, 48942, 49308, 49673, 50038, 50403, 50769, - 51134, 51499, 51864, 52230, 52595, 52960, 53325, 53691, 54056, 54421, 54786, 55152, 55517, 55882, - 56247, 56613, 56978, 57343, 57708, 58074, 58439, 58804, 59169, 59535, 59900, 60265, 60630, 60996, - 61361, 61726, 62091, 62457, 62822, 63187, 63552, 63918, 64283, 64648, 65013, 65379, 65744, 66109, - 66474, 66840, 67205, 67570, 67935, 68301, 68666, 69031, 69396, 69762, 70127, 70492, 70857, 71223, - 71588, 71953, 72318, 72684, 73049, 73414, 73779, 74145, 74510, 74875, 75240, 75606, 75971, 76336, - 76701, 77067, 77432, 77797, 78162, 78528, 78893, 79258, 79623, 79989, 80354, 80719, 81084, 81450, - 81815, 82180, 82545, 82911, 83276, 83641, 84006, 84371, 84736, 85101, 85466, 85832, 86197, 86562, - 86927, 87293, 87658, 88023, 88388, 88754, 89119, 89484, 89849, 90215, 90580, 90945, 91310, 91676, - 92041, 92406, 92771, 93137, 93502, 93867, 94232, 94598, 94963, 95328, 95693, 96059, 96424, 96789, - 97154, 97520, 97885, 98250, 98615, 98981, 99346, 99711, 100076, 100442, 100807, 101172, 101537, 101903, - 102268, 102633, 102998, 103364, 103729, 104094, 104459, 104825, 
105190, 105555, 105920, 106286, 106651, 107016, - 107381, 107747, 108112, 108477, 108842, 109208, 109573, 109938, 110303, 110669, 111034, 111399, 111764, 112130, - 112495, 112860, 113225, 113591, 113956, 114321, 114686, 115052, 115417, 115782, 116147, 116513, 116878, 117243, - 117608, 117974, 118339, 118704, 119069, 119435, 119800, 120165, 120530, 120895, 121260, 121625, 121990, 122356, - 122721, 123086, 123451, 123817, 124182, 124547, 124912, 125278, 125643, 126008, 126373, 126739, 127104, 127469, - 127834, 128200, 128565, 128930, 129295, 129661, 130026, 130391, 130756, 131122, 131487, 131852, 132217, 132583, - 132948, 133313, 133678, 134044, 134409, 134774, 135139, 135505, 135870, 136235, 136600, 136966, 137331, 137696, - 138061, 138427, 138792, 139157, 139522, 139888, 140253, 140618, 140983, 141349, 141714, 142079, 142444, 142810, - 143175, 143540, 143905, 144271, 144636, 145001, 145366, 145732, 146097}; +struct ExpressionExecutorInfo { + explicit ExpressionExecutorInfo() {}; + explicit ExpressionExecutorInfo(ExpressionExecutor &executor, const string &name, int id); + //! A vector which contain the pointer to all ExpressionRootInfo + vector> roots; + //! 
Id, it will be used as index for executors_info vector + int id; +}; -void Date::ExtractYearOffset(int32_t &n, int32_t &year, int32_t &year_offset) { - year = Date::EPOCH_YEAR; - // first we normalize n to be in the year range [1970, 2370] - // since leap years repeat every 400 years, we can safely normalize just by "shifting" the CumulativeYearDays array - while (n < 0) { - n += Date::DAYS_PER_YEAR_INTERVAL; - year -= Date::YEAR_INTERVAL; - } - while (n >= Date::DAYS_PER_YEAR_INTERVAL) { - n -= Date::DAYS_PER_YEAR_INTERVAL; - year += Date::YEAR_INTERVAL; - } - // interpolation search - // we can find an upper bound of the year by assuming each year has 365 days - year_offset = n / 365; - // because of leap years we might be off by a little bit: compensate by decrementing the year offset until we find - // our year - while (n < Date::CUMULATIVE_YEAR_DAYS[year_offset]) { - year_offset--; - D_ASSERT(year_offset >= 0); +struct OperatorInformation { + double time = 0; + idx_t elements = 0; + string name; + explicit OperatorInformation(double time_ = 0, idx_t elements_ = 0) : time(time_), elements(elements_) { } - year += year_offset; - D_ASSERT(n >= Date::CUMULATIVE_YEAR_DAYS[year_offset]); -} + //! A vector of Expression Executor Info + vector> executors_info; +}; -void Date::Convert(int32_t n, int32_t &year, int32_t &month, int32_t &day) { - int32_t year_offset; - Date::ExtractYearOffset(n, year, year_offset); +//! 
The OperatorProfiler measures timings of individual operators +class OperatorProfiler { + friend class QueryProfiler; - day = n - Date::CUMULATIVE_YEAR_DAYS[year_offset]; - D_ASSERT(day >= 0 && day <= 365); +public: + DUCKDB_API explicit OperatorProfiler(bool enabled); - bool is_leap_year = (Date::CUMULATIVE_YEAR_DAYS[year_offset + 1] - Date::CUMULATIVE_YEAR_DAYS[year_offset]) == 366; - if (is_leap_year) { - month = Date::LEAP_MONTH_PER_DAY_OF_YEAR[day]; - day -= Date::CUMULATIVE_LEAP_DAYS[month - 1]; - } else { - month = Date::MONTH_PER_DAY_OF_YEAR[day]; - day -= Date::CUMULATIVE_DAYS[month - 1]; - } - day++; - D_ASSERT(day > 0 && day <= (is_leap_year ? Date::LEAP_DAYS[month] : Date::NORMAL_DAYS[month])); - D_ASSERT(month > 0 && month <= 12); -} + DUCKDB_API void StartOperator(const PhysicalOperator *phys_op); + DUCKDB_API void EndOperator(DataChunk *chunk); + DUCKDB_API void Flush(const PhysicalOperator *phys_op, ExpressionExecutor *expression_executor, const string &name, + int id); -int32_t Date::FromDate(int32_t year, int32_t month, int32_t day) { - int32_t n = 0; - if (!Date::IsValid(year, month, day)) { - throw ConversionException("Date out of range: %d-%d-%d", year, month, day); - } - while (year < 1970) { - year += Date::YEAR_INTERVAL; - n -= Date::DAYS_PER_YEAR_INTERVAL; - } - while (year >= 2370) { - year -= Date::YEAR_INTERVAL; - n += Date::DAYS_PER_YEAR_INTERVAL; + ~OperatorProfiler() { } - n += Date::CUMULATIVE_YEAR_DAYS[year - 1970]; - n += Date::IsLeapYear(year) ? 
Date::CUMULATIVE_LEAP_DAYS[month - 1] : Date::CUMULATIVE_DAYS[month - 1]; - n += day - 1; - return n; -} -bool Date::ParseDoubleDigit(const char *buf, idx_t len, idx_t &pos, int32_t &result) { - if (pos < len && StringUtil::CharacterIsDigit(buf[pos])) { - result = buf[pos++] - '0'; - if (pos < len && StringUtil::CharacterIsDigit(buf[pos])) { - result = (buf[pos++] - '0') + result * 10; - } - return true; - } - return false; -} +private: + void AddTiming(const PhysicalOperator *op, double time, idx_t elements); -bool Date::TryConvertDate(const char *buf, idx_t len, idx_t &pos, date_t &result, bool strict) { - pos = 0; - if (len == 0) { - return false; + //! Whether or not the profiler is enabled + bool enabled; + //! The timer used to time the execution time of the individual Physical Operators + Profiler op; + //! The stack of Physical Operators that are currently active + std::stack execution_stack; + //! A mapping of physical operators to recorded timings + unordered_map timings; +}; + +//! The QueryProfiler can be used to measure timings of queries +class QueryProfiler { +public: + DUCKDB_API QueryProfiler() + : automatic_print_format(ProfilerPrintFormat::NONE), enabled(false), detailed_enabled(false), running(false), + query_requires_profiling(false) { } - int32_t day = 0; - int32_t month = -1; - int32_t year = 0; - bool yearneg = false; - int sep; +public: + struct TreeNode { + string name; + string extra_info; + OperatorInformation info; + vector> children; + idx_t depth = 0; + }; - // skip leading spaces - while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) { - pos++; - } + // Propagate save_location, enabled, detailed_enabled and automatic_print_format. 
+ void Propagate(QueryProfiler &qp); - if (pos >= len) { - return false; - } - if (buf[pos] == '-') { - yearneg = true; - pos++; - if (pos >= len) { - return false; - } - } - if (!StringUtil::CharacterIsDigit(buf[pos])) { - return false; - } - // first parse the year - for (; pos < len && StringUtil::CharacterIsDigit(buf[pos]); pos++) { - year = (buf[pos] - '0') + year * 10; - if (year > Date::MAX_YEAR) { - break; - } - } - if (yearneg) { - year = -year; - if (year < Date::MIN_YEAR) { - return false; - } - } + using TreeMap = unordered_map; - if (pos >= len) { - return false; - } +private: + unique_ptr CreateTree(PhysicalOperator *root, idx_t depth = 0); + void Render(const TreeNode &node, std::ostream &str) const; + //! The lock used for flushing information from a thread into the global query profiler + mutex flush_lock; - // fetch the separator - sep = buf[pos++]; - if (sep != ' ' && sep != '-' && sep != '/' && sep != '\\') { - // invalid separator - return false; +public: + DUCKDB_API void Enable() { + enabled = true; + detailed_enabled = false; } - // parse the month - if (!Date::ParseDoubleDigit(buf, len, pos, month)) { - return false; + DUCKDB_API void DetailedEnable() { + detailed_enabled = true; } - if (pos >= len) { - return false; + DUCKDB_API void Disable() { + enabled = false; } - if (buf[pos++] != sep) { - return false; + DUCKDB_API bool IsEnabled() { + return enabled; } - if (pos >= len) { - return false; + bool IsDetailedEnabled() const { + return detailed_enabled; } - // now parse the day - if (!Date::ParseDoubleDigit(buf, len, pos, day)) { - return false; - } + DUCKDB_API void StartQuery(string query); + DUCKDB_API void EndQuery(); - // check for an optional trailing " (BC)"" - if (len - pos >= 5 && StringUtil::CharacterIsSpace(buf[pos]) && buf[pos + 1] == '(' && buf[pos + 2] == 'B' && - buf[pos + 3] == 'C' && buf[pos + 4] == ')') { - if (yearneg || year == 0) { - return false; - } - year = -year + 1; - pos += 5; + //! 
Adds the timings gathered by an OperatorProfiler to this query profiler + DUCKDB_API void Flush(OperatorProfiler &profiler); - if (year < Date::MIN_YEAR) { - return false; - } - } + DUCKDB_API void StartPhase(string phase); + DUCKDB_API void EndPhase(); - // in strict mode, check remaining string for non-space characters - if (strict) { - // skip trailing spaces - while (pos < len && StringUtil::CharacterIsSpace((unsigned char)buf[pos])) { - pos++; - } - // check position. if end was not reached, non-space chars remaining - if (pos < len) { - return false; - } - } else { - // in non-strict mode, check for any direct trailing digits - if (pos < len && StringUtil::CharacterIsDigit((unsigned char)buf[pos])) { - return false; - } - } + DUCKDB_API void Initialize(PhysicalOperator *root); - result = Date::FromDate(year, month, day); - return true; -} + DUCKDB_API string ToString(bool print_optimizer_output = false) const; + DUCKDB_API void ToStream(std::ostream &str, bool print_optimizer_output = false) const; + DUCKDB_API void Print(); -date_t Date::FromCString(const char *buf, idx_t len, bool strict) { - date_t result; - idx_t pos; - if (!TryConvertDate(buf, len, pos, result, strict)) { - throw ConversionException("date/time field value out of range: \"%s\", " - "expected format is (YYYY-MM-DD)", - string(buf, len)); + DUCKDB_API string ToJSON() const; + DUCKDB_API void WriteToFile(const char *path, string &info) const; + + //! The format to automatically print query profiling information in (default: disabled) + ProfilerPrintFormat automatic_print_format; + //! The file to save query profiling information to, instead of printing it to the console (empty = print to + //! console) + string save_location; + + idx_t OperatorSize() { + return tree_map.size(); } - return result; -} -date_t Date::FromString(const string &str, bool strict) { - return Date::FromCString(str.c_str(), str.size(), strict); -} +private: + //! 
Whether or not query profiling is enabled + bool enabled; + //! Whether or not detailed query profiling is enabled + bool detailed_enabled; + //! Whether or not the query profiler is running + bool running; + + bool query_requires_profiling; + + //! The root of the query tree + unique_ptr root; + //! The query string + string query; + //! The timer used to time the execution time of the entire query + Profiler main_query; + //! A map of a Physical Operator pointer to a tree node + TreeMap tree_map; -string Date::ToString(int32_t date) { - int32_t date_units[3]; - idx_t year_length; - bool add_bc; - Date::Convert(date, date_units[0], date_units[1], date_units[2]); +public: + const TreeMap &GetTreeMap() const { + return tree_map; + } - auto length = DateToStringCast::Length(date_units, year_length, add_bc); - auto buffer = unique_ptr(new char[length]); - DateToStringCast::Format(buffer.get(), date_units, year_length, add_bc); - return string(buffer.get(), length); -} +private: + //! The timer used to time the individual phases of the planning process + Profiler phase_profiler; + //! A mapping of the phase names to the timings + using PhaseTimingStorage = unordered_map; + PhaseTimingStorage phase_timings; + using PhaseTimingItem = PhaseTimingStorage::value_type; + //! The stack of currently active phases + vector phase_stack; -string Date::Format(int32_t year, int32_t month, int32_t day) { - return ToString(Date::FromDate(year, month, day)); -} +private: + vector GetOrderedPhaseTimings() const; -bool Date::IsLeapYear(int32_t year) { - return year % 4 == 0 && (year % 100 != 0 || year % 400 == 0); -} + //! Check whether or not an operator type requires query profiling. If none of the ops in a query require profiling + //! no profiling information is output. 
+ bool OperatorRequiresProfiling(PhysicalOperatorType op_type); +}; -bool Date::IsValid(int32_t year, int32_t month, int32_t day) { - if (month < 1 || month > 12) { - return false; - } - if (year < Date::MIN_YEAR || year > Date::MAX_YEAR) { - return false; +//! The QueryProfilerHistory can be used to access the profiler of previous queries +class QueryProfilerHistory { +private: + //! Previous Query profilers + deque>> prev_profilers; + //! Previous Query profilers size + uint64_t prev_profilers_size = 20; + +public: + deque>> &GetPrevProfilers() { + return prev_profilers; } - if (day < 1) { - return false; + QueryProfilerHistory() { } - return Date::IsLeapYear(year) ? day <= Date::LEAP_DAYS[month] : day <= Date::NORMAL_DAYS[month]; -} - -date_t Date::EpochDaysToDate(int32_t epoch) { - return (date_t)epoch; -} -int32_t Date::EpochDays(date_t date) { - return (int32_t)date; -} + void SetPrevProfilersSize(uint64_t prevProfilersSize) { + prev_profilers_size = prevProfilersSize; + } + uint64_t GetPrevProfilersSize() const { + return prev_profilers_size; + } -date_t Date::EpochToDate(int64_t epoch) { - return (date_t)(epoch / Interval::SECS_PER_DAY); -} +public: + void SetProfilerHistorySize(uint64_t size) { + this->prev_profilers_size = size; + } +}; +} // namespace duckdb -int64_t Date::Epoch(date_t date) { - return ((int64_t)date) * Interval::SECS_PER_DAY; -} -int64_t Date::EpochNanoseconds(date_t date) { - return ((int64_t)date) * (Interval::MICROS_PER_DAY * 1000); -} +namespace duckdb { +class LogicalOperator; +class PhysicalOperator; -int32_t Date::ExtractYear(date_t n, int32_t *last_year) { - // cached look up: check if year of this date is the same as the last one we looked up - // note that this only works for years in the range [1970, 2370] - if (n >= Date::CUMULATIVE_YEAR_DAYS[*last_year] && n < Date::CUMULATIVE_YEAR_DAYS[*last_year + 1]) { - return Date::EPOCH_YEAR + *last_year; - } - int32_t year; - Date::ExtractYearOffset(n, year, *last_year); - return 
year; -} +struct RenderTreeNode { + string name; + string extra_text; +}; -int32_t Date::ExtractYear(timestamp_t ts, int32_t *last_year) { - return Date::ExtractYear(Timestamp::GetDate(ts), last_year); -} +struct RenderTree { + RenderTree(idx_t width, idx_t height); -int32_t Date::ExtractYear(date_t n) { - int32_t year, year_offset; - Date::ExtractYearOffset(n, year, year_offset); - return year; -} + unique_ptr[]> nodes; + idx_t width; + idx_t height; -int32_t Date::ExtractMonth(date_t date) { - int32_t out_year, out_month, out_day; - Date::Convert(date, out_year, out_month, out_day); - return out_month; -} +public: + RenderTreeNode *GetNode(idx_t x, idx_t y); + void SetNode(idx_t x, idx_t y, unique_ptr node); + bool HasNode(idx_t x, idx_t y); -int32_t Date::ExtractDay(date_t date) { - int32_t out_year, out_month, out_day; - Date::Convert(date, out_year, out_month, out_day); - return out_day; -} + idx_t GetPosition(idx_t x, idx_t y); +}; -int32_t Date::ExtractDayOfTheYear(date_t date) { - int32_t year, year_offset; - Date::ExtractYearOffset(date, year, year_offset); - return date - Date::CUMULATIVE_YEAR_DAYS[year_offset] + 1; -} +struct TreeRendererConfig { -int32_t Date::ExtractISODayOfTheWeek(date_t date) { - // date of 0 is 1970-01-01, which was a Thursday (4) - // -7 = 4 - // -6 = 5 - // -5 = 6 - // -4 = 7 - // -3 = 1 - // -2 = 2 - // -1 = 3 - // 0 = 4 - // 1 = 5 - // 2 = 6 - // 3 = 7 - // 4 = 1 - // 5 = 2 - // 6 = 3 - // 7 = 4 - if (date < 0) { - // negative date: start off at 4 and cycle downwards - return (7 - ((-date + 3) % 7)); - } else { - // positive date: start off at 4 and cycle upwards - return ((date + 3) % 7) + 1; + void enable_detailed() { + MAX_EXTRA_LINES = 1000; + detailed = true; } -} -static int32_t GetISOWeek(int32_t year, int32_t month, int32_t day) { - auto day_of_the_year = - (Date::IsLeapYear(year) ? 
Date::CUMULATIVE_LEAP_DAYS[month] : Date::CUMULATIVE_DAYS[month]) + day; - // get the first day of the first week of the year - // the first week is the week that has the 4th of January in it - auto day_of_the_fourth = Date::ExtractISODayOfTheWeek(Date::FromDate(year, 1, 4)); - // if fourth is monday, then fourth is the first day - // if fourth is tuesday, third is the first day - // if fourth is wednesday, second is the first day - // if fourth is thursday - sunday, first is the first day - auto first_day_of_the_first_week = day_of_the_fourth >= 4 ? 0 : 5 - day_of_the_fourth; - if (day_of_the_year < first_day_of_the_first_week) { - // day is part of last year - return GetISOWeek(year - 1, 12, day); - } else { - return ((day_of_the_year - first_day_of_the_first_week) / 7) + 1; + void enable_standard() { + MAX_EXTRA_LINES = 30; + detailed = false; } -} -int32_t Date::ExtractISOWeekNumber(date_t date) { - int32_t year, month, day; - Date::Convert(date, year, month, day); - return GetISOWeek(year, month - 1, day - 1); -} + idx_t MAXIMUM_RENDER_WIDTH = 240; + idx_t NODE_RENDER_WIDTH = 29; + idx_t MINIMUM_RENDER_WIDTH = 15; + idx_t MAX_EXTRA_LINES = 30; + bool detailed = false; -int32_t Date::ExtractWeekNumberRegular(date_t date, bool monday_first) { - int32_t year, month, day; - Date::Convert(date, year, month, day); - month -= 1; - day -= 1; - // get the day of the year - auto day_of_the_year = - (Date::IsLeapYear(year) ? Date::CUMULATIVE_LEAP_DAYS[month] : Date::CUMULATIVE_DAYS[month]) + day; - // now figure out the first monday or sunday of the year - // what day is January 1st? 
- auto day_of_jan_first = Date::ExtractISODayOfTheWeek(Date::FromDate(year, 1, 1)); - // monday = 1, sunday = 7 - int32_t first_week_start; - if (monday_first) { - // have to find next "1" - if (day_of_jan_first == 1) { - // jan 1 is monday: starts immediately - first_week_start = 0; - } else { - // jan 1 is not monday: count days until next monday - first_week_start = 8 - day_of_jan_first; - } - } else { - first_week_start = 7 - day_of_jan_first; - } - if (day_of_the_year < first_week_start) { - // day occurs before first week starts: week 0 - return 0; - } - return ((day_of_the_year - first_week_start) / 7) + 1; -} + const char *LTCORNER = "┌"; + const char *RTCORNER = "┐"; + const char *LDCORNER = "└"; + const char *RDCORNER = "┘"; -// Returns the date of the monday of the current week. -date_t Date::GetMondayOfCurrentWeek(date_t date) { - int32_t dotw = Date::ExtractISODayOfTheWeek(date); - return date - (dotw - 1); -} + const char *MIDDLE = "┼"; + const char *TMIDDLE = "┬"; + const char *LMIDDLE = "├"; + const char *RMIDDLE = "┤"; + const char *DMIDDLE = "┴"; -} // namespace duckdb + const char *VERTICAL = "│"; + const char *HORIZONTAL = "─"; + // ASCII version? 
+ // static constexpr const char* LTCORNER = "<"; + // static constexpr const char* RTCORNER = ">"; + // static constexpr const char* LDCORNER = "<"; + // static constexpr const char* RDCORNER = ">"; + // static constexpr const char* MIDDLE = "+"; + // static constexpr const char* TMIDDLE = "+"; + // static constexpr const char* LMIDDLE = "+"; + // static constexpr const char* RMIDDLE = "+"; + // static constexpr const char* DMIDDLE = "+"; -namespace duckdb { + // static constexpr const char* VERTICAL = "|"; + // static constexpr const char* HORIZONTAL = "-"; +}; -template -string TemplatedDecimalToString(SIGNED value, uint8_t scale) { - auto len = DecimalToString::DecimalLength(value, scale); - auto data = unique_ptr(new char[len + 1]); - DecimalToString::FormatDecimal(value, scale, data.get(), len); - return string(data.get(), len); -} +class TreeRenderer { +public: + explicit TreeRenderer(TreeRendererConfig config_p = TreeRendererConfig()) : config(move(config_p)) { + } -string Decimal::ToString(int16_t value, uint8_t scale) { - return TemplatedDecimalToString(value, scale); -} + string ToString(const LogicalOperator &op); + string ToString(const PhysicalOperator &op); + string ToString(const QueryProfiler::TreeNode &op); -string Decimal::ToString(int32_t value, uint8_t scale) { - return TemplatedDecimalToString(value, scale); -} + void Render(const LogicalOperator &op, std::ostream &ss); + void Render(const PhysicalOperator &op, std::ostream &ss); + void Render(const QueryProfiler::TreeNode &op, std::ostream &ss); -string Decimal::ToString(int64_t value, uint8_t scale) { - return TemplatedDecimalToString(value, scale); -} + void ToStream(RenderTree &root, std::ostream &ss); -string Decimal::ToString(hugeint_t value, uint8_t scale) { - auto len = HugeintToStringCast::DecimalLength(value, scale); - auto data = unique_ptr(new char[len + 1]); - HugeintToStringCast::FormatDecimal(value, scale, data.get(), len); - return string(data.get(), len); -} + void 
EnableDetailed() { + config.enable_detailed(); + } + void EnableStandard() { + config.enable_standard(); + } -} // namespace duckdb +private: + unique_ptr CreateTree(const LogicalOperator &op); + unique_ptr CreateTree(const PhysicalOperator &op); + unique_ptr CreateTree(const QueryProfiler::TreeNode &op); + string ExtraInfoSeparator(); + unique_ptr CreateRenderNode(string name, string extra_info); + unique_ptr CreateNode(const LogicalOperator &op); + unique_ptr CreateNode(const PhysicalOperator &op); + unique_ptr CreateNode(const QueryProfiler::TreeNode &op); +private: + //! The configuration used for rendering + TreeRendererConfig config; +private: + void RenderTopLayer(RenderTree &root, std::ostream &ss, idx_t y); + void RenderBoxContent(RenderTree &root, std::ostream &ss, idx_t y); + void RenderBottomLayer(RenderTree &root, std::ostream &ss, idx_t y); + bool CanSplitOnThisChar(char l); + bool IsPadding(char l); + string RemovePadding(string l); + void SplitUpExtraInfo(const string &extra_info, vector &result); + void SplitStringBuffer(const string &source, vector &result); -#include + template + idx_t CreateRenderTreeRecursive(RenderTree &result, const T &op, idx_t x, idx_t y); -namespace duckdb { + template + unique_ptr CreateRenderTree(const T &op); + string ExtractExpressionsRecursive(ExpressionInfo &states); +}; -template <> -hash_t Hash(uint64_t val) { - return murmurhash64(val); -} +} // namespace duckdb -template <> -hash_t Hash(int64_t val) { - return murmurhash64((uint64_t)val); -} -template <> -hash_t Hash(hugeint_t val) { - return murmurhash64(val.lower) ^ murmurhash64(val.upper); -} -template <> -hash_t Hash(float val) { - return std::hash {}(val); -} -template <> -hash_t Hash(double val) { - return std::hash {}(val); -} -template <> -hash_t Hash(interval_t val) { - return Hash(val.days) ^ Hash(val.months) ^ Hash(val.micros); -} -template <> -hash_t Hash(const char *str) { - hash_t hash = 5381; - hash_t c; - while ((c = *str++)) { - hash = ((hash << 
5) + hash) + c; - } - return hash; -} +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 +// See the end of this file for a list -template <> -hash_t Hash(string_t val) { - return Hash(val.GetDataUnsafe(), val.GetSize()); -} -template <> -hash_t Hash(char *val) { - return Hash(val); -} -hash_t Hash(const char *val, size_t size) { - hash_t hash = 5381; +#include +#include +#include - for (size_t i = 0; i < size; i++) { - hash = ((hash << 5) + hash) + val[i]; - } +namespace duckdb { - return hash; -} +enum class UnicodeType { INVALID, ASCII, UNICODE }; +enum class UnicodeInvalidReason { BYTE_MISMATCH, NULL_BYTE }; -hash_t Hash(char *val, size_t size) { - return Hash((const char *)val, size); -} +class Utf8Proc { +public: + //! Distinguishes ASCII, Valid UTF8 and Invalid UTF8 strings + static UnicodeType Analyze(const char *s, size_t len, UnicodeInvalidReason *invalid_reason = nullptr, size_t *invalid_pos = nullptr); + //! Performs UTF NFC normalization of string, return value needs to be free'd + static char* Normalize(const char* s, size_t len); + //! Returns whether or not the UTF8 string is valid + static bool IsValid(const char *s, size_t len); + //! Returns the position (in bytes) of the next grapheme cluster + static size_t NextGraphemeCluster(const char *s, size_t len, size_t pos); + //! Returns the position (in bytes) of the previous grapheme cluster + static size_t PreviousGraphemeCluster(const char *s, size_t len, size_t pos); -hash_t Hash(uint8_t *val, size_t size) { - return Hash((const char *)val, size); -} + //! Transform a codepoint to utf8 and writes it to "c", sets "sz" to the size of the codepoint + static bool CodepointToUtf8(int cp, int &sz, char *c); + //! Returns the codepoint length in bytes when encoded in UTF8 + static int CodepointLength(int cp); + //! 
Transform a UTF8 string to a codepoint; returns the codepoint and writes the length of the codepoint (in UTF8) to sz + static int32_t UTF8ToCodepoint(const char *c, int &sz); + static size_t RenderWidth(const char *s, size_t len, size_t pos); -} // namespace duckdb +}; +} +// LICENSE_CHANGE_END -#include +#include namespace duckdb { -//===--------------------------------------------------------------------===// -// String Conversion -//===--------------------------------------------------------------------===// -const hugeint_t Hugeint::POWERS_OF_TEN[] { - hugeint_t(1), - hugeint_t(10), - hugeint_t(100), - hugeint_t(1000), - hugeint_t(10000), - hugeint_t(100000), - hugeint_t(1000000), - hugeint_t(10000000), - hugeint_t(100000000), - hugeint_t(1000000000), - hugeint_t(10000000000), - hugeint_t(100000000000), - hugeint_t(1000000000000), - hugeint_t(10000000000000), - hugeint_t(100000000000000), - hugeint_t(1000000000000000), - hugeint_t(10000000000000000), - hugeint_t(100000000000000000), - hugeint_t(1000000000000000000), - hugeint_t(1000000000000000000) * hugeint_t(10), - hugeint_t(1000000000000000000) * hugeint_t(100), - hugeint_t(1000000000000000000) * hugeint_t(1000), - hugeint_t(1000000000000000000) * hugeint_t(10000), - hugeint_t(1000000000000000000) * hugeint_t(100000), - hugeint_t(1000000000000000000) * hugeint_t(1000000), - hugeint_t(1000000000000000000) * hugeint_t(10000000), - hugeint_t(1000000000000000000) * hugeint_t(100000000), - hugeint_t(1000000000000000000) * hugeint_t(1000000000), - hugeint_t(1000000000000000000) * hugeint_t(10000000000), - hugeint_t(1000000000000000000) * hugeint_t(100000000000), - hugeint_t(1000000000000000000) * hugeint_t(1000000000000), - hugeint_t(1000000000000000000) * hugeint_t(10000000000000), - hugeint_t(1000000000000000000) * hugeint_t(100000000000000), - hugeint_t(1000000000000000000) * hugeint_t(1000000000000000), - hugeint_t(1000000000000000000) * hugeint_t(10000000000000000), - hugeint_t(1000000000000000000) * 
hugeint_t(100000000000000000), - hugeint_t(1000000000000000000) * hugeint_t(1000000000000000000), - hugeint_t(1000000000000000000) * hugeint_t(1000000000000000000) * hugeint_t(10), - hugeint_t(1000000000000000000) * hugeint_t(1000000000000000000) * hugeint_t(100)}; +RenderTree::RenderTree(idx_t width_p, idx_t height_p) : width(width_p), height(height_p) { + nodes = unique_ptr[]>(new unique_ptr[(width + 1) * (height + 1)]); +} -static uint8_t PositiveHugeintHighestBit(hugeint_t bits) { - uint8_t out = 0; - if (bits.upper) { - out = 64; - uint64_t up = bits.upper; - while (up) { - up >>= 1; - out++; - } - } else { - uint64_t low = bits.lower; - while (low) { - low >>= 1; - out++; - } +RenderTreeNode *RenderTree::GetNode(idx_t x, idx_t y) { + if (x >= width || y >= height) { + return nullptr; } - return out; + return nodes[GetPosition(x, y)].get(); } -static bool PositiveHugeintIsBitSet(hugeint_t lhs, uint8_t bit_position) { - if (bit_position < 64) { - return lhs.lower & (uint64_t(1) << uint64_t(bit_position)); - } else { - return lhs.upper & (uint64_t(1) << uint64_t(bit_position - 64)); +bool RenderTree::HasNode(idx_t x, idx_t y) { + if (x >= width || y >= height) { + return false; } + return nodes[GetPosition(x, y)].get() != nullptr; } -hugeint_t PositiveHugeintLeftShift(hugeint_t lhs, uint32_t amount) { - D_ASSERT(amount > 0 && amount < 64); - hugeint_t result; - result.lower = lhs.lower << amount; - result.upper = (lhs.upper << amount) + (lhs.lower >> (64 - amount)); - return result; +idx_t RenderTree::GetPosition(idx_t x, idx_t y) { + return y * width + x; } -hugeint_t Hugeint::DivModPositive(hugeint_t lhs, uint64_t rhs, uint64_t &remainder) { - D_ASSERT(lhs.upper >= 0); - // DivMod code adapted from: - // https://github.com/calccrypto/uint128_t/blob/master/uint128_t.cpp - - // initialize the result and remainder to 0 - hugeint_t div_result; - div_result.lower = 0; - div_result.upper = 0; - remainder = 0; +void RenderTree::SetNode(idx_t x, idx_t y, unique_ptr 
node) { + nodes[GetPosition(x, y)] = move(node); +} - uint8_t highest_bit_set = PositiveHugeintHighestBit(lhs); - // now iterate over the amount of bits that are set in the LHS - for (uint8_t x = highest_bit_set; x > 0; x--) { - // left-shift the current result and remainder by 1 - div_result = PositiveHugeintLeftShift(div_result, 1); - remainder <<= 1; - // we get the value of the bit at position X, where position 0 is the least-significant bit - if (PositiveHugeintIsBitSet(lhs, x - 1)) { - // increment the remainder - remainder++; +void TreeRenderer::RenderTopLayer(RenderTree &root, std::ostream &ss, idx_t y) { + for (idx_t x = 0; x < root.width; x++) { + if (x * config.NODE_RENDER_WIDTH >= config.MAXIMUM_RENDER_WIDTH) { + break; } - if (remainder >= rhs) { - // the remainder has passed the division multiplier: add one to the divide result - remainder -= rhs; - div_result.lower++; - if (div_result.lower == 0) { - // overflow - div_result.upper++; + if (root.HasNode(x, y)) { + ss << config.LTCORNER; + ss << StringUtil::Repeat(config.HORIZONTAL, config.NODE_RENDER_WIDTH / 2 - 1); + if (y == 0) { + // top level node: no node above this one + ss << config.HORIZONTAL; + } else { + // render connection to node above this one + ss << config.DMIDDLE; } + ss << StringUtil::Repeat(config.HORIZONTAL, config.NODE_RENDER_WIDTH / 2 - 1); + ss << config.RTCORNER; + } else { + ss << StringUtil::Repeat(" ", config.NODE_RENDER_WIDTH); } } - return div_result; + ss << std::endl; } -string Hugeint::ToString(hugeint_t input) { - uint64_t remainder; - string result; - bool negative = input.upper < 0; - if (negative) { - NegateInPlace(input); - } - while (true) { - if (!input.lower && !input.upper) { +void TreeRenderer::RenderBottomLayer(RenderTree &root, std::ostream &ss, idx_t y) { + for (idx_t x = 0; x <= root.width; x++) { + if (x * config.NODE_RENDER_WIDTH >= config.MAXIMUM_RENDER_WIDTH) { break; } - input = Hugeint::DivModPositive(input, 10, remainder); - result = string(1, '0' + 
remainder) + result; // NOLINT - } - if (result.empty()) { - // value is zero - return "0"; + if (root.HasNode(x, y)) { + ss << config.LDCORNER; + ss << StringUtil::Repeat(config.HORIZONTAL, config.NODE_RENDER_WIDTH / 2 - 1); + if (root.HasNode(x, y + 1)) { + // node below this one: connect to that one + ss << config.TMIDDLE; + } else { + // no node below this one: end the box + ss << config.HORIZONTAL; + } + ss << StringUtil::Repeat(config.HORIZONTAL, config.NODE_RENDER_WIDTH / 2 - 1); + ss << config.RDCORNER; + } else if (root.HasNode(x, y + 1)) { + ss << StringUtil::Repeat(" ", config.NODE_RENDER_WIDTH / 2); + ss << config.VERTICAL; + ss << StringUtil::Repeat(" ", config.NODE_RENDER_WIDTH / 2); + } else { + ss << StringUtil::Repeat(" ", config.NODE_RENDER_WIDTH); + } } - return negative ? "-" + result : result; + ss << std::endl; } -//===--------------------------------------------------------------------===// -// Multiply -//===--------------------------------------------------------------------===// -bool Hugeint::TryMultiply(hugeint_t lhs, hugeint_t rhs, hugeint_t &result) { - bool lhs_negative = lhs.upper < 0; - bool rhs_negative = rhs.upper < 0; - if (lhs_negative) { - NegateInPlace(lhs); - } - if (rhs_negative) { - NegateInPlace(rhs); - } -#if ((__GNUC__ >= 5) || defined(__clang__)) && defined(__SIZEOF_INT128__) - __uint128_t left = __uint128_t(lhs.lower) + (__uint128_t(lhs.upper) << 64); - __uint128_t right = __uint128_t(rhs.lower) + (__uint128_t(rhs.upper) << 64); - __uint128_t result_i128; - if (__builtin_mul_overflow(left, right, &result_i128)) { - return false; +string AdjustTextForRendering(string source, idx_t max_render_width) { + idx_t cpos = 0; + idx_t render_width = 0; + vector> render_widths; + while (cpos < source.size()) { + idx_t char_render_width = Utf8Proc::RenderWidth(source.c_str(), source.size(), cpos); + cpos = Utf8Proc::NextGraphemeCluster(source.c_str(), source.size(), cpos); + render_width += char_render_width; + 
render_widths.emplace_back(cpos, render_width); + if (render_width > max_render_width) { + break; + } } - uint64_t upper = uint64_t(result_i128 >> 64); - if (upper & 0x8000000000000000) { - return false; + if (render_width > max_render_width) { + // need to find a position to truncate + for (idx_t pos = render_widths.size(); pos > 0; pos--) { + if (render_widths[pos - 1].second < max_render_width - 4) { + return source.substr(0, render_widths[pos - 1].first) + "..." + + string(max_render_width - render_widths[pos - 1].second - 3, ' '); + } + } + source = "..."; } - result.upper = int64_t(upper); - result.lower = uint64_t(result_i128 & 0xffffffffffffffff); -#else - // Multiply code adapted from: - // https://github.com/calccrypto/uint128_t/blob/master/uint128_t.cpp - - // split values into 4 32-bit parts - uint64_t top[4] = {uint64_t(lhs.upper) >> 32, uint64_t(lhs.upper) & 0xffffffff, lhs.lower >> 32, - lhs.lower & 0xffffffff}; - uint64_t bottom[4] = {uint64_t(rhs.upper) >> 32, uint64_t(rhs.upper) & 0xffffffff, rhs.lower >> 32, - rhs.lower & 0xffffffff}; - uint64_t products[4][4]; + // need to pad with spaces + idx_t total_spaces = max_render_width - render_width; + idx_t half_spaces = total_spaces / 2; + idx_t extra_left_space = total_spaces % 2 == 0 ? 
0 : 1; + return string(half_spaces + extra_left_space, ' ') + source + string(half_spaces, ' '); +} - // multiply each component of the values - for (auto x = 0; x < 4; x++) { - for (auto y = 0; y < 4; y++) { - products[x][y] = top[x] * bottom[y]; +static bool NodeHasMultipleChildren(RenderTree &root, idx_t x, idx_t y) { + for (; x < root.width && !root.HasNode(x + 1, y); x++) { + if (root.HasNode(x + 1, y + 1)) { + return true; } } + return false; +} - // if any of these products are set to a non-zero value, there is always an overflow - if (products[0][0] || products[0][1] || products[0][2] || products[1][0] || products[2][0] || products[1][1]) { - return false; +void TreeRenderer::RenderBoxContent(RenderTree &root, std::ostream &ss, idx_t y) { + // we first need to figure out how high our boxes are going to be + vector> extra_info; + idx_t extra_height = 0; + extra_info.resize(root.width); + for (idx_t x = 0; x < root.width; x++) { + auto node = root.GetNode(x, y); + if (node) { + SplitUpExtraInfo(node->extra_text, extra_info[x]); + if (extra_info[x].size() > extra_height) { + extra_height = extra_info[x].size(); + } + } } - // if the high bits of any of these are set, there is always an overflow - if ((products[0][3] & 0xffffffff80000000) || (products[1][2] & 0xffffffff80000000) || - (products[2][1] & 0xffffffff80000000) || (products[3][0] & 0xffffffff80000000)) { - return false; + extra_height = MinValue(extra_height, config.MAX_EXTRA_LINES); + idx_t halfway_point = (extra_height + 1) / 2; + // now we render the actual node + for (idx_t render_y = 0; render_y <= extra_height; render_y++) { + for (idx_t x = 0; x < root.width; x++) { + if (x * config.NODE_RENDER_WIDTH >= config.MAXIMUM_RENDER_WIDTH) { + break; + } + auto node = root.GetNode(x, y); + if (!node) { + if (render_y == halfway_point) { + bool has_child_to_the_right = NodeHasMultipleChildren(root, x, y); + if (root.HasNode(x, y + 1)) { + // node right below this one + ss << 
StringUtil::Repeat(config.HORIZONTAL, config.NODE_RENDER_WIDTH / 2); + ss << config.RTCORNER; + if (has_child_to_the_right) { + // but we have another child to the right! keep rendering the line + ss << StringUtil::Repeat(config.HORIZONTAL, config.NODE_RENDER_WIDTH / 2); + } else { + // only a child below this one: fill the rest with spaces + ss << StringUtil::Repeat(" ", config.NODE_RENDER_WIDTH / 2); + } + } else if (has_child_to_the_right) { + // child to the right, but no child right below this one: render a full line + ss << StringUtil::Repeat(config.HORIZONTAL, config.NODE_RENDER_WIDTH); + } else { + // empty spot: render spaces + ss << StringUtil::Repeat(" ", config.NODE_RENDER_WIDTH); + } + } else if (render_y >= halfway_point) { + if (root.HasNode(x, y + 1)) { + // we have a node below this empty spot: render a vertical line + ss << StringUtil::Repeat(" ", config.NODE_RENDER_WIDTH / 2); + ss << config.VERTICAL; + ss << StringUtil::Repeat(" ", config.NODE_RENDER_WIDTH / 2); + } else { + // empty spot: render spaces + ss << StringUtil::Repeat(" ", config.NODE_RENDER_WIDTH); + } + } else { + // empty spot: render spaces + ss << StringUtil::Repeat(" ", config.NODE_RENDER_WIDTH); + } + } else { + ss << config.VERTICAL; + // figure out what to render + string render_text; + if (render_y == 0) { + render_text = node->name; + } else { + if (render_y <= extra_info[x].size()) { + render_text = extra_info[x][render_y - 1]; + } + } + render_text = AdjustTextForRendering(render_text, config.NODE_RENDER_WIDTH - 2); + ss << render_text; + + if (render_y == halfway_point && NodeHasMultipleChildren(root, x, y)) { + ss << config.LMIDDLE; + } else { + ss << config.VERTICAL; + } + } + } + ss << std::endl; } +} - // otherwise we merge the result of the different products together in-order +string TreeRenderer::ToString(const LogicalOperator &op) { + std::stringstream ss; + Render(op, ss); + return ss.str(); +} - // first row - uint64_t fourth32 = (products[3][3] & 0xffffffff); 
- uint64_t third32 = (products[3][2] & 0xffffffff) + (products[3][3] >> 32); - uint64_t second32 = (products[3][1] & 0xffffffff) + (products[3][2] >> 32); - uint64_t first32 = (products[3][0] & 0xffffffff) + (products[3][1] >> 32); +string TreeRenderer::ToString(const PhysicalOperator &op) { + std::stringstream ss; + Render(op, ss); + return ss.str(); +} - // second row - third32 += (products[2][3] & 0xffffffff); - second32 += (products[2][2] & 0xffffffff) + (products[2][3] >> 32); - first32 += (products[2][1] & 0xffffffff) + (products[2][2] >> 32); +string TreeRenderer::ToString(const QueryProfiler::TreeNode &op) { + std::stringstream ss; + Render(op, ss); + return ss.str(); +} - // third row - second32 += (products[1][3] & 0xffffffff); - first32 += (products[1][2] & 0xffffffff) + (products[1][3] >> 32); +void TreeRenderer::Render(const LogicalOperator &op, std::ostream &ss) { + auto tree = CreateTree(op); + ToStream(*tree, ss); +} - // fourth row - first32 += (products[0][3] & 0xffffffff); +void TreeRenderer::Render(const PhysicalOperator &op, std::ostream &ss) { + auto tree = CreateTree(op); + ToStream(*tree, ss); +} - // move carry to next digit - third32 += fourth32 >> 32; - second32 += third32 >> 32; - first32 += second32 >> 32; +void TreeRenderer::Render(const QueryProfiler::TreeNode &op, std::ostream &ss) { + auto tree = CreateTree(op); + ToStream(*tree, ss); +} - // check if the combination of the different products resulted in an overflow - if (first32 & 0xffffff80000000) { - return false; +void TreeRenderer::ToStream(RenderTree &root, std::ostream &ss) { + while (root.width * config.NODE_RENDER_WIDTH > config.MAXIMUM_RENDER_WIDTH) { + if (config.NODE_RENDER_WIDTH - 2 < config.MINIMUM_RENDER_WIDTH) { + break; + } + config.NODE_RENDER_WIDTH -= 2; } - // remove carry from current digit - fourth32 &= 0xffffffff; - third32 &= 0xffffffff; - second32 &= 0xffffffff; - first32 &= 0xffffffff; - - // combine components - result.lower = (third32 << 32) | fourth32; - 
result.upper = (first32 << 32) | second32; -#endif - if (lhs_negative ^ rhs_negative) { - NegateInPlace(result); + for (idx_t y = 0; y < root.height; y++) { + // start by rendering the top layer + RenderTopLayer(root, ss, y); + // now we render the content of the boxes + RenderBoxContent(root, ss, y); + // render the bottom layer of each of the boxes + RenderBottomLayer(root, ss, y); } - return true; } -hugeint_t Hugeint::Multiply(hugeint_t lhs, hugeint_t rhs) { - hugeint_t result; - if (!TryMultiply(lhs, rhs, result)) { - throw OutOfRangeException("Overflow in HUGEINT multiplication!"); - } - return result; +bool TreeRenderer::CanSplitOnThisChar(char l) { + return (l < '0' || (l > '9' && l < 'A') || (l > 'Z' && l < 'a')) && l != '_'; } -//===--------------------------------------------------------------------===// -// Divide -//===--------------------------------------------------------------------===// -hugeint_t Hugeint::DivMod(hugeint_t lhs, hugeint_t rhs, hugeint_t &remainder) { - // division by zero not allowed - D_ASSERT(!(rhs.upper == 0 && rhs.lower == 0)); +bool TreeRenderer::IsPadding(char l) { + return l == ' ' || l == '\t' || l == '\n' || l == '\r'; +} - bool lhs_negative = lhs.upper < 0; - bool rhs_negative = rhs.upper < 0; - if (lhs_negative) { - Hugeint::NegateInPlace(lhs); +string TreeRenderer::RemovePadding(string l) { + idx_t start = 0, end = l.size(); + while (start < l.size() && IsPadding(l[start])) { + start++; } - if (rhs_negative) { - Hugeint::NegateInPlace(rhs); + while (end > 0 && IsPadding(l[end - 1])) { + end--; } - // DivMod code adapted from: - // https://github.com/calccrypto/uint128_t/blob/master/uint128_t.cpp - - // initialize the result and remainder to 0 - hugeint_t div_result; - div_result.lower = 0; - div_result.upper = 0; - remainder.lower = 0; - remainder.upper = 0; - - uint8_t highest_bit_set = PositiveHugeintHighestBit(lhs); - // now iterate over the amount of bits that are set in the LHS - for (uint8_t x = highest_bit_set; x 
> 0; x--) { - // left-shift the current result and remainder by 1 - div_result = PositiveHugeintLeftShift(div_result, 1); - remainder = PositiveHugeintLeftShift(remainder, 1); + return l.substr(start, end - start); +} - // we get the value of the bit at position X, where position 0 is the least-significant bit - if (PositiveHugeintIsBitSet(lhs, x - 1)) { - // increment the remainder - Hugeint::AddInPlace(remainder, 1); +void TreeRenderer::SplitStringBuffer(const string &source, vector &result) { + idx_t max_line_render_size = config.NODE_RENDER_WIDTH - 2; + // utf8 in prompt, get render width + idx_t cpos = 0; + idx_t start_pos = 0; + idx_t render_width = 0; + idx_t last_possible_split = 0; + while (cpos < source.size()) { + // check if we can split on this character + if (CanSplitOnThisChar(source[cpos])) { + last_possible_split = cpos; } - if (Hugeint::GreaterThanEquals(remainder, rhs)) { - // the remainder has passed the division multiplier: add one to the divide result - remainder = Hugeint::Subtract(remainder, rhs); - Hugeint::AddInPlace(div_result, 1); + size_t char_render_width = Utf8Proc::RenderWidth(source.c_str(), source.size(), cpos); + idx_t next_cpos = Utf8Proc::NextGraphemeCluster(source.c_str(), source.size(), cpos); + if (render_width + char_render_width > max_line_render_size) { + if (last_possible_split <= start_pos + 8) { + last_possible_split = cpos; + } + result.push_back(source.substr(start_pos, last_possible_split - start_pos)); + start_pos = last_possible_split; + cpos = last_possible_split; + render_width = 0; } + cpos = next_cpos; + render_width += char_render_width; } - if (lhs_negative ^ rhs_negative) { - Hugeint::NegateInPlace(div_result); + if (source.size() > start_pos) { + result.push_back(source.substr(start_pos, source.size() - start_pos)); } - if (lhs_negative) { - Hugeint::NegateInPlace(remainder); +} + +void TreeRenderer::SplitUpExtraInfo(const string &extra_info, vector &result) { + if (extra_info.empty()) { + return; + } + 
auto splits = StringUtil::Split(extra_info, "\n"); + if (!splits.empty() && splits[0] != "[INFOSEPARATOR]") { + result.push_back(ExtraInfoSeparator()); + } + for (auto &split : splits) { + if (split == "[INFOSEPARATOR]") { + result.push_back(ExtraInfoSeparator()); + continue; + } + string str = RemovePadding(split); + if (str.empty()) { + continue; + } + SplitStringBuffer(str, result); } - return div_result; } -hugeint_t Hugeint::Divide(hugeint_t lhs, hugeint_t rhs) { - hugeint_t remainder; - return Hugeint::DivMod(lhs, rhs, remainder); +string TreeRenderer::ExtraInfoSeparator() { + return StringUtil::Repeat(string(config.HORIZONTAL) + " ", (config.NODE_RENDER_WIDTH - 7) / 2); } -hugeint_t Hugeint::Modulo(hugeint_t lhs, hugeint_t rhs) { - hugeint_t remainder; - Hugeint::DivMod(lhs, rhs, remainder); - return remainder; +unique_ptr TreeRenderer::CreateRenderNode(string name, string extra_info) { + auto result = make_unique(); + result->name = move(name); + result->extra_text = move(extra_info); + return result; } -//===--------------------------------------------------------------------===// -// Add/Subtract -//===--------------------------------------------------------------------===// -bool Hugeint::AddInPlace(hugeint_t &lhs, hugeint_t rhs) { - int overflow = lhs.lower + rhs.lower < lhs.lower; - if (rhs.upper >= 0) { - // RHS is positive: check for overflow - if (lhs.upper > (std::numeric_limits::max() - rhs.upper - overflow)) { - return false; - } - } else { - // RHS is negative: check for underflow - if (lhs.upper < std::numeric_limits::min() - rhs.upper - overflow) { - return false; - } +template +static void GetTreeWidthHeight(const T &op, idx_t &width, idx_t &height) { + if (op.children.empty()) { + width = 1; + height = 1; + return; } - lhs.upper = lhs.upper + overflow + rhs.upper; - lhs.lower += rhs.lower; - if (lhs.upper == std::numeric_limits::min() && lhs.lower == 0) { - return false; + width = 0; + height = 0; + + for (auto &child : op.children) { + 
idx_t child_width, child_height; + GetTreeWidthHeight(*child, child_width, child_height); + width += child_width; + height = MaxValue(height, child_height); } - return true; + height++; } -bool Hugeint::SubtractInPlace(hugeint_t &lhs, hugeint_t rhs) { - // underflow - int underflow = lhs.lower - rhs.lower > lhs.lower; - if (rhs.upper >= 0) { - // RHS is positive: check for underflow - if (lhs.upper < (std::numeric_limits::min() + rhs.upper + underflow)) { - return false; - } - } else { - // RHS is negative: check for overflow - if (lhs.upper >= (std::numeric_limits::max() + rhs.upper + underflow - 1)) { - return false; - } +template +idx_t TreeRenderer::CreateRenderTreeRecursive(RenderTree &result, const T &op, idx_t x, idx_t y) { + auto node = TreeRenderer::CreateNode(op); + result.SetNode(x, y, move(node)); + + if (op.children.empty()) { + return 1; } - lhs.upper = lhs.upper - rhs.upper - underflow; - lhs.lower -= rhs.lower; - if (lhs.upper == std::numeric_limits::min() && lhs.lower == 0) { - return false; + idx_t width = 0; + // render the children of this node + for (auto &child : op.children) { + width += CreateRenderTreeRecursive(result, *child, x + width, y + 1); } - return true; + return width; } -hugeint_t Hugeint::Add(hugeint_t lhs, hugeint_t rhs) { - if (!AddInPlace(lhs, rhs)) { - throw OutOfRangeException("Overflow in HUGEINT addition"); - } - return lhs; +template +unique_ptr TreeRenderer::CreateRenderTree(const T &op) { + idx_t width, height; + GetTreeWidthHeight(op, width, height); + + auto result = make_unique(width, height); + + // now fill in the tree + CreateRenderTreeRecursive(*result, op, 0, 0); + return result; } -hugeint_t Hugeint::Subtract(hugeint_t lhs, hugeint_t rhs) { - if (!SubtractInPlace(lhs, rhs)) { - throw OutOfRangeException("Underflow in HUGEINT addition"); +unique_ptr TreeRenderer::CreateNode(const LogicalOperator &op) { + return CreateRenderNode(op.GetName(), op.ParamsToString()); +} + +unique_ptr TreeRenderer::CreateNode(const 
PhysicalOperator &op) { + return CreateRenderNode(op.GetName(), op.ParamsToString()); +} + +string TreeRenderer::ExtractExpressionsRecursive(ExpressionInfo &state) { + string result = "\n[INFOSEPARATOR]"; + result += "\n" + state.function_name; + result += "\n" + StringUtil::Format("%.9f", double(state.function_time)); + if (state.children.empty()) { + return result; } - return lhs; + // render the children of this node + for (auto &child : state.children) { + result += ExtractExpressionsRecursive(*child); + } + return result; } -//===--------------------------------------------------------------------===// -// Hugeint Cast/Conversion -//===--------------------------------------------------------------------===// -template -bool HugeintTryCastInteger(hugeint_t input, DST &result) { - switch (input.upper) { - case 0: - // positive number: check if the positive number is in range - if (input.lower <= uint64_t(NumericLimits::Maximum())) { - result = DST(input.lower); - return true; - } - break; - case -1: - // negative number: check if the negative number is in range - if (input.lower > NumericLimits::Maximum() - uint64_t(NumericLimits::Maximum())) { - result = -DST(NumericLimits::Maximum() - input.lower + 1); - return true; +unique_ptr TreeRenderer::CreateNode(const QueryProfiler::TreeNode &op) { + auto result = TreeRenderer::CreateRenderNode(op.name, op.extra_info); + result->extra_text += "\n[INFOSEPARATOR]"; + result->extra_text += "\n" + to_string(op.info.elements); + string timing = StringUtil::Format("%.2f", op.info.time); + result->extra_text += "\n(" + timing + "s)"; + if (config.detailed) { + for (auto &info : op.info.executors_info) { + if (!info) { + continue; + } + for (auto &executor_info : info->roots) { + string sample_count = to_string(executor_info->sample_count); + result->extra_text += "\n[INFOSEPARATOR]"; + result->extra_text += "\nsample_count: " + sample_count; + string sample_tuples_count = to_string(executor_info->sample_tuples_count); + 
result->extra_text += "\n[INFOSEPARATOR]"; + result->extra_text += "\nsample_tuples_count: " + sample_tuples_count; + string total_count = to_string(executor_info->total_count); + result->extra_text += "\n[INFOSEPARATOR]"; + result->extra_text += "\ntotal_count: " + total_count; + for (auto &state : executor_info->root->children) { + result->extra_text += ExtractExpressionsRecursive(*state); + } + } } - break; - default: - break; } - return false; + return result; } -template <> -bool Hugeint::TryCast(hugeint_t input, int8_t &result) { - return HugeintTryCastInteger(input, result); +unique_ptr TreeRenderer::CreateTree(const LogicalOperator &op) { + return CreateRenderTree(op); } -template <> -bool Hugeint::TryCast(hugeint_t input, int16_t &result) { - return HugeintTryCastInteger(input, result); +unique_ptr TreeRenderer::CreateTree(const PhysicalOperator &op) { + return CreateRenderTree(op); } -template <> -bool Hugeint::TryCast(hugeint_t input, int32_t &result) { - return HugeintTryCastInteger(input, result); +unique_ptr TreeRenderer::CreateTree(const QueryProfiler::TreeNode &op) { + return CreateRenderTree(op); } +} // namespace duckdb -template <> -bool Hugeint::TryCast(hugeint_t input, int64_t &result) { - return HugeintTryCastInteger(input, result); -} -template <> -bool Hugeint::TryCast(hugeint_t input, uint8_t &result) { - return HugeintTryCastInteger(input, result); -} -template <> -bool Hugeint::TryCast(hugeint_t input, uint16_t &result) { - return HugeintTryCastInteger(input, result); -} -template <> -bool Hugeint::TryCast(hugeint_t input, uint32_t &result) { - return HugeintTryCastInteger(input, result); + + +namespace duckdb { + +constexpr const char *Blob::HEX_TABLE; +const int Blob::HEX_MAP[256] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + -1, -1, -1, -1, -1, -1, -1, 
10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; + +idx_t Blob::GetStringSize(string_t blob) { + auto data = (const_data_ptr_t)blob.GetDataUnsafe(); + auto len = blob.GetSize(); + idx_t str_len = 0; + for (idx_t i = 0; i < len; i++) { + if (data[i] >= 32 && data[i] <= 127 && data[i] != '\\') { + // ascii characters are rendered as-is + str_len++; + } else { + // non-ascii characters are rendered as hexadecimal (e.g. \x00) + str_len += 4; + } + } + return str_len; } -template <> -bool Hugeint::TryCast(hugeint_t input, uint64_t &result) { - return HugeintTryCastInteger(input, result); +void Blob::ToString(string_t blob, char *output) { + auto data = (const_data_ptr_t)blob.GetDataUnsafe(); + auto len = blob.GetSize(); + idx_t str_idx = 0; + for (idx_t i = 0; i < len; i++) { + if (data[i] >= 32 && data[i] <= 127 && data[i] != '\\') { + // ascii characters are rendered as-is + output[str_idx++] = data[i]; + } else { + auto byte_a = data[i] >> 4; + auto byte_b = data[i] & 0x0F; + D_ASSERT(byte_a >= 0 && byte_a < 16); + D_ASSERT(byte_b >= 0 && byte_b < 16); + // non-ascii characters are rendered as hexadecimal (e.g. 
\x00) + output[str_idx++] = '\\'; + output[str_idx++] = 'x'; + output[str_idx++] = Blob::HEX_TABLE[byte_a]; + output[str_idx++] = Blob::HEX_TABLE[byte_b]; + } + } + D_ASSERT(str_idx == GetStringSize(blob)); } -template <> -bool Hugeint::TryCast(hugeint_t input, hugeint_t &result) { - result = input; - return true; +string Blob::ToString(string_t blob) { + auto str_len = GetStringSize(blob); + auto buffer = std::unique_ptr(new char[str_len]); + Blob::ToString(blob, buffer.get()); + return string(buffer.get(), str_len); } -template <> -bool Hugeint::TryCast(hugeint_t input, float &result) { - double dbl_result; - Hugeint::TryCast(input, dbl_result); - result = (float)dbl_result; +bool Blob::TryGetBlobSize(string_t str, idx_t &str_len, string *error_message) { + auto data = (const_data_ptr_t)str.GetDataUnsafe(); + auto len = str.GetSize(); + str_len = 0; + for (idx_t i = 0; i < len; i++) { + if (data[i] == '\\') { + if (i + 3 >= len) { + string error = "Invalid hex escape code encountered in string -> blob conversion: " + "unterminated escape code at end of blob"; + HandleCastError::AssignError(error, error_message); + return false; + } + if (data[i + 1] != 'x' || Blob::HEX_MAP[data[i + 2]] < 0 || Blob::HEX_MAP[data[i + 3]] < 0) { + string error = + StringUtil::Format("Invalid hex escape code encountered in string -> blob conversion: %s", + string((char *)data + i, 4)); + HandleCastError::AssignError(error, error_message); + return false; + } + str_len++; + i += 3; + } else if (data[i] >= 32 && data[i] <= 127) { + str_len++; + } else { + string error = "Invalid byte encountered in STRING -> BLOB conversion. All non-ascii characters " + "must be escaped with hex codes (e.g. 
\\xAA)"; + HandleCastError::AssignError(error, error_message); + return false; + } + } return true; } -template <> -bool Hugeint::TryCast(hugeint_t input, double &result) { - switch (input.upper) { - case -1: - // special case for upper = -1 to avoid rounding issues in small negative numbers - result = -double(NumericLimits::Maximum() - input.lower) - 1; - break; - default: - result = double(input.lower) + double(input.upper) * double(NumericLimits::Maximum()); - break; +idx_t Blob::GetBlobSize(string_t str) { + string error_message; + idx_t str_len; + if (!Blob::TryGetBlobSize(str, str_len, &error_message)) { + throw ConversionException(error_message); } - return true; + return str_len; } -template -hugeint_t HugeintConvertInteger(DST input) { - hugeint_t result; - result.lower = (uint64_t)input; - result.upper = (input < 0) * -1; - return result; +void Blob::ToBlob(string_t str, data_ptr_t output) { + auto data = (const_data_ptr_t)str.GetDataUnsafe(); + auto len = str.GetSize(); + idx_t blob_idx = 0; + for (idx_t i = 0; i < len; i++) { + if (data[i] == '\\') { + int byte_a = Blob::HEX_MAP[data[i + 2]]; + int byte_b = Blob::HEX_MAP[data[i + 3]]; + D_ASSERT(i + 3 < len); + D_ASSERT(byte_a >= 0 && byte_b >= 0); + D_ASSERT(data[i + 1] == 'x'); + output[blob_idx++] = (byte_a << 4) + byte_b; + i += 3; + } else if (data[i] >= 32 && data[i] <= 127) { + output[blob_idx++] = data_t(data[i]); + } else { + throw ConversionException("Invalid byte encountered in STRING -> BLOB conversion. All non-ascii characters " + "must be escaped with hex codes (e.g. 
\\xAA)"); + } + } + D_ASSERT(blob_idx == GetBlobSize(str)); } -template <> -hugeint_t Hugeint::Convert(int8_t value) { - return HugeintConvertInteger(value); +string Blob::ToBlob(string_t str) { + auto blob_len = GetBlobSize(str); + auto buffer = std::unique_ptr(new char[blob_len]); + Blob::ToBlob(str, (data_ptr_t)buffer.get()); + return string(buffer.get(), blob_len); } -template <> -hugeint_t Hugeint::Convert(int16_t value) { - return HugeintConvertInteger(value); +// base64 functions are adapted from https://gist.github.com/tomykaira/f0fd86b6c73063283afe550bc5d77594 +idx_t Blob::ToBase64Size(string_t blob) { + // every 4 characters in base64 encode 3 bytes, plus (potential) padding at the end + auto input_size = blob.GetSize(); + return ((input_size + 2) / 3) * 4; } -template <> -hugeint_t Hugeint::Convert(int32_t value) { - return HugeintConvertInteger(value); +void Blob::ToBase64(string_t blob, char *output) { + auto input_data = (const_data_ptr_t)blob.GetDataUnsafe(); + auto input_size = blob.GetSize(); + idx_t out_idx = 0; + idx_t i; + // convert the bulk of the string to base64 + // this happens in steps of 3 bytes -> 4 output bytes + for (i = 0; i + 2 < input_size; i += 3) { + output[out_idx++] = Blob::BASE64_MAP[(input_data[i] >> 2) & 0x3F]; + output[out_idx++] = Blob::BASE64_MAP[((input_data[i] & 0x3) << 4) | ((input_data[i + 1] & 0xF0) >> 4)]; + output[out_idx++] = Blob::BASE64_MAP[((input_data[i + 1] & 0xF) << 2) | ((input_data[i + 2] & 0xC0) >> 6)]; + output[out_idx++] = Blob::BASE64_MAP[input_data[i + 2] & 0x3F]; + } + + if (i < input_size) { + // there are one or two bytes left over: we have to insert padding + // first write the first 6 bits of the first byte + output[out_idx++] = Blob::BASE64_MAP[(input_data[i] >> 2) & 0x3F]; + // now check the character count + if (i == input_size - 1) { + // single byte left over: convert the remainder of that byte and insert padding + output[out_idx++] = Blob::BASE64_MAP[((input_data[i] & 0x3) << 4)]; + 
output[out_idx++] = Blob::BASE64_PADDING; + } else { + // two bytes left over: convert the second byte as well + output[out_idx++] = Blob::BASE64_MAP[((input_data[i] & 0x3) << 4) | ((input_data[i + 1] & 0xF0) >> 4)]; + output[out_idx++] = Blob::BASE64_MAP[((input_data[i + 1] & 0xF) << 2)]; + } + output[out_idx++] = Blob::BASE64_PADDING; + } } -template <> -hugeint_t Hugeint::Convert(int64_t value) { - return HugeintConvertInteger(value); +static constexpr int BASE64_DECODING_TABLE[256] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; + +idx_t Blob::FromBase64Size(string_t str) { + auto input_data = str.GetDataUnsafe(); + auto input_size = str.GetSize(); + if (input_size % 4 != 0) { + // valid base64 needs to always be cleanly divisible by 4 + throw ConversionException("Could not decode string \"%s\" as base64: length must be a multiple of 4", + str.GetString()); + } + if (input_size < 4) { + // empty string + return 0; + } + auto base_size = input_size / 4 * 3; + // check for padding to figure 
out the length + if (input_data[input_size - 2] == Blob::BASE64_PADDING) { + // two bytes of padding + return base_size - 2; + } + if (input_data[input_size - 1] == Blob::BASE64_PADDING) { + // one byte of padding + return base_size - 1; + } + // no padding + return base_size; } -template <> -hugeint_t Hugeint::Convert(uint8_t value) { - return HugeintConvertInteger(value); + +template +uint32_t DecodeBase64Bytes(const string_t &str, const_data_ptr_t input_data, idx_t base_idx) { + int decoded_bytes[4]; + for (idx_t decode_idx = 0; decode_idx < 4; decode_idx++) { + if (ALLOW_PADDING && decode_idx >= 2 && input_data[base_idx + decode_idx] == Blob::BASE64_PADDING) { + // the last two bytes of a base64 string can have padding: in this case we set the byte to 0 + decoded_bytes[decode_idx] = 0; + } else { + decoded_bytes[decode_idx] = BASE64_DECODING_TABLE[input_data[base_idx + decode_idx]]; + } + if (decoded_bytes[decode_idx] < 0) { + throw ConversionException( + "Could not decode string \"%s\" as base64: invalid byte value '%d' at position %d", str.GetString(), + input_data[base_idx + decode_idx], base_idx + decode_idx); + } + } + return (decoded_bytes[0] << 3 * 6) + (decoded_bytes[1] << 2 * 6) + (decoded_bytes[2] << 1 * 6) + + (decoded_bytes[3] << 0 * 6); } -template <> -hugeint_t Hugeint::Convert(uint16_t value) { - return HugeintConvertInteger(value); + +void Blob::FromBase64(string_t str, data_ptr_t output, idx_t output_size) { + D_ASSERT(output_size == FromBase64Size(str)); + auto input_data = (const_data_ptr_t)str.GetDataUnsafe(); + auto input_size = str.GetSize(); + if (input_size == 0) { + return; + } + idx_t out_idx = 0; + idx_t i = 0; + for (i = 0; i + 4 < input_size; i += 4) { + auto combined = DecodeBase64Bytes(str, input_data, i); + output[out_idx++] = (combined >> 2 * 8) & 0xFF; + output[out_idx++] = (combined >> 1 * 8) & 0xFF; + output[out_idx++] = (combined >> 0 * 8) & 0xFF; + } + // decode the final four bytes: padding is allowed here + auto combined 
= DecodeBase64Bytes(str, input_data, i); + output[out_idx++] = (combined >> 2 * 8) & 0xFF; + if (out_idx < output_size) { + output[out_idx++] = (combined >> 1 * 8) & 0xFF; + } + if (out_idx < output_size) { + output[out_idx++] = (combined >> 0 * 8) & 0xFF; + } } + +} // namespace duckdb + + +namespace duckdb { + +const int64_t NumericHelper::POWERS_OF_TEN[] {1, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000, + 10000000000, + 100000000000, + 1000000000000, + 10000000000000, + 100000000000000, + 1000000000000000, + 10000000000000000, + 100000000000000000, + 1000000000000000000}; + +const double NumericHelper::DOUBLE_POWERS_OF_TEN[] {1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, + 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, + 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, + 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39}; + template <> -hugeint_t Hugeint::Convert(uint32_t value) { - return HugeintConvertInteger(value); +int NumericHelper::UnsignedLength(uint8_t value) { + int length = 1; + length += value >= 10; + length += value >= 100; + return length; } + template <> -hugeint_t Hugeint::Convert(uint64_t value) { - return HugeintConvertInteger(value); +int NumericHelper::UnsignedLength(uint16_t value) { + int length = 1; + length += value >= 10; + length += value >= 100; + length += value >= 1000; + length += value >= 10000; + return length; } template <> -hugeint_t Hugeint::Convert(float value) { - return Hugeint::Convert(value); +int NumericHelper::UnsignedLength(uint32_t value) { + if (value >= 10000) { + int length = 5; + length += value >= 100000; + length += value >= 1000000; + length += value >= 10000000; + length += value >= 100000000; + length += value >= 1000000000; + return length; + } else { + int length = 1; + length += value >= 10; + length += value >= 100; + length += value >= 1000; + return length; + } } template <> -hugeint_t Hugeint::Convert(double value) { - if 
(value <= -170141183460469231731687303715884105728.0 || value >= 170141183460469231731687303715884105727.0) { - throw OutOfRangeException("Double out of range of HUGEINT"); - } - hugeint_t result; - bool negative = value < 0; - if (negative) { - value = -value; - } - result.lower = (uint64_t)fmod(value, double(NumericLimits::Maximum())); - result.upper = (uint64_t)(value / double(NumericLimits::Maximum())); - if (negative) { - NegateInPlace(result); +int NumericHelper::UnsignedLength(uint64_t value) { + if (value >= 10000000000ULL) { + if (value >= 1000000000000000ULL) { + int length = 16; + length += value >= 10000000000000000ULL; + length += value >= 100000000000000000ULL; + length += value >= 1000000000000000000ULL; + length += value >= 10000000000000000000ULL; + return length; + } else { + int length = 11; + length += value >= 100000000000ULL; + length += value >= 1000000000000ULL; + length += value >= 10000000000000ULL; + length += value >= 100000000000000ULL; + return length; + } + } else { + if (value >= 100000ULL) { + int length = 6; + length += value >= 1000000ULL; + length += value >= 10000000ULL; + length += value >= 100000000ULL; + length += value >= 1000000000ULL; + return length; + } else { + int length = 1; + length += value >= 10ULL; + length += value >= 100ULL; + length += value >= 1000ULL; + length += value >= 10000ULL; + return length; + } } - return result; } -//===--------------------------------------------------------------------===// -// hugeint_t operators -//===--------------------------------------------------------------------===// -hugeint_t::hugeint_t(int64_t value) { - auto result = Hugeint::Convert(value); - this->lower = result.lower; - this->upper = result.upper; -} +} // namespace duckdb -bool hugeint_t::operator==(const hugeint_t &rhs) const { - return Hugeint::Equals(*this, rhs); -} -bool hugeint_t::operator!=(const hugeint_t &rhs) const { - return Hugeint::NotEquals(*this, rhs); -} -bool hugeint_t::operator<(const hugeint_t 
&rhs) const { - return Hugeint::LessThan(*this, rhs); -} -bool hugeint_t::operator<=(const hugeint_t &rhs) const { - return Hugeint::LessThanEquals(*this, rhs); -} +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/value_operations/value_operations.hpp +// +// +//===----------------------------------------------------------------------===// -bool hugeint_t::operator>(const hugeint_t &rhs) const { - return Hugeint::GreaterThan(*this, rhs); -} -bool hugeint_t::operator>=(const hugeint_t &rhs) const { - return Hugeint::GreaterThanEquals(*this, rhs); -} -hugeint_t hugeint_t::operator+(const hugeint_t &rhs) const { - return Hugeint::Add(*this, rhs); -} -hugeint_t hugeint_t::operator-(const hugeint_t &rhs) const { - return Hugeint::Subtract(*this, rhs); -} -hugeint_t hugeint_t::operator*(const hugeint_t &rhs) const { - return Hugeint::Multiply(*this, rhs); -} +namespace duckdb { -hugeint_t hugeint_t::operator/(const hugeint_t &rhs) const { - return Hugeint::Divide(*this, rhs); -} +struct ValueOperations { + //===--------------------------------------------------------------------===// + // Numeric Operations + //===--------------------------------------------------------------------===// + // A + B + static Value Add(const Value &left, const Value &right); + // A - B + static Value Subtract(const Value &left, const Value &right); + // A * B + static Value Multiply(const Value &left, const Value &right); + // A / B + static Value Divide(const Value &left, const Value &right); + // A % B + static Value Modulo(const Value &left, const Value &right); + // // MIN(A, B) + // static Value Min(const Value &left, const Value &right); + // // MAX(A, B) + // static Value Max(const Value &left, const Value &right); + //===--------------------------------------------------------------------===// + // Comparison Operations + //===--------------------------------------------------------------------===// + // A == B + 
static bool Equals(const Value &left, const Value &right); + // A != B + static bool NotEquals(const Value &left, const Value &right); + // A > B + static bool GreaterThan(const Value &left, const Value &right); + // A >= B + static bool GreaterThanEquals(const Value &left, const Value &right); + // A < B + static bool LessThan(const Value &left, const Value &right); + // A <= B + static bool LessThanEquals(const Value &left, const Value &right); + //===--------------------------------------------------------------------===// + // Distinction Operations + //===--------------------------------------------------------------------===// + // A == B, NULLs equal + static bool NotDistinctFrom(const Value &left, const Value &right); + // A != B, NULLs equal + static bool DistinctFrom(const Value &left, const Value &right); + // A > B, NULLs last + static bool DistinctGreaterThan(const Value &left, const Value &right); + // A >= B, NULLs last + static bool DistinctGreaterThanEquals(const Value &left, const Value &right); + // A < B, NULLs last + static bool DistinctLessThan(const Value &left, const Value &right); + // A <= B, NULLs last + static bool DistinctLessThanEquals(const Value &left, const Value &right); + //===--------------------------------------------------------------------===// + // Hash functions + //===--------------------------------------------------------------------===// + // result = HASH(A) + static hash_t Hash(const Value &left); +}; +} // namespace duckdb -hugeint_t hugeint_t::operator%(const hugeint_t &rhs) const { - return Hugeint::Modulo(*this, rhs); + + + + +#include +#include +#include + +namespace duckdb { + +void ChunkCollection::Verify() { +#ifdef DEBUG + for (auto &chunk : chunks) { + chunk->Verify(); + } +#endif } -hugeint_t hugeint_t::operator-() const { - return Hugeint::Negate(*this); +void ChunkCollection::Append(ChunkCollection &other) { + for (auto &chunk : other.chunks) { + Append(*chunk); + } } -hugeint_t 
hugeint_t::operator>>(const hugeint_t &rhs) const { - if (upper < 0) { - return hugeint_t(0); +void ChunkCollection::Merge(ChunkCollection &other) { + if (other.count == 0) { + return; } - hugeint_t result; - uint64_t shift = rhs.lower; - if (rhs.upper != 0 || shift >= 128) { - return hugeint_t(0); - } else if (shift == 64) { - result.upper = 0; - result.lower = upper; - } else if (shift == 0) { - return *this; - } else if (shift < 64) { - // perform upper shift in unsigned integer, and mask away the most significant bit - result.lower = (uint64_t(upper) << (64 - shift)) + (lower >> shift); - result.upper = uint64_t(upper) >> shift; - } else { - D_ASSERT(shift < 128); - result.lower = uint64_t(upper) >> (shift - 64); - result.upper = 0; + if (count == 0) { + chunks = move(other.chunks); + types = move(other.types); + count = other.count; + return; } - return result; + unique_ptr old_back; + if (!chunks.empty() && chunks.back()->size() != STANDARD_VECTOR_SIZE) { + old_back = move(chunks.back()); + chunks.pop_back(); + count -= old_back->size(); + } + for (auto &chunk : other.chunks) { + chunks.push_back(move(chunk)); + } + count += other.count; + if (old_back) { + Append(*old_back); + } + Verify(); } -hugeint_t hugeint_t::operator<<(const hugeint_t &rhs) const { - if (upper < 0) { - return hugeint_t(0); +void ChunkCollection::Append(DataChunk &new_chunk) { + if (new_chunk.size() == 0) { + return; } - hugeint_t result; - uint64_t shift = rhs.lower; - if (rhs.upper != 0 || shift >= 128) { - return hugeint_t(0); - } else if (shift == 64) { - result.upper = lower; - result.lower = 0; - } else if (shift == 0) { - return *this; - } else if (shift < 64) { - // perform upper shift in unsigned integer, and mask away the most significant bit - uint64_t upper_shift = ((uint64_t(upper) << shift) + (lower >> (64 - shift))) & 0x7FFFFFFFFFFFFFFF; - result.lower = lower << shift; - result.upper = upper_shift; + new_chunk.Verify(); + + // we have to ensure that every chunk in the 
ChunkCollection is completely + // filled, otherwise our O(1) lookup in GetValue and SetValue does not work + // first fill the latest chunk, if it exists + count += new_chunk.size(); + + idx_t remaining_data = new_chunk.size(); + idx_t offset = 0; + if (chunks.empty()) { + // first chunk + types = new_chunk.GetTypes(); } else { - D_ASSERT(shift < 128); - result.lower = 0; - result.upper = (lower << (shift - 64)) & 0x7FFFFFFFFFFFFFFF; + // the types of the new chunk should match the types of the previous one + D_ASSERT(types.size() == new_chunk.ColumnCount()); + auto new_types = new_chunk.GetTypes(); + for (idx_t i = 0; i < types.size(); i++) { + if (new_types[i] != types[i]) { + throw TypeMismatchException(new_types[i], types[i], "Type mismatch when combining rows"); + } + if (types[i].InternalType() == PhysicalType::LIST) { + // need to check all the chunks because they can have only-null list entries + for (auto &chunk : chunks) { + auto &chunk_vec = chunk->data[i]; + auto &new_vec = new_chunk.data[i]; + auto &chunk_type = chunk_vec.GetType(); + auto &new_type = new_vec.GetType(); + if (chunk_type != new_type) { + throw TypeMismatchException(chunk_type, new_type, "Type mismatch when combining lists"); + } + } + } + // TODO check structs, too + } + + // first append data to the current chunk + DataChunk &last_chunk = *chunks.back(); + idx_t added_data = MinValue(remaining_data, STANDARD_VECTOR_SIZE - last_chunk.size()); + if (added_data > 0) { + // copy elements to the last chunk + new_chunk.Normalify(); + // have to be careful here: setting the cardinality without calling normalify can cause incorrect partial + // decompression + idx_t old_count = new_chunk.size(); + new_chunk.SetCardinality(added_data); + + last_chunk.Append(new_chunk); + remaining_data -= added_data; + // reset the chunk to the old data + new_chunk.SetCardinality(old_count); + offset = added_data; + } } - return result; -} -hugeint_t hugeint_t::operator&(const hugeint_t &rhs) const { - 
hugeint_t result; - result.lower = lower & rhs.lower; - result.upper = upper & rhs.upper; - return result; + if (remaining_data > 0) { + // create a new chunk and fill it with the remainder + auto chunk = make_unique(); + chunk->Initialize(types); + new_chunk.Copy(*chunk, offset); + chunks.push_back(move(chunk)); + } } -hugeint_t hugeint_t::operator|(const hugeint_t &rhs) const { - hugeint_t result; - result.lower = lower | rhs.lower; - result.upper = upper | rhs.upper; - return result; +void ChunkCollection::Append(unique_ptr new_chunk) { + if (types.empty()) { + types = new_chunk->GetTypes(); + } + D_ASSERT(types == new_chunk->GetTypes()); + count += new_chunk->size(); + chunks.push_back(move(new_chunk)); } -hugeint_t hugeint_t::operator^(const hugeint_t &rhs) const { - hugeint_t result; - result.lower = lower ^ rhs.lower; - result.upper = upper ^ rhs.upper; - return result; +void ChunkCollection::Fuse(ChunkCollection &other) { + if (count == 0) { + Append(other); + } else { + D_ASSERT(this->ChunkCount() == other.ChunkCount()); + for (idx_t chunk_idx = 0; chunk_idx < ChunkCount(); ++chunk_idx) { + auto &lhs = this->GetChunk(chunk_idx); + auto &rhs = other.GetChunk(chunk_idx); + D_ASSERT(lhs.size() == rhs.size()); + for (auto &v : rhs.data) { + lhs.data.emplace_back(Vector(v)); + } + } + types.insert(types.end(), other.types.begin(), other.types.end()); + } } -hugeint_t hugeint_t::operator~() const { - hugeint_t result; - result.lower = ~lower; - result.upper = ~upper; - return result; -} +// returns an int similar to a C comparator: +// -1 if left < right +// 0 if left == right +// 1 if left > right -hugeint_t &hugeint_t::operator+=(const hugeint_t &rhs) { - Hugeint::AddInPlace(*this, rhs); - return *this; -} -hugeint_t &hugeint_t::operator-=(const hugeint_t &rhs) { - Hugeint::SubtractInPlace(*this, rhs); - return *this; -} -hugeint_t &hugeint_t::operator*=(const hugeint_t &rhs) { - *this = Hugeint::Multiply(*this, rhs); - return *this; -} -hugeint_t 
&hugeint_t::operator/=(const hugeint_t &rhs) { - *this = Hugeint::Divide(*this, rhs); - return *this; -} -hugeint_t &hugeint_t::operator%=(const hugeint_t &rhs) { - *this = Hugeint::Modulo(*this, rhs); - return *this; -} -hugeint_t &hugeint_t::operator>>=(const hugeint_t &rhs) { - *this = *this >> rhs; - return *this; -} -hugeint_t &hugeint_t::operator<<=(const hugeint_t &rhs) { - *this = *this << rhs; - return *this; -} -hugeint_t &hugeint_t::operator&=(const hugeint_t &rhs) { - lower &= rhs.lower; - upper &= rhs.upper; - return *this; -} -hugeint_t &hugeint_t::operator|=(const hugeint_t &rhs) { - lower |= rhs.lower; - upper |= rhs.upper; - return *this; -} -hugeint_t &hugeint_t::operator^=(const hugeint_t &rhs) { - lower ^= rhs.lower; - upper ^= rhs.upper; - return *this; +template +static int8_t TemplatedCompareValue(Vector &left_vec, Vector &right_vec, idx_t left_idx, idx_t right_idx) { + D_ASSERT(left_vec.GetType() == right_vec.GetType()); + auto left_val = FlatVector::GetData(left_vec)[left_idx]; + auto right_val = FlatVector::GetData(right_vec)[right_idx]; + if (Equals::Operation(left_val, right_val)) { + return 0; + } + if (LessThan::Operation(left_val, right_val)) { + return -1; + } + return 1; } -string hugeint_t::ToString() const { - return Hugeint::ToString(*this); +// return type here is int32 because strcmp() on some platforms returns rather large values +static int32_t CompareValue(Vector &left_vec, Vector &right_vec, idx_t vector_idx_left, idx_t vector_idx_right, + OrderByNullType null_order) { + auto left_null = FlatVector::IsNull(left_vec, vector_idx_left); + auto right_null = FlatVector::IsNull(right_vec, vector_idx_right); + + if (left_null && right_null) { + return 0; + } else if (right_null) { + return null_order == OrderByNullType::NULLS_FIRST ? 1 : -1; + } else if (left_null) { + return null_order == OrderByNullType::NULLS_FIRST ? 
-1 : 1; + } + + switch (left_vec.GetType().InternalType()) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); + case PhysicalType::INT16: + return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); + case PhysicalType::INT32: + return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); + case PhysicalType::INT64: + return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); + case PhysicalType::UINT8: + return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); + case PhysicalType::UINT16: + return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); + case PhysicalType::UINT32: + return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); + case PhysicalType::UINT64: + return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); + case PhysicalType::INT128: + return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); + case PhysicalType::FLOAT: + return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); + case PhysicalType::DOUBLE: + return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); + case PhysicalType::VARCHAR: + return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); + case PhysicalType::INTERVAL: + return TemplatedCompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right); + default: + throw NotImplementedException("Type for comparison"); + } } -} // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/types/hyperloglog.hpp -// -// -//===----------------------------------------------------------------------===// +static int CompareTuple(ChunkCollection *sort_by, vector &desc, vector 
&null_order, + idx_t left, idx_t right) { + D_ASSERT(sort_by); + + idx_t chunk_idx_left = left / STANDARD_VECTOR_SIZE; + idx_t chunk_idx_right = right / STANDARD_VECTOR_SIZE; + idx_t vector_idx_left = left % STANDARD_VECTOR_SIZE; + idx_t vector_idx_right = right % STANDARD_VECTOR_SIZE; + + auto &left_chunk = sort_by->GetChunk(chunk_idx_left); + auto &right_chunk = sort_by->GetChunk(chunk_idx_right); + + for (idx_t col_idx = 0; col_idx < desc.size(); col_idx++) { + auto order_type = desc[col_idx]; + + auto &left_vec = left_chunk.data[col_idx]; + auto &right_vec = right_chunk.data[col_idx]; + D_ASSERT(left_vec.GetVectorType() == VectorType::FLAT_VECTOR); + D_ASSERT(right_vec.GetVectorType() == VectorType::FLAT_VECTOR); + D_ASSERT(left_vec.GetType() == right_vec.GetType()); + auto comp_res = CompareValue(left_vec, right_vec, vector_idx_left, vector_idx_right, null_order[col_idx]); + if (comp_res == 0) { + continue; + } + return comp_res < 0 ? (order_type == OrderType::ASCENDING ? -1 : 1) + : (order_type == OrderType::ASCENDING ? 1 : -1); + } + return 0; +} +static int64_t QuicksortInitial(ChunkCollection *sort_by, vector &desc, vector &null_order, + idx_t *result) { + // select pivot + int64_t pivot = 0; + int64_t low = 0, high = sort_by->Count() - 1; + // now insert elements + for (idx_t i = 1; i < sort_by->Count(); i++) { + if (CompareTuple(sort_by, desc, null_order, i, pivot) <= 0) { + result[low++] = i; + } else { + result[high--] = i; + } + } + D_ASSERT(low == high); + result[low] = pivot; + return low; +} -namespace duckdb { +struct QuicksortInfo { + QuicksortInfo(int64_t left_p, int64_t right_p) : left(left_p), right(right_p) { + } -//! The HyperLogLog class holds a HyperLogLog counter for approximate cardinality counting -class HyperLogLog { -public: - HyperLogLog(); - ~HyperLogLog(); - // implicit copying of HyperLogLog is not allowed - HyperLogLog(const HyperLogLog &) = delete; + int64_t left; + int64_t right; +}; - //! 
Adds an element of the specified size to the HyperLogLog counter - void Add(data_ptr_t element, idx_t size); - //! Return the count of this HyperLogLog counter - idx_t Count(); - //! Merge this HyperLogLog counter with another counter to create a new one - unique_ptr Merge(HyperLogLog &other); - HyperLogLog *MergePointer(HyperLogLog &other); - //! Merge a set of HyperLogLogs to create one big one - static unique_ptr Merge(HyperLogLog logs[], idx_t count); +struct QuicksortStack { + std::queue info_queue; -private: - HyperLogLog(void *hll); + QuicksortInfo Pop() { + auto element = info_queue.front(); + info_queue.pop(); + return element; + } - void *hll; + bool IsEmpty() { + return info_queue.empty(); + } + + void Enqueue(int64_t left, int64_t right) { + if (left >= right) { + return; + } + info_queue.emplace(left, right); + } }; -} // namespace duckdb +static void QuicksortInPlace(ChunkCollection *sort_by, vector &desc, vector &null_order, + idx_t *result, QuicksortInfo info, QuicksortStack &stack) { + auto left = info.left; + auto right = info.right; + D_ASSERT(left < right); + int64_t middle = left + (right - left) / 2; + int64_t pivot = result[middle]; + // move the mid point value to the front. 
+ int64_t i = left + 1; + int64_t j = right; -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list + std::swap(result[middle], result[left]); + bool all_equal = true; + while (i <= j) { + if (result) { + while (i <= j) { + int cmp = CompareTuple(sort_by, desc, null_order, result[i], pivot); + if (cmp < 0) { + all_equal = false; + } else if (cmp > 0) { + all_equal = false; + break; + } + i++; + } + } -//===----------------------------------------------------------------------===// -// DuckDB -// -// third_party/hyperloglog/hyperloglog.hpp -// -// -//===----------------------------------------------------------------------===// + while (i <= j && CompareTuple(sort_by, desc, null_order, result[j], pivot) > 0) { + j--; + } + if (i < j) { + std::swap(result[i], result[j]); + } + } + std::swap(result[i - 1], result[left]); + int64_t part = i - 1; + if (all_equal) { + return; + } -#include -#include + stack.Enqueue(left, part - 1); + stack.Enqueue(part + 1, right); +} -namespace duckdb_hll { +void ChunkCollection::Sort(vector &desc, vector &null_order, idx_t result[]) { + D_ASSERT(result); + if (count == 0) { + return; + } + // start off with an initial quicksort + int64_t part = QuicksortInitial(this, desc, null_order, result); -/* Error codes */ -#define HLL_C_OK 0 -#define HLL_C_ERR -1 + // now continuously perform + QuicksortStack stack; + stack.Enqueue(0, part); + stack.Enqueue(part + 1, count - 1); + while (!stack.IsEmpty()) { + auto element = stack.Pop(); + QuicksortInPlace(this, desc, null_order, result, element, stack); + } +} -typedef struct { - void *ptr; -} robj; +// FIXME make this more efficient by not using the Value API +// just use memcpy in the vectors +// assert that there is no selection list +void ChunkCollection::Reorder(idx_t order_org[]) { + auto order = unique_ptr(new idx_t[count]); + memcpy(order.get(), order_org, sizeof(idx_t) * count); -//! 
Create a new empty HyperLogLog object -robj *hll_create(void); -//! Destroy the specified HyperLogLog object -void hll_destroy(robj *obj); -//! Add an element with the specified amount of bytes to the HyperLogLog. Returns C_ERR on failure, otherwise returns 0 if the cardinality did not change, and 1 otherwise. -int hll_add(robj *o, unsigned char *ele, size_t elesize); -//! Returns the estimated amount of unique elements seen by the HyperLogLog. Returns C_OK on success, or C_ERR on failure. -int hll_count(robj *o, size_t *result); -//! Merge hll_count HyperLogLog objects into a single one. Returns NULL on failure, or the new HLL object on success. -robj *hll_merge(robj **hlls, size_t hll_count); + // adapted from https://stackoverflow.com/a/7366196/2652376 -uint64_t MurmurHash64A (const void * key, int len, unsigned int seed); + auto val_buf = vector(); + val_buf.resize(ColumnCount()); + idx_t j, k; + for (idx_t i = 0; i < count; i++) { + for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) { + val_buf[col_idx] = GetValue(col_idx, i); + } + j = i; + while (true) { + k = order[j]; + order[j] = j; + if (k == i) { + break; + } + for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) { + SetValue(col_idx, j, GetValue(col_idx, k)); + } + j = k; + } + for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) { + SetValue(col_idx, j, val_buf[col_idx]); + } + } } -// LICENSE_CHANGE_END +template +static void TemplatedSetValues(ChunkCollection *src_coll, Vector &tgt_vec, idx_t order[], idx_t col_idx, + idx_t start_offset, idx_t remaining_data) { + D_ASSERT(src_coll); + for (idx_t row_idx = 0; row_idx < remaining_data; row_idx++) { + idx_t chunk_idx_src = order[start_offset + row_idx] / STANDARD_VECTOR_SIZE; + idx_t vector_idx_src = order[start_offset + row_idx] % STANDARD_VECTOR_SIZE; -namespace duckdb { + auto &src_chunk = src_coll->GetChunk(chunk_idx_src); + Vector &src_vec = src_chunk.data[col_idx]; + auto source_data = FlatVector::GetData(src_vec); + auto 
target_data = FlatVector::GetData(tgt_vec); -HyperLogLog::HyperLogLog() : hll(nullptr) { - hll = duckdb_hll::hll_create(); + if (FlatVector::IsNull(src_vec, vector_idx_src)) { + FlatVector::SetNull(tgt_vec, row_idx, true); + } else { + target_data[row_idx] = source_data[vector_idx_src]; + } + } } -HyperLogLog::HyperLogLog(void *hll) : hll(hll) { +Value ChunkCollection::GetValue(idx_t column, idx_t index) { + return chunks[LocateChunk(index)]->GetValue(column, index % STANDARD_VECTOR_SIZE); } -HyperLogLog::~HyperLogLog() { - duckdb_hll::hll_destroy((duckdb_hll::robj *)hll); +void ChunkCollection::SetValue(idx_t column, idx_t index, const Value &value) { + chunks[LocateChunk(index)]->SetValue(column, index % STANDARD_VECTOR_SIZE, value); } -void HyperLogLog::Add(data_ptr_t element, idx_t size) { - if (duckdb_hll::hll_add((duckdb_hll::robj *)hll, element, size) == HLL_C_ERR) { - throw Exception("Could not add to HLL?"); - } +void ChunkCollection::CopyCell(idx_t column, idx_t index, Vector &target, idx_t target_offset) { + auto &chunk = GetChunkForRow(index); + auto &source = chunk.data[column]; + const auto source_offset = index % STANDARD_VECTOR_SIZE; + VectorOperations::Copy(source, target, source_offset + 1, source_offset, target_offset); } -idx_t HyperLogLog::Count() { - size_t result; // exception from size_t ban - if (duckdb_hll::hll_count((duckdb_hll::robj *)hll, &result) != HLL_C_OK) { - throw Exception("Could not count HLL?"); - } - return result; +void ChunkCollection::Print() { + Printer::Print(ToString()); } -unique_ptr HyperLogLog::Merge(HyperLogLog &other) { - duckdb_hll::robj *hlls[2]; - hlls[0] = (duckdb_hll::robj *)hll; - hlls[1] = (duckdb_hll::robj *)other.hll; - auto new_hll = duckdb_hll::hll_merge(hlls, 2); - if (!new_hll) { - throw Exception("Could not merge HLLs"); +bool ChunkCollection::Equals(ChunkCollection &other) { + if (count != other.count) { + return false; } - return unique_ptr(new HyperLogLog((void *)new_hll)); + if (ColumnCount() != 
other.ColumnCount()) { + return false; + } + if (types != other.types) { + return false; + } + // if count is equal amount of chunks should be equal + for (idx_t row_idx = 0; row_idx < count; row_idx++) { + for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) { + auto lvalue = GetValue(col_idx, row_idx); + auto rvalue = other.GetValue(col_idx, row_idx); + if (!Value::ValuesAreEqual(lvalue, rvalue)) { + return false; + } + } + } + return true; } +static void Heapify(ChunkCollection *input, vector &desc, vector &null_order, idx_t *heap, + idx_t heap_size, idx_t current_index) { + if (current_index >= heap_size) { + return; + } + idx_t left_child_index = current_index * 2 + 1; + idx_t right_child_index = current_index * 2 + 2; + idx_t swap_index = current_index; -HyperLogLog *HyperLogLog::MergePointer(HyperLogLog &other) { - duckdb_hll::robj *hlls[2]; - hlls[0] = (duckdb_hll::robj *)hll; - hlls[1] = (duckdb_hll::robj *)other.hll; - auto new_hll = duckdb_hll::hll_merge(hlls, 2); - if (!new_hll) { - throw Exception("Could not merge HLLs"); + if (left_child_index < heap_size) { + swap_index = CompareTuple(input, desc, null_order, heap[swap_index], heap[left_child_index]) <= 0 + ? left_child_index + : swap_index; } - return new HyperLogLog((void *)new_hll); -} -unique_ptr HyperLogLog::Merge(HyperLogLog logs[], idx_t count) { - auto hlls_uptr = unique_ptr { - new duckdb_hll::robj *[count] - }; - auto hlls = hlls_uptr.get(); - for (idx_t i = 0; i < count; i++) { - hlls[i] = (duckdb_hll::robj *)logs[i].hll; + if (right_child_index < heap_size) { + swap_index = CompareTuple(input, desc, null_order, heap[swap_index], heap[right_child_index]) <= 0 + ? 
right_child_index + : swap_index; } - auto new_hll = duckdb_hll::hll_merge(hlls, count); - if (!new_hll) { - throw Exception("Could not merge HLLs"); + + if (swap_index != current_index) { + std::swap(heap[current_index], heap[swap_index]); + Heapify(input, desc, null_order, heap, heap_size, swap_index); } - return unique_ptr(new HyperLogLog((void *)new_hll)); } -} // namespace duckdb +static void HeapCreate(ChunkCollection *input, vector &desc, vector &null_order, + idx_t *heap, idx_t heap_size) { + for (idx_t i = 0; i < heap_size; i++) { + heap[i] = i; + } + // build heap + for (int64_t i = heap_size / 2 - 1; i >= 0; i--) { + Heapify(input, desc, null_order, heap, heap_size, i); + } -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/enums/date_part_specifier.hpp -// -// -//===----------------------------------------------------------------------===// + // Run through all the rows. + for (idx_t i = heap_size; i < input->Count(); i++) { + if (CompareTuple(input, desc, null_order, i, heap[0]) <= 0) { + heap[0] = i; + Heapify(input, desc, null_order, heap, heap_size, 0); + } + } +} +void ChunkCollection::Heap(vector &desc, vector &null_order, idx_t heap[], + idx_t heap_size) { + D_ASSERT(heap); + if (count == 0) { + return; + } + HeapCreate(this, desc, null_order, heap, heap_size); + // Heap is ready. 
Now do a heapsort + for (int64_t i = heap_size - 1; i >= 0; i--) { + std::swap(heap[i], heap[0]); + Heapify(this, desc, null_order, heap, i, 0); + } +} +idx_t ChunkCollection::MaterializeHeapChunk(DataChunk &target, idx_t order[], idx_t start_offset, idx_t heap_size) { + idx_t remaining_data = MinValue(STANDARD_VECTOR_SIZE, heap_size - start_offset); + D_ASSERT(target.GetTypes() == types); -namespace duckdb { + target.SetCardinality(remaining_data); + for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) { + switch (types[col_idx].InternalType()) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); + break; + case PhysicalType::INT16: + TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); + break; + case PhysicalType::INT32: + TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); + break; + case PhysicalType::INT64: + TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); + break; + case PhysicalType::INT128: + TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); + break; + case PhysicalType::FLOAT: + TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); + break; + case PhysicalType::DOUBLE: + TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); + break; + case PhysicalType::VARCHAR: + TemplatedSetValues(this, target.data[col_idx], order, col_idx, start_offset, remaining_data); + break; + // TODO this is ugly and sloooow! 
+ case PhysicalType::STRUCT: + case PhysicalType::LIST: { + for (idx_t row_idx = 0; row_idx < remaining_data; row_idx++) { + idx_t chunk_idx_src = order[start_offset + row_idx] / STANDARD_VECTOR_SIZE; + idx_t vector_idx_src = order[start_offset + row_idx] % STANDARD_VECTOR_SIZE; -enum class DatePartSpecifier : uint8_t { - YEAR, - MONTH, - DAY, - DECADE, - CENTURY, - MILLENNIUM, - MICROSECONDS, - MILLISECONDS, - SECOND, - MINUTE, - HOUR, - EPOCH, - DOW, - ISODOW, - WEEK, - QUARTER, - DOY -}; + auto &src_chunk = chunks[chunk_idx_src]; + Vector &src_vec = src_chunk->data[col_idx]; + auto &tgt_vec = target.data[col_idx]; + if (FlatVector::IsNull(src_vec, vector_idx_src)) { + FlatVector::SetNull(tgt_vec, row_idx, true); + } else { + tgt_vec.SetValue(row_idx, src_vec.GetValue(vector_idx_src)); + } + } + } break; -DatePartSpecifier GetDatePartSpecifier(string specifier); + default: + throw NotImplementedException("Type is unsupported in MaterializeHeapChunk()"); + } + } + target.Verify(); + return start_offset + remaining_data; +} } // namespace duckdb - - - //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/operator/add.hpp +// duckdb/common/array.hpp // // //===----------------------------------------------------------------------===// - - +#include namespace duckdb { +using std::array; +} -struct AddOperator { - template - static inline TR Operation(TA left, TB right) { - return left + right; - } -}; - -template <> -float AddOperator::Operation(float left, float right); -template <> -double AddOperator::Operation(double left, double right); -template <> -interval_t AddOperator::Operation(interval_t left, interval_t right); -template <> -date_t AddOperator::Operation(date_t left, interval_t right); -template <> -date_t AddOperator::Operation(interval_t left, date_t right); -template <> -timestamp_t AddOperator::Operation(timestamp_t left, interval_t right); -template <> -timestamp_t 
AddOperator::Operation(interval_t left, timestamp_t right); - -struct TryAddOperator { - template - static inline bool Operation(TA left, TB right, TR &result) { - throw InternalException("Unimplemented type for TryAddOperator"); - } -}; - -template <> -bool TryAddOperator::Operation(uint8_t left, uint8_t right, uint8_t &result); -template <> -bool TryAddOperator::Operation(uint16_t left, uint16_t right, uint16_t &result); -template <> -bool TryAddOperator::Operation(uint32_t left, uint32_t right, uint32_t &result); -template <> -bool TryAddOperator::Operation(uint64_t left, uint64_t right, uint64_t &result); - -template <> -bool TryAddOperator::Operation(int8_t left, int8_t right, int8_t &result); -template <> -bool TryAddOperator::Operation(int16_t left, int16_t right, int16_t &result); -template <> -bool TryAddOperator::Operation(int32_t left, int32_t right, int32_t &result); -template <> -bool TryAddOperator::Operation(int64_t left, int64_t right, int64_t &result); -struct AddOperatorOverflowCheck { - template - static inline TR Operation(TA left, TB right) { - TR result; - if (!TryAddOperator::Operation(left, right, result)) { - throw OutOfRangeException("Overflow in addition of %s (%d + %d)!", TypeIdToString(GetTypeId()), left, - right); - } - return result; - } -}; -struct TryDecimalAdd { - template - static inline bool Operation(TA left, TB right, TR &result) { - throw InternalException("Unimplemented type for TryDecimalAdd"); - } -}; -template <> -bool TryDecimalAdd::Operation(int16_t left, int16_t right, int16_t &result); -template <> -bool TryDecimalAdd::Operation(int32_t left, int32_t right, int32_t &result); -template <> -bool TryDecimalAdd::Operation(int64_t left, int64_t right, int64_t &result); -template <> -bool TryDecimalAdd::Operation(hugeint_t left, hugeint_t right, hugeint_t &result); -struct DecimalAddOverflowCheck { - template - static inline TR Operation(TA left, TB right) { - TR result; - if (!TryDecimalAdd::Operation(left, right, result)) 
{ - throw OutOfRangeException("Overflow in addition of DECIMAL(18) (%d + %d). You might want to add an " - "explicit cast to a bigger decimal.", - left, right); - } - return result; - } -}; -template <> -hugeint_t DecimalAddOverflowCheck::Operation(hugeint_t left, hugeint_t right); -struct AddTimeOperator { - template - static inline TR Operation(TA left, TB right); -}; -template <> -dtime_t AddTimeOperator::Operation(dtime_t left, interval_t right); -template <> -dtime_t AddTimeOperator::Operation(interval_t left, dtime_t right); -} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/operator/multiply.hpp +// duckdb/common/types/sel_cache.hpp // // //===----------------------------------------------------------------------===// @@ -21412,1632 +27480,1248 @@ dtime_t AddTimeOperator::Operation(interval_t left, dtime_t right); namespace duckdb { -struct MultiplyOperator { - template - static inline TR Operation(TA left, TB right) { - return left * right; - } -}; - -template <> -float MultiplyOperator::Operation(float left, float right); -template <> -double MultiplyOperator::Operation(double left, double right); -template <> -interval_t MultiplyOperator::Operation(interval_t left, int64_t right); -template <> -interval_t MultiplyOperator::Operation(int64_t left, interval_t right); - -struct TryMultiplyOperator { - template - static inline bool Operation(TA left, TB right, TR &result) { - throw InternalException("Unimplemented type for TryMultiplyOperator"); - } +//! 
Selection vector cache used for caching vector slices +struct SelCache { + unordered_map> cache; }; -template <> -bool TryMultiplyOperator::Operation(uint8_t left, uint8_t right, uint8_t &result); -template <> -bool TryMultiplyOperator::Operation(uint16_t left, uint16_t right, uint16_t &result); -template <> -bool TryMultiplyOperator::Operation(uint32_t left, uint32_t right, uint32_t &result); -template <> -bool TryMultiplyOperator::Operation(uint64_t left, uint64_t right, uint64_t &result); - -template <> -bool TryMultiplyOperator::Operation(int8_t left, int8_t right, int8_t &result); -template <> -bool TryMultiplyOperator::Operation(int16_t left, int16_t right, int16_t &result); -template <> -bool TryMultiplyOperator::Operation(int32_t left, int32_t right, int32_t &result); -template <> -bool TryMultiplyOperator::Operation(int64_t left, int64_t right, int64_t &result); - -struct MultiplyOperatorOverflowCheck { - template - static inline TR Operation(TA left, TB right) { - TR result; - if (!TryMultiplyOperator::Operation(left, right, result)) { - throw OutOfRangeException("Overflow in multiplication of %s (%d * %d)!", TypeIdToString(GetTypeId()), - left, right); - } - return result; - } -}; +} // namespace duckdb -struct TryDecimalMultiply { - template - static inline bool Operation(TA left, TB right, TR &result) { - throw InternalException("Unimplemented type for TryDecimalMultiply"); - } -}; -template <> -bool TryDecimalMultiply::Operation(int16_t left, int16_t right, int16_t &result); -template <> -bool TryDecimalMultiply::Operation(int32_t left, int32_t right, int32_t &result); -template <> -bool TryDecimalMultiply::Operation(int64_t left, int64_t right, int64_t &result); -template <> -bool TryDecimalMultiply::Operation(hugeint_t left, hugeint_t right, hugeint_t &result); -struct DecimalMultiplyOverflowCheck { - template - static inline TR Operation(TA left, TB right) { - TR result; - if (!TryDecimalMultiply::Operation(left, right, result)) { - throw 
OutOfRangeException("Overflow in multiplication of DECIMAL(18) (%d * %d). You might want to add an " - "explicit cast to a bigger decimal.", - left, right); - } - return result; - } -}; -template <> -hugeint_t DecimalMultiplyOverflowCheck::Operation(hugeint_t left, hugeint_t right); -} // namespace duckdb namespace duckdb { -bool Interval::FromString(const string &str, interval_t &result) { - return Interval::FromCString(str.c_str(), str.size(), result); +DataChunk::DataChunk() : count(0) { } -template -void IntervalTryAddition(T &target, int64_t input, int64_t multiplier) { - int64_t addition; - if (!TryMultiplyOperator::Operation(input, multiplier, addition)) { - throw OutOfRangeException("interval value is out of range"); - } - T addition_base = Cast::Operation(addition); - if (!TryAddOperator::Operation(target, addition_base, target)) { - throw OutOfRangeException("interval value is out of range"); - } +DataChunk::~DataChunk() { } -bool Interval::FromCString(const char *str, idx_t len, interval_t &result) { - idx_t pos = 0; - idx_t start_pos; - bool negative; - bool found_any = false; - int64_t number; - DatePartSpecifier specifier; - - result.days = 0; - result.micros = 0; - result.months = 0; - - if (len == 0) { - return false; +void DataChunk::InitializeEmpty(const vector &types) { + D_ASSERT(data.empty()); // can only be initialized once + D_ASSERT(!types.empty()); // empty chunk not allowed + for (idx_t i = 0; i < types.size(); i++) { + data.emplace_back(Vector(types[i], nullptr)); } +} - switch (str[pos]) { - case '@': - pos++; - goto standard_interval; - case 'P': - case 'p': - pos++; - goto posix_interval; - default: - goto standard_interval; - } -standard_interval: - // start parsing a standard interval (e.g. 2 years 3 months...) 
- for (; pos < len; pos++) { - char c = str[pos]; - if (c == ' ' || c == '\t' || c == '\n') { - // skip spaces - continue; - } else if (c >= '0' && c <= '9') { - // start parsing a positive number - negative = false; - goto interval_parse_number; - } else if (c == '-') { - // negative number - negative = true; - pos++; - goto interval_parse_number; - } else if (c == 'a' || c == 'A') { - // parse the word "ago" as the final specifier - goto interval_parse_ago; - } else { - // unrecognized character, expected a number or end of string - return false; - } - } - goto end_of_string; -interval_parse_number: - start_pos = pos; - for (; pos < len; pos++) { - char c = str[pos]; - if (c >= '0' && c <= '9') { - // the number continues - continue; - } else if (c == ':') { - // colon: we are parsing a time - goto interval_parse_time; - } else { - if (pos == start_pos) { - return false; - } - // finished the number, parse it from the string - string_t nr_string(str + start_pos, pos - start_pos); - number = Cast::Operation(nr_string); - if (negative) { - number = -number; - } - goto interval_parse_identifier; - } +void DataChunk::Initialize(const vector &types) { + D_ASSERT(data.empty()); // can only be initialized once + D_ASSERT(!types.empty()); // empty chunk not allowed + for (idx_t i = 0; i < types.size(); i++) { + VectorCache cache(types[i]); + data.emplace_back(cache); + vector_caches.push_back(move(cache)); } - goto end_of_string; -interval_parse_time : { - // parse the remainder of the time as a Time type - dtime_t time = Time::FromCString(str + start_pos, len); - result.micros += time; - found_any = true; - goto end_of_string; } -interval_parse_identifier: - for (; pos < len; pos++) { - char c = str[pos]; - if (c == ' ' || c == '\t' || c == '\n') { - // skip spaces at the start - continue; - } else { - break; - } - } - // now parse the identifier - start_pos = pos; - for (; pos < len; pos++) { - char c = str[pos]; - if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) 
{ - // keep parsing the string - continue; - } else { - break; - } - } - specifier = GetDatePartSpecifier(string(str + start_pos, pos - start_pos)); - // add the specifier to the interval - switch (specifier) { - case DatePartSpecifier::MILLENNIUM: - IntervalTryAddition(result.months, number, MONTHS_PER_MILLENIUM); - break; - case DatePartSpecifier::CENTURY: - IntervalTryAddition(result.months, number, MONTHS_PER_CENTURY); - break; - case DatePartSpecifier::DECADE: - IntervalTryAddition(result.months, number, MONTHS_PER_DECADE); - break; - case DatePartSpecifier::YEAR: - IntervalTryAddition(result.months, number, MONTHS_PER_YEAR); - break; - case DatePartSpecifier::QUARTER: - IntervalTryAddition(result.months, number, MONTHS_PER_QUARTER); - break; - case DatePartSpecifier::MONTH: - IntervalTryAddition(result.months, number, 1); - break; - case DatePartSpecifier::DAY: - IntervalTryAddition(result.days, number, 1); - break; - case DatePartSpecifier::WEEK: - IntervalTryAddition(result.days, number, DAYS_PER_WEEK); - break; - case DatePartSpecifier::MICROSECONDS: - IntervalTryAddition(result.micros, number, 1); - break; - case DatePartSpecifier::MILLISECONDS: - IntervalTryAddition(result.micros, number, MICROS_PER_MSEC); - break; - case DatePartSpecifier::SECOND: - IntervalTryAddition(result.micros, number, MICROS_PER_SEC); - break; - case DatePartSpecifier::MINUTE: - IntervalTryAddition(result.micros, number, MICROS_PER_MINUTE); - break; - case DatePartSpecifier::HOUR: - IntervalTryAddition(result.micros, number, MICROS_PER_HOUR); - break; - default: - return false; - } - found_any = true; - goto standard_interval; -interval_parse_ago: - // parse the "ago" string at the end of the - if (len - pos < 3) { - return false; - } - if (!(str[pos] == 'a' || str[pos == 'A'])) { - return false; - } - pos++; - if (!(str[pos] == 'g' || str[pos == 'G'])) { - return false; - } - pos++; - if (!(str[pos] == 'o' || str[pos == 'O'])) { - return false; + +void DataChunk::Reset() { + if 
(data.empty()) { + return; } - pos++; - // parse any trailing whitespace - for (; pos < len; pos++) { - char c = str[pos]; - if (c == ' ' || c == '\t' || c == '\n') { - continue; - } else { - return false; - } + if (vector_caches.size() != data.size()) { + throw InternalException("VectorCache and column count mismatch in DataChunk::Reset"); } - // invert all the values - result.months = -result.months; - result.days = -result.days; - result.micros = -result.micros; - goto end_of_string; -end_of_string: - if (!found_any) { - // end of string and no identifiers were found: cannot convert empty interval - return false; + for (idx_t i = 0; i < ColumnCount(); i++) { + data[i].ResetFromCache(vector_caches[i]); } - return true; -posix_interval: - return false; + SetCardinality(0); } -string Interval::ToString(interval_t interval) { - char buffer[70]; - idx_t length = IntervalToStringCast::Format(interval, buffer); - return string(buffer, length); +void DataChunk::Destroy() { + data.clear(); + vector_caches.clear(); + SetCardinality(0); } -interval_t Interval::GetDifference(timestamp_t timestamp_1, timestamp_t timestamp_2) { - date_t date1, date2; - dtime_t time1, time2; - - Timestamp::Convert(timestamp_1, date1, time1); - Timestamp::Convert(timestamp_2, date2, time2); - - // and from date extract the years, months and days - int32_t year1, month1, day1; - int32_t year2, month2, day2; - Date::Convert(date1, year1, month1, day1); - Date::Convert(date2, year2, month2, day2); - // finally perform the differences - auto year_diff = year1 - year2; - auto month_diff = month1 - month2; - auto day_diff = day1 - day2; - - // and from time extract hours, minutes, seconds and miliseconds - int32_t hour1, min1, sec1, micros1; - int32_t hour2, min2, sec2, micros2; - Time::Convert(time1, hour1, min1, sec1, micros1); - Time::Convert(time2, hour2, min2, sec2, micros2); - // finally perform the differences - auto hour_diff = hour1 - hour2; - auto min_diff = min1 - min2; - auto sec_diff = 
sec1 - sec2; - auto micros_diff = micros1 - micros2; +Value DataChunk::GetValue(idx_t col_idx, idx_t index) const { + D_ASSERT(index < size()); + return data[col_idx].GetValue(index); +} - // flip sign if necessary - if (timestamp_1 < timestamp_2) { - year_diff = -year_diff; - month_diff = -month_diff; - day_diff = -day_diff; - hour_diff = -hour_diff; - min_diff = -min_diff; - sec_diff = -sec_diff; - micros_diff = -micros_diff; - } - // now propagate any negative field into the next higher field - while (micros_diff < 0) { - micros_diff += MICROS_PER_SEC; - sec_diff--; - } - while (sec_diff < 0) { - sec_diff += SECS_PER_MINUTE; - min_diff--; - } - while (min_diff < 0) { - min_diff += MINS_PER_HOUR; - hour_diff--; - } - while (hour_diff < 0) { - hour_diff += HOURS_PER_DAY; - day_diff--; - } - while (day_diff < 0) { - if (timestamp_1 < timestamp_2) { - day_diff += Date::IsLeapYear(year1) ? Date::LEAP_DAYS[month1] : Date::NORMAL_DAYS[month1]; - month_diff--; - } else { - day_diff += Date::IsLeapYear(year2) ? 
Date::LEAP_DAYS[month2] : Date::NORMAL_DAYS[month2]; - month_diff--; - } - } - while (month_diff < 0) { - month_diff += MONTHS_PER_YEAR; - year_diff--; - } +void DataChunk::SetValue(idx_t col_idx, idx_t index, const Value &val) { + data[col_idx].SetValue(index, val); +} - // recover sign if necessary - if (timestamp_1 < timestamp_2 && (month_diff != 0 || day_diff != 0)) { - year_diff = -year_diff; - month_diff = -month_diff; - day_diff = -day_diff; - hour_diff = -hour_diff; - min_diff = -min_diff; - sec_diff = -sec_diff; - micros_diff = -micros_diff; +void DataChunk::Reference(DataChunk &chunk) { + D_ASSERT(chunk.ColumnCount() <= ColumnCount()); + SetCardinality(chunk); + for (idx_t i = 0; i < chunk.ColumnCount(); i++) { + data[i].Reference(chunk.data[i]); } - interval_t interval; - interval.months = year_diff * MONTHS_PER_YEAR + month_diff; - interval.days = day_diff; - interval.micros = Time::FromTime(hour_diff, min_diff, sec_diff, micros_diff); - - return interval; } -static void NormalizeIntervalEntries(interval_t input, int64_t &months, int64_t &days, int64_t µs) { - int64_t extra_months_d = input.days / Interval::DAYS_PER_MONTH; - int64_t extra_months_micros = input.micros / Interval::MICROS_PER_MONTH; - input.days -= extra_months_d * Interval::DAYS_PER_MONTH; - input.micros -= extra_months_micros * Interval::MICROS_PER_MONTH; - - int64_t extra_days_micros = input.micros / Interval::MICROS_PER_DAY; - input.micros -= extra_days_micros * Interval::MICROS_PER_DAY; - - months = input.months + extra_months_d + extra_months_micros; - days = input.days + extra_days_micros; - micros = input.micros; -} +void DataChunk::Move(DataChunk &chunk) { + SetCardinality(chunk); + data = move(chunk.data); + vector_caches = move(chunk.vector_caches); -bool Interval::Equals(interval_t left, interval_t right) { - return left.months == right.months && left.days == right.days && left.micros == right.micros; + chunk.Destroy(); } -bool Interval::GreaterThan(interval_t left, interval_t 
right) { - int64_t lmonths, ldays, lmicros; - int64_t rmonths, rdays, rmicros; - NormalizeIntervalEntries(left, lmonths, ldays, lmicros); - NormalizeIntervalEntries(right, rmonths, rdays, rmicros); +void DataChunk::Copy(DataChunk &other, idx_t offset) const { + D_ASSERT(ColumnCount() == other.ColumnCount()); + D_ASSERT(other.size() == 0); - if (lmonths > rmonths) { - return true; - } else if (lmonths < rmonths) { - return false; - } - if (ldays > rdays) { - return true; - } else if (ldays < rdays) { - return false; + for (idx_t i = 0; i < ColumnCount(); i++) { + D_ASSERT(other.data[i].GetVectorType() == VectorType::FLAT_VECTOR); + VectorOperations::Copy(data[i], other.data[i], size(), offset, 0); } - return lmicros > rmicros; -} - -bool Interval::GreaterThanEquals(interval_t left, interval_t right) { - return GreaterThan(left, right) || Equals(left, right); + other.SetCardinality(size() - offset); } -} // namespace duckdb - - - - - -#include - -namespace duckdb { - -bool IsNullValue(data_ptr_t ptr, PhysicalType type) { - data_t data[100]; - SetNullValue(data, type); - return memcmp(ptr, data, GetTypeIdSize(type)) == 0; -} +void DataChunk::Copy(DataChunk &other, const SelectionVector &sel, const idx_t source_count, const idx_t offset) const { + D_ASSERT(ColumnCount() == other.ColumnCount()); + D_ASSERT(other.size() == 0); + D_ASSERT((offset + source_count) <= size()); -//! 
Writes NullValue value of a specific type to a memory address -void SetNullValue(data_ptr_t ptr, PhysicalType type) { - switch (type) { - case PhysicalType::BOOL: - case PhysicalType::INT8: - Store(NullValue(), ptr); - break; - case PhysicalType::INT16: - Store(NullValue(), ptr); - break; - case PhysicalType::INT32: - Store(NullValue(), ptr); - break; - case PhysicalType::INT64: - Store(NullValue(), ptr); - break; - case PhysicalType::FLOAT: - Store(NullValue(), ptr); - break; - case PhysicalType::DOUBLE: - Store(NullValue(), ptr); - break; - case PhysicalType::VARCHAR: - Store(string_t(NullValue()), ptr); - break; - default: - throw InvalidTypeException(type, "Unsupported type for SetNullValue!"); + for (idx_t i = 0; i < ColumnCount(); i++) { + D_ASSERT(other.data[i].GetVectorType() == VectorType::FLAT_VECTOR); + VectorOperations::Copy(data[i], other.data[i], sel, source_count, offset, 0); } + other.SetCardinality(source_count - offset); } -} // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/types/row_chunk.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - - - - -namespace duckdb { - -struct RowDataBlock { - RowDataBlock(BufferManager &buffer_manager, const idx_t &capacity, const idx_t &entry_size) - : CAPACITY(capacity), count(0), byte_offset(0) { - block = buffer_manager.RegisterMemory(capacity * entry_size, false); +void DataChunk::Append(const DataChunk &other) { + if (other.size() == 0) { + return; } - shared_ptr block; - const idx_t CAPACITY; - idx_t count; - idx_t byte_offset; -}; - -struct BlockAppendEntry { - BlockAppendEntry(data_ptr_t baseptr, idx_t count) : baseptr(baseptr), count(count) { + if (ColumnCount() != other.ColumnCount()) { + throw InternalException("Column counts of appending chunk doesn't match!"); } - data_ptr_t baseptr; - idx_t count; -}; - -class RowChunk { -public: - RowChunk(BufferManager 
&buffer_manager, idx_t block_capacity, idx_t entry_size); - - RowChunk(RowChunk &other); - - std::mutex rc_lock; - - //! BufferManager - BufferManager &buffer_manager; - //! The total number of stored entries - idx_t count; - //! The number of entries per block - idx_t block_capacity; - //! Size of entries in the blocks - idx_t entry_size; - //! The blocks holding the main data - vector blocks; + for (idx_t i = 0; i < ColumnCount(); i++) { + D_ASSERT(data[i].GetVectorType() == VectorType::FLAT_VECTOR); + VectorOperations::Copy(other.data[i], data[i], other.size(), 0, size()); + } + SetCardinality(size() + other.size()); +} - idx_t Size() { - return blocks.size(); +void DataChunk::Normalify() { + for (idx_t i = 0; i < ColumnCount(); i++) { + data[i].Normalify(size()); } +} -public: - void SerializeVectorSortable(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, - data_ptr_t key_locations[], bool desc, bool has_null, bool invert, idx_t prefix_len); +vector DataChunk::GetTypes() { + vector types; + for (idx_t i = 0; i < ColumnCount(); i++) { + types.push_back(data[i].GetType()); + } + return types; +} - static void ComputeEntrySizes(Vector &v, idx_t entry_sizes[], idx_t vcount, idx_t offset = 0); - static void ComputeEntrySizes(DataChunk &input, idx_t entry_sizes[], idx_t entry_size); +string DataChunk::ToString() const { + string retval = "Chunk - [" + to_string(ColumnCount()) + " Columns]\n"; + for (idx_t i = 0; i < ColumnCount(); i++) { + retval += "- " + data[i].ToString(size()) + "\n"; + } + return retval; +} - static void SerializeVectorData(VectorData &vdata, PhysicalType type, const SelectionVector &sel, idx_t ser_count, - idx_t col_idx, data_ptr_t key_locations[], data_ptr_t validitymask_locations[], - idx_t offset = 0); - static void SerializeVector(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, idx_t col_idx, - data_ptr_t key_locations[], data_ptr_t validitymask_locations[], idx_t offset = 0); - idx_t 
AppendToBlock(RowDataBlock &block, BufferHandle &handle, vector &append_entries, - idx_t remaining, idx_t entry_sizes[]); - void Build(idx_t added_count, data_ptr_t key_locations[], idx_t entry_sizes[]); +void DataChunk::Serialize(Serializer &serializer) { + // write the count + serializer.Write(size()); + serializer.Write(ColumnCount()); + for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) { + // write the types + data[col_idx].GetType().Serialize(serializer); + } + // write the data + for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) { + data[col_idx].Serialize(size(), serializer); + } +} - static void DeserializeIntoVector(Vector &v, const idx_t &vcount, const idx_t &col_idx, data_ptr_t key_locations[], - data_ptr_t validitymask_locations[]); +void DataChunk::Deserialize(Deserializer &source) { + auto rows = source.Read(); + idx_t column_count = source.Read(); -private: - template - void TemplatedSerializeVectorSortable(VectorData &vdata, const SelectionVector &sel, idx_t count, - data_ptr_t key_locations[], bool desc, bool has_null, bool invert); - void SerializeStringVectorSortable(VectorData &vdata, const SelectionVector &sel, idx_t add_count, - data_ptr_t key_locations[], const bool desc, const bool has_null, - const bool nulls_first, const idx_t prefix_len); + vector types; + for (idx_t i = 0; i < column_count; i++) { + types.push_back(LogicalType::Deserialize(source)); + } + Initialize(types); + // now load the column data + SetCardinality(rows); + for (idx_t i = 0; i < column_count; i++) { + data[i].Deserialize(rows, source); + } + Verify(); +} - static void ComputeStringEntrySizes(Vector &v, idx_t entry_sizes[], idx_t vcount, idx_t offset); - static void ComputeStructEntrySizes(Vector &v, idx_t entry_sizes[], idx_t vcount, idx_t offset); - static void ComputeListEntrySizes(Vector &v, idx_t entry_sizes[], idx_t vcount, idx_t offset); +void DataChunk::Slice(const SelectionVector &sel_vector, idx_t count_p) { + this->count = count_p; + 
SelCache merge_cache; + for (idx_t c = 0; c < ColumnCount(); c++) { + data[c].Slice(sel_vector, count_p, merge_cache); + } +} - static void SerializeStringVector(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, - idx_t col_idx, data_ptr_t key_locations[], data_ptr_t validitymask_locations[], - idx_t offset); - static void SerializeStructVector(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, - idx_t col_idx, data_ptr_t key_locations[], data_ptr_t validitymask_locations[], - idx_t offset); - static void SerializeListVector(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, idx_t col_idx, - data_ptr_t key_locations[], data_ptr_t validitymask_locations[], idx_t offset); +void DataChunk::Slice(DataChunk &other, const SelectionVector &sel, idx_t count_p, idx_t col_offset) { + D_ASSERT(other.ColumnCount() <= col_offset + ColumnCount()); + this->count = count_p; + SelCache merge_cache; + for (idx_t c = 0; c < other.ColumnCount(); c++) { + if (other.data[c].GetVectorType() == VectorType::DICTIONARY_VECTOR) { + // already a dictionary! 
merge the dictionaries + data[col_offset + c].Reference(other.data[c]); + data[col_offset + c].Slice(sel, count_p, merge_cache); + } else { + data[col_offset + c].Slice(other.data[c], sel, count_p); + } + } +} - static void DeserializeIntoStringVector(Vector &v, const idx_t &vcount, const idx_t &col_idx, - data_ptr_t *key_locations, data_ptr_t *validitymask_locations); - static void DeserializeIntoStructVector(Vector &v, const idx_t &vcount, const idx_t &col_idx, - data_ptr_t *key_locations, data_ptr_t *validitymask_locations); - static void DeserializeIntoListVector(Vector &v, const idx_t &vcount, const idx_t &col_idx, - data_ptr_t *key_locations, data_ptr_t *validitymask_locations); +unique_ptr DataChunk::Orrify() { + auto orrified_data = unique_ptr(new VectorData[ColumnCount()]); + for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) { + data[col_idx].Orrify(size(), orrified_data[col_idx]); + } + return orrified_data; +} - //! Whether the system is little endian - const bool is_little_endian; -}; +void DataChunk::Hash(Vector &result) { + D_ASSERT(result.GetType().id() == LogicalTypeId::HASH); + VectorOperations::Hash(data[0], result, size()); + for (idx_t i = 1; i < ColumnCount(); i++) { + VectorOperations::CombineHash(result, data[i], size()); + } +} -} // namespace duckdb +void DataChunk::Verify() { +#ifdef DEBUG + D_ASSERT(size() <= STANDARD_VECTOR_SIZE); + // verify that all vectors in this chunk have the chunk selection vector + for (idx_t i = 0; i < ColumnCount(); i++) { + data[i].Verify(size()); + } +#endif +} +void DataChunk::Print() { + Printer::Print(ToString()); +} +struct DuckDBArrowArrayChildHolder { + ArrowArray array; + //! need max three pointers for strings + duckdb::array buffers = {{nullptr, nullptr, nullptr}}; + unique_ptr vector; + unique_ptr offsets; + unique_ptr data; + //! 
Children of nested structures + ::duckdb::vector children; + ::duckdb::vector children_ptrs; +}; +struct DuckDBArrowArrayHolder { + vector children = {}; + vector children_ptrs = {}; + array buffers = {{nullptr}}; +}; +static void ReleaseDuckDBArrowArray(ArrowArray *array) { + if (!array || !array->release) { + return; + } + array->release = nullptr; + auto holder = static_cast(array->private_data); + delete holder; +} -namespace duckdb { +void InitializeChild(DuckDBArrowArrayChildHolder &child_holder, idx_t size) { + auto &child = child_holder.array; + child.private_data = nullptr; + child.release = ReleaseDuckDBArrowArray; + child.n_children = 0; + child.null_count = 0; + child.offset = 0; + child.dictionary = nullptr; + child.buffers = child_holder.buffers.data(); -RowChunk::RowChunk(BufferManager &buffer_manager, idx_t block_capacity, idx_t entry_size) - : buffer_manager(buffer_manager), count(0), block_capacity(block_capacity), entry_size(entry_size), - is_little_endian(IsLittleEndian()) { - D_ASSERT(block_capacity * entry_size >= Storage::BLOCK_ALLOC_SIZE); + child.length = size; } -RowChunk::RowChunk(RowChunk &other) - : buffer_manager(other.buffer_manager), count(0), block_capacity(other.block_capacity), - entry_size(other.entry_size), is_little_endian(other.is_little_endian) { +void SetChildValidityMask(Vector &vector, ArrowArray &child) { + auto &mask = FlatVector::Validity(vector); + if (!mask.AllValid()) { + //! any bits are set: might have nulls + child.null_count = -1; + } else { + //! no bits are set; we know there are no nulls + child.null_count = 0; + } + child.buffers[0] = (void *)mask.GetData(); } -template -void RowChunk::TemplatedSerializeVectorSortable(VectorData &vdata, const SelectionVector &sel, idx_t add_count, - data_ptr_t key_locations[], const bool desc, const bool has_null, - const bool nulls_first) { - auto source = (T *)vdata.data; - if (has_null) { - auto &validity = vdata.validity; - const data_t valid = nulls_first ? 
1 : 0; - const data_t invalid = 1 - valid; +void SetArrowChild(DuckDBArrowArrayChildHolder &child_holder, const LogicalType &type, Vector &data, idx_t size); - for (idx_t i = 0; i < add_count; i++) { - auto idx = sel.get_index(i); - auto source_idx = vdata.sel->get_index(idx); - // write validity and according value - if (validity.RowIsValid(source_idx)) { - key_locations[i][0] = valid; - EncodeData(key_locations[i] + 1, source[source_idx], is_little_endian); - // invert bits if desc - if (desc) { - for (idx_t s = 1; s < sizeof(T) + 1; s++) { - *(key_locations[i] + s) = ~*(key_locations[i] + s); +void SetList(DuckDBArrowArrayChildHolder &child_holder, const LogicalType &type, Vector &data, idx_t size) { + auto &child = child_holder.array; + child_holder.vector = make_unique(data); + + //! Lists have two buffers + child.n_buffers = 2; + //! Second Buffer is the list offsets + child_holder.offsets = unique_ptr(new data_t[sizeof(uint32_t) * (size + 1)]); + child.buffers[1] = child_holder.offsets.get(); + auto offset_ptr = (uint32_t *)child.buffers[1]; + auto list_data = FlatVector::GetData(data); + auto list_mask = FlatVector::Validity(data); + idx_t offset = 0; + offset_ptr[0] = 0; + for (idx_t i = 0; i < size; i++) { + auto &le = list_data[i]; + + if (list_mask.RowIsValid(i)) { + offset += le.length; + } + + offset_ptr[i + 1] = offset; + } + auto list_size = ListVector::GetListSize(data); + child_holder.children.resize(1); + InitializeChild(child_holder.children[0], list_size); + child.n_children = 1; + child_holder.children_ptrs.push_back(&child_holder.children[0].array); + child.children = &child_holder.children_ptrs[0]; + auto &child_vector = ListVector::GetEntry(data); + auto &child_type = ListType::GetChildType(type); + SetArrowChild(child_holder.children[0], child_type, child_vector, list_size); + SetChildValidityMask(child_vector, child_holder.children[0].array); +} + +void SetStruct(DuckDBArrowArrayChildHolder &child_holder, const LogicalType &type, Vector 
&data, idx_t size) { + auto &child = child_holder.array; + child_holder.vector = make_unique(data); + + //! Structs only have validity buffers + child.n_buffers = 1; + auto &children = StructVector::GetEntries(*child_holder.vector); + child.n_children = children.size(); + child_holder.children.resize(child.n_children); + for (auto &struct_child : child_holder.children) { + InitializeChild(struct_child, size); + child_holder.children_ptrs.push_back(&struct_child.array); + } + child.children = &child_holder.children_ptrs[0]; + for (idx_t child_idx = 0; child_idx < child_holder.children.size(); child_idx++) { + SetArrowChild(child_holder.children[child_idx], StructType::GetChildType(type, child_idx), *children[child_idx], + size); + SetChildValidityMask(*children[child_idx], child_holder.children[child_idx].array); + } +} + +void SetStructMap(DuckDBArrowArrayChildHolder &child_holder, const LogicalType &type, Vector &data, idx_t size) { + auto &child = child_holder.array; + child_holder.vector = make_unique(data); + + //! Structs only have validity buffers + child.n_buffers = 1; + auto &children = StructVector::GetEntries(*child_holder.vector); + child.n_children = children.size(); + child_holder.children.resize(child.n_children); + auto list_size = ListVector::GetListSize(*children[0]); + child.length = list_size; + for (auto &struct_child : child_holder.children) { + InitializeChild(struct_child, list_size); + child_holder.children_ptrs.push_back(&struct_child.array); + } + child.children = &child_holder.children_ptrs[0]; + auto &child_types = StructType::GetChildTypes(type); + for (idx_t child_idx = 0; child_idx < child_holder.children.size(); child_idx++) { + auto &list_vector_child = ListVector::GetEntry(*children[child_idx]); + if (child_idx == 0) { + VectorData list_data; + children[child_idx]->Orrify(size, list_data); + auto list_child_validity = FlatVector::Validity(list_vector_child); + if (!list_child_validity.AllValid()) { + //! 
Get the offsets to check from the selection vector + auto list_offsets = FlatVector::GetData(*children[child_idx]); + for (idx_t list_idx = 0; list_idx < size; list_idx++) { + auto offset = list_offsets[list_data.sel->get_index(list_idx)]; + if (!list_child_validity.CheckAllValid(offset.length + offset.offset, offset.offset)) { + throw std::runtime_error("Arrow doesnt accept NULL keys on Maps"); } } - } else { - key_locations[i][0] = invalid; - memset(key_locations[i] + 1, '\0', sizeof(T)); } - key_locations[i] += sizeof(T) + 1; + } else { + SetChildValidityMask(list_vector_child, child_holder.children[child_idx].array); } - } else { - for (idx_t i = 0; i < add_count; i++) { - auto idx = sel.get_index(i); - auto source_idx = vdata.sel->get_index(idx); - // write value - EncodeData(key_locations[i], source[source_idx], is_little_endian); - // invert bits if desc - if (desc) { - for (idx_t s = 1; s < sizeof(T); s++) { - *(key_locations[i] + s) = ~*(key_locations[i] + s); - } + SetArrowChild(child_holder.children[child_idx], ListType::GetChildType(child_types[child_idx].second), + list_vector_child, list_size); + } +} + +void SetArrowChild(DuckDBArrowArrayChildHolder &child_holder, const LogicalType &type, Vector &data, idx_t size) { + auto &child = child_holder.array; + switch (type.id()) { + case LogicalTypeId::BOOLEAN: { + //! Gotta bitpack these booleans + child_holder.vector = make_unique(data); + child.n_buffers = 2; + idx_t num_bytes = (size + 8 - 1) / 8; + child_holder.data = unique_ptr(new data_t[sizeof(uint8_t) * num_bytes]); + child.buffers[1] = child_holder.data.get(); + auto source_ptr = FlatVector::GetData(*child_holder.vector); + auto target_ptr = (uint8_t *)child.buffers[1]; + idx_t target_pos = 0; + idx_t cur_bit = 0; + for (idx_t row_idx = 0; row_idx < size; row_idx++) { + if (cur_bit == 8) { + target_pos++; + cur_bit = 0; + } + if (source_ptr[row_idx] == 0) { + //! We set the bit to 0 + target_ptr[target_pos] &= ~(1 << cur_bit); + } else { + //! 
We set the bit to 1 + target_ptr[target_pos] |= 1 << cur_bit; } - key_locations[i] += sizeof(T); + cur_bit++; } + break; } -} + case LogicalTypeId::TINYINT: + case LogicalTypeId::SMALLINT: + case LogicalTypeId::INTEGER: + case LogicalTypeId::BIGINT: + case LogicalTypeId::UTINYINT: + case LogicalTypeId::USMALLINT: + case LogicalTypeId::UINTEGER: + case LogicalTypeId::UBIGINT: + case LogicalTypeId::FLOAT: + case LogicalTypeId::DOUBLE: + case LogicalTypeId::HUGEINT: + case LogicalTypeId::DATE: + case LogicalTypeId::TIMESTAMP: + case LogicalTypeId::TIMESTAMP_MS: + case LogicalTypeId::TIMESTAMP_NS: + case LogicalTypeId::TIMESTAMP_SEC: + case LogicalTypeId::TIME: + child_holder.vector = make_unique(data); + child.n_buffers = 2; + child.buffers[1] = (void *)FlatVector::GetData(*child_holder.vector); + break; + case LogicalTypeId::SQLNULL: + child.n_buffers = 1; + break; + case LogicalTypeId::DECIMAL: { + child.n_buffers = 2; + child_holder.vector = make_unique(data); -void RowChunk::SerializeStringVectorSortable(VectorData &vdata, const SelectionVector &sel, idx_t add_count, - data_ptr_t key_locations[], const bool desc, const bool has_null, - const bool nulls_first, const idx_t prefix_len) { - auto source = (string_t *)vdata.data; - if (has_null) { - auto &validity = vdata.validity; - const data_t valid = nulls_first ? 1 : 0; - const data_t invalid = 1 - valid; + //! 
We have to convert to INT128 + switch (type.InternalType()) { - for (idx_t i = 0; i < add_count; i++) { - auto idx = sel.get_index(i); - auto source_idx = vdata.sel->get_index(idx); - // write validity and according value - if (validity.RowIsValid(source_idx)) { - key_locations[i][0] = valid; - EncodeStringDataPrefix(key_locations[i] + 1, source[source_idx], prefix_len); - // invert bits if desc - if (desc) { - for (idx_t s = 1; s < prefix_len + 1; s++) { - *(key_locations[i] + s) = ~*(key_locations[i] + s); - } - } - } else { - key_locations[i][0] = invalid; - memset(key_locations[i] + 1, '\0', prefix_len); + case PhysicalType::INT16: { + child_holder.data = unique_ptr(new data_t[sizeof(hugeint_t) * (size)]); + child.buffers[1] = child_holder.data.get(); + auto source_ptr = FlatVector::GetData(*child_holder.vector); + auto target_ptr = (hugeint_t *)child.buffers[1]; + for (idx_t row_idx = 0; row_idx < size; row_idx++) { + target_ptr[row_idx] = source_ptr[row_idx]; } - key_locations[i] += prefix_len + 1; + break; } - } else { - for (idx_t i = 0; i < add_count; i++) { - auto idx = sel.get_index(i); - auto source_idx = vdata.sel->get_index(idx); - // write value - EncodeStringDataPrefix(key_locations[i], source[source_idx], prefix_len); - // invert bits if desc - if (desc) { - for (idx_t s = 1; s < prefix_len; s++) { - *(key_locations[i] + s) = ~*(key_locations[i] + s); - } + case PhysicalType::INT32: { + child_holder.data = unique_ptr(new data_t[sizeof(hugeint_t) * (size)]); + child.buffers[1] = child_holder.data.get(); + auto source_ptr = FlatVector::GetData(*child_holder.vector); + auto target_ptr = (hugeint_t *)child.buffers[1]; + for (idx_t row_idx = 0; row_idx < size; row_idx++) { + target_ptr[row_idx] = source_ptr[row_idx]; } - key_locations[i] += prefix_len; + break; + } + case PhysicalType::INT64: { + child_holder.data = unique_ptr(new data_t[sizeof(hugeint_t) * (size)]); + child.buffers[1] = child_holder.data.get(); + auto source_ptr = 
FlatVector::GetData(*child_holder.vector); + auto target_ptr = (hugeint_t *)child.buffers[1]; + for (idx_t row_idx = 0; row_idx < size; row_idx++) { + target_ptr[row_idx] = source_ptr[row_idx]; + } + break; + } + case PhysicalType::INT128: { + child.buffers[1] = (void *)FlatVector::GetData(*child_holder.vector); + break; + } + default: + throw std::runtime_error("Unsupported physical type for Decimal" + TypeIdToString(type.InternalType())); } + break; } -} + case LogicalTypeId::BLOB: + case LogicalTypeId::VARCHAR: { + child_holder.vector = make_unique(data); + child.n_buffers = 3; + child_holder.offsets = unique_ptr(new data_t[sizeof(uint32_t) * (size + 1)]); + child.buffers[1] = child_holder.offsets.get(); + D_ASSERT(child.buffers[1]); + //! step 1: figure out total string length: + idx_t total_string_length = 0; + auto string_t_ptr = FlatVector::GetData(*child_holder.vector); + auto &mask = FlatVector::Validity(*child_holder.vector); + for (idx_t row_idx = 0; row_idx < size; row_idx++) { + if (!mask.RowIsValid(row_idx)) { + continue; + } + total_string_length += string_t_ptr[row_idx].GetSize(); + } + //! step 2: allocate this much + child_holder.data = unique_ptr(new data_t[total_string_length]); + child.buffers[2] = child_holder.data.get(); + D_ASSERT(child.buffers[2]); + //! 
step 3: assign buffers + idx_t current_heap_offset = 0; + auto target_ptr = (uint32_t *)child.buffers[1]; -void RowChunk::SerializeVectorSortable(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, - data_ptr_t key_locations[], bool desc, bool has_null, bool nulls_first, - idx_t prefix_len) { - VectorData vdata; - v.Orrify(vcount, vdata); - switch (v.GetType().InternalType()) { - case PhysicalType::BOOL: - case PhysicalType::INT8: - TemplatedSerializeVectorSortable(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first); - break; - case PhysicalType::INT16: - TemplatedSerializeVectorSortable(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first); - break; - case PhysicalType::INT32: - TemplatedSerializeVectorSortable(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first); - break; - case PhysicalType::INT64: - TemplatedSerializeVectorSortable(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first); - break; - case PhysicalType::UINT8: - TemplatedSerializeVectorSortable(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first); - break; - case PhysicalType::UINT16: - TemplatedSerializeVectorSortable(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first); - break; - case PhysicalType::UINT32: - TemplatedSerializeVectorSortable(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first); - break; - case PhysicalType::UINT64: - TemplatedSerializeVectorSortable(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first); - break; - case PhysicalType::INT128: - TemplatedSerializeVectorSortable(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first); - break; - case PhysicalType::FLOAT: - TemplatedSerializeVectorSortable(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first); + for (idx_t row_idx = 0; row_idx < size; row_idx++) { + target_ptr[row_idx] = current_heap_offset; + if (!mask.RowIsValid(row_idx)) { + continue; + } + auto &str = 
string_t_ptr[row_idx]; + memcpy((void *)((uint8_t *)child.buffers[2] + current_heap_offset), str.GetDataUnsafe(), str.GetSize()); + current_heap_offset += str.GetSize(); + } + target_ptr[size] = current_heap_offset; //! need to terminate last string! break; - case PhysicalType::DOUBLE: - TemplatedSerializeVectorSortable(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first); + } + case LogicalTypeId::LIST: { + SetList(child_holder, type, data, size); break; - case PhysicalType::HASH: - TemplatedSerializeVectorSortable(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first); + } + case LogicalTypeId::STRUCT: { + SetStruct(child_holder, type, data, size); break; - case PhysicalType::INTERVAL: - TemplatedSerializeVectorSortable(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first); + } + case LogicalTypeId::MAP: { + child_holder.vector = make_unique(data); + + auto &map_mask = FlatVector::Validity(*child_holder.vector); + child.n_buffers = 2; + //! Maps have one child + child.n_children = 1; + child_holder.children.resize(1); + InitializeChild(child_holder.children[0], size); + child_holder.children_ptrs.push_back(&child_holder.children[0].array); + //! Second Buffer is the offsets + child_holder.offsets = unique_ptr(new data_t[sizeof(uint32_t) * (size + 1)]); + child.buffers[1] = child_holder.offsets.get(); + auto &struct_children = StructVector::GetEntries(data); + auto offset_ptr = (uint32_t *)child.buffers[1]; + auto list_data = FlatVector::GetData(*struct_children[0]); + idx_t offset = 0; + offset_ptr[0] = 0; + for (idx_t i = 0; i < size; i++) { + auto &le = list_data[i]; + if (map_mask.RowIsValid(i)) { + offset += le.length; + } + offset_ptr[i + 1] = offset; + } + child.children = &child_holder.children_ptrs[0]; + //! 
We need to set up a struct + auto struct_type = LogicalType::STRUCT(StructType::GetChildTypes(type)); + + SetStructMap(child_holder.children[0], struct_type, *child_holder.vector, size); break; - case PhysicalType::VARCHAR: - SerializeStringVectorSortable(vdata, sel, ser_count, key_locations, desc, has_null, nulls_first, prefix_len); + } + case LogicalTypeId::INTERVAL: { + //! convert interval from month/days/ucs to milliseconds + child_holder.vector = make_unique(data); + child.n_buffers = 2; + child_holder.data = unique_ptr(new data_t[sizeof(int64_t) * (size)]); + child.buffers[1] = child_holder.data.get(); + auto source_ptr = FlatVector::GetData(*child_holder.vector); + auto target_ptr = (int64_t *)child.buffers[1]; + for (idx_t row_idx = 0; row_idx < size; row_idx++) { + target_ptr[row_idx] = Interval::GetMilli(source_ptr[row_idx]); + } break; + } default: - throw NotImplementedException("Cannot ORDER BY column with type %s", v.GetType().ToString()); + throw std::runtime_error("Unsupported type " + type.ToString()); } -} +} // namespace duckdb +void DataChunk::ToArrowArray(ArrowArray *out_array) { + Normalify(); + D_ASSERT(out_array); -void RowChunk::ComputeStringEntrySizes(Vector &v, idx_t entry_sizes[], idx_t vcount, idx_t offset) { - VectorData vdata; - v.Orrify(vcount, vdata); + // Allocate as unique_ptr first to cleanup properly on error + auto root_holder = make_unique(); - const idx_t string_prefix_len = string_t::PREFIX_LENGTH; - auto strings = (string_t *)vdata.data; - for (idx_t i = 0; i < vcount; i++) { - idx_t str_idx = vdata.sel->get_index(i) + offset; - if (vdata.validity.RowIsValid(str_idx)) { - entry_sizes[i] += string_prefix_len + strings[str_idx].GetSize(); - } + // Allocate the children + root_holder->children.resize(ColumnCount()); + root_holder->children_ptrs.resize(ColumnCount(), nullptr); + for (size_t i = 0; i < ColumnCount(); ++i) { + root_holder->children_ptrs[i] = &root_holder->children[i].array; } -} + out_array->children = 
root_holder->children_ptrs.data(); + out_array->n_children = ColumnCount(); -void RowChunk::ComputeStructEntrySizes(Vector &v, idx_t entry_sizes[], idx_t vcount, idx_t offset) { - VectorData vdata; - v.Orrify(vcount, vdata); + // Configure root array + out_array->length = size(); + out_array->n_children = ColumnCount(); + out_array->n_buffers = 1; + out_array->buffers = root_holder->buffers.data(); // there is no actual buffer there since we don't have NULLs + out_array->offset = 0; + out_array->null_count = 0; // needs to be 0 + out_array->dictionary = nullptr; - // obtain child vectors - idx_t num_children; - vector struct_vectors; - if (v.GetVectorType() == VectorType::DICTIONARY_VECTOR) { - auto &child = DictionaryVector::Child(v); - auto &dict_sel = DictionaryVector::SelVector(v); - auto &children = StructVector::GetEntries(child); - num_children = children.size(); - for (auto &struct_child : children) { - Vector struct_vector; - struct_vector.Slice(*struct_child.second, dict_sel, vcount); - struct_vectors.push_back(move(struct_vector)); - } - } else { - auto &children = StructVector::GetEntries(v); - num_children = children.size(); - for (auto &struct_child : children) { - Vector struct_vector; - struct_vector.Reference(*struct_child.second); - struct_vectors.push_back(move(struct_vector)); - } - } - // add struct validitymask size - const idx_t struct_validitymask_size = (num_children + 7) / 8; - for (idx_t i = 0; i < vcount; i++) { - // FIXME: don't serialize if the struct is NULL? - entry_sizes[i] += struct_validitymask_size; - } - // compute size of child vectors - for (auto &struct_vector : struct_vectors) { - ComputeEntrySizes(struct_vector, entry_sizes, vcount, offset); - } -} + //! 
Configure child arrays + for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) { + auto &child_holder = root_holder->children[col_idx]; + InitializeChild(child_holder, size()); + auto &vector = child_holder.vector; + auto &child = child_holder.array; -static list_entry_t *GetListData(Vector &v) { - if (v.GetVectorType() == VectorType::DICTIONARY_VECTOR) { - auto &child = DictionaryVector::Child(v); - return GetListData(child); + //! We could, in theory, output other types of vectors here, currently only FLAT Vectors + SetArrowChild(child_holder, GetTypes()[col_idx], data[col_idx], size()); + SetChildValidityMask(*vector, child); + out_array->children[col_idx] = &child; } - return FlatVector::GetData(v); + + // Release ownership to caller + out_array->private_data = root_holder.release(); + out_array->release = ReleaseDuckDBArrowArray; } -void RowChunk::ComputeListEntrySizes(Vector &v, idx_t entry_sizes[], idx_t vcount, idx_t offset) { - VectorData vdata; - v.Orrify(vcount, vdata); +} // namespace duckdb - auto list_data = GetListData(v); - auto &child_vector = ListVector::GetEntry(v); - idx_t list_entry_sizes[STANDARD_VECTOR_SIZE]; - for (idx_t i = 0; i < vcount; i++) { - idx_t idx = vdata.sel->get_index(i) + offset; - if (vdata.validity.RowIsValid(idx)) { - auto list_entry = list_data[idx]; - // make room for list length, list validitymask - entry_sizes[i] += sizeof(list_entry.length); - entry_sizes[i] += (list_entry.length + 7) / 8; - // serialize size of each entry (if non-constant size) - if (!TypeIsConstantSize(v.GetType().child_types()[0].second.InternalType())) { - entry_sizes[i] += list_entry.length * sizeof(list_entry.length); - } - // compute size of each the elements in list_entry and sum them - auto entry_remaining = list_entry.length; - auto entry_offset = list_entry.offset; - while (entry_remaining > 0) { - // the list entry can span multiple vectors - auto next = MinValue((idx_t)STANDARD_VECTOR_SIZE, entry_remaining); - // compute and add to 
the total - std::fill_n(list_entry_sizes, next, 0); - ComputeEntrySizes(child_vector, list_entry_sizes, next, entry_offset); - for (idx_t list_idx = 0; list_idx < next; list_idx++) { - entry_sizes[i] += list_entry_sizes[list_idx]; - } - // update for next iteration - entry_remaining -= next; - entry_offset += next; - } - } - } -} -void RowChunk::ComputeEntrySizes(Vector &v, idx_t entry_sizes[], idx_t vcount, idx_t offset) { - auto physical_type = v.GetType().InternalType(); - if (TypeIsConstantSize(physical_type)) { - const auto type_size = GetTypeIdSize(physical_type); - for (idx_t i = 0; i < vcount; i++) { - entry_sizes[i] += type_size; - } - } else { - switch (physical_type) { - case PhysicalType::VARCHAR: - ComputeStringEntrySizes(v, entry_sizes, vcount, offset); - break; - case PhysicalType::STRUCT: - ComputeStructEntrySizes(v, entry_sizes, vcount, offset); - break; - case PhysicalType::LIST: - ComputeListEntrySizes(v, entry_sizes, vcount, offset); - break; - default: - throw NotImplementedException("Column with variable size type %s cannot be serialized to row-format", - v.GetType().ToString()); - } - } -} -void RowChunk::ComputeEntrySizes(DataChunk &input, idx_t entry_sizes[], idx_t entry_size) { - // fill array with constant portion of payload entry size - std::fill_n(entry_sizes, input.size(), entry_size); - // compute size of the constant portion of the payload columns - VectorData vdata; - for (idx_t col_idx = 0; col_idx < input.data.size(); col_idx++) { - auto physical_type = input.data[col_idx].GetType().InternalType(); - if (TypeIsConstantSize(physical_type)) { - continue; - } - ComputeEntrySizes(input.data[col_idx], entry_sizes, input.size()); - } -} +#include +#include +#include -template -static void TemplatedSerializeVData(VectorData &vdata, const SelectionVector &sel, idx_t count, idx_t col_idx, - data_ptr_t *key_locations, data_ptr_t *validitymask_locations, idx_t offset) { - auto source = (T *)vdata.data; - if (!validitymask_locations) { - for 
(idx_t i = 0; i < count; i++) { - auto idx = sel.get_index(i); - auto source_idx = vdata.sel->get_index(idx) + offset; +namespace duckdb { - auto target = (T *)key_locations[i]; - Store(source[source_idx], (data_ptr_t)target); - key_locations[i] += sizeof(T); - } - } else { - const auto byte_offset = col_idx / 8; - const auto bit = ~(1UL << (col_idx % 8)); - for (idx_t i = 0; i < count; i++) { - auto idx = sel.get_index(i); - auto source_idx = vdata.sel->get_index(idx) + offset; +static_assert(sizeof(date_t) == sizeof(int32_t), "date_t was padded"); - auto target = (T *)key_locations[i]; - Store(source[source_idx], (data_ptr_t)target); - key_locations[i] += sizeof(T); +const string_t Date::MONTH_NAMES_ABBREVIATED[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; +const string_t Date::MONTH_NAMES[] = {"January", "February", "March", "April", "May", "June", + "July", "August", "September", "October", "November", "December"}; +const string_t Date::DAY_NAMES[] = {"Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"}; +const string_t Date::DAY_NAMES_ABBREVIATED[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"}; - // set the validitymask - if (!vdata.validity.RowIsValid(source_idx)) { - *(validitymask_locations[i] + byte_offset) &= bit; - } - } - } -} +const int32_t Date::NORMAL_DAYS[] = {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; +const int32_t Date::CUMULATIVE_DAYS[] = {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}; +const int32_t Date::LEAP_DAYS[] = {0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; +const int32_t Date::CUMULATIVE_LEAP_DAYS[] = {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}; +const int8_t Date::MONTH_PER_DAY_OF_YEAR[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12}; +const int8_t Date::LEAP_MONTH_PER_DAY_OF_YEAR[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12}; +const int32_t Date::CUMULATIVE_YEAR_DAYS[] = { + 0, 365, 730, 1096, 1461, 1826, 2191, 2557, 2922, 3287, 3652, 4018, 4383, 4748, + 5113, 5479, 5844, 6209, 6574, 6940, 7305, 7670, 8035, 8401, 8766, 9131, 9496, 9862, + 10227, 10592, 10957, 11323, 11688, 12053, 12418, 12784, 13149, 13514, 13879, 14245, 14610, 14975, + 15340, 15706, 16071, 16436, 16801, 17167, 17532, 17897, 18262, 18628, 18993, 19358, 19723, 20089, + 20454, 20819, 21184, 21550, 21915, 22280, 22645, 23011, 23376, 23741, 24106, 24472, 24837, 25202, + 25567, 25933, 26298, 26663, 27028, 27394, 27759, 28124, 28489, 28855, 29220, 29585, 29950, 30316, + 30681, 31046, 31411, 31777, 32142, 32507, 32872, 33238, 33603, 33968, 34333, 34699, 35064, 35429, + 35794, 36160, 36525, 36890, 37255, 37621, 37986, 38351, 38716, 39082, 39447, 39812, 40177, 40543, + 40908, 41273, 41638, 42004, 42369, 42734, 43099, 43465, 43830, 44195, 44560, 44926, 45291, 45656, + 46021, 46387, 46752, 47117, 47482, 47847, 48212, 48577, 48942, 49308, 49673, 50038, 50403, 50769, + 51134, 51499, 51864, 52230, 52595, 52960, 53325, 53691, 54056, 54421, 54786, 55152, 55517, 55882, + 56247, 56613, 56978, 57343, 57708, 58074, 58439, 58804, 59169, 59535, 59900, 60265, 60630, 60996, + 61361, 61726, 62091, 62457, 62822, 63187, 63552, 63918, 64283, 64648, 65013, 65379, 65744, 66109, + 66474, 66840, 67205, 67570, 67935, 68301, 68666, 69031, 69396, 69762, 70127, 70492, 70857, 71223, + 71588, 71953, 72318, 72684, 73049, 73414, 73779, 74145, 74510, 74875, 75240, 75606, 75971, 76336, + 76701, 77067, 77432, 77797, 78162, 78528, 78893, 79258, 79623, 79989, 80354, 80719, 81084, 81450, + 81815, 82180, 82545, 82911, 83276, 83641, 84006, 84371, 84736, 85101, 85466, 85832, 86197, 86562, + 86927, 87293, 87658, 
88023, 88388, 88754, 89119, 89484, 89849, 90215, 90580, 90945, 91310, 91676, + 92041, 92406, 92771, 93137, 93502, 93867, 94232, 94598, 94963, 95328, 95693, 96059, 96424, 96789, + 97154, 97520, 97885, 98250, 98615, 98981, 99346, 99711, 100076, 100442, 100807, 101172, 101537, 101903, + 102268, 102633, 102998, 103364, 103729, 104094, 104459, 104825, 105190, 105555, 105920, 106286, 106651, 107016, + 107381, 107747, 108112, 108477, 108842, 109208, 109573, 109938, 110303, 110669, 111034, 111399, 111764, 112130, + 112495, 112860, 113225, 113591, 113956, 114321, 114686, 115052, 115417, 115782, 116147, 116513, 116878, 117243, + 117608, 117974, 118339, 118704, 119069, 119435, 119800, 120165, 120530, 120895, 121260, 121625, 121990, 122356, + 122721, 123086, 123451, 123817, 124182, 124547, 124912, 125278, 125643, 126008, 126373, 126739, 127104, 127469, + 127834, 128200, 128565, 128930, 129295, 129661, 130026, 130391, 130756, 131122, 131487, 131852, 132217, 132583, + 132948, 133313, 133678, 134044, 134409, 134774, 135139, 135505, 135870, 136235, 136600, 136966, 137331, 137696, + 138061, 138427, 138792, 139157, 139522, 139888, 140253, 140618, 140983, 141349, 141714, 142079, 142444, 142810, + 143175, 143540, 143905, 144271, 144636, 145001, 145366, 145732, 146097}; -void RowChunk::SerializeVectorData(VectorData &vdata, PhysicalType type, const SelectionVector &sel, idx_t ser_count, - idx_t col_idx, data_ptr_t key_locations[], data_ptr_t validitymask_locations[], - idx_t offset) { - switch (type) { - case PhysicalType::BOOL: - case PhysicalType::INT8: - TemplatedSerializeVData(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); - break; - case PhysicalType::INT16: - TemplatedSerializeVData(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); - break; - case PhysicalType::INT32: - TemplatedSerializeVData(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); - break; - case PhysicalType::INT64: - 
TemplatedSerializeVData(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); - break; - case PhysicalType::UINT8: - TemplatedSerializeVData(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); - break; - case PhysicalType::UINT16: - TemplatedSerializeVData(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, - offset); - break; - case PhysicalType::UINT32: - TemplatedSerializeVData(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, - offset); - break; - case PhysicalType::UINT64: - TemplatedSerializeVData(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, - offset); - break; - case PhysicalType::INT128: - TemplatedSerializeVData(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, - offset); - break; - case PhysicalType::FLOAT: - TemplatedSerializeVData(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); - break; - case PhysicalType::DOUBLE: - TemplatedSerializeVData(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); - break; - case PhysicalType::HASH: - TemplatedSerializeVData(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); - break; - case PhysicalType::INTERVAL: - TemplatedSerializeVData(vdata, sel, ser_count, col_idx, key_locations, validitymask_locations, - offset); - break; - default: - throw NotImplementedException("FIXME: unimplemented serialize to of constant type column to row-format"); +void Date::ExtractYearOffset(int32_t &n, int32_t &year, int32_t &year_offset) { + year = Date::EPOCH_YEAR; + // first we normalize n to be in the year range [1970, 2370] + // since leap years repeat every 400 years, we can safely normalize just by "shifting" the CumulativeYearDays array + while (n < 0) { + n += Date::DAYS_PER_YEAR_INTERVAL; + year -= Date::YEAR_INTERVAL; + } + while (n >= Date::DAYS_PER_YEAR_INTERVAL) { + n -= Date::DAYS_PER_YEAR_INTERVAL; + 
year += Date::YEAR_INTERVAL; + } + // interpolation search + // we can find an upper bound of the year by assuming each year has 365 days + year_offset = n / 365; + // because of leap years we might be off by a little bit: compensate by decrementing the year offset until we find + // our year + while (n < Date::CUMULATIVE_YEAR_DAYS[year_offset]) { + year_offset--; + D_ASSERT(year_offset >= 0); } + year += year_offset; + D_ASSERT(n >= Date::CUMULATIVE_YEAR_DAYS[year_offset]); } -void RowChunk::SerializeStringVector(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, - idx_t col_idx, data_ptr_t key_locations[], data_ptr_t validitymask_locations[], - idx_t offset) { - VectorData vdata; - v.Orrify(vcount, vdata); +void Date::Convert(date_t d, int32_t &year, int32_t &month, int32_t &day) { + auto n = d.days; + int32_t year_offset; + Date::ExtractYearOffset(n, year, year_offset); - const idx_t string_prefix_len = string_t::PREFIX_LENGTH; - auto strings = (string_t *)vdata.data; - if (!validitymask_locations) { - for (idx_t i = 0; i < ser_count; i++) { - auto idx = sel.get_index(i); - auto source_idx = vdata.sel->get_index(idx) + offset; - if (vdata.validity.RowIsValid(source_idx)) { - auto &string_entry = strings[source_idx]; - // store string size - Store(string_entry.GetSize(), key_locations[i]); - key_locations[i] += string_prefix_len; - // store the string - memcpy(key_locations[i], string_entry.GetDataUnsafe(), string_entry.GetSize()); - key_locations[i] += string_entry.GetSize(); - } - } + day = n - Date::CUMULATIVE_YEAR_DAYS[year_offset]; + D_ASSERT(day >= 0 && day <= 365); + + bool is_leap_year = (Date::CUMULATIVE_YEAR_DAYS[year_offset + 1] - Date::CUMULATIVE_YEAR_DAYS[year_offset]) == 366; + if (is_leap_year) { + month = Date::LEAP_MONTH_PER_DAY_OF_YEAR[day]; + day -= Date::CUMULATIVE_LEAP_DAYS[month - 1]; } else { - auto byte_offset = col_idx / 8; - const auto bit = ~(1UL << (col_idx % 8)); - for (idx_t i = 0; i < ser_count; i++) { - auto idx 
= sel.get_index(i); - auto source_idx = vdata.sel->get_index(idx) + offset; - if (vdata.validity.RowIsValid(source_idx)) { - auto &string_entry = strings[source_idx]; - // store string size - Store(string_entry.GetSize(), key_locations[i]); - key_locations[i] += string_prefix_len; - // store the string - memcpy(key_locations[i], string_entry.GetDataUnsafe(), string_entry.GetSize()); - key_locations[i] += string_entry.GetSize(); - } else { - // set the validitymask - *(validitymask_locations[i] + byte_offset) &= bit; - } - } + month = Date::MONTH_PER_DAY_OF_YEAR[day]; + day -= Date::CUMULATIVE_DAYS[month - 1]; } + day++; + D_ASSERT(day > 0 && day <= (is_leap_year ? Date::LEAP_DAYS[month] : Date::NORMAL_DAYS[month])); + D_ASSERT(month > 0 && month <= 12); } -void RowChunk::SerializeStructVector(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, - idx_t col_idx, data_ptr_t key_locations[], data_ptr_t validitymask_locations[], - idx_t offset) { - VectorData vdata; - v.Orrify(vcount, vdata); - - idx_t num_children; - vector struct_vectors; - if (v.GetVectorType() == VectorType::DICTIONARY_VECTOR) { - auto &child = DictionaryVector::Child(v); - auto &dict_sel = DictionaryVector::SelVector(v); - auto &children = StructVector::GetEntries(child); - num_children = children.size(); - for (auto &struct_child : children) { - Vector struct_vector; - struct_vector.Slice(*struct_child.second, dict_sel, vcount); - struct_vectors.push_back(move(struct_vector)); - } +bool Date::TryFromDate(int32_t year, int32_t month, int32_t day, date_t &result) { + int32_t n = 0; + if (!Date::IsValid(year, month, day)) { + return false; + } + n += Date::IsLeapYear(year) ? 
Date::CUMULATIVE_LEAP_DAYS[month - 1] : Date::CUMULATIVE_DAYS[month - 1]; + n += day - 1; + if (year < 1970) { + int32_t diff_from_base = 1970 - year; + int32_t year_index = 400 - (diff_from_base % 400); + int32_t fractions = diff_from_base / 400; + n += Date::CUMULATIVE_YEAR_DAYS[year_index]; + n -= Date::DAYS_PER_YEAR_INTERVAL; + n -= fractions * Date::DAYS_PER_YEAR_INTERVAL; + } else if (year >= 2370) { + int32_t diff_from_base = year - 2370; + int32_t year_index = diff_from_base % 400; + int32_t fractions = diff_from_base / 400; + n += Date::CUMULATIVE_YEAR_DAYS[year_index]; + n += Date::DAYS_PER_YEAR_INTERVAL; + n += fractions * Date::DAYS_PER_YEAR_INTERVAL; } else { - auto &children = StructVector::GetEntries(v); - num_children = children.size(); - for (auto &struct_child : children) { - Vector struct_vector; - struct_vector.Reference(*struct_child.second); - struct_vectors.push_back(move(struct_vector)); - } + n += Date::CUMULATIVE_YEAR_DAYS[year - 1970]; } +#ifdef DEBUG + int32_t y, m, d; + Date::Convert(date_t(n), y, m, d); + D_ASSERT(year == y); + D_ASSERT(month == m); + D_ASSERT(day == d); +#endif + result = date_t(n); + return true; +} - // the whole struct itself can be NULL - auto byte_offset = col_idx / 8; - const auto bit = ~(1UL << (col_idx % 8)); - - // struct must have a validitymask for its fields - const idx_t struct_validitymask_size = (num_children + 7) / 8; - data_ptr_t struct_validitymask_locations[STANDARD_VECTOR_SIZE]; - for (idx_t i = 0; i < ser_count; i++) { - // initialize the struct validity mask - struct_validitymask_locations[i] = key_locations[i]; - memset(struct_validitymask_locations[i], -1, struct_validitymask_size); - key_locations[i] += struct_validitymask_size; - - // set whether the whole struct is null - auto idx = sel.get_index(i); - auto source_idx = vdata.sel->get_index(idx) + offset; - if (validitymask_locations && !vdata.validity.RowIsValid(source_idx)) { - *(validitymask_locations[i] + byte_offset) &= bit; - } +date_t 
Date::FromDate(int32_t year, int32_t month, int32_t day) { + date_t result; + if (!Date::TryFromDate(year, month, day, result)) { + throw ConversionException("Date out of range: %d-%d-%d", year, month, day); } + return result; +} - // now serialize the struct vectors - for (idx_t i = 0; i < struct_vectors.size(); i++) { - auto &struct_vector = struct_vectors[i]; - SerializeVector(struct_vector, vcount, sel, ser_count, i, key_locations, struct_validitymask_locations, offset); +bool Date::ParseDoubleDigit(const char *buf, idx_t len, idx_t &pos, int32_t &result) { + if (pos < len && StringUtil::CharacterIsDigit(buf[pos])) { + result = buf[pos++] - '0'; + if (pos < len && StringUtil::CharacterIsDigit(buf[pos])) { + result = (buf[pos++] - '0') + result * 10; + } + return true; } + return false; } -void RowChunk::SerializeListVector(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, idx_t col_idx, - data_ptr_t key_locations[], data_ptr_t validitymask_locations[], idx_t offset) { - VectorData vdata; - v.Orrify(vcount, vdata); - - auto byte_offset = col_idx / 8; - const auto bit = ~(1UL << (col_idx % 8)); - - auto list_data = GetListData(v); - auto &child_vector = ListVector::GetEntry(v); +bool Date::TryConvertDate(const char *buf, idx_t len, idx_t &pos, date_t &result, bool strict) { + pos = 0; + if (len == 0) { + return false; + } - VectorData list_vdata; - child_vector.Orrify(ListVector::GetListSize(v), list_vdata); - auto child_type = v.GetType().child_types()[0].second.InternalType(); + int32_t day = 0; + int32_t month = -1; + int32_t year = 0; + bool yearneg = false; + int sep; - idx_t list_entry_sizes[STANDARD_VECTOR_SIZE]; - data_ptr_t list_entry_locations[STANDARD_VECTOR_SIZE]; + // skip leading spaces + while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) { + pos++; + } - for (idx_t i = 0; i < ser_count; i++) { - auto idx = sel.get_index(i); - auto source_idx = vdata.sel->get_index(idx) + offset; - if 
(!vdata.validity.RowIsValid(source_idx)) { - if (validitymask_locations) { - // set the validitymask - *(validitymask_locations[i] + byte_offset) &= bit; - } - continue; + if (pos >= len) { + return false; + } + if (buf[pos] == '-') { + yearneg = true; + pos++; + if (pos >= len) { + return false; } - auto list_entry = list_data[source_idx]; + } + if (!StringUtil::CharacterIsDigit(buf[pos])) { + return false; + } + // first parse the year + for (; pos < len && StringUtil::CharacterIsDigit(buf[pos]); pos++) { + if (year >= 100000000) { + return false; + } + year = (buf[pos] - '0') + year * 10; + } + if (yearneg) { + year = -year; + } - // store list length - Store(list_entry.length, key_locations[i]); - key_locations[i] += sizeof(list_entry.length); + if (pos >= len) { + return false; + } - // make room for the validitymask - data_ptr_t list_validitymask_location = key_locations[i]; - idx_t entry_offset_in_byte = 0; - idx_t validitymask_size = (list_entry.length + 7) / 8; - memset(list_validitymask_location, -1, validitymask_size); - key_locations[i] += validitymask_size; + // fetch the separator + sep = buf[pos++]; + if (sep != ' ' && sep != '-' && sep != '/' && sep != '\\') { + // invalid separator + return false; + } - // serialize size of each entry (if non-constant size) - data_ptr_t var_entry_size_ptr = nullptr; - if (!TypeIsConstantSize(child_type)) { - var_entry_size_ptr = key_locations[i]; - key_locations[i] += list_entry.length * sizeof(idx_t); - } + // parse the month + if (!Date::ParseDoubleDigit(buf, len, pos, month)) { + return false; + } - auto entry_remaining = list_entry.length; - auto entry_offset = list_entry.offset; - while (entry_remaining > 0) { - // the list entry can span multiple vectors - auto next = MinValue((idx_t)STANDARD_VECTOR_SIZE, entry_remaining); + if (pos >= len) { + return false; + } - // serialize list validity - for (idx_t entry_idx = 0; entry_idx < next; entry_idx++) { - auto list_idx = list_vdata.sel->get_index(entry_idx) + 
entry_offset; - if (!list_vdata.validity.RowIsValid(list_idx)) { - *(list_validitymask_location) &= ~(1UL << entry_offset_in_byte); - } - if (++entry_offset_in_byte == 8) { - list_validitymask_location++; - entry_offset_in_byte = 0; - } - } + if (buf[pos++] != sep) { + return false; + } - if (TypeIsConstantSize(child_type)) { - // constant size list entries: set list entry locations - const idx_t type_size = GetTypeIdSize(child_type); - for (idx_t entry_idx = 0; entry_idx < next; entry_idx++) { - list_entry_locations[entry_idx] = key_locations[i]; - key_locations[i] += type_size; - } - } else { - // variable size list entries: compute entry sizes and set list entry locations - std::fill_n(list_entry_sizes, next, 0); - ComputeEntrySizes(child_vector, list_entry_sizes, next, entry_offset); - for (idx_t entry_idx = 0; entry_idx < next; entry_idx++) { - list_entry_locations[entry_idx] = key_locations[i]; - key_locations[i] += list_entry_sizes[entry_idx]; - Store(list_entry_sizes[entry_idx], var_entry_size_ptr); - var_entry_size_ptr += sizeof(idx_t); - } - } + if (pos >= len) { + return false; + } - // now serialize to the locations - SerializeVector(child_vector, ListVector::GetListSize(v), sel, next, 0, list_entry_locations, nullptr, - entry_offset); + // now parse the day + if (!Date::ParseDoubleDigit(buf, len, pos, day)) { + return false; + } - // update for next iteration - entry_remaining -= next; - entry_offset += next; + // check for an optional trailing " (BC)"" + if (len - pos >= 5 && StringUtil::CharacterIsSpace(buf[pos]) && buf[pos + 1] == '(' && + StringUtil::CharacterToLower(buf[pos + 2]) == 'b' && StringUtil::CharacterToLower(buf[pos + 3]) == 'c' && + buf[pos + 4] == ')') { + if (yearneg || year == 0) { + return false; } + year = -year + 1; + pos += 5; } -} -void RowChunk::SerializeVector(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, idx_t col_idx, - data_ptr_t key_locations[], data_ptr_t validitymask_locations[], idx_t offset) { - 
if (TypeIsConstantSize(v.GetType().InternalType())) { - VectorData vdata; - v.Orrify(vcount, vdata); - SerializeVectorData(vdata, v.GetType().InternalType(), sel, ser_count, col_idx, key_locations, - validitymask_locations, offset); + // in strict mode, check remaining string for non-space characters + if (strict) { + // skip trailing spaces + while (pos < len && StringUtil::CharacterIsSpace((unsigned char)buf[pos])) { + pos++; + } + // check position. if end was not reached, non-space chars remaining + if (pos < len) { + return false; + } } else { - switch (v.GetType().InternalType()) { - case PhysicalType::VARCHAR: - SerializeStringVector(v, vcount, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); - break; - case PhysicalType::STRUCT: - SerializeStructVector(v, vcount, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); - break; - case PhysicalType::LIST: - SerializeListVector(v, vcount, sel, ser_count, col_idx, key_locations, validitymask_locations, offset); - break; - default: - throw NotImplementedException("Serialization of variable length vector with type %s", - v.GetType().ToString()); + // in non-strict mode, check for any direct trailing digits + if (pos < len && StringUtil::CharacterIsDigit((unsigned char)buf[pos])) { + return false; } } + + return Date::TryFromDate(year, month, day, result); } -idx_t RowChunk::AppendToBlock(RowDataBlock &block, BufferHandle &handle, vector &append_entries, - idx_t remaining, idx_t entry_sizes[]) { - idx_t append_count = 0; - data_ptr_t dataptr; - if (entry_sizes) { - // compute how many entries fit if entry size if variable - dataptr = handle.node->buffer + block.byte_offset; - for (idx_t i = 0; i < remaining; i++) { - if (block.byte_offset + entry_sizes[i] > block_capacity * entry_size) { - while (entry_sizes[i] > block_capacity * entry_size) { - // if an entry does not fit, increase entry size until it does - entry_size *= 2; - } - break; - } - append_count++; - 
block.byte_offset += entry_sizes[i]; - } - } else { - append_count = MinValue(remaining, block.CAPACITY - block.count); - dataptr = handle.node->buffer + block.count * entry_size; - } - append_entries.emplace_back(dataptr, append_count); - block.count += append_count; - return append_count; +string Date::ConversionError(const string &str) { + return StringUtil::Format("date field value out of range: \"%s\", " + "expected format is (YYYY-MM-DD)", + str); } -void RowChunk::Build(idx_t added_count, data_ptr_t key_locations[], idx_t entry_sizes[]) { - vector> handles; - vector append_entries; +string Date::ConversionError(string_t str) { + return ConversionError(str.GetString()); +} - // first allocate space of where to serialize the keys and payload columns - idx_t remaining = added_count; - { - // first append to the last block (if any) - lock_guard append_lock(rc_lock); - count += added_count; - if (!blocks.empty()) { - auto &last_block = blocks.back(); - if (last_block.count < last_block.CAPACITY) { - // last block has space: pin the buffer of this block - auto handle = buffer_manager.Pin(last_block.block); - // now append to the block - idx_t append_count = AppendToBlock(last_block, *handle, append_entries, remaining, entry_sizes); - remaining -= append_count; - handles.push_back(move(handle)); - } - } - while (remaining > 0) { - // now for the remaining data, allocate new buffers to store the data and append there - RowDataBlock new_block(buffer_manager, block_capacity, entry_size); - auto handle = buffer_manager.Pin(new_block.block); +date_t Date::FromCString(const char *buf, idx_t len, bool strict) { + date_t result; + idx_t pos; + if (!TryConvertDate(buf, len, pos, result, strict)) { + throw ConversionException(ConversionError(string(buf, len))); + } + return result; +} - // offset the entry sizes array if we have added entries already - idx_t *offset_entry_sizes = entry_sizes ? 
entry_sizes + added_count - remaining : nullptr; +date_t Date::FromString(const string &str, bool strict) { + return Date::FromCString(str.c_str(), str.size(), strict); +} - idx_t append_count = AppendToBlock(new_block, *handle, append_entries, remaining, offset_entry_sizes); - remaining -= append_count; +string Date::ToString(date_t date) { + int32_t date_units[3]; + idx_t year_length; + bool add_bc; + Date::Convert(date, date_units[0], date_units[1], date_units[2]); - blocks.push_back(move(new_block)); - handles.push_back(move(handle)); - } - } - // now set up the key_locations based on the append entries - idx_t append_idx = 0; - for (auto &append_entry : append_entries) { - idx_t next = append_idx + append_entry.count; - if (entry_sizes) { - for (; append_idx < next; append_idx++) { - key_locations[append_idx] = append_entry.baseptr; - append_entry.baseptr += entry_sizes[append_idx]; - } - } else { - for (; append_idx < next; append_idx++) { - key_locations[append_idx] = append_entry.baseptr; - append_entry.baseptr += entry_size; - } - } - } + auto length = DateToStringCast::Length(date_units, year_length, add_bc); + auto buffer = unique_ptr(new char[length]); + DateToStringCast::Format(buffer.get(), date_units, year_length, add_bc); + return string(buffer.get(), length); } -template -static void TemplatedDeserializeIntoVector(Vector &v, idx_t count, idx_t col_idx, data_ptr_t *key_locations) { - auto target = FlatVector::GetData(v); - // fixed-size inner loop to allow unrolling - idx_t i; - for (i = 0; i + 7 < count; i += 8) { - for (idx_t j = 0; j < 8; j++) { - target[i + j] = Load(key_locations[i + j]); - key_locations[i + j] += sizeof(T); - } - } - // finishing up - for (; i < count; i++) { - target[i] = Load(key_locations[i]); - key_locations[i] += sizeof(T); - } +string Date::Format(int32_t year, int32_t month, int32_t day) { + return ToString(Date::FromDate(year, month, day)); } -static ValidityMask &GetValidity(Vector &v) { - switch (v.GetVectorType()) { 
- case VectorType::DICTIONARY_VECTOR: - return GetValidity(DictionaryVector::Child(v)); - case VectorType::FLAT_VECTOR: - return FlatVector::Validity(v); - case VectorType::CONSTANT_VECTOR: - return ConstantVector::Validity(v); - default: - throw NotImplementedException("FIXME: cannot deserialize vector with this vectortype"); - } +bool Date::IsLeapYear(int32_t year) { + return year % 4 == 0 && (year % 100 != 0 || year % 400 == 0); } -void RowChunk::DeserializeIntoStringVector(Vector &v, const idx_t &vcount, const idx_t &col_idx, - data_ptr_t *key_locations, data_ptr_t *validitymask_locations) { - const auto &validity = FlatVector::Validity(v); - const idx_t string_prefix_len = string_t::PREFIX_LENGTH; - auto target = FlatVector::GetData(v); - // fixed size inner loop to allow unrolling - idx_t i = 0; - if (validity.AllValid()) { - for (; i + 7 < vcount; i += 8) { - for (idx_t j = 0; j < 8; j++) { - auto len = Load(key_locations[i + j]); - key_locations[i + j] += string_prefix_len; - target[i + j] = StringVector::AddStringOrBlob(v, string_t((const char *)key_locations[i + j], len)); - key_locations[i + j] += len; +bool Date::IsValid(int32_t year, int32_t month, int32_t day) { + if (month < 1 || month > 12) { + return false; + } + if (day < 1) { + return false; + } + if (year <= DATE_MIN_YEAR) { + if (year < DATE_MIN_YEAR) { + return false; + } else if (year == DATE_MIN_YEAR) { + if (month < DATE_MIN_MONTH || (month == DATE_MIN_MONTH && day < DATE_MIN_DAY)) { + return false; } } } - // finishing up - for (; i < vcount; i++) { - if (!validity.RowIsValid(i)) { - continue; + if (year >= DATE_MAX_YEAR) { + if (year > DATE_MAX_YEAR) { + return false; + } else if (year == DATE_MAX_YEAR) { + if (month > DATE_MAX_MONTH || (month == DATE_MAX_MONTH && day > DATE_MAX_DAY)) { + return false; + } } - auto len = Load(key_locations[i]); - key_locations[i] += string_prefix_len; - target[i] = StringVector::AddStringOrBlob(v, string_t((const char *)key_locations[i], len)); - 
key_locations[i] += len; } + return Date::IsLeapYear(year) ? day <= Date::LEAP_DAYS[month] : day <= Date::NORMAL_DAYS[month]; } -void RowChunk::DeserializeIntoStructVector(Vector &v, const idx_t &vcount, const idx_t &col_idx, - data_ptr_t *key_locations, data_ptr_t *validitymask_locations) { - // struct must have a validitymask for its fields - auto &child_types = v.GetType().child_types(); - const idx_t struct_validitymask_size = (child_types.size() + 7) / 8; - data_ptr_t struct_validitymask_locations[STANDARD_VECTOR_SIZE]; - for (idx_t i = 0; i < vcount; i++) { - // use key_locations as the validitymask, and create struct_key_locations - struct_validitymask_locations[i] = key_locations[i]; - key_locations[i] += struct_validitymask_size; - } +int32_t Date::MonthDays(int32_t year, int32_t month) { + D_ASSERT(month >= 1 && month <= 12); + return Date::IsLeapYear(year) ? Date::LEAP_DAYS[month] : Date::NORMAL_DAYS[month]; +} - // now deserialize into the struct vectors - for (idx_t i = 0; i < child_types.size(); i++) { - auto new_child = make_unique(child_types[i].second); - DeserializeIntoVector(*new_child, vcount, i, key_locations, struct_validitymask_locations); - StructVector::AddEntry(v, child_types[i].first, move(new_child)); - } +date_t Date::EpochDaysToDate(int32_t epoch) { + return (date_t)epoch; } -void RowChunk::DeserializeIntoListVector(Vector &v, const idx_t &vcount, const idx_t &col_idx, - data_ptr_t *key_locations, data_ptr_t *validitymask_locations) { - const auto &validity = FlatVector::Validity(v); +int32_t Date::EpochDays(date_t date) { + return date.days; +} - auto child_type = v.GetType().child_types()[0].second; - auto list_data = GetListData(v); - data_ptr_t list_entry_locations[STANDARD_VECTOR_SIZE]; +date_t Date::EpochToDate(int64_t epoch) { + return date_t(epoch / Interval::SECS_PER_DAY); +} - ListVector::Initialize(v); - uint64_t entry_offset = ListVector::GetListSize(v); - for (idx_t i = 0; i < vcount; i++) { - if (!validity.RowIsValid(i)) 
{ - continue; - } - // read list length - auto entry_remaining = Load(key_locations[i]); - key_locations[i] += sizeof(uint64_t); - // set list entry attributes - list_data[i].length = entry_remaining; - list_data[i].offset = entry_offset; - // skip over the validity mask - data_ptr_t validitymask_location = key_locations[i]; - idx_t offset_in_byte = 0; - key_locations[i] += (entry_remaining + 7) / 8; - // entry sizes - data_ptr_t var_entry_size_ptr = nullptr; - if (!TypeIsConstantSize(child_type.InternalType())) { - var_entry_size_ptr = key_locations[i]; - key_locations[i] += entry_remaining * sizeof(idx_t); - } +int64_t Date::Epoch(date_t date) { + return ((int64_t)date.days) * Interval::SECS_PER_DAY; +} - // now read the list data - while (entry_remaining > 0) { - auto next = MinValue(entry_remaining, (idx_t)STANDARD_VECTOR_SIZE); +int64_t Date::EpochNanoseconds(date_t date) { + return ((int64_t)date.days) * (Interval::MICROS_PER_DAY * 1000); +} - // initialize a new vector to append - Vector append_vector(v.GetType()); - append_vector.SetVectorType(v.GetVectorType()); - ListVector::Initialize(append_vector); - auto &list_vec_to_append = ListVector::GetEntry(append_vector); +int32_t Date::ExtractYear(date_t d, int32_t *last_year) { + auto n = d.days; + // cached look up: check if year of this date is the same as the last one we looked up + // note that this only works for years in the range [1970, 2370] + if (n >= Date::CUMULATIVE_YEAR_DAYS[*last_year] && n < Date::CUMULATIVE_YEAR_DAYS[*last_year + 1]) { + return Date::EPOCH_YEAR + *last_year; + } + int32_t year; + Date::ExtractYearOffset(n, year, *last_year); + return year; +} - // set validity - auto &append_validity = GetValidity(list_vec_to_append); - for (idx_t entry_idx = 0; entry_idx < next; entry_idx++) { - append_validity.Set(entry_idx, *(validitymask_location) & (1 << offset_in_byte)); - if (++offset_in_byte == 8) { - validitymask_location++; - offset_in_byte = 0; - } - } +int32_t 
Date::ExtractYear(timestamp_t ts, int32_t *last_year) { + return Date::ExtractYear(Timestamp::GetDate(ts), last_year); +} - // compute entry sizes and set locations where the list entries are - if (TypeIsConstantSize(child_type.InternalType())) { - // constant size list entries - const idx_t type_size = GetTypeIdSize(child_type.InternalType()); - for (idx_t entry_idx = 0; entry_idx < next; entry_idx++) { - list_entry_locations[entry_idx] = key_locations[i]; - key_locations[i] += type_size; - } - } else { - // variable size list entries - for (idx_t entry_idx = 0; entry_idx < next; entry_idx++) { - list_entry_locations[entry_idx] = key_locations[i]; - key_locations[i] += Load(var_entry_size_ptr); - var_entry_size_ptr += sizeof(idx_t); - } - } +int32_t Date::ExtractYear(date_t d) { + int32_t year, year_offset; + Date::ExtractYearOffset(d.days, year, year_offset); + return year; +} - // now deserialize and add to listvector - DeserializeIntoVector(list_vec_to_append, next, 0, list_entry_locations, nullptr); - ListVector::Append(v, list_vec_to_append, next); +int32_t Date::ExtractMonth(date_t date) { + int32_t out_year, out_month, out_day; + Date::Convert(date, out_year, out_month, out_day); + return out_month; +} + +int32_t Date::ExtractDay(date_t date) { + int32_t out_year, out_month, out_day; + Date::Convert(date, out_year, out_month, out_day); + return out_day; +} + +int32_t Date::ExtractDayOfTheYear(date_t date) { + int32_t year, year_offset; + Date::ExtractYearOffset(date.days, year, year_offset); + return date.days - Date::CUMULATIVE_YEAR_DAYS[year_offset] + 1; +} - // update for next iteration - entry_remaining -= next; - entry_offset += next; - } +int32_t Date::ExtractISODayOfTheWeek(date_t date) { + // date of 0 is 1970-01-01, which was a Thursday (4) + // -7 = 4 + // -6 = 5 + // -5 = 6 + // -4 = 7 + // -3 = 1 + // -2 = 2 + // -1 = 3 + // 0 = 4 + // 1 = 5 + // 2 = 6 + // 3 = 7 + // 4 = 1 + // 5 = 2 + // 6 = 3 + // 7 = 4 + if (date.days < 0) { + // negative 
date: start off at 4 and cycle downwards + return (7 - ((-date.days + 3) % 7)); + } else { + // positive date: start off at 4 and cycle upwards + return ((date.days + 3) % 7) + 1; } } -void RowChunk::DeserializeIntoVector(Vector &v, const idx_t &vcount, const idx_t &col_idx, data_ptr_t key_locations[], - data_ptr_t validitymask_locations[]) { - auto &validity = FlatVector::Validity(v); - if (validitymask_locations) { - // validity mask is not yet set: deserialize it - const auto byte_offset = col_idx / 8; - const auto bit = 1 << (col_idx % 8); +static int32_t GetISOWeek(int32_t year, int32_t month, int32_t day) { + auto day_of_the_year = + (Date::IsLeapYear(year) ? Date::CUMULATIVE_LEAP_DAYS[month] : Date::CUMULATIVE_DAYS[month]) + day; + // get the first day of the first week of the year + // the first week is the week that has the 4th of January in it + auto day_of_the_fourth = Date::ExtractISODayOfTheWeek(Date::FromDate(year, 1, 4)); + // if fourth is monday, then fourth is the first day + // if fourth is tuesday, third is the first day + // if fourth is wednesday, second is the first day + // if fourth is thursday - sunday, first is the first day + auto first_day_of_the_first_week = day_of_the_fourth >= 4 ? 
0 : 5 - day_of_the_fourth; + if (day_of_the_year < first_day_of_the_first_week) { + // day is part of last year + return GetISOWeek(year - 1, 12, day); + } else { + return ((day_of_the_year - first_day_of_the_first_week) / 7) + 1; + } +} - // fixed-size inner loop to allow unrolling - idx_t i; - for (i = 0; i + 7 < vcount; i += 8) { - for (idx_t j = 0; j < 8; j++) { - bool valid = *(validitymask_locations[i + j] + byte_offset) & bit; - validity.Set(i + j, valid); - } - } +int32_t Date::ExtractISOWeekNumber(date_t date) { + int32_t year, month, day; + Date::Convert(date, year, month, day); + return GetISOWeek(year, month - 1, day - 1); +} - // finishing up - for (i = 0; i < vcount; i++) { - bool valid = *(validitymask_locations[i] + byte_offset) & bit; - validity.Set(i, valid); +int32_t Date::ExtractWeekNumberRegular(date_t date, bool monday_first) { + int32_t year, month, day; + Date::Convert(date, year, month, day); + month -= 1; + day -= 1; + // get the day of the year + auto day_of_the_year = + (Date::IsLeapYear(year) ? Date::CUMULATIVE_LEAP_DAYS[month] : Date::CUMULATIVE_DAYS[month]) + day; + // now figure out the first monday or sunday of the year + // what day is January 1st? 
+ auto day_of_jan_first = Date::ExtractISODayOfTheWeek(Date::FromDate(year, 1, 1)); + // monday = 1, sunday = 7 + int32_t first_week_start; + if (monday_first) { + // have to find next "1" + if (day_of_jan_first == 1) { + // jan 1 is monday: starts immediately + first_week_start = 0; + } else { + // jan 1 is not monday: count days until next monday + first_week_start = 8 - day_of_jan_first; } + } else { + first_week_start = 7 - day_of_jan_first; } - - auto type = v.GetType().InternalType(); - switch (type) { - case PhysicalType::BOOL: - case PhysicalType::INT8: - TemplatedDeserializeIntoVector(v, vcount, col_idx, key_locations); - break; - case PhysicalType::INT16: - TemplatedDeserializeIntoVector(v, vcount, col_idx, key_locations); - break; - case PhysicalType::INT32: - TemplatedDeserializeIntoVector(v, vcount, col_idx, key_locations); - break; - case PhysicalType::INT64: - TemplatedDeserializeIntoVector(v, vcount, col_idx, key_locations); - break; - case PhysicalType::UINT8: - TemplatedDeserializeIntoVector(v, vcount, col_idx, key_locations); - break; - case PhysicalType::UINT16: - TemplatedDeserializeIntoVector(v, vcount, col_idx, key_locations); - break; - case PhysicalType::UINT32: - TemplatedDeserializeIntoVector(v, vcount, col_idx, key_locations); - break; - case PhysicalType::UINT64: - TemplatedDeserializeIntoVector(v, vcount, col_idx, key_locations); - break; - case PhysicalType::INT128: - TemplatedDeserializeIntoVector(v, vcount, col_idx, key_locations); - break; - case PhysicalType::FLOAT: - TemplatedDeserializeIntoVector(v, vcount, col_idx, key_locations); - break; - case PhysicalType::DOUBLE: - TemplatedDeserializeIntoVector(v, vcount, col_idx, key_locations); - break; - case PhysicalType::HASH: - TemplatedDeserializeIntoVector(v, vcount, col_idx, key_locations); - break; - case PhysicalType::INTERVAL: - TemplatedDeserializeIntoVector(v, vcount, col_idx, key_locations); - break; - case PhysicalType::VARCHAR: - DeserializeIntoStringVector(v, vcount, 
col_idx, key_locations, validitymask_locations); - break; - case PhysicalType::STRUCT: - DeserializeIntoStructVector(v, vcount, col_idx, key_locations, validitymask_locations); - break; - case PhysicalType::LIST: - DeserializeIntoListVector(v, vcount, col_idx, key_locations, validitymask_locations); - break; - default: - throw NotImplementedException("FIXME: unimplemented deserialize from row-format"); + if (day_of_the_year < first_week_start) { + // day occurs before first week starts: week 0 + return 0; } + return ((day_of_the_year - first_week_start) / 7) + 1; } -} // namespace duckdb +// Returns the date of the monday of the current week. +date_t Date::GetMondayOfCurrentWeek(date_t date) { + int32_t dotw = Date::ExtractISODayOfTheWeek(date); + return date - (dotw - 1); +} +} // namespace duckdb namespace duckdb { -string SelectionVector::ToString(idx_t count) const { - string result = "Selection Vector (" + to_string(count) + ") ["; - for (idx_t i = 0; i < count; i++) { - if (i != 0) { - result += ", "; - } - result += to_string(get_index(i)); - } - result += "]"; - return result; +template +string TemplatedDecimalToString(SIGNED value, uint8_t scale) { + auto len = DecimalToString::DecimalLength(value, scale); + auto data = unique_ptr(new char[len + 1]); + DecimalToString::FormatDecimal(value, scale, data.get(), len); + return string(data.get(), len); } -void SelectionVector::Print(idx_t count) const { - Printer::Print(ToString(count)); +string Decimal::ToString(int16_t value, uint8_t scale) { + return TemplatedDecimalToString(value, scale); } -buffer_ptr SelectionVector::Slice(const SelectionVector &sel, idx_t count) const { - auto data = make_buffer(count); - auto result_ptr = data->owned_data.get(); - // for every element, we perform result[i] = target[new[i]] - for (idx_t i = 0; i < count; i++) { - auto new_idx = sel.get_index(i); - auto idx = this->get_index(new_idx); - result_ptr[i] = idx; - } - return data; +string Decimal::ToString(int32_t value, 
uint8_t scale) { + return TemplatedDecimalToString(value, scale); } -} // namespace duckdb +string Decimal::ToString(int64_t value, uint8_t scale) { + return TemplatedDecimalToString(value, scale); +} +string Decimal::ToString(hugeint_t value, uint8_t scale) { + auto len = HugeintToStringCast::DecimalLength(value, scale); + auto data = unique_ptr(new char[len + 1]); + HugeintToStringCast::FormatDecimal(value, scale, data.get(), len); + return string(data.get(), len); +} +} // namespace duckdb -#include +#include namespace duckdb { -#define MINIMUM_HEAP_SIZE 4096 +template <> +hash_t Hash(uint64_t val) { + return murmurhash64(val); +} -StringHeap::StringHeap() : tail(nullptr) { +template <> +hash_t Hash(int64_t val) { + return murmurhash64((uint64_t)val); } -string_t StringHeap::AddString(const char *data, idx_t len) { - D_ASSERT(Utf8Proc::Analyze(data, len) != UnicodeType::INVALID); - return AddBlob(data, len); +template <> +hash_t Hash(hugeint_t val) { + return murmurhash64(val.lower) ^ murmurhash64(val.upper); } -string_t StringHeap::AddString(const char *data) { - return AddString(data, strlen(data)); +template <> +hash_t Hash(float val) { + return std::hash {}(val); } -string_t StringHeap::AddString(const string &data) { - return AddString(data.c_str(), data.size()); +template <> +hash_t Hash(double val) { + return std::hash {}(val); } -string_t StringHeap::AddString(const string_t &data) { - return AddString(data.GetDataUnsafe(), data.GetSize()); +template <> +hash_t Hash(interval_t val) { + return Hash(val.days) ^ Hash(val.months) ^ Hash(val.micros); } -string_t StringHeap::AddBlob(const char *data, idx_t len) { - auto insert_string = EmptyString(len); - auto insert_pos = insert_string.GetDataWriteable(); - memcpy(insert_pos, data, len); - insert_string.Finalize(); - return insert_string; +template <> +hash_t Hash(const char *str) { + return Hash(str, strlen(str)); } -string_t StringHeap::EmptyString(idx_t len) { - D_ASSERT(len >= string_t::INLINE_LENGTH); - 
if (!chunk || chunk->current_position + len >= chunk->maximum_size) { - // have to make a new entry - auto new_chunk = make_unique(MaxValue(len, MINIMUM_HEAP_SIZE)); - new_chunk->prev = move(chunk); - chunk = move(new_chunk); - if (!tail) { - tail = chunk.get(); - } - } - auto insert_pos = chunk->data.get() + chunk->current_position; - chunk->current_position += len; - return string_t(insert_pos, len); +template <> +hash_t Hash(string_t val) { + return Hash(val.GetDataUnsafe(), val.GetSize()); } -void StringHeap::MergeHeap(StringHeap &other) { - if (!other.tail) { - return; - } - other.tail->prev = move(chunk); - this->chunk = move(other.chunk); - if (!tail) { - tail = this->chunk.get(); +template <> +hash_t Hash(char *val) { + return Hash(val); +} + +// Jenkins hash function: https://en.wikipedia.org/wiki/Jenkins_hash_function +uint32_t JenkinsOneAtATimeHash(const char *key, size_t length) { + size_t i = 0; + uint32_t hash = 0; + while (i != length) { + hash += key[i++]; + hash += hash << 10; + hash ^= hash >> 6; } - other.tail = nullptr; + hash += hash << 3; + hash ^= hash >> 11; + hash += hash << 15; + return hash; +} + +hash_t Hash(const char *val, size_t size) { + auto hash_val = JenkinsOneAtATimeHash(val, size); + return Hash(hash_val); +} + +hash_t Hash(uint8_t *val, size_t size) { + return Hash((const char *)val, size); } } // namespace duckdb @@ -23046,557 +28730,928 @@ void StringHeap::MergeHeap(StringHeap &other) { -namespace duckdb { +#include +#include -void string_t::Verify() { - auto dataptr = GetDataUnsafe(); - (void)dataptr; - D_ASSERT(dataptr); +namespace duckdb { -#ifdef DEBUG - auto utf_type = Utf8Proc::Analyze(dataptr, GetSize()); - D_ASSERT(utf_type != UnicodeType::INVALID); -#endif +//===--------------------------------------------------------------------===// +// String Conversion +//===--------------------------------------------------------------------===// +const hugeint_t Hugeint::POWERS_OF_TEN[] { + hugeint_t(1), + hugeint_t(10), + 
hugeint_t(100), + hugeint_t(1000), + hugeint_t(10000), + hugeint_t(100000), + hugeint_t(1000000), + hugeint_t(10000000), + hugeint_t(100000000), + hugeint_t(1000000000), + hugeint_t(10000000000), + hugeint_t(100000000000), + hugeint_t(1000000000000), + hugeint_t(10000000000000), + hugeint_t(100000000000000), + hugeint_t(1000000000000000), + hugeint_t(10000000000000000), + hugeint_t(100000000000000000), + hugeint_t(1000000000000000000), + hugeint_t(1000000000000000000) * hugeint_t(10), + hugeint_t(1000000000000000000) * hugeint_t(100), + hugeint_t(1000000000000000000) * hugeint_t(1000), + hugeint_t(1000000000000000000) * hugeint_t(10000), + hugeint_t(1000000000000000000) * hugeint_t(100000), + hugeint_t(1000000000000000000) * hugeint_t(1000000), + hugeint_t(1000000000000000000) * hugeint_t(10000000), + hugeint_t(1000000000000000000) * hugeint_t(100000000), + hugeint_t(1000000000000000000) * hugeint_t(1000000000), + hugeint_t(1000000000000000000) * hugeint_t(10000000000), + hugeint_t(1000000000000000000) * hugeint_t(100000000000), + hugeint_t(1000000000000000000) * hugeint_t(1000000000000), + hugeint_t(1000000000000000000) * hugeint_t(10000000000000), + hugeint_t(1000000000000000000) * hugeint_t(100000000000000), + hugeint_t(1000000000000000000) * hugeint_t(1000000000000000), + hugeint_t(1000000000000000000) * hugeint_t(10000000000000000), + hugeint_t(1000000000000000000) * hugeint_t(100000000000000000), + hugeint_t(1000000000000000000) * hugeint_t(1000000000000000000), + hugeint_t(1000000000000000000) * hugeint_t(1000000000000000000) * hugeint_t(10), + hugeint_t(1000000000000000000) * hugeint_t(1000000000000000000) * hugeint_t(100)}; - // verify that the prefix contains the first four characters of the string - for (idx_t i = 0; i < MinValue(PREFIX_LENGTH, GetSize()); i++) { - D_ASSERT(GetPrefix()[i] == dataptr[i]); - } - // verify that for strings with length < PREFIX_LENGTH, the rest of the prefix is zero - for (idx_t i = GetSize(); i < PREFIX_LENGTH; i++) { - 
D_ASSERT(GetPrefix()[i] == '\0'); +static uint8_t PositiveHugeintHighestBit(hugeint_t bits) { + uint8_t out = 0; + if (bits.upper) { + out = 64; + uint64_t up = bits.upper; + while (up) { + up >>= 1; + out++; + } + } else { + uint64_t low = bits.lower; + while (low) { + low >>= 1; + out++; + } } + return out; } -void string_t::VerifyNull() { - for (idx_t i = 0; i < GetSize(); i++) { - D_ASSERT(GetDataUnsafe()[i] != '\0'); +static bool PositiveHugeintIsBitSet(hugeint_t lhs, uint8_t bit_position) { + if (bit_position < 64) { + return lhs.lower & (uint64_t(1) << uint64_t(bit_position)); + } else { + return lhs.upper & (uint64_t(1) << uint64_t(bit_position - 64)); } } -} // namespace duckdb +hugeint_t PositiveHugeintLeftShift(hugeint_t lhs, uint32_t amount) { + D_ASSERT(amount > 0 && amount < 64); + hugeint_t result; + result.lower = lhs.lower << amount; + result.upper = (lhs.upper << amount) + (lhs.lower >> (64 - amount)); + return result; +} + +hugeint_t Hugeint::DivModPositive(hugeint_t lhs, uint64_t rhs, uint64_t &remainder) { + D_ASSERT(lhs.upper >= 0); + // DivMod code adapted from: + // https://github.com/calccrypto/uint128_t/blob/master/uint128_t.cpp + + // initialize the result and remainder to 0 + hugeint_t div_result; + div_result.lower = 0; + div_result.upper = 0; + remainder = 0; + + uint8_t highest_bit_set = PositiveHugeintHighestBit(lhs); + // now iterate over the amount of bits that are set in the LHS + for (uint8_t x = highest_bit_set; x > 0; x--) { + // left-shift the current result and remainder by 1 + div_result = PositiveHugeintLeftShift(div_result, 1); + remainder <<= 1; + // we get the value of the bit at position X, where position 0 is the least-significant bit + if (PositiveHugeintIsBitSet(lhs, x - 1)) { + // increment the remainder + remainder++; + } + if (remainder >= rhs) { + // the remainder has passed the division multiplier: add one to the divide result + remainder -= rhs; + div_result.lower++; + if (div_result.lower == 0) { + // overflow 
+ div_result.upper++; + } + } + } + return div_result; +} +string Hugeint::ToString(hugeint_t input) { + uint64_t remainder; + string result; + bool negative = input.upper < 0; + if (negative) { + NegateInPlace(input); + } + while (true) { + if (!input.lower && !input.upper) { + break; + } + input = Hugeint::DivModPositive(input, 10, remainder); + result = string(1, '0' + remainder) + result; // NOLINT + } + if (result.empty()) { + // value is zero + return "0"; + } + return negative ? "-" + result : result; +} +//===--------------------------------------------------------------------===// +// Multiply +//===--------------------------------------------------------------------===// +bool Hugeint::TryMultiply(hugeint_t lhs, hugeint_t rhs, hugeint_t &result) { + bool lhs_negative = lhs.upper < 0; + bool rhs_negative = rhs.upper < 0; + if (lhs_negative) { + NegateInPlace(lhs); + } + if (rhs_negative) { + NegateInPlace(rhs); + } +#if ((__GNUC__ >= 5) || defined(__clang__)) && defined(__SIZEOF_INT128__) + __uint128_t left = __uint128_t(lhs.lower) + (__uint128_t(lhs.upper) << 64); + __uint128_t right = __uint128_t(rhs.lower) + (__uint128_t(rhs.upper) << 64); + __uint128_t result_i128; + if (__builtin_mul_overflow(left, right, &result_i128)) { + return false; + } + uint64_t upper = uint64_t(result_i128 >> 64); + if (upper & 0x8000000000000000) { + return false; + } + result.upper = int64_t(upper); + result.lower = uint64_t(result_i128 & 0xffffffffffffffff); +#else + // Multiply code adapted from: + // https://github.com/calccrypto/uint128_t/blob/master/uint128_t.cpp + // split values into 4 32-bit parts + uint64_t top[4] = {uint64_t(lhs.upper) >> 32, uint64_t(lhs.upper) & 0xffffffff, lhs.lower >> 32, + lhs.lower & 0xffffffff}; + uint64_t bottom[4] = {uint64_t(rhs.upper) >> 32, uint64_t(rhs.upper) & 0xffffffff, rhs.lower >> 32, + rhs.lower & 0xffffffff}; + uint64_t products[4][4]; + // multiply each component of the values + for (auto x = 0; x < 4; x++) { + for (auto y = 0; 
y < 4; y++) { + products[x][y] = top[x] * bottom[y]; + } + } + // if any of these products are set to a non-zero value, there is always an overflow + if (products[0][0] || products[0][1] || products[0][2] || products[1][0] || products[2][0] || products[1][1]) { + return false; + } + // if the high bits of any of these are set, there is always an overflow + if ((products[0][3] & 0xffffffff80000000) || (products[1][2] & 0xffffffff80000000) || + (products[2][1] & 0xffffffff80000000) || (products[3][0] & 0xffffffff80000000)) { + return false; + } + // otherwise we merge the result of the different products together in-order + // first row + uint64_t fourth32 = (products[3][3] & 0xffffffff); + uint64_t third32 = (products[3][2] & 0xffffffff) + (products[3][3] >> 32); + uint64_t second32 = (products[3][1] & 0xffffffff) + (products[3][2] >> 32); + uint64_t first32 = (products[3][0] & 0xffffffff) + (products[3][1] >> 32); -#include -#include -#include + // second row + third32 += (products[2][3] & 0xffffffff); + second32 += (products[2][2] & 0xffffffff) + (products[2][3] >> 32); + first32 += (products[2][1] & 0xffffffff) + (products[2][2] >> 32); -namespace duckdb { + // third row + second32 += (products[1][3] & 0xffffffff); + first32 += (products[1][2] & 0xffffffff) + (products[1][3] >> 32); -// string format is hh:mm:ss.microsecondsZ -// microseconds and Z are optional -// ISO 8601 + // fourth row + first32 += (products[0][3] & 0xffffffff); -bool Time::TryConvertTime(const char *buf, idx_t len, idx_t &pos, dtime_t &result, bool strict) { - int32_t hour = -1, min = -1, sec = -1, micros = -1; - pos = 0; + // move carry to next digit + third32 += fourth32 >> 32; + second32 += third32 >> 32; + first32 += second32 >> 32; - if (len == 0) { + // check if the combination of the different products resulted in an overflow + if (first32 & 0xffffff80000000) { return false; } - int sep; + // remove carry from current digit + fourth32 &= 0xffffffff; + third32 &= 0xffffffff; + second32 
&= 0xffffffff; + first32 &= 0xffffffff; - // skip leading spaces - while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) { - pos++; + // combine components + result.lower = (third32 << 32) | fourth32; + result.upper = (first32 << 32) | second32; +#endif + if (lhs_negative ^ rhs_negative) { + NegateInPlace(result); } + return true; +} - if (pos >= len) { - return false; +hugeint_t Hugeint::Multiply(hugeint_t lhs, hugeint_t rhs) { + hugeint_t result; + if (!TryMultiply(lhs, rhs, result)) { + throw OutOfRangeException("Overflow in HUGEINT multiplication!"); } + return result; +} - if (!StringUtil::CharacterIsDigit(buf[pos])) { - return false; - } +//===--------------------------------------------------------------------===// +// Divide +//===--------------------------------------------------------------------===// +hugeint_t Hugeint::DivMod(hugeint_t lhs, hugeint_t rhs, hugeint_t &remainder) { + // division by zero not allowed + D_ASSERT(!(rhs.upper == 0 && rhs.lower == 0)); - if (!Date::ParseDoubleDigit(buf, len, pos, hour)) { - return false; + bool lhs_negative = lhs.upper < 0; + bool rhs_negative = rhs.upper < 0; + if (lhs_negative) { + Hugeint::NegateInPlace(lhs); } - if (hour < 0 || hour >= 24) { - return false; + if (rhs_negative) { + Hugeint::NegateInPlace(rhs); } + // DivMod code adapted from: + // https://github.com/calccrypto/uint128_t/blob/master/uint128_t.cpp - if (pos >= len) { - return false; - } + // initialize the result and remainder to 0 + hugeint_t div_result; + div_result.lower = 0; + div_result.upper = 0; + remainder.lower = 0; + remainder.upper = 0; - // fetch the separator - sep = buf[pos++]; - if (sep != ':') { - // invalid separator - return false; - } + uint8_t highest_bit_set = PositiveHugeintHighestBit(lhs); + // now iterate over the amount of bits that are set in the LHS + for (uint8_t x = highest_bit_set; x > 0; x--) { + // left-shift the current result and remainder by 1 + div_result = PositiveHugeintLeftShift(div_result, 1); + 
remainder = PositiveHugeintLeftShift(remainder, 1); - if (!Date::ParseDoubleDigit(buf, len, pos, min)) { - return false; + // we get the value of the bit at position X, where position 0 is the least-significant bit + if (PositiveHugeintIsBitSet(lhs, x - 1)) { + // increment the remainder + Hugeint::AddInPlace(remainder, 1); + } + if (Hugeint::GreaterThanEquals(remainder, rhs)) { + // the remainder has passed the division multiplier: add one to the divide result + remainder = Hugeint::Subtract(remainder, rhs); + Hugeint::AddInPlace(div_result, 1); + } } - if (min < 0 || min >= 60) { - return false; + if (lhs_negative ^ rhs_negative) { + Hugeint::NegateInPlace(div_result); } - - if (pos >= len) { - return false; + if (lhs_negative) { + Hugeint::NegateInPlace(remainder); } + return div_result; +} - if (buf[pos++] != sep) { +hugeint_t Hugeint::Divide(hugeint_t lhs, hugeint_t rhs) { + hugeint_t remainder; + return Hugeint::DivMod(lhs, rhs, remainder); +} + +hugeint_t Hugeint::Modulo(hugeint_t lhs, hugeint_t rhs) { + hugeint_t remainder; + Hugeint::DivMod(lhs, rhs, remainder); + return remainder; +} + +//===--------------------------------------------------------------------===// +// Add/Subtract +//===--------------------------------------------------------------------===// +bool Hugeint::AddInPlace(hugeint_t &lhs, hugeint_t rhs) { + int overflow = lhs.lower + rhs.lower < lhs.lower; + if (rhs.upper >= 0) { + // RHS is positive: check for overflow + if (lhs.upper > (std::numeric_limits::max() - rhs.upper - overflow)) { + return false; + } + lhs.upper = lhs.upper + overflow + rhs.upper; + } else { + // RHS is negative: check for underflow + if (lhs.upper < std::numeric_limits::min() - rhs.upper - overflow) { + return false; + } + lhs.upper = lhs.upper + (overflow + rhs.upper); + } + lhs.lower += rhs.lower; + if (lhs.upper == std::numeric_limits::min() && lhs.lower == 0) { return false; } + return true; +} - if (!Date::ParseDoubleDigit(buf, len, pos, sec)) { - return 
false; +bool Hugeint::SubtractInPlace(hugeint_t &lhs, hugeint_t rhs) { + // underflow + int underflow = lhs.lower - rhs.lower > lhs.lower; + if (rhs.upper >= 0) { + // RHS is positive: check for underflow + if (lhs.upper < (std::numeric_limits::min() + rhs.upper + underflow)) { + return false; + } + lhs.upper = (lhs.upper - rhs.upper) - underflow; + } else { + // RHS is negative: check for overflow + if (lhs.upper > std::numeric_limits::min() && + lhs.upper - 1 >= (std::numeric_limits::max() + rhs.upper + underflow)) { + return false; + } + lhs.upper = lhs.upper - (rhs.upper + underflow); } - if (sec < 0 || sec > 60) { + lhs.lower -= rhs.lower; + if (lhs.upper == std::numeric_limits::min() && lhs.lower == 0) { return false; } + return true; +} - micros = 0; - if (pos < len && buf[pos] == '.') { - pos++; - // we expect some microseconds - int32_t mult = 100000; - for (; pos < len && StringUtil::CharacterIsDigit(buf[pos]); pos++, mult /= 10) { - if (mult > 0) { - micros += (buf[pos] - '0') * mult; - } - } +hugeint_t Hugeint::Add(hugeint_t lhs, hugeint_t rhs) { + if (!AddInPlace(lhs, rhs)) { + throw OutOfRangeException("Overflow in HUGEINT addition"); } + return lhs; +} - // in strict mode, check remaining string for non-space characters - if (strict) { - // skip trailing spaces - while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) { - pos++; +hugeint_t Hugeint::Subtract(hugeint_t lhs, hugeint_t rhs) { + if (!SubtractInPlace(lhs, rhs)) { + throw OutOfRangeException("Underflow in HUGEINT addition"); + } + return lhs; +} + +//===--------------------------------------------------------------------===// +// Hugeint Cast/Conversion +//===--------------------------------------------------------------------===// +template +bool HugeintTryCastInteger(hugeint_t input, DST &result) { + switch (input.upper) { + case 0: + // positive number: check if the positive number is in range + if (input.lower <= uint64_t(NumericLimits::Maximum())) { + result = DST(input.lower); + 
return true; } - // check position. if end was not reached, non-space chars remaining - if (pos < len) { + break; + case -1: + if (!SIGNED) { return false; } + // negative number: check if the negative number is in range + if (input.lower >= NumericLimits::Maximum() - uint64_t(NumericLimits::Maximum())) { + result = -DST(NumericLimits::Maximum() - input.lower) - 1; + return true; + } + break; + default: + break; } + return false; +} - result = Time::FromTime(hour, min, sec, micros); +template <> +bool Hugeint::TryCast(hugeint_t input, int8_t &result) { + return HugeintTryCastInteger(input, result); +} + +template <> +bool Hugeint::TryCast(hugeint_t input, int16_t &result) { + return HugeintTryCastInteger(input, result); +} + +template <> +bool Hugeint::TryCast(hugeint_t input, int32_t &result) { + return HugeintTryCastInteger(input, result); +} + +template <> +bool Hugeint::TryCast(hugeint_t input, int64_t &result) { + return HugeintTryCastInteger(input, result); +} + +template <> +bool Hugeint::TryCast(hugeint_t input, uint8_t &result) { + return HugeintTryCastInteger(input, result); +} + +template <> +bool Hugeint::TryCast(hugeint_t input, uint16_t &result) { + return HugeintTryCastInteger(input, result); +} + +template <> +bool Hugeint::TryCast(hugeint_t input, uint32_t &result) { + return HugeintTryCastInteger(input, result); +} + +template <> +bool Hugeint::TryCast(hugeint_t input, uint64_t &result) { + return HugeintTryCastInteger(input, result); +} + +template <> +bool Hugeint::TryCast(hugeint_t input, hugeint_t &result) { + result = input; return true; } -dtime_t Time::FromCString(const char *buf, idx_t len, bool strict) { - dtime_t result; - idx_t pos; - if (!TryConvertTime(buf, len, pos, result, strict)) { - // last chance, check if we can parse as timestamp - if (!strict) { - return Timestamp::GetTime(Timestamp::FromCString(buf, len)); - } - throw ConversionException("time field value out of range: \"%s\", " - "expected format is ([YYY-MM-DD 
]HH:MM:SS[.MS])", - string(buf, len)); +template <> +bool Hugeint::TryCast(hugeint_t input, float &result) { + double dbl_result; + Hugeint::TryCast(input, dbl_result); + result = (float)dbl_result; + return true; +} + +template <> +bool Hugeint::TryCast(hugeint_t input, double &result) { + switch (input.upper) { + case -1: + // special case for upper = -1 to avoid rounding issues in small negative numbers + result = -double(NumericLimits::Maximum() - input.lower) - 1; + break; + default: + result = double(input.lower) + double(input.upper) * double(NumericLimits::Maximum()); + break; } + return true; +} + +template +hugeint_t HugeintConvertInteger(DST input) { + hugeint_t result; + result.lower = (uint64_t)input; + result.upper = (input < 0) * -1; return result; } -dtime_t Time::FromString(const string &str, bool strict) { - return Time::FromCString(str.c_str(), str.size(), strict); +template <> +bool Hugeint::TryConvert(int8_t value, hugeint_t &result) { + result = HugeintConvertInteger(value); + return true; } -string Time::ToString(dtime_t time) { - int32_t time_units[4]; - Time::Convert(time, time_units[0], time_units[1], time_units[2], time_units[3]); +template <> +bool Hugeint::TryConvert(int16_t value, hugeint_t &result) { + result = HugeintConvertInteger(value); + return true; +} - char micro_buffer[6]; - auto length = TimeToStringCast::Length(time_units, micro_buffer); - auto buffer = unique_ptr(new char[length]); - TimeToStringCast::Format(buffer.get(), length, time_units, micro_buffer); - return string(buffer.get(), length); +template <> +bool Hugeint::TryConvert(int32_t value, hugeint_t &result) { + result = HugeintConvertInteger(value); + return true; } -string Time::Format(int32_t hour, int32_t minute, int32_t second, int32_t microseconds) { - return ToString(Time::FromTime(hour, minute, second, microseconds)); +template <> +bool Hugeint::TryConvert(int64_t value, hugeint_t &result) { + result = HugeintConvertInteger(value); + return true; +} 
+template <> +bool Hugeint::TryConvert(uint8_t value, hugeint_t &result) { + result = HugeintConvertInteger(value); + return true; +} +template <> +bool Hugeint::TryConvert(uint16_t value, hugeint_t &result) { + result = HugeintConvertInteger(value); + return true; +} +template <> +bool Hugeint::TryConvert(uint32_t value, hugeint_t &result) { + result = HugeintConvertInteger(value); + return true; +} +template <> +bool Hugeint::TryConvert(uint64_t value, hugeint_t &result) { + result = HugeintConvertInteger(value); + return true; } -dtime_t Time::FromTime(int32_t hour, int32_t minute, int32_t second, int32_t microseconds) { - dtime_t result; - result = hour; // hours - result = result * Interval::MINS_PER_HOUR + minute; // hours -> minutes - result = result * Interval::SECS_PER_MINUTE + second; // minutes -> seconds - result = result * Interval::MICROS_PER_SEC + microseconds; // seconds -> microseconds - return result; +template <> +bool Hugeint::TryConvert(float value, hugeint_t &result) { + return Hugeint::TryConvert(double(value), result); } -bool Time::IsValidTime(int32_t hour, int32_t minute, int32_t second, int32_t microseconds) { - if (hour < 0 || hour >= 24) { - return false; - } - if (minute < 0 || minute >= 60) { +template <> +bool Hugeint::TryConvert(double value, hugeint_t &result) { + if (value <= -170141183460469231731687303715884105728.0 || value >= 170141183460469231731687303715884105727.0) { return false; } - if (second < 0 || second > 60) { - return false; + bool negative = value < 0; + if (negative) { + value = -value; } - if (microseconds < 0 || microseconds > 1000000) { - return false; + result.lower = (uint64_t)fmod(value, double(NumericLimits::Maximum())); + result.upper = (uint64_t)(value / double(NumericLimits::Maximum())); + if (negative) { + NegateInPlace(result); } return true; } -void Time::Convert(dtime_t time, int32_t &hour, int32_t &min, int32_t &sec, int32_t µs) { - hour = int32_t(time / Interval::MICROS_PER_HOUR); - time -= 
dtime_t(hour) * Interval::MICROS_PER_HOUR; - min = int32_t(time / Interval::MICROS_PER_MINUTE); - time -= dtime_t(min) * Interval::MICROS_PER_MINUTE; - sec = int32_t(time / Interval::MICROS_PER_SEC); - time -= dtime_t(sec) * Interval::MICROS_PER_SEC; - micros = int32_t(time); - D_ASSERT(IsValidTime(hour, min, sec, micros)); +//===--------------------------------------------------------------------===// +// hugeint_t operators +//===--------------------------------------------------------------------===// +hugeint_t::hugeint_t(int64_t value) { + auto result = Hugeint::Convert(value); + this->lower = result.lower; + this->upper = result.upper; } -} // namespace duckdb +bool hugeint_t::operator==(const hugeint_t &rhs) const { + return Hugeint::Equals(*this, rhs); +} +bool hugeint_t::operator!=(const hugeint_t &rhs) const { + return Hugeint::NotEquals(*this, rhs); +} +bool hugeint_t::operator<(const hugeint_t &rhs) const { + return Hugeint::LessThan(*this, rhs); +} +bool hugeint_t::operator<=(const hugeint_t &rhs) const { + return Hugeint::LessThanEquals(*this, rhs); +} +bool hugeint_t::operator>(const hugeint_t &rhs) const { + return Hugeint::GreaterThan(*this, rhs); +} +bool hugeint_t::operator>=(const hugeint_t &rhs) const { + return Hugeint::GreaterThanEquals(*this, rhs); +} +hugeint_t hugeint_t::operator+(const hugeint_t &rhs) const { + return Hugeint::Add(*this, rhs); +} +hugeint_t hugeint_t::operator-(const hugeint_t &rhs) const { + return Hugeint::Subtract(*this, rhs); +} +hugeint_t hugeint_t::operator*(const hugeint_t &rhs) const { + return Hugeint::Multiply(*this, rhs); +} -#include +hugeint_t hugeint_t::operator/(const hugeint_t &rhs) const { + return Hugeint::Divide(*this, rhs); +} -namespace duckdb { +hugeint_t hugeint_t::operator%(const hugeint_t &rhs) const { + return Hugeint::Modulo(*this, rhs); +} -// timestamp/datetime uses 64 bits, high 32 bits for date and low 32 bits for time -// string format is YYYY-MM-DDThh:mm:ssZ -// T may be a space -// Z is 
optional -// ISO 8601 -timestamp_t Timestamp::FromCString(const char *str, idx_t len) { - idx_t pos; - date_t date; - dtime_t time; - if (!Date::TryConvertDate(str, len, pos, date)) { - throw ConversionException("timestamp field value out of range: \"%s\", " - "expected format is (YYYY-MM-DD HH:MM:SS[.MS])", - string(str, len)); +hugeint_t hugeint_t::operator-() const { + return Hugeint::Negate(*this); +} + +hugeint_t hugeint_t::operator>>(const hugeint_t &rhs) const { + if (upper < 0) { + return hugeint_t(0); } - if (pos == len) { - // no time: only a date - return Timestamp::FromDatetime(date, 0); + hugeint_t result; + uint64_t shift = rhs.lower; + if (rhs.upper != 0 || shift >= 128) { + return hugeint_t(0); + } else if (shift == 64) { + result.upper = 0; + result.lower = upper; + } else if (shift == 0) { + return *this; + } else if (shift < 64) { + // perform upper shift in unsigned integer, and mask away the most significant bit + result.lower = (uint64_t(upper) << (64 - shift)) + (lower >> shift); + result.upper = uint64_t(upper) >> shift; + } else { + D_ASSERT(shift < 128); + result.lower = uint64_t(upper) >> (shift - 64); + result.upper = 0; } - // try to parse a time field - if (str[pos] == ' ' || str[pos] == 'T') { - pos++; + return result; +} + +hugeint_t hugeint_t::operator<<(const hugeint_t &rhs) const { + if (upper < 0) { + return hugeint_t(0); } - idx_t time_pos = 0; - if (!Time::TryConvertTime(str + pos, len - pos, time_pos, time)) { - throw ConversionException("timestamp field value out of range: \"%s\", " - "expected format is (YYYY-MM-DD HH:MM:SS[.MS])", - string(str, len)); + hugeint_t result; + uint64_t shift = rhs.lower; + if (rhs.upper != 0 || shift >= 128) { + return hugeint_t(0); + } else if (shift == 64) { + result.upper = lower; + result.lower = 0; + } else if (shift == 0) { + return *this; + } else if (shift < 64) { + // perform upper shift in unsigned integer, and mask away the most significant bit + uint64_t upper_shift = 
((uint64_t(upper) << shift) + (lower >> (64 - shift))) & 0x7FFFFFFFFFFFFFFF; + result.lower = lower << shift; + result.upper = upper_shift; + } else { + D_ASSERT(shift < 128); + result.lower = 0; + result.upper = (lower << (shift - 64)) & 0x7FFFFFFFFFFFFFFF; } - pos += time_pos; - auto timestamp = Timestamp::FromDatetime(date, time); - if (pos < len) { - // skip a "Z" at the end (as per the ISO8601 specs) - if (str[pos] == 'Z') { - pos++; - } - int hour_offset, minute_offset; - if (Timestamp::TryParseUTCOffset(str, pos, len, hour_offset, minute_offset)) { - timestamp -= hour_offset * Interval::MICROS_PER_HOUR + minute_offset * Interval::MICROS_PER_MINUTE; - } + return result; +} - // skip any spaces at the end - while (pos < len && StringUtil::CharacterIsSpace(str[pos])) { - pos++; - } - if (pos < len) { - throw ConversionException("timestamp field value out of range: \"%s\", " - "expected format is (YYYY-MM-DD HH:MM:SS[.MS])", - string(str, len)); - } - } - return timestamp; +hugeint_t hugeint_t::operator&(const hugeint_t &rhs) const { + hugeint_t result; + result.lower = lower & rhs.lower; + result.upper = upper & rhs.upper; + return result; } -bool Timestamp::TryParseUTCOffset(const char *str, idx_t &pos, idx_t len, int &hour_offset, int &minute_offset) { - minute_offset = 0; - idx_t curpos = pos; - // parse the next 3 characters - if (curpos + 3 > len) { - // no characters left to parse - return false; - } - char sign_char = str[curpos]; - if (sign_char != '+' && sign_char != '-') { - // expected either + or - - return false; - } - curpos++; - if (!StringUtil::CharacterIsDigit(str[curpos]) || !StringUtil::CharacterIsDigit(str[curpos + 1])) { - // expected +HH or -HH - return false; - } - hour_offset = (str[curpos] - '0') * 10 + (str[curpos + 1] - '0'); - if (sign_char == '-') { - hour_offset = -hour_offset; - } - curpos += 2; +hugeint_t hugeint_t::operator|(const hugeint_t &rhs) const { + hugeint_t result; + result.lower = lower | rhs.lower; + result.upper = 
upper | rhs.upper; + return result; +} - // optional minute specifier: expected either "MM" or ":MM" - if (curpos >= len) { - // done, nothing left - pos = curpos; - return true; - } - if (str[curpos] == ':') { - curpos++; - } - if (curpos + 2 > len || !StringUtil::CharacterIsDigit(str[curpos]) || - !StringUtil::CharacterIsDigit(str[curpos + 1])) { - // no MM specifier - pos = curpos; - return true; - } - // we have an MM specifier: parse it - minute_offset = (str[curpos] - '0') * 10 + (str[curpos + 1] - '0'); - if (sign_char == '-') { - minute_offset = -minute_offset; - } - pos = curpos + 2; - return true; +hugeint_t hugeint_t::operator^(const hugeint_t &rhs) const { + hugeint_t result; + result.lower = lower ^ rhs.lower; + result.upper = upper ^ rhs.upper; + return result; +} + +hugeint_t hugeint_t::operator~() const { + hugeint_t result; + result.lower = ~lower; + result.upper = ~upper; + return result; +} + +hugeint_t &hugeint_t::operator+=(const hugeint_t &rhs) { + Hugeint::AddInPlace(*this, rhs); + return *this; +} +hugeint_t &hugeint_t::operator-=(const hugeint_t &rhs) { + Hugeint::SubtractInPlace(*this, rhs); + return *this; +} +hugeint_t &hugeint_t::operator*=(const hugeint_t &rhs) { + *this = Hugeint::Multiply(*this, rhs); + return *this; +} +hugeint_t &hugeint_t::operator/=(const hugeint_t &rhs) { + *this = Hugeint::Divide(*this, rhs); + return *this; +} +hugeint_t &hugeint_t::operator%=(const hugeint_t &rhs) { + *this = Hugeint::Modulo(*this, rhs); + return *this; +} +hugeint_t &hugeint_t::operator>>=(const hugeint_t &rhs) { + *this = *this >> rhs; + return *this; +} +hugeint_t &hugeint_t::operator<<=(const hugeint_t &rhs) { + *this = *this << rhs; + return *this; +} +hugeint_t &hugeint_t::operator&=(const hugeint_t &rhs) { + lower &= rhs.lower; + upper &= rhs.upper; + return *this; +} +hugeint_t &hugeint_t::operator|=(const hugeint_t &rhs) { + lower |= rhs.lower; + upper |= rhs.upper; + return *this; +} +hugeint_t &hugeint_t::operator^=(const hugeint_t 
&rhs) { + lower ^= rhs.lower; + upper ^= rhs.upper; + return *this; } -timestamp_t Timestamp::FromString(const string &str) { - return Timestamp::FromCString(str.c_str(), str.size()); +string hugeint_t::ToString() const { + return Hugeint::ToString(*this); } -string Timestamp::ToString(timestamp_t timestamp) { - date_t date; - dtime_t time; - Timestamp::Convert(timestamp, date, time); - return Date::ToString(date) + " " + Time::ToString(time); -} +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/types/hyperloglog.hpp +// +// +//===----------------------------------------------------------------------===// -date_t Timestamp::GetDate(timestamp_t timestamp) { - return (timestamp + (timestamp < 0)) / Interval::MICROS_PER_DAY - (timestamp < 0); -} -dtime_t Timestamp::GetTime(timestamp_t timestamp) { - date_t date = Timestamp::GetDate(timestamp); - return timestamp - (int64_t(date) * int64_t(Interval::MICROS_PER_DAY)); -} -timestamp_t Timestamp::FromDatetime(date_t date, dtime_t time) { - return date * Interval::MICROS_PER_DAY + time; -} -void Timestamp::Convert(timestamp_t timestamp, date_t &out_date, dtime_t &out_time) { - out_date = GetDate(timestamp); - out_time = timestamp - (int64_t(out_date) * int64_t(Interval::MICROS_PER_DAY)); - D_ASSERT(timestamp == Timestamp::FromDatetime(out_date, out_time)); -} -timestamp_t Timestamp::GetCurrentTimestamp() { - auto now = system_clock::now(); - auto epoch_ms = duration_cast(now.time_since_epoch()).count(); - return Timestamp::FromEpochMs(epoch_ms); -} +namespace duckdb { -timestamp_t Timestamp::FromEpochSeconds(int64_t sec) { - return sec * Interval::MICROS_PER_SEC; -} +//! 
The HyperLogLog class holds a HyperLogLog counter for approximate cardinality counting +class HyperLogLog { +public: + HyperLogLog(); + ~HyperLogLog(); + // implicit copying of HyperLogLog is not allowed + HyperLogLog(const HyperLogLog &) = delete; -timestamp_t Timestamp::FromEpochMs(int64_t ms) { - return ms * Interval::MICROS_PER_MSEC; -} + //! Adds an element of the specified size to the HyperLogLog counter + void Add(data_ptr_t element, idx_t size); + //! Return the count of this HyperLogLog counter + idx_t Count(); + //! Merge this HyperLogLog counter with another counter to create a new one + unique_ptr Merge(HyperLogLog &other); + HyperLogLog *MergePointer(HyperLogLog &other); + //! Merge a set of HyperLogLogs to create one big one + static unique_ptr Merge(HyperLogLog logs[], idx_t count); -timestamp_t Timestamp::FromEpochMicroSeconds(int64_t micros) { - return micros; -} +private: + HyperLogLog(void *hll); -timestamp_t Timestamp::FromEpochNanoSeconds(int64_t ns) { - return ns / 1000; -} + void *hll; +}; +} // namespace duckdb -int64_t Timestamp::GetEpochSeconds(timestamp_t timestamp) { - return timestamp / Interval::MICROS_PER_SEC; -} -int64_t Timestamp::GetEpochMs(timestamp_t timestamp) { - return timestamp / Interval::MICROS_PER_MSEC; -} -int64_t Timestamp::GetEpochMicroSeconds(timestamp_t timestamp) { - return timestamp; -} -int64_t Timestamp::GetEpochNanoSeconds(timestamp_t timestamp) { - return timestamp * 1000; +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 +// See the end of this file for a list + +//===----------------------------------------------------------------------===// +// DuckDB +// +// third_party/hyperloglog/hyperloglog.hpp +// +// +//===----------------------------------------------------------------------===// + + + +#include +#include + +namespace duckdb_hll { + +/* Error codes */ +#define HLL_C_OK 0 +#define HLL_C_ERR -1 + +typedef struct { + void *ptr; +} robj; + +//! 
Create a new empty HyperLogLog object +robj *hll_create(void); +//! Destroy the specified HyperLogLog object +void hll_destroy(robj *obj); +//! Add an element with the specified amount of bytes to the HyperLogLog. Returns C_ERR on failure, otherwise returns 0 if the cardinality did not change, and 1 otherwise. +int hll_add(robj *o, unsigned char *ele, size_t elesize); +//! Returns the estimated amount of unique elements seen by the HyperLogLog. Returns C_OK on success, or C_ERR on failure. +int hll_count(robj *o, size_t *result); +//! Merge hll_count HyperLogLog objects into a single one. Returns NULL on failure, or the new HLL object on success. +robj *hll_merge(robj **hlls, size_t hll_count); + +uint64_t MurmurHash64A (const void * key, int len, unsigned int seed); + } -} // namespace duckdb +// LICENSE_CHANGE_END namespace duckdb { -ValidityData::ValidityData(idx_t count) { - auto entry_count = EntryCount(count); - owned_data = unique_ptr(new validity_t[entry_count]); - for (idx_t entry_idx = 0; entry_idx < entry_count; entry_idx++) { - owned_data[entry_idx] = MAX_ENTRY; - } -} -ValidityData::ValidityData(const ValidityMask &original, idx_t count) { - D_ASSERT(original.validity_mask); - auto entry_count = EntryCount(count); - owned_data = unique_ptr(new validity_t[entry_count]); - for (idx_t entry_idx = 0; entry_idx < entry_count; entry_idx++) { - owned_data[entry_idx] = original.validity_mask[entry_idx]; - } +HyperLogLog::HyperLogLog() : hll(nullptr) { + hll = duckdb_hll::hll_create(); } -void ValidityMask::Combine(const ValidityMask &other, idx_t count) { - if (other.AllValid()) { - // X & 1 = X - return; - } - if (AllValid()) { - // 1 & Y = Y - Initialize(other); - return; - } - if (validity_mask == other.validity_mask) { - // X & X == X - return; - } - // have to merge - // create a new validity mask that contains the combined mask - auto owned_data = move(validity_data); - auto data = GetData(); - auto other_data = other.GetData(); 
+HyperLogLog::HyperLogLog(void *hll) : hll(hll) { +} - Initialize(count); - auto result_data = GetData(); +HyperLogLog::~HyperLogLog() { + duckdb_hll::hll_destroy((duckdb_hll::robj *)hll); +} - auto entry_count = ValidityData::EntryCount(count); - for (idx_t entry_idx = 0; entry_idx < entry_count; entry_idx++) { - result_data[entry_idx] = data[entry_idx] & other_data[entry_idx]; +void HyperLogLog::Add(data_ptr_t element, idx_t size) { + if (duckdb_hll::hll_add((duckdb_hll::robj *)hll, element, size) == HLL_C_ERR) { + throw InternalException("Could not add to HLL?"); } } -string ValidityMask::ToString(idx_t count) const { - string result = "Validity Mask (" + to_string(count) + ") ["; - for (idx_t i = 0; i < count; i++) { - result += RowIsValid(i) ? "." : "X"; +idx_t HyperLogLog::Count() { + // exception from size_t ban + size_t result; + + if (duckdb_hll::hll_count((duckdb_hll::robj *)hll, &result) != HLL_C_OK) { + throw InternalException("Could not count HLL?"); } - result += "]"; return result; } -bool ValidityMask::IsMaskSet() const { - if (validity_mask) { - return true; +unique_ptr HyperLogLog::Merge(HyperLogLog &other) { + duckdb_hll::robj *hlls[2]; + hlls[0] = (duckdb_hll::robj *)hll; + hlls[1] = (duckdb_hll::robj *)other.hll; + auto new_hll = duckdb_hll::hll_merge(hlls, 2); + if (!new_hll) { + throw InternalException("Could not merge HLLs"); } - return false; + return unique_ptr(new HyperLogLog((void *)new_hll)); } -void ValidityMask::Resize(idx_t old_size, idx_t new_size) { - if (validity_mask) { - auto new_size_count = EntryCount(new_size); - auto old_size_count = EntryCount(old_size); - auto new_owned_data = unique_ptr(new validity_t[new_size_count]); - for (idx_t entry_idx = 0; entry_idx < old_size_count; entry_idx++) { - new_owned_data[entry_idx] = validity_mask[entry_idx]; - } - for (idx_t entry_idx = old_size_count; entry_idx < new_size_count; entry_idx++) { - new_owned_data[entry_idx] = ValidityData::MAX_ENTRY; - } - validity_data->owned_data = 
move(new_owned_data); - validity_mask = validity_data->owned_data.get(); - } else { - Initialize(new_size); +HyperLogLog *HyperLogLog::MergePointer(HyperLogLog &other) { + duckdb_hll::robj *hlls[2]; + hlls[0] = (duckdb_hll::robj *)hll; + hlls[1] = (duckdb_hll::robj *)other.hll; + auto new_hll = duckdb_hll::hll_merge(hlls, 2); + if (!new_hll) { + throw Exception("Could not merge HLLs"); } + return new HyperLogLog((void *)new_hll); } -void ValidityMask::Slice(const ValidityMask &other, idx_t offset) { - if (other.AllValid()) { - validity_mask = nullptr; - validity_data.reset(); - return; - } - if (offset == 0) { - Initialize(other); - return; - } - Initialize(STANDARD_VECTOR_SIZE); - - // first shift the "whole" units - idx_t entire_units = offset / BITS_PER_VALUE; - idx_t sub_units = offset - entire_units % BITS_PER_VALUE; - if (entire_units > 0) { - idx_t validity_idx; - for (validity_idx = 0; validity_idx + entire_units < STANDARD_ENTRY_COUNT; validity_idx++) { - validity_mask[validity_idx] = other.validity_mask[validity_idx + entire_units]; - } +unique_ptr HyperLogLog::Merge(HyperLogLog logs[], idx_t count) { + auto hlls_uptr = unique_ptr { + new duckdb_hll::robj *[count] + }; + auto hlls = hlls_uptr.get(); + for (idx_t i = 0; i < count; i++) { + hlls[i] = (duckdb_hll::robj *)logs[i].hll; } - // now we shift the remaining sub units - // this gets a bit more complicated because we have to shift over the borders of the entries - // e.g. 
suppose we have 2 entries of length 4 and we left-shift by two - // 0101|1010 - // a regular left-shift of both gets us: - // 0100|1000 - // we then OR the overflow (right-shifted by BITS_PER_VALUE - offset) together to get the correct result - // 0100|1000 -> - // 0110|1000 - if (sub_units > 0) { - idx_t validity_idx; - for (validity_idx = 0; validity_idx + 1 < STANDARD_ENTRY_COUNT; validity_idx++) { - validity_mask[validity_idx] = (other.validity_mask[validity_idx] >> sub_units) | - (other.validity_mask[validity_idx + 1] << (BITS_PER_VALUE - sub_units)); - } - validity_mask[validity_idx] >>= sub_units; + auto new_hll = duckdb_hll::hll_merge(hlls, count); + if (!new_hll) { + throw InternalException("Could not merge HLLs"); } + return unique_ptr(new HyperLogLog((void *)new_hll)); } } // namespace duckdb - - - //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/operator/aggregate_operators.hpp +// duckdb/common/enums/date_part_specifier.hpp // // //===----------------------------------------------------------------------===// -#include -#include -#include namespace duckdb { -struct Min { - template - static inline T Operation(T left, T right) { - return LessThan::Operation(left, right) ? left : right; - } +enum class DatePartSpecifier : uint8_t { + YEAR, + MONTH, + DAY, + DECADE, + CENTURY, + MILLENNIUM, + MICROSECONDS, + MILLISECONDS, + SECOND, + MINUTE, + HOUR, + EPOCH, + DOW, + ISODOW, + WEEK, + QUARTER, + DOY, + YEARWEEK }; -struct Max { - template - static inline T Operation(T left, T right) { - return GreaterThan::Operation(left, right) ? 
left : right; - } -}; +bool TryGetDatePartSpecifier(const string &specifier, DatePartSpecifier &result); +DatePartSpecifier GetDatePartSpecifier(const string &specifier); } // namespace duckdb @@ -23607,7 +29662,7 @@ struct Max { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/operator/numeric_binary_operators.hpp +// duckdb/common/operator/add.hpp // // //===----------------------------------------------------------------------===// @@ -23615,5899 +29670,6062 @@ struct Max { -#include + namespace duckdb { -struct NegateOperator { - template - static inline TR Operation(TA input) { - return -input; +struct AddOperator { + template + static inline TR Operation(TA left, TB right) { + return left + right; } }; -struct DivideOperator { +template <> +float AddOperator::Operation(float left, float right); +template <> +double AddOperator::Operation(double left, double right); +template <> +date_t AddOperator::Operation(date_t left, int32_t right); +template <> +date_t AddOperator::Operation(int32_t left, date_t right); +template <> +interval_t AddOperator::Operation(interval_t left, interval_t right); +template <> +date_t AddOperator::Operation(date_t left, interval_t right); +template <> +date_t AddOperator::Operation(interval_t left, date_t right); +template <> +timestamp_t AddOperator::Operation(timestamp_t left, interval_t right); +template <> +timestamp_t AddOperator::Operation(interval_t left, timestamp_t right); + +struct TryAddOperator { template - static inline TR Operation(TA left, TB right) { - D_ASSERT(right != 0); // this should be checked before! 
- return left / right; + static inline bool Operation(TA left, TB right, TR &result) { + throw InternalException("Unimplemented type for TryAddOperator"); } }; -struct ModuloOperator { +template <> +bool TryAddOperator::Operation(uint8_t left, uint8_t right, uint8_t &result); +template <> +bool TryAddOperator::Operation(uint16_t left, uint16_t right, uint16_t &result); +template <> +bool TryAddOperator::Operation(uint32_t left, uint32_t right, uint32_t &result); +template <> +bool TryAddOperator::Operation(uint64_t left, uint64_t right, uint64_t &result); + +template <> +bool TryAddOperator::Operation(int8_t left, int8_t right, int8_t &result); +template <> +bool TryAddOperator::Operation(int16_t left, int16_t right, int16_t &result); +template <> +bool TryAddOperator::Operation(int32_t left, int32_t right, int32_t &result); +template <> +bool TryAddOperator::Operation(int64_t left, int64_t right, int64_t &result); + +struct AddOperatorOverflowCheck { template static inline TR Operation(TA left, TB right) { - D_ASSERT(right != 0); - return left % right; + TR result; + if (!TryAddOperator::Operation(left, right, result)) { + throw OutOfRangeException("Overflow in addition of %s (%d + %d)!", TypeIdToString(GetTypeId()), left, + right); + } + return result; + } +}; + +struct TryDecimalAdd { + template + static inline bool Operation(TA left, TB right, TR &result) { + throw InternalException("Unimplemented type for TryDecimalAdd"); } }; template <> -float DivideOperator::Operation(float left, float right); +bool TryDecimalAdd::Operation(int16_t left, int16_t right, int16_t &result); template <> -double DivideOperator::Operation(double left, double right); +bool TryDecimalAdd::Operation(int32_t left, int32_t right, int32_t &result); template <> -hugeint_t DivideOperator::Operation(hugeint_t left, hugeint_t right); +bool TryDecimalAdd::Operation(int64_t left, int64_t right, int64_t &result); template <> -interval_t DivideOperator::Operation(interval_t left, int64_t 
right); +bool TryDecimalAdd::Operation(hugeint_t left, hugeint_t right, hugeint_t &result); + +struct DecimalAddOverflowCheck { + template + static inline TR Operation(TA left, TB right) { + TR result; + if (!TryDecimalAdd::Operation(left, right, result)) { + throw OutOfRangeException("Overflow in addition of DECIMAL(18) (%d + %d). You might want to add an " + "explicit cast to a bigger decimal.", + left, right); + } + return result; + } +}; template <> -float ModuloOperator::Operation(float left, float right); +hugeint_t DecimalAddOverflowCheck::Operation(hugeint_t left, hugeint_t right); + +struct AddTimeOperator { + template + static inline TR Operation(TA left, TB right); +}; + template <> -double ModuloOperator::Operation(double left, double right); +dtime_t AddTimeOperator::Operation(dtime_t left, interval_t right); template <> -hugeint_t ModuloOperator::Operation(hugeint_t left, hugeint_t right); +dtime_t AddTimeOperator::Operation(interval_t left, dtime_t right); } // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/operator/multiply.hpp +// +// +//===----------------------------------------------------------------------===// + +namespace duckdb { + +struct MultiplyOperator { + template + static inline TR Operation(TA left, TB right) { + return left * right; + } +}; + +template <> +float MultiplyOperator::Operation(float left, float right); +template <> +double MultiplyOperator::Operation(double left, double right); +template <> +interval_t MultiplyOperator::Operation(interval_t left, int64_t right); +template <> +interval_t MultiplyOperator::Operation(int64_t left, interval_t right); + +struct TryMultiplyOperator { + template + static inline bool Operation(TA left, TB right, TR &result) { + throw InternalException("Unimplemented type for TryMultiplyOperator"); + } +}; + +template <> +bool TryMultiplyOperator::Operation(uint8_t left, uint8_t right, uint8_t &result); +template 
<> +bool TryMultiplyOperator::Operation(uint16_t left, uint16_t right, uint16_t &result); +template <> +bool TryMultiplyOperator::Operation(uint32_t left, uint32_t right, uint32_t &result); +template <> +bool TryMultiplyOperator::Operation(uint64_t left, uint64_t right, uint64_t &result); +template <> +bool TryMultiplyOperator::Operation(int8_t left, int8_t right, int8_t &result); +template <> +bool TryMultiplyOperator::Operation(int16_t left, int16_t right, int16_t &result); +template <> +bool TryMultiplyOperator::Operation(int32_t left, int32_t right, int32_t &result); +template <> +bool TryMultiplyOperator::Operation(int64_t left, int64_t right, int64_t &result); +struct MultiplyOperatorOverflowCheck { + template + static inline TR Operation(TA left, TB right) { + TR result; + if (!TryMultiplyOperator::Operation(left, right, result)) { + throw OutOfRangeException("Overflow in multiplication of %s (%d * %d)!", TypeIdToString(GetTypeId()), + left, right); + } + return result; + } +}; +struct TryDecimalMultiply { + template + static inline bool Operation(TA left, TB right, TR &result) { + throw InternalException("Unimplemented type for TryDecimalMultiply"); + } +}; +template <> +bool TryDecimalMultiply::Operation(int16_t left, int16_t right, int16_t &result); +template <> +bool TryDecimalMultiply::Operation(int32_t left, int32_t right, int32_t &result); +template <> +bool TryDecimalMultiply::Operation(int64_t left, int64_t right, int64_t &result); +template <> +bool TryDecimalMultiply::Operation(hugeint_t left, hugeint_t right, hugeint_t &result); +struct DecimalMultiplyOverflowCheck { + template + static inline TR Operation(TA left, TB right) { + TR result; + if (!TryDecimalMultiply::Operation(left, right, result)) { + throw OutOfRangeException("Overflow in multiplication of DECIMAL(18) (%d * %d). 
You might want to add an " + "explicit cast to a bigger decimal.", + left, right); + } + return result; + } +}; +template <> +hugeint_t DecimalMultiplyOverflowCheck::Operation(hugeint_t left, hugeint_t right); +} // namespace duckdb namespace duckdb { -Value::Value(LogicalType type) : type_(move(type)), is_null(true) { +bool Interval::FromString(const string &str, interval_t &result) { + string error_message; + return Interval::FromCString(str.c_str(), str.size(), result, &error_message, false); } -Value::Value(int32_t val) : type_(LogicalType::INTEGER), is_null(false) { - value_.integer = val; +template +void IntervalTryAddition(T &target, int64_t input, int64_t multiplier) { + int64_t addition; + if (!TryMultiplyOperator::Operation(input, multiplier, addition)) { + throw OutOfRangeException("interval value is out of range"); + } + T addition_base = Cast::Operation(addition); + if (!TryAddOperator::Operation(target, addition_base, target)) { + throw OutOfRangeException("interval value is out of range"); + } } -Value::Value(int64_t val) : type_(LogicalType::BIGINT), is_null(false) { - value_.bigint = val; +bool Interval::FromCString(const char *str, idx_t len, interval_t &result, string *error_message, bool strict) { + idx_t pos = 0; + idx_t start_pos; + bool negative; + bool found_any = false; + int64_t number; + DatePartSpecifier specifier; + string specifier_str; + + result.days = 0; + result.micros = 0; + result.months = 0; + + if (len == 0) { + return false; + } + + switch (str[pos]) { + case '@': + pos++; + goto standard_interval; + case 'P': + case 'p': + pos++; + goto posix_interval; + default: + goto standard_interval; + } +standard_interval: + // start parsing a standard interval (e.g. 2 years 3 months...) 
+ for (; pos < len; pos++) { + char c = str[pos]; + if (c == ' ' || c == '\t' || c == '\n') { + // skip spaces + continue; + } else if (c >= '0' && c <= '9') { + // start parsing a positive number + negative = false; + goto interval_parse_number; + } else if (c == '-') { + // negative number + negative = true; + pos++; + goto interval_parse_number; + } else if (c == 'a' || c == 'A') { + // parse the word "ago" as the final specifier + goto interval_parse_ago; + } else { + // unrecognized character, expected a number or end of string + return false; + } + } + goto end_of_string; +interval_parse_number: + start_pos = pos; + for (; pos < len; pos++) { + char c = str[pos]; + if (c >= '0' && c <= '9') { + // the number continues + continue; + } else if (c == ':') { + // colon: we are parsing a time + goto interval_parse_time; + } else { + if (pos == start_pos) { + return false; + } + // finished the number, parse it from the string + string_t nr_string(str + start_pos, pos - start_pos); + number = Cast::Operation(nr_string); + if (negative) { + number = -number; + } + goto interval_parse_identifier; + } + } + goto end_of_string; +interval_parse_time : { + // parse the remainder of the time as a Time type + dtime_t time; + idx_t pos; + if (!Time::TryConvertTime(str + start_pos, len, pos, time)) { + return false; + } + result.micros += time.micros; + found_any = true; + goto end_of_string; +} +interval_parse_identifier: + for (; pos < len; pos++) { + char c = str[pos]; + if (c == ' ' || c == '\t' || c == '\n') { + // skip spaces at the start + continue; + } else { + break; + } + } + // now parse the identifier + start_pos = pos; + for (; pos < len; pos++) { + char c = str[pos]; + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { + // keep parsing the string + continue; + } else { + break; + } + } + specifier_str = string(str + start_pos, pos - start_pos); + if (!TryGetDatePartSpecifier(specifier_str, specifier)) { + 
HandleCastError::AssignError(StringUtil::Format("extract specifier \"%s\" not recognized", specifier_str), + error_message); + return false; + } + // add the specifier to the interval + switch (specifier) { + case DatePartSpecifier::MILLENNIUM: + IntervalTryAddition(result.months, number, MONTHS_PER_MILLENIUM); + break; + case DatePartSpecifier::CENTURY: + IntervalTryAddition(result.months, number, MONTHS_PER_CENTURY); + break; + case DatePartSpecifier::DECADE: + IntervalTryAddition(result.months, number, MONTHS_PER_DECADE); + break; + case DatePartSpecifier::YEAR: + IntervalTryAddition(result.months, number, MONTHS_PER_YEAR); + break; + case DatePartSpecifier::QUARTER: + IntervalTryAddition(result.months, number, MONTHS_PER_QUARTER); + break; + case DatePartSpecifier::MONTH: + IntervalTryAddition(result.months, number, 1); + break; + case DatePartSpecifier::DAY: + IntervalTryAddition(result.days, number, 1); + break; + case DatePartSpecifier::WEEK: + IntervalTryAddition(result.days, number, DAYS_PER_WEEK); + break; + case DatePartSpecifier::MICROSECONDS: + IntervalTryAddition(result.micros, number, 1); + break; + case DatePartSpecifier::MILLISECONDS: + IntervalTryAddition(result.micros, number, MICROS_PER_MSEC); + break; + case DatePartSpecifier::SECOND: + IntervalTryAddition(result.micros, number, MICROS_PER_SEC); + break; + case DatePartSpecifier::MINUTE: + IntervalTryAddition(result.micros, number, MICROS_PER_MINUTE); + break; + case DatePartSpecifier::HOUR: + IntervalTryAddition(result.micros, number, MICROS_PER_HOUR); + break; + default: + HandleCastError::AssignError( + StringUtil::Format("extract specifier \"%s\" not supported for interval", specifier_str), error_message); + return false; + } + found_any = true; + goto standard_interval; +interval_parse_ago: + D_ASSERT(str[pos] == 'a' || str[pos] == 'A'); + // parse the "ago" string at the end of the interval + if (len - pos < 3) { + return false; + } + pos++; + if (!(str[pos] == 'g' || str[pos] == 'G')) { 
+ return false; + } + pos++; + if (!(str[pos] == 'o' || str[pos] == 'O')) { + return false; + } + pos++; + // parse any trailing whitespace + for (; pos < len; pos++) { + char c = str[pos]; + if (c == ' ' || c == '\t' || c == '\n') { + continue; + } else { + return false; + } + } + // invert all the values + result.months = -result.months; + result.days = -result.days; + result.micros = -result.micros; + goto end_of_string; +end_of_string: + if (!found_any) { + // end of string and no identifiers were found: cannot convert empty interval + return false; + } + return true; +posix_interval: + return false; } -Value::Value(float val) : type_(LogicalType::FLOAT), is_null(false) { - if (!Value::FloatIsValid(val)) { - throw OutOfRangeException("Invalid float value %f", val); +string Interval::ToString(interval_t interval) { + char buffer[70]; + idx_t length = IntervalToStringCast::Format(interval, buffer); + return string(buffer, length); +} + +int64_t Interval::GetMilli(interval_t val) { + int64_t milli_month, milli_day, milli; + if (!TryMultiplyOperator::Operation((int64_t)val.months, Interval::MICROS_PER_MONTH / 1000, milli_month)) { + throw ConversionException("Could not convert Interval to Milliseconds"); } - value_.float_ = val; + if (!TryMultiplyOperator::Operation((int64_t)val.days, Interval::MICROS_PER_DAY / 1000, milli_day)) { + throw ConversionException("Could not convert Interval to Milliseconds"); + } + milli = val.micros / 1000; + if (!TryAddOperator::Operation(milli, milli_month, milli)) { + throw ConversionException("Could not convert Interval to Milliseconds"); + } + if (!TryAddOperator::Operation(milli, milli_day, milli)) { + throw ConversionException("Could not convert Interval to Milliseconds"); + } + return milli; } -Value::Value(double val) : type_(LogicalType::DOUBLE), is_null(false) { - if (!Value::DoubleIsValid(val)) { - throw OutOfRangeException("Invalid double value %f", val); +int64_t Interval::GetNanoseconds(interval_t val) { + int64_t 
micro_month, micro_day, micro_total, nano; + int64_t ns_in_us = 1000; + micro_total = val.micros; + if (!TryMultiplyOperator::Operation((int64_t)val.months, Interval::MICROS_PER_MONTH, micro_month)) { + throw ConversionException("Could not convert Month to Nanoseconds"); } - value_.double_ = val; + if (!TryMultiplyOperator::Operation((int64_t)val.days, Interval::MICROS_PER_DAY, micro_day)) { + throw ConversionException("Could not convert Day to Nanoseconds"); + } + if (!TryAddOperator::Operation(micro_total, micro_month, micro_total)) { + throw ConversionException("Could not convert Interval to Nanoseconds"); + } + if (!TryAddOperator::Operation(micro_total, micro_day, micro_total)) { + throw ConversionException("Could not convert Interval to Nanoseconds"); + } + if (!TryMultiplyOperator::Operation(micro_total, ns_in_us, nano)) { + throw ConversionException("Could not convert Interval to Nanoseconds"); + } + + return nano; } +interval_t Interval::GetDifference(timestamp_t timestamp_1, timestamp_t timestamp_2) { + date_t date1, date2; + dtime_t time1, time2; -Value::Value(const char *val) : Value(val ? 
string(val) : string()) { + Timestamp::Convert(timestamp_1, date1, time1); + Timestamp::Convert(timestamp_2, date2, time2); + + // and from date extract the years, months and days + int32_t year1, month1, day1; + int32_t year2, month2, day2; + Date::Convert(date1, year1, month1, day1); + Date::Convert(date2, year2, month2, day2); + // finally perform the differences + auto year_diff = year1 - year2; + auto month_diff = month1 - month2; + auto day_diff = day1 - day2; + + // and from time extract hours, minutes, seconds and milliseconds + int32_t hour1, min1, sec1, micros1; + int32_t hour2, min2, sec2, micros2; + Time::Convert(time1, hour1, min1, sec1, micros1); + Time::Convert(time2, hour2, min2, sec2, micros2); + // finally perform the differences + auto hour_diff = hour1 - hour2; + auto min_diff = min1 - min2; + auto sec_diff = sec1 - sec2; + auto micros_diff = micros1 - micros2; + + // flip sign if necessary + bool sign_flipped = false; + if (timestamp_1 < timestamp_2) { + year_diff = -year_diff; + month_diff = -month_diff; + day_diff = -day_diff; + hour_diff = -hour_diff; + min_diff = -min_diff; + sec_diff = -sec_diff; + micros_diff = -micros_diff; + sign_flipped = true; + } + // now propagate any negative field into the next higher field + while (micros_diff < 0) { + micros_diff += MICROS_PER_SEC; + sec_diff--; + } + while (sec_diff < 0) { + sec_diff += SECS_PER_MINUTE; + min_diff--; + } + while (min_diff < 0) { + min_diff += MINS_PER_HOUR; + hour_diff--; + } + while (hour_diff < 0) { + hour_diff += HOURS_PER_DAY; + day_diff--; + } + while (day_diff < 0) { + if (timestamp_1 < timestamp_2) { + day_diff += Date::IsLeapYear(year1) ? Date::LEAP_DAYS[month1] : Date::NORMAL_DAYS[month1]; + month_diff--; + } else { + day_diff += Date::IsLeapYear(year2) ? 
Date::LEAP_DAYS[month2] : Date::NORMAL_DAYS[month2]; + month_diff--; + } + } + while (month_diff < 0) { + month_diff += MONTHS_PER_YEAR; + year_diff--; + } + + // recover sign if necessary + if (sign_flipped) { + year_diff = -year_diff; + month_diff = -month_diff; + day_diff = -day_diff; + hour_diff = -hour_diff; + min_diff = -min_diff; + sec_diff = -sec_diff; + micros_diff = -micros_diff; + } + interval_t interval; + interval.months = year_diff * MONTHS_PER_YEAR + month_diff; + interval.days = day_diff; + interval.micros = Time::FromTime(hour_diff, min_diff, sec_diff, micros_diff).micros; + + return interval; } -Value::Value(std::nullptr_t val) : Value(LogicalType::VARCHAR) { +static void NormalizeIntervalEntries(interval_t input, int64_t &months, int64_t &days, int64_t µs) { + int64_t extra_months_d = input.days / Interval::DAYS_PER_MONTH; + int64_t extra_months_micros = input.micros / Interval::MICROS_PER_MONTH; + input.days -= extra_months_d * Interval::DAYS_PER_MONTH; + input.micros -= extra_months_micros * Interval::MICROS_PER_MONTH; + + int64_t extra_days_micros = input.micros / Interval::MICROS_PER_DAY; + input.micros -= extra_days_micros * Interval::MICROS_PER_DAY; + + months = input.months + extra_months_d + extra_months_micros; + days = input.days + extra_days_micros; + micros = input.micros; } -Value::Value(string_t val) : Value(string(val.GetDataUnsafe(), val.GetSize())) { +bool Interval::Equals(interval_t left, interval_t right) { + return left.months == right.months && left.days == right.days && left.micros == right.micros; } -Value::Value(string val) : type_(LogicalType::VARCHAR), is_null(false), str_value(move(val)) { - auto utf_type = Utf8Proc::Analyze(str_value.c_str(), str_value.size()); - if (utf_type == UnicodeType::INVALID) { - throw Exception("String value is not valid UTF8"); +bool Interval::GreaterThan(interval_t left, interval_t right) { + int64_t lmonths, ldays, lmicros; + int64_t rmonths, rdays, rmicros; + NormalizeIntervalEntries(left, 
lmonths, ldays, lmicros); + NormalizeIntervalEntries(right, rmonths, rdays, rmicros); + + if (lmonths > rmonths) { + return true; + } else if (lmonths < rmonths) { + return false; + } + if (ldays > rdays) { + return true; + } else if (ldays < rdays) { + return false; } + return lmicros > rmicros; } -Value Value::MinimumValue(const LogicalType &type) { - switch (type.id()) { - case LogicalTypeId::BOOLEAN: - return Value::BOOLEAN(false); - case LogicalTypeId::TINYINT: - return Value::TINYINT(NumericLimits::Minimum()); - case LogicalTypeId::SMALLINT: - return Value::SMALLINT(NumericLimits::Minimum()); - case LogicalTypeId::INTEGER: - return Value::INTEGER(NumericLimits::Minimum()); - case LogicalTypeId::DATE: - return Value::DATE(NumericLimits::Minimum()); - case LogicalTypeId::TIME: - return Value::TIME(NumericLimits::Minimum()); - case LogicalTypeId::BIGINT: - return Value::BIGINT(NumericLimits::Minimum()); - case LogicalTypeId::UTINYINT: - return Value::UTINYINT(NumericLimits::Minimum()); - case LogicalTypeId::USMALLINT: - return Value::USMALLINT(NumericLimits::Minimum()); - case LogicalTypeId::UINTEGER: - return Value::UINTEGER(NumericLimits::Minimum()); - case LogicalTypeId::UBIGINT: - return Value::UBIGINT(NumericLimits::Minimum()); - case LogicalTypeId::TIMESTAMP: - return Value::TIMESTAMP(NumericLimits::Minimum()); - case LogicalTypeId::HUGEINT: - return Value::HUGEINT(NumericLimits::Minimum()); - case LogicalTypeId::FLOAT: - return Value::FLOAT(NumericLimits::Minimum()); - case LogicalTypeId::DOUBLE: - return Value::DOUBLE(NumericLimits::Minimum()); - case LogicalTypeId::DECIMAL: { - Value result; - switch (type.InternalType()) { - case PhysicalType::INT16: - result = Value::MinimumValue(LogicalType::SMALLINT); - break; - case PhysicalType::INT32: - result = Value::MinimumValue(LogicalType::INTEGER); - break; - case PhysicalType::INT64: - result = Value::MinimumValue(LogicalType::BIGINT); - break; - case PhysicalType::INT128: - result = 
Value::MinimumValue(LogicalType::HUGEINT); - break; - default: - throw InternalException("Unknown decimal type"); +bool Interval::GreaterThanEquals(interval_t left, interval_t right) { + return GreaterThan(left, right) || Equals(left, right); +} + +} // namespace duckdb + + +namespace duckdb { + +RowDataCollection::RowDataCollection(BufferManager &buffer_manager, idx_t block_capacity, idx_t entry_size, + bool keep_pinned) + : buffer_manager(buffer_manager), count(0), block_capacity(block_capacity), entry_size(entry_size), + keep_pinned(keep_pinned) { + D_ASSERT(block_capacity * entry_size >= Storage::BLOCK_SIZE); +} + +idx_t RowDataCollection::AppendToBlock(RowDataBlock &block, BufferHandle &handle, + vector &append_entries, idx_t remaining, idx_t entry_sizes[]) { + idx_t append_count = 0; + data_ptr_t dataptr; + if (entry_sizes) { + D_ASSERT(entry_size == 1); + // compute how many entries fit if entry size is variable + dataptr = handle.Ptr() + block.byte_offset; + for (idx_t i = 0; i < remaining; i++) { + if (block.byte_offset + entry_sizes[i] > block.capacity) { + if (block.count == 0 && append_count == 0 && entry_sizes[i] > block.capacity) { + // special case: single entry is bigger than block capacity + // resize current block to fit the entry, append it, and move to the next block + block.capacity = entry_sizes[i]; + buffer_manager.ReAllocate(block.block, block.capacity); + dataptr = handle.Ptr(); + append_count++; + block.byte_offset += entry_sizes[i]; + } + break; + } + append_count++; + block.byte_offset += entry_sizes[i]; } - result.type_ = type; - return result; - } - default: - throw InvalidTypeException(type, "MinimumValue requires numeric type"); + } else { + append_count = MinValue(remaining, block.capacity - block.count); + dataptr = handle.Ptr() + block.count * entry_size; } + append_entries.emplace_back(dataptr, append_count); + block.count += append_count; + return append_count; } -Value Value::MaximumValue(const LogicalType &type) { - switch 
(type.id()) { - case LogicalTypeId::BOOLEAN: - return Value::BOOLEAN(false); - case LogicalTypeId::TINYINT: - return Value::TINYINT(NumericLimits::Maximum()); - case LogicalTypeId::SMALLINT: - return Value::SMALLINT(NumericLimits::Maximum()); - case LogicalTypeId::INTEGER: - return Value::INTEGER(NumericLimits::Maximum()); - case LogicalTypeId::DATE: - return Value::DATE(NumericLimits::Maximum()); - case LogicalTypeId::TIME: - return Value::TIME(NumericLimits::Maximum()); - case LogicalTypeId::BIGINT: - return Value::BIGINT(NumericLimits::Maximum()); - case LogicalTypeId::UTINYINT: - return Value::UTINYINT(NumericLimits::Maximum()); - case LogicalTypeId::USMALLINT: - return Value::USMALLINT(NumericLimits::Maximum()); - case LogicalTypeId::UINTEGER: - return Value::UINTEGER(NumericLimits::Maximum()); - case LogicalTypeId::UBIGINT: - return Value::UBIGINT(NumericLimits::Maximum()); - case LogicalTypeId::TIMESTAMP: - return Value::TIMESTAMP(NumericLimits::Maximum()); - case LogicalTypeId::HUGEINT: - return Value::HUGEINT(NumericLimits::Maximum()); - case LogicalTypeId::FLOAT: - return Value::FLOAT(NumericLimits::Maximum()); - case LogicalTypeId::DOUBLE: - return Value::DOUBLE(NumericLimits::Maximum()); - case LogicalTypeId::DECIMAL: { - Value result; - switch (type.InternalType()) { - case PhysicalType::INT16: - result = Value::MaximumValue(LogicalType::SMALLINT); - break; - case PhysicalType::INT32: - result = Value::MaximumValue(LogicalType::INTEGER); - break; - case PhysicalType::INT64: - result = Value::MaximumValue(LogicalType::BIGINT); - break; - case PhysicalType::INT128: - result = Value::MaximumValue(LogicalType::HUGEINT); - break; - default: - throw InternalException("Unknown decimal type"); +vector> RowDataCollection::Build(idx_t added_count, data_ptr_t key_locations[], + idx_t entry_sizes[], const SelectionVector *sel) { + vector> handles; + vector append_entries; + + // first allocate space of where to serialize the keys and payload columns + idx_t 
remaining = added_count; + { + // first append to the last block (if any) + lock_guard append_lock(rdc_lock); + count += added_count; + + if (!blocks.empty()) { + auto &last_block = blocks.back(); + if (last_block.count < last_block.capacity) { + // last block has space: pin the buffer of this block + auto handle = buffer_manager.Pin(last_block.block); + // now append to the block + idx_t append_count = AppendToBlock(last_block, *handle, append_entries, remaining, entry_sizes); + remaining -= append_count; + handles.push_back(move(handle)); + } + } + while (remaining > 0) { + // now for the remaining data, allocate new buffers to store the data and append there + RowDataBlock new_block(buffer_manager, block_capacity, entry_size); + auto handle = buffer_manager.Pin(new_block.block); + + // offset the entry sizes array if we have added entries already + idx_t *offset_entry_sizes = entry_sizes ? entry_sizes + added_count - remaining : nullptr; + + idx_t append_count = AppendToBlock(new_block, *handle, append_entries, remaining, offset_entry_sizes); + D_ASSERT(new_block.count > 0); + remaining -= append_count; + + blocks.push_back(move(new_block)); + if (keep_pinned) { + pinned_blocks.push_back(move(handle)); + } else { + handles.push_back(move(handle)); + } + } + } + // now set up the key_locations based on the append entries + idx_t append_idx = 0; + for (auto &append_entry : append_entries) { + idx_t next = append_idx + append_entry.count; + if (entry_sizes) { + for (; append_idx < next; append_idx++) { + key_locations[append_idx] = append_entry.baseptr; + append_entry.baseptr += entry_sizes[append_idx]; + } + } else { + for (; append_idx < next; append_idx++) { + auto idx = sel->get_index(append_idx); + key_locations[idx] = append_entry.baseptr; + append_entry.baseptr += entry_size; + } + } + } + // return the unique pointers to the handles because they must stay pinned + return handles; +} + +void RowDataCollection::Merge(RowDataCollection &other) { + 
RowDataCollection temp(buffer_manager, Storage::BLOCK_SIZE, 1); + { + // One lock at a time to avoid deadlocks + lock_guard read_lock(other.rdc_lock); + temp.count = other.count; + temp.block_capacity = other.block_capacity; + temp.entry_size = other.entry_size; + temp.blocks = move(other.blocks); + other.count = 0; + } + + lock_guard write_lock(rdc_lock); + count += temp.count; + block_capacity = MaxValue(block_capacity, temp.block_capacity); + entry_size = MaxValue(entry_size, temp.entry_size); + for (auto &block : temp.blocks) { + blocks.emplace_back(move(block)); + } + for (auto &handle : temp.pinned_blocks) { + pinned_blocks.emplace_back(move(handle)); + } +} + +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/types/row_layout.cpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { + +vector AggregateObject::CreateAggregateObjects(const vector &bindings) { + vector aggregates; + for (auto &binding : bindings) { + auto payload_size = binding->function.state_size(); +#ifndef DUCKDB_ALLOW_UNDEFINED + payload_size = RowLayout::Align(payload_size); +#endif + aggregates.emplace_back(binding->function, binding->bind_info.get(), binding->children.size(), payload_size, + binding->distinct, binding->return_type.InternalType(), binding->filter.get()); + } + return aggregates; +} + +RowLayout::RowLayout() + : flag_width(0), data_width(0), aggr_width(0), row_width(0), all_constant(true), heap_pointer_offset(0) { +} + +void RowLayout::Initialize(vector types_p, Aggregates aggregates_p, bool align) { + offsets.clear(); + types = move(types_p); + + // Null mask at the front - 1 bit per value. + flag_width = ValidityBytes::ValidityMaskSize(types.size()); + row_width = flag_width; + + // Whether all columns are constant size. 
+ for (const auto &type : types) { + all_constant = all_constant && TypeIsConstantSize(type.InternalType()); + } + + // This enables pointer swizzling for out-of-core computation. + if (!all_constant) { + // When unswizzled the pointer lives here. + // When swizzled, the pointer is replaced by an offset. + heap_pointer_offset = row_width; + // The 8 byte pointer will be replaced with an 8 byte idx_t when swizzled. + // However, this cannot be sizeof(data_ptr_t), since 32 bit builds use 4 byte pointers. + row_width += sizeof(idx_t); + } + + // Data columns. No alignment required. + for (const auto &type : types) { + offsets.push_back(row_width); + const auto internal_type = type.InternalType(); + if (TypeIsConstantSize(internal_type) || internal_type == PhysicalType::VARCHAR) { + row_width += GetTypeIdSize(type.InternalType()); + } else { + // Variable size types use pointers to the actual data (can be swizzled). + // Again, we would use sizeof(data_ptr_t), but this is not guaranteed to be equal to sizeof(idx_t). + row_width += sizeof(idx_t); } - result.type_ = type; - return result; } - default: - throw InvalidTypeException(type, "MaximumValue requires numeric type"); + + // Alignment padding for aggregates +#ifndef DUCKDB_ALLOW_UNDEFINED + if (align) { + row_width = Align(row_width); } -} +#endif + data_width = row_width - flag_width; -Value Value::BOOLEAN(int8_t value) { - Value result(LogicalType::BOOLEAN); - result.value_.boolean = value ? true : false; - result.is_null = false; - return result; -} + // Aggregate fields. 
+ aggregates = move(aggregates_p); + for (auto &aggregate : aggregates) { + offsets.push_back(row_width); + row_width += aggregate.payload_size; +#ifndef DUCKDB_ALLOW_UNDEFINED + D_ASSERT(aggregate.payload_size == Align(aggregate.payload_size)); +#endif + } + aggr_width = row_width - data_width - flag_width; -Value Value::TINYINT(int8_t value) { - Value result(LogicalType::TINYINT); - result.value_.tinyint = value; - result.is_null = false; - return result; + // Alignment padding for the next row +#ifndef DUCKDB_ALLOW_UNDEFINED + if (align) { + row_width = Align(row_width); + } +#endif } -Value Value::SMALLINT(int16_t value) { - Value result(LogicalType::SMALLINT); - result.value_.smallint = value; - result.is_null = false; - return result; +void RowLayout::Initialize(vector types_p, bool align) { + Initialize(move(types_p), Aggregates(), align); } -Value Value::INTEGER(int32_t value) { - Value result(LogicalType::INTEGER); - result.value_.integer = value; - result.is_null = false; - return result; +void RowLayout::Initialize(Aggregates aggregates_p, bool align) { + Initialize(vector(), move(aggregates_p), align); } -Value Value::BIGINT(int64_t value) { - Value result(LogicalType::BIGINT); - result.value_.bigint = value; - result.is_null = false; - return result; -} +} // namespace duckdb -Value Value::HUGEINT(hugeint_t value) { - Value result(LogicalType::HUGEINT); - result.value_.hugeint = value; - result.is_null = false; + + + +namespace duckdb { + +// LCOV_EXCL_START +string SelectionVector::ToString(idx_t count) const { + string result = "Selection Vector (" + to_string(count) + ") ["; + for (idx_t i = 0; i < count; i++) { + if (i != 0) { + result += ", "; + } + result += to_string(get_index(i)); + } + result += "]"; return result; } -Value Value::UTINYINT(uint8_t value) { - Value result(LogicalType::UTINYINT); - result.value_.utinyint = value; - result.is_null = false; - return result; +void SelectionVector::Print(idx_t count) const { + 
Printer::Print(ToString(count)); } +// LCOV_EXCL_STOP -Value Value::USMALLINT(uint16_t value) { - Value result(LogicalType::USMALLINT); - result.value_.usmallint = value; - result.is_null = false; - return result; +buffer_ptr SelectionVector::Slice(const SelectionVector &sel, idx_t count) const { + auto data = make_buffer(count); + auto result_ptr = data->owned_data.get(); + // for every element, we perform result[i] = target[new[i]] + for (idx_t i = 0; i < count; i++) { + auto new_idx = sel.get_index(i); + auto idx = this->get_index(new_idx); + result_ptr[i] = idx; + } + return data; } -Value Value::UINTEGER(uint32_t value) { - Value result(LogicalType::UINTEGER); - result.value_.uinteger = value; - result.is_null = false; - return result; +} // namespace duckdb + + + + + + + +#include + +namespace duckdb { + +#define MINIMUM_HEAP_SIZE 4096 + +StringHeap::StringHeap() : tail(nullptr) { } -Value Value::UBIGINT(uint64_t value) { - Value result(LogicalType::UBIGINT); - result.value_.ubigint = value; - result.is_null = false; - return result; +string_t StringHeap::AddString(const char *data, idx_t len) { + D_ASSERT(Utf8Proc::Analyze(data, len) != UnicodeType::INVALID); + return AddBlob(data, len); } -bool Value::FloatIsValid(float value) { - return !(std::isnan(value) || std::isinf(value)); +string_t StringHeap::AddString(const char *data) { + return AddString(data, strlen(data)); } -bool Value::DoubleIsValid(double value) { - return !(std::isnan(value) || std::isinf(value)); +string_t StringHeap::AddString(const string &data) { + return AddString(data.c_str(), data.size()); } -Value Value::DECIMAL(int16_t value, uint8_t width, uint8_t scale) { - D_ASSERT(width <= Decimal::MAX_WIDTH_INT16); - Value result(LogicalType(LogicalTypeId::DECIMAL, width, scale)); - result.value_.smallint = value; - result.is_null = false; - return result; +string_t StringHeap::AddString(const string_t &data) { + return AddString(data.GetDataUnsafe(), data.GetSize()); } -Value 
Value::DECIMAL(int32_t value, uint8_t width, uint8_t scale) { - D_ASSERT(width >= Decimal::MAX_WIDTH_INT16 && width <= Decimal::MAX_WIDTH_INT32); - Value result(LogicalType(LogicalTypeId::DECIMAL, width, scale)); - result.value_.integer = value; - result.is_null = false; - return result; +string_t StringHeap::AddBlob(const char *data, idx_t len) { + auto insert_string = EmptyString(len); + auto insert_pos = insert_string.GetDataWriteable(); + memcpy(insert_pos, data, len); + insert_string.Finalize(); + return insert_string; } -Value Value::DECIMAL(int64_t value, uint8_t width, uint8_t scale) { - LogicalType decimal_type(LogicalTypeId::DECIMAL, width, scale); - Value result(decimal_type); - switch (decimal_type.InternalType()) { - case PhysicalType::INT16: - result.value_.smallint = value; - break; - case PhysicalType::INT32: - result.value_.integer = value; - break; - case PhysicalType::INT64: - result.value_.bigint = value; - break; - default: - result.value_.hugeint = value; - break; +string_t StringHeap::EmptyString(idx_t len) { + D_ASSERT(len >= string_t::INLINE_LENGTH); + if (!chunk || chunk->current_position + len >= chunk->maximum_size) { + // have to make a new entry + auto new_chunk = make_unique(MaxValue(len, MINIMUM_HEAP_SIZE)); + new_chunk->prev = move(chunk); + chunk = move(new_chunk); + if (!tail) { + tail = chunk.get(); + } } - result.type_.Verify(); - result.is_null = false; - return result; + auto insert_pos = chunk->data.get() + chunk->current_position; + chunk->current_position += len; + return string_t(insert_pos, len); } -Value Value::DECIMAL(hugeint_t value, uint8_t width, uint8_t scale) { - D_ASSERT(width >= Decimal::MAX_WIDTH_INT64 && width <= Decimal::MAX_WIDTH_INT128); - Value result(LogicalType(LogicalTypeId::DECIMAL, width, scale)); - result.value_.hugeint = value; - result.is_null = false; - return result; -} +} // namespace duckdb -Value Value::FLOAT(float value) { - if (!Value::FloatIsValid(value)) { - throw 
OutOfRangeException("Invalid float value %f", value); + + + + +namespace duckdb { + +void string_t::Verify() { + auto dataptr = GetDataUnsafe(); + (void)dataptr; + D_ASSERT(dataptr); + +#ifdef DEBUG + auto utf_type = Utf8Proc::Analyze(dataptr, GetSize()); + D_ASSERT(utf_type != UnicodeType::INVALID); +#endif + + // verify that the prefix contains the first four characters of the string + for (idx_t i = 0; i < MinValue(PREFIX_LENGTH, GetSize()); i++) { + D_ASSERT(GetPrefix()[i] == dataptr[i]); + } + // verify that for strings with length < INLINE_LENGTH, the rest of the string is zero + for (idx_t i = GetSize(); i < INLINE_LENGTH; i++) { + D_ASSERT(GetDataUnsafe()[i] == '\0'); } - Value result(LogicalType::FLOAT); - result.value_.float_ = value; - result.is_null = false; - return result; } -Value Value::DOUBLE(double value) { - if (!Value::DoubleIsValid(value)) { - throw OutOfRangeException("Invalid double value %f", value); +void string_t::VerifyNull() { + for (idx_t i = 0; i < GetSize(); i++) { + D_ASSERT(GetDataUnsafe()[i] != '\0'); } - Value result(LogicalType::DOUBLE); - result.value_.double_ = value; - result.is_null = false; - return result; } -Value Value::HASH(hash_t value) { - Value result(LogicalType::HASH); - result.value_.hash = value; - result.is_null = false; - return result; -} +} // namespace duckdb -Value Value::POINTER(uintptr_t value) { - Value result(LogicalType::POINTER); - result.value_.pointer = value; - result.is_null = false; - return result; -} -Value Value::DATE(date_t date) { - auto val = Value::INTEGER(date); - val.type_ = LogicalType::DATE; - return val; -} -Value Value::DATE(int32_t year, int32_t month, int32_t day) { - return Value::DATE(Date::FromDate(year, month, day)); -} -Value Value::TIME(dtime_t time) { - auto val = Value::BIGINT(time); - val.type_ = LogicalType::TIME; - return val; -} -Value Value::TIME(int32_t hour, int32_t min, int32_t sec, int32_t micros) { - return Value::TIME(Time::FromTime(hour, min, sec, micros)); -} 
-Value Value::TIMESTAMP(timestamp_t timestamp) { - auto val = Value::BIGINT(timestamp); - val.type_ = LogicalType::TIMESTAMP; - return val; -} -Value Value::TIMESTAMP(date_t date, dtime_t time) { - auto val = Value::BIGINT(Timestamp::FromDatetime(date, time)); - val.type_ = LogicalType::TIMESTAMP; - return val; -} -Value Value::TIMESTAMP(int32_t year, int32_t month, int32_t day, int32_t hour, int32_t min, int32_t sec, - int32_t micros) { - auto val = Value::TIMESTAMP(Date::FromDate(year, month, day), Time::FromTime(hour, min, sec, micros)); - val.type_ = LogicalType::TIMESTAMP; - return val; -} +#include +#include +#include -Value Value::STRUCT(child_list_t values) { - Value result; - child_list_t child_types; - for (auto &child : values) { - child_types.push_back(make_pair(child.first, child.second.type())); +namespace duckdb { + +static_assert(sizeof(dtime_t) == sizeof(int64_t), "dtime_t was padded"); + +// string format is hh:mm:ss.microsecondsZ +// microseconds and Z are optional +// ISO 8601 + +bool Time::TryConvertInternal(const char *buf, idx_t len, idx_t &pos, dtime_t &result, bool strict) { + int32_t hour = -1, min = -1, sec = -1, micros = -1; + pos = 0; + + if (len == 0) { + return false; } - result.type_ = LogicalType(LogicalTypeId::STRUCT, child_types); - result.struct_value = move(values); - result.is_null = false; - return result; -} + int sep; -Value Value::LIST(vector values) { - Value result; - result.type_ = LogicalType(LogicalTypeId::LIST); - result.list_value = move(values); - result.is_null = false; - return result; -} + // skip leading spaces + while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) { + pos++; + } -Value Value::BLOB(const_data_ptr_t data, idx_t len) { - Value result(LogicalType::BLOB); - result.is_null = false; - result.str_value = string((const char *)data, len); - return result; -} + if (pos >= len) { + return false; + } -Value Value::BLOB(const string &data) { - Value result(LogicalType::BLOB); - result.is_null = false; 
- result.str_value = Blob::ToBlob(string_t(data)); - return result; -} + if (!StringUtil::CharacterIsDigit(buf[pos])) { + return false; + } -Value Value::INTERVAL(int32_t months, int32_t days, int64_t micros) { - Value result(LogicalType::INTERVAL); - result.is_null = false; - result.value_.interval.months = months; - result.value_.interval.days = days; - result.value_.interval.micros = micros; - return result; -} + if (!Date::ParseDoubleDigit(buf, len, pos, hour)) { + return false; + } + if (hour < 0 || hour >= 24) { + return false; + } -Value Value::INTERVAL(interval_t interval) { - return Value::INTERVAL(interval.months, interval.days, interval.micros); -} + if (pos >= len) { + return false; + } -//===--------------------------------------------------------------------===// -// CreateValue -//===--------------------------------------------------------------------===// -template <> -Value Value::CreateValue(bool value) { - return Value::BOOLEAN(value); -} + // fetch the separator + sep = buf[pos++]; + if (sep != ':') { + // invalid separator + return false; + } -template <> -Value Value::CreateValue(int8_t value) { - return Value::TINYINT(value); -} + if (!Date::ParseDoubleDigit(buf, len, pos, min)) { + return false; + } + if (min < 0 || min >= 60) { + return false; + } -template <> -Value Value::CreateValue(int16_t value) { - return Value::SMALLINT(value); -} + if (pos >= len) { + return false; + } -template <> -Value Value::CreateValue(int32_t value) { - return Value::INTEGER(value); -} + if (buf[pos++] != sep) { + return false; + } -template <> -Value Value::CreateValue(int64_t value) { - return Value::BIGINT(value); -} + if (!Date::ParseDoubleDigit(buf, len, pos, sec)) { + return false; + } + if (sec < 0 || sec >= 60) { + return false; + } -template <> -Value Value::CreateValue(uint8_t value) { - return Value::UTINYINT(value); -} + micros = 0; + if (pos < len && buf[pos] == '.') { + pos++; + // we expect some microseconds + int32_t mult = 100000; + for (; pos 
< len && StringUtil::CharacterIsDigit(buf[pos]); pos++, mult /= 10) { + if (mult > 0) { + micros += (buf[pos] - '0') * mult; + } + } + } -template <> -Value Value::CreateValue(uint16_t value) { - return Value::USMALLINT(value); -} + // in strict mode, check remaining string for non-space characters + if (strict) { + // skip trailing spaces + while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) { + pos++; + } + // check position. if end was not reached, non-space chars remaining + if (pos < len) { + return false; + } + } -template <> -Value Value::CreateValue(uint32_t value) { - return Value::UINTEGER(value); + result = Time::FromTime(hour, min, sec, micros); + return true; } -template <> -Value Value::CreateValue(uint64_t value) { - return Value::UBIGINT(value); +bool Time::TryConvertTime(const char *buf, idx_t len, idx_t &pos, dtime_t &result, bool strict) { + if (!Time::TryConvertInternal(buf, len, pos, result, strict)) { + if (!strict) { + // last chance, check if we can parse as timestamp + timestamp_t timestamp; + if (Timestamp::TryConvertTimestamp(buf, len, timestamp)) { + result = Timestamp::GetTime(timestamp); + return true; + } + } + return false; + } + return true; } -template <> -Value Value::CreateValue(hugeint_t value) { - return Value::HUGEINT(value); +string Time::ConversionError(const string &str) { + return StringUtil::Format("time field value out of range: \"%s\", " + "expected format is ([YYY-MM-DD ]HH:MM:SS[.MS])", + str); } -template <> -Value Value::CreateValue(const char *value) { - return Value(string(value)); +string Time::ConversionError(string_t str) { + return Time::ConversionError(str.GetString()); } -template <> -Value Value::CreateValue(string value) { // NOLINT: required for templating - return Value::BLOB(value); +dtime_t Time::FromCString(const char *buf, idx_t len, bool strict) { + dtime_t result; + idx_t pos; + if (!Time::TryConvertTime(buf, len, pos, result, strict)) { + throw 
ConversionException(ConversionError(string(buf, len))); + } + return result; } -template <> -Value Value::CreateValue(string_t value) { - return Value(value); +dtime_t Time::FromString(const string &str, bool strict) { + return Time::FromCString(str.c_str(), str.size(), strict); } -template <> -Value Value::CreateValue(float value) { - return Value::FLOAT(value); -} +string Time::ToString(dtime_t time) { + int32_t time_units[4]; + Time::Convert(time, time_units[0], time_units[1], time_units[2], time_units[3]); -template <> -Value Value::CreateValue(double value) { - return Value::DOUBLE(value); + char micro_buffer[6]; + auto length = TimeToStringCast::Length(time_units, micro_buffer); + auto buffer = unique_ptr(new char[length]); + TimeToStringCast::Format(buffer.get(), length, time_units, micro_buffer); + return string(buffer.get(), length); } -template <> -Value Value::CreateValue(Value value) { - return value; +dtime_t Time::FromTime(int32_t hour, int32_t minute, int32_t second, int32_t microseconds) { + int64_t result; + result = hour; // hours + result = result * Interval::MINS_PER_HOUR + minute; // hours -> minutes + result = result * Interval::SECS_PER_MINUTE + second; // minutes -> seconds + result = result * Interval::MICROS_PER_SEC + microseconds; // seconds -> microseconds + return dtime_t(result); } -//===--------------------------------------------------------------------===// -// GetValue -//===--------------------------------------------------------------------===// -template -T Value::GetValueInternal() const { - if (is_null) { - return NullValue(); +// LCOV_EXCL_START +#ifdef DEBUG +static bool AssertValidTime(int32_t hour, int32_t minute, int32_t second, int32_t microseconds) { + if (hour < 0 || hour >= 24) { + return false; } - switch (type_.id()) { - case LogicalTypeId::BOOLEAN: - return Cast::Operation(value_.boolean); - case LogicalTypeId::TINYINT: - return Cast::Operation(value_.tinyint); - case LogicalTypeId::SMALLINT: - return 
Cast::Operation(value_.smallint); - case LogicalTypeId::INTEGER: - return Cast::Operation(value_.integer); - case LogicalTypeId::BIGINT: - return Cast::Operation(value_.bigint); - case LogicalTypeId::HUGEINT: - return Cast::Operation(value_.hugeint); - case LogicalTypeId::UTINYINT: - return Cast::Operation(value_.utinyint); - case LogicalTypeId::USMALLINT: - return Cast::Operation(value_.usmallint); - case LogicalTypeId::UINTEGER: - return Cast::Operation(value_.uinteger); - case LogicalTypeId::UBIGINT: - return Cast::Operation(value_.ubigint); - case LogicalTypeId::FLOAT: - return Cast::Operation(value_.float_); - case LogicalTypeId::DOUBLE: - return Cast::Operation(value_.double_); - case LogicalTypeId::VARCHAR: - return Cast::Operation(str_value.c_str()); - case LogicalTypeId::DECIMAL: - return CastAs(LogicalType::DOUBLE).GetValueInternal(); - default: - throw NotImplementedException("Unimplemented type \"%s\" for GetValue()", type_.ToString()); + if (minute < 0 || minute >= 60) { + return false; } -} - -template <> -bool Value::GetValue() const { - return GetValueInternal(); -} -template <> -int8_t Value::GetValue() const { - return GetValueInternal(); -} -template <> -int16_t Value::GetValue() const { - return GetValueInternal(); -} -template <> -int32_t Value::GetValue() const { - if (type_.id() == LogicalTypeId::DATE) { - return value_.integer; + if (second < 0 || second > 60) { + return false; } - return GetValueInternal(); -} -template <> -int64_t Value::GetValue() const { - if (type_.id() == LogicalTypeId::TIMESTAMP || type_.id() == LogicalTypeId::TIME) { - return value_.bigint; + if (microseconds < 0 || microseconds > 1000000) { + return false; } - return GetValueInternal(); -} -template <> -hugeint_t Value::GetValue() const { - return GetValueInternal(); -} -template <> -uint8_t Value::GetValue() const { - return GetValueInternal(); -} -template <> -uint16_t Value::GetValue() const { - return GetValueInternal(); -} -template <> -string Value::GetValue() 
const { - return ToString(); -} -template <> -float Value::GetValue() const { - return GetValueInternal(); -} -template <> -double Value::GetValue() const { - return GetValueInternal(); -} -template <> -uintptr_t Value::GetValue() const { - D_ASSERT(type() == LogicalType::POINTER); - return value_.pointer; + return true; } +#endif +// LCOV_EXCL_STOP -Value Value::Numeric(const LogicalType &type, int64_t value) { - switch (type.id()) { - case LogicalTypeId::TINYINT: - D_ASSERT(value <= NumericLimits::Maximum()); - return Value::TINYINT((int8_t)value); - case LogicalTypeId::SMALLINT: - D_ASSERT(value <= NumericLimits::Maximum()); - return Value::SMALLINT((int16_t)value); - case LogicalTypeId::INTEGER: - D_ASSERT(value <= NumericLimits::Maximum()); - return Value::INTEGER((int32_t)value); - case LogicalTypeId::BIGINT: - return Value::BIGINT(value); - case LogicalTypeId::HUGEINT: - return Value::HUGEINT(value); - case LogicalTypeId::DECIMAL: - return Value::DECIMAL(value, type.width(), type.scale()); - case LogicalTypeId::FLOAT: - return Value((float)value); - case LogicalTypeId::DOUBLE: - return Value((double)value); - case LogicalTypeId::HASH: - return Value::HASH(value); - case LogicalTypeId::POINTER: - return Value::POINTER(value); - case LogicalTypeId::DATE: - D_ASSERT(value <= NumericLimits::Maximum()); - return Value::DATE(value); - case LogicalTypeId::TIME: - D_ASSERT(value <= NumericLimits::Maximum()); - return Value::TIME(value); - case LogicalTypeId::TIMESTAMP: - return Value::TIMESTAMP(value); - default: - throw InvalidTypeException(type, "Numeric requires numeric type"); - } +void Time::Convert(dtime_t dtime, int32_t &hour, int32_t &min, int32_t &sec, int32_t µs) { + int64_t time = dtime.micros; + hour = int32_t(time / Interval::MICROS_PER_HOUR); + time -= int64_t(hour) * Interval::MICROS_PER_HOUR; + min = int32_t(time / Interval::MICROS_PER_MINUTE); + time -= int64_t(min) * Interval::MICROS_PER_MINUTE; + sec = int32_t(time / Interval::MICROS_PER_SEC); + 
time -= int64_t(sec) * Interval::MICROS_PER_SEC; + micros = int32_t(time); +#ifdef DEBUG + D_ASSERT(AssertValidTime(hour, min, sec, micros)); +#endif } -//===--------------------------------------------------------------------===// -// GetValueUnsafe -//===--------------------------------------------------------------------===// -template <> -int8_t &Value::GetValueUnsafe() { - D_ASSERT(type_.InternalType() == PhysicalType::INT8 || type_.InternalType() == PhysicalType::BOOL); - return value_.tinyint; -} +} // namespace duckdb -template <> -int16_t &Value::GetValueUnsafe() { - D_ASSERT(type_.InternalType() == PhysicalType::INT16); - return value_.smallint; -} -template <> -int32_t &Value::GetValueUnsafe() { - D_ASSERT(type_.InternalType() == PhysicalType::INT32); - return value_.integer; -} -template <> -int64_t &Value::GetValueUnsafe() { - D_ASSERT(type_.InternalType() == PhysicalType::INT64); - return value_.bigint; -} -template <> -hugeint_t &Value::GetValueUnsafe() { - D_ASSERT(type_.InternalType() == PhysicalType::INT128); - return value_.hugeint; -} -template <> -uint8_t &Value::GetValueUnsafe() { - D_ASSERT(type_.InternalType() == PhysicalType::UINT8); - return value_.utinyint; -} -template <> -uint16_t &Value::GetValueUnsafe() { - D_ASSERT(type_.InternalType() == PhysicalType::UINT16); - return value_.usmallint; -} -template <> -uint32_t &Value::GetValueUnsafe() { - D_ASSERT(type_.InternalType() == PhysicalType::UINT32); - return value_.uinteger; -} -template <> -uint64_t &Value::GetValueUnsafe() { - D_ASSERT(type_.InternalType() == PhysicalType::UINT64); - return value_.ubigint; -} -template <> -string &Value::GetValueUnsafe() { - D_ASSERT(type_.InternalType() == PhysicalType::VARCHAR); - return str_value; + + +#include + +namespace duckdb { + +static_assert(sizeof(timestamp_t) == sizeof(int64_t), "timestamp_t was padded"); + +// timestamp/datetime uses 64 bits, high 32 bits for date and low 32 bits for time +// string format is YYYY-MM-DDThh:mm:ssZ +// T 
may be a space +// Z is optional +// ISO 8601 +bool Timestamp::TryConvertTimestamp(const char *str, idx_t len, timestamp_t &result) { + idx_t pos; + date_t date; + dtime_t time; + if (!Date::TryConvertDate(str, len, pos, date)) { + return false; + } + if (pos == len) { + // no time: only a date + return Timestamp::TryFromDatetime(date, dtime_t(0), result); + } + // try to parse a time field + if (str[pos] == ' ' || str[pos] == 'T') { + pos++; + } + idx_t time_pos = 0; + if (!Time::TryConvertTime(str + pos, len - pos, time_pos, time)) { + return false; + } + pos += time_pos; + if (!Timestamp::TryFromDatetime(date, time, result)) { + return false; + } + if (pos < len) { + // skip a "Z" at the end (as per the ISO8601 specs) + if (str[pos] == 'Z') { + pos++; + } + int hour_offset, minute_offset; + if (Timestamp::TryParseUTCOffset(str, pos, len, hour_offset, minute_offset)) { + result -= hour_offset * Interval::MICROS_PER_HOUR + minute_offset * Interval::MICROS_PER_MINUTE; + } + + // skip any spaces at the end + while (pos < len && StringUtil::CharacterIsSpace(str[pos])) { + pos++; + } + if (pos < len) { + return false; + } + } + return true; } -template <> -float &Value::GetValueUnsafe() { - D_ASSERT(type_.InternalType() == PhysicalType::FLOAT); - return value_.float_; +string Timestamp::ConversionError(const string &str) { + return StringUtil::Format("timestamp field value out of range: \"%s\", " + "expected format is (YYYY-MM-DD HH:MM:SS[.MS])", + str); } -template <> -double &Value::GetValueUnsafe() { - D_ASSERT(type_.InternalType() == PhysicalType::DOUBLE); - return value_.double_; +string Timestamp::ConversionError(string_t str) { + return Timestamp::ConversionError(str.GetString()); } -Value Value::Numeric(const LogicalType &type, hugeint_t value) { - switch (type.id()) { - case LogicalTypeId::HUGEINT: - return Value::HUGEINT(value); - default: - return Value::Numeric(type, Hugeint::Cast(value)); +timestamp_t Timestamp::FromCString(const char *str, idx_t len) { + 
timestamp_t result; + if (!Timestamp::TryConvertTimestamp(str, len, result)) { + throw ConversionException(Timestamp::ConversionError(string(str, len))); } + return result; } -string Value::ToString() const { - if (is_null) { - return "NULL"; +bool Timestamp::TryParseUTCOffset(const char *str, idx_t &pos, idx_t len, int &hour_offset, int &minute_offset) { + minute_offset = 0; + idx_t curpos = pos; + // parse the next 3 characters + if (curpos + 3 > len) { + // no characters left to parse + return false; } - switch (type_.id()) { - case LogicalTypeId::BOOLEAN: - return value_.boolean ? "True" : "False"; - case LogicalTypeId::TINYINT: - return to_string(value_.tinyint); - case LogicalTypeId::SMALLINT: - return to_string(value_.smallint); - case LogicalTypeId::INTEGER: - return to_string(value_.integer); - case LogicalTypeId::BIGINT: - return to_string(value_.bigint); - case LogicalTypeId::UTINYINT: - return to_string(value_.utinyint); - case LogicalTypeId::USMALLINT: - return to_string(value_.usmallint); - case LogicalTypeId::UINTEGER: - return to_string(value_.uinteger); - case LogicalTypeId::UBIGINT: - return to_string(value_.ubigint); - case LogicalTypeId::HUGEINT: - return Hugeint::ToString(value_.hugeint); - case LogicalTypeId::FLOAT: - return to_string(value_.float_); - case LogicalTypeId::DOUBLE: - return to_string(value_.double_); - case LogicalTypeId::DECIMAL: { - auto internal_type = type_.InternalType(); - if (internal_type == PhysicalType::INT16) { - return Decimal::ToString(value_.smallint, type_.scale()); - } else if (internal_type == PhysicalType::INT32) { - return Decimal::ToString(value_.integer, type_.scale()); - } else if (internal_type == PhysicalType::INT64) { - return Decimal::ToString(value_.bigint, type_.scale()); - } else { - D_ASSERT(internal_type == PhysicalType::INT128); - return Decimal::ToString(value_.hugeint, type_.scale()); - } + char sign_char = str[curpos]; + if (sign_char != '+' && sign_char != '-') { + // expected either + or - + 
return false; } - case LogicalTypeId::DATE: - return Date::ToString(value_.integer); - case LogicalTypeId::TIME: - return Time::ToString(value_.bigint); - case LogicalTypeId::TIMESTAMP: - return Timestamp::ToString(value_.bigint); - case LogicalTypeId::INTERVAL: - return Interval::ToString(value_.interval); - case LogicalTypeId::VARCHAR: - return str_value; - case LogicalTypeId::BLOB: - return Blob::ToString(string_t(str_value)); - case LogicalTypeId::POINTER: - return to_string(value_.pointer); - case LogicalTypeId::HASH: - return to_string(value_.hash); - case LogicalTypeId::STRUCT: { - string ret = "<"; - for (size_t i = 0; i < struct_value.size(); i++) { - auto &child = struct_value[i]; - ret += child.first + ": " + child.second.ToString(); - if (i < struct_value.size() - 1) { - ret += ", "; - } - } - ret += ">"; - return ret; + curpos++; + if (!StringUtil::CharacterIsDigit(str[curpos]) || !StringUtil::CharacterIsDigit(str[curpos + 1])) { + // expected +HH or -HH + return false; } - case LogicalTypeId::LIST: { - string ret = "["; - for (size_t i = 0; i < list_value.size(); i++) { - auto &child = list_value[i]; - ret += child.ToString(); - if (i < list_value.size() - 1) { - ret += ", "; - } - } - ret += "]"; - return ret; + hour_offset = (str[curpos] - '0') * 10 + (str[curpos + 1] - '0'); + if (sign_char == '-') { + hour_offset = -hour_offset; } - default: - throw NotImplementedException("Unimplemented type for printing: %s", type_.ToString()); + curpos += 2; + + // optional minute specifier: expected either "MM" or ":MM" + if (curpos >= len) { + // done, nothing left + pos = curpos; + return true; + } + if (str[curpos] == ':') { + curpos++; + } + if (curpos + 2 > len || !StringUtil::CharacterIsDigit(str[curpos]) || + !StringUtil::CharacterIsDigit(str[curpos + 1])) { + // no MM specifier + pos = curpos; + return true; + } + // we have an MM specifier: parse it + minute_offset = (str[curpos] - '0') * 10 + (str[curpos + 1] - '0'); + if (sign_char == '-') { + 
minute_offset = -minute_offset; } + pos = curpos + 2; + return true; } -//===--------------------------------------------------------------------===// -// Numeric Operators -//===--------------------------------------------------------------------===// -Value Value::operator+(const Value &rhs) const { - return ValueOperations::Add(*this, rhs); +timestamp_t Timestamp::FromString(const string &str) { + return Timestamp::FromCString(str.c_str(), str.size()); } -Value Value::operator-(const Value &rhs) const { - return ValueOperations::Subtract(*this, rhs); +string Timestamp::ToString(timestamp_t timestamp) { + date_t date; + dtime_t time; + Timestamp::Convert(timestamp, date, time); + return Date::ToString(date) + " " + Time::ToString(time); } -Value Value::operator*(const Value &rhs) const { - return ValueOperations::Multiply(*this, rhs); +date_t Timestamp::GetDate(timestamp_t timestamp) { + return date_t((timestamp.value + (timestamp.value < 0)) / Interval::MICROS_PER_DAY - (timestamp.value < 0)); } -Value Value::operator/(const Value &rhs) const { - return ValueOperations::Divide(*this, rhs); +dtime_t Timestamp::GetTime(timestamp_t timestamp) { + date_t date = Timestamp::GetDate(timestamp); + return dtime_t(timestamp.value - (int64_t(date.days) * int64_t(Interval::MICROS_PER_DAY))); } -Value Value::operator%(const Value &rhs) const { - throw NotImplementedException("value modulo"); - // return ValueOperations::Modulo(*this, rhs); +bool Timestamp::TryFromDatetime(date_t date, dtime_t time, timestamp_t &result) { + if (!TryMultiplyOperator::Operation(date.days, Interval::MICROS_PER_DAY, result.value)) { + return false; + } + if (!TryAddOperator::Operation(result.value, time.micros, result.value)) { + return false; + } + return true; } -//===--------------------------------------------------------------------===// -// Comparison Operators -//===--------------------------------------------------------------------===// -bool Value::operator==(const Value &rhs) const { - 
return ValueOperations::Equals(*this, rhs); +timestamp_t Timestamp::FromDatetime(date_t date, dtime_t time) { + timestamp_t result; + if (!TryFromDatetime(date, time, result)) { + throw Exception("Overflow exception in date/time -> timestamp conversion"); + } + return result; } -bool Value::operator!=(const Value &rhs) const { - return ValueOperations::NotEquals(*this, rhs); +void Timestamp::Convert(timestamp_t timestamp, date_t &out_date, dtime_t &out_time) { + out_date = GetDate(timestamp); + out_time = dtime_t(timestamp.value - (int64_t(out_date.days) * int64_t(Interval::MICROS_PER_DAY))); + D_ASSERT(timestamp == Timestamp::FromDatetime(out_date, out_time)); } -bool Value::operator<(const Value &rhs) const { - return ValueOperations::LessThan(*this, rhs); +timestamp_t Timestamp::GetCurrentTimestamp() { + auto now = system_clock::now(); + auto epoch_ms = duration_cast(now.time_since_epoch()).count(); + return Timestamp::FromEpochMs(epoch_ms); } -bool Value::operator>(const Value &rhs) const { - return ValueOperations::GreaterThan(*this, rhs); +timestamp_t Timestamp::FromEpochSeconds(int64_t sec) { + int64_t result; + if (!TryMultiplyOperator::Operation(sec, Interval::MICROS_PER_SEC, result)) { + throw ConversionException("Could not convert Timestamp(S) to Timestamp(US)"); + } + return timestamp_t(result); } -bool Value::operator<=(const Value &rhs) const { - return ValueOperations::LessThanEquals(*this, rhs); +timestamp_t Timestamp::FromEpochMs(int64_t ms) { + int64_t result; + if (!TryMultiplyOperator::Operation(ms, Interval::MICROS_PER_MSEC, result)) { + throw ConversionException("Could not convert Timestamp(MS) to Timestamp(US)"); + } + return timestamp_t(result); } -bool Value::operator>=(const Value &rhs) const { - return ValueOperations::GreaterThanEquals(*this, rhs); +timestamp_t Timestamp::FromEpochMicroSeconds(int64_t micros) { + return timestamp_t(micros); } -bool Value::operator==(const int64_t &rhs) const { - return *this == Value::Numeric(type_, 
rhs); +timestamp_t Timestamp::FromEpochNanoSeconds(int64_t ns) { + return timestamp_t(ns / 1000); } -bool Value::operator!=(const int64_t &rhs) const { - return *this != Value::Numeric(type_, rhs); +int64_t Timestamp::GetEpochSeconds(timestamp_t timestamp) { + return timestamp.value / Interval::MICROS_PER_SEC; } -bool Value::operator<(const int64_t &rhs) const { - return *this < Value::Numeric(type_, rhs); +int64_t Timestamp::GetEpochMs(timestamp_t timestamp) { + return timestamp.value / Interval::MICROS_PER_MSEC; } -bool Value::operator>(const int64_t &rhs) const { - return *this > Value::Numeric(type_, rhs); +int64_t Timestamp::GetEpochMicroSeconds(timestamp_t timestamp) { + return timestamp.value; } -bool Value::operator<=(const int64_t &rhs) const { - return *this <= Value::Numeric(type_, rhs); +int64_t Timestamp::GetEpochNanoSeconds(timestamp_t timestamp) { + int64_t result; + int64_t ns_in_us = 1000; + if (!TryMultiplyOperator::Operation(timestamp.value, ns_in_us, result)) { + throw ConversionException("Could not convert Timestamp(US) to Timestamp(NS)"); + } + return result; } -bool Value::operator>=(const int64_t &rhs) const { - return *this >= Value::Numeric(type_, rhs); -} +} // namespace duckdb -Value Value::CastAs(const LogicalType &target_type, bool strict) const { - if (type_ == target_type) { - return Copy(); - } - Vector input, result; - input.Reference(*this); - result.Initialize(target_type); - VectorOperations::Cast(input, result, 1, strict); - return result.GetValue(0); -} -bool Value::TryCastAs(const LogicalType &target_type, bool strict) { - try { - Value new_value = CastAs(target_type, strict); - type_ = target_type; - is_null = new_value.is_null; - value_ = new_value.value_; - str_value = new_value.str_value; - struct_value = new_value.struct_value; - list_value = new_value.list_value; - return true; - } catch (Exception &) { - return false; - } -} +namespace duckdb { -void Value::Serialize(Serializer &serializer) { - 
type_.Serialize(serializer); - serializer.Write(is_null); - if (!is_null) { - switch (type_.InternalType()) { - case PhysicalType::BOOL: - serializer.Write(value_.boolean); - break; - case PhysicalType::INT8: - serializer.Write(value_.tinyint); - break; - case PhysicalType::INT16: - serializer.Write(value_.smallint); - break; - case PhysicalType::INT32: - serializer.Write(value_.integer); - break; - case PhysicalType::INT64: - serializer.Write(value_.bigint); - break; - case PhysicalType::UINT8: - serializer.Write(value_.utinyint); - break; - case PhysicalType::UINT16: - serializer.Write(value_.usmallint); - break; - case PhysicalType::UINT32: - serializer.Write(value_.uinteger); - break; - case PhysicalType::UINT64: - serializer.Write(value_.ubigint); - break; - case PhysicalType::INT128: - serializer.Write(value_.hugeint); - break; - case PhysicalType::FLOAT: - serializer.Write(value_.float_); - break; - case PhysicalType::DOUBLE: - serializer.Write(value_.double_); - break; - case PhysicalType::POINTER: - serializer.Write(value_.pointer); - break; - case PhysicalType::INTERVAL: - serializer.Write(value_.interval); - break; - case PhysicalType::VARCHAR: - serializer.WriteString(str_value); - break; - default: - throw NotImplementedException("Value type not implemented for serialization!"); - } - } +ValidityData::ValidityData(idx_t count) : TemplatedValidityData(count) { +} +ValidityData::ValidityData(const ValidityMask &original, idx_t count) + : TemplatedValidityData(original.GetData(), count) { } -Value Value::Deserialize(Deserializer &source) { - auto type = LogicalType::Deserialize(source); - auto is_null = source.Read(); - Value new_value = Value(type); - if (is_null) { - return new_value; +void ValidityMask::Combine(const ValidityMask &other, idx_t count) { + if (other.AllValid()) { + // X & 1 = X + return; } - new_value.is_null = false; - switch (type.InternalType()) { - case PhysicalType::BOOL: - new_value.value_.boolean = source.Read(); - break; - case 
PhysicalType::INT8: - new_value.value_.tinyint = source.Read(); - break; - case PhysicalType::INT16: - new_value.value_.smallint = source.Read(); - break; - case PhysicalType::INT32: - new_value.value_.integer = source.Read(); - break; - case PhysicalType::INT64: - new_value.value_.bigint = source.Read(); - break; - case PhysicalType::UINT8: - new_value.value_.utinyint = source.Read(); - break; - case PhysicalType::UINT16: - new_value.value_.usmallint = source.Read(); - break; - case PhysicalType::UINT32: - new_value.value_.uinteger = source.Read(); - break; - case PhysicalType::UINT64: - new_value.value_.ubigint = source.Read(); - break; - case PhysicalType::INT128: - new_value.value_.hugeint = source.Read(); - break; - case PhysicalType::FLOAT: - new_value.value_.float_ = source.Read(); - break; - case PhysicalType::DOUBLE: - new_value.value_.double_ = source.Read(); - break; - case PhysicalType::POINTER: - new_value.value_.pointer = source.Read(); - break; - case PhysicalType::INTERVAL: - new_value.value_.interval = source.Read(); - break; - case PhysicalType::VARCHAR: - new_value.str_value = source.Read(); - break; - default: - throw NotImplementedException("Value type not implemented for deserialization"); + if (AllValid()) { + // 1 & Y = Y + Initialize(other); + return; + } + if (validity_mask == other.validity_mask) { + // X & X == X + return; + } + // have to merge + // create a new validity mask that contains the combined mask + auto owned_data = move(validity_data); + auto data = GetData(); + auto other_data = other.GetData(); + + Initialize(count); + auto result_data = GetData(); + + auto entry_count = ValidityData::EntryCount(count); + for (idx_t entry_idx = 0; entry_idx < entry_count; entry_idx++) { + result_data[entry_idx] = data[entry_idx] & other_data[entry_idx]; } - return new_value; } -void Value::Print() { - Printer::Print(ToString()); +// LCOV_EXCL_START +string ValidityMask::ToString(idx_t count) const { + string result = "Validity Mask (" + 
to_string(count) + ") ["; + for (idx_t i = 0; i < count; i++) { + result += RowIsValid(i) ? "." : "X"; + } + result += "]"; + return result; } +// LCOV_EXCL_STOP -bool Value::ValuesAreEqual(const Value &result_value, const Value &value) { - if (result_value.is_null != value.is_null) { - return false; +void ValidityMask::Resize(idx_t old_size, idx_t new_size) { + if (validity_mask) { + auto new_size_count = EntryCount(new_size); + auto old_size_count = EntryCount(old_size); + auto new_owned_data = unique_ptr(new validity_t[new_size_count]); + for (idx_t entry_idx = 0; entry_idx < old_size_count; entry_idx++) { + new_owned_data[entry_idx] = validity_mask[entry_idx]; + } + for (idx_t entry_idx = old_size_count; entry_idx < new_size_count; entry_idx++) { + new_owned_data[entry_idx] = ValidityData::MAX_ENTRY; + } + validity_data->owned_data = move(new_owned_data); + validity_mask = validity_data->owned_data.get(); + } else { + Initialize(new_size); } - if (result_value.is_null && value.is_null) { - // NULL = NULL in checking code - return true; +} + +void ValidityMask::Slice(const ValidityMask &other, idx_t offset) { + if (other.AllValid()) { + validity_mask = nullptr; + validity_data.reset(); + return; } - switch (value.type_.id()) { - case LogicalTypeId::FLOAT: { - auto other = result_value.CastAs(LogicalType::FLOAT); - float ldecimal = value.value_.float_; - float rdecimal = other.value_.float_; - return ApproxEqual(ldecimal, rdecimal); + if (offset == 0) { + Initialize(other); + return; } - case LogicalTypeId::DOUBLE: { - auto other = result_value.CastAs(LogicalType::DOUBLE); - double ldecimal = value.value_.double_; - double rdecimal = other.value_.double_; - return ApproxEqual(ldecimal, rdecimal); + Initialize(STANDARD_VECTOR_SIZE); + +// FIXME THIS NEEDS FIXING! 
+#if 1 + for (idx_t i = offset; i < STANDARD_VECTOR_SIZE; i++) { + Set(i - offset, other.RowIsValid(i)); } - case LogicalTypeId::VARCHAR: { - auto other = result_value.CastAs(LogicalType::VARCHAR); - // some results might contain padding spaces, e.g. when rendering - // VARCHAR(10) and the string only has 6 characters, they will be padded - // with spaces to 10 in the rendering. We don't do that here yet as we - // are looking at internal structures. So just ignore any extra spaces - // on the right - string left = other.str_value; - string right = value.str_value; - StringUtil::RTrim(left); - StringUtil::RTrim(right); - return left == right; +#else + // first shift the "whole" units + idx_t entire_units = offset / BITS_PER_VALUE; + idx_t sub_units = offset - entire_units * BITS_PER_VALUE; + if (entire_units > 0) { + idx_t validity_idx; + for (validity_idx = 0; validity_idx + entire_units < STANDARD_ENTRY_COUNT; validity_idx++) { + validity_mask[validity_idx] = other.validity_mask[validity_idx + entire_units]; + } } - default: - return value == result_value; + // now we shift the remaining sub units + // this gets a bit more complicated because we have to shift over the borders of the entries + // e.g. 
suppose we have 2 entries of length 4 and we left-shift by two + // 0101|1010 + // a regular left-shift of both gets us: + // 0100|1000 + // we then OR the overflow (right-shifted by BITS_PER_VALUE - offset) together to get the correct result + // 0100|1000 -> + // 0110|1000 + if (sub_units > 0) { + idx_t validity_idx; + for (validity_idx = 0; validity_idx + 1 < STANDARD_ENTRY_COUNT; validity_idx++) { + validity_mask[validity_idx] = (other.validity_mask[validity_idx] >> sub_units) | + (other.validity_mask[validity_idx + 1] << (BITS_PER_VALUE - sub_units)); + } + validity_mask[validity_idx] >>= sub_units; } +#ifdef DEBUG + for (idx_t i = offset; i < STANDARD_VECTOR_SIZE; i++) { + D_ASSERT(RowIsValid(i - offset) == other.RowIsValid(i)); + } +#endif +#endif } -template <> -bool Value::IsValid(float value) { - return Value::FloatIsValid(value); -} +} // namespace duckdb + + -template <> -bool Value::IsValid(double value) { - return Value::DoubleIsValid(value); -} -} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/operator/aggregate_operators.hpp +// +// +//===----------------------------------------------------------------------===// +#include +#include +#include +namespace duckdb { +struct Min { + template + static inline T Operation(T left, T right) { + return LessThan::Operation(left, right) ? left : right; + } +}; +struct Max { + template + static inline T Operation(T left, T right) { + return GreaterThan::Operation(left, right) ? 
left : right; + } +}; +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/operator/numeric_binary_operators.hpp +// +// +//===----------------------------------------------------------------------===// -#include // strlen() on Solaris + + + +#include namespace duckdb { -Vector::Vector(const LogicalType &type, bool create_data, bool zero_data) : data(nullptr) { - buffer = make_buffer(VectorType::FLAT_VECTOR, type); - if (create_data) { - Initialize(type, zero_data); +struct DivideOperator { + template + static inline TR Operation(TA left, TB right) { + D_ASSERT(right != 0); // this should be checked before! + return left / right; } -} - -Vector::Vector(const LogicalType &type) : Vector(type, true, false) { -} +}; -Vector::Vector(const LogicalType &type, data_ptr_t dataptr) : data(dataptr) { - buffer = make_buffer(VectorType::FLAT_VECTOR, type); - if (dataptr && type.id() == LogicalTypeId::INVALID) { - throw InvalidTypeException(type, "Cannot create a vector of type INVALID!"); +struct ModuloOperator { + template + static inline TR Operation(TA left, TB right) { + D_ASSERT(right != 0); + return left % right; } -} +}; -Vector::Vector(const Value &value) { - buffer = make_buffer(VectorType::CONSTANT_VECTOR); - Reference(value); -} +template <> +float DivideOperator::Operation(float left, float right); +template <> +double DivideOperator::Operation(double left, double right); +template <> +hugeint_t DivideOperator::Operation(hugeint_t left, hugeint_t right); +template <> +interval_t DivideOperator::Operation(interval_t left, int64_t right); -Vector::Vector() : data(nullptr) { - buffer = make_buffer(VectorType::FLAT_VECTOR, LogicalTypeId::INVALID); -} +template <> +float ModuloOperator::Operation(float left, float right); +template <> +double ModuloOperator::Operation(double left, double right); +template <> +hugeint_t ModuloOperator::Operation(hugeint_t left, hugeint_t right); 
-Vector::Vector(Vector &&other) noexcept - : data(other.data), validity(move(other.validity)), buffer(move(other.buffer)), auxiliary(move(other.auxiliary)) { -} +} // namespace duckdb -void Vector::Reference(const Value &value) { - buffer = VectorBuffer::CreateConstantVector(VectorType::CONSTANT_VECTOR, value.type()); - auxiliary.reset(); - data = buffer->GetData(); - SetValue(0, value); -} -void Vector::Reference(Vector &other) { - buffer = other.buffer; - auxiliary = other.auxiliary; - data = other.data; - validity = other.validity; -} -void Vector::Slice(Vector &other, idx_t offset) { - if (other.GetVectorType() == VectorType::CONSTANT_VECTOR) { - Reference(other); - return; - } - D_ASSERT(GetVectorType() == VectorType::FLAT_VECTOR); - D_ASSERT(other.GetVectorType() == VectorType::FLAT_VECTOR); - // create a reference to the other vector - Reference(other); - if (offset > 0) { - data = data + GetTypeIdSize(GetType().InternalType()) * offset; - validity.Slice(other.validity, offset); - } -} -void Vector::Slice(Vector &other, const SelectionVector &sel, idx_t count) { - Reference(other); - Slice(sel, count); + + + + + + + + + + + +namespace duckdb { + +Value::Value(LogicalType type) : type_(move(type)), is_null(true) { } -void Vector::Slice(const SelectionVector &sel, idx_t count) { - if (GetVectorType() == VectorType::CONSTANT_VECTOR) { - // dictionary on a constant is just a constant - return; - } - if (GetVectorType() == VectorType::DICTIONARY_VECTOR) { - // already a dictionary, slice the current dictionary - auto ¤t_sel = DictionaryVector::SelVector(*this); - auto sliced_dictionary = current_sel.Slice(sel, count); - buffer = make_buffer(move(sliced_dictionary), GetType(), GetVectorType()); - return; - } - auto child_ref = make_buffer(); - child_ref->data.Reference(*this); +Value::Value(int32_t val) : type_(LogicalType::INTEGER), is_null(false) { + value_.integer = val; +} - auto dict_buffer = make_buffer(sel, GetType(), VectorType::DICTIONARY_VECTOR); - 
buffer = move(dict_buffer); - auxiliary = move(child_ref); +Value::Value(int64_t val) : type_(LogicalType::BIGINT), is_null(false) { + value_.bigint = val; } -void Vector::Slice(const SelectionVector &sel, idx_t count, SelCache &cache) { - if (GetVectorType() == VectorType::DICTIONARY_VECTOR) { - // dictionary vector: need to merge dictionaries - // check if we have a cached entry - auto ¤t_sel = DictionaryVector::SelVector(*this); - auto target_data = current_sel.data(); - auto entry = cache.cache.find(target_data); - if (entry != cache.cache.end()) { - // cached entry exists: use that - this->buffer = make_buffer(((DictionaryBuffer &)*entry->second).GetSelVector(), - buffer->GetType(), buffer->GetVectorType()); - } else { - Slice(sel, count); - cache.cache[target_data] = this->buffer; - } - } else { - Slice(sel, count); +Value::Value(float val) : type_(LogicalType::FLOAT), is_null(false) { + if (!Value::FloatIsValid(val)) { + throw OutOfRangeException("Invalid float value %f", val); } + value_.float_ = val; } -void Vector::Initialize(const LogicalType &new_type, bool zero_data) { - if (new_type.id() != LogicalTypeId::INVALID) { - SetType(new_type); - } - auxiliary.reset(); - validity.Reset(); - if (GetTypeIdSize(GetType().InternalType()) > 0) { - buffer = VectorBuffer::CreateStandardVector(VectorType::FLAT_VECTOR, GetType()); - data = buffer->GetData(); - if (zero_data) { - memset(data, 0, STANDARD_VECTOR_SIZE * GetTypeIdSize(new_type.InternalType())); - } - } else { - buffer = VectorBuffer::CreateStandardVector(VectorType::FLAT_VECTOR, GetType()); +Value::Value(double val) : type_(LogicalType::DOUBLE), is_null(false) { + if (!Value::DoubleIsValid(val)) { + throw OutOfRangeException("Invalid double value %f", val); } + value_.double_ = val; } -struct DataArrays { - Vector &vec; - data_ptr_t data; - VectorBuffer *buffer; - idx_t type_size; - bool is_nested; - DataArrays(Vector &vec, data_ptr_t data, VectorBuffer *buffer, idx_t type_size, bool is_nested) - : 
vec(vec), data(data), buffer(buffer), type_size(type_size), is_nested(is_nested) {}; -}; +Value::Value(const char *val) : Value(val ? string(val) : string()) { +} -void FindChildren(std::vector &to_resize, VectorBuffer &auxiliary) { - if (auxiliary.GetBufferType() == VectorBufferType::LIST_BUFFER) { - auto &buffer = (VectorListBuffer &)auxiliary; - auto &child = buffer.GetChild(); - auto data = child.GetData(); - if (!data) { - //! Nested type - DataArrays arrays(child, data, child.GetBuffer().get(), GetTypeIdSize(child.GetType().InternalType()), - true); - to_resize.emplace_back(arrays); - FindChildren(to_resize, *child.GetAuxiliary()); - } else { - DataArrays arrays(child, data, child.GetBuffer().get(), GetTypeIdSize(child.GetType().InternalType()), - false); - to_resize.emplace_back(arrays); - } - } else if (auxiliary.GetBufferType() == VectorBufferType::STRUCT_BUFFER) { - auto &buffer = (VectorStructBuffer &)auxiliary; - auto &children = buffer.GetChildren(); - for (auto &child : children) { - auto data = child.second->GetData(); - if (!data) { - //! Nested type - DataArrays arrays(*child.second, data, child.second->GetBuffer().get(), - GetTypeIdSize(child.second->GetType().InternalType()), true); - to_resize.emplace_back(arrays); - FindChildren(to_resize, *child.second->GetAuxiliary()); - } else { - DataArrays arrays(*child.second, data, child.second->GetBuffer().get(), - GetTypeIdSize(child.second->GetType().InternalType()), false); - to_resize.emplace_back(arrays); - } - } - } +Value::Value(std::nullptr_t val) : Value(LogicalType::VARCHAR) { } -void Vector::Resize(idx_t cur_size, idx_t new_size) { - std::vector to_resize; - if (!data) { - //! 
this is a nested structure - DataArrays arrays(*this, data, buffer.get(), GetTypeIdSize(GetType().InternalType()), true); - to_resize.emplace_back(arrays); - FindChildren(to_resize, *auxiliary); - } else { - DataArrays arrays(*this, data, buffer.get(), GetTypeIdSize(GetType().InternalType()), false); - to_resize.emplace_back(arrays); - } - for (auto &data_to_resize : to_resize) { - if (!data_to_resize.is_nested) { - auto new_data = unique_ptr(new data_t[new_size * data_to_resize.type_size]); - memcpy(new_data.get(), data_to_resize.data, cur_size * data_to_resize.type_size * sizeof(data_t)); - data_to_resize.buffer->SetData(move(new_data)); - data_to_resize.vec.data = data_to_resize.buffer->GetData(); - } - data_to_resize.vec.validity.Resize(cur_size, new_size); - } + +Value::Value(string_t val) : Value(string(val.GetDataUnsafe(), val.GetSize())) { } -void Vector::SetValue(idx_t index, const Value &val) { - if (GetVectorType() == VectorType::DICTIONARY_VECTOR) { - // dictionary: apply dictionary and forward to child - auto &sel_vector = DictionaryVector::SelVector(*this); - auto &child = DictionaryVector::Child(*this); - return child.SetValue(sel_vector.get_index(index), val); - } - if (val.type() != GetType()) { - SetValue(index, val.CastAs(GetType())); - return; - } - if (GetType().id() != LogicalTypeId::STRUCT) { - validity.EnsureWritable(); - validity.Set(index, !val.is_null); - if (val.is_null) { - return; - } +Value::Value(string val) : type_(LogicalType::VARCHAR), is_null(false), str_value(move(val)) { + if (!Value::StringIsValid(str_value.c_str(), str_value.size())) { + throw Exception("String value is not valid UTF8"); } +} - switch (GetType().id()) { +Value Value::MinimumValue(const LogicalType &type) { + switch (type.id()) { case LogicalTypeId::BOOLEAN: - ((bool *)data)[index] = val.value_.boolean; - break; + return Value::BOOLEAN(false); case LogicalTypeId::TINYINT: - ((int8_t *)data)[index] = val.value_.tinyint; - break; + return 
Value::TINYINT(NumericLimits::Minimum()); case LogicalTypeId::SMALLINT: - ((int16_t *)data)[index] = val.value_.smallint; - break; - case LogicalTypeId::DATE: + return Value::SMALLINT(NumericLimits::Minimum()); case LogicalTypeId::INTEGER: - ((int32_t *)data)[index] = val.value_.integer; - break; - case LogicalTypeId::TIMESTAMP: - case LogicalTypeId::HASH: - case LogicalTypeId::TIME: + case LogicalTypeId::SQLNULL: + return Value::INTEGER(NumericLimits::Minimum()); case LogicalTypeId::BIGINT: - ((int64_t *)data)[index] = val.value_.bigint; - break; + return Value::BIGINT(NumericLimits::Minimum()); + case LogicalTypeId::HUGEINT: + return Value::HUGEINT(NumericLimits::Minimum()); case LogicalTypeId::UTINYINT: - ((uint8_t *)data)[index] = val.value_.utinyint; - break; + return Value::UTINYINT(NumericLimits::Minimum()); case LogicalTypeId::USMALLINT: - ((uint16_t *)data)[index] = val.value_.usmallint; - break; + return Value::USMALLINT(NumericLimits::Minimum()); case LogicalTypeId::UINTEGER: - ((uint32_t *)data)[index] = val.value_.uinteger; - break; + return Value::UINTEGER(NumericLimits::Minimum()); case LogicalTypeId::UBIGINT: - ((uint64_t *)data)[index] = val.value_.ubigint; - break; - case LogicalTypeId::HUGEINT: - ((hugeint_t *)data)[index] = val.value_.hugeint; - break; - case LogicalTypeId::DECIMAL: - D_ASSERT(GetType().width() == val.type().width() && GetType().scale() == val.type().scale()); - switch (GetType().InternalType()) { + return Value::UBIGINT(NumericLimits::Minimum()); + case LogicalTypeId::DATE: + return Value::DATE(date_t(NumericLimits::Minimum())); + case LogicalTypeId::TIME: + return Value::TIME(dtime_t(0)); + case LogicalTypeId::TIMESTAMP: + return Value::TIMESTAMP(timestamp_t(NumericLimits::Minimum())); + case LogicalTypeId::TIMESTAMP_SEC: + return Value::TimestampSec(timestamp_t(NumericLimits::Minimum())); + case LogicalTypeId::TIMESTAMP_MS: + return Value::TimestampMs(timestamp_t(NumericLimits::Minimum())); + case LogicalTypeId::TIMESTAMP_NS: 
+ return Value::TimestampNs(timestamp_t(NumericLimits::Minimum())); + case LogicalTypeId::FLOAT: + return Value::FLOAT(NumericLimits::Minimum()); + case LogicalTypeId::DOUBLE: + return Value::DOUBLE(NumericLimits::Minimum()); + case LogicalTypeId::DECIMAL: { + Value result; + switch (type.InternalType()) { case PhysicalType::INT16: - ((int16_t *)data)[index] = val.value_.smallint; + result = Value::MinimumValue(LogicalType::SMALLINT); break; case PhysicalType::INT32: - ((int32_t *)data)[index] = val.value_.integer; + result = Value::MinimumValue(LogicalType::INTEGER); break; case PhysicalType::INT64: - ((int64_t *)data)[index] = val.value_.bigint; + result = Value::MinimumValue(LogicalType::BIGINT); break; case PhysicalType::INT128: - ((hugeint_t *)data)[index] = val.value_.hugeint; + result = Value::MinimumValue(LogicalType::HUGEINT); break; default: - throw NotImplementedException("Widths bigger than 38 are not supported"); - } - break; - case LogicalTypeId::FLOAT: - ((float *)data)[index] = val.value_.float_; - break; - case LogicalTypeId::DOUBLE: - ((double *)data)[index] = val.value_.double_; - break; - case LogicalTypeId::POINTER: - ((uintptr_t *)data)[index] = val.value_.pointer; - break; - case LogicalTypeId::INTERVAL: - ((interval_t *)data)[index] = val.value_.interval; - break; - case LogicalTypeId::VARCHAR: - case LogicalTypeId::BLOB: - ((string_t *)data)[index] = StringVector::AddStringOrBlob(*this, val.str_value); - break; - case LogicalTypeId::STRUCT: { - if (!auxiliary || StructVector::GetEntries(*this).empty()) { - for (size_t i = 0; i < val.struct_value.size(); i++) { - auto &struct_child = val.struct_value[i]; - auto cv = make_unique(struct_child.second.type()); - cv->SetVectorType(GetVectorType()); - StructVector::AddEntry(*this, struct_child.first, move(cv)); - } - } - - auto &children = StructVector::GetEntries(*this); - D_ASSERT(children.size() == val.struct_value.size()); - - for (size_t i = 0; i < val.struct_value.size(); i++) { - auto 
&struct_child = val.struct_value[i]; - D_ASSERT(GetVectorType() == VectorType::CONSTANT_VECTOR || GetVectorType() == VectorType::FLAT_VECTOR); - auto &vec_child = children[i]; - D_ASSERT(vec_child.first == struct_child.first); - vec_child.second->SetValue(index, struct_child.second); - } - } break; - - case LogicalTypeId::LIST: { - if (!auxiliary) { - auto vec_list = make_unique(GetType().child_types()[0].second); - ListVector::SetEntry(*this, move(vec_list)); - } - auto offset = ListVector::GetListSize(*this); - if (!val.list_value.empty()) { - for (idx_t i = 0; i < val.list_value.size(); i++) { - Value v(val.list_value[i]); - ListVector::PushBack(*this, v); - } + throw InternalException("Unknown decimal type"); } - //! now set the pointer - auto &entry = ((list_entry_t *)data)[index]; - entry.length = val.list_value.size(); - entry.offset = offset; - } break; - default: - throw NotImplementedException("Unimplemented type for Vector::SetValue"); - } -} - -Value Vector::GetValue(idx_t index) const { - switch (GetVectorType()) { - case VectorType::CONSTANT_VECTOR: - index = 0; - break; - case VectorType::FLAT_VECTOR: - break; - // dictionary: apply dictionary and forward to child - case VectorType::DICTIONARY_VECTOR: { - auto &sel_vector = DictionaryVector::SelVector(*this); - auto &child = DictionaryVector::Child(*this); - return child.GetValue(sel_vector.get_index(index)); - } - case VectorType::SEQUENCE_VECTOR: { - int64_t start, increment; - SequenceVector::GetSequence(*this, start, increment); - return Value::Numeric(GetType(), start + increment * index); + result.type_ = type; + return result; } default: - throw NotImplementedException("Unimplemented vector type for Vector::GetValue"); + throw InvalidTypeException(type, "MinimumValue requires numeric type"); } +} - if (!validity.RowIsValid(index)) { - return Value(GetType()); - } - switch (GetType().id()) { +Value Value::MaximumValue(const LogicalType &type) { + switch (type.id()) { case 
LogicalTypeId::BOOLEAN: - return Value::BOOLEAN(((bool *)data)[index]); + return Value::BOOLEAN(true); /* the largest BOOLEAN value is true; false is what MinimumValue returns */ case LogicalTypeId::TINYINT: - return Value::TINYINT(((int8_t *)data)[index]); + return Value::TINYINT(NumericLimits::Maximum()); case LogicalTypeId::SMALLINT: - return Value::SMALLINT(((int16_t *)data)[index]); + return Value::SMALLINT(NumericLimits::Maximum()); case LogicalTypeId::INTEGER: - return Value::INTEGER(((int32_t *)data)[index]); - case LogicalTypeId::DATE: - return Value::DATE(((date_t *)data)[index]); - case LogicalTypeId::TIME: - return Value::TIME(((dtime_t *)data)[index]); + case LogicalTypeId::SQLNULL: + return Value::INTEGER(NumericLimits::Maximum()); case LogicalTypeId::BIGINT: - return Value::BIGINT(((int64_t *)data)[index]); + return Value::BIGINT(NumericLimits::Maximum()); + case LogicalTypeId::HUGEINT: + return Value::HUGEINT(NumericLimits::Maximum()); case LogicalTypeId::UTINYINT: - return Value::UTINYINT(((uint8_t *)data)[index]); + return Value::UTINYINT(NumericLimits::Maximum()); case LogicalTypeId::USMALLINT: - return Value::USMALLINT(((uint16_t *)data)[index]); + return Value::USMALLINT(NumericLimits::Maximum()); case LogicalTypeId::UINTEGER: - return Value::UINTEGER(((uint32_t *)data)[index]); + return Value::UINTEGER(NumericLimits::Maximum()); case LogicalTypeId::UBIGINT: - return Value::UBIGINT(((uint64_t *)data)[index]); + return Value::UBIGINT(NumericLimits::Maximum()); + case LogicalTypeId::DATE: + return Value::DATE(date_t(NumericLimits::Maximum())); + case LogicalTypeId::TIME: + return Value::TIME(dtime_t(Interval::SECS_PER_DAY * Interval::MICROS_PER_SEC)); case LogicalTypeId::TIMESTAMP: - return Value::TIMESTAMP(((timestamp_t *)data)[index]); - case LogicalTypeId::HUGEINT: - return Value::HUGEINT(((hugeint_t *)data)[index]); - case LogicalTypeId::DECIMAL: { - switch (GetType().InternalType()) { - case PhysicalType::INT16: - return Value::DECIMAL(((int16_t *)data)[index], GetType().width(), GetType().scale()); - case
PhysicalType::INT32: - return Value::DECIMAL(((int32_t *)data)[index], GetType().width(), GetType().scale()); - case PhysicalType::INT64: - return Value::DECIMAL(((int64_t *)data)[index], GetType().width(), GetType().scale()); - case PhysicalType::INT128: - return Value::DECIMAL(((hugeint_t *)data)[index], GetType().width(), GetType().scale()); - default: - throw NotImplementedException("Widths bigger than 38 are not supported"); - } - } - case LogicalTypeId::HASH: - return Value::HASH(((hash_t *)data)[index]); - case LogicalTypeId::POINTER: - return Value::POINTER(((uintptr_t *)data)[index]); + return Value::TIMESTAMP(timestamp_t(NumericLimits::Maximum())); + case LogicalTypeId::TIMESTAMP_MS: + return Value::TimestampMs(timestamp_t(NumericLimits::Maximum())); + case LogicalTypeId::TIMESTAMP_NS: + return Value::TimestampNs(timestamp_t(NumericLimits::Maximum())); + case LogicalTypeId::TIMESTAMP_SEC: + return Value::TimestampSec(timestamp_t(NumericLimits::Maximum())); case LogicalTypeId::FLOAT: - return Value::FLOAT(((float *)data)[index]); + return Value::FLOAT(NumericLimits::Maximum()); case LogicalTypeId::DOUBLE: - return Value::DOUBLE(((double *)data)[index]); - case LogicalTypeId::INTERVAL: - return Value::INTERVAL(((interval_t *)data)[index]); - case LogicalTypeId::VARCHAR: { - auto str = ((string_t *)data)[index]; - return Value(str.GetString()); - } - case LogicalTypeId::BLOB: { - auto str = ((string_t *)data)[index]; - return Value::BLOB((const_data_ptr_t)str.GetDataUnsafe(), str.GetSize()); - } - case LogicalTypeId::STRUCT: { - Value ret(GetType()); - ret.is_null = false; - // we can derive the value schema from the vector schema - for (auto &struct_child : StructVector::GetEntries(*this)) { - ret.struct_value.push_back(pair(struct_child.first, struct_child.second->GetValue(index))); - } - return ret; - } - case LogicalTypeId::LIST: { - Value ret(GetType()); - ret.is_null = false; - auto offlen = ((list_entry_t *)data)[index]; - auto &child_vec = 
ListVector::GetEntry(*this); - for (idx_t i = offlen.offset; i < offlen.offset + offlen.length; i++) { - ret.list_value.push_back(child_vec.GetValue(i)); - } - return ret; - } - default: - throw NotImplementedException("Unimplemented type for value access"); - } -} - -string VectorTypeToString(VectorType type) { - switch (type) { - case VectorType::FLAT_VECTOR: - return "FLAT"; - case VectorType::SEQUENCE_VECTOR: - return "SEQUENCE"; - case VectorType::DICTIONARY_VECTOR: - return "DICTIONARY"; - case VectorType::CONSTANT_VECTOR: - return "CONSTANT"; - default: - return "UNKNOWN"; - } -} - -string Vector::ToString(idx_t count) const { - string retval = - VectorTypeToString(GetVectorType()) + " " + GetType().ToString() + ": " + to_string(count) + " = [ "; - switch (GetVectorType()) { - case VectorType::FLAT_VECTOR: - case VectorType::DICTIONARY_VECTOR: - for (idx_t i = 0; i < count; i++) { - retval += GetValue(i).ToString() + (i == count - 1 ? "" : ", "); - } - break; - case VectorType::CONSTANT_VECTOR: - retval += GetValue(0).ToString(); - break; - case VectorType::SEQUENCE_VECTOR: { - int64_t start, increment; - SequenceVector::GetSequence(*this, start, increment); - for (idx_t i = 0; i < count; i++) { - retval += to_string(start + increment * i) + (i == count - 1 ? 
"" : ", "); - } - break; - } - default: - retval += "UNKNOWN VECTOR TYPE"; - break; - } - retval += "]"; - return retval; -} - -void Vector::Print(idx_t count) { - Printer::Print(ToString(count)); -} - -string Vector::ToString() const { - string retval = VectorTypeToString(GetVectorType()) + " " + GetType().ToString() + ": (UNKNOWN COUNT) [ "; - switch (GetVectorType()) { - case VectorType::FLAT_VECTOR: - case VectorType::DICTIONARY_VECTOR: - break; - case VectorType::CONSTANT_VECTOR: - retval += GetValue(0).ToString(); - break; - case VectorType::SEQUENCE_VECTOR: { - break; - } - default: - retval += "UNKNOWN VECTOR TYPE"; - break; - } - retval += "]"; - return retval; -} - -void Vector::Print() { - Printer::Print(ToString()); -} - -template -static void TemplatedFlattenConstantVector(data_ptr_t data, data_ptr_t old_data, idx_t count) { - auto constant = Load(old_data); - auto output = (T *)data; - for (idx_t i = 0; i < count; i++) { - output[i] = constant; - } -} - -void Vector::Normalify(idx_t count) { - switch (GetVectorType()) { - case VectorType::FLAT_VECTOR: - // already a flat vector - break; - case VectorType::DICTIONARY_VECTOR: { - // create a new flat vector of this type - Vector other(GetType()); - // now copy the data of this vector to the other vector, removing the selection vector in the process - VectorOperations::Copy(*this, other, count, 0, 0); - // create a reference to the data in the other vector - this->Reference(other); - break; - } - case VectorType::CONSTANT_VECTOR: { - bool is_null = ConstantVector::IsNull(*this); - // allocate a new buffer for the vector - auto old_buffer = move(buffer); - auto old_data = data; - buffer = VectorBuffer::CreateStandardVector(VectorType::FLAT_VECTOR, old_buffer->GetType()); - data = buffer->GetData(); - if (is_null) { - // constant NULL, set nullmask - validity.EnsureWritable(); - validity.SetAllInvalid(count); - return; - } - // non-null constant: have to repeat the constant - switch 
(GetType().InternalType()) { - case PhysicalType::BOOL: - TemplatedFlattenConstantVector(data, old_data, count); - break; - case PhysicalType::INT8: - TemplatedFlattenConstantVector(data, old_data, count); - break; + return Value::DOUBLE(NumericLimits::Maximum()); + case LogicalTypeId::DECIMAL: { + Value result; + switch (type.InternalType()) { case PhysicalType::INT16: - TemplatedFlattenConstantVector(data, old_data, count); + result = Value::MaximumValue(LogicalType::SMALLINT); break; case PhysicalType::INT32: - TemplatedFlattenConstantVector(data, old_data, count); + result = Value::MaximumValue(LogicalType::INTEGER); break; case PhysicalType::INT64: - TemplatedFlattenConstantVector(data, old_data, count); - break; - case PhysicalType::UINT8: - TemplatedFlattenConstantVector(data, old_data, count); - break; - case PhysicalType::UINT16: - TemplatedFlattenConstantVector(data, old_data, count); - break; - case PhysicalType::UINT32: - TemplatedFlattenConstantVector(data, old_data, count); - break; - case PhysicalType::UINT64: - TemplatedFlattenConstantVector(data, old_data, count); + result = Value::MaximumValue(LogicalType::BIGINT); break; case PhysicalType::INT128: - TemplatedFlattenConstantVector(data, old_data, count); - break; - case PhysicalType::FLOAT: - TemplatedFlattenConstantVector(data, old_data, count); - break; - case PhysicalType::DOUBLE: - TemplatedFlattenConstantVector(data, old_data, count); - break; - case PhysicalType::HASH: - TemplatedFlattenConstantVector(data, old_data, count); - break; - case PhysicalType::POINTER: - TemplatedFlattenConstantVector(data, old_data, count); - break; - case PhysicalType::INTERVAL: - TemplatedFlattenConstantVector(data, old_data, count); - break; - case PhysicalType::VARCHAR: - TemplatedFlattenConstantVector(data, old_data, count); - break; - case PhysicalType::LIST: { - TemplatedFlattenConstantVector(data, old_data, count); + result = Value::MaximumValue(LogicalType::HUGEINT); break; - } - case 
PhysicalType::STRUCT: { - for (auto &child : StructVector::GetEntries(*this)) { - D_ASSERT(child.second->GetVectorType() == VectorType::CONSTANT_VECTOR); - child.second->Normalify(count); - } - } break; default: - throw NotImplementedException("Unimplemented type for VectorOperations::Normalify"); + throw InternalException("Unknown decimal type"); } - break; - } - case VectorType::SEQUENCE_VECTOR: { - int64_t start, increment; - SequenceVector::GetSequence(*this, start, increment); - - buffer = VectorBuffer::CreateStandardVector(VectorType::FLAT_VECTOR, GetType()); - data = buffer->GetData(); - VectorOperations::GenerateSequence(*this, count, start, increment); - break; + result.type_ = type; + return result; } default: - throw NotImplementedException("FIXME: unimplemented type for normalify"); + throw InvalidTypeException(type, "MaximumValue requires numeric type"); } } -void Vector::Normalify(const SelectionVector &sel, idx_t count) { - switch (GetVectorType()) { - case VectorType::FLAT_VECTOR: - // already a flat vector - break; - case VectorType::SEQUENCE_VECTOR: { - int64_t start, increment; - SequenceVector::GetSequence(*this, start, increment); - - buffer = VectorBuffer::CreateStandardVector(VectorType::FLAT_VECTOR, GetType()); - data = buffer->GetData(); - VectorOperations::GenerateSequence(*this, count, sel, start, increment); - break; - } - default: - throw NotImplementedException("Unimplemented type for normalify with selection vector"); - } +Value Value::BOOLEAN(int8_t value) { + Value result(LogicalType::BOOLEAN); + result.value_.boolean = value ? 
true : false; + result.is_null = false; + return result; } -void Vector::Orrify(idx_t count, VectorData &data) { - switch (GetVectorType()) { - case VectorType::DICTIONARY_VECTOR: { - auto &sel = DictionaryVector::SelVector(*this); - auto &child = DictionaryVector::Child(*this); - if (child.GetVectorType() == VectorType::FLAT_VECTOR) { - data.sel = &sel; - data.data = FlatVector::GetData(child); - data.validity = FlatVector::Validity(child); - } else { - // dictionary with non-flat child: create a new reference to the child and normalify it - auto new_aux = make_buffer(); - new_aux->data.Reference(child); - new_aux->data.Normalify(sel, count); - - data.sel = &sel; - data.data = FlatVector::GetData(new_aux->data); - data.validity = FlatVector::Validity(new_aux->data); - this->auxiliary = move(new_aux); - } - break; - } - case VectorType::CONSTANT_VECTOR: - data.sel = &ConstantVector::ZERO_SELECTION_VECTOR; - data.data = ConstantVector::GetData(*this); - data.validity = ConstantVector::Validity(*this); - break; - default: - Normalify(count); - data.sel = &FlatVector::INCREMENTAL_SELECTION_VECTOR; - data.data = FlatVector::GetData(*this); - data.validity = FlatVector::Validity(*this); - break; - } +Value Value::TINYINT(int8_t value) { + Value result(LogicalType::TINYINT); + result.value_.tinyint = value; + result.is_null = false; + return result; } -void Vector::Sequence(int64_t start, int64_t increment) { - this->buffer = make_buffer(VectorType::SEQUENCE_VECTOR, GetType(), sizeof(int64_t) * 2); - auto data = (int64_t *)buffer->GetData(); - data[0] = start; - data[1] = increment; - validity.Reset(); - auxiliary.reset(); +Value Value::SMALLINT(int16_t value) { + Value result(LogicalType::SMALLINT); + result.value_.smallint = value; + result.is_null = false; + return result; } -void Vector::Serialize(idx_t count, Serializer &serializer) { - auto &type = GetType(); - if (TypeIsConstantSize(type.InternalType())) { - // constant size type: simple copy - idx_t write_size = 
GetTypeIdSize(type.InternalType()) * count; - auto ptr = unique_ptr(new data_t[write_size]); - VectorOperations::WriteToStorage(*this, count, ptr.get()); - serializer.WriteData(ptr.get(), write_size); - } else { - VectorData vdata; - Orrify(count, vdata); - - switch (type.InternalType()) { - case PhysicalType::VARCHAR: { - auto strings = (string_t *)vdata.data; - for (idx_t i = 0; i < count; i++) { - auto idx = vdata.sel->get_index(i); - auto source = !vdata.validity.RowIsValid(idx) ? NullValue() : strings[idx]; - serializer.WriteStringLen((const_data_ptr_t)source.GetDataUnsafe(), source.GetSize()); - } - break; - } - default: - throw NotImplementedException("Unimplemented type for Vector::Serialize!"); - } - } +Value Value::INTEGER(int32_t value) { + Value result(LogicalType::INTEGER); + result.value_.integer = value; + result.is_null = false; + return result; } -void Vector::Deserialize(idx_t count, Deserializer &source) { - auto &type = GetType(); - if (TypeIsConstantSize(type.InternalType())) { - // constant size type: read fixed amount of data from - auto column_size = GetTypeIdSize(type.InternalType()) * count; - auto ptr = unique_ptr(new data_t[column_size]); - source.ReadData(ptr.get(), column_size); - - VectorOperations::ReadFromStorage(ptr.get(), count, *this); - } else { - auto strings = FlatVector::GetData(*this); - auto &validity = FlatVector::Validity(*this); - for (idx_t i = 0; i < count; i++) { - // read the strings - auto str = source.Read(); - // now add the string to the StringHeap of the vector - // and write the pointer into the vector - if (IsNullValue((const char *)str.c_str())) { - validity.SetInvalid(i); - } else { - strings[i] = StringVector::AddStringOrBlob(*this, str); - } - } - } +Value Value::BIGINT(int64_t value) { + Value result(LogicalType::BIGINT); + result.value_.bigint = value; + result.is_null = false; + return result; } -void Vector::UTFVerify(const SelectionVector &sel, idx_t count) { -#ifdef DEBUG - if (count == 0) { - 
return; - } - if (GetType().InternalType() == PhysicalType::VARCHAR) { - // we just touch all the strings and let the sanitizer figure out if any - // of them are deallocated/corrupt - switch (GetVectorType()) { - case VectorType::CONSTANT_VECTOR: { - auto string = ConstantVector::GetData(*this); - if (!ConstantVector::IsNull(*this)) { - string->Verify(); - } - break; - } - case VectorType::FLAT_VECTOR: { - auto strings = FlatVector::GetData(*this); - for (idx_t i = 0; i < count; i++) { - auto oidx = sel.get_index(i); - if (validity.RowIsValid(oidx)) { - strings[oidx].Verify(); - } - } - break; - } - default: - break; - } - } -#endif +Value Value::HUGEINT(hugeint_t value) { + Value result(LogicalType::HUGEINT); + result.value_.hugeint = value; + result.is_null = false; + return result; } -void Vector::UTFVerify(idx_t count) { - UTFVerify(FlatVector::INCREMENTAL_SELECTION_VECTOR, count); +Value Value::UTINYINT(uint8_t value) { + Value result(LogicalType::UTINYINT); + result.value_.utinyint = value; + result.is_null = false; + return result; } -void Vector::Verify(const SelectionVector &sel, idx_t count) { -#ifdef DEBUG - if (count == 0) { - return; - } - if (GetVectorType() == VectorType::DICTIONARY_VECTOR) { - auto &child = DictionaryVector::Child(*this); - D_ASSERT(child.GetVectorType() != VectorType::DICTIONARY_VECTOR); - auto &dict_sel = DictionaryVector::SelVector(*this); - for (idx_t i = 0; i < count; i++) { - auto oidx = sel.get_index(i); - auto idx = dict_sel.get_index(oidx); - D_ASSERT(idx < STANDARD_VECTOR_SIZE); - } - // merge the selection vectors and verify the child - auto new_buffer = dict_sel.Slice(sel, count); - SelectionVector new_sel(new_buffer); - child.Verify(new_sel, count); - return; - } - if (TypeIsConstantSize(GetType().InternalType()) && - (GetVectorType() == VectorType::CONSTANT_VECTOR || GetVectorType() == VectorType::FLAT_VECTOR)) { - D_ASSERT(!auxiliary); - } - if (GetType().InternalType() == PhysicalType::DOUBLE) { - // verify that 
there are no INF or NAN values - switch (GetVectorType()) { - case VectorType::CONSTANT_VECTOR: { - auto dbl = ConstantVector::GetData(*this); - if (!ConstantVector::IsNull(*this)) { - D_ASSERT(Value::DoubleIsValid(*dbl)); - } - break; - } - case VectorType::FLAT_VECTOR: { - auto doubles = FlatVector::GetData(*this); - for (idx_t i = 0; i < count; i++) { - auto oidx = sel.get_index(i); - if (validity.RowIsValid(oidx)) { - D_ASSERT(Value::DoubleIsValid(doubles[oidx])); - } - } - break; - } - default: - break; - } - } - if (GetType().id() == LogicalTypeId::VARCHAR) { - // verify that there are no '\0' bytes in string values - switch (GetVectorType()) { - case VectorType::FLAT_VECTOR: { - auto strings = FlatVector::GetData(*this); - for (idx_t i = 0; i < count; i++) { - auto oidx = sel.get_index(i); - if (validity.RowIsValid(oidx)) { - strings[oidx].VerifyNull(); - } - } - break; - } - default: - break; - } - } - - if (GetType().InternalType() == PhysicalType::STRUCT) { - D_ASSERT(GetType().child_types().size() > 0); - if (GetVectorType() == VectorType::FLAT_VECTOR || GetVectorType() == VectorType::CONSTANT_VECTOR) { - auto &children = StructVector::GetEntries(*this); - D_ASSERT(children.size() > 0); - for (auto &child : children) { - child.second->Verify(sel, count); - } - } - } +Value Value::USMALLINT(uint16_t value) { + Value result(LogicalType::USMALLINT); + result.value_.usmallint = value; + result.is_null = false; + return result; +} - if (GetType().InternalType() == PhysicalType::LIST) { - D_ASSERT(GetType().child_types().size() == 1); - if (GetVectorType() == VectorType::CONSTANT_VECTOR) { - if (!ConstantVector::IsNull(*this)) { - ListVector::GetEntry(*this).Verify(ListVector::GetListSize(*this)); - auto le = ConstantVector::GetData(*this); - D_ASSERT(le->offset + le->length <= ListVector::GetListSize(*this)); - } - } else if (GetVectorType() == VectorType::FLAT_VECTOR) { - if (ListVector::HasEntry(*this)) { - 
ListVector::GetEntry(*this).Verify(ListVector::GetListSize(*this)); - } - auto list_data = FlatVector::GetData(*this); - for (idx_t i = 0; i < count; i++) { - auto idx = sel.get_index(i); - auto &le = list_data[idx]; - if (validity.RowIsValid(idx)) { - D_ASSERT(le.offset + le.length <= ListVector::GetListSize(*this)); - } - } - } - } -#endif +Value Value::UINTEGER(uint32_t value) { + Value result(LogicalType::UINTEGER); + result.value_.uinteger = value; + result.is_null = false; + return result; } -void Vector::Verify(idx_t count) { - if (count > STANDARD_VECTOR_SIZE) { - SelectionVector selection_vector(count); - for (size_t i = 0; i < count; i++) { - selection_vector.set_index(i, i); - } - Verify(selection_vector, count); - } else { - Verify(FlatVector::INCREMENTAL_SELECTION_VECTOR, count); - } +Value Value::UBIGINT(uint64_t value) { + Value result(LogicalType::UBIGINT); + result.value_.ubigint = value; + result.is_null = false; + return result; } -string_t StringVector::AddString(Vector &vector, const char *data, idx_t len) { - return StringVector::AddString(vector, string_t(data, len)); +bool Value::FloatIsValid(float value) { + return !(std::isnan(value) || std::isinf(value)); } -string_t StringVector::AddString(Vector &vector, const char *data) { - return StringVector::AddString(vector, string_t(data, strlen(data))); +bool Value::DoubleIsValid(double value) { + return !(std::isnan(value) || std::isinf(value)); } -string_t StringVector::AddString(Vector &vector, const string &data) { - return StringVector::AddString(vector, string_t(data.c_str(), data.size())); +bool Value::StringIsValid(const char *str, idx_t length) { + auto utf_type = Utf8Proc::Analyze(str, length); + return utf_type != UnicodeType::INVALID; } -string_t StringVector::AddString(Vector &vector, string_t data) { - D_ASSERT(vector.GetType().id() == LogicalTypeId::VARCHAR); - if (data.IsInlined()) { - // string will be inlined: no need to store in string heap - return data; - } - if 
(!vector.auxiliary) { - vector.auxiliary = make_buffer(); - } - D_ASSERT(vector.auxiliary->GetBufferType() == VectorBufferType::STRING_BUFFER); - auto &string_buffer = (VectorStringBuffer &)*vector.auxiliary; - return string_buffer.AddString(data); +Value Value::DECIMAL(int16_t value, uint8_t width, uint8_t scale) { + D_ASSERT(width <= Decimal::MAX_WIDTH_INT16); + Value result(LogicalType::DECIMAL(width, scale)); + result.value_.smallint = value; + result.is_null = false; + return result; } -string_t StringVector::AddStringOrBlob(Vector &vector, string_t data) { - D_ASSERT(vector.GetType().InternalType() == PhysicalType::VARCHAR); - if (data.IsInlined()) { - // string will be inlined: no need to store in string heap - return data; - } - if (!vector.auxiliary) { - vector.auxiliary = make_buffer(); - } - D_ASSERT(vector.auxiliary->GetBufferType() == VectorBufferType::STRING_BUFFER); - auto &string_buffer = (VectorStringBuffer &)*vector.auxiliary; - return string_buffer.AddBlob(data); +Value Value::DECIMAL(int32_t value, uint8_t width, uint8_t scale) { + D_ASSERT(width >= Decimal::MAX_WIDTH_INT16 && width <= Decimal::MAX_WIDTH_INT32); + Value result(LogicalType::DECIMAL(width, scale)); + result.value_.integer = value; + result.is_null = false; + return result; } -string_t StringVector::EmptyString(Vector &vector, idx_t len) { - D_ASSERT(vector.GetType().InternalType() == PhysicalType::VARCHAR); - if (len < string_t::INLINE_LENGTH) { - return string_t(len); - } - if (!vector.auxiliary) { - vector.auxiliary = make_buffer(); +Value Value::DECIMAL(int64_t value, uint8_t width, uint8_t scale) { + auto decimal_type = LogicalType::DECIMAL(width, scale); + Value result(decimal_type); + switch (decimal_type.InternalType()) { + case PhysicalType::INT16: + result.value_.smallint = value; + break; + case PhysicalType::INT32: + result.value_.integer = value; + break; + case PhysicalType::INT64: + result.value_.bigint = value; + break; + default: + result.value_.hugeint = value; + 
break; } - D_ASSERT(vector.auxiliary->GetBufferType() == VectorBufferType::STRING_BUFFER); - auto &string_buffer = (VectorStringBuffer &)*vector.auxiliary; - return string_buffer.EmptyString(len); + result.type_.Verify(); + result.is_null = false; + return result; } -void StringVector::AddHandle(Vector &vector, unique_ptr handle) { - D_ASSERT(vector.GetType().InternalType() == PhysicalType::VARCHAR); - if (!vector.auxiliary) { - vector.auxiliary = make_buffer(); - } - auto &string_buffer = (VectorStringBuffer &)*vector.auxiliary; - string_buffer.AddHeapReference(make_buffer(move(handle))); +Value Value::DECIMAL(hugeint_t value, uint8_t width, uint8_t scale) { + D_ASSERT(width >= Decimal::MAX_WIDTH_INT64 && width <= Decimal::MAX_WIDTH_INT128); + Value result(LogicalType::DECIMAL(width, scale)); + result.value_.hugeint = value; + result.is_null = false; + return result; } -void StringVector::AddBuffer(Vector &vector, buffer_ptr buffer) { - D_ASSERT(vector.GetType().InternalType() == PhysicalType::VARCHAR); - if (!vector.auxiliary) { - vector.auxiliary = make_buffer(); +Value Value::FLOAT(float value) { + if (!Value::FloatIsValid(value)) { + throw OutOfRangeException("Invalid float value %f", value); } - auto &string_buffer = (VectorStringBuffer &)*vector.auxiliary; - string_buffer.AddHeapReference(move(buffer)); + Value result(LogicalType::FLOAT); + result.value_.float_ = value; + result.is_null = false; + return result; } -void StringVector::AddHeapReference(Vector &vector, Vector &other) { - D_ASSERT(vector.GetType().InternalType() == PhysicalType::VARCHAR); - D_ASSERT(other.GetType().InternalType() == PhysicalType::VARCHAR); - - if (other.GetVectorType() == VectorType::DICTIONARY_VECTOR) { - StringVector::AddHeapReference(vector, DictionaryVector::Child(other)); - return; - } - if (!other.auxiliary) { - return; - } - if (!vector.auxiliary) { - vector.auxiliary = make_buffer(); +Value Value::DOUBLE(double value) { + if (!Value::DoubleIsValid(value)) { + throw 
OutOfRangeException("Invalid double value %f", value); } - D_ASSERT(vector.auxiliary->GetBufferType() == VectorBufferType::STRING_BUFFER); - D_ASSERT(other.auxiliary->GetBufferType() == VectorBufferType::STRING_BUFFER); - auto &string_buffer = (VectorStringBuffer &)*vector.auxiliary; - string_buffer.AddHeapReference(other.auxiliary); + Value result(LogicalType::DOUBLE); + result.value_.double_ = value; + result.is_null = false; + return result; } -bool StructVector::HasEntries(const Vector &vector) { - D_ASSERT(vector.GetType().id() == LogicalTypeId::STRUCT); - D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR || - vector.GetVectorType() == VectorType::CONSTANT_VECTOR); - D_ASSERT(vector.auxiliary == nullptr || vector.auxiliary->GetBufferType() == VectorBufferType::STRUCT_BUFFER); - return vector.auxiliary != nullptr; +Value Value::HASH(hash_t value) { + Value result(LogicalType::HASH); + result.value_.hash = value; + result.is_null = false; + return result; } -const child_list_t> &StructVector::GetEntries(const Vector &vector) { - D_ASSERT(vector.GetType().id() == LogicalTypeId::STRUCT); - D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR || - vector.GetVectorType() == VectorType::CONSTANT_VECTOR); - D_ASSERT(vector.auxiliary); - D_ASSERT(vector.auxiliary->GetBufferType() == VectorBufferType::STRUCT_BUFFER); - return ((const VectorStructBuffer *)vector.auxiliary.get())->GetChildren(); +Value Value::POINTER(uintptr_t value) { + Value result(LogicalType::POINTER); + result.value_.pointer = value; + result.is_null = false; + return result; } -void StructVector::AddEntry(Vector &vector, const string &name, unique_ptr entry) { - // TODO asser that an entry with this name does not already exist - D_ASSERT(vector.GetType().id() == LogicalTypeId::STRUCT); - D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR || - vector.GetVectorType() == VectorType::CONSTANT_VECTOR); - if (!vector.auxiliary) { - vector.auxiliary = make_buffer(); - } - 
D_ASSERT(vector.auxiliary); - D_ASSERT(vector.auxiliary->GetBufferType() == VectorBufferType::STRUCT_BUFFER); - ((VectorStructBuffer *)vector.auxiliary.get())->AddChild(name, move(entry)); +Value Value::DATE(date_t value) { + Value result(LogicalType::DATE); + result.value_.date = value; + result.is_null = false; + return result; } -bool ListVector::HasEntry(const Vector &vector) { - D_ASSERT(vector.GetType().id() == LogicalTypeId::LIST); - if (vector.GetVectorType() == VectorType::DICTIONARY_VECTOR) { - auto &child = DictionaryVector::Child(vector); - return ListVector::HasEntry(child); - } - D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR || - vector.GetVectorType() == VectorType::CONSTANT_VECTOR); - return vector.auxiliary != nullptr; +Value Value::DATE(int32_t year, int32_t month, int32_t day) { + return Value::DATE(Date::FromDate(year, month, day)); } -const Vector &ListVector::GetEntry(const Vector &vector) { - D_ASSERT(vector.GetType().id() == LogicalTypeId::LIST); - if (vector.GetVectorType() == VectorType::DICTIONARY_VECTOR) { - auto &child = DictionaryVector::Child(vector); - return ListVector::GetEntry(child); - } - D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR || - vector.GetVectorType() == VectorType::CONSTANT_VECTOR); - D_ASSERT(vector.auxiliary); - D_ASSERT(vector.auxiliary->GetBufferType() == VectorBufferType::LIST_BUFFER); - return ((VectorListBuffer *)vector.auxiliary.get())->GetChild(); +Value Value::TIME(dtime_t value) { + Value result(LogicalType::TIME); + result.value_.time = value; + result.is_null = false; + return result; } -Vector &ListVector::GetEntry(Vector &vector) { - const Vector &cvector = vector; - return const_cast(ListVector::GetEntry(cvector)); +Value Value::TIME(int32_t hour, int32_t min, int32_t sec, int32_t micros) { + return Value::TIME(Time::FromTime(hour, min, sec, micros)); } -void ListVector::Initialize(Vector &vec) { - if (!ListVector::HasEntry(vec)) { - auto vec_child = 
make_unique(vec.GetType().child_types()[0].second); - ListVector::SetEntry(vec, move(vec_child)); - } +Value Value::TIMESTAMP(timestamp_t value) { + Value result(LogicalType::TIMESTAMP); + result.value_.timestamp = value; + result.is_null = false; + return result; } -idx_t ListVector::GetListSize(const Vector &vec) { - if (vec.GetVectorType() == VectorType::DICTIONARY_VECTOR) { - auto &child = DictionaryVector::Child(vec); - return ListVector::GetListSize(child); - } - return ((VectorListBuffer &)*vec.auxiliary).size; + +Value Value::TimestampNs(timestamp_t timestamp) { + Value result(LogicalType::TIMESTAMP_NS); + result.value_.timestamp = timestamp; + result.is_null = false; + return result; } -void ListVector::ReferenceEntry(Vector &vector, Vector &other) { - D_ASSERT(vector.GetType().id() == LogicalTypeId::LIST); - D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR || - vector.GetVectorType() == VectorType::CONSTANT_VECTOR); - D_ASSERT(other.GetType().id() == LogicalTypeId::LIST); - D_ASSERT(other.GetVectorType() == VectorType::FLAT_VECTOR || other.GetVectorType() == VectorType::CONSTANT_VECTOR); - vector.auxiliary = other.auxiliary; +Value Value::TimestampMs(timestamp_t timestamp) { + Value result(LogicalType::TIMESTAMP_MS); + result.value_.timestamp = timestamp; + result.is_null = false; + return result; } -void ListVector::SetListSize(Vector &vec, idx_t size) { - ListVector::Initialize(vec); - if (vec.GetVectorType() == VectorType::DICTIONARY_VECTOR) { - auto &child = DictionaryVector::Child(vec); - ListVector::SetListSize(child, size); - } - ((VectorListBuffer &)*vec.auxiliary).size = size; +Value Value::TimestampSec(timestamp_t timestamp) { + Value result(LogicalType::TIMESTAMP_S); + result.value_.timestamp = timestamp; + result.is_null = false; + return result; } -void ListVector::SetEntry(Vector &vector, unique_ptr cc) { - D_ASSERT(vector.GetType().id() == LogicalTypeId::LIST); - D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR || - 
vector.GetVectorType() == VectorType::CONSTANT_VECTOR); - vector.auxiliary = make_buffer(); - D_ASSERT(vector.auxiliary); - D_ASSERT(vector.auxiliary->GetBufferType() == VectorBufferType::LIST_BUFFER); - ((VectorListBuffer *)vector.auxiliary.get())->SetChild(move(cc)); +Value Value::TIMESTAMP(date_t date, dtime_t time) { + return Value::TIMESTAMP(Timestamp::FromDatetime(date, time)); } -void ListVector::Append(Vector &target, const Vector &source, idx_t source_size, idx_t source_offset) { - ListVector::Initialize(target); - if (source_size - source_offset == 0) { - //! Nothing to add - return; - } - auto &target_buffer = (VectorListBuffer &)*target.auxiliary; - target_buffer.Append(source, source_size, source_offset); +Value Value::TIMESTAMP(int32_t year, int32_t month, int32_t day, int32_t hour, int32_t min, int32_t sec, + int32_t micros) { + auto val = Value::TIMESTAMP(Date::FromDate(year, month, day), Time::FromTime(hour, min, sec, micros)); + val.type_ = LogicalType::TIMESTAMP; + return val; } -void ListVector::Append(Vector &target, const Vector &source, const SelectionVector &sel, idx_t source_size, - idx_t source_offset) { - ListVector::Initialize(target); - if (source_size - source_offset == 0) { - //! 
Nothing to add - return; +Value Value::STRUCT(child_list_t values) { + Value result; + child_list_t child_types; + for (auto &child : values) { + child_types.push_back(make_pair(move(child.first), child.second.type())); + result.struct_value.push_back(move(child.second)); } - auto &target_buffer = (VectorListBuffer &)*target.auxiliary; - target_buffer.Append(source, sel, source_size, source_offset); -} + result.type_ = LogicalType::STRUCT(move(child_types)); -void ListVector::PushBack(Vector &target, Value &insert) { - ListVector::Initialize(target); - auto &target_buffer = (VectorListBuffer &)*target.auxiliary; - target_buffer.PushBack(insert); + result.is_null = false; + return result; } -} // namespace duckdb +Value Value::MAP(Value key, Value value) { + Value result; + child_list_t child_types; + child_types.push_back({"key", key.type()}); + child_types.push_back({"value", value.type()}); + result.type_ = LogicalType::MAP(move(child_types)); + result.struct_value.push_back(move(key)); + result.struct_value.push_back(move(value)); + result.is_null = false; + return result; +} +Value Value::LIST(vector values) { + D_ASSERT(!values.empty()); +#ifdef DEBUG + for (idx_t i = 1; i < values.size(); i++) { + D_ASSERT(values[i].type() == values[0].type()); + } +#endif + Value result; + result.type_ = LogicalType::LIST(values[0].type()); + result.list_value = move(values); + result.is_null = false; + return result; +} +Value Value::BLOB(const_data_ptr_t data, idx_t len) { + Value result(LogicalType::BLOB); + result.is_null = false; + result.str_value = string((const char *)data, len); + return result; +} +Value Value::BLOB(const string &data) { + Value result(LogicalType::BLOB); + result.is_null = false; + result.str_value = Blob::ToBlob(string_t(data)); + return result; +} +Value Value::INTERVAL(int32_t months, int32_t days, int64_t micros) { + Value result(LogicalType::INTERVAL); + result.is_null = false; + result.value_.interval.months = months; + 
result.value_.interval.days = days; + result.value_.interval.micros = micros; + return result; +} +Value Value::INTERVAL(interval_t interval) { + return Value::INTERVAL(interval.months, interval.days, interval.micros); +} -namespace duckdb { +//===--------------------------------------------------------------------===// +// CreateValue +//===--------------------------------------------------------------------===// +template <> +Value Value::CreateValue(bool value) { + return Value::BOOLEAN(value); +} -buffer_ptr VectorBuffer::CreateStandardVector(PhysicalType type) { - return make_buffer(STANDARD_VECTOR_SIZE * GetTypeIdSize(type)); +template <> +Value Value::CreateValue(int8_t value) { + return Value::TINYINT(value); } -buffer_ptr VectorBuffer::CreateConstantVector(PhysicalType type) { - return make_buffer(GetTypeIdSize(type)); +template <> +Value Value::CreateValue(int16_t value) { + return Value::SMALLINT(value); } -buffer_ptr VectorBuffer::CreateConstantVector(VectorType vector_type, const LogicalType &type) { - return make_buffer(vector_type, type, GetTypeIdSize(type.InternalType())); +template <> +Value Value::CreateValue(int32_t value) { + return Value::INTEGER(value); } -buffer_ptr VectorBuffer::CreateStandardVector(VectorType vector_type, const LogicalType &type) { - return make_buffer(vector_type, type, STANDARD_VECTOR_SIZE * GetTypeIdSize(type.InternalType())); +template <> +Value Value::CreateValue(int64_t value) { + return Value::BIGINT(value); } -buffer_ptr VectorBuffer::CreateStandardVector(VectorType vector_type, PhysicalType type) { - return make_buffer(vector_type, STANDARD_VECTOR_SIZE * GetTypeIdSize(type)); +template <> +Value Value::CreateValue(uint8_t value) { + return Value::UTINYINT(value); } -VectorStringBuffer::VectorStringBuffer() : VectorBuffer(VectorBufferType::STRING_BUFFER) { +template <> +Value Value::CreateValue(uint16_t value) { + return Value::USMALLINT(value); } -VectorStructBuffer::VectorStructBuffer() : 
VectorBuffer(VectorBufferType::STRUCT_BUFFER) { +template <> +Value Value::CreateValue(uint32_t value) { + return Value::UINTEGER(value); } -VectorStructBuffer::~VectorStructBuffer() { +template <> +Value Value::CreateValue(uint64_t value) { + return Value::UBIGINT(value); } -VectorListBuffer::VectorListBuffer() : VectorBuffer(VectorBufferType::LIST_BUFFER) { +template <> +Value Value::CreateValue(hugeint_t value) { + return Value::HUGEINT(value); } -void VectorListBuffer::SetChild(unique_ptr new_child) { - child = move(new_child); - capacity = STANDARD_VECTOR_SIZE; +template <> +Value Value::CreateValue(date_t value) { + return Value::DATE(value); } -void VectorListBuffer::Reserve(const Vector &to_append, idx_t to_reserve) { - if (to_reserve > capacity) { - idx_t new_capacity = (to_reserve) / STANDARD_VECTOR_SIZE + ((to_reserve) % STANDARD_VECTOR_SIZE != 0); - new_capacity *= STANDARD_VECTOR_SIZE; - if (child->GetType().id() == LogicalTypeId::STRUCT && size == 0) { - // Empty struct, gotta initialize it first - auto &source_children = StructVector::GetEntries(to_append); - for (auto &src_child : source_children) { - auto child_copy = make_unique(src_child.second->GetType()); - StructVector::AddEntry(*child, src_child.first, move(child_copy)); - } - } - child->Resize(capacity, new_capacity); - capacity = new_capacity; - } +template <> +Value Value::CreateValue(dtime_t value) { + return Value::TIME(value); } -void VectorListBuffer::Append(const Vector &to_append, idx_t to_append_size, idx_t source_offset) { - Reserve(to_append, size + to_append_size - source_offset); - VectorOperations::Copy(to_append, *child, to_append_size, source_offset, size); - size += to_append_size - source_offset; +template <> +Value Value::CreateValue(timestamp_t value) { + return Value::TIMESTAMP(value); } -void VectorListBuffer::Append(const Vector &to_append, const SelectionVector &sel, idx_t to_append_size, - idx_t source_offset) { - Reserve(to_append, size + to_append_size - 
source_offset); - VectorOperations::Copy(to_append, *child, sel, to_append_size, source_offset, size); - size += to_append_size - source_offset; +template <> +Value Value::CreateValue(const char *value) { + return Value(string(value)); } -void VectorListBuffer::PushBack(Value &insert) { - if (size + 1 > capacity) { - child->Resize(capacity, capacity * 2); - capacity *= 2; - } - child->SetValue(size++, insert); +template <> +Value Value::CreateValue(string value) { // NOLINT: required for templating + return Value::BLOB(value); } -VectorListBuffer::~VectorListBuffer() { +template <> +Value Value::CreateValue(string_t value) { + return Value(value); } -ManagedVectorBuffer::ManagedVectorBuffer(unique_ptr handle) - : VectorBuffer(VectorBufferType::MANAGED_BUFFER), handle(move(handle)) { +template <> +Value Value::CreateValue(float value) { + return Value::FLOAT(value); } -ManagedVectorBuffer::~ManagedVectorBuffer() { +template <> +Value Value::CreateValue(double value) { + return Value::DOUBLE(value); } -} // namespace duckdb +template <> +Value Value::CreateValue(interval_t value) { + return Value::INTERVAL(value); +} +template <> +Value Value::CreateValue(Value value) { + return value; +} -namespace duckdb { +//===--------------------------------------------------------------------===// +// GetValue +//===--------------------------------------------------------------------===// +template +T Value::GetValueInternal() const { + if (is_null) { + return NullValue(); + } + switch (type_.id()) { + case LogicalTypeId::BOOLEAN: + return Cast::Operation(value_.boolean); + case LogicalTypeId::TINYINT: + return Cast::Operation(value_.tinyint); + case LogicalTypeId::SMALLINT: + return Cast::Operation(value_.smallint); + case LogicalTypeId::INTEGER: + return Cast::Operation(value_.integer); + case LogicalTypeId::BIGINT: + return Cast::Operation(value_.bigint); + case LogicalTypeId::HUGEINT: + return Cast::Operation(value_.hugeint); + case LogicalTypeId::DATE: + return 
Cast::Operation(value_.date); + case LogicalTypeId::TIME: + return Cast::Operation(value_.time); + case LogicalTypeId::TIMESTAMP: + return Cast::Operation(value_.timestamp); + case LogicalTypeId::UTINYINT: + return Cast::Operation(value_.utinyint); + case LogicalTypeId::USMALLINT: + return Cast::Operation(value_.usmallint); + case LogicalTypeId::UINTEGER: + return Cast::Operation(value_.uinteger); + case LogicalTypeId::UBIGINT: + return Cast::Operation(value_.ubigint); + case LogicalTypeId::FLOAT: + return Cast::Operation(value_.float_); + case LogicalTypeId::DOUBLE: + return Cast::Operation(value_.double_); + case LogicalTypeId::VARCHAR: + return Cast::Operation(str_value.c_str()); + case LogicalTypeId::INTERVAL: + return Cast::Operation(value_.interval); + case LogicalTypeId::DECIMAL: + return CastAs(LogicalType::DOUBLE).GetValueInternal(); + default: + throw NotImplementedException("Unimplemented type \"%s\" for GetValue()", type_.ToString()); + } +} -const SelectionVector ConstantVector::ZERO_SELECTION_VECTOR = SelectionVector((sel_t *)ConstantVector::ZERO_VECTOR); -const SelectionVector FlatVector::INCREMENTAL_SELECTION_VECTOR = - SelectionVector((sel_t *)FlatVector::INCREMENTAL_VECTOR); -const sel_t ConstantVector::ZERO_VECTOR[STANDARD_VECTOR_SIZE] = {0}; +template <> +bool Value::GetValue() const { + return GetValueInternal(); +} +template <> +int8_t Value::GetValue() const { + return GetValueInternal(); +} +template <> +int16_t Value::GetValue() const { + return GetValueInternal(); +} +template <> +int32_t Value::GetValue() const { + if (type_.id() == LogicalTypeId::DATE) { + return value_.integer; + } + return GetValueInternal(); +} +template <> +int64_t Value::GetValue() const { + if (type_.id() == LogicalTypeId::TIMESTAMP || type_.id() == LogicalTypeId::TIME || + type_.id() == LogicalTypeId::TIMESTAMP_SEC || type_.id() == LogicalTypeId::TIMESTAMP_NS || + type_.id() == LogicalTypeId::TIMESTAMP_MS) { + return value_.bigint; + } + return GetValueInternal(); 
+} +template <> +hugeint_t Value::GetValue() const { + return GetValueInternal(); +} +template <> +uint8_t Value::GetValue() const { + return GetValueInternal(); +} +template <> +uint16_t Value::GetValue() const { + return GetValueInternal(); +} +template <> +uint32_t Value::GetValue() const { + return GetValueInternal(); +} +template <> +uint64_t Value::GetValue() const { + return GetValueInternal(); +} +template <> +string Value::GetValue() const { + return ToString(); +} +template <> +float Value::GetValue() const { + return GetValueInternal(); +} +template <> +double Value::GetValue() const { + return GetValueInternal(); +} +template <> +date_t Value::GetValue() const { + return GetValueInternal(); +} +template <> +dtime_t Value::GetValue() const { + return GetValueInternal(); +} +template <> +timestamp_t Value::GetValue() const { + return GetValueInternal(); +} -#if STANDARD_VECTOR_SIZE == 2 -const sel_t FlatVector::INCREMENTAL_VECTOR[] = {0, 1}; -#elif STANDARD_VECTOR_SIZE == 4 -const sel_t FlatVector::INCREMENTAL_VECTOR[] = {0, 1, 2, 3}; -#elif STANDARD_VECTOR_SIZE == 8 -const sel_t FlatVector::INCREMENTAL_VECTOR[] = {0, 1, 2, 3, 4, 5, 6, 7}; -#elif STANDARD_VECTOR_SIZE == 16 -const sel_t FlatVector::INCREMENTAL_VECTOR[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; -#elif STANDARD_VECTOR_SIZE == 32 -const sel_t FlatVector::INCREMENTAL_VECTOR[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; -#elif STANDARD_VECTOR_SIZE == 64 -const sel_t FlatVector::INCREMENTAL_VECTOR[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}; -#elif STANDARD_VECTOR_SIZE == 128 -const sel_t FlatVector::INCREMENTAL_VECTOR[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 
19, 20, 21, - 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, - 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, - 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, - 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127}; -#elif STANDARD_VECTOR_SIZE == 256 -const sel_t FlatVector::INCREMENTAL_VECTOR[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, - 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, - 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, - 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, - 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, - 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, - 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, - 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, - 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, - 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255}; -#elif STANDARD_VECTOR_SIZE == 512 -const sel_t FlatVector::INCREMENTAL_VECTOR[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, - 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 
41, 42, 43, 44, 45, - 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, - 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, - 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, - 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, - 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, - 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, - 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, - 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, - 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, - 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, - 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, - 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, - 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, - 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, - 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, - 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, - 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, - 437, 438, 439, 440, 441, 442, 443, 444, 
445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, - 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, - 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, - 506, 507, 508, 509, 510, 511}; -#elif STANDARD_VECTOR_SIZE == 1024 -const sel_t FlatVector::INCREMENTAL_VECTOR[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, - 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, - 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, - 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, - 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, - 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, - 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, - 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, - 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, - 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, - 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, - 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, - 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, - 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, - 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, - 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, - 323, 324, 325, 326, 327, 328, 
329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, - 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, - 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, - 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, - 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, - 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, - 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, - 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, - 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, - 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, - 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, - 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, - 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, - 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, - 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, - 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, - 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, - 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, - 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, - 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, - 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 
721, - 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, - 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, - 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, - 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, - 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, - 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, - 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, - 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, - 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, - 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, - 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, - 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, - 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, - 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, - 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, - 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023}; -#elif STANDARD_VECTOR_SIZE == 2048 -const sel_t FlatVector::INCREMENTAL_VECTOR[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, - 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, - 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, - 76, 77, 78, 79, 80, 81, 82, 83, 84, 
85, 86, 87, 88, 89, 90, 91, 92, 93, 94, - 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, - 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, - 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, - 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, - 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, - 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, - 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, - 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, - 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, - 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, - 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, - 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, - 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, - 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, - 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, - 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, - 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, - 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, - 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, - 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, - 475, 476, 477, 478, 
479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, - 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, - 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, - 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, - 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, - 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, - 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, - 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, - 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, - 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, - 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, - 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, - 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, - 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, - 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, - 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, - 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, - 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, - 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, - 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, - 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 
871, 872, 873, - 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, - 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, - 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, - 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, - 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, - 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, - 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, - 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, - 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, - 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, - 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, - 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, - 1102, 1103, 1104, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 1117, 1118, 1119, 1120, - 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, - 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1155, 1156, 1157, 1158, - 1159, 1160, 1161, 1162, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, - 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, - 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, - 1216, 1217, 1218, 
1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1231, 1232, 1233, 1234, - 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, - 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, - 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, - 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, - 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1328, 1329, - 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337, 1338, 1339, 1340, 1341, 1342, 1343, 1344, 1345, 1346, 1347, 1348, - 1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358, 1359, 1360, 1361, 1362, 1363, 1364, 1365, 1366, 1367, - 1368, 1369, 1370, 1371, 1372, 1373, 1374, 1375, 1376, 1377, 1378, 1379, 1380, 1381, 1382, 1383, 1384, 1385, 1386, - 1387, 1388, 1389, 1390, 1391, 1392, 1393, 1394, 1395, 1396, 1397, 1398, 1399, 1400, 1401, 1402, 1403, 1404, 1405, - 1406, 1407, 1408, 1409, 1410, 1411, 1412, 1413, 1414, 1415, 1416, 1417, 1418, 1419, 1420, 1421, 1422, 1423, 1424, - 1425, 1426, 1427, 1428, 1429, 1430, 1431, 1432, 1433, 1434, 1435, 1436, 1437, 1438, 1439, 1440, 1441, 1442, 1443, - 1444, 1445, 1446, 1447, 1448, 1449, 1450, 1451, 1452, 1453, 1454, 1455, 1456, 1457, 1458, 1459, 1460, 1461, 1462, - 1463, 1464, 1465, 1466, 1467, 1468, 1469, 1470, 1471, 1472, 1473, 1474, 1475, 1476, 1477, 1478, 1479, 1480, 1481, - 1482, 1483, 1484, 1485, 1486, 1487, 1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500, - 1501, 1502, 1503, 1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511, 1512, 1513, 1514, 1515, 1516, 1517, 1518, 1519, - 1520, 1521, 1522, 1523, 1524, 1525, 1526, 1527, 1528, 1529, 1530, 1531, 1532, 1533, 1534, 1535, 1536, 1537, 1538, - 1539, 1540, 1541, 1542, 1543, 1544, 1545, 
1546, 1547, 1548, 1549, 1550, 1551, 1552, 1553, 1554, 1555, 1556, 1557, - 1558, 1559, 1560, 1561, 1562, 1563, 1564, 1565, 1566, 1567, 1568, 1569, 1570, 1571, 1572, 1573, 1574, 1575, 1576, - 1577, 1578, 1579, 1580, 1581, 1582, 1583, 1584, 1585, 1586, 1587, 1588, 1589, 1590, 1591, 1592, 1593, 1594, 1595, - 1596, 1597, 1598, 1599, 1600, 1601, 1602, 1603, 1604, 1605, 1606, 1607, 1608, 1609, 1610, 1611, 1612, 1613, 1614, - 1615, 1616, 1617, 1618, 1619, 1620, 1621, 1622, 1623, 1624, 1625, 1626, 1627, 1628, 1629, 1630, 1631, 1632, 1633, - 1634, 1635, 1636, 1637, 1638, 1639, 1640, 1641, 1642, 1643, 1644, 1645, 1646, 1647, 1648, 1649, 1650, 1651, 1652, - 1653, 1654, 1655, 1656, 1657, 1658, 1659, 1660, 1661, 1662, 1663, 1664, 1665, 1666, 1667, 1668, 1669, 1670, 1671, - 1672, 1673, 1674, 1675, 1676, 1677, 1678, 1679, 1680, 1681, 1682, 1683, 1684, 1685, 1686, 1687, 1688, 1689, 1690, - 1691, 1692, 1693, 1694, 1695, 1696, 1697, 1698, 1699, 1700, 1701, 1702, 1703, 1704, 1705, 1706, 1707, 1708, 1709, - 1710, 1711, 1712, 1713, 1714, 1715, 1716, 1717, 1718, 1719, 1720, 1721, 1722, 1723, 1724, 1725, 1726, 1727, 1728, - 1729, 1730, 1731, 1732, 1733, 1734, 1735, 1736, 1737, 1738, 1739, 1740, 1741, 1742, 1743, 1744, 1745, 1746, 1747, - 1748, 1749, 1750, 1751, 1752, 1753, 1754, 1755, 1756, 1757, 1758, 1759, 1760, 1761, 1762, 1763, 1764, 1765, 1766, - 1767, 1768, 1769, 1770, 1771, 1772, 1773, 1774, 1775, 1776, 1777, 1778, 1779, 1780, 1781, 1782, 1783, 1784, 1785, - 1786, 1787, 1788, 1789, 1790, 1791, 1792, 1793, 1794, 1795, 1796, 1797, 1798, 1799, 1800, 1801, 1802, 1803, 1804, - 1805, 1806, 1807, 1808, 1809, 1810, 1811, 1812, 1813, 1814, 1815, 1816, 1817, 1818, 1819, 1820, 1821, 1822, 1823, - 1824, 1825, 1826, 1827, 1828, 1829, 1830, 1831, 1832, 1833, 1834, 1835, 1836, 1837, 1838, 1839, 1840, 1841, 1842, - 1843, 1844, 1845, 1846, 1847, 1848, 1849, 1850, 1851, 1852, 1853, 1854, 1855, 1856, 1857, 1858, 1859, 1860, 1861, - 1862, 1863, 1864, 1865, 1866, 1867, 1868, 1869, 1870, 1871, 1872, 
1873, 1874, 1875, 1876, 1877, 1878, 1879, 1880, - 1881, 1882, 1883, 1884, 1885, 1886, 1887, 1888, 1889, 1890, 1891, 1892, 1893, 1894, 1895, 1896, 1897, 1898, 1899, - 1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, - 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, - 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, - 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, - 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, - 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, - 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031, 2032, - 2033, 2034, 2035, 2036, 2037, 2038, 2039, 2040, 2041, 2042, 2043, 2044, 2045, 2046, 2047}; -#elif STANDARD_VECTOR_SIZE == 4096 -const sel_t FlatVector::INCREMENTAL_VECTOR[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, - 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, - 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, - 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, - 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, - 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, - 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, - 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, - 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 
187, 188, 189, - 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, - 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, - 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, - 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, - 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, - 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, - 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, - 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, - 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, - 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, - 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, - 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, - 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, - 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, - 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, - 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, - 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, - 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, - 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, - 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, - 570, 571, 572, 573, 574, 575, 576, 577, 
578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, - 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, - 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, - 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, - 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, - 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, - 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, - 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, - 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, - 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, - 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, - 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, - 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, - 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, - 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, - 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, - 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, - 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, - 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, - 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, - 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, - 
969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, - 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, - 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, - 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, - 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, - 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, - 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, - 1102, 1103, 1104, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 1117, 1118, 1119, 1120, - 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, - 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1155, 1156, 1157, 1158, - 1159, 1160, 1161, 1162, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, - 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, - 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, - 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1231, 1232, 1233, 1234, - 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, - 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, - 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, - 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300, 
1301, 1302, 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, - 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1328, 1329, - 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337, 1338, 1339, 1340, 1341, 1342, 1343, 1344, 1345, 1346, 1347, 1348, - 1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358, 1359, 1360, 1361, 1362, 1363, 1364, 1365, 1366, 1367, - 1368, 1369, 1370, 1371, 1372, 1373, 1374, 1375, 1376, 1377, 1378, 1379, 1380, 1381, 1382, 1383, 1384, 1385, 1386, - 1387, 1388, 1389, 1390, 1391, 1392, 1393, 1394, 1395, 1396, 1397, 1398, 1399, 1400, 1401, 1402, 1403, 1404, 1405, - 1406, 1407, 1408, 1409, 1410, 1411, 1412, 1413, 1414, 1415, 1416, 1417, 1418, 1419, 1420, 1421, 1422, 1423, 1424, - 1425, 1426, 1427, 1428, 1429, 1430, 1431, 1432, 1433, 1434, 1435, 1436, 1437, 1438, 1439, 1440, 1441, 1442, 1443, - 1444, 1445, 1446, 1447, 1448, 1449, 1450, 1451, 1452, 1453, 1454, 1455, 1456, 1457, 1458, 1459, 1460, 1461, 1462, - 1463, 1464, 1465, 1466, 1467, 1468, 1469, 1470, 1471, 1472, 1473, 1474, 1475, 1476, 1477, 1478, 1479, 1480, 1481, - 1482, 1483, 1484, 1485, 1486, 1487, 1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500, - 1501, 1502, 1503, 1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511, 1512, 1513, 1514, 1515, 1516, 1517, 1518, 1519, - 1520, 1521, 1522, 1523, 1524, 1525, 1526, 1527, 1528, 1529, 1530, 1531, 1532, 1533, 1534, 1535, 1536, 1537, 1538, - 1539, 1540, 1541, 1542, 1543, 1544, 1545, 1546, 1547, 1548, 1549, 1550, 1551, 1552, 1553, 1554, 1555, 1556, 1557, - 1558, 1559, 1560, 1561, 1562, 1563, 1564, 1565, 1566, 1567, 1568, 1569, 1570, 1571, 1572, 1573, 1574, 1575, 1576, - 1577, 1578, 1579, 1580, 1581, 1582, 1583, 1584, 1585, 1586, 1587, 1588, 1589, 1590, 1591, 1592, 1593, 1594, 1595, - 1596, 1597, 1598, 1599, 1600, 1601, 1602, 1603, 1604, 1605, 1606, 1607, 1608, 1609, 1610, 1611, 1612, 1613, 1614, - 1615, 1616, 1617, 1618, 1619, 1620, 1621, 1622, 1623, 1624, 1625, 1626, 1627, 
1628, 1629, 1630, 1631, 1632, 1633, - 1634, 1635, 1636, 1637, 1638, 1639, 1640, 1641, 1642, 1643, 1644, 1645, 1646, 1647, 1648, 1649, 1650, 1651, 1652, - 1653, 1654, 1655, 1656, 1657, 1658, 1659, 1660, 1661, 1662, 1663, 1664, 1665, 1666, 1667, 1668, 1669, 1670, 1671, - 1672, 1673, 1674, 1675, 1676, 1677, 1678, 1679, 1680, 1681, 1682, 1683, 1684, 1685, 1686, 1687, 1688, 1689, 1690, - 1691, 1692, 1693, 1694, 1695, 1696, 1697, 1698, 1699, 1700, 1701, 1702, 1703, 1704, 1705, 1706, 1707, 1708, 1709, - 1710, 1711, 1712, 1713, 1714, 1715, 1716, 1717, 1718, 1719, 1720, 1721, 1722, 1723, 1724, 1725, 1726, 1727, 1728, - 1729, 1730, 1731, 1732, 1733, 1734, 1735, 1736, 1737, 1738, 1739, 1740, 1741, 1742, 1743, 1744, 1745, 1746, 1747, - 1748, 1749, 1750, 1751, 1752, 1753, 1754, 1755, 1756, 1757, 1758, 1759, 1760, 1761, 1762, 1763, 1764, 1765, 1766, - 1767, 1768, 1769, 1770, 1771, 1772, 1773, 1774, 1775, 1776, 1777, 1778, 1779, 1780, 1781, 1782, 1783, 1784, 1785, - 1786, 1787, 1788, 1789, 1790, 1791, 1792, 1793, 1794, 1795, 1796, 1797, 1798, 1799, 1800, 1801, 1802, 1803, 1804, - 1805, 1806, 1807, 1808, 1809, 1810, 1811, 1812, 1813, 1814, 1815, 1816, 1817, 1818, 1819, 1820, 1821, 1822, 1823, - 1824, 1825, 1826, 1827, 1828, 1829, 1830, 1831, 1832, 1833, 1834, 1835, 1836, 1837, 1838, 1839, 1840, 1841, 1842, - 1843, 1844, 1845, 1846, 1847, 1848, 1849, 1850, 1851, 1852, 1853, 1854, 1855, 1856, 1857, 1858, 1859, 1860, 1861, - 1862, 1863, 1864, 1865, 1866, 1867, 1868, 1869, 1870, 1871, 1872, 1873, 1874, 1875, 1876, 1877, 1878, 1879, 1880, - 1881, 1882, 1883, 1884, 1885, 1886, 1887, 1888, 1889, 1890, 1891, 1892, 1893, 1894, 1895, 1896, 1897, 1898, 1899, - 1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, - 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, - 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 
1955, 1956, - 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, - 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, - 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, - 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031, 2032, - 2033, 2034, 2035, 2036, 2037, 2038, 2039, 2040, 2041, 2042, 2043, 2044, 2045, 2046, 2047, 2048, 2049, 2050, 2051, - 2052, 2053, 2054, 2055, 2056, 2057, 2058, 2059, 2060, 2061, 2062, 2063, 2064, 2065, 2066, 2067, 2068, 2069, 2070, - 2071, 2072, 2073, 2074, 2075, 2076, 2077, 2078, 2079, 2080, 2081, 2082, 2083, 2084, 2085, 2086, 2087, 2088, 2089, - 2090, 2091, 2092, 2093, 2094, 2095, 2096, 2097, 2098, 2099, 2100, 2101, 2102, 2103, 2104, 2105, 2106, 2107, 2108, - 2109, 2110, 2111, 2112, 2113, 2114, 2115, 2116, 2117, 2118, 2119, 2120, 2121, 2122, 2123, 2124, 2125, 2126, 2127, - 2128, 2129, 2130, 2131, 2132, 2133, 2134, 2135, 2136, 2137, 2138, 2139, 2140, 2141, 2142, 2143, 2144, 2145, 2146, - 2147, 2148, 2149, 2150, 2151, 2152, 2153, 2154, 2155, 2156, 2157, 2158, 2159, 2160, 2161, 2162, 2163, 2164, 2165, - 2166, 2167, 2168, 2169, 2170, 2171, 2172, 2173, 2174, 2175, 2176, 2177, 2178, 2179, 2180, 2181, 2182, 2183, 2184, - 2185, 2186, 2187, 2188, 2189, 2190, 2191, 2192, 2193, 2194, 2195, 2196, 2197, 2198, 2199, 2200, 2201, 2202, 2203, - 2204, 2205, 2206, 2207, 2208, 2209, 2210, 2211, 2212, 2213, 2214, 2215, 2216, 2217, 2218, 2219, 2220, 2221, 2222, - 2223, 2224, 2225, 2226, 2227, 2228, 2229, 2230, 2231, 2232, 2233, 2234, 2235, 2236, 2237, 2238, 2239, 2240, 2241, - 2242, 2243, 2244, 2245, 2246, 2247, 2248, 2249, 2250, 2251, 2252, 2253, 2254, 2255, 2256, 2257, 2258, 2259, 2260, - 2261, 2262, 2263, 2264, 2265, 2266, 2267, 2268, 2269, 2270, 2271, 2272, 2273, 2274, 2275, 2276, 2277, 2278, 2279, - 2280, 2281, 
2282, 2283, 2284, 2285, 2286, 2287, 2288, 2289, 2290, 2291, 2292, 2293, 2294, 2295, 2296, 2297, 2298, - 2299, 2300, 2301, 2302, 2303, 2304, 2305, 2306, 2307, 2308, 2309, 2310, 2311, 2312, 2313, 2314, 2315, 2316, 2317, - 2318, 2319, 2320, 2321, 2322, 2323, 2324, 2325, 2326, 2327, 2328, 2329, 2330, 2331, 2332, 2333, 2334, 2335, 2336, - 2337, 2338, 2339, 2340, 2341, 2342, 2343, 2344, 2345, 2346, 2347, 2348, 2349, 2350, 2351, 2352, 2353, 2354, 2355, - 2356, 2357, 2358, 2359, 2360, 2361, 2362, 2363, 2364, 2365, 2366, 2367, 2368, 2369, 2370, 2371, 2372, 2373, 2374, - 2375, 2376, 2377, 2378, 2379, 2380, 2381, 2382, 2383, 2384, 2385, 2386, 2387, 2388, 2389, 2390, 2391, 2392, 2393, - 2394, 2395, 2396, 2397, 2398, 2399, 2400, 2401, 2402, 2403, 2404, 2405, 2406, 2407, 2408, 2409, 2410, 2411, 2412, - 2413, 2414, 2415, 2416, 2417, 2418, 2419, 2420, 2421, 2422, 2423, 2424, 2425, 2426, 2427, 2428, 2429, 2430, 2431, - 2432, 2433, 2434, 2435, 2436, 2437, 2438, 2439, 2440, 2441, 2442, 2443, 2444, 2445, 2446, 2447, 2448, 2449, 2450, - 2451, 2452, 2453, 2454, 2455, 2456, 2457, 2458, 2459, 2460, 2461, 2462, 2463, 2464, 2465, 2466, 2467, 2468, 2469, - 2470, 2471, 2472, 2473, 2474, 2475, 2476, 2477, 2478, 2479, 2480, 2481, 2482, 2483, 2484, 2485, 2486, 2487, 2488, - 2489, 2490, 2491, 2492, 2493, 2494, 2495, 2496, 2497, 2498, 2499, 2500, 2501, 2502, 2503, 2504, 2505, 2506, 2507, - 2508, 2509, 2510, 2511, 2512, 2513, 2514, 2515, 2516, 2517, 2518, 2519, 2520, 2521, 2522, 2523, 2524, 2525, 2526, - 2527, 2528, 2529, 2530, 2531, 2532, 2533, 2534, 2535, 2536, 2537, 2538, 2539, 2540, 2541, 2542, 2543, 2544, 2545, - 2546, 2547, 2548, 2549, 2550, 2551, 2552, 2553, 2554, 2555, 2556, 2557, 2558, 2559, 2560, 2561, 2562, 2563, 2564, - 2565, 2566, 2567, 2568, 2569, 2570, 2571, 2572, 2573, 2574, 2575, 2576, 2577, 2578, 2579, 2580, 2581, 2582, 2583, - 2584, 2585, 2586, 2587, 2588, 2589, 2590, 2591, 2592, 2593, 2594, 2595, 2596, 2597, 2598, 2599, 2600, 2601, 2602, - 2603, 2604, 2605, 2606, 2607, 2608, 
2609, 2610, 2611, 2612, 2613, 2614, 2615, 2616, 2617, 2618, 2619, 2620, 2621, - 2622, 2623, 2624, 2625, 2626, 2627, 2628, 2629, 2630, 2631, 2632, 2633, 2634, 2635, 2636, 2637, 2638, 2639, 2640, - 2641, 2642, 2643, 2644, 2645, 2646, 2647, 2648, 2649, 2650, 2651, 2652, 2653, 2654, 2655, 2656, 2657, 2658, 2659, - 2660, 2661, 2662, 2663, 2664, 2665, 2666, 2667, 2668, 2669, 2670, 2671, 2672, 2673, 2674, 2675, 2676, 2677, 2678, - 2679, 2680, 2681, 2682, 2683, 2684, 2685, 2686, 2687, 2688, 2689, 2690, 2691, 2692, 2693, 2694, 2695, 2696, 2697, - 2698, 2699, 2700, 2701, 2702, 2703, 2704, 2705, 2706, 2707, 2708, 2709, 2710, 2711, 2712, 2713, 2714, 2715, 2716, - 2717, 2718, 2719, 2720, 2721, 2722, 2723, 2724, 2725, 2726, 2727, 2728, 2729, 2730, 2731, 2732, 2733, 2734, 2735, - 2736, 2737, 2738, 2739, 2740, 2741, 2742, 2743, 2744, 2745, 2746, 2747, 2748, 2749, 2750, 2751, 2752, 2753, 2754, - 2755, 2756, 2757, 2758, 2759, 2760, 2761, 2762, 2763, 2764, 2765, 2766, 2767, 2768, 2769, 2770, 2771, 2772, 2773, - 2774, 2775, 2776, 2777, 2778, 2779, 2780, 2781, 2782, 2783, 2784, 2785, 2786, 2787, 2788, 2789, 2790, 2791, 2792, - 2793, 2794, 2795, 2796, 2797, 2798, 2799, 2800, 2801, 2802, 2803, 2804, 2805, 2806, 2807, 2808, 2809, 2810, 2811, - 2812, 2813, 2814, 2815, 2816, 2817, 2818, 2819, 2820, 2821, 2822, 2823, 2824, 2825, 2826, 2827, 2828, 2829, 2830, - 2831, 2832, 2833, 2834, 2835, 2836, 2837, 2838, 2839, 2840, 2841, 2842, 2843, 2844, 2845, 2846, 2847, 2848, 2849, - 2850, 2851, 2852, 2853, 2854, 2855, 2856, 2857, 2858, 2859, 2860, 2861, 2862, 2863, 2864, 2865, 2866, 2867, 2868, - 2869, 2870, 2871, 2872, 2873, 2874, 2875, 2876, 2877, 2878, 2879, 2880, 2881, 2882, 2883, 2884, 2885, 2886, 2887, - 2888, 2889, 2890, 2891, 2892, 2893, 2894, 2895, 2896, 2897, 2898, 2899, 2900, 2901, 2902, 2903, 2904, 2905, 2906, - 2907, 2908, 2909, 2910, 2911, 2912, 2913, 2914, 2915, 2916, 2917, 2918, 2919, 2920, 2921, 2922, 2923, 2924, 2925, - 2926, 2927, 2928, 2929, 2930, 2931, 2932, 2933, 2934, 2935, 
2936, 2937, 2938, 2939, 2940, 2941, 2942, 2943, 2944, - 2945, 2946, 2947, 2948, 2949, 2950, 2951, 2952, 2953, 2954, 2955, 2956, 2957, 2958, 2959, 2960, 2961, 2962, 2963, - 2964, 2965, 2966, 2967, 2968, 2969, 2970, 2971, 2972, 2973, 2974, 2975, 2976, 2977, 2978, 2979, 2980, 2981, 2982, - 2983, 2984, 2985, 2986, 2987, 2988, 2989, 2990, 2991, 2992, 2993, 2994, 2995, 2996, 2997, 2998, 2999, 3000, 3001, - 3002, 3003, 3004, 3005, 3006, 3007, 3008, 3009, 3010, 3011, 3012, 3013, 3014, 3015, 3016, 3017, 3018, 3019, 3020, - 3021, 3022, 3023, 3024, 3025, 3026, 3027, 3028, 3029, 3030, 3031, 3032, 3033, 3034, 3035, 3036, 3037, 3038, 3039, - 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3050, 3051, 3052, 3053, 3054, 3055, 3056, 3057, 3058, - 3059, 3060, 3061, 3062, 3063, 3064, 3065, 3066, 3067, 3068, 3069, 3070, 3071, 3072, 3073, 3074, 3075, 3076, 3077, - 3078, 3079, 3080, 3081, 3082, 3083, 3084, 3085, 3086, 3087, 3088, 3089, 3090, 3091, 3092, 3093, 3094, 3095, 3096, - 3097, 3098, 3099, 3100, 3101, 3102, 3103, 3104, 3105, 3106, 3107, 3108, 3109, 3110, 3111, 3112, 3113, 3114, 3115, - 3116, 3117, 3118, 3119, 3120, 3121, 3122, 3123, 3124, 3125, 3126, 3127, 3128, 3129, 3130, 3131, 3132, 3133, 3134, - 3135, 3136, 3137, 3138, 3139, 3140, 3141, 3142, 3143, 3144, 3145, 3146, 3147, 3148, 3149, 3150, 3151, 3152, 3153, - 3154, 3155, 3156, 3157, 3158, 3159, 3160, 3161, 3162, 3163, 3164, 3165, 3166, 3167, 3168, 3169, 3170, 3171, 3172, - 3173, 3174, 3175, 3176, 3177, 3178, 3179, 3180, 3181, 3182, 3183, 3184, 3185, 3186, 3187, 3188, 3189, 3190, 3191, - 3192, 3193, 3194, 3195, 3196, 3197, 3198, 3199, 3200, 3201, 3202, 3203, 3204, 3205, 3206, 3207, 3208, 3209, 3210, - 3211, 3212, 3213, 3214, 3215, 3216, 3217, 3218, 3219, 3220, 3221, 3222, 3223, 3224, 3225, 3226, 3227, 3228, 3229, - 3230, 3231, 3232, 3233, 3234, 3235, 3236, 3237, 3238, 3239, 3240, 3241, 3242, 3243, 3244, 3245, 3246, 3247, 3248, - 3249, 3250, 3251, 3252, 3253, 3254, 3255, 3256, 3257, 3258, 3259, 3260, 3261, 3262, 
3263, 3264, 3265, 3266, 3267, - 3268, 3269, 3270, 3271, 3272, 3273, 3274, 3275, 3276, 3277, 3278, 3279, 3280, 3281, 3282, 3283, 3284, 3285, 3286, - 3287, 3288, 3289, 3290, 3291, 3292, 3293, 3294, 3295, 3296, 3297, 3298, 3299, 3300, 3301, 3302, 3303, 3304, 3305, - 3306, 3307, 3308, 3309, 3310, 3311, 3312, 3313, 3314, 3315, 3316, 3317, 3318, 3319, 3320, 3321, 3322, 3323, 3324, - 3325, 3326, 3327, 3328, 3329, 3330, 3331, 3332, 3333, 3334, 3335, 3336, 3337, 3338, 3339, 3340, 3341, 3342, 3343, - 3344, 3345, 3346, 3347, 3348, 3349, 3350, 3351, 3352, 3353, 3354, 3355, 3356, 3357, 3358, 3359, 3360, 3361, 3362, - 3363, 3364, 3365, 3366, 3367, 3368, 3369, 3370, 3371, 3372, 3373, 3374, 3375, 3376, 3377, 3378, 3379, 3380, 3381, - 3382, 3383, 3384, 3385, 3386, 3387, 3388, 3389, 3390, 3391, 3392, 3393, 3394, 3395, 3396, 3397, 3398, 3399, 3400, - 3401, 3402, 3403, 3404, 3405, 3406, 3407, 3408, 3409, 3410, 3411, 3412, 3413, 3414, 3415, 3416, 3417, 3418, 3419, - 3420, 3421, 3422, 3423, 3424, 3425, 3426, 3427, 3428, 3429, 3430, 3431, 3432, 3433, 3434, 3435, 3436, 3437, 3438, - 3439, 3440, 3441, 3442, 3443, 3444, 3445, 3446, 3447, 3448, 3449, 3450, 3451, 3452, 3453, 3454, 3455, 3456, 3457, - 3458, 3459, 3460, 3461, 3462, 3463, 3464, 3465, 3466, 3467, 3468, 3469, 3470, 3471, 3472, 3473, 3474, 3475, 3476, - 3477, 3478, 3479, 3480, 3481, 3482, 3483, 3484, 3485, 3486, 3487, 3488, 3489, 3490, 3491, 3492, 3493, 3494, 3495, - 3496, 3497, 3498, 3499, 3500, 3501, 3502, 3503, 3504, 3505, 3506, 3507, 3508, 3509, 3510, 3511, 3512, 3513, 3514, - 3515, 3516, 3517, 3518, 3519, 3520, 3521, 3522, 3523, 3524, 3525, 3526, 3527, 3528, 3529, 3530, 3531, 3532, 3533, - 3534, 3535, 3536, 3537, 3538, 3539, 3540, 3541, 3542, 3543, 3544, 3545, 3546, 3547, 3548, 3549, 3550, 3551, 3552, - 3553, 3554, 3555, 3556, 3557, 3558, 3559, 3560, 3561, 3562, 3563, 3564, 3565, 3566, 3567, 3568, 3569, 3570, 3571, - 3572, 3573, 3574, 3575, 3576, 3577, 3578, 3579, 3580, 3581, 3582, 3583, 3584, 3585, 3586, 3587, 3588, 3589, 
3590, - 3591, 3592, 3593, 3594, 3595, 3596, 3597, 3598, 3599, 3600, 3601, 3602, 3603, 3604, 3605, 3606, 3607, 3608, 3609, - 3610, 3611, 3612, 3613, 3614, 3615, 3616, 3617, 3618, 3619, 3620, 3621, 3622, 3623, 3624, 3625, 3626, 3627, 3628, - 3629, 3630, 3631, 3632, 3633, 3634, 3635, 3636, 3637, 3638, 3639, 3640, 3641, 3642, 3643, 3644, 3645, 3646, 3647, - 3648, 3649, 3650, 3651, 3652, 3653, 3654, 3655, 3656, 3657, 3658, 3659, 3660, 3661, 3662, 3663, 3664, 3665, 3666, - 3667, 3668, 3669, 3670, 3671, 3672, 3673, 3674, 3675, 3676, 3677, 3678, 3679, 3680, 3681, 3682, 3683, 3684, 3685, - 3686, 3687, 3688, 3689, 3690, 3691, 3692, 3693, 3694, 3695, 3696, 3697, 3698, 3699, 3700, 3701, 3702, 3703, 3704, - 3705, 3706, 3707, 3708, 3709, 3710, 3711, 3712, 3713, 3714, 3715, 3716, 3717, 3718, 3719, 3720, 3721, 3722, 3723, - 3724, 3725, 3726, 3727, 3728, 3729, 3730, 3731, 3732, 3733, 3734, 3735, 3736, 3737, 3738, 3739, 3740, 3741, 3742, - 3743, 3744, 3745, 3746, 3747, 3748, 3749, 3750, 3751, 3752, 3753, 3754, 3755, 3756, 3757, 3758, 3759, 3760, 3761, - 3762, 3763, 3764, 3765, 3766, 3767, 3768, 3769, 3770, 3771, 3772, 3773, 3774, 3775, 3776, 3777, 3778, 3779, 3780, - 3781, 3782, 3783, 3784, 3785, 3786, 3787, 3788, 3789, 3790, 3791, 3792, 3793, 3794, 3795, 3796, 3797, 3798, 3799, - 3800, 3801, 3802, 3803, 3804, 3805, 3806, 3807, 3808, 3809, 3810, 3811, 3812, 3813, 3814, 3815, 3816, 3817, 3818, - 3819, 3820, 3821, 3822, 3823, 3824, 3825, 3826, 3827, 3828, 3829, 3830, 3831, 3832, 3833, 3834, 3835, 3836, 3837, - 3838, 3839, 3840, 3841, 3842, 3843, 3844, 3845, 3846, 3847, 3848, 3849, 3850, 3851, 3852, 3853, 3854, 3855, 3856, - 3857, 3858, 3859, 3860, 3861, 3862, 3863, 3864, 3865, 3866, 3867, 3868, 3869, 3870, 3871, 3872, 3873, 3874, 3875, - 3876, 3877, 3878, 3879, 3880, 3881, 3882, 3883, 3884, 3885, 3886, 3887, 3888, 3889, 3890, 3891, 3892, 3893, 3894, - 3895, 3896, 3897, 3898, 3899, 3900, 3901, 3902, 3903, 3904, 3905, 3906, 3907, 3908, 3909, 3910, 3911, 3912, 3913, - 3914, 3915, 3916, 
3917, 3918, 3919, 3920, 3921, 3922, 3923, 3924, 3925, 3926, 3927, 3928, 3929, 3930, 3931, 3932, - 3933, 3934, 3935, 3936, 3937, 3938, 3939, 3940, 3941, 3942, 3943, 3944, 3945, 3946, 3947, 3948, 3949, 3950, 3951, - 3952, 3953, 3954, 3955, 3956, 3957, 3958, 3959, 3960, 3961, 3962, 3963, 3964, 3965, 3966, 3967, 3968, 3969, 3970, - 3971, 3972, 3973, 3974, 3975, 3976, 3977, 3978, 3979, 3980, 3981, 3982, 3983, 3984, 3985, 3986, 3987, 3988, 3989, - 3990, 3991, 3992, 3993, 3994, 3995, 3996, 3997, 3998, 3999, 4000, 4001, 4002, 4003, 4004, 4005, 4006, 4007, 4008, - 4009, 4010, 4011, 4012, 4013, 4014, 4015, 4016, 4017, 4018, 4019, 4020, 4021, 4022, 4023, 4024, 4025, 4026, 4027, - 4028, 4029, 4030, 4031, 4032, 4033, 4034, 4035, 4036, 4037, 4038, 4039, 4040, 4041, 4042, 4043, 4044, 4045, 4046, - 4047, 4048, 4049, 4050, 4051, 4052, 4053, 4054, 4055, 4056, 4057, 4058, 4059, 4060, 4061, 4062, 4063, 4064, 4065, - 4066, 4067, 4068, 4069, 4070, 4071, 4072, 4073, 4074, 4075, 4076, 4077, 4078, 4079, 4080, 4081, 4082, 4083, 4084, - 4085, 4086, 4087, 4088, 4089, 4090, 4091, 4092, 4093, 4094, 4095}; -#else -#error Unsupported VECTOR_SIZE! 
-#endif +template <> +DUCKDB_API interval_t Value::GetValue() const { + return GetValueInternal(); +} -} // namespace duckdb +uintptr_t Value::GetPointer() const { + D_ASSERT(type() == LogicalType::POINTER); + return value_.pointer; +} +Value Value::Numeric(const LogicalType &type, int64_t value) { + switch (type.id()) { + case LogicalTypeId::TINYINT: + D_ASSERT(value <= NumericLimits::Maximum()); + return Value::TINYINT((int8_t)value); + case LogicalTypeId::SMALLINT: + D_ASSERT(value <= NumericLimits::Maximum()); + return Value::SMALLINT((int16_t)value); + case LogicalTypeId::INTEGER: + D_ASSERT(value <= NumericLimits::Maximum()); + return Value::INTEGER((int32_t)value); + case LogicalTypeId::BIGINT: + return Value::BIGINT(value); + case LogicalTypeId::HUGEINT: + return Value::HUGEINT(value); + case LogicalTypeId::DECIMAL: + return Value::DECIMAL(value, DecimalType::GetWidth(type), DecimalType::GetScale(type)); + case LogicalTypeId::FLOAT: + return Value((float)value); + case LogicalTypeId::DOUBLE: + return Value((double)value); + case LogicalTypeId::HASH: + return Value::HASH(value); + case LogicalTypeId::POINTER: + return Value::POINTER(value); + case LogicalTypeId::DATE: + D_ASSERT(value <= NumericLimits::Maximum()); + return Value::DATE(date_t(value)); + case LogicalTypeId::TIME: + return Value::TIME(dtime_t(value)); + case LogicalTypeId::TIMESTAMP: + return Value::TIMESTAMP(timestamp_t(value)); + case LogicalTypeId::TIMESTAMP_NS: + return Value::TimestampNs(timestamp_t(value)); + case LogicalTypeId::TIMESTAMP_MS: + return Value::TimestampMs(timestamp_t(value)); + case LogicalTypeId::TIMESTAMP_SEC: + return Value::TimestampSec(timestamp_t(value)); + default: + throw InvalidTypeException(type, "Numeric requires numeric type"); + } +} +//===--------------------------------------------------------------------===// +// GetValueUnsafe +//===--------------------------------------------------------------------===// +template <> +int8_t &Value::GetValueUnsafe() { + 
D_ASSERT(type_.InternalType() == PhysicalType::INT8 || type_.InternalType() == PhysicalType::BOOL); + return value_.tinyint; +} +template <> +int16_t &Value::GetValueUnsafe() { + D_ASSERT(type_.InternalType() == PhysicalType::INT16); + return value_.smallint; +} +template <> +int32_t &Value::GetValueUnsafe() { + D_ASSERT(type_.InternalType() == PhysicalType::INT32); + return value_.integer; +} +template <> +int64_t &Value::GetValueUnsafe() { + D_ASSERT(type_.InternalType() == PhysicalType::INT64); + return value_.bigint; +} +template <> +hugeint_t &Value::GetValueUnsafe() { + D_ASSERT(type_.InternalType() == PhysicalType::INT128); + return value_.hugeint; +} +template <> +uint8_t &Value::GetValueUnsafe() { + D_ASSERT(type_.InternalType() == PhysicalType::UINT8); + return value_.utinyint; +} +template <> +uint16_t &Value::GetValueUnsafe() { + D_ASSERT(type_.InternalType() == PhysicalType::UINT16); + return value_.usmallint; +} +template <> +uint32_t &Value::GetValueUnsafe() { + D_ASSERT(type_.InternalType() == PhysicalType::UINT32); + return value_.uinteger; +} -#include +template <> +uint64_t &Value::GetValueUnsafe() { + D_ASSERT(type_.InternalType() == PhysicalType::UINT64); + return value_.ubigint; +} -namespace duckdb { +template <> +string &Value::GetValueUnsafe() { + D_ASSERT(type_.InternalType() == PhysicalType::VARCHAR); + return str_value; +} -LogicalType::LogicalType() : id_(LogicalTypeId::INVALID), width_(0), scale_(0), collation_(string()) { - physical_type_ = GetInternalType(); +template <> +float &Value::GetValueUnsafe() { + D_ASSERT(type_.InternalType() == PhysicalType::FLOAT); + return value_.float_; } -LogicalType::LogicalType(LogicalTypeId id) : id_(id), width_(0), scale_(0), collation_(string()) { - physical_type_ = GetInternalType(); + +template <> +double &Value::GetValueUnsafe() { + D_ASSERT(type_.InternalType() == PhysicalType::DOUBLE); + return value_.double_; } -LogicalType::LogicalType(LogicalTypeId id, string collation) - : id_(id), 
width_(0), scale_(0), collation_(move(collation)) { - physical_type_ = GetInternalType(); + +template <> +date_t &Value::GetValueUnsafe() { + D_ASSERT(type_.InternalType() == PhysicalType::INT32); + return value_.date; } -LogicalType::LogicalType(LogicalTypeId id, uint8_t width, uint8_t scale) - : id_(id), width_(width), scale_(scale), collation_(string()) { - physical_type_ = GetInternalType(); + +template <> +dtime_t &Value::GetValueUnsafe() { + D_ASSERT(type_.InternalType() == PhysicalType::INT64); + return value_.time; } -LogicalType::LogicalType(LogicalTypeId id, child_list_t child_types) - : id_(id), width_(0), scale_(0), collation_(string()), child_types_(move(child_types)) { - physical_type_ = GetInternalType(); + +template <> +timestamp_t &Value::GetValueUnsafe() { + D_ASSERT(type_.InternalType() == PhysicalType::INT64); + return value_.timestamp; } -LogicalType::LogicalType(LogicalTypeId id, uint8_t width, uint8_t scale, string collation, - child_list_t child_types) - : id_(id), width_(width), scale_(scale), collation_(move(collation)), child_types_(move(child_types)) { - physical_type_ = GetInternalType(); + +template <> +interval_t &Value::GetValueUnsafe() { + D_ASSERT(type_.InternalType() == PhysicalType::INTERVAL); + return value_.interval; } -hash_t LogicalType::Hash() const { - return duckdb::Hash((uint8_t)id_); +Value Value::Numeric(const LogicalType &type, hugeint_t value) { + switch (type.id()) { + case LogicalTypeId::HUGEINT: + return Value::HUGEINT(value); + default: + return Value::Numeric(type, Hugeint::Cast(value)); + } } -PhysicalType LogicalType::GetInternalType() { - switch (id_) { +string Value::ToString() const { + if (is_null) { + return "NULL"; + } + switch (type_.id()) { case LogicalTypeId::BOOLEAN: - return PhysicalType::BOOL; + return value_.boolean ? 
"True" : "False"; case LogicalTypeId::TINYINT: - return PhysicalType::INT8; - case LogicalTypeId::UTINYINT: - return PhysicalType::UINT8; + return to_string(value_.tinyint); case LogicalTypeId::SMALLINT: - return PhysicalType::INT16; - case LogicalTypeId::USMALLINT: - return PhysicalType::UINT16; - case LogicalTypeId::SQLNULL: - case LogicalTypeId::DATE: + return to_string(value_.smallint); case LogicalTypeId::INTEGER: - return PhysicalType::INT32; - case LogicalTypeId::UINTEGER: - return PhysicalType::UINT32; + return to_string(value_.integer); case LogicalTypeId::BIGINT: - case LogicalTypeId::TIME: - case LogicalTypeId::TIMESTAMP: - return PhysicalType::INT64; + return to_string(value_.bigint); + case LogicalTypeId::UTINYINT: + return to_string(value_.utinyint); + case LogicalTypeId::USMALLINT: + return to_string(value_.usmallint); + case LogicalTypeId::UINTEGER: + return to_string(value_.uinteger); case LogicalTypeId::UBIGINT: - return PhysicalType::UINT64; + return to_string(value_.ubigint); case LogicalTypeId::HUGEINT: - return PhysicalType::INT128; + return Hugeint::ToString(value_.hugeint); case LogicalTypeId::FLOAT: - return PhysicalType::FLOAT; + return to_string(value_.float_); case LogicalTypeId::DOUBLE: - return PhysicalType::DOUBLE; - case LogicalTypeId::DECIMAL: - if (width_ <= Decimal::MAX_WIDTH_INT16) { - return PhysicalType::INT16; - } else if (width_ <= Decimal::MAX_WIDTH_INT32) { - return PhysicalType::INT32; - } else if (width_ <= Decimal::MAX_WIDTH_INT64) { - return PhysicalType::INT64; - } else if (width_ <= Decimal::MAX_WIDTH_INT128) { - return PhysicalType::INT128; + return to_string(value_.double_); + case LogicalTypeId::DECIMAL: { + auto internal_type = type_.InternalType(); + auto scale = DecimalType::GetScale(type_); + if (internal_type == PhysicalType::INT16) { + return Decimal::ToString(value_.smallint, scale); + } else if (internal_type == PhysicalType::INT32) { + return Decimal::ToString(value_.integer, scale); + } else if 
(internal_type == PhysicalType::INT64) { + return Decimal::ToString(value_.bigint, scale); } else { - throw NotImplementedException("Widths bigger than 38 are not supported"); + D_ASSERT(internal_type == PhysicalType::INT128); + return Decimal::ToString(value_.hugeint, scale); } + } + case LogicalTypeId::DATE: + return Date::ToString(value_.date); + case LogicalTypeId::TIME: + return Time::ToString(value_.time); + case LogicalTypeId::TIMESTAMP: + return Timestamp::ToString(value_.timestamp); + case LogicalTypeId::TIMESTAMP_SEC: + return Timestamp::ToString(Timestamp::FromEpochSeconds(value_.timestamp.value)); + case LogicalTypeId::TIMESTAMP_MS: + return Timestamp::ToString(Timestamp::FromEpochMs(value_.timestamp.value)); + case LogicalTypeId::TIMESTAMP_NS: + return Timestamp::ToString(Timestamp::FromEpochNanoSeconds(value_.timestamp.value)); + case LogicalTypeId::INTERVAL: + return Interval::ToString(value_.interval); case LogicalTypeId::VARCHAR: - case LogicalTypeId::CHAR: + return str_value; case LogicalTypeId::BLOB: - return PhysicalType::VARCHAR; - case LogicalTypeId::INTERVAL: - return PhysicalType::INTERVAL; - case LogicalTypeId::STRUCT: - return PhysicalType::STRUCT; - case LogicalTypeId::LIST: - return PhysicalType::LIST; - case LogicalTypeId::MAP: - return PhysicalType::MAP; - case LogicalTypeId::HASH: - return PhysicalType::HASH; + return Blob::ToString(string_t(str_value)); case LogicalTypeId::POINTER: - return PhysicalType::POINTER; - case LogicalTypeId::VALIDITY: - return PhysicalType::BIT; - case LogicalTypeId::TABLE: - case LogicalTypeId::ANY: - case LogicalTypeId::INVALID: - case LogicalTypeId::UNKNOWN: - return PhysicalType::INVALID; + return to_string(value_.pointer); + case LogicalTypeId::HASH: + return to_string(value_.hash); + case LogicalTypeId::STRUCT: { + string ret = "{"; + auto &child_types = StructType::GetChildTypes(type_); + for (size_t i = 0; i < struct_value.size(); i++) { + auto &name = child_types[i].first; + auto &child = 
struct_value[i]; + ret += "'" + name + "': " + child.ToString(); + if (i < struct_value.size() - 1) { + ret += ", "; + } + } + ret += "}"; + return ret; + } + case LogicalTypeId::LIST: { + string ret = "["; + for (size_t i = 0; i < list_value.size(); i++) { + auto &child = list_value[i]; + ret += child.ToString(); + if (i < list_value.size() - 1) { + ret += ", "; + } + } + ret += "]"; + return ret; + } + case LogicalTypeId::MAP: { + string ret = "{"; + auto &key_list = struct_value[0].list_value; + auto &value_list = struct_value[1].list_value; + for (size_t i = 0; i < key_list.size(); i++) { + ret += key_list[i].ToString() + "=" + value_list[i].ToString(); + if (i < key_list.size() - 1) { + ret += ", "; + } + } + ret += "}"; + return ret; + } default: - throw ConversionException("Invalid LogicalType %s", ToString()); + throw NotImplementedException("Unimplemented type for printing: %s", type_.ToString()); } } -const LogicalType LogicalType::INVALID = LogicalType(LogicalTypeId::INVALID); -const LogicalType LogicalType::SQLNULL = LogicalType(LogicalTypeId::SQLNULL); -const LogicalType LogicalType::BOOLEAN = LogicalType(LogicalTypeId::BOOLEAN); -const LogicalType LogicalType::TINYINT = LogicalType(LogicalTypeId::TINYINT); -const LogicalType LogicalType::UTINYINT = LogicalType(LogicalTypeId::UTINYINT); -const LogicalType LogicalType::SMALLINT = LogicalType(LogicalTypeId::SMALLINT); -const LogicalType LogicalType::USMALLINT = LogicalType(LogicalTypeId::USMALLINT); -const LogicalType LogicalType::INTEGER = LogicalType(LogicalTypeId::INTEGER); -const LogicalType LogicalType::UINTEGER = LogicalType(LogicalTypeId::UINTEGER); -const LogicalType LogicalType::BIGINT = LogicalType(LogicalTypeId::BIGINT); -const LogicalType LogicalType::UBIGINT = LogicalType(LogicalTypeId::UBIGINT); -const LogicalType LogicalType::HUGEINT = LogicalType(LogicalTypeId::HUGEINT); -const LogicalType LogicalType::FLOAT = LogicalType(LogicalTypeId::FLOAT); -const LogicalType LogicalType::DECIMAL = 
LogicalType(LogicalTypeId::DECIMAL); -const LogicalType LogicalType::DOUBLE = LogicalType(LogicalTypeId::DOUBLE); -const LogicalType LogicalType::DATE = LogicalType(LogicalTypeId::DATE); -const LogicalType LogicalType::TIMESTAMP = LogicalType(LogicalTypeId::TIMESTAMP); -const LogicalType LogicalType::TIME = LogicalType(LogicalTypeId::TIME); -const LogicalType LogicalType::HASH = LogicalType(LogicalTypeId::HASH); -const LogicalType LogicalType::POINTER = LogicalType(LogicalTypeId::POINTER); +//===--------------------------------------------------------------------===// +// Numeric Operators +//===--------------------------------------------------------------------===// +Value Value::operator+(const Value &rhs) const { + return ValueOperations::Add(*this, rhs); +} -const LogicalType LogicalType::VARCHAR = LogicalType(LogicalTypeId::VARCHAR); +Value Value::operator-(const Value &rhs) const { + return ValueOperations::Subtract(*this, rhs); +} -const LogicalType LogicalType::BLOB = LogicalType(LogicalTypeId::BLOB); -const LogicalType LogicalType::INTERVAL = LogicalType(LogicalTypeId::INTERVAL); +Value Value::operator*(const Value &rhs) const { + return ValueOperations::Multiply(*this, rhs); +} -// TODO these are incomplete and should maybe not exist as such -const LogicalType LogicalType::STRUCT = LogicalType(LogicalTypeId::STRUCT); -const LogicalType LogicalType::LIST = LogicalType(LogicalTypeId::LIST); -const LogicalType LogicalType::MAP = LogicalType(LogicalTypeId::MAP); -const LogicalType LogicalType::TABLE = LogicalType(LogicalTypeId::TABLE); +Value Value::operator/(const Value &rhs) const { + return ValueOperations::Divide(*this, rhs); +} -const LogicalType LogicalType::ANY = LogicalType(LogicalTypeId::ANY); +Value Value::operator%(const Value &rhs) const { + throw NotImplementedException("value modulo"); + // return ValueOperations::Modulo(*this, rhs); +} -const vector LogicalType::NUMERIC = {LogicalType::TINYINT, LogicalType::SMALLINT, LogicalType::INTEGER, - 
LogicalType::BIGINT, LogicalType::HUGEINT, LogicalType::FLOAT, - LogicalType::DOUBLE, LogicalType::DECIMAL, LogicalType::UTINYINT, - LogicalType::USMALLINT, LogicalType::UINTEGER, LogicalType::UBIGINT}; +//===--------------------------------------------------------------------===// +// Comparison Operators +//===--------------------------------------------------------------------===// +bool Value::operator==(const Value &rhs) const { + return ValueOperations::Equals(*this, rhs); +} -const vector LogicalType::INTEGRAL = {LogicalType::TINYINT, LogicalType::SMALLINT, LogicalType::INTEGER, - LogicalType::BIGINT, LogicalType::HUGEINT, LogicalType::UTINYINT, - LogicalType::USMALLINT, LogicalType::UINTEGER, LogicalType::UBIGINT}; +bool Value::operator!=(const Value &rhs) const { + return ValueOperations::NotEquals(*this, rhs); +} -const vector LogicalType::ALL_TYPES = { - LogicalType::BOOLEAN, LogicalType::TINYINT, LogicalType::SMALLINT, LogicalType::INTEGER, LogicalType::BIGINT, - LogicalType::DATE, LogicalType::TIMESTAMP, LogicalType::DOUBLE, LogicalType::FLOAT, LogicalType::VARCHAR, - LogicalType::BLOB, LogicalType::INTERVAL, LogicalType::HUGEINT, LogicalType::DECIMAL, LogicalType::UTINYINT, - LogicalType::USMALLINT, LogicalType::UINTEGER, LogicalType::UBIGINT}; -// TODO add LIST/STRUCT here +bool Value::operator<(const Value &rhs) const { + return ValueOperations::LessThan(*this, rhs); +} -const LogicalType LOGICAL_ROW_TYPE = LogicalType::BIGINT; -const PhysicalType ROW_TYPE = PhysicalType::INT64; +bool Value::operator>(const Value &rhs) const { + return ValueOperations::GreaterThan(*this, rhs); +} -string TypeIdToString(PhysicalType type) { - switch (type) { - case PhysicalType::BOOL: - return "BOOL"; - case PhysicalType::INT8: - return "INT8"; - case PhysicalType::INT16: - return "INT16"; - case PhysicalType::INT32: - return "INT32"; - case PhysicalType::INT64: - return "INT64"; - case PhysicalType::UINT8: - return "UINT8"; - case PhysicalType::UINT16: - return 
"UINT16"; - case PhysicalType::UINT32: - return "UINT32"; - case PhysicalType::UINT64: - return "UINT64"; - case PhysicalType::INT128: - return "INT128"; - case PhysicalType::HASH: - return "HASH"; - case PhysicalType::POINTER: - return "POINTER"; - case PhysicalType::FLOAT: - return "FLOAT"; - case PhysicalType::DOUBLE: - return "DOUBLE"; - case PhysicalType::VARCHAR: - return "VARCHAR"; - case PhysicalType::INTERVAL: - return "INTERVAL"; - case PhysicalType::STRUCT: - return "STRUCT"; - case PhysicalType::LIST: - return "LIST"; - case PhysicalType::MAP: - return "MAP"; - case PhysicalType::INVALID: - return "INVALID"; - case PhysicalType::BIT: - return "BIT"; - default: - throw ConversionException("Invalid PhysicalType %s", type); +bool Value::operator<=(const Value &rhs) const { + return ValueOperations::LessThanEquals(*this, rhs); +} + +bool Value::operator>=(const Value &rhs) const { + return ValueOperations::GreaterThanEquals(*this, rhs); +} + +bool Value::operator==(const int64_t &rhs) const { + return *this == Value::Numeric(type_, rhs); +} + +bool Value::operator!=(const int64_t &rhs) const { + return *this != Value::Numeric(type_, rhs); +} + +bool Value::operator<(const int64_t &rhs) const { + return *this < Value::Numeric(type_, rhs); +} + +bool Value::operator>(const int64_t &rhs) const { + return *this > Value::Numeric(type_, rhs); +} + +bool Value::operator<=(const int64_t &rhs) const { + return *this <= Value::Numeric(type_, rhs); +} + +bool Value::operator>=(const int64_t &rhs) const { + return *this >= Value::Numeric(type_, rhs); +} + +bool Value::TryCastAs(const LogicalType &target_type, Value &new_value, string *error_message, bool strict) const { + if (type_ == target_type) { + new_value = Copy(); + return true; + } + Vector input(*this); + Vector result(target_type); + if (!VectorOperations::TryCast(input, result, 1, error_message, strict)) { + return false; } + new_value = result.GetValue(0); + return true; } -idx_t GetTypeIdSize(PhysicalType 
type) { - switch (type) { - case PhysicalType::BIT: +Value Value::CastAs(const LogicalType &target_type, bool strict) const { + Value new_value; + string error_message; + if (!TryCastAs(target_type, new_value, &error_message, strict)) { + throw InvalidInputException("Failed to cast value: %s", error_message); + } + return new_value; +} + +bool Value::TryCastAs(const LogicalType &target_type, bool strict) { + Value new_value; + string error_message; + if (!TryCastAs(target_type, new_value, &error_message, strict)) { + return false; + } + type_ = target_type; + is_null = new_value.is_null; + value_ = new_value.value_; + str_value = new_value.str_value; + struct_value = new_value.struct_value; + list_value = new_value.list_value; + return true; +} + +void Value::Serialize(Serializer &serializer) { + type_.Serialize(serializer); + serializer.Write(is_null); + if (!is_null) { + switch (type_.InternalType()) { + case PhysicalType::BOOL: + serializer.Write(value_.boolean); + break; + case PhysicalType::INT8: + serializer.Write(value_.tinyint); + break; + case PhysicalType::INT16: + serializer.Write(value_.smallint); + break; + case PhysicalType::INT32: + serializer.Write(value_.integer); + break; + case PhysicalType::INT64: + serializer.Write(value_.bigint); + break; + case PhysicalType::UINT8: + serializer.Write(value_.utinyint); + break; + case PhysicalType::UINT16: + serializer.Write(value_.usmallint); + break; + case PhysicalType::UINT32: + serializer.Write(value_.uinteger); + break; + case PhysicalType::UINT64: + serializer.Write(value_.ubigint); + break; + case PhysicalType::INT128: + serializer.Write(value_.hugeint); + break; + case PhysicalType::FLOAT: + serializer.Write(value_.float_); + break; + case PhysicalType::DOUBLE: + serializer.Write(value_.double_); + break; + case PhysicalType::INTERVAL: + serializer.Write(value_.interval); + break; + case PhysicalType::VARCHAR: + serializer.WriteString(str_value); + break; + default: { + Vector v(*this); + 
v.Serialize(1, serializer); + break; + } + } + } +} + +Value Value::Deserialize(Deserializer &source) { + auto type = LogicalType::Deserialize(source); + auto is_null = source.Read(); + Value new_value = Value(type); + if (is_null) { + return new_value; + } + new_value.is_null = false; + switch (type.InternalType()) { case PhysicalType::BOOL: - return sizeof(bool); + new_value.value_.boolean = source.Read(); + break; case PhysicalType::INT8: - return sizeof(int8_t); + new_value.value_.tinyint = source.Read(); + break; case PhysicalType::INT16: - return sizeof(int16_t); + new_value.value_.smallint = source.Read(); + break; case PhysicalType::INT32: - return sizeof(int32_t); + new_value.value_.integer = source.Read(); + break; case PhysicalType::INT64: - return sizeof(int64_t); + new_value.value_.bigint = source.Read(); + break; case PhysicalType::UINT8: - return sizeof(uint8_t); + new_value.value_.utinyint = source.Read(); + break; case PhysicalType::UINT16: - return sizeof(uint16_t); + new_value.value_.usmallint = source.Read(); + break; case PhysicalType::UINT32: - return sizeof(uint32_t); + new_value.value_.uinteger = source.Read(); + break; case PhysicalType::UINT64: - return sizeof(uint64_t); + new_value.value_.ubigint = source.Read(); + break; case PhysicalType::INT128: - return sizeof(hugeint_t); + new_value.value_.hugeint = source.Read(); + break; case PhysicalType::FLOAT: - return sizeof(float); + new_value.value_.float_ = source.Read(); + break; case PhysicalType::DOUBLE: - return sizeof(double); - case PhysicalType::HASH: - return sizeof(hash_t); - case PhysicalType::POINTER: - return sizeof(uintptr_t); - case PhysicalType::VARCHAR: - return sizeof(string_t); + new_value.value_.double_ = source.Read(); + break; case PhysicalType::INTERVAL: - return sizeof(interval_t); - case PhysicalType::STRUCT: - return 0; // no own payload - case PhysicalType::MAP: - return 42; // FIXME there is no way to create this type yet - case PhysicalType::LIST: - return 16; // 
offset + len + new_value.value_.interval = source.Read(); + break; + case PhysicalType::VARCHAR: + new_value.str_value = source.Read(); + break; + default: { + Vector v(type); + v.Deserialize(1, source); + return v.GetValue(0); + } + } + return new_value; +} + +void Value::Print() const { + Printer::Print(ToString()); +} +bool Value::ValuesAreEqual(const Value &result_value, const Value &value) { + if (result_value.is_null != value.is_null) { + return false; + } + if (result_value.is_null && value.is_null) { + // NULL = NULL in checking code + return true; + } + switch (value.type_.id()) { + case LogicalTypeId::FLOAT: { + auto other = result_value.CastAs(LogicalType::FLOAT); + float ldecimal = value.value_.float_; + float rdecimal = other.value_.float_; + return ApproxEqual(ldecimal, rdecimal); + } + case LogicalTypeId::DOUBLE: { + auto other = result_value.CastAs(LogicalType::DOUBLE); + double ldecimal = value.value_.double_; + double rdecimal = other.value_.double_; + return ApproxEqual(ldecimal, rdecimal); + } + case LogicalTypeId::VARCHAR: { + auto other = result_value.CastAs(LogicalType::VARCHAR); + // some results might contain padding spaces, e.g. when rendering + // VARCHAR(10) and the string only has 6 characters, they will be padded + // with spaces to 10 in the rendering. We don't do that here yet as we + // are looking at internal structures. 
So just ignore any extra spaces + // on the right + string left = other.str_value; + string right = value.str_value; + StringUtil::RTrim(left); + StringUtil::RTrim(right); + return left == right; + } default: - throw ConversionException("Invalid PhysicalType %s", type); + return value == result_value; } } -bool TypeIsConstantSize(PhysicalType type) { - return (type >= PhysicalType::BOOL && type <= PhysicalType::DOUBLE) || - (type >= PhysicalType::FIXED_SIZE_BINARY && type <= PhysicalType::INTERVAL) || type == PhysicalType::HASH || - type == PhysicalType::POINTER || type == PhysicalType::INTERVAL || type == PhysicalType::INT128; +template <> +bool Value::IsValid(float value) { + return Value::FloatIsValid(value); } -bool TypeIsIntegral(PhysicalType type) { - return (type >= PhysicalType::UINT8 && type <= PhysicalType::INT64) || type == PhysicalType::HASH || - type == PhysicalType::POINTER || type == PhysicalType::INT128; + +template <> +bool Value::IsValid(double value) { + return Value::DoubleIsValid(value); } -bool TypeIsNumeric(PhysicalType type) { - return (type >= PhysicalType::UINT8 && type <= PhysicalType::DOUBLE) || type == PhysicalType::INT128; + +} // namespace duckdb + + + + + + + + + + + + + + + + + +#include // strlen() on Solaris + +namespace duckdb { + +Vector::Vector(LogicalType type_p, bool create_data, bool zero_data, idx_t capacity) + : vector_type(VectorType::FLAT_VECTOR), type(move(type_p)), data(nullptr) { + if (create_data) { + Initialize(zero_data, capacity); + } } -bool TypeIsInteger(PhysicalType type) { - return (type >= PhysicalType::UINT8 && type <= PhysicalType::INT64) || type == PhysicalType::INT128; + +Vector::Vector(LogicalType type_p, idx_t capacity) : Vector(move(type_p), true, false, capacity) { } -void LogicalType::Serialize(Serializer &serializer) const { - serializer.Write(id_); - serializer.Write(width_); - serializer.Write(scale_); - serializer.WriteString(collation_); - serializer.Write(child_types_.size()); - for (auto 
&entry : child_types_) { - serializer.WriteString(entry.first); - entry.second.Serialize(serializer); +Vector::Vector(LogicalType type_p, data_ptr_t dataptr) + : vector_type(VectorType::FLAT_VECTOR), type(move(type_p)), data(dataptr) { + if (dataptr && type.id() == LogicalTypeId::INVALID) { + throw InvalidTypeException(type, "Cannot create a vector of type INVALID!"); } } -LogicalType LogicalType::Deserialize(Deserializer &source) { - auto id = source.Read(); - auto width = source.Read(); - auto scale = source.Read(); - auto collation = source.Read(); - child_list_t children; - auto child_count = source.Read(); - for (uint16_t i = 0; i < child_count; i++) { - string name = source.Read(); - LogicalType child_type = LogicalType::Deserialize(source); - children.push_back(make_pair(move(name), move(child_type))); - } - return LogicalType(id, width, scale, collation, move(children)); +Vector::Vector(const VectorCache &cache) : type(cache.GetType()) { + ResetFromCache(cache); } -string LogicalTypeIdToString(LogicalTypeId id) { - switch (id) { - case LogicalTypeId::BOOLEAN: - return "BOOLEAN"; - case LogicalTypeId::TINYINT: - return "TINYINT"; - case LogicalTypeId::SMALLINT: - return "SMALLINT"; - case LogicalTypeId::INTEGER: - return "INTEGER"; - case LogicalTypeId::BIGINT: - return "BIGINT"; - case LogicalTypeId::HUGEINT: - return "HUGEINT"; - case LogicalTypeId::UTINYINT: - return "UTINYINT"; - case LogicalTypeId::USMALLINT: - return "USMALLINT"; - case LogicalTypeId::UINTEGER: - return "UINTEGER"; - case LogicalTypeId::UBIGINT: - return "UBIGINT"; - case LogicalTypeId::DATE: - return "DATE"; - case LogicalTypeId::TIME: - return "TIME"; - case LogicalTypeId::TIMESTAMP: - return "TIMESTAMP"; - case LogicalTypeId::FLOAT: - return "FLOAT"; - case LogicalTypeId::DOUBLE: - return "DOUBLE"; - case LogicalTypeId::DECIMAL: - return "DECIMAL"; - case LogicalTypeId::VARCHAR: - return "VARCHAR"; - case LogicalTypeId::BLOB: - return "BLOB"; - case LogicalTypeId::CHAR: - return 
"CHAR"; - case LogicalTypeId::INTERVAL: - return "INTERVAL"; - case LogicalTypeId::SQLNULL: - return "NULL"; - case LogicalTypeId::ANY: - return "ANY"; - case LogicalTypeId::VALIDITY: - return "VALIDITY"; - case LogicalTypeId::STRUCT: - return "STRUCT"; - case LogicalTypeId::LIST: - return "LIST"; - case LogicalTypeId::MAP: - return "MAP"; - case LogicalTypeId::HASH: - return "HASH"; - case LogicalTypeId::POINTER: - return "POINTER"; - case LogicalTypeId::TABLE: - return "TABLE"; - case LogicalTypeId::INVALID: - return "INVALID"; - case LogicalTypeId::UNKNOWN: - return "UNKNOWN"; +Vector::Vector(Vector &other) : type(other.type) { + Reference(other); +} + +Vector::Vector(Vector &other, const SelectionVector &sel, idx_t count) : type(other.type) { + Slice(other, sel, count); +} + +Vector::Vector(Vector &other, idx_t offset) : type(other.type) { + Slice(other, offset); +} + +Vector::Vector(const Value &value) : type(value.type()) { + Reference(value); +} + +Vector::Vector(Vector &&other) noexcept + : vector_type(other.vector_type), type(move(other.type)), data(other.data), validity(move(other.validity)), + buffer(move(other.buffer)), auxiliary(move(other.auxiliary)) { +} + +void Vector::Reference(const Value &value) { + D_ASSERT(GetType() == value.type()); + this->vector_type = VectorType::CONSTANT_VECTOR; + buffer = VectorBuffer::CreateConstantVector(value.type()); + auto internal_type = value.type().InternalType(); + if (internal_type == PhysicalType::STRUCT) { + auto struct_buffer = make_unique(); + auto &child_types = StructType::GetChildTypes(value.type()); + auto &child_vectors = struct_buffer->GetChildren(); + for (idx_t i = 0; i < child_types.size(); i++) { + auto vector = make_unique(value.is_null ? 
Value(child_types[i].second) : value.struct_value[i]); + child_vectors.push_back(move(vector)); + } + auxiliary = move(struct_buffer); + if (value.is_null) { + SetValue(0, value); + } + } else if (internal_type == PhysicalType::LIST) { + auto list_buffer = make_unique(value.type()); + auxiliary = move(list_buffer); + data = buffer->GetData(); + SetValue(0, value); + } else { + auxiliary.reset(); + data = buffer->GetData(); + SetValue(0, value); } - return "UNDEFINED"; } -string LogicalType::ToString() const { - switch (id_) { - case LogicalTypeId::STRUCT: { - string ret = "STRUCT<"; - for (size_t i = 0; i < child_types_.size(); i++) { - ret += child_types_[i].first + ": " + child_types_[i].second.ToString(); - if (i < child_types_.size() - 1) { - ret += ", "; - } - } - ret += ">"; - return ret; +void Vector::Reference(Vector &other) { + D_ASSERT(other.GetType() == GetType()); + Reinterpret(other); +} + +void Vector::Reinterpret(Vector &other) { + vector_type = other.vector_type; + AssignSharedPointer(buffer, other.buffer); + AssignSharedPointer(auxiliary, other.auxiliary); + data = other.data; + validity = other.validity; +} + +void Vector::ResetFromCache(const VectorCache &cache) { + cache.ResetFromCache(*this); +} + +void Vector::Slice(Vector &other, idx_t offset) { + if (other.GetVectorType() == VectorType::CONSTANT_VECTOR) { + Reference(other); + return; } - case LogicalTypeId::LIST: { - if (child_types_.empty()) { - return "LIST"; - } - if (child_types_.size() != 1) { - throw Exception("List needs a single child element"); + D_ASSERT(other.GetVectorType() == VectorType::FLAT_VECTOR); + + auto internal_type = GetType().InternalType(); + if (internal_type == PhysicalType::STRUCT) { + Vector new_vector(GetType()); + auto &entries = StructVector::GetEntries(new_vector); + auto &other_entries = StructVector::GetEntries(other); + D_ASSERT(entries.size() == other_entries.size()); + for (idx_t i = 0; i < entries.size(); i++) { + entries[i]->Slice(*other_entries[i], 
offset); } - return "LIST<" + child_types_[0].second.ToString() + ">"; - } - case LogicalTypeId::MAP: { - if (child_types_.empty()) { - return "MAP"; + if (offset > 0) { + new_vector.validity.Slice(other.validity, offset); + } else { + new_vector.validity = other.validity; } - if (child_types_.size() != 2) { - throw Exception("Map needs exactly two child elements"); + Reference(new_vector); + } else { + Reference(other); + if (offset > 0) { + data = data + GetTypeIdSize(internal_type) * offset; + validity.Slice(other.validity, offset); } - return "MAP<" + child_types_[0].second.ToString() + ", " + child_types_[1].second.ToString() + ">"; } - case LogicalTypeId::DECIMAL: { - if (width_ == 0) { - return "DECIMAL"; - } - return StringUtil::Format("DECIMAL(%d,%d)", width_, scale_); +} + +void Vector::Slice(Vector &other, const SelectionVector &sel, idx_t count) { + Reference(other); + Slice(sel, count); +} + +void Vector::Slice(const SelectionVector &sel, idx_t count) { + if (GetVectorType() == VectorType::CONSTANT_VECTOR) { + // dictionary on a constant is just a constant + return; } - default: - return LogicalTypeIdToString(id_); + if (GetVectorType() == VectorType::DICTIONARY_VECTOR) { + // already a dictionary, slice the current dictionary + auto ¤t_sel = DictionaryVector::SelVector(*this); + auto sliced_dictionary = current_sel.Slice(sel, count); + buffer = make_buffer(move(sliced_dictionary)); + return; } + Vector child_vector(*this); + auto child_ref = make_buffer(move(child_vector)); + + auto dict_buffer = make_buffer(sel); + vector_type = VectorType::DICTIONARY_VECTOR; + buffer = move(dict_buffer); + auxiliary = move(child_ref); } -LogicalType TransformStringToLogicalType(const string &str) { - auto lower_str = StringUtil::Lower(str); - // Transform column type - if (lower_str == "int" || lower_str == "int4" || lower_str == "signed" || lower_str == "integer" || - lower_str == "integral" || lower_str == "int32") { - return LogicalType::INTEGER; - } else if 
(lower_str == "varchar" || lower_str == "bpchar" || lower_str == "text" || lower_str == "string" || - lower_str == "char") { - return LogicalType::VARCHAR; - } else if (lower_str == "bytea" || lower_str == "blob" || lower_str == "varbinary" || lower_str == "binary") { - return LogicalType::BLOB; - } else if (lower_str == "int8" || lower_str == "bigint" || lower_str == "int64" || lower_str == "long") { - return LogicalType::BIGINT; - } else if (lower_str == "int2" || lower_str == "smallint" || lower_str == "short" || lower_str == "int16") { - return LogicalType::SMALLINT; - } else if (lower_str == "timestamp" || lower_str == "datetime") { - return LogicalType::TIMESTAMP; - } else if (lower_str == "bool" || lower_str == "boolean" || lower_str == "logical") { - return LogicalType(LogicalTypeId::BOOLEAN); - } else if (lower_str == "real" || lower_str == "float4" || lower_str == "float") { - return LogicalType::FLOAT; - } else if (lower_str == "decimal" || lower_str == "dec" || lower_str == "numeric") { - return LogicalType(LogicalTypeId::DECIMAL, 18, 3); - } else if (lower_str == "double" || lower_str == "float8" || lower_str == "decimal") { - return LogicalType::DOUBLE; - } else if (lower_str == "tinyint" || lower_str == "int1") { - return LogicalType::TINYINT; - } else if (lower_str == "date") { - return LogicalType::DATE; - } else if (lower_str == "time") { - return LogicalType::TIME; - } else if (lower_str == "interval") { - return LogicalType::INTERVAL; - } else if (lower_str == "hugeint" || lower_str == "int128") { - return LogicalType::HUGEINT; - } else if (lower_str == "struct" || lower_str == "row") { - return LogicalType::STRUCT; - } else if (lower_str == "map") { - return LogicalType::MAP; - } else if (lower_str == "utinyint") { - return LogicalType::UTINYINT; - } else if (lower_str == "usmallint") { - return LogicalType::USMALLINT; - } else if (lower_str == "uinteger") { - return LogicalType::UINTEGER; - } else if (lower_str == "ubigint") { - return 
LogicalType::UBIGINT; +void Vector::Slice(const SelectionVector &sel, idx_t count, SelCache &cache) { + if (GetVectorType() == VectorType::DICTIONARY_VECTOR) { + // dictionary vector: need to merge dictionaries + // check if we have a cached entry + auto ¤t_sel = DictionaryVector::SelVector(*this); + auto target_data = current_sel.data(); + auto entry = cache.cache.find(target_data); + if (entry != cache.cache.end()) { + // cached entry exists: use that + this->buffer = make_buffer(((DictionaryBuffer &)*entry->second).GetSelVector()); + vector_type = VectorType::DICTIONARY_VECTOR; + } else { + Slice(sel, count); + cache.cache[target_data] = this->buffer; + } } else { - throw NotImplementedException("DataType %s not supported yet...\n", str); + Slice(sel, count); } } -bool LogicalType::IsIntegral() const { - switch (id_) { - case LogicalTypeId::TINYINT: - case LogicalTypeId::SMALLINT: - case LogicalTypeId::INTEGER: - case LogicalTypeId::BIGINT: - case LogicalTypeId::UTINYINT: - case LogicalTypeId::USMALLINT: - case LogicalTypeId::UINTEGER: - case LogicalTypeId::UBIGINT: - case LogicalTypeId::HUGEINT: - return true; - default: - return false; +void Vector::Initialize(bool zero_data, idx_t capacity) { + auxiliary.reset(); + validity.Reset(); + auto &type = GetType(); + auto internal_type = type.InternalType(); + if (internal_type == PhysicalType::STRUCT) { + auto struct_buffer = make_unique(type, capacity); + auxiliary = move(struct_buffer); + } else if (internal_type == PhysicalType::LIST) { + auto list_buffer = make_unique(type); + auxiliary = move(list_buffer); + } + auto type_size = GetTypeIdSize(internal_type); + if (type_size > 0) { + buffer = VectorBuffer::CreateStandardVector(type, capacity); + data = buffer->GetData(); + if (zero_data) { + memset(data, 0, capacity * type_size); + } } } -bool LogicalType::IsNumeric() const { - switch (id_) { - case LogicalTypeId::TINYINT: - case LogicalTypeId::SMALLINT: - case LogicalTypeId::INTEGER: - case 
LogicalTypeId::BIGINT: - case LogicalTypeId::HUGEINT: - case LogicalTypeId::FLOAT: - case LogicalTypeId::DOUBLE: - case LogicalTypeId::DECIMAL: - case LogicalTypeId::UTINYINT: - case LogicalTypeId::USMALLINT: - case LogicalTypeId::UINTEGER: - case LogicalTypeId::UBIGINT: - return true; - default: - return false; +struct DataArrays { + Vector &vec; + data_ptr_t data; + VectorBuffer *buffer; + idx_t type_size; + bool is_nested; + DataArrays(Vector &vec, data_ptr_t data, VectorBuffer *buffer, idx_t type_size, bool is_nested) + : vec(vec), data(data), buffer(buffer), type_size(type_size), is_nested(is_nested) {}; +}; + +void FindChildren(std::vector &to_resize, VectorBuffer &auxiliary) { + if (auxiliary.GetBufferType() == VectorBufferType::LIST_BUFFER) { + auto &buffer = (VectorListBuffer &)auxiliary; + auto &child = buffer.GetChild(); + auto data = child.GetData(); + if (!data) { + //! Nested type + DataArrays arrays(child, data, child.GetBuffer().get(), GetTypeIdSize(child.GetType().InternalType()), + true); + to_resize.emplace_back(arrays); + FindChildren(to_resize, *child.GetAuxiliary()); + } else { + DataArrays arrays(child, data, child.GetBuffer().get(), GetTypeIdSize(child.GetType().InternalType()), + false); + to_resize.emplace_back(arrays); + } + } else if (auxiliary.GetBufferType() == VectorBufferType::STRUCT_BUFFER) { + auto &buffer = (VectorStructBuffer &)auxiliary; + auto &children = buffer.GetChildren(); + for (auto &child : children) { + auto data = child->GetData(); + if (!data) { + //! 
Nested type + DataArrays arrays(*child, data, child->GetBuffer().get(), + GetTypeIdSize(child->GetType().InternalType()), true); + to_resize.emplace_back(arrays); + FindChildren(to_resize, *child->GetAuxiliary()); + } else { + DataArrays arrays(*child, data, child->GetBuffer().get(), + GetTypeIdSize(child->GetType().InternalType()), false); + to_resize.emplace_back(arrays); + } + } + } +} +void Vector::Resize(idx_t cur_size, idx_t new_size) { + std::vector to_resize; + if (!buffer) { + buffer = make_unique(0); + } + if (!data) { + //! this is a nested structure + DataArrays arrays(*this, data, buffer.get(), GetTypeIdSize(GetType().InternalType()), true); + to_resize.emplace_back(arrays); + FindChildren(to_resize, *auxiliary); + } else { + DataArrays arrays(*this, data, buffer.get(), GetTypeIdSize(GetType().InternalType()), false); + to_resize.emplace_back(arrays); + } + for (auto &data_to_resize : to_resize) { + if (!data_to_resize.is_nested) { + auto new_data = unique_ptr(new data_t[new_size * data_to_resize.type_size]); + memcpy(new_data.get(), data_to_resize.data, cur_size * data_to_resize.type_size * sizeof(data_t)); + data_to_resize.buffer->SetData(move(new_data)); + data_to_resize.vec.data = data_to_resize.buffer->GetData(); + } + data_to_resize.vec.validity.Resize(cur_size, new_size); } } -bool LogicalType::GetDecimalProperties(uint8_t &width, uint8_t &scale) const { - switch (id_) { - case LogicalTypeId::SQLNULL: - width = 0; - scale = 0; - break; +void Vector::SetValue(idx_t index, const Value &val) { + if (GetVectorType() == VectorType::DICTIONARY_VECTOR) { + // dictionary: apply dictionary and forward to child + auto &sel_vector = DictionaryVector::SelVector(*this); + auto &child = DictionaryVector::Child(*this); + return child.SetValue(sel_vector.get_index(index), val); + } + if (val.type() != GetType()) { + SetValue(index, val.CastAs(GetType())); + return; + } + + validity.EnsureWritable(); + validity.Set(index, !val.is_null); + if (val.is_null && 
GetType().InternalType() != PhysicalType::STRUCT) { + // for structs we still need to set the child-entries to NULL + // so we do not bail out yet + return; + } + + switch (GetType().id()) { case LogicalTypeId::BOOLEAN: - width = 1; - scale = 0; + ((bool *)data)[index] = val.value_.boolean; break; case LogicalTypeId::TINYINT: - // tinyint: [-127, 127] = DECIMAL(3,0) - width = 3; - scale = 0; + ((int8_t *)data)[index] = val.value_.tinyint; break; case LogicalTypeId::SMALLINT: - // smallint: [-32767, 32767] = DECIMAL(5,0) - width = 5; - scale = 0; + ((int16_t *)data)[index] = val.value_.smallint; break; + case LogicalTypeId::DATE: case LogicalTypeId::INTEGER: - // integer: [-2147483647, 2147483647] = DECIMAL(10,0) - width = 10; - scale = 0; + ((int32_t *)data)[index] = val.value_.integer; break; + case LogicalTypeId::TIMESTAMP: + case LogicalTypeId::TIMESTAMP_SEC: + case LogicalTypeId::TIMESTAMP_MS: + case LogicalTypeId::TIMESTAMP_NS: + case LogicalTypeId::HASH: + case LogicalTypeId::TIME: case LogicalTypeId::BIGINT: - // bigint: [-9223372036854775807, 9223372036854775807] = DECIMAL(19,0) - width = 19; - scale = 0; + ((int64_t *)data)[index] = val.value_.bigint; break; case LogicalTypeId::UTINYINT: - // UInt8 — [0 : 255] - width = 3; - scale = 0; + ((uint8_t *)data)[index] = val.value_.utinyint; break; case LogicalTypeId::USMALLINT: - // UInt16 — [0 : 65535] - width = 5; - scale = 0; + ((uint16_t *)data)[index] = val.value_.usmallint; break; case LogicalTypeId::UINTEGER: - // UInt32 — [0 : 4294967295] - width = 10; - scale = 0; + ((uint32_t *)data)[index] = val.value_.uinteger; break; case LogicalTypeId::UBIGINT: - // UInt64 — [0 : 18446744073709551615] - width = 20; - scale = 0; + ((uint64_t *)data)[index] = val.value_.ubigint; break; case LogicalTypeId::HUGEINT: - // hugeint: max size decimal (38, 0) - // note that a hugeint is not guaranteed to fit in this - width = 38; - scale = 0; + ((hugeint_t *)data)[index] = val.value_.hugeint; break; case 
LogicalTypeId::DECIMAL: - width = width_; - scale = scale_; + D_ASSERT(DecimalType::GetWidth(GetType()) == DecimalType::GetWidth(val.type())); + D_ASSERT(DecimalType::GetScale(GetType()) == DecimalType::GetScale(val.type())); + switch (GetType().InternalType()) { + case PhysicalType::INT16: + ((int16_t *)data)[index] = val.value_.smallint; + break; + case PhysicalType::INT32: + ((int32_t *)data)[index] = val.value_.integer; + break; + case PhysicalType::INT64: + ((int64_t *)data)[index] = val.value_.bigint; + break; + case PhysicalType::INT128: + ((hugeint_t *)data)[index] = val.value_.hugeint; + break; + default: + throw InternalException("Widths bigger than 38 are not supported"); + } + break; + case LogicalTypeId::FLOAT: + ((float *)data)[index] = val.value_.float_; + break; + case LogicalTypeId::DOUBLE: + ((double *)data)[index] = val.value_.double_; + break; + case LogicalTypeId::POINTER: + ((uintptr_t *)data)[index] = val.value_.pointer; + break; + case LogicalTypeId::INTERVAL: + ((interval_t *)data)[index] = val.value_.interval; + break; + case LogicalTypeId::VARCHAR: + case LogicalTypeId::BLOB: + ((string_t *)data)[index] = StringVector::AddStringOrBlob(*this, val.str_value); + break; + case LogicalTypeId::MAP: + case LogicalTypeId::STRUCT: { + D_ASSERT(GetVectorType() == VectorType::CONSTANT_VECTOR || GetVectorType() == VectorType::FLAT_VECTOR); + + auto &children = StructVector::GetEntries(*this); + D_ASSERT(val.is_null || children.size() == val.struct_value.size()); + for (size_t i = 0; i < children.size(); i++) { + auto &vec_child = children[i]; + if (!val.is_null) { + auto &struct_child = val.struct_value[i]; + vec_child->SetValue(index, struct_child); + } else { + vec_child->SetValue(index, Value()); + } + } + break; + } + case LogicalTypeId::LIST: { + auto offset = ListVector::GetListSize(*this); + if (!val.list_value.empty()) { + for (idx_t i = 0; i < val.list_value.size(); i++) { + Value v(val.list_value[i]); + ListVector::PushBack(*this, v); + } + 
} + //! now set the pointer + auto &entry = ((list_entry_t *)data)[index]; + entry.length = val.list_value.size(); + entry.offset = offset; break; + } default: - return false; + throw InternalException("Unimplemented type for Vector::SetValue"); } - return true; } -bool LogicalType::IsMoreGenericThan(LogicalType &other) const { - if (other.id() == id_) { - return false; +Value Vector::GetValue(idx_t index) const { + switch (GetVectorType()) { + case VectorType::CONSTANT_VECTOR: + index = 0; + break; + case VectorType::FLAT_VECTOR: + break; + // dictionary: apply dictionary and forward to child + case VectorType::DICTIONARY_VECTOR: { + auto &sel_vector = DictionaryVector::SelVector(*this); + auto &child = DictionaryVector::Child(*this); + return child.GetValue(sel_vector.get_index(index)); } - - if (other.id() == LogicalTypeId::SQLNULL) { - return true; + case VectorType::SEQUENCE_VECTOR: { + int64_t start, increment; + SequenceVector::GetSequence(*this, start, increment); + return Value::Numeric(GetType(), start + increment * index); + } + default: + throw InternalException("Unimplemented vector type for Vector::GetValue"); } - // all integer types can cast from INTEGER - // this is because INTEGER is the smallest type considered by the automatic csv sniffer - switch (id_) { + if (!validity.RowIsValid(index)) { + return Value(GetType()); + } + switch (GetType().id()) { + case LogicalTypeId::BOOLEAN: + return Value::BOOLEAN(((bool *)data)[index]); + case LogicalTypeId::TINYINT: + return Value::TINYINT(((int8_t *)data)[index]); case LogicalTypeId::SMALLINT: - switch (other.id()) { - case LogicalTypeId::BOOLEAN: - case LogicalTypeId::TINYINT: - return true; - default: - return false; - } + return Value::SMALLINT(((int16_t *)data)[index]); case LogicalTypeId::INTEGER: - switch (other.id()) { - case LogicalTypeId::BOOLEAN: - case LogicalTypeId::TINYINT: - case LogicalTypeId::SMALLINT: - return true; - default: - return false; - } + return Value::INTEGER(((int32_t 
*)data)[index]); + case LogicalTypeId::DATE: + return Value::DATE(((date_t *)data)[index]); + case LogicalTypeId::TIME: + return Value::TIME(((dtime_t *)data)[index]); case LogicalTypeId::BIGINT: - switch (other.id()) { - case LogicalTypeId::BOOLEAN: - case LogicalTypeId::TINYINT: - case LogicalTypeId::SMALLINT: - case LogicalTypeId::INTEGER: - return true; - default: - return false; - } + return Value::BIGINT(((int64_t *)data)[index]); + case LogicalTypeId::UTINYINT: + return Value::UTINYINT(((uint8_t *)data)[index]); + case LogicalTypeId::USMALLINT: + return Value::USMALLINT(((uint16_t *)data)[index]); + case LogicalTypeId::UINTEGER: + return Value::UINTEGER(((uint32_t *)data)[index]); + case LogicalTypeId::UBIGINT: + return Value::UBIGINT(((uint64_t *)data)[index]); + case LogicalTypeId::TIMESTAMP: + return Value::TIMESTAMP(((timestamp_t *)data)[index]); + case LogicalTypeId::TIMESTAMP_NS: + return Value::TimestampNs(((timestamp_t *)data)[index]); + case LogicalTypeId::TIMESTAMP_MS: + return Value::TimestampMs(((timestamp_t *)data)[index]); + case LogicalTypeId::TIMESTAMP_SEC: + return Value::TimestampSec(((timestamp_t *)data)[index]); case LogicalTypeId::HUGEINT: - switch (other.id()) { - case LogicalTypeId::BOOLEAN: - case LogicalTypeId::TINYINT: - case LogicalTypeId::SMALLINT: - case LogicalTypeId::INTEGER: - case LogicalTypeId::BIGINT: - return true; + return Value::HUGEINT(((hugeint_t *)data)[index]); + case LogicalTypeId::DECIMAL: { + auto width = DecimalType::GetWidth(GetType()); + auto scale = DecimalType::GetScale(GetType()); + switch (GetType().InternalType()) { + case PhysicalType::INT16: + return Value::DECIMAL(((int16_t *)data)[index], width, scale); + case PhysicalType::INT32: + return Value::DECIMAL(((int32_t *)data)[index], width, scale); + case PhysicalType::INT64: + return Value::DECIMAL(((int64_t *)data)[index], width, scale); + case PhysicalType::INT128: + return Value::DECIMAL(((hugeint_t *)data)[index], width, scale); default: - return 
false; + throw InternalException("Widths bigger than 38 are not supported"); } + } + case LogicalTypeId::HASH: + return Value::HASH(((hash_t *)data)[index]); + case LogicalTypeId::POINTER: + return Value::POINTER(((uintptr_t *)data)[index]); case LogicalTypeId::FLOAT: - switch (other.id()) { - case LogicalTypeId::BOOLEAN: - case LogicalTypeId::TINYINT: - case LogicalTypeId::SMALLINT: - case LogicalTypeId::INTEGER: - case LogicalTypeId::BIGINT: - return true; - default: - return false; - } - return false; + return Value::FLOAT(((float *)data)[index]); case LogicalTypeId::DOUBLE: - switch (other.id()) { - case LogicalTypeId::BOOLEAN: - case LogicalTypeId::TINYINT: - case LogicalTypeId::SMALLINT: - case LogicalTypeId::INTEGER: - case LogicalTypeId::BIGINT: - case LogicalTypeId::FLOAT: - return true; - default: - return false; + return Value::DOUBLE(((double *)data)[index]); + case LogicalTypeId::INTERVAL: + return Value::INTERVAL(((interval_t *)data)[index]); + case LogicalTypeId::VARCHAR: { + auto str = ((string_t *)data)[index]; + return Value(str.GetString()); + } + case LogicalTypeId::BLOB: { + auto str = ((string_t *)data)[index]; + return Value::BLOB((const_data_ptr_t)str.GetDataUnsafe(), str.GetSize()); + } + case LogicalTypeId::MAP: + case LogicalTypeId::STRUCT: { + Value ret(GetType()); + ret.is_null = false; + // we can derive the value schema from the vector schema + auto &child_entries = StructVector::GetEntries(*this); + for (auto &struct_child : child_entries) { + ret.struct_value.push_back(struct_child->GetValue(index)); } - return false; - case LogicalTypeId::DATE: - return false; - case LogicalTypeId::TIMESTAMP: - switch (other.id()) { - case LogicalTypeId::TIME: - case LogicalTypeId::DATE: - return true; - default: - return false; + return ret; + } + case LogicalTypeId::LIST: { + Value ret(GetType()); + ret.is_null = false; + auto offlen = ((list_entry_t *)data)[index]; + auto &child_vec = ListVector::GetEntry(*this); + for (idx_t i = offlen.offset; 
i < offlen.offset + offlen.length; i++) { + ret.list_value.push_back(child_vec.GetValue(i)); } - case LogicalTypeId::VARCHAR: - return true; + return ret; + } default: - return false; + throw InternalException("Unimplemented type for value access"); } - - return true; } -LogicalType LogicalType::MaxLogicalType(const LogicalType &left, const LogicalType &right) { - if (left.id() < right.id()) { - return right; - } else if (right.id() < left.id()) { - return left; - } else { - if (left.id() == LogicalTypeId::VARCHAR) { - // varchar: use type that has collation (if any) - if (right.collation().empty()) { - return left; - } else { - return right; - } - } else if (left.id() == LogicalTypeId::DECIMAL) { - // use max width/scale of the two types - return LogicalType(LogicalTypeId::DECIMAL, MaxValue(left.width(), right.width()), - MaxValue(left.scale(), right.scale())); - } else if (left.id() == LogicalTypeId::LIST) { - // list: perform max recursively on child type - child_list_t child_types; - child_types.push_back( - make_pair(left.child_types()[0].first, - MaxLogicalType(left.child_types()[0].second, right.child_types()[0].second))); - return LogicalType(LogicalTypeId::LIST, move(child_types)); - } else { - // types are equal but no extra specifier: just return the type - // FIXME: LIST and STRUCT? 
- return left; - } +// LCOV_EXCL_START +string VectorTypeToString(VectorType type) { + switch (type) { + case VectorType::FLAT_VECTOR: + return "FLAT"; + case VectorType::SEQUENCE_VECTOR: + return "SEQUENCE"; + case VectorType::DICTIONARY_VECTOR: + return "DICTIONARY"; + case VectorType::CONSTANT_VECTOR: + return "CONSTANT"; + default: + return "UNKNOWN"; } } -void LogicalType::Verify() const { -#ifdef DEBUG - if (id_ == LogicalTypeId::DECIMAL) { - D_ASSERT(width_ >= 1 && width_ <= Decimal::MAX_WIDTH_DECIMAL); - D_ASSERT(scale_ >= 0 && scale_ <= width_); +string Vector::ToString(idx_t count) const { + string retval = + VectorTypeToString(GetVectorType()) + " " + GetType().ToString() + ": " + to_string(count) + " = [ "; + switch (GetVectorType()) { + case VectorType::FLAT_VECTOR: + case VectorType::DICTIONARY_VECTOR: + for (idx_t i = 0; i < count; i++) { + retval += GetValue(i).ToString() + (i == count - 1 ? "" : ", "); + } + break; + case VectorType::CONSTANT_VECTOR: + retval += GetValue(0).ToString(); + break; + case VectorType::SEQUENCE_VECTOR: { + int64_t start, increment; + SequenceVector::GetSequence(*this, start, increment); + for (idx_t i = 0; i < count; i++) { + retval += to_string(start + increment * i) + (i == count - 1 ? 
"" : ", "); + } + break; } -#endif + default: + retval += "UNKNOWN VECTOR TYPE"; + break; + } + retval += "]"; + return retval; } -bool ApproxEqual(float ldecimal, float rdecimal) { - float epsilon = std::fabs(rdecimal) * 0.01; - return std::fabs(ldecimal - rdecimal) <= epsilon; +void Vector::Print(idx_t count) { + Printer::Print(ToString(count)); } -bool ApproxEqual(double ldecimal, double rdecimal) { - double epsilon = std::fabs(rdecimal) * 0.01; - return std::fabs(ldecimal - rdecimal) <= epsilon; +string Vector::ToString() const { + string retval = VectorTypeToString(GetVectorType()) + " " + GetType().ToString() + ": (UNKNOWN COUNT) [ "; + switch (GetVectorType()) { + case VectorType::FLAT_VECTOR: + case VectorType::DICTIONARY_VECTOR: + break; + case VectorType::CONSTANT_VECTOR: + retval += GetValue(0).ToString(); + break; + case VectorType::SEQUENCE_VECTOR: { + break; + } + default: + retval += "UNKNOWN VECTOR TYPE"; + break; + } + retval += "]"; + return retval; } -} // namespace duckdb - - +void Vector::Print() { + Printer::Print(ToString()); +} +// LCOV_EXCL_STOP +template +static void TemplatedFlattenConstantVector(data_ptr_t data, data_ptr_t old_data, idx_t count) { + auto constant = Load(old_data); + auto output = (T *)data; + for (idx_t i = 0; i < count; i++) { + output[i] = constant; + } +} +void Vector::Normalify(idx_t count) { + switch (GetVectorType()) { + case VectorType::FLAT_VECTOR: + // already a flat vector + break; + case VectorType::DICTIONARY_VECTOR: { + // create a new flat vector of this type + Vector other(GetType()); + // now copy the data of this vector to the other vector, removing the selection vector in the process + VectorOperations::Copy(*this, other, count, 0, 0); + // create a reference to the data in the other vector + this->Reference(other); + break; + } + case VectorType::CONSTANT_VECTOR: { + bool is_null = ConstantVector::IsNull(*this); + // allocate a new buffer for the vector + auto old_buffer = move(buffer); + auto old_data 
= data; + buffer = VectorBuffer::CreateStandardVector(type); + data = buffer->GetData(); + vector_type = VectorType::FLAT_VECTOR; + if (is_null) { + // constant NULL, set nullmask + validity.EnsureWritable(); + validity.SetAllInvalid(count); + return; + } + // non-null constant: have to repeat the constant + switch (GetType().InternalType()) { + case PhysicalType::BOOL: + TemplatedFlattenConstantVector(data, old_data, count); + break; + case PhysicalType::INT8: + TemplatedFlattenConstantVector(data, old_data, count); + break; + case PhysicalType::INT16: + TemplatedFlattenConstantVector(data, old_data, count); + break; + case PhysicalType::INT32: + TemplatedFlattenConstantVector(data, old_data, count); + break; + case PhysicalType::INT64: + TemplatedFlattenConstantVector(data, old_data, count); + break; + case PhysicalType::UINT8: + TemplatedFlattenConstantVector(data, old_data, count); + break; + case PhysicalType::UINT16: + TemplatedFlattenConstantVector(data, old_data, count); + break; + case PhysicalType::UINT32: + TemplatedFlattenConstantVector(data, old_data, count); + break; + case PhysicalType::UINT64: + TemplatedFlattenConstantVector(data, old_data, count); + break; + case PhysicalType::INT128: + TemplatedFlattenConstantVector(data, old_data, count); + break; + case PhysicalType::FLOAT: + TemplatedFlattenConstantVector(data, old_data, count); + break; + case PhysicalType::DOUBLE: + TemplatedFlattenConstantVector(data, old_data, count); + break; + case PhysicalType::INTERVAL: + TemplatedFlattenConstantVector(data, old_data, count); + break; + case PhysicalType::VARCHAR: + TemplatedFlattenConstantVector(data, old_data, count); + break; + case PhysicalType::LIST: { + TemplatedFlattenConstantVector(data, old_data, count); + break; + } + case PhysicalType::STRUCT: { + auto normalified_buffer = make_unique(); -namespace duckdb { + auto &new_children = normalified_buffer->GetChildren(); 
-//===--------------------------------------------------------------------===// -// Comparison Operations -//===--------------------------------------------------------------------===// -template -static bool TemplatedBooleanOperation(const Value &left, const Value &right) { - auto left_type = left.type(), right_type = right.type(); - if (left_type != right_type) { - try { - LogicalType comparison_type = BoundComparisonExpression::BindComparison(left_type, right_type); - return TemplatedBooleanOperation(left.CastAs(comparison_type), right.CastAs(comparison_type)); - } catch (...) { - return false; - } - } - switch (left_type.InternalType()) { - case PhysicalType::BOOL: - return OP::Operation(left.value_.boolean, right.value_.boolean); - case PhysicalType::INT8: - return OP::Operation(left.value_.tinyint, right.value_.tinyint); - case PhysicalType::INT16: - return OP::Operation(left.value_.smallint, right.value_.smallint); - case PhysicalType::INT32: - return OP::Operation(left.value_.integer, right.value_.integer); - case PhysicalType::INT64: - return OP::Operation(left.value_.bigint, right.value_.bigint); - case PhysicalType::UINT8: - return OP::Operation(left.value_.utinyint, right.value_.utinyint); - case PhysicalType::UINT16: - return OP::Operation(left.value_.usmallint, right.value_.usmallint); - case PhysicalType::UINT32: - return OP::Operation(left.value_.uinteger, right.value_.uinteger); - case PhysicalType::UINT64: - return OP::Operation(left.value_.ubigint, right.value_.ubigint); - case PhysicalType::INT128: - return OP::Operation(left.value_.hugeint, right.value_.hugeint); - case PhysicalType::POINTER: - return OP::Operation(left.value_.pointer, right.value_.pointer); - case PhysicalType::HASH: - return OP::Operation(left.value_.hash, right.value_.hash); - case PhysicalType::FLOAT: - return OP::Operation(left.value_.float_, right.value_.float_); - case PhysicalType::DOUBLE: - return OP::Operation(left.value_.double_, right.value_.double_); - case 
PhysicalType::INTERVAL: - return OP::Operation(left.value_.interval, right.value_.interval); - case PhysicalType::VARCHAR: - return OP::Operation(left.str_value, right.str_value); - case PhysicalType::STRUCT: { - for (idx_t i = 0; i < left.struct_value.size(); i++) { - if (i >= right.struct_value.size() || left.struct_value[i].first != right.struct_value[i].first || - left.struct_value[i].second != left.struct_value[i].second) { - return false; + auto &child_entries = StructVector::GetEntries(*this); + for (auto &child : child_entries) { + D_ASSERT(child->GetVectorType() == VectorType::CONSTANT_VECTOR); + auto vector = make_unique(*child); + vector->Normalify(count); + new_children.push_back(move(vector)); } + auxiliary = move(normalified_buffer); + } break; + default: + throw InternalException("Unimplemented type for VectorOperations::Normalify"); } - return true; + break; } - case PhysicalType::LIST: { - return left.list_value == right.list_value; + case VectorType::SEQUENCE_VECTOR: { + int64_t start, increment; + SequenceVector::GetSequence(*this, start, increment); + + buffer = VectorBuffer::CreateStandardVector(GetType()); + data = buffer->GetData(); + VectorOperations::GenerateSequence(*this, count, start, increment); + break; } default: - throw InternalException("Unimplemented type for value comparison"); + throw InternalException("Unimplemented type for normalify"); } } -bool ValueOperations::Equals(const Value &left, const Value &right) { - if (left.is_null && right.is_null) { - return true; +void Vector::Normalify(const SelectionVector &sel, idx_t count) { + switch (GetVectorType()) { + case VectorType::FLAT_VECTOR: + // already a flat vector + break; + case VectorType::SEQUENCE_VECTOR: { + int64_t start, increment; + SequenceVector::GetSequence(*this, start, increment); + + buffer = VectorBuffer::CreateStandardVector(GetType()); + data = buffer->GetData(); + VectorOperations::GenerateSequence(*this, count, sel, start, increment); + break; } - if 
(left.is_null != right.is_null) { - return false; + default: + throw InternalException("Unimplemented type for normalify with selection vector"); } - return TemplatedBooleanOperation(left, right); } -bool ValueOperations::NotEquals(const Value &left, const Value &right) { - return !ValueOperations::Equals(left, right); -} +void Vector::Orrify(idx_t count, VectorData &data) { + switch (GetVectorType()) { + case VectorType::DICTIONARY_VECTOR: { + auto &sel = DictionaryVector::SelVector(*this); + auto &child = DictionaryVector::Child(*this); + if (child.GetVectorType() == VectorType::FLAT_VECTOR) { + data.sel = &sel; + data.data = FlatVector::GetData(child); + data.validity = FlatVector::Validity(child); + } else { + // dictionary with non-flat child: create a new reference to the child and normalify it + Vector child_vector(child); + child_vector.Normalify(sel, count); + auto new_aux = make_buffer(move(child_vector)); -bool ValueOperations::GreaterThan(const Value &left, const Value &right) { - if (left.is_null && right.is_null) { - return false; - } else if (right.is_null) { - return true; - } else if (left.is_null) { - return false; + data.sel = &sel; + data.data = FlatVector::GetData(new_aux->data); + data.validity = FlatVector::Validity(new_aux->data); + this->auxiliary = move(new_aux); + } + break; } - return TemplatedBooleanOperation(left, right); -} - -bool ValueOperations::GreaterThanEquals(const Value &left, const Value &right) { - if (left.is_null && right.is_null) { - return true; - } else if (right.is_null) { - return true; - } else if (left.is_null) { - return false; + case VectorType::CONSTANT_VECTOR: + data.sel = ConstantVector::ZeroSelectionVector(count, data.owned_sel); + data.data = ConstantVector::GetData(*this); + data.validity = ConstantVector::Validity(*this); + break; + default: + Normalify(count); + data.sel = &FlatVector::INCREMENTAL_SELECTION_VECTOR; + data.data = FlatVector::GetData(*this); + data.validity = FlatVector::Validity(*this); + 
break; } - return TemplatedBooleanOperation(left, right); -} - -bool ValueOperations::LessThan(const Value &left, const Value &right) { - return ValueOperations::GreaterThan(right, left); } -bool ValueOperations::LessThanEquals(const Value &left, const Value &right) { - return ValueOperations::GreaterThanEquals(right, left); +void Vector::Sequence(int64_t start, int64_t increment) { + this->vector_type = VectorType::SEQUENCE_VECTOR; + this->buffer = make_buffer(sizeof(int64_t) * 2); + auto data = (int64_t *)buffer->GetData(); + data[0] = start; + data[1] = increment; + validity.Reset(); + auxiliary.reset(); } -} // namespace duckdb - +void Vector::Serialize(idx_t count, Serializer &serializer) { + auto &type = GetType(); + VectorData vdata; + Orrify(count, vdata); + const auto write_validity = (count > 0) && !vdata.validity.AllValid(); + serializer.Write(write_validity); + if (write_validity) { + ValidityMask flat_mask(count); + for (idx_t i = 0; i < count; ++i) { + auto row_idx = vdata.sel->get_index(i); + flat_mask.Set(i, vdata.validity.RowIsValid(row_idx)); + } + serializer.WriteData((const_data_ptr_t)flat_mask.GetData(), flat_mask.ValidityMaskSize(count)); + } + if (TypeIsConstantSize(type.InternalType())) { + // constant size type: simple copy + idx_t write_size = GetTypeIdSize(type.InternalType()) * count; + auto ptr = unique_ptr(new data_t[write_size]); + VectorOperations::WriteToStorage(*this, count, ptr.get()); + serializer.WriteData(ptr.get(), write_size); + } else { + switch (type.InternalType()) { + case PhysicalType::VARCHAR: { + auto strings = (string_t *)vdata.data; + for (idx_t i = 0; i < count; i++) { + auto idx = vdata.sel->get_index(i); + auto source = !vdata.validity.RowIsValid(idx) ? 
NullValue() : strings[idx]; + serializer.WriteStringLen((const_data_ptr_t)source.GetDataUnsafe(), source.GetSize()); + } + break; + } + case PhysicalType::STRUCT: { + Normalify(count); + auto &entries = StructVector::GetEntries(*this); + for (auto &entry : entries) { + entry->Serialize(count, serializer); + } + break; + } + case PhysicalType::LIST: { + auto &child = ListVector::GetEntry(*this); + auto list_size = ListVector::GetListSize(*this); + // serialize the list entries in a flat array + auto data = unique_ptr(new list_entry_t[count]); + auto source_array = (list_entry_t *)vdata.data; + for (idx_t i = 0; i < count; i++) { + auto idx = vdata.sel->get_index(i); + auto source = source_array[idx]; + data[i].offset = source.offset; + data[i].length = source.length; + } -namespace duckdb { + // write the list size + serializer.Write(list_size); + serializer.WriteData((data_ptr_t)data.get(), count * sizeof(list_entry_t)); -hash_t ValueOperations::Hash(const Value &op) { - if (op.is_null) { - return 0; - } - switch (op.type().InternalType()) { - case PhysicalType::BOOL: - return duckdb::Hash(op.value_.boolean); - case PhysicalType::INT8: - return duckdb::Hash(op.value_.tinyint); - case PhysicalType::INT16: - return duckdb::Hash(op.value_.smallint); - case PhysicalType::INT32: - return duckdb::Hash(op.value_.integer); - case PhysicalType::INT64: - return duckdb::Hash(op.value_.bigint); - case PhysicalType::UINT8: - return duckdb::Hash(op.value_.utinyint); - case PhysicalType::UINT16: - return duckdb::Hash(op.value_.usmallint); - case PhysicalType::UINT32: - return duckdb::Hash(op.value_.uinteger); - case PhysicalType::UINT64: - return duckdb::Hash(op.value_.ubigint); - case PhysicalType::INT128: - return duckdb::Hash(op.value_.hugeint); - case PhysicalType::FLOAT: - return duckdb::Hash(op.value_.float_); - case PhysicalType::DOUBLE: - return duckdb::Hash(op.value_.double_); - case PhysicalType::POINTER: - return duckdb::Hash(op.value_.pointer); - case 
PhysicalType::INTERVAL: - return duckdb::Hash(op.value_.interval); - case PhysicalType::VARCHAR: - return duckdb::Hash(op.str_value.c_str()); - case PhysicalType::LIST: { - hash_t hash = 0; - for (auto &entry : op.list_value) { - hash ^= ValueOperations::Hash(entry); + child.Serialize(list_size, serializer); + break; } - return hash; - } - case PhysicalType::STRUCT: { - hash_t hash = 0; - for (auto &entry : op.struct_value) { - hash ^= ValueOperations::Hash(entry.second); + default: + throw InternalException("Unimplemented variable width type for Vector::Serialize!"); } - return hash; - } - default: - throw InternalException("Unimplemented type for value hash"); } } -} // namespace duckdb - - - - - - - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/operator/subtract.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - -namespace duckdb { +void Vector::Deserialize(idx_t count, Deserializer &source) { + auto &type = GetType(); -struct SubtractOperator { - template - static inline TR Operation(TA left, TB right) { - return left - right; + auto &validity = FlatVector::Validity(*this); + validity.Reset(); + const auto has_validity = source.Read(); + if (has_validity) { + validity.Initialize(count); + source.ReadData((data_ptr_t)validity.GetData(), validity.ValidityMaskSize(count)); } -}; -template <> -float SubtractOperator::Operation(float left, float right); -template <> -double SubtractOperator::Operation(double left, double right); -template <> -interval_t SubtractOperator::Operation(interval_t left, interval_t right); -template <> -date_t SubtractOperator::Operation(date_t left, interval_t right); -template <> -timestamp_t SubtractOperator::Operation(timestamp_t left, interval_t right); -template <> -interval_t SubtractOperator::Operation(timestamp_t left, timestamp_t right); + if (TypeIsConstantSize(type.InternalType())) { + // constant size 
type: read fixed amount of data from + auto column_size = GetTypeIdSize(type.InternalType()) * count; + auto ptr = unique_ptr(new data_t[column_size]); + source.ReadData(ptr.get(), column_size); -struct TrySubtractOperator { - template - static inline bool Operation(TA left, TB right, TR &result) { - throw InternalException("Unimplemented type for TrySubtractOperator"); - } -}; + VectorOperations::ReadFromStorage(ptr.get(), count, *this); + } else { + switch (type.InternalType()) { + case PhysicalType::VARCHAR: { + auto strings = FlatVector::GetData(*this); + for (idx_t i = 0; i < count; i++) { + // read the strings + auto str = source.Read(); + // now add the string to the StringHeap of the vector + // and write the pointer into the vector + if (validity.RowIsValid(i)) { + strings[i] = StringVector::AddStringOrBlob(*this, str); + } + } + break; + } + case PhysicalType::STRUCT: { + auto &entries = StructVector::GetEntries(*this); + for (auto &entry : entries) { + entry->Deserialize(count, source); + } + break; + } + case PhysicalType::LIST: { + // read the list size + auto list_size = source.Read(); + ListVector::Reserve(*this, list_size); + ListVector::SetListSize(*this, list_size); -template <> -bool TrySubtractOperator::Operation(uint8_t left, uint8_t right, uint8_t &result); -template <> -bool TrySubtractOperator::Operation(uint16_t left, uint16_t right, uint16_t &result); -template <> -bool TrySubtractOperator::Operation(uint32_t left, uint32_t right, uint32_t &result); -template <> -bool TrySubtractOperator::Operation(uint64_t left, uint64_t right, uint64_t &result); + // read the list entry + auto list_entries = FlatVector::GetData(*this); + source.ReadData(list_entries, count * sizeof(list_entry_t)); -template <> -bool TrySubtractOperator::Operation(int8_t left, int8_t right, int8_t &result); -template <> -bool TrySubtractOperator::Operation(int16_t left, int16_t right, int16_t &result); -template <> -bool TrySubtractOperator::Operation(int32_t left, 
int32_t right, int32_t &result); -template <> -bool TrySubtractOperator::Operation(int64_t left, int64_t right, int64_t &result); + // deserialize the child vector + auto &child = ListVector::GetEntry(*this); + child.Deserialize(list_size, source); -struct SubtractOperatorOverflowCheck { - template - static inline TR Operation(TA left, TB right) { - TR result; - if (!TrySubtractOperator::Operation(left, right, result)) { - throw OutOfRangeException("Overflow in subtraction of %s (%d - %d)!", TypeIdToString(GetTypeId()), left, - right); + break; + } + default: + throw InternalException("Unimplemented variable width type for Vector::Deserialize!"); } - return result; } -}; +} -struct TryDecimalSubtract { - template - static inline bool Operation(TA left, TB right, TR &result) { - throw InternalException("Unimplemented type for TryDecimalSubtract"); +void Vector::SetVectorType(VectorType vector_type_p) { + this->vector_type = vector_type_p; + if (vector_type == VectorType::CONSTANT_VECTOR && GetType().InternalType() == PhysicalType::STRUCT) { + auto &entries = StructVector::GetEntries(*this); + for (auto &entry : entries) { + entry->SetVectorType(vector_type); + } } -}; - -template <> -bool TryDecimalSubtract::Operation(int16_t left, int16_t right, int16_t &result); -template <> -bool TryDecimalSubtract::Operation(int32_t left, int32_t right, int32_t &result); -template <> -bool TryDecimalSubtract::Operation(int64_t left, int64_t right, int64_t &result); -template <> -bool TryDecimalSubtract::Operation(hugeint_t left, hugeint_t right, hugeint_t &result); +} -struct DecimalSubtractOverflowCheck { - template - static inline TR Operation(TA left, TB right) { - TR result; - if (!TryDecimalSubtract::Operation(left, right, result)) { - throw OutOfRangeException("Overflow in subtract of DECIMAL(18) (%d - %d). 
You might want to add an " - "explicit cast to a bigger decimal.", - left, right); +void Vector::UTFVerify(const SelectionVector &sel, idx_t count) { +#ifdef DEBUG + if (count == 0) { + return; + } + if (GetType().InternalType() == PhysicalType::VARCHAR) { + // we just touch all the strings and let the sanitizer figure out if any + // of them are deallocated/corrupt + switch (GetVectorType()) { + case VectorType::CONSTANT_VECTOR: { + auto string = ConstantVector::GetData(*this); + if (!ConstantVector::IsNull(*this)) { + string->Verify(); + } + break; + } + case VectorType::FLAT_VECTOR: { + auto strings = FlatVector::GetData(*this); + for (idx_t i = 0; i < count; i++) { + auto oidx = sel.get_index(i); + if (validity.RowIsValid(oidx)) { + strings[oidx].Verify(); + } + } + break; + } + default: + break; } - return result; } -}; - -template <> -hugeint_t DecimalSubtractOverflowCheck::Operation(hugeint_t left, hugeint_t right); - -struct SubtractTimeOperator { - template - static TR Operation(TA left, TB right); -}; - -template <> -dtime_t SubtractTimeOperator::Operation(dtime_t left, interval_t right); - -} // namespace duckdb - +#endif +} -namespace duckdb { +void Vector::UTFVerify(idx_t count) { + UTFVerify(FlatVector::INCREMENTAL_SELECTION_VECTOR, count); +} -template -static Value BinaryValueOperation(const Value &left, const Value &right) { - auto left_type = left.type(); - auto right_type = right.type(); - LogicalType result_type = left_type; - if (left_type != right_type) { - result_type = LogicalType::MaxLogicalType(left.type(), right.type()); - Value left_cast = left.CastAs(result_type); - Value right_cast = right.CastAs(result_type); - return BinaryValueOperation(left_cast, right_cast); +void Vector::Verify(const SelectionVector &sel, idx_t count) { +#ifdef DEBUG + if (count == 0) { + return; } - if (left.is_null || right.is_null) { - return Value().CastAs(result_type); + if (GetVectorType() == VectorType::DICTIONARY_VECTOR) { + auto &child = 
DictionaryVector::Child(*this); + D_ASSERT(child.GetVectorType() != VectorType::DICTIONARY_VECTOR); + auto &dict_sel = DictionaryVector::SelVector(*this); + // merge the selection vectors and verify the child + auto new_buffer = dict_sel.Slice(sel, count); + SelectionVector new_sel(new_buffer); + child.Verify(new_sel, count); + return; } - if (TypeIsIntegral(result_type.InternalType())) { - hugeint_t left_hugeint; - hugeint_t right_hugeint; - switch (result_type.InternalType()) { - case PhysicalType::INT8: - left_hugeint = Hugeint::Convert(left.value_.tinyint); - right_hugeint = Hugeint::Convert(right.value_.tinyint); - break; - case PhysicalType::INT16: - left_hugeint = Hugeint::Convert(left.value_.smallint); - right_hugeint = Hugeint::Convert(right.value_.smallint); + if (TypeIsConstantSize(GetType().InternalType()) && + (GetVectorType() == VectorType::CONSTANT_VECTOR || GetVectorType() == VectorType::FLAT_VECTOR)) { + D_ASSERT(!auxiliary); + } + if (GetType().InternalType() == PhysicalType::DOUBLE) { + // verify that there are no INF or NAN values + switch (GetVectorType()) { + case VectorType::CONSTANT_VECTOR: { + auto dbl = ConstantVector::GetData(*this); + if (!ConstantVector::IsNull(*this)) { + D_ASSERT(Value::DoubleIsValid(*dbl)); + } break; - case PhysicalType::INT32: - left_hugeint = Hugeint::Convert(left.value_.integer); - right_hugeint = Hugeint::Convert(right.value_.integer); + } + case VectorType::FLAT_VECTOR: { + auto doubles = FlatVector::GetData(*this); + for (idx_t i = 0; i < count; i++) { + auto oidx = sel.get_index(i); + if (validity.RowIsValid(oidx)) { + D_ASSERT(Value::DoubleIsValid(doubles[oidx])); + } + } break; - case PhysicalType::INT64: - left_hugeint = Hugeint::Convert(left.value_.bigint); - right_hugeint = Hugeint::Convert(right.value_.bigint); + } + default: break; - case PhysicalType::INT128: - left_hugeint = left.value_.hugeint; - right_hugeint = right.value_.hugeint; + } + } + if (GetType().id() == LogicalTypeId::VARCHAR) { + // 
verify that there are no '\0' bytes in string values + switch (GetVectorType()) { + case VectorType::FLAT_VECTOR: { + auto strings = FlatVector::GetData(*this); + for (idx_t i = 0; i < count; i++) { + auto oidx = sel.get_index(i); + if (validity.RowIsValid(oidx)) { + strings[oidx].VerifyNull(); + } + } break; + } default: - throw NotImplementedException("Unimplemented type for value binary op"); + break; } - // integer addition - return Value::Numeric(result_type, - OP::template Operation(left_hugeint, right_hugeint)); - } else if (result_type.InternalType() == PhysicalType::FLOAT) { - return Value::FLOAT( - OP::template Operation(left.GetValue(), right.GetValue())); - } else if (result_type.InternalType() == PhysicalType::DOUBLE) { - return Value::DOUBLE( - OP::template Operation(left.GetValue(), right.GetValue())); - } else { - throw NotImplementedException("Unimplemented type for value binary op"); - } -} - -//===--------------------------------------------------------------------===// -// Numeric Operations -//===--------------------------------------------------------------------===// -Value ValueOperations::Add(const Value &left, const Value &right) { - return BinaryValueOperation(left, right); -} - -Value ValueOperations::Subtract(const Value &left, const Value &right) { - return BinaryValueOperation(left, right); -} - -Value ValueOperations::Multiply(const Value &left, const Value &right) { - return BinaryValueOperation(left, right); -} - -Value ValueOperations::Modulo(const Value &left, const Value &right) { - if (right == 0) { - return Value(right.type()); - } else { - return BinaryValueOperation(left, right); } -} -Value ValueOperations::Divide(const Value &left, const Value &right) { - if (right == 0) { - return Value(right.type()); - } else { - return BinaryValueOperation(left, right); + if (GetType().InternalType() == PhysicalType::STRUCT) { + auto &child_types = StructType::GetChildTypes(GetType()); + D_ASSERT(child_types.size() > 0); + if 
(GetVectorType() == VectorType::FLAT_VECTOR || GetVectorType() == VectorType::CONSTANT_VECTOR) { + // create a selection vector of the non-null entries of the struct vector + auto &children = StructVector::GetEntries(*this); + D_ASSERT(child_types.size() == children.size()); + for (idx_t child_idx = 0; child_idx < children.size(); child_idx++) { + if (GetVectorType() == VectorType::CONSTANT_VECTOR) { + D_ASSERT(children[child_idx]->GetVectorType() == VectorType::CONSTANT_VECTOR); + if (ConstantVector::IsNull(*this)) { + D_ASSERT(ConstantVector::IsNull(*children[child_idx])); + } + } else if (GetVectorType() == VectorType::FLAT_VECTOR && + children[child_idx]->GetVectorType() == VectorType::FLAT_VECTOR) { + // for any NULL entry in the struct, the child should be NULL as well + auto &validity = FlatVector::Validity(*this); + auto &child_validity = FlatVector::Validity(*children[child_idx]); + for (idx_t i = 0; i < count; i++) { + auto index = sel.get_index(i); + if (!validity.RowIsValid(index)) { + D_ASSERT(!child_validity.RowIsValid(index)); + } + } + } + D_ASSERT(children[child_idx]->GetType() == child_types[child_idx].second); + children[child_idx]->Verify(sel, count); + } + } } -} - -} // namespace duckdb -//===--------------------------------------------------------------------===// -// boolean_operators.cpp -// Description: This file contains the implementation of the boolean -// operations AND OR ! 
-//===--------------------------------------------------------------------===// - - - - - -namespace duckdb { - -//===--------------------------------------------------------------------===// -// AND/OR -//===--------------------------------------------------------------------===// -template -static void TemplatedBooleanNullmask(Vector &left, Vector &right, Vector &result, idx_t count) { - D_ASSERT(left.GetType().id() == LogicalTypeId::BOOLEAN && right.GetType().id() == LogicalTypeId::BOOLEAN && - result.GetType().id() == LogicalTypeId::BOOLEAN); - - if (left.GetVectorType() == VectorType::CONSTANT_VECTOR && right.GetVectorType() == VectorType::CONSTANT_VECTOR) { - // operation on two constants, result is constant vector - result.SetVectorType(VectorType::CONSTANT_VECTOR); - auto ldata = ConstantVector::GetData(left); - auto rdata = ConstantVector::GetData(right); - auto result_data = ConstantVector::GetData(result); - - bool is_null = OP::Operation(*ldata > 0, *rdata > 0, ConstantVector::IsNull(left), - ConstantVector::IsNull(right), *result_data); - ConstantVector::SetNull(result, is_null); - } else { - // perform generic loop - VectorData ldata, rdata; - left.Orrify(count, ldata); - right.Orrify(count, rdata); - result.SetVectorType(VectorType::FLAT_VECTOR); - auto left_data = (uint8_t *)ldata.data; // we use uint8 to avoid load of gunk bools - auto right_data = (uint8_t *)rdata.data; - auto result_data = FlatVector::GetData(result); - auto &result_mask = FlatVector::Validity(result); - if (!ldata.validity.AllValid() || !rdata.validity.AllValid()) { + if (GetType().InternalType() == PhysicalType::LIST) { + if (GetVectorType() == VectorType::CONSTANT_VECTOR) { + if (!ConstantVector::IsNull(*this)) { + auto &child = ListVector::GetEntry(*this); + SelectionVector child_sel(ListVector::GetListSize(*this)); + idx_t child_count = 0; + auto le = ConstantVector::GetData(*this); + D_ASSERT(le->offset + le->length <= ListVector::GetListSize(*this)); + for (idx_t k = 0; k 
< le->length; k++) { + child_sel.set_index(child_count++, le->offset + k); + } + child.Verify(child_sel, child_count); + } + } else if (GetVectorType() == VectorType::FLAT_VECTOR) { + auto &child = ListVector::GetEntry(*this); + auto child_size = ListVector::GetListSize(*this); + auto list_data = FlatVector::GetData(*this); + idx_t total_size = 0; for (idx_t i = 0; i < count; i++) { - auto lidx = ldata.sel->get_index(i); - auto ridx = rdata.sel->get_index(i); - bool is_null = - OP::Operation(left_data[lidx] > 0, right_data[ridx] > 0, !ldata.validity.RowIsValid(lidx), - !rdata.validity.RowIsValid(ridx), result_data[i]); - result_mask.Set(i, !is_null); + auto idx = sel.get_index(i); + auto &le = list_data[idx]; + if (validity.RowIsValid(idx)) { + D_ASSERT(le.offset + le.length <= child_size); + total_size += le.length; + } } - } else { + SelectionVector child_sel(total_size); + idx_t child_count = 0; for (idx_t i = 0; i < count; i++) { - auto lidx = ldata.sel->get_index(i); - auto ridx = rdata.sel->get_index(i); - result_data[i] = OP::SimpleOperation(left_data[lidx], right_data[ridx]); + auto idx = sel.get_index(i); + auto &le = list_data[idx]; + if (validity.RowIsValid(idx)) { + D_ASSERT(le.offset + le.length <= child_size); + for (idx_t k = 0; k < le.length; k++) { + child_sel.set_index(child_count++, le.offset + k); + } + } } + child.Verify(child_sel, child_count); } } +#endif } -/* -SQL AND Rules: - -TRUE AND TRUE = TRUE -TRUE AND FALSE = FALSE -TRUE AND NULL = NULL -FALSE AND TRUE = FALSE -FALSE AND FALSE = FALSE -FALSE AND NULL = FALSE -NULL AND TRUE = NULL -NULL AND FALSE = FALSE -NULL AND NULL = NULL - -Basically: -- Only true if both are true -- False if either is false (regardless of NULLs) -- NULL otherwise -*/ -struct TernaryAnd { - static bool SimpleOperation(bool left, bool right) { - return left && right; - } - static bool Operation(bool left, bool right, bool left_null, bool right_null, bool &result) { - if (left_null && right_null) { - // both NULL: 
- // result is NULL - return true; - } else if (left_null) { - // left is NULL: - // result is FALSE if right is false - // result is NULL if right is true - result = right; - return right; - } else if (right_null) { - // right is NULL: - // result is FALSE if left is false - // result is NULL if left is true - result = left; - return left; - } else { - // no NULL: perform the AND - result = left && right; - return false; +void Vector::Verify(idx_t count) { + if (count > STANDARD_VECTOR_SIZE) { + SelectionVector selection_vector(count); + for (size_t i = 0; i < count; i++) { + selection_vector.set_index(i, i); } + Verify(selection_vector, count); + } else { + Verify(FlatVector::INCREMENTAL_SELECTION_VECTOR, count); } -}; - -void VectorOperations::And(Vector &left, Vector &right, Vector &result, idx_t count) { - TemplatedBooleanNullmask(left, right, result, count); } -/* -SQL OR Rules: - -OR -TRUE OR TRUE = TRUE -TRUE OR FALSE = TRUE -TRUE OR NULL = TRUE -FALSE OR TRUE = TRUE -FALSE OR FALSE = FALSE -FALSE OR NULL = NULL -NULL OR TRUE = TRUE -NULL OR FALSE = NULL -NULL OR NULL = NULL - -Basically: -- Only false if both are false -- True if either is true (regardless of NULLs) -- NULL otherwise -*/ - -struct TernaryOr { - static bool SimpleOperation(bool left, bool right) { - return left || right; - } - static bool Operation(bool left, bool right, bool left_null, bool right_null, bool &result) { - if (left_null && right_null) { - // both NULL: - // result is NULL - return true; - } else if (left_null) { - // left is NULL: - // result is TRUE if right is true - // result is NULL if right is false - result = right; - return !right; - } else if (right_null) { - // right is NULL: - // result is TRUE if left is true - // result is NULL if left is false - result = left; - return !left; - } else { - // no NULL: perform the OR - result = left || right; - return false; +void FlatVector::SetNull(Vector &vector, idx_t idx, bool is_null) { + D_ASSERT(vector.GetVectorType() == 
VectorType::FLAT_VECTOR); + vector.validity.Set(idx, !is_null); + if (is_null && vector.GetType().InternalType() == PhysicalType::STRUCT) { + // set all child entries to null as well + auto &entries = StructVector::GetEntries(vector); + for (auto &entry : entries) { + FlatVector::SetNull(*entry, idx, is_null); } } -}; - -void VectorOperations::Or(Vector &left, Vector &right, Vector &result, idx_t count) { - TemplatedBooleanNullmask(left, right, result, count); } -struct NotOperator { - template - static inline TR Operation(TA left) { - return !left; +void ConstantVector::SetNull(Vector &vector, bool is_null) { + D_ASSERT(vector.GetVectorType() == VectorType::CONSTANT_VECTOR); + vector.validity.Set(0, !is_null); + if (is_null && vector.GetType().InternalType() == PhysicalType::STRUCT) { + // set all child entries to null as well + auto &entries = StructVector::GetEntries(vector); + for (auto &entry : entries) { + entry->SetVectorType(VectorType::CONSTANT_VECTOR); + ConstantVector::SetNull(*entry, is_null); + } } -}; - -void VectorOperations::Not(Vector &input, Vector &result, idx_t count) { - D_ASSERT(input.GetType() == LogicalType::BOOLEAN && result.GetType() == LogicalType::BOOLEAN); - UnaryExecutor::Execute(input, result, count); } -} // namespace duckdb -//===--------------------------------------------------------------------===// -// comparison_operators.cpp -// Description: This file contains the implementation of the comparison -// operations == != >= <= > < -//===--------------------------------------------------------------------===// - +const SelectionVector *ConstantVector::ZeroSelectionVector(idx_t count, SelectionVector &owned_sel) { + if (count <= STANDARD_VECTOR_SIZE) { + return &ConstantVector::ZERO_SELECTION_VECTOR; + } + owned_sel.Initialize(count); + for (idx_t i = 0; i < count; i++) { + owned_sel.set_index(i, 0); + } + return &owned_sel; +} +void ConstantVector::Reference(Vector &vector, Vector &source, idx_t position, idx_t count) { + 
D_ASSERT(position < count); + auto &source_type = source.GetType(); + switch (source_type.InternalType()) { + case PhysicalType::LIST: { + // retrieve the list entry from the source vector + VectorData vdata; + source.Orrify(count, vdata); + auto list_index = vdata.sel->get_index(position); + if (!vdata.validity.RowIsValid(list_index)) { + // list is null: create null value + Value null_value(source_type); + vector.Reference(null_value); + break; + } + auto list_data = (list_entry_t *)vdata.data; + auto list_entry = list_data[list_index]; + // add the list entry as the first element of "vector" + // FIXME: we only need to allocate space for 1 tuple here + auto target_data = FlatVector::GetData(vector); + target_data[0] = list_entry; -namespace duckdb { + // create a reference to the child list of the source vector + auto &child = ListVector::GetEntry(vector); + child.Reference(ListVector::GetEntry(source)); -struct ComparisonExecutor { -private: - template - static inline void TemplatedExecute(Vector &left, Vector &right, Vector &result, idx_t count) { - BinaryExecutor::Execute(left, right, result, count); + ListVector::SetListSize(vector, ListVector::GetListSize(source)); + vector.SetVectorType(VectorType::CONSTANT_VECTOR); + break; } + case PhysicalType::STRUCT: { + VectorData vdata; + source.Orrify(count, vdata); -public: - template - static inline void Execute(Vector &left, Vector &right, Vector &result, idx_t count) { - D_ASSERT(left.GetType() == right.GetType() && result.GetType() == LogicalType::BOOLEAN); - // the inplace loops take the result as the last parameter - switch (left.GetType().InternalType()) { - case PhysicalType::BOOL: - case PhysicalType::INT8: - TemplatedExecute(left, right, result, count); - break; - case PhysicalType::INT16: - TemplatedExecute(left, right, result, count); - break; - case PhysicalType::INT32: - TemplatedExecute(left, right, result, count); - break; - case PhysicalType::INT64: - TemplatedExecute(left, right, result, count); 
- break; - case PhysicalType::UINT8: - TemplatedExecute(left, right, result, count); - break; - case PhysicalType::UINT16: - TemplatedExecute(left, right, result, count); - break; - case PhysicalType::UINT32: - TemplatedExecute(left, right, result, count); + auto struct_index = vdata.sel->get_index(position); + if (!vdata.validity.RowIsValid(struct_index)) { + // null struct: create null value + Value null_value(source_type); + vector.Reference(null_value); break; - case PhysicalType::UINT64: - TemplatedExecute(left, right, result, count); - break; - case PhysicalType::INT128: - TemplatedExecute(left, right, result, count); - break; - case PhysicalType::POINTER: - TemplatedExecute(left, right, result, count); - break; - case PhysicalType::FLOAT: - TemplatedExecute(left, right, result, count); - break; - case PhysicalType::DOUBLE: - TemplatedExecute(left, right, result, count); - break; - case PhysicalType::INTERVAL: - TemplatedExecute(left, right, result, count); - break; - case PhysicalType::VARCHAR: - TemplatedExecute(left, right, result, count); - break; - default: - throw InvalidTypeException(left.GetType(), "Invalid type for comparison"); } - } -}; - -void VectorOperations::Equals(Vector &left, Vector &right, Vector &result, idx_t count) { - ComparisonExecutor::Execute(left, right, result, count); -} - -void VectorOperations::NotEquals(Vector &left, Vector &right, Vector &result, idx_t count) { - ComparisonExecutor::Execute(left, right, result, count); -} - -void VectorOperations::GreaterThanEquals(Vector &left, Vector &right, Vector &result, idx_t count) { - ComparisonExecutor::Execute(left, right, result, count); -} - -void VectorOperations::LessThanEquals(Vector &left, Vector &right, Vector &result, idx_t count) { - ComparisonExecutor::Execute(left, right, result, count); -} -void VectorOperations::GreaterThan(Vector &left, Vector &right, Vector &result, idx_t count) { - ComparisonExecutor::Execute(left, right, result, count); + // struct: pass constant 
reference into child entries + auto &source_entries = StructVector::GetEntries(source); + auto &target_entries = StructVector::GetEntries(vector); + for (idx_t i = 0; i < source_entries.size(); i++) { + ConstantVector::Reference(*target_entries[i], *source_entries[i], position, count); + } + vector.SetVectorType(VectorType::CONSTANT_VECTOR); + break; + } + default: + // default behavior: get a value from the vector and reference it + // this is not that expensive for scalar types + auto value = source.GetValue(position); + vector.Reference(value); + D_ASSERT(vector.GetVectorType() == VectorType::CONSTANT_VECTOR); + break; + } } -void VectorOperations::LessThan(Vector &left, Vector &right, Vector &result, idx_t count) { - ComparisonExecutor::Execute(left, right, result, count); +string_t StringVector::AddString(Vector &vector, const char *data, idx_t len) { + return StringVector::AddString(vector, string_t(data, len)); } -} // namespace duckdb -//===--------------------------------------------------------------------===// -// gather.cpp -// Description: This file contains the implementation of the gather operators -//===--------------------------------------------------------------------===// - - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/operator/constant_operators.hpp -// -// -//===----------------------------------------------------------------------===// - +string_t StringVector::AddStringOrBlob(Vector &vector, const char *data, idx_t len) { + return StringVector::AddStringOrBlob(vector, string_t(data, len)); +} +string_t StringVector::AddString(Vector &vector, const char *data) { + return StringVector::AddString(vector, string_t(data, strlen(data))); +} -namespace duckdb { +string_t StringVector::AddString(Vector &vector, const string &data) { + return StringVector::AddString(vector, string_t(data.c_str(), data.size())); +} -struct PickLeft { - template - static inline T Operation(T left, T 
right) { - return left; +string_t StringVector::AddString(Vector &vector, string_t data) { + D_ASSERT(vector.GetType().id() == LogicalTypeId::VARCHAR); + if (data.IsInlined()) { + // string will be inlined: no need to store in string heap + return data; } -}; + if (!vector.auxiliary) { + vector.auxiliary = make_buffer(); + } + D_ASSERT(vector.auxiliary->GetBufferType() == VectorBufferType::STRING_BUFFER); + auto &string_buffer = (VectorStringBuffer &)*vector.auxiliary; + return string_buffer.AddString(data); +} -struct PickRight { - template - static inline T Operation(T left, T right) { - return right; +string_t StringVector::AddStringOrBlob(Vector &vector, string_t data) { + D_ASSERT(vector.GetType().InternalType() == PhysicalType::VARCHAR); + if (data.IsInlined()) { + // string will be inlined: no need to store in string heap + return data; } -}; + if (!vector.auxiliary) { + vector.auxiliary = make_buffer(); + } + D_ASSERT(vector.auxiliary->GetBufferType() == VectorBufferType::STRING_BUFFER); + auto &string_buffer = (VectorStringBuffer &)*vector.auxiliary; + return string_buffer.AddBlob(data); +} -struct NOP { - template - static inline T Operation(T left) { - return left; +string_t StringVector::EmptyString(Vector &vector, idx_t len) { + D_ASSERT(vector.GetType().InternalType() == PhysicalType::VARCHAR); + if (len < string_t::INLINE_LENGTH) { + return string_t(len); } -}; + if (!vector.auxiliary) { + vector.auxiliary = make_buffer(); + } + D_ASSERT(vector.auxiliary->GetBufferType() == VectorBufferType::STRING_BUFFER); + auto &string_buffer = (VectorStringBuffer &)*vector.auxiliary; + return string_buffer.EmptyString(len); +} -struct ConstantZero { - template - static inline T Operation(T left, T right) { - return 0; +void StringVector::AddHandle(Vector &vector, unique_ptr handle) { + D_ASSERT(vector.GetType().InternalType() == PhysicalType::VARCHAR); + if (!vector.auxiliary) { + vector.auxiliary = make_buffer(); } -}; + auto &string_buffer = (VectorStringBuffer 
&)*vector.auxiliary; + string_buffer.AddHeapReference(make_buffer(move(handle))); +} -struct ConstantOne { - template - static inline T Operation(T left, T right) { - return 1; +void StringVector::AddBuffer(Vector &vector, buffer_ptr buffer) { + D_ASSERT(vector.GetType().InternalType() == PhysicalType::VARCHAR); + if (!vector.auxiliary) { + vector.auxiliary = make_buffer(); } -}; + auto &string_buffer = (VectorStringBuffer &)*vector.auxiliary; + string_buffer.AddHeapReference(move(buffer)); +} -struct AddOne { - template - static inline T Operation(T left, T right) { - return right + 1; +void StringVector::AddHeapReference(Vector &vector, Vector &other) { + D_ASSERT(vector.GetType().InternalType() == PhysicalType::VARCHAR); + D_ASSERT(other.GetType().InternalType() == PhysicalType::VARCHAR); + + if (other.GetVectorType() == VectorType::DICTIONARY_VECTOR) { + StringVector::AddHeapReference(vector, DictionaryVector::Child(other)); + return; } -}; + if (!other.auxiliary) { + return; + } + if (!vector.auxiliary) { + vector.auxiliary = make_buffer(); + } + D_ASSERT(vector.auxiliary->GetBufferType() == VectorBufferType::STRING_BUFFER); + D_ASSERT(other.auxiliary->GetBufferType() == VectorBufferType::STRING_BUFFER); + auto &string_buffer = (VectorStringBuffer &)*vector.auxiliary; + string_buffer.AddHeapReference(other.auxiliary); +} -} // namespace duckdb +vector> &StructVector::GetEntries(Vector &vector) { + D_ASSERT(vector.GetType().id() == LogicalTypeId::STRUCT || vector.GetType().id() == LogicalTypeId::MAP); + if (vector.GetVectorType() == VectorType::DICTIONARY_VECTOR) { + auto &child = DictionaryVector::Child(vector); + return StructVector::GetEntries(child); + } + D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR || + vector.GetVectorType() == VectorType::CONSTANT_VECTOR); + D_ASSERT(vector.auxiliary); + D_ASSERT(vector.auxiliary->GetBufferType() == VectorBufferType::STRUCT_BUFFER); + return ((VectorStructBuffer *)vector.auxiliary.get())->GetChildren(); +} 
+const vector> &StructVector::GetEntries(const Vector &vector) { + return GetEntries((Vector &)vector); +} +const Vector &ListVector::GetEntry(const Vector &vector) { + D_ASSERT(vector.GetType().id() == LogicalTypeId::LIST); + if (vector.GetVectorType() == VectorType::DICTIONARY_VECTOR) { + auto &child = DictionaryVector::Child(vector); + return ListVector::GetEntry(child); + } + D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR || + vector.GetVectorType() == VectorType::CONSTANT_VECTOR); + D_ASSERT(vector.auxiliary); + D_ASSERT(vector.auxiliary->GetBufferType() == VectorBufferType::LIST_BUFFER); + return ((VectorListBuffer *)vector.auxiliary.get())->GetChild(); +} +Vector &ListVector::GetEntry(Vector &vector) { + const Vector &cvector = vector; + return const_cast(ListVector::GetEntry(cvector)); +} -namespace duckdb { +void ListVector::Reserve(Vector &vector, idx_t required_capacity) { + D_ASSERT(vector.GetType().id() == LogicalTypeId::LIST); + D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR || + vector.GetVectorType() == VectorType::CONSTANT_VECTOR); + D_ASSERT(vector.auxiliary); + D_ASSERT(vector.auxiliary->GetBufferType() == VectorBufferType::LIST_BUFFER); + auto &child_buffer = *((VectorListBuffer *)vector.auxiliary.get()); + child_buffer.Reserve(required_capacity); +} template -static void TemplatedGatherLoop(Vector &source, Vector &dest, idx_t count) { - auto addresses = FlatVector::GetData(source); - auto data = FlatVector::GetData(dest); - auto &mask = FlatVector::Validity(dest); +void TemplatedSearchInMap(Vector &list, T key, vector &offsets, bool is_key_null, idx_t offset, idx_t length) { + auto &list_vector = ListVector::GetEntry(list); + VectorData vector_data; + list_vector.Orrify(ListVector::GetListSize(list), vector_data); + auto data = (T *)vector_data.data; + auto validity_mask = vector_data.validity; - for (idx_t i = 0; i < count; i++) { - auto val = Load((const_data_ptr_t)addresses[i]); - if (IsNullValue(val)) { - 
mask.SetInvalid(i); - } else { - data[i] = val; + if (is_key_null) { + for (idx_t i = offset; i < offset + length; i++) { + if (!validity_mask.RowIsValid(i)) { + offsets.push_back(i); + } + } + } else { + for (idx_t i = offset; i < offset + length; i++) { + if (!validity_mask.RowIsValid(i)) { + continue; + } + if (key == data[i]) { + offsets.push_back(i); + } + } + } +} + +void SearchString(Vector &list, string &key, vector &offsets, bool is_key_null, idx_t offset, idx_t length) { + auto &list_vector = ListVector::GetEntry(list); + VectorData vector_data; + list_vector.Orrify(ListVector::GetListSize(list), vector_data); + auto data = (string_t *)vector_data.data; + auto validity_mask = vector_data.validity; + if (is_key_null) { + for (idx_t i = offset; i < offset + length; i++) { + if (!validity_mask.RowIsValid(i)) { + offsets.push_back(i); + } + } + } else { + string_t key_str_t(key); + for (idx_t i = offset; i < offset + length; i++) { + if (!validity_mask.RowIsValid(i)) { + continue; + } + if (Equals::Operation(data[i], key_str_t)) { + offsets.push_back(i); + } } - addresses[i] += sizeof(T); } } -void VectorOperations::Gather::Set(Vector &source, Vector &dest, idx_t count) { - D_ASSERT(source.GetVectorType() == VectorType::FLAT_VECTOR); - D_ASSERT(source.GetType().id() == LogicalTypeId::POINTER); // "Cannot gather from non-pointer type!" 
+vector ListVector::Search(Vector &list, Value &key, idx_t row) { + vector offsets; - dest.SetVectorType(VectorType::FLAT_VECTOR); - switch (dest.GetType().InternalType()) { - case PhysicalType::UINT8: - TemplatedGatherLoop(source, dest, count); + auto &list_vector = ListVector::GetEntry(list); + auto &entry = ((list_entry_t *)list.GetData())[row]; + switch (list_vector.GetType().id()) { + + case LogicalTypeId::SQLNULL: + if (key.is_null) { + for (idx_t i = entry.offset; i < entry.offset + entry.length; i++) { + offsets.push_back(i); + } + } break; - case PhysicalType::UINT16: - TemplatedGatherLoop(source, dest, count); + case LogicalTypeId::UTINYINT: + ::duckdb::TemplatedSearchInMap(list, key.value_.utinyint, offsets, key.is_null, entry.offset, + entry.length); break; - case PhysicalType::UINT32: - TemplatedGatherLoop(source, dest, count); + case LogicalTypeId::TINYINT: + ::duckdb::TemplatedSearchInMap(list, key.value_.tinyint, offsets, key.is_null, entry.offset, + entry.length); break; - case PhysicalType::UINT64: - TemplatedGatherLoop(source, dest, count); + case LogicalTypeId::USMALLINT: + ::duckdb::TemplatedSearchInMap(list, key.value_.usmallint, offsets, key.is_null, entry.offset, + entry.length); break; - case PhysicalType::BOOL: - case PhysicalType::INT8: - TemplatedGatherLoop(source, dest, count); + case LogicalTypeId::SMALLINT: + ::duckdb::TemplatedSearchInMap(list, key.value_.smallint, offsets, key.is_null, entry.offset, + entry.length); break; - case PhysicalType::INT16: - TemplatedGatherLoop(source, dest, count); + case LogicalTypeId::UINTEGER: + ::duckdb::TemplatedSearchInMap(list, key.value_.uinteger, offsets, key.is_null, entry.offset, + entry.length); break; - case PhysicalType::INT32: - TemplatedGatherLoop(source, dest, count); + case LogicalTypeId::INTEGER: + ::duckdb::TemplatedSearchInMap(list, key.value_.integer, offsets, key.is_null, entry.offset, + entry.length); break; - case PhysicalType::INT64: - TemplatedGatherLoop(source, dest, count); + 
case LogicalTypeId::UBIGINT: + ::duckdb::TemplatedSearchInMap(list, key.value_.ubigint, offsets, key.is_null, entry.offset, + entry.length); break; - case PhysicalType::INT128: - TemplatedGatherLoop(source, dest, count); + case LogicalTypeId::BIGINT: + ::duckdb::TemplatedSearchInMap(list, key.value_.bigint, offsets, key.is_null, entry.offset, + entry.length); break; - case PhysicalType::FLOAT: - TemplatedGatherLoop(source, dest, count); + case LogicalTypeId::HUGEINT: + ::duckdb::TemplatedSearchInMap(list, key.value_.hugeint, offsets, key.is_null, entry.offset, + entry.length); break; - case PhysicalType::DOUBLE: - TemplatedGatherLoop(source, dest, count); + case LogicalTypeId::FLOAT: + ::duckdb::TemplatedSearchInMap(list, key.value_.float_, offsets, key.is_null, entry.offset, + entry.length); + break; + case LogicalTypeId::DOUBLE: + ::duckdb::TemplatedSearchInMap(list, key.value_.double_, offsets, key.is_null, entry.offset, + entry.length); break; - case PhysicalType::POINTER: - TemplatedGatherLoop(source, dest, count); + case LogicalTypeId::DATE: + ::duckdb::TemplatedSearchInMap(list, key.value_.date, offsets, key.is_null, entry.offset, entry.length); break; - case PhysicalType::INTERVAL: - TemplatedGatherLoop(source, dest, count); + case LogicalTypeId::TIME: + ::duckdb::TemplatedSearchInMap(list, key.value_.time, offsets, key.is_null, entry.offset, + entry.length); break; - case PhysicalType::VARCHAR: - TemplatedGatherLoop(source, dest, count); + case LogicalTypeId::TIMESTAMP: + ::duckdb::TemplatedSearchInMap(list, key.value_.timestamp, offsets, key.is_null, entry.offset, + entry.length); + break; + case LogicalTypeId::BLOB: + case LogicalTypeId::VARCHAR: + ::duckdb::SearchString(list, key.str_value, offsets, key.is_null, entry.offset, entry.length); break; default: - throw NotImplementedException("Unimplemented type for gather"); + throw InvalidTypeException(list.GetType().id(), "Invalid type for List Vector Search"); } + return offsets; } -} // namespace duckdb 
-//===--------------------------------------------------------------------===// -// generators.cpp -// Description: This file contains the implementation of different generators -//===--------------------------------------------------------------------===// - - - +Value ListVector::GetValuesFromOffsets(Vector &list, vector &offsets) { + Value ret(ListType::GetChildType(list.GetType())); + ret.is_null = false; + auto &child_vec = ListVector::GetEntry(list); + for (auto &offset : offsets) { + ret.list_value.push_back(child_vec.GetValue(offset)); + } + return ret; +} +idx_t ListVector::GetListSize(const Vector &vec) { + if (vec.GetVectorType() == VectorType::DICTIONARY_VECTOR) { + auto &child = DictionaryVector::Child(vec); + return ListVector::GetListSize(child); + } + D_ASSERT(vec.auxiliary); + return ((VectorListBuffer &)*vec.auxiliary).size; +} -namespace duckdb { +void ListVector::ReferenceEntry(Vector &vector, Vector &other) { + D_ASSERT(vector.GetType().id() == LogicalTypeId::LIST); + D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR || + vector.GetVectorType() == VectorType::CONSTANT_VECTOR); + D_ASSERT(other.GetType().id() == LogicalTypeId::LIST); + D_ASSERT(other.GetVectorType() == VectorType::FLAT_VECTOR || other.GetVectorType() == VectorType::CONSTANT_VECTOR); + vector.auxiliary = other.auxiliary; +} -template -void TemplatedGenerateSequence(Vector &result, idx_t count, int64_t start, int64_t increment) { - D_ASSERT(result.GetType().IsNumeric()); - if (start > NumericLimits::Maximum() || increment > NumericLimits::Maximum()) { - throw Exception("Sequence start or increment out of type range"); - } - result.SetVectorType(VectorType::FLAT_VECTOR); - auto result_data = FlatVector::GetData(result); - auto value = (T)start; - for (idx_t i = 0; i < count; i++) { - result_data[i] = value; - value += increment; +void ListVector::SetListSize(Vector &vec, idx_t size) { + if (vec.GetVectorType() == VectorType::DICTIONARY_VECTOR) { + auto &child = 
DictionaryVector::Child(vec); + ListVector::SetListSize(child, size); } + ((VectorListBuffer &)*vec.auxiliary).size = size; } -void VectorOperations::GenerateSequence(Vector &result, idx_t count, int64_t start, int64_t increment) { - if (!result.GetType().IsNumeric()) { - throw InvalidTypeException(result.GetType(), "Can only generate sequences for numeric values!"); - } - switch (result.GetType().InternalType()) { - case PhysicalType::INT8: - TemplatedGenerateSequence(result, count, start, increment); - break; - case PhysicalType::INT16: - TemplatedGenerateSequence(result, count, start, increment); - break; - case PhysicalType::INT32: - TemplatedGenerateSequence(result, count, start, increment); - break; - case PhysicalType::INT64: - TemplatedGenerateSequence(result, count, start, increment); - break; - case PhysicalType::FLOAT: - TemplatedGenerateSequence(result, count, start, increment); - break; - case PhysicalType::DOUBLE: - TemplatedGenerateSequence(result, count, start, increment); - break; - default: - throw NotImplementedException("Unimplemented type for generate sequence"); +void ListVector::Append(Vector &target, const Vector &source, idx_t source_size, idx_t source_offset) { + if (source_size - source_offset == 0) { + //! 
Nothing to add + return; } + auto &target_buffer = (VectorListBuffer &)*target.auxiliary; + target_buffer.Append(source, source_size, source_offset); } -template -void TemplatedGenerateSequence(Vector &result, idx_t count, const SelectionVector &sel, int64_t start, - int64_t increment) { - D_ASSERT(result.GetType().IsNumeric()); - if (start > NumericLimits::Maximum() || increment > NumericLimits::Maximum()) { - throw Exception("Sequence start or increment out of type range"); - } - result.SetVectorType(VectorType::FLAT_VECTOR); - auto result_data = FlatVector::GetData(result); - auto value = (T)start; - for (idx_t i = 0; i < count; i++) { - auto idx = sel.get_index(i); - result_data[idx] = value + increment * idx; +void ListVector::Append(Vector &target, const Vector &source, const SelectionVector &sel, idx_t source_size, + idx_t source_offset) { + if (source_size - source_offset == 0) { + //! Nothing to add + return; } + auto &target_buffer = (VectorListBuffer &)*target.auxiliary; + target_buffer.Append(source, sel, source_size, source_offset); } -void VectorOperations::GenerateSequence(Vector &result, idx_t count, const SelectionVector &sel, int64_t start, - int64_t increment) { - if (!result.GetType().IsNumeric()) { - throw InvalidTypeException(result.GetType(), "Can only generate sequences for numeric values!"); - } - switch (result.GetType().InternalType()) { - case PhysicalType::INT8: - TemplatedGenerateSequence(result, count, sel, start, increment); - break; - case PhysicalType::INT16: - TemplatedGenerateSequence(result, count, sel, start, increment); - break; - case PhysicalType::INT32: - TemplatedGenerateSequence(result, count, sel, start, increment); - break; - case PhysicalType::INT64: - TemplatedGenerateSequence(result, count, sel, start, increment); - break; - case PhysicalType::FLOAT: - TemplatedGenerateSequence(result, count, sel, start, increment); - break; - case PhysicalType::DOUBLE: - TemplatedGenerateSequence(result, count, sel, start, 
increment); - break; - default: - throw NotImplementedException("Unimplemented type for generate sequence"); - } +void ListVector::PushBack(Vector &target, Value &insert) { + auto &target_buffer = (VectorListBuffer &)*target.auxiliary; + target_buffer.PushBack(insert); } } // namespace duckdb + + + + + namespace duckdb { -struct DistinctBinaryLambdaWrapper { - template - static inline RESULT_TYPE Operation(LEFT_TYPE left, RIGHT_TYPE right, bool is_left_null, bool is_right_null) { - return OP::template Operation(left, right, is_left_null, is_right_null); - } -}; +buffer_ptr VectorBuffer::CreateStandardVector(PhysicalType type, idx_t capacity) { + return make_buffer(capacity * GetTypeIdSize(type)); +} -template -static void DistinctExecuteGenericLoop(LEFT_TYPE *__restrict ldata, RIGHT_TYPE *__restrict rdata, - RESULT_TYPE *__restrict result_data, const SelectionVector *__restrict lsel, - const SelectionVector *__restrict rsel, idx_t count, ValidityMask &lmask, - ValidityMask &rmask, ValidityMask &result_mask) { - for (idx_t i = 0; i < count; i++) { - auto lindex = lsel->get_index(i); - auto rindex = rsel->get_index(i); - auto lentry = ldata[lindex]; - auto rentry = rdata[rindex]; - result_data[i] = - OP::template Operation(lentry, rentry, !lmask.RowIsValid(lindex), !rmask.RowIsValid(rindex)); - } +buffer_ptr VectorBuffer::CreateConstantVector(PhysicalType type) { + return make_buffer(GetTypeIdSize(type)); } -template -static void DistinctExecuteConstant(Vector &left, Vector &right, Vector &result) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); +buffer_ptr VectorBuffer::CreateConstantVector(const LogicalType &type) { + return VectorBuffer::CreateConstantVector(type.InternalType()); +} - auto ldata = ConstantVector::GetData(left); - auto rdata = ConstantVector::GetData(right); - auto result_data = ConstantVector::GetData(result); - *result_data = - OP::template Operation(*ldata, *rdata, ConstantVector::IsNull(left), ConstantVector::IsNull(right)); +buffer_ptr 
VectorBuffer::CreateStandardVector(const LogicalType &type, idx_t capacity) { + return VectorBuffer::CreateStandardVector(type.InternalType(), capacity); } -template -static void DistinctExecuteGeneric(Vector &left, Vector &right, Vector &result, idx_t count) { - if (left.GetVectorType() == VectorType::CONSTANT_VECTOR && right.GetVectorType() == VectorType::CONSTANT_VECTOR) { - DistinctExecuteConstant(left, right, result); - } else { - VectorData ldata, rdata; +VectorStringBuffer::VectorStringBuffer() : VectorBuffer(VectorBufferType::STRING_BUFFER) { +} - left.Orrify(count, ldata); - right.Orrify(count, rdata); +VectorStructBuffer::VectorStructBuffer() : VectorBuffer(VectorBufferType::STRUCT_BUFFER) { +} - result.SetVectorType(VectorType::FLAT_VECTOR); - auto result_data = FlatVector::GetData(result); - DistinctExecuteGenericLoop( - (LEFT_TYPE *)ldata.data, (RIGHT_TYPE *)rdata.data, result_data, ldata.sel, rdata.sel, count, ldata.validity, - rdata.validity, FlatVector::Validity(result)); +VectorStructBuffer::VectorStructBuffer(const LogicalType &type, idx_t capacity) + : VectorBuffer(VectorBufferType::STRUCT_BUFFER) { + auto &child_types = StructType::GetChildTypes(type); + for (auto &child_type : child_types) { + auto vector = make_unique(child_type.second, capacity); + children.push_back(move(vector)); } } -template -static void DistinctExecuteSwitch(Vector &left, Vector &right, Vector &result, idx_t count) { - DistinctExecuteGeneric(left, right, result, count); +VectorStructBuffer::~VectorStructBuffer() { } -template -static void DistinctExecute(Vector &left, Vector &right, Vector &result, idx_t count) { - DistinctExecuteSwitch(left, right, result, count); +VectorListBuffer::VectorListBuffer(unique_ptr vector, idx_t initial_capacity) + : VectorBuffer(VectorBufferType::LIST_BUFFER), capacity(initial_capacity), child(move(vector)) { } -template -static inline idx_t -DistinctSelectGenericLoop(LEFT_TYPE *__restrict ldata, RIGHT_TYPE *__restrict rdata, - const 
SelectionVector *__restrict lsel, const SelectionVector *__restrict rsel, - const SelectionVector *__restrict result_sel, idx_t count, ValidityMask &lmask, - ValidityMask &rmask, SelectionVector *true_sel, SelectionVector *false_sel) { - idx_t true_count = 0, false_count = 0; - for (idx_t i = 0; i < count; i++) { - auto result_idx = result_sel->get_index(i); - auto lindex = lsel->get_index(i); - auto rindex = rsel->get_index(i); - if (NO_NULL) { - if (OP::Operation(ldata[lindex], rdata[rindex], true, true) && HAS_TRUE_SEL) { - true_sel->set_index(true_count++, result_idx); - } - } else { - if (OP::Operation(ldata[lindex], rdata[rindex], !lmask.RowIsValid(i), !rmask.RowIsValid(i)) && - HAS_FALSE_SEL) { - false_sel->set_index(false_count++, result_idx); - } - } - } - if (HAS_TRUE_SEL) { - return true_count; - } else { - return count - false_count; - } +VectorListBuffer::VectorListBuffer(const LogicalType &list_type, idx_t initial_capacity) + : VectorBuffer(VectorBufferType::LIST_BUFFER) { + // FIXME: directly construct vector of correct size + child = make_unique(ListType::GetChildType(list_type)); + capacity = STANDARD_VECTOR_SIZE; + Reserve(initial_capacity); } -template -static inline idx_t -DistinctSelectGenericLoopSelSwitch(LEFT_TYPE *__restrict ldata, RIGHT_TYPE *__restrict rdata, - const SelectionVector *__restrict lsel, const SelectionVector *__restrict rsel, - const SelectionVector *__restrict result_sel, idx_t count, ValidityMask &lmask, - ValidityMask &rmask, SelectionVector *true_sel, SelectionVector *false_sel) { - if (true_sel && false_sel) { - return DistinctSelectGenericLoop( - ldata, rdata, lsel, rsel, result_sel, count, lmask, rmask, true_sel, false_sel); - } else if (true_sel) { - return DistinctSelectGenericLoop( - ldata, rdata, lsel, rsel, result_sel, count, lmask, rmask, true_sel, false_sel); - } else { - D_ASSERT(false_sel); - return DistinctSelectGenericLoop( - ldata, rdata, lsel, rsel, result_sel, count, lmask, rmask, true_sel, false_sel); + 
+void VectorListBuffer::Reserve(idx_t to_reserve) { + if (to_reserve > capacity) { + idx_t new_capacity = (to_reserve + STANDARD_VECTOR_SIZE - 1) / STANDARD_VECTOR_SIZE * STANDARD_VECTOR_SIZE; + D_ASSERT(new_capacity >= to_reserve); + D_ASSERT(new_capacity % STANDARD_VECTOR_SIZE == 0); + child->Resize(capacity, new_capacity); + capacity = new_capacity; } } -template -static inline idx_t -DistinctSelectGenericLoopSwitch(LEFT_TYPE *__restrict ldata, RIGHT_TYPE *__restrict rdata, - const SelectionVector *__restrict lsel, const SelectionVector *__restrict rsel, - const SelectionVector *__restrict result_sel, idx_t count, ValidityMask &lmask, - ValidityMask &rmask, SelectionVector *true_sel, SelectionVector *false_sel) { - if (!lmask.AllValid() || rmask.AllValid()) { - return DistinctSelectGenericLoopSelSwitch( - ldata, rdata, lsel, rsel, result_sel, count, lmask, rmask, true_sel, false_sel); - } else { - return DistinctSelectGenericLoopSelSwitch( - ldata, rdata, lsel, rsel, result_sel, count, lmask, rmask, true_sel, false_sel); +void VectorListBuffer::Append(const Vector &to_append, idx_t to_append_size, idx_t source_offset) { + Reserve(size + to_append_size - source_offset); + VectorOperations::Copy(to_append, *child, to_append_size, source_offset, size); + size += to_append_size - source_offset; +} + +void VectorListBuffer::Append(const Vector &to_append, const SelectionVector &sel, idx_t to_append_size, + idx_t source_offset) { + Reserve(size + to_append_size - source_offset); + VectorOperations::Copy(to_append, *child, sel, to_append_size, source_offset, size); + size += to_append_size - source_offset; +} + +void VectorListBuffer::PushBack(Value &insert) { + if (size + 1 > capacity) { + child->Resize(capacity, capacity * 2); + capacity *= 2; } + child->SetValue(size++, insert); } -template -static idx_t DistinctSelectGeneric(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel) { - VectorData 
ldata, rdata; +VectorListBuffer::~VectorListBuffer() { +} - left.Orrify(count, ldata); - right.Orrify(count, rdata); +ManagedVectorBuffer::ManagedVectorBuffer(unique_ptr handle) + : VectorBuffer(VectorBufferType::MANAGED_BUFFER), handle(move(handle)) { +} - return DistinctSelectGenericLoopSwitch((LEFT_TYPE *)ldata.data, (RIGHT_TYPE *)rdata.data, - ldata.sel, rdata.sel, sel, count, ldata.validity, - rdata.validity, true_sel, false_sel); +ManagedVectorBuffer::~ManagedVectorBuffer() { } -template -static inline idx_t DistinctSelectFlatLoop(LEFT_TYPE *__restrict ldata, RIGHT_TYPE *__restrict rdata, - const SelectionVector *sel, idx_t count, ValidityMask &lmask, - ValidityMask &rmask, SelectionVector *true_sel, SelectionVector *false_sel) { - idx_t true_count = 0, false_count = 0; - for (idx_t i = 0; i < count; i++) { - idx_t result_idx = sel->get_index(i); - idx_t lidx = LEFT_CONSTANT ? 0 : i; - idx_t ridx = RIGHT_CONSTANT ? 0 : i; - bool comparison_result = OP::Operation(ldata[lidx], rdata[ridx], !lmask.RowIsValid(i), !rmask.RowIsValid(i)); - if (HAS_TRUE_SEL) { - true_sel->set_index(true_count, result_idx); - true_count += comparison_result; + +} // namespace duckdb + + + +namespace duckdb { + +class VectorCacheBuffer : public VectorBuffer { +public: + explicit VectorCacheBuffer(const LogicalType &type_p) + : VectorBuffer(VectorBufferType::OPAQUE_BUFFER), type(type_p) { + auto internal_type = type.InternalType(); + switch (internal_type) { + case PhysicalType::LIST: { + // memory for the list offsets + owned_data = unique_ptr(new data_t[STANDARD_VECTOR_SIZE * GetTypeIdSize(internal_type)]); + // child data of the list + auto &child_type = ListType::GetChildType(type); + child_caches.push_back(make_buffer(child_type)); + auto child_vector = make_unique(child_type, false, false); + auxiliary = make_unique(move(child_vector)); + break; } - if (HAS_FALSE_SEL) { - false_sel->set_index(false_count, result_idx); - false_count += !comparison_result; + case 
PhysicalType::STRUCT: { + auto &child_types = StructType::GetChildTypes(type); + for (auto &child_type : child_types) { + child_caches.push_back(make_buffer(child_type.second)); + } + auto struct_buffer = make_unique(type); + auxiliary = move(struct_buffer); + break; + } + default: + owned_data = unique_ptr(new data_t[STANDARD_VECTOR_SIZE * GetTypeIdSize(internal_type)]); + break; } } - if (HAS_TRUE_SEL) { - return true_count; - } else { - return count - false_count; + + void ResetFromCache(Vector &result, const buffer_ptr &buffer) { + D_ASSERT(type == result.GetType()); + auto internal_type = type.InternalType(); + result.vector_type = VectorType::FLAT_VECTOR; + AssignSharedPointer(result.buffer, buffer); + result.validity.Reset(); + switch (internal_type) { + case PhysicalType::LIST: { + result.data = owned_data.get(); + // reinitialize the VectorListBuffer + AssignSharedPointer(result.auxiliary, auxiliary); + // propagate through child + auto &list_buffer = (VectorListBuffer &)*result.auxiliary; + list_buffer.capacity = STANDARD_VECTOR_SIZE; + list_buffer.size = 0; + + auto &list_child = list_buffer.GetChild(); + auto &child_cache = (VectorCacheBuffer &)*child_caches[0]; + child_cache.ResetFromCache(list_child, child_caches[0]); + break; + } + case PhysicalType::STRUCT: { + // struct does not have data + result.data = nullptr; + // reinitialize the VectorStructBuffer + AssignSharedPointer(result.auxiliary, auxiliary); + // propagate through children + auto &children = ((VectorStructBuffer &)*result.auxiliary).GetChildren(); + for (idx_t i = 0; i < children.size(); i++) { + auto &child_cache = (VectorCacheBuffer &)*child_caches[i]; + child_cache.ResetFromCache(*children[i], child_caches[i]); + } + break; + } + default: + // regular type: no aux data and reset data to cached data + result.data = owned_data.get(); + result.auxiliary.reset(); + break; + } } -} -template -static inline idx_t DistinctSelectFlatLoopSelSwitch(LEFT_TYPE *__restrict ldata, RIGHT_TYPE 
*__restrict rdata, - const SelectionVector *sel, idx_t count, ValidityMask &lmask, - ValidityMask &rmask, SelectionVector *true_sel, - SelectionVector *false_sel) { - if (true_sel && false_sel) { - return DistinctSelectFlatLoop( - ldata, rdata, sel, count, lmask, rmask, true_sel, false_sel); - } else if (true_sel) { - return DistinctSelectFlatLoop( - ldata, rdata, sel, count, lmask, rmask, true_sel, false_sel); - } else { - D_ASSERT(false_sel); - return DistinctSelectFlatLoop( - ldata, rdata, sel, count, lmask, rmask, true_sel, false_sel); + const LogicalType &GetType() { + return type; } + +private: + //! The type of the vector cache + LogicalType type; + //! Owned data + unique_ptr owned_data; + //! Child caches (if any). Used for nested types. + vector> child_caches; + //! Aux data for the vector (if any) + buffer_ptr auxiliary; +}; + +VectorCache::VectorCache(const LogicalType &type_p) { + buffer = make_unique(type_p); } -template -static inline idx_t DistinctSelectFlatLoopSwitch(LEFT_TYPE *__restrict ldata, RIGHT_TYPE *__restrict rdata, - const SelectionVector *sel, idx_t count, ValidityMask &lmask, - ValidityMask &rmask, SelectionVector *true_sel, - SelectionVector *false_sel) { - return DistinctSelectFlatLoopSelSwitch( - ldata, rdata, sel, count, lmask, rmask, true_sel, false_sel); +void VectorCache::ResetFromCache(Vector &result) const { + D_ASSERT(buffer); + auto &vcache = (VectorCacheBuffer &)*buffer; + vcache.ResetFromCache(result, buffer); } -template -static idx_t DistinctSelectFlat(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel) { - auto ldata = FlatVector::GetData(left); - auto rdata = FlatVector::GetData(right); - if (LEFT_CONSTANT) { - ValidityMask validity; - if (ConstantVector::IsNull(left)) { - validity.SetAllInvalid(count); - } else { - validity.SetAllValid(count); + +const LogicalType &VectorCache::GetType() const { + auto &vcache = (VectorCacheBuffer &)*buffer; + 
return vcache.GetType(); +} + +} // namespace duckdb + + +namespace duckdb { + +const SelectionVector ConstantVector::ZERO_SELECTION_VECTOR = SelectionVector((sel_t *)ConstantVector::ZERO_VECTOR); +const SelectionVector FlatVector::INCREMENTAL_SELECTION_VECTOR; +const sel_t ConstantVector::ZERO_VECTOR[STANDARD_VECTOR_SIZE] = {0}; + +#if STANDARD_VECTOR_SIZE == 2 +const sel_t FlatVector::INCREMENTAL_VECTOR[] = {0, 1}; +#elif STANDARD_VECTOR_SIZE == 4 +const sel_t FlatVector::INCREMENTAL_VECTOR[] = {0, 1, 2, 3}; +#elif STANDARD_VECTOR_SIZE == 8 +const sel_t FlatVector::INCREMENTAL_VECTOR[] = {0, 1, 2, 3, 4, 5, 6, 7}; +#elif STANDARD_VECTOR_SIZE == 16 +const sel_t FlatVector::INCREMENTAL_VECTOR[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; +#elif STANDARD_VECTOR_SIZE == 32 +const sel_t FlatVector::INCREMENTAL_VECTOR[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; +#elif STANDARD_VECTOR_SIZE == 64 +const sel_t FlatVector::INCREMENTAL_VECTOR[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}; +#elif STANDARD_VECTOR_SIZE == 128 +const sel_t FlatVector::INCREMENTAL_VECTOR[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, + 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, + 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127}; +#elif STANDARD_VECTOR_SIZE == 256 +const sel_t 
FlatVector::INCREMENTAL_VECTOR[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, + 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, + 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, + 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, + 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, + 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, + 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, + 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, + 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255}; +#elif STANDARD_VECTOR_SIZE == 512 +const sel_t FlatVector::INCREMENTAL_VECTOR[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, + 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, + 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, + 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, + 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 
155, 156, 157, 158, 159, 160, + 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, + 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, + 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, + 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, + 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, + 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, + 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, + 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, + 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, + 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, + 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, + 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, + 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, + 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, + 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, + 506, 507, 508, 509, 510, 511}; +#elif STANDARD_VECTOR_SIZE == 1024 +const sel_t FlatVector::INCREMENTAL_VECTOR[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, + 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, + 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, + 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, + 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, + 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, + 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, + 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, + 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, + 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, + 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, + 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, + 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, + 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, + 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, + 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, + 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, + 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, + 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, + 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, + 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, + 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 
431, 432, 433, 434, 435, 436, + 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, + 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, + 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, + 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, + 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, + 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, + 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, + 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, + 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, + 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, + 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, + 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, + 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, + 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, + 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, + 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, + 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, + 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, + 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, + 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, + 817, 818, 819, 820, 821, 
822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, + 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, + 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, + 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, + 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, + 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, + 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, + 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, + 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, + 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, + 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023}; +#elif STANDARD_VECTOR_SIZE == 2048 +const sel_t FlatVector::INCREMENTAL_VECTOR[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, + 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, + 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, + 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, + 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, + 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, + 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, + 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, + 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 
189, + 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, + 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, + 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, + 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, + 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, + 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, + 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, + 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, + 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, + 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, + 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, + 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, + 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, + 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, + 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, + 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, + 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, + 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, + 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, + 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, + 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 
580, 581, 582, 583, 584, 585, 586, 587, 588, + 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, + 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, + 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, + 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, + 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, + 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, + 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, + 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, + 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, + 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, + 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, + 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, + 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, + 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, + 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, + 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, + 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, + 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, + 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, + 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, + 969, 970, 
971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, + 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, + 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, + 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, + 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, + 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, + 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, + 1102, 1103, 1104, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 1117, 1118, 1119, 1120, + 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, + 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1155, 1156, 1157, 1158, + 1159, 1160, 1161, 1162, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, + 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, + 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, + 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1231, 1232, 1233, 1234, + 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, + 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, + 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, + 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300, 1301, 1302, 
1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, + 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1328, 1329, + 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337, 1338, 1339, 1340, 1341, 1342, 1343, 1344, 1345, 1346, 1347, 1348, + 1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358, 1359, 1360, 1361, 1362, 1363, 1364, 1365, 1366, 1367, + 1368, 1369, 1370, 1371, 1372, 1373, 1374, 1375, 1376, 1377, 1378, 1379, 1380, 1381, 1382, 1383, 1384, 1385, 1386, + 1387, 1388, 1389, 1390, 1391, 1392, 1393, 1394, 1395, 1396, 1397, 1398, 1399, 1400, 1401, 1402, 1403, 1404, 1405, + 1406, 1407, 1408, 1409, 1410, 1411, 1412, 1413, 1414, 1415, 1416, 1417, 1418, 1419, 1420, 1421, 1422, 1423, 1424, + 1425, 1426, 1427, 1428, 1429, 1430, 1431, 1432, 1433, 1434, 1435, 1436, 1437, 1438, 1439, 1440, 1441, 1442, 1443, + 1444, 1445, 1446, 1447, 1448, 1449, 1450, 1451, 1452, 1453, 1454, 1455, 1456, 1457, 1458, 1459, 1460, 1461, 1462, + 1463, 1464, 1465, 1466, 1467, 1468, 1469, 1470, 1471, 1472, 1473, 1474, 1475, 1476, 1477, 1478, 1479, 1480, 1481, + 1482, 1483, 1484, 1485, 1486, 1487, 1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500, + 1501, 1502, 1503, 1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511, 1512, 1513, 1514, 1515, 1516, 1517, 1518, 1519, + 1520, 1521, 1522, 1523, 1524, 1525, 1526, 1527, 1528, 1529, 1530, 1531, 1532, 1533, 1534, 1535, 1536, 1537, 1538, + 1539, 1540, 1541, 1542, 1543, 1544, 1545, 1546, 1547, 1548, 1549, 1550, 1551, 1552, 1553, 1554, 1555, 1556, 1557, + 1558, 1559, 1560, 1561, 1562, 1563, 1564, 1565, 1566, 1567, 1568, 1569, 1570, 1571, 1572, 1573, 1574, 1575, 1576, + 1577, 1578, 1579, 1580, 1581, 1582, 1583, 1584, 1585, 1586, 1587, 1588, 1589, 1590, 1591, 1592, 1593, 1594, 1595, + 1596, 1597, 1598, 1599, 1600, 1601, 1602, 1603, 1604, 1605, 1606, 1607, 1608, 1609, 1610, 1611, 1612, 1613, 1614, + 1615, 1616, 1617, 1618, 1619, 1620, 1621, 1622, 1623, 1624, 1625, 1626, 1627, 1628, 1629, 
1630, 1631, 1632, 1633, + 1634, 1635, 1636, 1637, 1638, 1639, 1640, 1641, 1642, 1643, 1644, 1645, 1646, 1647, 1648, 1649, 1650, 1651, 1652, + 1653, 1654, 1655, 1656, 1657, 1658, 1659, 1660, 1661, 1662, 1663, 1664, 1665, 1666, 1667, 1668, 1669, 1670, 1671, + 1672, 1673, 1674, 1675, 1676, 1677, 1678, 1679, 1680, 1681, 1682, 1683, 1684, 1685, 1686, 1687, 1688, 1689, 1690, + 1691, 1692, 1693, 1694, 1695, 1696, 1697, 1698, 1699, 1700, 1701, 1702, 1703, 1704, 1705, 1706, 1707, 1708, 1709, + 1710, 1711, 1712, 1713, 1714, 1715, 1716, 1717, 1718, 1719, 1720, 1721, 1722, 1723, 1724, 1725, 1726, 1727, 1728, + 1729, 1730, 1731, 1732, 1733, 1734, 1735, 1736, 1737, 1738, 1739, 1740, 1741, 1742, 1743, 1744, 1745, 1746, 1747, + 1748, 1749, 1750, 1751, 1752, 1753, 1754, 1755, 1756, 1757, 1758, 1759, 1760, 1761, 1762, 1763, 1764, 1765, 1766, + 1767, 1768, 1769, 1770, 1771, 1772, 1773, 1774, 1775, 1776, 1777, 1778, 1779, 1780, 1781, 1782, 1783, 1784, 1785, + 1786, 1787, 1788, 1789, 1790, 1791, 1792, 1793, 1794, 1795, 1796, 1797, 1798, 1799, 1800, 1801, 1802, 1803, 1804, + 1805, 1806, 1807, 1808, 1809, 1810, 1811, 1812, 1813, 1814, 1815, 1816, 1817, 1818, 1819, 1820, 1821, 1822, 1823, + 1824, 1825, 1826, 1827, 1828, 1829, 1830, 1831, 1832, 1833, 1834, 1835, 1836, 1837, 1838, 1839, 1840, 1841, 1842, + 1843, 1844, 1845, 1846, 1847, 1848, 1849, 1850, 1851, 1852, 1853, 1854, 1855, 1856, 1857, 1858, 1859, 1860, 1861, + 1862, 1863, 1864, 1865, 1866, 1867, 1868, 1869, 1870, 1871, 1872, 1873, 1874, 1875, 1876, 1877, 1878, 1879, 1880, + 1881, 1882, 1883, 1884, 1885, 1886, 1887, 1888, 1889, 1890, 1891, 1892, 1893, 1894, 1895, 1896, 1897, 1898, 1899, + 1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, + 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, + 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, + 
1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, + 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, + 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, + 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031, 2032, + 2033, 2034, 2035, 2036, 2037, 2038, 2039, 2040, 2041, 2042, 2043, 2044, 2045, 2046, 2047}; +#elif STANDARD_VECTOR_SIZE == 4096 +const sel_t FlatVector::INCREMENTAL_VECTOR[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, + 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, + 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, + 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, + 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, + 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, + 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, + 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, + 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, + 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, + 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, + 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, + 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, + 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, + 285, 286, 
287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, + 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, + 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, + 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, + 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, + 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, + 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, + 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, + 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, + 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, + 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, + 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, + 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, + 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, + 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, + 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, + 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, + 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, + 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, + 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, + 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 
679, 680, 681, 682, 683, + 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, + 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, + 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, + 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, + 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, + 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, + 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, + 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, + 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, + 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, + 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, + 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, + 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, + 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, + 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, + 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, + 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, + 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, + 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, + 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 
1059, 1060, 1061, 1062, 1063, + 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, + 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, + 1102, 1103, 1104, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 1117, 1118, 1119, 1120, + 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, + 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1155, 1156, 1157, 1158, + 1159, 1160, 1161, 1162, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, + 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, + 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, + 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1231, 1232, 1233, 1234, + 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, + 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, + 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, + 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, + 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1328, 1329, + 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337, 1338, 1339, 1340, 1341, 1342, 1343, 1344, 1345, 1346, 1347, 1348, + 1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358, 1359, 1360, 1361, 1362, 1363, 1364, 1365, 1366, 1367, + 1368, 1369, 1370, 1371, 1372, 1373, 1374, 1375, 1376, 1377, 1378, 1379, 1380, 1381, 1382, 1383, 1384, 1385, 
1386, + 1387, 1388, 1389, 1390, 1391, 1392, 1393, 1394, 1395, 1396, 1397, 1398, 1399, 1400, 1401, 1402, 1403, 1404, 1405, + 1406, 1407, 1408, 1409, 1410, 1411, 1412, 1413, 1414, 1415, 1416, 1417, 1418, 1419, 1420, 1421, 1422, 1423, 1424, + 1425, 1426, 1427, 1428, 1429, 1430, 1431, 1432, 1433, 1434, 1435, 1436, 1437, 1438, 1439, 1440, 1441, 1442, 1443, + 1444, 1445, 1446, 1447, 1448, 1449, 1450, 1451, 1452, 1453, 1454, 1455, 1456, 1457, 1458, 1459, 1460, 1461, 1462, + 1463, 1464, 1465, 1466, 1467, 1468, 1469, 1470, 1471, 1472, 1473, 1474, 1475, 1476, 1477, 1478, 1479, 1480, 1481, + 1482, 1483, 1484, 1485, 1486, 1487, 1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500, + 1501, 1502, 1503, 1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511, 1512, 1513, 1514, 1515, 1516, 1517, 1518, 1519, + 1520, 1521, 1522, 1523, 1524, 1525, 1526, 1527, 1528, 1529, 1530, 1531, 1532, 1533, 1534, 1535, 1536, 1537, 1538, + 1539, 1540, 1541, 1542, 1543, 1544, 1545, 1546, 1547, 1548, 1549, 1550, 1551, 1552, 1553, 1554, 1555, 1556, 1557, + 1558, 1559, 1560, 1561, 1562, 1563, 1564, 1565, 1566, 1567, 1568, 1569, 1570, 1571, 1572, 1573, 1574, 1575, 1576, + 1577, 1578, 1579, 1580, 1581, 1582, 1583, 1584, 1585, 1586, 1587, 1588, 1589, 1590, 1591, 1592, 1593, 1594, 1595, + 1596, 1597, 1598, 1599, 1600, 1601, 1602, 1603, 1604, 1605, 1606, 1607, 1608, 1609, 1610, 1611, 1612, 1613, 1614, + 1615, 1616, 1617, 1618, 1619, 1620, 1621, 1622, 1623, 1624, 1625, 1626, 1627, 1628, 1629, 1630, 1631, 1632, 1633, + 1634, 1635, 1636, 1637, 1638, 1639, 1640, 1641, 1642, 1643, 1644, 1645, 1646, 1647, 1648, 1649, 1650, 1651, 1652, + 1653, 1654, 1655, 1656, 1657, 1658, 1659, 1660, 1661, 1662, 1663, 1664, 1665, 1666, 1667, 1668, 1669, 1670, 1671, + 1672, 1673, 1674, 1675, 1676, 1677, 1678, 1679, 1680, 1681, 1682, 1683, 1684, 1685, 1686, 1687, 1688, 1689, 1690, + 1691, 1692, 1693, 1694, 1695, 1696, 1697, 1698, 1699, 1700, 1701, 1702, 1703, 1704, 1705, 1706, 1707, 1708, 1709, + 1710, 1711, 1712, 
1713, 1714, 1715, 1716, 1717, 1718, 1719, 1720, 1721, 1722, 1723, 1724, 1725, 1726, 1727, 1728, + 1729, 1730, 1731, 1732, 1733, 1734, 1735, 1736, 1737, 1738, 1739, 1740, 1741, 1742, 1743, 1744, 1745, 1746, 1747, + 1748, 1749, 1750, 1751, 1752, 1753, 1754, 1755, 1756, 1757, 1758, 1759, 1760, 1761, 1762, 1763, 1764, 1765, 1766, + 1767, 1768, 1769, 1770, 1771, 1772, 1773, 1774, 1775, 1776, 1777, 1778, 1779, 1780, 1781, 1782, 1783, 1784, 1785, + 1786, 1787, 1788, 1789, 1790, 1791, 1792, 1793, 1794, 1795, 1796, 1797, 1798, 1799, 1800, 1801, 1802, 1803, 1804, + 1805, 1806, 1807, 1808, 1809, 1810, 1811, 1812, 1813, 1814, 1815, 1816, 1817, 1818, 1819, 1820, 1821, 1822, 1823, + 1824, 1825, 1826, 1827, 1828, 1829, 1830, 1831, 1832, 1833, 1834, 1835, 1836, 1837, 1838, 1839, 1840, 1841, 1842, + 1843, 1844, 1845, 1846, 1847, 1848, 1849, 1850, 1851, 1852, 1853, 1854, 1855, 1856, 1857, 1858, 1859, 1860, 1861, + 1862, 1863, 1864, 1865, 1866, 1867, 1868, 1869, 1870, 1871, 1872, 1873, 1874, 1875, 1876, 1877, 1878, 1879, 1880, + 1881, 1882, 1883, 1884, 1885, 1886, 1887, 1888, 1889, 1890, 1891, 1892, 1893, 1894, 1895, 1896, 1897, 1898, 1899, + 1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, + 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, + 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, + 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, + 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, + 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, + 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031, 2032, + 2033, 2034, 2035, 2036, 2037, 2038, 2039, 
2040, 2041, 2042, 2043, 2044, 2045, 2046, 2047, 2048, 2049, 2050, 2051, + 2052, 2053, 2054, 2055, 2056, 2057, 2058, 2059, 2060, 2061, 2062, 2063, 2064, 2065, 2066, 2067, 2068, 2069, 2070, + 2071, 2072, 2073, 2074, 2075, 2076, 2077, 2078, 2079, 2080, 2081, 2082, 2083, 2084, 2085, 2086, 2087, 2088, 2089, + 2090, 2091, 2092, 2093, 2094, 2095, 2096, 2097, 2098, 2099, 2100, 2101, 2102, 2103, 2104, 2105, 2106, 2107, 2108, + 2109, 2110, 2111, 2112, 2113, 2114, 2115, 2116, 2117, 2118, 2119, 2120, 2121, 2122, 2123, 2124, 2125, 2126, 2127, + 2128, 2129, 2130, 2131, 2132, 2133, 2134, 2135, 2136, 2137, 2138, 2139, 2140, 2141, 2142, 2143, 2144, 2145, 2146, + 2147, 2148, 2149, 2150, 2151, 2152, 2153, 2154, 2155, 2156, 2157, 2158, 2159, 2160, 2161, 2162, 2163, 2164, 2165, + 2166, 2167, 2168, 2169, 2170, 2171, 2172, 2173, 2174, 2175, 2176, 2177, 2178, 2179, 2180, 2181, 2182, 2183, 2184, + 2185, 2186, 2187, 2188, 2189, 2190, 2191, 2192, 2193, 2194, 2195, 2196, 2197, 2198, 2199, 2200, 2201, 2202, 2203, + 2204, 2205, 2206, 2207, 2208, 2209, 2210, 2211, 2212, 2213, 2214, 2215, 2216, 2217, 2218, 2219, 2220, 2221, 2222, + 2223, 2224, 2225, 2226, 2227, 2228, 2229, 2230, 2231, 2232, 2233, 2234, 2235, 2236, 2237, 2238, 2239, 2240, 2241, + 2242, 2243, 2244, 2245, 2246, 2247, 2248, 2249, 2250, 2251, 2252, 2253, 2254, 2255, 2256, 2257, 2258, 2259, 2260, + 2261, 2262, 2263, 2264, 2265, 2266, 2267, 2268, 2269, 2270, 2271, 2272, 2273, 2274, 2275, 2276, 2277, 2278, 2279, + 2280, 2281, 2282, 2283, 2284, 2285, 2286, 2287, 2288, 2289, 2290, 2291, 2292, 2293, 2294, 2295, 2296, 2297, 2298, + 2299, 2300, 2301, 2302, 2303, 2304, 2305, 2306, 2307, 2308, 2309, 2310, 2311, 2312, 2313, 2314, 2315, 2316, 2317, + 2318, 2319, 2320, 2321, 2322, 2323, 2324, 2325, 2326, 2327, 2328, 2329, 2330, 2331, 2332, 2333, 2334, 2335, 2336, + 2337, 2338, 2339, 2340, 2341, 2342, 2343, 2344, 2345, 2346, 2347, 2348, 2349, 2350, 2351, 2352, 2353, 2354, 2355, + 2356, 2357, 2358, 2359, 2360, 2361, 2362, 2363, 2364, 2365, 2366, 
2367, 2368, 2369, 2370, 2371, 2372, 2373, 2374, + 2375, 2376, 2377, 2378, 2379, 2380, 2381, 2382, 2383, 2384, 2385, 2386, 2387, 2388, 2389, 2390, 2391, 2392, 2393, + 2394, 2395, 2396, 2397, 2398, 2399, 2400, 2401, 2402, 2403, 2404, 2405, 2406, 2407, 2408, 2409, 2410, 2411, 2412, + 2413, 2414, 2415, 2416, 2417, 2418, 2419, 2420, 2421, 2422, 2423, 2424, 2425, 2426, 2427, 2428, 2429, 2430, 2431, + 2432, 2433, 2434, 2435, 2436, 2437, 2438, 2439, 2440, 2441, 2442, 2443, 2444, 2445, 2446, 2447, 2448, 2449, 2450, + 2451, 2452, 2453, 2454, 2455, 2456, 2457, 2458, 2459, 2460, 2461, 2462, 2463, 2464, 2465, 2466, 2467, 2468, 2469, + 2470, 2471, 2472, 2473, 2474, 2475, 2476, 2477, 2478, 2479, 2480, 2481, 2482, 2483, 2484, 2485, 2486, 2487, 2488, + 2489, 2490, 2491, 2492, 2493, 2494, 2495, 2496, 2497, 2498, 2499, 2500, 2501, 2502, 2503, 2504, 2505, 2506, 2507, + 2508, 2509, 2510, 2511, 2512, 2513, 2514, 2515, 2516, 2517, 2518, 2519, 2520, 2521, 2522, 2523, 2524, 2525, 2526, + 2527, 2528, 2529, 2530, 2531, 2532, 2533, 2534, 2535, 2536, 2537, 2538, 2539, 2540, 2541, 2542, 2543, 2544, 2545, + 2546, 2547, 2548, 2549, 2550, 2551, 2552, 2553, 2554, 2555, 2556, 2557, 2558, 2559, 2560, 2561, 2562, 2563, 2564, + 2565, 2566, 2567, 2568, 2569, 2570, 2571, 2572, 2573, 2574, 2575, 2576, 2577, 2578, 2579, 2580, 2581, 2582, 2583, + 2584, 2585, 2586, 2587, 2588, 2589, 2590, 2591, 2592, 2593, 2594, 2595, 2596, 2597, 2598, 2599, 2600, 2601, 2602, + 2603, 2604, 2605, 2606, 2607, 2608, 2609, 2610, 2611, 2612, 2613, 2614, 2615, 2616, 2617, 2618, 2619, 2620, 2621, + 2622, 2623, 2624, 2625, 2626, 2627, 2628, 2629, 2630, 2631, 2632, 2633, 2634, 2635, 2636, 2637, 2638, 2639, 2640, + 2641, 2642, 2643, 2644, 2645, 2646, 2647, 2648, 2649, 2650, 2651, 2652, 2653, 2654, 2655, 2656, 2657, 2658, 2659, + 2660, 2661, 2662, 2663, 2664, 2665, 2666, 2667, 2668, 2669, 2670, 2671, 2672, 2673, 2674, 2675, 2676, 2677, 2678, + 2679, 2680, 2681, 2682, 2683, 2684, 2685, 2686, 2687, 2688, 2689, 2690, 2691, 2692, 2693, 
2694, 2695, 2696, 2697, + 2698, 2699, 2700, 2701, 2702, 2703, 2704, 2705, 2706, 2707, 2708, 2709, 2710, 2711, 2712, 2713, 2714, 2715, 2716, + 2717, 2718, 2719, 2720, 2721, 2722, 2723, 2724, 2725, 2726, 2727, 2728, 2729, 2730, 2731, 2732, 2733, 2734, 2735, + 2736, 2737, 2738, 2739, 2740, 2741, 2742, 2743, 2744, 2745, 2746, 2747, 2748, 2749, 2750, 2751, 2752, 2753, 2754, + 2755, 2756, 2757, 2758, 2759, 2760, 2761, 2762, 2763, 2764, 2765, 2766, 2767, 2768, 2769, 2770, 2771, 2772, 2773, + 2774, 2775, 2776, 2777, 2778, 2779, 2780, 2781, 2782, 2783, 2784, 2785, 2786, 2787, 2788, 2789, 2790, 2791, 2792, + 2793, 2794, 2795, 2796, 2797, 2798, 2799, 2800, 2801, 2802, 2803, 2804, 2805, 2806, 2807, 2808, 2809, 2810, 2811, + 2812, 2813, 2814, 2815, 2816, 2817, 2818, 2819, 2820, 2821, 2822, 2823, 2824, 2825, 2826, 2827, 2828, 2829, 2830, + 2831, 2832, 2833, 2834, 2835, 2836, 2837, 2838, 2839, 2840, 2841, 2842, 2843, 2844, 2845, 2846, 2847, 2848, 2849, + 2850, 2851, 2852, 2853, 2854, 2855, 2856, 2857, 2858, 2859, 2860, 2861, 2862, 2863, 2864, 2865, 2866, 2867, 2868, + 2869, 2870, 2871, 2872, 2873, 2874, 2875, 2876, 2877, 2878, 2879, 2880, 2881, 2882, 2883, 2884, 2885, 2886, 2887, + 2888, 2889, 2890, 2891, 2892, 2893, 2894, 2895, 2896, 2897, 2898, 2899, 2900, 2901, 2902, 2903, 2904, 2905, 2906, + 2907, 2908, 2909, 2910, 2911, 2912, 2913, 2914, 2915, 2916, 2917, 2918, 2919, 2920, 2921, 2922, 2923, 2924, 2925, + 2926, 2927, 2928, 2929, 2930, 2931, 2932, 2933, 2934, 2935, 2936, 2937, 2938, 2939, 2940, 2941, 2942, 2943, 2944, + 2945, 2946, 2947, 2948, 2949, 2950, 2951, 2952, 2953, 2954, 2955, 2956, 2957, 2958, 2959, 2960, 2961, 2962, 2963, + 2964, 2965, 2966, 2967, 2968, 2969, 2970, 2971, 2972, 2973, 2974, 2975, 2976, 2977, 2978, 2979, 2980, 2981, 2982, + 2983, 2984, 2985, 2986, 2987, 2988, 2989, 2990, 2991, 2992, 2993, 2994, 2995, 2996, 2997, 2998, 2999, 3000, 3001, + 3002, 3003, 3004, 3005, 3006, 3007, 3008, 3009, 3010, 3011, 3012, 3013, 3014, 3015, 3016, 3017, 3018, 3019, 3020, + 
3021, 3022, 3023, 3024, 3025, 3026, 3027, 3028, 3029, 3030, 3031, 3032, 3033, 3034, 3035, 3036, 3037, 3038, 3039, + 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3050, 3051, 3052, 3053, 3054, 3055, 3056, 3057, 3058, + 3059, 3060, 3061, 3062, 3063, 3064, 3065, 3066, 3067, 3068, 3069, 3070, 3071, 3072, 3073, 3074, 3075, 3076, 3077, + 3078, 3079, 3080, 3081, 3082, 3083, 3084, 3085, 3086, 3087, 3088, 3089, 3090, 3091, 3092, 3093, 3094, 3095, 3096, + 3097, 3098, 3099, 3100, 3101, 3102, 3103, 3104, 3105, 3106, 3107, 3108, 3109, 3110, 3111, 3112, 3113, 3114, 3115, + 3116, 3117, 3118, 3119, 3120, 3121, 3122, 3123, 3124, 3125, 3126, 3127, 3128, 3129, 3130, 3131, 3132, 3133, 3134, + 3135, 3136, 3137, 3138, 3139, 3140, 3141, 3142, 3143, 3144, 3145, 3146, 3147, 3148, 3149, 3150, 3151, 3152, 3153, + 3154, 3155, 3156, 3157, 3158, 3159, 3160, 3161, 3162, 3163, 3164, 3165, 3166, 3167, 3168, 3169, 3170, 3171, 3172, + 3173, 3174, 3175, 3176, 3177, 3178, 3179, 3180, 3181, 3182, 3183, 3184, 3185, 3186, 3187, 3188, 3189, 3190, 3191, + 3192, 3193, 3194, 3195, 3196, 3197, 3198, 3199, 3200, 3201, 3202, 3203, 3204, 3205, 3206, 3207, 3208, 3209, 3210, + 3211, 3212, 3213, 3214, 3215, 3216, 3217, 3218, 3219, 3220, 3221, 3222, 3223, 3224, 3225, 3226, 3227, 3228, 3229, + 3230, 3231, 3232, 3233, 3234, 3235, 3236, 3237, 3238, 3239, 3240, 3241, 3242, 3243, 3244, 3245, 3246, 3247, 3248, + 3249, 3250, 3251, 3252, 3253, 3254, 3255, 3256, 3257, 3258, 3259, 3260, 3261, 3262, 3263, 3264, 3265, 3266, 3267, + 3268, 3269, 3270, 3271, 3272, 3273, 3274, 3275, 3276, 3277, 3278, 3279, 3280, 3281, 3282, 3283, 3284, 3285, 3286, + 3287, 3288, 3289, 3290, 3291, 3292, 3293, 3294, 3295, 3296, 3297, 3298, 3299, 3300, 3301, 3302, 3303, 3304, 3305, + 3306, 3307, 3308, 3309, 3310, 3311, 3312, 3313, 3314, 3315, 3316, 3317, 3318, 3319, 3320, 3321, 3322, 3323, 3324, + 3325, 3326, 3327, 3328, 3329, 3330, 3331, 3332, 3333, 3334, 3335, 3336, 3337, 3338, 3339, 3340, 3341, 3342, 3343, + 3344, 3345, 3346, 3347, 
3348, 3349, 3350, 3351, 3352, 3353, 3354, 3355, 3356, 3357, 3358, 3359, 3360, 3361, 3362, + 3363, 3364, 3365, 3366, 3367, 3368, 3369, 3370, 3371, 3372, 3373, 3374, 3375, 3376, 3377, 3378, 3379, 3380, 3381, + 3382, 3383, 3384, 3385, 3386, 3387, 3388, 3389, 3390, 3391, 3392, 3393, 3394, 3395, 3396, 3397, 3398, 3399, 3400, + 3401, 3402, 3403, 3404, 3405, 3406, 3407, 3408, 3409, 3410, 3411, 3412, 3413, 3414, 3415, 3416, 3417, 3418, 3419, + 3420, 3421, 3422, 3423, 3424, 3425, 3426, 3427, 3428, 3429, 3430, 3431, 3432, 3433, 3434, 3435, 3436, 3437, 3438, + 3439, 3440, 3441, 3442, 3443, 3444, 3445, 3446, 3447, 3448, 3449, 3450, 3451, 3452, 3453, 3454, 3455, 3456, 3457, + 3458, 3459, 3460, 3461, 3462, 3463, 3464, 3465, 3466, 3467, 3468, 3469, 3470, 3471, 3472, 3473, 3474, 3475, 3476, + 3477, 3478, 3479, 3480, 3481, 3482, 3483, 3484, 3485, 3486, 3487, 3488, 3489, 3490, 3491, 3492, 3493, 3494, 3495, + 3496, 3497, 3498, 3499, 3500, 3501, 3502, 3503, 3504, 3505, 3506, 3507, 3508, 3509, 3510, 3511, 3512, 3513, 3514, + 3515, 3516, 3517, 3518, 3519, 3520, 3521, 3522, 3523, 3524, 3525, 3526, 3527, 3528, 3529, 3530, 3531, 3532, 3533, + 3534, 3535, 3536, 3537, 3538, 3539, 3540, 3541, 3542, 3543, 3544, 3545, 3546, 3547, 3548, 3549, 3550, 3551, 3552, + 3553, 3554, 3555, 3556, 3557, 3558, 3559, 3560, 3561, 3562, 3563, 3564, 3565, 3566, 3567, 3568, 3569, 3570, 3571, + 3572, 3573, 3574, 3575, 3576, 3577, 3578, 3579, 3580, 3581, 3582, 3583, 3584, 3585, 3586, 3587, 3588, 3589, 3590, + 3591, 3592, 3593, 3594, 3595, 3596, 3597, 3598, 3599, 3600, 3601, 3602, 3603, 3604, 3605, 3606, 3607, 3608, 3609, + 3610, 3611, 3612, 3613, 3614, 3615, 3616, 3617, 3618, 3619, 3620, 3621, 3622, 3623, 3624, 3625, 3626, 3627, 3628, + 3629, 3630, 3631, 3632, 3633, 3634, 3635, 3636, 3637, 3638, 3639, 3640, 3641, 3642, 3643, 3644, 3645, 3646, 3647, + 3648, 3649, 3650, 3651, 3652, 3653, 3654, 3655, 3656, 3657, 3658, 3659, 3660, 3661, 3662, 3663, 3664, 3665, 3666, + 3667, 3668, 3669, 3670, 3671, 3672, 3673, 3674, 
3675, 3676, 3677, 3678, 3679, 3680, 3681, 3682, 3683, 3684, 3685, + 3686, 3687, 3688, 3689, 3690, 3691, 3692, 3693, 3694, 3695, 3696, 3697, 3698, 3699, 3700, 3701, 3702, 3703, 3704, + 3705, 3706, 3707, 3708, 3709, 3710, 3711, 3712, 3713, 3714, 3715, 3716, 3717, 3718, 3719, 3720, 3721, 3722, 3723, + 3724, 3725, 3726, 3727, 3728, 3729, 3730, 3731, 3732, 3733, 3734, 3735, 3736, 3737, 3738, 3739, 3740, 3741, 3742, + 3743, 3744, 3745, 3746, 3747, 3748, 3749, 3750, 3751, 3752, 3753, 3754, 3755, 3756, 3757, 3758, 3759, 3760, 3761, + 3762, 3763, 3764, 3765, 3766, 3767, 3768, 3769, 3770, 3771, 3772, 3773, 3774, 3775, 3776, 3777, 3778, 3779, 3780, + 3781, 3782, 3783, 3784, 3785, 3786, 3787, 3788, 3789, 3790, 3791, 3792, 3793, 3794, 3795, 3796, 3797, 3798, 3799, + 3800, 3801, 3802, 3803, 3804, 3805, 3806, 3807, 3808, 3809, 3810, 3811, 3812, 3813, 3814, 3815, 3816, 3817, 3818, + 3819, 3820, 3821, 3822, 3823, 3824, 3825, 3826, 3827, 3828, 3829, 3830, 3831, 3832, 3833, 3834, 3835, 3836, 3837, + 3838, 3839, 3840, 3841, 3842, 3843, 3844, 3845, 3846, 3847, 3848, 3849, 3850, 3851, 3852, 3853, 3854, 3855, 3856, + 3857, 3858, 3859, 3860, 3861, 3862, 3863, 3864, 3865, 3866, 3867, 3868, 3869, 3870, 3871, 3872, 3873, 3874, 3875, + 3876, 3877, 3878, 3879, 3880, 3881, 3882, 3883, 3884, 3885, 3886, 3887, 3888, 3889, 3890, 3891, 3892, 3893, 3894, + 3895, 3896, 3897, 3898, 3899, 3900, 3901, 3902, 3903, 3904, 3905, 3906, 3907, 3908, 3909, 3910, 3911, 3912, 3913, + 3914, 3915, 3916, 3917, 3918, 3919, 3920, 3921, 3922, 3923, 3924, 3925, 3926, 3927, 3928, 3929, 3930, 3931, 3932, + 3933, 3934, 3935, 3936, 3937, 3938, 3939, 3940, 3941, 3942, 3943, 3944, 3945, 3946, 3947, 3948, 3949, 3950, 3951, + 3952, 3953, 3954, 3955, 3956, 3957, 3958, 3959, 3960, 3961, 3962, 3963, 3964, 3965, 3966, 3967, 3968, 3969, 3970, + 3971, 3972, 3973, 3974, 3975, 3976, 3977, 3978, 3979, 3980, 3981, 3982, 3983, 3984, 3985, 3986, 3987, 3988, 3989, + 3990, 3991, 3992, 3993, 3994, 3995, 3996, 3997, 3998, 3999, 4000, 4001, 
4002, 4003, 4004, 4005, 4006, 4007, 4008, + 4009, 4010, 4011, 4012, 4013, 4014, 4015, 4016, 4017, 4018, 4019, 4020, 4021, 4022, 4023, 4024, 4025, 4026, 4027, + 4028, 4029, 4030, 4031, 4032, 4033, 4034, 4035, 4036, 4037, 4038, 4039, 4040, 4041, 4042, 4043, 4044, 4045, 4046, + 4047, 4048, 4049, 4050, 4051, 4052, 4053, 4054, 4055, 4056, 4057, 4058, 4059, 4060, 4061, 4062, 4063, 4064, 4065, + 4066, 4067, 4068, 4069, 4070, 4071, 4072, 4073, 4074, 4075, 4076, 4077, 4078, 4079, 4080, 4081, 4082, 4083, 4084, + 4085, 4086, 4087, 4088, 4089, 4090, 4091, 4092, 4093, 4094, 4095}; +#else +#error Unsupported VECTOR_SIZE! +#endif + +} // namespace duckdb + + + + + + + + + +#include + +namespace duckdb { + +LogicalType::LogicalType() : LogicalType(LogicalTypeId::INVALID) { +} + +LogicalType::LogicalType(LogicalTypeId id) : id_(id) { + physical_type_ = GetInternalType(); +} +LogicalType::LogicalType(LogicalTypeId id, shared_ptr type_info_p) + : id_(id), type_info_(move(type_info_p)) { + physical_type_ = GetInternalType(); +} + +hash_t LogicalType::Hash() const { + return duckdb::Hash((uint8_t)id_); +} + +PhysicalType LogicalType::GetInternalType() { + switch (id_) { + case LogicalTypeId::BOOLEAN: + return PhysicalType::BOOL; + case LogicalTypeId::TINYINT: + return PhysicalType::INT8; + case LogicalTypeId::UTINYINT: + return PhysicalType::UINT8; + case LogicalTypeId::SMALLINT: + return PhysicalType::INT16; + case LogicalTypeId::USMALLINT: + return PhysicalType::UINT16; + case LogicalTypeId::SQLNULL: + case LogicalTypeId::DATE: + case LogicalTypeId::INTEGER: + return PhysicalType::INT32; + case LogicalTypeId::UINTEGER: + return PhysicalType::UINT32; + case LogicalTypeId::BIGINT: + case LogicalTypeId::TIME: + case LogicalTypeId::TIMESTAMP: + case LogicalTypeId::TIMESTAMP_SEC: + case LogicalTypeId::TIMESTAMP_NS: + case LogicalTypeId::TIMESTAMP_MS: + return PhysicalType::INT64; + case LogicalTypeId::UBIGINT: + return PhysicalType::UINT64; + case LogicalTypeId::HUGEINT: + return 
PhysicalType::INT128; + case LogicalTypeId::FLOAT: + return PhysicalType::FLOAT; + case LogicalTypeId::DOUBLE: + return PhysicalType::DOUBLE; + case LogicalTypeId::DECIMAL: { + if (!type_info_) { + return PhysicalType::INVALID; } - return DistinctSelectFlatLoopSwitch( - ldata, rdata, sel, count, validity, FlatVector::Validity(right), true_sel, false_sel); - } else if (RIGHT_CONSTANT) { - ValidityMask validity; - if (ConstantVector::IsNull(right)) { - validity.SetAllInvalid(count); + auto width = DecimalType::GetWidth(*this); + if (width <= Decimal::MAX_WIDTH_INT16) { + return PhysicalType::INT16; + } else if (width <= Decimal::MAX_WIDTH_INT32) { + return PhysicalType::INT32; + } else if (width <= Decimal::MAX_WIDTH_INT64) { + return PhysicalType::INT64; + } else if (width <= Decimal::MAX_WIDTH_INT128) { + return PhysicalType::INT128; } else { - validity.SetAllValid(count); + throw InternalException("Widths bigger than 38 are not supported"); } - return DistinctSelectFlatLoopSwitch( - ldata, rdata, sel, count, FlatVector::Validity(left), validity, true_sel, false_sel); - } else { - return DistinctSelectFlatLoopSwitch( - ldata, rdata, sel, count, FlatVector::Validity(left), FlatVector::Validity(right), true_sel, false_sel); } -} -template -static idx_t DistinctSelectConstant(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel) { - auto ldata = ConstantVector::GetData(left); - auto rdata = ConstantVector::GetData(right); - - // both sides are constant, return either 0 or the count - // in this case we do not fill in the result selection vector at all - if (!OP::Operation(*ldata, *rdata, ConstantVector::IsNull(left), ConstantVector::IsNull(right))) { - if (false_sel) { - for (idx_t i = 0; i < count; i++) { - false_sel->set_index(i, sel->get_index(i)); - } - } - return 0; - } else { - if (true_sel) { - for (idx_t i = 0; i < count; i++) { - true_sel->set_index(i, sel->get_index(i)); - } + case 
LogicalTypeId::VARCHAR: + case LogicalTypeId::CHAR: + case LogicalTypeId::BLOB: + return PhysicalType::VARCHAR; + case LogicalTypeId::INTERVAL: + return PhysicalType::INTERVAL; + case LogicalTypeId::MAP: + case LogicalTypeId::STRUCT: + return PhysicalType::STRUCT; + case LogicalTypeId::LIST: + return PhysicalType::LIST; + case LogicalTypeId::HASH: + static_assert(sizeof(hash_t) == sizeof(uint64_t), "Hash must be uint64_t"); + return PhysicalType::UINT64; + case LogicalTypeId::POINTER: + // LCOV_EXCL_START + if (sizeof(uintptr_t) == sizeof(uint32_t)) { + return PhysicalType::UINT32; + } else if (sizeof(uintptr_t) == sizeof(uint64_t)) { + return PhysicalType::UINT64; + } else { + throw InternalException("Unsupported pointer size"); } - return count; + // LCOV_EXCL_STOP + case LogicalTypeId::VALIDITY: + return PhysicalType::BIT; + case LogicalTypeId::TABLE: + case LogicalTypeId::ANY: + case LogicalTypeId::INVALID: + case LogicalTypeId::UNKNOWN: + return PhysicalType::INVALID; + default: + throw InternalException("Invalid LogicalType %s", ToString()); } } -template -static idx_t DistinctSelect(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel) { - if (!sel) { - sel = &FlatVector::INCREMENTAL_SELECTION_VECTOR; - } - if (left.GetVectorType() == VectorType::CONSTANT_VECTOR && right.GetVectorType() == VectorType::CONSTANT_VECTOR) { - return DistinctSelectConstant(left, right, sel, count, true_sel, false_sel); - } else if (left.GetVectorType() == VectorType::CONSTANT_VECTOR && - right.GetVectorType() == VectorType::FLAT_VECTOR) { - return DistinctSelectFlat(left, right, sel, count, true_sel, false_sel); - } else if (left.GetVectorType() == VectorType::FLAT_VECTOR && - right.GetVectorType() == VectorType::CONSTANT_VECTOR) { - return DistinctSelectFlat(left, right, sel, count, true_sel, false_sel); - } else if (left.GetVectorType() == VectorType::FLAT_VECTOR && right.GetVectorType() == 
VectorType::FLAT_VECTOR) { - return DistinctSelectFlat(left, right, sel, count, true_sel, - false_sel); - } else { - return DistinctSelectGeneric(left, right, sel, count, true_sel, false_sel); - } -} -template -static inline void TemplatedDistinctExecute(Vector &left, Vector &right, Vector &result, idx_t count) { - DistinctExecute(left, right, result, count); -} -template -static void ExecuteDistinct(Vector &left, Vector &right, Vector &result, idx_t count) { - D_ASSERT(left.GetType() == right.GetType() && result.GetType() == LogicalType::BOOLEAN); - // the inplace loops take the result as the last parameter - switch (left.GetType().InternalType()) { +const LogicalType LogicalType::INVALID = LogicalType(LogicalTypeId::INVALID); +const LogicalType LogicalType::SQLNULL = LogicalType(LogicalTypeId::SQLNULL); +const LogicalType LogicalType::BOOLEAN = LogicalType(LogicalTypeId::BOOLEAN); +const LogicalType LogicalType::TINYINT = LogicalType(LogicalTypeId::TINYINT); +const LogicalType LogicalType::UTINYINT = LogicalType(LogicalTypeId::UTINYINT); +const LogicalType LogicalType::SMALLINT = LogicalType(LogicalTypeId::SMALLINT); +const LogicalType LogicalType::USMALLINT = LogicalType(LogicalTypeId::USMALLINT); +const LogicalType LogicalType::INTEGER = LogicalType(LogicalTypeId::INTEGER); +const LogicalType LogicalType::UINTEGER = LogicalType(LogicalTypeId::UINTEGER); +const LogicalType LogicalType::BIGINT = LogicalType(LogicalTypeId::BIGINT); +const LogicalType LogicalType::UBIGINT = LogicalType(LogicalTypeId::UBIGINT); +const LogicalType LogicalType::HUGEINT = LogicalType(LogicalTypeId::HUGEINT); +const LogicalType LogicalType::FLOAT = LogicalType(LogicalTypeId::FLOAT); +const LogicalType LogicalType::DOUBLE = LogicalType(LogicalTypeId::DOUBLE); +const LogicalType LogicalType::DATE = LogicalType(LogicalTypeId::DATE); + +const LogicalType LogicalType::TIMESTAMP = LogicalType(LogicalTypeId::TIMESTAMP); +const LogicalType LogicalType::TIMESTAMP_MS = 
LogicalType(LogicalTypeId::TIMESTAMP_MS); +const LogicalType LogicalType::TIMESTAMP_NS = LogicalType(LogicalTypeId::TIMESTAMP_NS); +const LogicalType LogicalType::TIMESTAMP_S = LogicalType(LogicalTypeId::TIMESTAMP_SEC); + +const LogicalType LogicalType::TIME = LogicalType(LogicalTypeId::TIME); +const LogicalType LogicalType::HASH = LogicalType(LogicalTypeId::HASH); +const LogicalType LogicalType::POINTER = LogicalType(LogicalTypeId::POINTER); + +const LogicalType LogicalType::VARCHAR = LogicalType(LogicalTypeId::VARCHAR); + +const LogicalType LogicalType::BLOB = LogicalType(LogicalTypeId::BLOB); +const LogicalType LogicalType::INTERVAL = LogicalType(LogicalTypeId::INTERVAL); + +// TODO these are incomplete and should maybe not exist as such +const LogicalType LogicalType::TABLE = LogicalType(LogicalTypeId::TABLE); + +const LogicalType LogicalType::ANY = LogicalType(LogicalTypeId::ANY); + +const vector LogicalType::NUMERIC = {LogicalType::TINYINT, LogicalType::SMALLINT, LogicalType::INTEGER, + LogicalType::BIGINT, LogicalType::HUGEINT, LogicalType::FLOAT, + LogicalType::DOUBLE, LogicalTypeId::DECIMAL, LogicalType::UTINYINT, + LogicalType::USMALLINT, LogicalType::UINTEGER, LogicalType::UBIGINT}; + +const vector LogicalType::INTEGRAL = {LogicalType::TINYINT, LogicalType::SMALLINT, LogicalType::INTEGER, + LogicalType::BIGINT, LogicalType::HUGEINT, LogicalType::UTINYINT, + LogicalType::USMALLINT, LogicalType::UINTEGER, LogicalType::UBIGINT}; + +const vector LogicalType::ALL_TYPES = { + LogicalType::BOOLEAN, LogicalType::TINYINT, LogicalType::SMALLINT, LogicalType::INTEGER, + LogicalType::BIGINT, LogicalType::DATE, LogicalType::TIMESTAMP, LogicalType::DOUBLE, + LogicalType::FLOAT, LogicalType::VARCHAR, LogicalType::BLOB, LogicalType::INTERVAL, + LogicalType::HUGEINT, LogicalTypeId::DECIMAL, LogicalType::UTINYINT, LogicalType::USMALLINT, + LogicalType::UINTEGER, LogicalType::UBIGINT, LogicalType::TIME, LogicalTypeId::LIST, + LogicalTypeId::STRUCT, LogicalTypeId::MAP}; + 
+const LogicalType LOGICAL_ROW_TYPE = LogicalType::BIGINT; +const PhysicalType ROW_TYPE = PhysicalType::INT64; + +// LCOV_EXCL_START +string TypeIdToString(PhysicalType type) { + switch (type) { case PhysicalType::BOOL: + return "BOOL"; case PhysicalType::INT8: - TemplatedDistinctExecute(left, right, result, count); - break; + return "INT8"; case PhysicalType::INT16: - TemplatedDistinctExecute(left, right, result, count); - break; + return "INT16"; case PhysicalType::INT32: - TemplatedDistinctExecute(left, right, result, count); - break; + return "INT32"; case PhysicalType::INT64: - TemplatedDistinctExecute(left, right, result, count); - break; + return "INT64"; case PhysicalType::UINT8: - TemplatedDistinctExecute(left, right, result, count); - break; + return "UINT8"; case PhysicalType::UINT16: - TemplatedDistinctExecute(left, right, result, count); - break; + return "UINT16"; case PhysicalType::UINT32: - TemplatedDistinctExecute(left, right, result, count); - break; + return "UINT32"; case PhysicalType::UINT64: - TemplatedDistinctExecute(left, right, result, count); - break; + return "UINT64"; case PhysicalType::INT128: - TemplatedDistinctExecute(left, right, result, count); - break; - case PhysicalType::POINTER: - TemplatedDistinctExecute(left, right, result, count); - break; + return "INT128"; case PhysicalType::FLOAT: - TemplatedDistinctExecute(left, right, result, count); - break; + return "FLOAT"; case PhysicalType::DOUBLE: - TemplatedDistinctExecute(left, right, result, count); - break; - case PhysicalType::INTERVAL: - TemplatedDistinctExecute(left, right, result, count); - break; + return "DOUBLE"; case PhysicalType::VARCHAR: - TemplatedDistinctExecute(left, right, result, count); - break; - default: - throw InvalidTypeException(left.GetType(), "Invalid type for distinct comparison"); + return "VARCHAR"; + case PhysicalType::INTERVAL: + return "INTERVAL"; + case PhysicalType::STRUCT: + return "STRUCT"; + case PhysicalType::LIST: + return "LIST"; + case 
PhysicalType::INVALID: + return "INVALID"; + case PhysicalType::BIT: + return "BIT"; + case PhysicalType::NA: + return "NA"; + case PhysicalType::HALF_FLOAT: + return "HALF_FLOAT"; + case PhysicalType::STRING: + return "ARROW_STRING"; + case PhysicalType::BINARY: + return "BINARY"; + case PhysicalType::FIXED_SIZE_BINARY: + return "FIXED_SIZE_BINARY"; + case PhysicalType::DATE32: + return "DATE32"; + case PhysicalType::DATE64: + return "DATE64"; + case PhysicalType::TIMESTAMP: + return "TIMESTAMP"; + case PhysicalType::TIME32: + return "TIME32"; + case PhysicalType::TIME64: + return "TIME64"; + case PhysicalType::UNION: + return "UNION"; + case PhysicalType::DICTIONARY: + return "DICTIONARY"; + case PhysicalType::MAP: + return "MAP"; + case PhysicalType::EXTENSION: + return "EXTENSION"; + case PhysicalType::FIXED_SIZE_LIST: + return "FIXED_SIZE_LIST"; + case PhysicalType::DURATION: + return "DURATION"; + case PhysicalType::LARGE_STRING: + return "LARGE_STRING"; + case PhysicalType::LARGE_BINARY: + return "LARGE_BINARY"; + case PhysicalType::LARGE_LIST: + return "LARGE_LIST"; } + return "INVALID"; } +// LCOV_EXCL_STOP -template -static idx_t TemplatedDistinctSelectOperation(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel) { - // the inplace loops take the result as the last parameter - switch (left.GetType().InternalType()) { +idx_t GetTypeIdSize(PhysicalType type) { + switch (type) { + case PhysicalType::BIT: case PhysicalType::BOOL: + return sizeof(bool); case PhysicalType::INT8: - return DistinctSelect(left, right, sel, count, true_sel, false_sel); + return sizeof(int8_t); case PhysicalType::INT16: - return DistinctSelect(left, right, sel, count, true_sel, false_sel); + return sizeof(int16_t); case PhysicalType::INT32: - return DistinctSelect(left, right, sel, count, true_sel, false_sel); + return sizeof(int32_t); case PhysicalType::INT64: - return DistinctSelect(left, right, sel, count, 
true_sel, false_sel); + return sizeof(int64_t); case PhysicalType::UINT8: - return DistinctSelect(left, right, sel, count, true_sel, false_sel); + return sizeof(uint8_t); case PhysicalType::UINT16: - return DistinctSelect(left, right, sel, count, true_sel, false_sel); + return sizeof(uint16_t); case PhysicalType::UINT32: - return DistinctSelect(left, right, sel, count, true_sel, false_sel); + return sizeof(uint32_t); case PhysicalType::UINT64: - return DistinctSelect(left, right, sel, count, true_sel, false_sel); + return sizeof(uint64_t); case PhysicalType::INT128: - return DistinctSelect(left, right, sel, count, true_sel, false_sel); - case PhysicalType::POINTER: - return DistinctSelect(left, right, sel, count, true_sel, false_sel); + return sizeof(hugeint_t); case PhysicalType::FLOAT: - return DistinctSelect(left, right, sel, count, true_sel, false_sel); + return sizeof(float); case PhysicalType::DOUBLE: - return DistinctSelect(left, right, sel, count, true_sel, false_sel); - case PhysicalType::INTERVAL: - return DistinctSelect(left, right, sel, count, true_sel, false_sel); + return sizeof(double); case PhysicalType::VARCHAR: - return DistinctSelect(left, right, sel, count, true_sel, false_sel); + return sizeof(string_t); + case PhysicalType::INTERVAL: + return sizeof(interval_t); + case PhysicalType::STRUCT: + return 0; // no own payload + case PhysicalType::LIST: + return sizeof(list_entry_t); // offset + len default: - throw InvalidTypeException(left.GetType(), "Invalid type for comparison"); + throw InternalException("Invalid PhysicalType for GetTypeIdSize"); } } -void VectorOperations::DistinctFrom(Vector &left, Vector &right, Vector &result, idx_t count) { - ExecuteDistinct(left, right, result, count); +bool TypeIsConstantSize(PhysicalType type) { + return (type >= PhysicalType::BOOL && type <= PhysicalType::DOUBLE) || + (type >= PhysicalType::FIXED_SIZE_BINARY && type <= PhysicalType::INTERVAL) || + type == PhysicalType::INTERVAL || type == 
PhysicalType::INT128; } - -void VectorOperations::NotDistinctFrom(Vector &left, Vector &right, Vector &result, idx_t count) { - ExecuteDistinct(left, right, result, count); +bool TypeIsIntegral(PhysicalType type) { + return (type >= PhysicalType::UINT8 && type <= PhysicalType::INT64) || type == PhysicalType::INT128; } - -// result = A != B with nulls being equal -idx_t VectorOperations::SelectDistinctFrom(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel) { - return TemplatedDistinctSelectOperation(left, right, sel, count, true_sel, false_sel); +bool TypeIsNumeric(PhysicalType type) { + return (type >= PhysicalType::UINT8 && type <= PhysicalType::DOUBLE) || type == PhysicalType::INT128; } -// result = A == B with nulls being equal -idx_t VectorOperations::SelectNotDistinctFrom(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel) { - return TemplatedDistinctSelectOperation(left, right, sel, count, true_sel, false_sel); +bool TypeIsInteger(PhysicalType type) { + return (type >= PhysicalType::UINT8 && type <= PhysicalType::INT64) || type == PhysicalType::INT128; } -} // namespace duckdb -//===--------------------------------------------------------------------===// -// null_operators.cpp -// Description: This file contains the implementation of the -// IS NULL/NOT IS NULL operators -//===--------------------------------------------------------------------===// - - - - -namespace duckdb { - -template -void IsNullLoop(Vector &input, Vector &result, idx_t count) { - D_ASSERT(result.GetType() == LogicalType::BOOLEAN); - - if (input.GetVectorType() == VectorType::CONSTANT_VECTOR) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - auto result_data = ConstantVector::GetData(result); - *result_data = INVERSE ? 
!ConstantVector::IsNull(input) : ConstantVector::IsNull(input); - } else { - VectorData data; - input.Orrify(count, data); - - result.SetVectorType(VectorType::FLAT_VECTOR); - auto result_data = FlatVector::GetData(result); - for (idx_t i = 0; i < count; i++) { - auto idx = data.sel->get_index(i); - result_data[i] = INVERSE ? data.validity.RowIsValid(idx) : !data.validity.RowIsValid(idx); - } +// LCOV_EXCL_START +string LogicalTypeIdToString(LogicalTypeId id) { + switch (id) { + case LogicalTypeId::BOOLEAN: + return "BOOLEAN"; + case LogicalTypeId::TINYINT: + return "TINYINT"; + case LogicalTypeId::SMALLINT: + return "SMALLINT"; + case LogicalTypeId::INTEGER: + return "INTEGER"; + case LogicalTypeId::BIGINT: + return "BIGINT"; + case LogicalTypeId::HUGEINT: + return "HUGEINT"; + case LogicalTypeId::UTINYINT: + return "UTINYINT"; + case LogicalTypeId::USMALLINT: + return "USMALLINT"; + case LogicalTypeId::UINTEGER: + return "UINTEGER"; + case LogicalTypeId::UBIGINT: + return "UBIGINT"; + case LogicalTypeId::DATE: + return "DATE"; + case LogicalTypeId::TIME: + return "TIME"; + case LogicalTypeId::TIMESTAMP: + return "TIMESTAMP"; + case LogicalTypeId::TIMESTAMP_MS: + return "TIMESTAMP (MS)"; + case LogicalTypeId::TIMESTAMP_NS: + return "TIMESTAMP (NS)"; + case LogicalTypeId::TIMESTAMP_SEC: + return "TIMESTAMP (SEC)"; + case LogicalTypeId::FLOAT: + return "FLOAT"; + case LogicalTypeId::DOUBLE: + return "DOUBLE"; + case LogicalTypeId::DECIMAL: + return "DECIMAL"; + case LogicalTypeId::VARCHAR: + return "VARCHAR"; + case LogicalTypeId::BLOB: + return "BLOB"; + case LogicalTypeId::CHAR: + return "CHAR"; + case LogicalTypeId::INTERVAL: + return "INTERVAL"; + case LogicalTypeId::SQLNULL: + return "NULL"; + case LogicalTypeId::ANY: + return "ANY"; + case LogicalTypeId::VALIDITY: + return "VALIDITY"; + case LogicalTypeId::STRUCT: + return "STRUCT"; + case LogicalTypeId::LIST: + return "LIST"; + case LogicalTypeId::MAP: + return "MAP"; + case LogicalTypeId::HASH: + return 
"HASH"; + case LogicalTypeId::POINTER: + return "POINTER"; + case LogicalTypeId::TABLE: + return "TABLE"; + case LogicalTypeId::INVALID: + return "INVALID"; + case LogicalTypeId::UNKNOWN: + return "UNKNOWN"; } + return "UNDEFINED"; } -void VectorOperations::IsNotNull(Vector &input, Vector &result, idx_t count) { - IsNullLoop(input, result, count); -} - -void VectorOperations::IsNull(Vector &input, Vector &result, idx_t count) { - IsNullLoop(input, result, count); -} - -bool VectorOperations::HasNotNull(Vector &input, idx_t count) { - if (count == 0) { - return false; - } - if (input.GetVectorType() == VectorType::CONSTANT_VECTOR) { - return !ConstantVector::IsNull(input); - } else { - VectorData data; - input.Orrify(count, data); - - if (data.validity.AllValid()) { - return true; +string LogicalType::ToString() const { + switch (id_) { + case LogicalTypeId::STRUCT: { + if (!type_info_) { + return "STRUCT"; } - for (idx_t i = 0; i < count; i++) { - auto idx = data.sel->get_index(i); - if (data.validity.RowIsValid(idx)) { - return true; + auto &child_types = StructType::GetChildTypes(*this); + string ret = "STRUCT<"; + for (size_t i = 0; i < child_types.size(); i++) { + ret += child_types[i].first + ": " + child_types[i].second.ToString(); + if (i < child_types.size() - 1) { + ret += ", "; } } - return false; + ret += ">"; + return ret; } -} - -bool VectorOperations::HasNull(Vector &input, idx_t count) { - if (count == 0) { - return false; + case LogicalTypeId::LIST: { + if (!type_info_) { + return "LIST"; + } + return "LIST<" + ListType::GetChildType(*this).ToString() + ">"; } - if (input.GetVectorType() == VectorType::CONSTANT_VECTOR) { - return ConstantVector::IsNull(input); - } else { - VectorData data; - input.Orrify(count, data); - - if (data.validity.AllValid()) { - return false; + case LogicalTypeId::MAP: { + if (!type_info_) { + return "MAP"; } - for (idx_t i = 0; i < count; i++) { - auto idx = data.sel->get_index(i); - if (!data.validity.RowIsValid(idx)) { 
- return true; - } + auto &child_types = StructType::GetChildTypes(*this); + if (child_types.empty()) { + return "MAP"; } - return false; - } -} - -} // namespace duckdb -//===--------------------------------------------------------------------===// -// numeric_inplace_operators.cpp -// Description: This file contains the implementation of numeric inplace ops -// += *= /= -= %= -//===--------------------------------------------------------------------===// - - - -#include - -namespace duckdb { - -//===--------------------------------------------------------------------===// -// In-Place Addition -//===--------------------------------------------------------------------===// - -void VectorOperations::AddInPlace(Vector &input, int64_t right, idx_t count) { - D_ASSERT(input.GetType().InternalType() == PhysicalType::POINTER); - if (right == 0) { - return; - } - switch (input.GetVectorType()) { - case VectorType::CONSTANT_VECTOR: { - D_ASSERT(!ConstantVector::IsNull(input)); - auto data = ConstantVector::GetData(input); - *data += right; - break; - } - default: { - D_ASSERT(input.GetVectorType() == VectorType::FLAT_VECTOR); - auto data = FlatVector::GetData(input); - for (idx_t i = 0; i < count; i++) { - data[i] += right; + if (child_types.size() != 2) { + throw InternalException("Map needs exactly two child elements"); } - break; - } - } -} - -} // namespace duckdb - - - - - - - -namespace duckdb { - -template -static void VectorStringCast(Vector &source, Vector &result, idx_t count) { - D_ASSERT(result.GetType().InternalType() == PhysicalType::VARCHAR); - UnaryExecutor::Execute(source, result, count, - [&](SRC input) { return OP::template Operation(input, result); }); -} - -static NotImplementedException UnimplementedCast(const LogicalType &source_type, const LogicalType &target_type) { - return NotImplementedException("Unimplemented type for cast (%s -> %s)", source_type.ToString(), - target_type.ToString()); -} - -// NULL cast only works if all values in source are 
NULL, otherwise an unimplemented cast exception is thrown -static void VectorNullCast(Vector &source, Vector &result, idx_t count) { - if (VectorOperations::HasNotNull(source, count)) { - throw UnimplementedCast(source.GetType(), result.GetType()); - } - if (source.GetVectorType() == VectorType::CONSTANT_VECTOR) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - ConstantVector::SetNull(result, true); - } else { - result.SetVectorType(VectorType::FLAT_VECTOR); - FlatVector::Validity(result).SetAllInvalid(count); + return "MAP<" + ListType::GetChildType(child_types[0].second).ToString() + ", " + + ListType::GetChildType(child_types[1].second).ToString() + ">"; } -} - -template -static void ToDecimalCast(Vector &source, Vector &result, idx_t count) { - auto &result_type = result.GetType(); - switch (result_type.InternalType()) { - case PhysicalType::INT16: - UnaryExecutor::Execute(source, result, count, [&](T input) { - return CastToDecimal::Operation(input, result_type.width(), result_type.scale()); - }); - break; - case PhysicalType::INT32: - UnaryExecutor::Execute(source, result, count, [&](T input) { - return CastToDecimal::Operation(input, result_type.width(), result_type.scale()); - }); - break; - case PhysicalType::INT64: - UnaryExecutor::Execute(source, result, count, [&](T input) { - return CastToDecimal::Operation(input, result_type.width(), result_type.scale()); - }); - break; - case PhysicalType::INT128: - UnaryExecutor::Execute(source, result, count, [&](T input) { - return CastToDecimal::Operation(input, result_type.width(), result_type.scale()); - }); - break; - default: - throw NotImplementedException("Unimplemented internal type for decimal"); + case LogicalTypeId::DECIMAL: { + if (!type_info_) { + return "DECIMAL"; + } + auto width = DecimalType::GetWidth(*this); + auto scale = DecimalType::GetScale(*this); + if (width == 0) { + return "DECIMAL"; + } + return StringUtil::Format("DECIMAL(%d,%d)", width, scale); } -} - -template -static void 
FromDecimalCast(Vector &source, Vector &result, idx_t count) { - auto &source_type = source.GetType(); - switch (source_type.InternalType()) { - case PhysicalType::INT16: - UnaryExecutor::Execute(source, result, count, [&](int16_t input) { - return CastFromDecimal::Operation(input, source_type.width(), source_type.scale()); - }); - break; - case PhysicalType::INT32: - UnaryExecutor::Execute(source, result, count, [&](int32_t input) { - return CastFromDecimal::Operation(input, source_type.width(), source_type.scale()); - }); - break; - case PhysicalType::INT64: - UnaryExecutor::Execute(source, result, count, [&](int64_t input) { - return CastFromDecimal::Operation(input, source_type.width(), source_type.scale()); - }); - break; - case PhysicalType::INT128: - UnaryExecutor::Execute(source, result, count, [&](hugeint_t input) { - return CastFromDecimal::Operation(input, source_type.width(), source_type.scale()); - }); - break; default: - throw NotImplementedException("Unimplemented internal type for decimal"); - } -} - -template -void TemplatedDecimalScaleUp(Vector &source, Vector &result, idx_t count) { - D_ASSERT(result.GetType().scale() >= source.GetType().scale()); - idx_t scale_difference = result.GetType().scale() - source.GetType().scale(); - auto multiply_factor = POWERS_DEST::POWERS_OF_TEN[scale_difference]; - idx_t target_width = result.GetType().width() - scale_difference; - if (source.GetType().width() < target_width) { - // type will always fit: no need to check limit - UnaryExecutor::Execute(source, result, count, [&](SOURCE input) { - return Cast::Operation(input) * multiply_factor; - }); - } else { - // type might not fit: check limit - auto limit = POWERS_SOURCE::POWERS_OF_TEN[target_width]; - UnaryExecutor::Execute(source, result, count, [&](SOURCE input) { - if (input >= limit || input <= -limit) { - throw OutOfRangeException("Casting value \"%s\" to type %s failed: value is out of range!", - Decimal::ToString(input, source.GetType().scale()), - 
result.GetType().ToString()); - } - return Cast::Operation(input) * multiply_factor; - }); - } -} - -template -void TemplatedDecimalScaleDown(Vector &source, Vector &result, idx_t count) { - D_ASSERT(result.GetType().scale() < source.GetType().scale()); - idx_t scale_difference = source.GetType().scale() - result.GetType().scale(); - idx_t target_width = result.GetType().width() + scale_difference; - auto divide_factor = POWERS_SOURCE::POWERS_OF_TEN[scale_difference]; - if (source.GetType().width() < target_width) { - // type will always fit: no need to check limit - UnaryExecutor::Execute( - source, result, count, [&](SOURCE input) { return Cast::Operation(input / divide_factor); }); - } else { - // type might not fit: check limit - auto limit = POWERS_SOURCE::POWERS_OF_TEN[target_width]; - UnaryExecutor::Execute(source, result, count, [&](SOURCE input) { - if (input >= limit || input <= -limit) { - throw OutOfRangeException("Casting value \"%s\" to type %s failed: value is out of range!", - Decimal::ToString(input, source.GetType().scale()), - result.GetType().ToString()); - } - return Cast::Operation(input / divide_factor); - }); + return LogicalTypeIdToString(id_); } } +// LCOV_EXCL_STOP -template -static void DecimalDecimalCastSwitch(Vector &source, Vector &result, idx_t count) { - source.GetType().Verify(); - result.GetType().Verify(); - - // we need to either multiply or divide by the difference in scales - if (result.GetType().scale() >= source.GetType().scale()) { - // multiply - switch (result.GetType().InternalType()) { - case PhysicalType::INT16: - TemplatedDecimalScaleUp(source, result, count); - break; - case PhysicalType::INT32: - TemplatedDecimalScaleUp(source, result, count); - break; - case PhysicalType::INT64: - TemplatedDecimalScaleUp(source, result, count); - break; - case PhysicalType::INT128: - TemplatedDecimalScaleUp(source, result, count); - break; - default: - throw NotImplementedException("Unimplemented internal type for decimal"); - } 
+LogicalTypeId TransformStringToLogicalType(const string &str) { + auto lower_str = StringUtil::Lower(str); + // Transform column type + if (lower_str == "int" || lower_str == "int4" || lower_str == "signed" || lower_str == "integer" || + lower_str == "integral" || lower_str == "int32") { + return LogicalTypeId::INTEGER; + } else if (lower_str == "varchar" || lower_str == "bpchar" || lower_str == "text" || lower_str == "string" || + lower_str == "char") { + return LogicalTypeId::VARCHAR; + } else if (lower_str == "bytea" || lower_str == "blob" || lower_str == "varbinary" || lower_str == "binary") { + return LogicalTypeId::BLOB; + } else if (lower_str == "int8" || lower_str == "bigint" || lower_str == "int64" || lower_str == "long" || + lower_str == "oid") { + return LogicalTypeId::BIGINT; + } else if (lower_str == "int2" || lower_str == "smallint" || lower_str == "short" || lower_str == "int16") { + return LogicalTypeId::SMALLINT; + } else if (lower_str == "timestamp" || lower_str == "datetime" || lower_str == "timestamp_us") { + return LogicalTypeId::TIMESTAMP; + } else if (lower_str == "timestamp_ms") { + return LogicalTypeId::TIMESTAMP_MS; + } else if (lower_str == "timestamp_ns") { + return LogicalTypeId::TIMESTAMP_NS; + } else if (lower_str == "timestamp_s") { + return LogicalTypeId::TIMESTAMP_SEC; + } else if (lower_str == "bool" || lower_str == "boolean" || lower_str == "logical") { + return LogicalTypeId::BOOLEAN; + } else if (lower_str == "real" || lower_str == "float4" || lower_str == "float") { + return LogicalTypeId::FLOAT; + } else if (lower_str == "decimal" || lower_str == "dec" || lower_str == "numeric") { + return LogicalTypeId::DECIMAL; + } else if (lower_str == "double" || lower_str == "float8" || lower_str == "decimal") { + return LogicalTypeId::DOUBLE; + } else if (lower_str == "tinyint" || lower_str == "int1") { + return LogicalTypeId::TINYINT; + } else if (lower_str == "date") { + return LogicalTypeId::DATE; + } else if (lower_str == "time") { 
+ return LogicalTypeId::TIME; + } else if (lower_str == "interval") { + return LogicalTypeId::INTERVAL; + } else if (lower_str == "hugeint" || lower_str == "int128") { + return LogicalTypeId::HUGEINT; + } else if (lower_str == "struct" || lower_str == "row") { + return LogicalTypeId::STRUCT; + } else if (lower_str == "map") { + return LogicalTypeId::MAP; + } else if (lower_str == "utinyint" || lower_str == "uint8") { + return LogicalTypeId::UTINYINT; + } else if (lower_str == "usmallint" || lower_str == "uint16") { + return LogicalTypeId::USMALLINT; + } else if (lower_str == "uinteger" || lower_str == "uint32") { + return LogicalTypeId::UINTEGER; + } else if (lower_str == "ubigint" || lower_str == "uint64") { + return LogicalTypeId::UBIGINT; } else { - // divide - switch (result.GetType().InternalType()) { - case PhysicalType::INT16: - TemplatedDecimalScaleDown(source, result, count); - break; - case PhysicalType::INT32: - TemplatedDecimalScaleDown(source, result, count); - break; - case PhysicalType::INT64: - TemplatedDecimalScaleDown(source, result, count); - break; - case PhysicalType::INT128: - TemplatedDecimalScaleDown(source, result, count); - break; - default: - throw NotImplementedException("Unimplemented internal type for decimal"); - } + throw NotImplementedException("DataType %s not supported yet...\n", str); } } -static void DecimalCastSwitch(Vector &source, Vector &result, idx_t count) { - // now switch on the result type - switch (result.GetType().id()) { - case LogicalTypeId::BOOLEAN: - FromDecimalCast(source, result, count); - break; +bool LogicalType::IsIntegral() const { + switch (id_) { case LogicalTypeId::TINYINT: - FromDecimalCast(source, result, count); - break; case LogicalTypeId::SMALLINT: - FromDecimalCast(source, result, count); - break; case LogicalTypeId::INTEGER: - FromDecimalCast(source, result, count); - break; case LogicalTypeId::BIGINT: - FromDecimalCast(source, result, count); - break; case LogicalTypeId::UTINYINT: - 
FromDecimalCast(source, result, count); - break; case LogicalTypeId::USMALLINT: - FromDecimalCast(source, result, count); - break; case LogicalTypeId::UINTEGER: - FromDecimalCast(source, result, count); - break; case LogicalTypeId::UBIGINT: - FromDecimalCast(source, result, count); - break; case LogicalTypeId::HUGEINT: - FromDecimalCast(source, result, count); - break; - case LogicalTypeId::DECIMAL: { - // decimal to decimal cast - // first we need to figure out the source and target internal types - switch (source.GetType().InternalType()) { - case PhysicalType::INT16: - DecimalDecimalCastSwitch(source, result, count); - break; - case PhysicalType::INT32: - DecimalDecimalCastSwitch(source, result, count); - break; - case PhysicalType::INT64: - DecimalDecimalCastSwitch(source, result, count); - break; - case PhysicalType::INT128: - DecimalDecimalCastSwitch(source, result, count); - break; - default: - throw NotImplementedException("Unimplemented internal type for decimal in decimal_decimal cast"); - } - break; - } - case LogicalTypeId::FLOAT: - FromDecimalCast(source, result, count); - break; - case LogicalTypeId::DOUBLE: - FromDecimalCast(source, result, count); - break; - case LogicalTypeId::VARCHAR: { - auto &source_type = source.GetType(); - switch (source_type.InternalType()) { - case PhysicalType::INT16: - UnaryExecutor::Execute(source, result, count, [&](int16_t input) { - return StringCastFromDecimal::Operation(input, source_type.width(), source_type.scale(), - result); - }); - break; - case PhysicalType::INT32: - UnaryExecutor::Execute(source, result, count, [&](int32_t input) { - return StringCastFromDecimal::Operation(input, source_type.width(), source_type.scale(), - result); - }); - break; - case PhysicalType::INT64: - UnaryExecutor::Execute(source, result, count, [&](int64_t input) { - return StringCastFromDecimal::Operation(input, source_type.width(), source_type.scale(), - result); - }); - break; - case PhysicalType::INT128: - 
UnaryExecutor::Execute(source, result, count, [&](hugeint_t input) { - return StringCastFromDecimal::Operation(input, source_type.width(), source_type.scale(), - result); - }); - break; - default: - throw NotImplementedException("Unimplemented internal decimal type"); - } - break; - } + return true; default: - VectorNullCast(source, result, count); - break; + return false; } } -template -static void NumericCastSwitch(Vector &source, Vector &result, idx_t count) { - // now switch on the result type - switch (result.GetType().id()) { - case LogicalTypeId::BOOLEAN: - UnaryExecutor::Execute(source, result, count); - break; +bool LogicalType::IsNumeric() const { + switch (id_) { case LogicalTypeId::TINYINT: - UnaryExecutor::Execute(source, result, count); - break; case LogicalTypeId::SMALLINT: - UnaryExecutor::Execute(source, result, count); - break; case LogicalTypeId::INTEGER: - UnaryExecutor::Execute(source, result, count); - break; case LogicalTypeId::BIGINT: - UnaryExecutor::Execute(source, result, count); - break; - case LogicalTypeId::UTINYINT: - UnaryExecutor::Execute(source, result, count); - break; - case LogicalTypeId::USMALLINT: - UnaryExecutor::Execute(source, result, count); - break; - case LogicalTypeId::UINTEGER: - UnaryExecutor::Execute(source, result, count); - break; - case LogicalTypeId::UBIGINT: - UnaryExecutor::Execute(source, result, count); - break; case LogicalTypeId::HUGEINT: - UnaryExecutor::Execute(source, result, count); - break; case LogicalTypeId::FLOAT: - UnaryExecutor::Execute(source, result, count); - break; case LogicalTypeId::DOUBLE: - UnaryExecutor::Execute(source, result, count); - break; case LogicalTypeId::DECIMAL: - ToDecimalCast(source, result, count); - break; - case LogicalTypeId::VARCHAR: { - VectorStringCast(source, result, count); - break; - } - case LogicalTypeId::LIST: { - auto list_child = make_unique(); - ListVector::SetEntry(result, move(list_child)); - VectorNullCast(source, result, count); - break; - } + case 
LogicalTypeId::UTINYINT: + case LogicalTypeId::USMALLINT: + case LogicalTypeId::UINTEGER: + case LogicalTypeId::UBIGINT: + return true; default: - VectorNullCast(source, result, count); - break; + return false; } } -template -static void VectorStringCastNumericSwitch(Vector &source, Vector &result, idx_t count) { - // now switch on the result type - switch (result.GetType().id()) { +bool LogicalType::GetDecimalProperties(uint8_t &width, uint8_t &scale) const { + switch (id_) { + case LogicalTypeId::SQLNULL: + width = 0; + scale = 0; + break; case LogicalTypeId::BOOLEAN: - UnaryExecutor::Execute(source, result, count); + width = 1; + scale = 0; break; case LogicalTypeId::TINYINT: - UnaryExecutor::Execute(source, result, count); + // tinyint: [-127, 127] = DECIMAL(3,0) + width = 3; + scale = 0; break; case LogicalTypeId::SMALLINT: - UnaryExecutor::Execute(source, result, count); + // smallint: [-32767, 32767] = DECIMAL(5,0) + width = 5; + scale = 0; break; case LogicalTypeId::INTEGER: - UnaryExecutor::Execute(source, result, count); + // integer: [-2147483647, 2147483647] = DECIMAL(10,0) + width = 10; + scale = 0; break; case LogicalTypeId::BIGINT: - UnaryExecutor::Execute(source, result, count); + // bigint: [-9223372036854775807, 9223372036854775807] = DECIMAL(19,0) + width = 19; + scale = 0; break; case LogicalTypeId::UTINYINT: - UnaryExecutor::Execute(source, result, count); + // UInt8 — [0 : 255] + width = 3; + scale = 0; break; case LogicalTypeId::USMALLINT: - UnaryExecutor::Execute(source, result, count); + // UInt16 — [0 : 65535] + width = 5; + scale = 0; break; case LogicalTypeId::UINTEGER: - UnaryExecutor::Execute(source, result, count); + // UInt32 — [0 : 4294967295] + width = 10; + scale = 0; break; case LogicalTypeId::UBIGINT: - UnaryExecutor::Execute(source, result, count); + // UInt64 — [0 : 18446744073709551615] + width = 20; + scale = 0; break; case LogicalTypeId::HUGEINT: - UnaryExecutor::Execute(source, result, count); - break; - case 
LogicalTypeId::FLOAT: - UnaryExecutor::Execute(source, result, count); - break; - case LogicalTypeId::DOUBLE: - UnaryExecutor::Execute(source, result, count); - break; - case LogicalTypeId::INTERVAL: - UnaryExecutor::Execute(source, result, count); + // hugeint: max size decimal (38, 0) + // note that a hugeint is not guaranteed to fit in this + width = 38; + scale = 0; break; case LogicalTypeId::DECIMAL: - ToDecimalCast(source, result, count); + width = DecimalType::GetWidth(*this); + scale = DecimalType::GetScale(*this); break; default: - VectorNullCast(source, result, count); - break; + return false; } + return true; } -static void StringCastSwitch(Vector &source, Vector &result, idx_t count, bool strict = false) { - // now switch on the result type - switch (result.GetType().id()) { - case LogicalTypeId::DATE: - if (strict) { - UnaryExecutor::Execute(source, result, count); - } else { - UnaryExecutor::Execute(source, result, count); - } - break; - case LogicalTypeId::TIME: - if (strict) { - UnaryExecutor::Execute(source, result, count); - } else { - UnaryExecutor::Execute(source, result, count); - } - break; - case LogicalTypeId::TIMESTAMP: - UnaryExecutor::Execute(source, result, count); - break; - case LogicalTypeId::BLOB: - VectorStringCast(source, result, count); - break; - default: - if (strict) { - VectorStringCastNumericSwitch(source, result, count); +LogicalType LogicalType::MaxLogicalType(const LogicalType &left, const LogicalType &right) { + if (left.id() < right.id()) { + return right; + } else if (right.id() < left.id()) { + return left; + } else { + if (left.id() == LogicalTypeId::VARCHAR) { + // varchar: use type that has collation (if any) + if (StringType::GetCollation(right).empty()) { + return left; + } else { + return right; + } + } else if (left.id() == LogicalTypeId::DECIMAL) { + // use max width/scale of the two types + auto width = MaxValue(DecimalType::GetWidth(left), DecimalType::GetWidth(right)); + auto scale = 
MaxValue(DecimalType::GetScale(left), DecimalType::GetScale(right)); + return LogicalType::DECIMAL(width, scale); + } else if (left.id() == LogicalTypeId::LIST) { + // list: perform max recursively on child type + auto new_child = MaxLogicalType(ListType::GetChildType(left), ListType::GetChildType(right)); + return LogicalType::LIST(move(new_child)); + } else if (left.id() == LogicalTypeId::STRUCT) { + // struct: perform recursively + auto &left_child_types = StructType::GetChildTypes(left); + auto &right_child_types = StructType::GetChildTypes(right); + if (left_child_types.size() != right_child_types.size()) { + // child types are not of equal size, we can't cast anyway + // just return the left child + return left; + } + child_list_t child_types; + for (idx_t i = 0; i < left_child_types.size(); i++) { + auto child_type = MaxLogicalType(left_child_types[i].second, right_child_types[i].second); + child_types.push_back(make_pair(left_child_types[i].first, move(child_type))); + } + return LogicalType::STRUCT(move(child_types)); } else { - VectorStringCastNumericSwitch(source, result, count); + // types are equal but no extra specifier: just return the type + return left; } - break; } } -static void DateCastSwitch(Vector &source, Vector &result, idx_t count) { - // now switch on the result type - switch (result.GetType().id()) { - case LogicalTypeId::VARCHAR: - // date to varchar - VectorStringCast(source, result, count); - break; - case LogicalTypeId::TIMESTAMP: - // date to timestamp - UnaryExecutor::Execute(source, result, count); - break; - default: - VectorNullCast(source, result, count); - break; +void LogicalType::Verify() const { +#ifdef DEBUG + if (id_ == LogicalTypeId::DECIMAL) { + D_ASSERT(DecimalType::GetWidth(*this) >= 1 && DecimalType::GetWidth(*this) <= Decimal::MAX_WIDTH_DECIMAL); + D_ASSERT(DecimalType::GetScale(*this) >= 0 && DecimalType::GetScale(*this) <= DecimalType::GetWidth(*this)); } +#endif } -static void TimeCastSwitch(Vector &source, Vector 
&result, idx_t count) { - // now switch on the result type - switch (result.GetType().id()) { - case LogicalTypeId::VARCHAR: - // time to varchar - VectorStringCast(source, result, count); - break; - default: - VectorNullCast(source, result, count); - break; - } +bool ApproxEqual(float ldecimal, float rdecimal) { + float epsilon = std::fabs(rdecimal) * 0.01; + return std::fabs(ldecimal - rdecimal) <= epsilon; } -static void TimestampCastSwitch(Vector &source, Vector &result, idx_t count) { - // now switch on the result type - switch (result.GetType().id()) { - case LogicalTypeId::VARCHAR: - // timestamp to varchar - VectorStringCast(source, result, count); - break; - case LogicalTypeId::DATE: - // timestamp to date - UnaryExecutor::Execute(source, result, count); - break; - case LogicalTypeId::TIME: - // timestamp to time - UnaryExecutor::Execute(source, result, count); - break; - default: - VectorNullCast(source, result, count); - break; - } +bool ApproxEqual(double ldecimal, double rdecimal) { + double epsilon = std::fabs(rdecimal) * 0.01; + return std::fabs(ldecimal - rdecimal) <= epsilon; } -static void IntervalCastSwitch(Vector &source, Vector &result, idx_t count) { - // now switch on the result type - switch (result.GetType().id()) { - case LogicalTypeId::VARCHAR: - // time to varchar - VectorStringCast(source, result, count); - break; - default: - VectorNullCast(source, result, count); - break; - } -} +//===--------------------------------------------------------------------===// +// Extra Type Info +//===--------------------------------------------------------------------===// +enum class ExtraTypeInfoType : uint8_t { + INVALID_TYPE_INFO = 0, + DECIMAL_TYPE_INFO = 1, + STRING_TYPE_INFO = 2, + LIST_TYPE_INFO = 3, + STRUCT_TYPE_INFO = 4 +}; -static void BlobCastSwitch(Vector &source, Vector &result, idx_t count) { - // now switch on the result type - switch (result.GetType().id()) { - case LogicalTypeId::VARCHAR: - // blob to varchar - 
VectorStringCast(source, result, count); - break; - default: - VectorNullCast(source, result, count); - break; +struct ExtraTypeInfo { + explicit ExtraTypeInfo(ExtraTypeInfoType type) : type(type) { } -} - -static void ValueStringCastSwitch(Vector &source, Vector &result, idx_t count) { - switch (result.GetType().id()) { - case LogicalTypeId::VARCHAR: - if (source.GetVectorType() == VectorType::CONSTANT_VECTOR) { - result.SetVectorType(source.GetVectorType()); - } else { - result.SetVectorType(VectorType::FLAT_VECTOR); - } - for (idx_t i = 0; i < count; i++) { - auto src_val = source.GetValue(i); - auto str_val = src_val.ToString(); - result.SetValue(i, Value(str_val)); - } - break; - default: - VectorNullCast(source, result, count); - break; + virtual ~ExtraTypeInfo() { } -} -static void ListCastSwitch(Vector &source, Vector &result, idx_t count) { - switch (result.GetType().id()) { - case LogicalTypeId::LIST: { - // only handle constant and flat vectors here for now - if (source.GetVectorType() == VectorType::CONSTANT_VECTOR) { - result.SetVectorType(source.GetVectorType()); - ConstantVector::SetNull(result, ConstantVector::IsNull(source)); - } else { - source.Normalify(count); - result.SetVectorType(VectorType::FLAT_VECTOR); - FlatVector::SetValidity(result, FlatVector::Validity(source)); - } - auto list_child = make_unique(result.GetType().child_types()[0].second); - ListVector::SetEntry(result, move(list_child)); - if (ListVector::HasEntry(source)) { - auto &source_cc = ListVector::GetEntry(source); - auto source_size = ListVector::GetListSize(source); - Vector append_vector(result.GetType().child_types()[0].second); - if (source_size > STANDARD_VECTOR_SIZE) { - append_vector.Resize(STANDARD_VECTOR_SIZE, source_size); - } - if (source_cc.GetData()) { - VectorOperations::Cast(source_cc, append_vector, source_size); - ListVector::Append(result, append_vector, source_size); - } - } + ExtraTypeInfoType type; - auto ldata = FlatVector::GetData(source); - auto tdata 
= FlatVector::GetData(result); - for (idx_t i = 0; i < count; i++) { - tdata[i] = ldata[i]; - } - break; - } - default: - ValueStringCastSwitch(source, result, count); - break; +public: + virtual bool Equals(ExtraTypeInfo *other) = 0; + //! Serializes a ExtraTypeInfo to a stand-alone binary blob + virtual void Serialize(Serializer &serializer) const = 0; + //! Serializes a ExtraTypeInfo to a stand-alone binary blob + static void Serialize(ExtraTypeInfo *info, Serializer &serializer); + //! Deserializes a blob back into an ExtraTypeInfo + static shared_ptr Deserialize(Deserializer &source); +}; + +//===--------------------------------------------------------------------===// +// Decimal Type +//===--------------------------------------------------------------------===// +struct DecimalTypeInfo : public ExtraTypeInfo { + DecimalTypeInfo(uint8_t width_p, uint8_t scale_p) + : ExtraTypeInfo(ExtraTypeInfoType::DECIMAL_TYPE_INFO), width(width_p), scale(scale_p) { } -} -static void StructCastSwitch(Vector &source, Vector &result, idx_t count) { - switch (result.GetType().id()) { - case LogicalTypeId::STRUCT: { - if (source.GetType().child_types().size() != result.GetType().child_types().size()) { - throw TypeMismatchException(source.GetType(), result.GetType(), "Cannot cast STRUCTs of different size"); - } - auto &source_children = StructVector::GetEntries(source); - D_ASSERT(source_children.size() == source.GetType().child_types().size()); + uint8_t width; + uint8_t scale; - bool is_constant = true; - for (idx_t c_idx = 0; c_idx < result.GetType().child_types().size(); c_idx++) { - auto &child_type = result.GetType().child_types()[c_idx]; - auto result_child_vector = make_unique(child_type.second); - auto &source_child_vector = *source_children[c_idx].second; - if (source_child_vector.GetVectorType() != VectorType::CONSTANT_VECTOR) { - is_constant = false; - } - if (child_type.second != source_child_vector.GetType()) { - VectorOperations::Cast(source_child_vector, 
*result_child_vector, count, false); - } else { - result_child_vector->Reference(source_child_vector); - } - StructVector::AddEntry(result, child_type.first, move(result_child_vector)); +public: + bool Equals(ExtraTypeInfo *other_p) override { + if (!other_p) { + return false; } - if (is_constant) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); + if (type != other_p->type) { + return false; } + auto &other = (DecimalTypeInfo &)*other_p; + return width == other.width && scale == other.scale; + } - break; + void Serialize(Serializer &serializer) const override { + serializer.Write(width); + serializer.Write(scale); } - case LogicalTypeId::VARCHAR: - if (source.GetVectorType() == VectorType::CONSTANT_VECTOR) { - result.SetVectorType(source.GetVectorType()); - } else { - result.SetVectorType(VectorType::FLAT_VECTOR); - } - for (idx_t i = 0; i < count; i++) { - auto src_val = source.GetValue(i); - auto str_val = src_val.ToString(); - result.SetValue(i, Value(str_val)); - } - break; - default: - VectorNullCast(source, result, count); - break; + static shared_ptr Deserialize(Deserializer &source) { + auto width = source.Read(); + auto scale = source.Read(); + return make_shared(width, scale); } +}; + +uint8_t DecimalType::GetWidth(const LogicalType &type) { + D_ASSERT(type.id() == LogicalTypeId::DECIMAL); + auto info = type.AuxInfo(); + D_ASSERT(info); + return ((DecimalTypeInfo &)*info).width; } -void VectorOperations::Cast(Vector &source, Vector &result, idx_t count, bool strict) { - D_ASSERT(source.GetType() != result.GetType()); - // first switch on source type - switch (source.GetType().id()) { - case LogicalTypeId::BOOLEAN: - NumericCastSwitch(source, result, count); - break; - case LogicalTypeId::TINYINT: - NumericCastSwitch(source, result, count); - break; - case LogicalTypeId::SMALLINT: - NumericCastSwitch(source, result, count); - break; - case LogicalTypeId::INTEGER: - NumericCastSwitch(source, result, count); - break; - case LogicalTypeId::BIGINT: - 
NumericCastSwitch(source, result, count); - break; - case LogicalTypeId::UTINYINT: - NumericCastSwitch(source, result, count); - break; - case LogicalTypeId::USMALLINT: - NumericCastSwitch(source, result, count); - break; - case LogicalTypeId::UINTEGER: - NumericCastSwitch(source, result, count); - break; - case LogicalTypeId::UBIGINT: - NumericCastSwitch(source, result, count); - break; - case LogicalTypeId::HUGEINT: - NumericCastSwitch(source, result, count); - break; - case LogicalTypeId::DECIMAL: - DecimalCastSwitch(source, result, count); - break; - case LogicalTypeId::FLOAT: - NumericCastSwitch(source, result, count); - break; - case LogicalTypeId::DOUBLE: - NumericCastSwitch(source, result, count); - break; - case LogicalTypeId::DATE: - DateCastSwitch(source, result, count); - break; - case LogicalTypeId::TIME: - TimeCastSwitch(source, result, count); - break; - case LogicalTypeId::TIMESTAMP: - TimestampCastSwitch(source, result, count); - break; - case LogicalTypeId::INTERVAL: - IntervalCastSwitch(source, result, count); - break; - case LogicalTypeId::VARCHAR: - StringCastSwitch(source, result, count, strict); - break; - case LogicalTypeId::BLOB: - BlobCastSwitch(source, result, count); - break; - case LogicalTypeId::SQLNULL: { - // cast a NULL to another type, just copy the properties and change the type - result.SetVectorType(VectorType::CONSTANT_VECTOR); - ConstantVector::SetNull(result, true); - break; - } - case LogicalTypeId::STRUCT: - StructCastSwitch(source, result, count); - break; - case LogicalTypeId::LIST: - ListCastSwitch(source, result, count); - break; - default: - throw UnimplementedCast(source.GetType(), result.GetType()); - } +uint8_t DecimalType::GetScale(const LogicalType &type) { + D_ASSERT(type.id() == LogicalTypeId::DECIMAL); + auto info = type.AuxInfo(); + D_ASSERT(info); + return ((DecimalTypeInfo &)*info).scale; +} + +LogicalType LogicalType::DECIMAL(int width, int scale) { + auto type_info = make_shared(width, scale); + return 
LogicalType(LogicalTypeId::DECIMAL, move(type_info)); } -} // namespace duckdb //===--------------------------------------------------------------------===// -// copy.cpp -// Description: This file contains the implementation of the different copy -// functions +// String Type //===--------------------------------------------------------------------===// +struct StringTypeInfo : public ExtraTypeInfo { + explicit StringTypeInfo(string collation_p) + : ExtraTypeInfo(ExtraTypeInfoType::STRING_TYPE_INFO), collation(move(collation_p)) { + } + string collation; +public: + bool Equals(ExtraTypeInfo *other_p) override { + // collation info has no impact on equality + return true; + } + void Serialize(Serializer &serializer) const override { + serializer.WriteString(collation); + } + static shared_ptr Deserialize(Deserializer &source) { + auto collation = source.Read(); + return make_shared(move(collation)); + } +}; +string StringType::GetCollation(const LogicalType &type) { + if (type.id() != LogicalTypeId::VARCHAR) { + return string(); + } + auto info = type.AuxInfo(); + if (!info) { + return string(); + } + return ((StringTypeInfo &)*info).collation; +} +LogicalType LogicalType::VARCHAR_COLLATION(string collation) { // NOLINT + auto string_info = make_shared(move(collation)); + return LogicalType(LogicalTypeId::VARCHAR, move(string_info)); +} -namespace duckdb { - -template -static void TemplatedCopy(const Vector &source, const SelectionVector &sel, Vector &target, idx_t source_offset, - idx_t target_offset, idx_t copy_count) { - auto ldata = FlatVector::GetData(source); - auto tdata = FlatVector::GetData(target); - for (idx_t i = 0; i < copy_count; i++) { - auto source_idx = sel.get_index(source_offset + i); - tdata[target_offset + i] = ldata[source_idx]; +//===--------------------------------------------------------------------===// +// List Type +//===--------------------------------------------------------------------===// +struct ListTypeInfo : public ExtraTypeInfo 
{ + explicit ListTypeInfo(LogicalType child_type_p) + : ExtraTypeInfo(ExtraTypeInfoType::LIST_TYPE_INFO), child_type(move(child_type_p)) { } -} -void VectorOperations::Copy(const Vector &source, Vector &target, const SelectionVector &sel_p, idx_t source_count, - idx_t source_offset, idx_t target_offset) { - D_ASSERT(source_offset <= source_count); - D_ASSERT(target.GetVectorType() == VectorType::FLAT_VECTOR); - D_ASSERT(source.GetType() == target.GetType()); - const SelectionVector *sel = &sel_p; - switch (source.GetVectorType()) { - case VectorType::DICTIONARY_VECTOR: { - // dictionary vector: merge selection vectors - auto &child = DictionaryVector::Child(source); - auto &dict_sel = DictionaryVector::SelVector(source); - // merge the selection vectors and verify the child - auto new_buffer = dict_sel.Slice(*sel, source_count); - SelectionVector merged_sel(new_buffer); - VectorOperations::Copy(child, target, merged_sel, source_count, source_offset, target_offset); - return; + LogicalType child_type; + +public: + bool Equals(ExtraTypeInfo *other_p) override { + if (!other_p) { + return false; + } + if (type != other_p->type) { + return false; + } + auto &other = (ListTypeInfo &)*other_p; + return child_type == other.child_type; } - case VectorType::SEQUENCE_VECTOR: { - int64_t start, increment; - Vector seq(source.GetType()); - SequenceVector::GetSequence(source, start, increment); - VectorOperations::GenerateSequence(seq, source_count, *sel, start, increment); - VectorOperations::Copy(seq, target, *sel, source_count, source_offset, target_offset); - return; + + void Serialize(Serializer &serializer) const override { + child_type.Serialize(serializer); } - case VectorType::CONSTANT_VECTOR: - sel = &ConstantVector::ZERO_SELECTION_VECTOR; - break; // carry on with below code - case VectorType::FLAT_VECTOR: - break; - default: - throw NotImplementedException("FIXME unimplemented vector type for VectorOperations::Copy"); + + static shared_ptr Deserialize(Deserializer 
&source) { + auto child_type = LogicalType::Deserialize(source); + return make_shared(move(child_type)); } +}; + +const LogicalType &ListType::GetChildType(const LogicalType &type) { + D_ASSERT(type.id() == LogicalTypeId::LIST); + auto info = type.AuxInfo(); + D_ASSERT(info); + return ((ListTypeInfo &)*info).child_type; +} - idx_t copy_count = source_count - source_offset; - if (copy_count == 0) { - return; +LogicalType LogicalType::LIST(LogicalType child) { + auto info = make_shared(move(child)); + return LogicalType(LogicalTypeId::LIST, move(info)); +} + +//===--------------------------------------------------------------------===// +// Struct Type +//===--------------------------------------------------------------------===// +struct StructTypeInfo : public ExtraTypeInfo { + explicit StructTypeInfo(child_list_t child_types_p) + : ExtraTypeInfo(ExtraTypeInfoType::STRUCT_TYPE_INFO), child_types(move(child_types_p)) { } - // first copy the nullmask - auto &tmask = FlatVector::Validity(target); - if (source.GetVectorType() == VectorType::CONSTANT_VECTOR) { - if (ConstantVector::IsNull(source)) { - for (idx_t i = 0; i < copy_count; i++) { - tmask.SetInvalid(target_offset + i); - } + child_list_t child_types; + +public: + bool Equals(ExtraTypeInfo *other_p) override { + if (!other_p) { + return false; } - } else { - auto &smask = FlatVector::Validity(source); - if (smask.IsMaskSet()) { - for (idx_t i = 0; i < copy_count; i++) { - auto idx = sel->get_index(source_offset + i); - tmask.Set(target_offset + i, smask.RowIsValid(idx)); - } + if (type != other_p->type) { + return false; } + auto &other = (StructTypeInfo &)*other_p; + return child_types == other.child_types; } - D_ASSERT(sel); - - // now copy over the data - switch (source.GetType().InternalType()) { - case PhysicalType::BOOL: - case PhysicalType::INT8: - TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); - break; - case PhysicalType::INT16: - TemplatedCopy(source, *sel, target, 
source_offset, target_offset, copy_count); - break; - case PhysicalType::INT32: - TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); - break; - case PhysicalType::HASH: - case PhysicalType::INT64: - TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); - break; - case PhysicalType::UINT8: - TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); - break; - case PhysicalType::UINT16: - TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); - break; - case PhysicalType::UINT32: - TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); - break; - case PhysicalType::UINT64: - TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); - break; - case PhysicalType::INT128: - TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); - break; - case PhysicalType::POINTER: - TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); - break; - case PhysicalType::FLOAT: - TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); - break; - case PhysicalType::DOUBLE: - TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); - break; - case PhysicalType::INTERVAL: - TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); - break; - case PhysicalType::VARCHAR: { - auto ldata = FlatVector::GetData(source); - auto tdata = FlatVector::GetData(target); - for (idx_t i = 0; i < copy_count; i++) { - auto source_idx = sel->get_index(source_offset + i); - auto target_idx = target_offset + i; - if (tmask.RowIsValid(target_idx)) { - tdata[target_idx] = StringVector::AddStringOrBlob(target, ldata[source_idx]); - } + void Serialize(Serializer &serializer) const override { + serializer.Write(child_types.size()); + for (idx_t i = 0; i < child_types.size(); i++) { + serializer.WriteString(child_types[i].first); + 
child_types[i].second.Serialize(serializer); } - break; } - case PhysicalType::STRUCT: { - if (StructVector::HasEntries(target)) { - // target already has entries: append to them - auto &source_children = StructVector::GetEntries(source); - auto &target_children = StructVector::GetEntries(target); - D_ASSERT(source_children.size() == target_children.size()); - for (idx_t i = 0; i < source_children.size(); i++) { - D_ASSERT(target_children[i].first == target_children[i].first); - VectorOperations::Copy(*source_children[i].second, *target_children[i].second, *sel, source_count, - source_offset, target_offset); - } - } else { - D_ASSERT(target_offset == 0); - // target has no entries: create new entries for the target - auto &source_children = StructVector::GetEntries(source); - for (auto &child : source_children) { - auto child_copy = make_unique(child.second->GetType()); - VectorOperations::Copy(*child.second, *child_copy, *sel, source_count, source_offset, target_offset); - StructVector::AddEntry(target, child.first, move(child_copy)); - } + + static shared_ptr Deserialize(Deserializer &source) { + child_list_t child_list; + auto child_types_size = source.Read(); + for (uint32_t i = 0; i < child_types_size; i++) { + auto name = source.Read(); + auto type = LogicalType::Deserialize(source); + child_list.push_back(make_pair(move(name), move(type))); } - break; + return make_shared(move(child_list)); } - case PhysicalType::LIST: { - D_ASSERT(target.GetType().InternalType() == PhysicalType::LIST); - if (ListVector::HasEntry(source)) { - //! if the source has list offsets, we need to append them to the target - if (!ListVector::HasEntry(target)) { - auto target_child = make_unique(target.GetType().child_types()[0].second); - ListVector::SetEntry(target, move(target_child)); - } +}; - //! 
build a selection vector for the copied child elements - auto sdata = FlatVector::GetData(source); - vector child_rows; - for (idx_t i = 0; i < copy_count; ++i) { - if (tmask.RowIsValid(target_offset + i)) { - auto source_idx = sel->get_index(source_offset + i); - auto &source_entry = sdata[source_idx]; - for (idx_t j = 0; j < source_entry.length; ++j) { - child_rows.emplace_back(source_entry.offset + j); - } - } - } - idx_t source_child_size = child_rows.size(); - SelectionVector child_sel(child_rows.data()); +const child_list_t &StructType::GetChildTypes(const LogicalType &type) { + D_ASSERT(type.id() == LogicalTypeId::STRUCT || type.id() == LogicalTypeId::MAP); + auto info = type.AuxInfo(); + D_ASSERT(info); + return ((StructTypeInfo &)*info).child_types; +} - auto &source_child = ListVector::GetEntry(source); +const LogicalType &StructType::GetChildType(const LogicalType &type, idx_t index) { + auto &child_types = StructType::GetChildTypes(type); + D_ASSERT(index < child_types.size()); + return child_types[index].second; +} - idx_t old_target_child_len = ListVector::GetListSize(target); +const string &StructType::GetChildName(const LogicalType &type, idx_t index) { + auto &child_types = StructType::GetChildTypes(type); + D_ASSERT(index < child_types.size()); + return child_types[index].first; +} - //! append to list itself - ListVector::Append(target, source_child, child_sel, source_child_size); +idx_t StructType::GetChildCount(const LogicalType &type) { + return StructType::GetChildTypes(type).size(); +} - //! 
now write the list offsets - auto tdata = FlatVector::GetData(target); - for (idx_t i = 0; i < copy_count; i++) { - auto source_idx = sel->get_index(source_offset + i); - auto &source_entry = sdata[source_idx]; - auto &target_entry = tdata[target_offset + i]; +LogicalType LogicalType::STRUCT(child_list_t children) { + auto info = make_shared(move(children)); + return LogicalType(LogicalTypeId::STRUCT, move(info)); +} - target_entry.length = source_entry.length; - target_entry.offset = old_target_child_len; - if (tmask.RowIsValid(target_offset + i)) { - old_target_child_len += target_entry.length; - } - } - } - break; +LogicalType LogicalType::MAP(child_list_t children) { + auto info = make_shared(move(children)); + return LogicalType(LogicalTypeId::MAP, move(info)); +} + +//===--------------------------------------------------------------------===// +// Extra Type Info +//===--------------------------------------------------------------------===// +void ExtraTypeInfo::Serialize(ExtraTypeInfo *info, Serializer &serializer) { + if (!info) { + serializer.Write(ExtraTypeInfoType::INVALID_TYPE_INFO); + } else { + serializer.Write(info->type); + info->Serialize(serializer); } +} + +shared_ptr ExtraTypeInfo::Deserialize(Deserializer &source) { + auto type = source.Read(); + switch (type) { + case ExtraTypeInfoType::INVALID_TYPE_INFO: + return nullptr; + case ExtraTypeInfoType::DECIMAL_TYPE_INFO: + return DecimalTypeInfo::Deserialize(source); + case ExtraTypeInfoType::STRING_TYPE_INFO: + return StringTypeInfo::Deserialize(source); + case ExtraTypeInfoType::LIST_TYPE_INFO: + return ListTypeInfo::Deserialize(source); + case ExtraTypeInfoType::STRUCT_TYPE_INFO: + return StructTypeInfo::Deserialize(source); default: - throw NotImplementedException("Unimplemented type '%s' for copy!", - TypeIdToString(source.GetType().InternalType())); + throw InternalException("Unimplemented type info in ExtraTypeInfo::Deserialize"); } } -void VectorOperations::Copy(const Vector &source, 
Vector &target, idx_t source_count, idx_t source_offset, - idx_t target_offset) { - switch (source.GetVectorType()) { - case VectorType::DICTIONARY_VECTOR: { - // dictionary: continue into child with selection vector - auto &child = DictionaryVector::Child(source); - auto &dict_sel = DictionaryVector::SelVector(source); - VectorOperations::Copy(child, target, dict_sel, source_count, source_offset, target_offset); - break; - } - case VectorType::CONSTANT_VECTOR: - VectorOperations::Copy(source, target, ConstantVector::ZERO_SELECTION_VECTOR, source_count, source_offset, - target_offset); - break; - case VectorType::FLAT_VECTOR: - if (target_offset + source_count - source_offset > STANDARD_VECTOR_SIZE) { - idx_t sel_vec_size = target_offset + source_count - source_offset; - SelectionVector selection_vector(sel_vec_size); - for (size_t i = 0; i < sel_vec_size; i++) { - selection_vector.set_index(i, i); - } - VectorOperations::Copy(source, target, selection_vector, source_count, source_offset, target_offset); - } else { - VectorOperations::Copy(source, target, FlatVector::INCREMENTAL_SELECTION_VECTOR, source_count, - source_offset, target_offset); - } - break; - case VectorType::SEQUENCE_VECTOR: { - int64_t start, increment; - SequenceVector::GetSequence(source, start, increment); - Vector flattened(source.GetType()); - VectorOperations::GenerateSequence(flattened, source_count, start, increment); +//===--------------------------------------------------------------------===// +// Logical Type +//===--------------------------------------------------------------------===// - VectorOperations::Copy(flattened, target, FlatVector::INCREMENTAL_SELECTION_VECTOR, source_count, source_offset, - target_offset); - break; +// the destructor needs to know about the extra type info +LogicalType::~LogicalType() { +} + +void LogicalType::Serialize(Serializer &serializer) const { + serializer.Write(id_); + ExtraTypeInfo::Serialize(type_info_.get(), serializer); +} + +LogicalType 
LogicalType::Deserialize(Deserializer &source) { + auto id = source.Read(); + auto info = ExtraTypeInfo::Deserialize(source); + + return LogicalType(id, move(info)); +} + +bool LogicalType::operator==(const LogicalType &rhs) const { + if (id_ != rhs.id_) { + return false; } - default: - throw NotImplementedException("FIXME: unimplemented vector type for VectorOperations::Copy"); + if (type_info_.get() == rhs.type_info_.get()) { + return true; + } + if (type_info_) { + return type_info_->Equals(rhs.type_info_.get()); + } else { + D_ASSERT(rhs.type_info_); + return rhs.type_info_->Equals(type_info_.get()); } } } // namespace duckdb -//===--------------------------------------------------------------------===// -// hash.cpp -// Description: This file contains the vectorized hash implementations -//===--------------------------------------------------------------------===// @@ -29515,234 +35733,242 @@ void VectorOperations::Copy(const Vector &source, Vector &target, idx_t source_c namespace duckdb { -struct HashOp { - template - static inline hash_t Operation(T input, bool is_null) { - return duckdb::Hash(is_null ? duckdb::NullValue() : input); +//===--------------------------------------------------------------------===// +// Comparison Operations +//===--------------------------------------------------------------------===// + +struct ValuePositionComparator { + // Return true if the positional Values definitely match. + // Default to the same as the final value + template + static inline bool Definite(const Value &lhs, const Value &rhs) { + return Final(lhs, rhs); } -}; -template -static inline void TightLoopHash(T *__restrict ldata, hash_t *__restrict result_data, const SelectionVector *rsel, - idx_t count, const SelectionVector *__restrict sel_vector, ValidityMask &mask) { - if (!mask.AllValid()) { - for (idx_t i = 0; i < count; i++) { - auto ridx = HAS_RSEL ? 
rsel->get_index(i) : i; - auto idx = sel_vector->get_index(ridx); - result_data[ridx] = HashOp::Operation(ldata[idx], !mask.RowIsValid(idx)); - } - } else { - for (idx_t i = 0; i < count; i++) { - auto ridx = HAS_RSEL ? rsel->get_index(i) : i; - auto idx = sel_vector->get_index(ridx); - result_data[ridx] = duckdb::Hash(ldata[idx]); - } + // Select the positional Values that need further testing. + // Usually this means Is Not Distinct, as those are the semantics used by Postges + template + static inline bool Possible(const Value &lhs, const Value &rhs) { + return ValueOperations::NotDistinctFrom(lhs, rhs); + } + + // Return true if the positional Values definitely match in the final position + // This needs to be specialised. + template + static inline bool Final(const Value &lhs, const Value &rhs) { + return false; } + + // Tie-break based on length when one of the sides has been exhausted, returning true if the LHS matches. + // This essentially means that the existing positions compare equal. + // Default to the same semantics as the OP for idx_t. This works in most cases. 
+ template + static inline bool TieBreak(const idx_t lpos, const idx_t rpos) { + return OP::Operation(lpos, rpos); + } +}; + +// Equals must always check every column +template <> +inline bool ValuePositionComparator::Definite(const Value &lhs, const Value &rhs) { + return false; } -template -static inline void TemplatedLoopHash(Vector &input, Vector &result, const SelectionVector *rsel, idx_t count) { - if (input.GetVectorType() == VectorType::CONSTANT_VECTOR) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); +template <> +inline bool ValuePositionComparator::Final(const Value &lhs, const Value &rhs) { + return ValueOperations::NotDistinctFrom(lhs, rhs); +} - auto ldata = ConstantVector::GetData(input); - auto result_data = ConstantVector::GetData(result); - *result_data = HashOp::Operation(*ldata, ConstantVector::IsNull(input)); - } else { - result.SetVectorType(VectorType::FLAT_VECTOR); +// NotEquals must check everything that matched +template <> +inline bool ValuePositionComparator::Possible(const Value &lhs, const Value &rhs) { + return true; +} - VectorData idata; - input.Orrify(count, idata); +template <> +inline bool ValuePositionComparator::Final(const Value &lhs, const Value &rhs) { + return ValueOperations::NotDistinctFrom(lhs, rhs); +} - TightLoopHash((T *)idata.data, FlatVector::GetData(result), rsel, count, idata.sel, - idata.validity); - } +// Non-strict inequalities must use strict comparisons for Definite +template <> +bool ValuePositionComparator::Definite(const Value &lhs, const Value &rhs) { + return ValueOperations::DistinctLessThan(lhs, rhs); } -template -static inline void HashTypeSwitch(Vector &input, Vector &result, const SelectionVector *rsel, idx_t count) { - D_ASSERT(result.GetType().id() == LogicalTypeId::HASH); - switch (input.GetType().InternalType()) { +template <> +bool ValuePositionComparator::Final(const Value &lhs, const Value &rhs) { + return ValueOperations::DistinctLessThanEquals(lhs, rhs); +} + +template <> +bool 
ValuePositionComparator::Definite(const Value &lhs, const Value &rhs) { + return ValueOperations::DistinctGreaterThan(lhs, rhs); +} + +template <> +bool ValuePositionComparator::Final(const Value &lhs, const Value &rhs) { + return ValueOperations::DistinctGreaterThanEquals(lhs, rhs); +} + +// Strict inequalities just use strict for both Definite and Final +template <> +bool ValuePositionComparator::Final(const Value &lhs, const Value &rhs) { + return ValueOperations::DistinctLessThan(lhs, rhs); +} + +template <> +bool ValuePositionComparator::Final(const Value &lhs, const Value &rhs) { + return ValueOperations::DistinctGreaterThan(lhs, rhs); +} + +template +static bool TemplatedBooleanOperation(const Value &left, const Value &right) { + const auto &left_type = left.type(); + const auto &right_type = right.type(); + if (left_type != right_type) { + try { + LogicalType comparison_type = BoundComparisonExpression::BindComparison(left_type, right_type); + return TemplatedBooleanOperation(left.CastAs(comparison_type), right.CastAs(comparison_type)); + } catch (...) 
{ + return false; + } + } + switch (left_type.InternalType()) { case PhysicalType::BOOL: + return OP::Operation(left.value_.boolean, right.value_.boolean); case PhysicalType::INT8: - TemplatedLoopHash(input, result, rsel, count); - break; + return OP::Operation(left.value_.tinyint, right.value_.tinyint); case PhysicalType::INT16: - TemplatedLoopHash(input, result, rsel, count); - break; + return OP::Operation(left.value_.smallint, right.value_.smallint); case PhysicalType::INT32: - TemplatedLoopHash(input, result, rsel, count); - break; + return OP::Operation(left.value_.integer, right.value_.integer); case PhysicalType::INT64: - TemplatedLoopHash(input, result, rsel, count); - break; + return OP::Operation(left.value_.bigint, right.value_.bigint); case PhysicalType::UINT8: - TemplatedLoopHash(input, result, rsel, count); - break; + return OP::Operation(left.value_.utinyint, right.value_.utinyint); case PhysicalType::UINT16: - TemplatedLoopHash(input, result, rsel, count); - break; + return OP::Operation(left.value_.usmallint, right.value_.usmallint); case PhysicalType::UINT32: - TemplatedLoopHash(input, result, rsel, count); - break; + return OP::Operation(left.value_.uinteger, right.value_.uinteger); case PhysicalType::UINT64: - TemplatedLoopHash(input, result, rsel, count); - break; + return OP::Operation(left.value_.ubigint, right.value_.ubigint); case PhysicalType::INT128: - TemplatedLoopHash(input, result, rsel, count); - break; + return OP::Operation(left.value_.hugeint, right.value_.hugeint); case PhysicalType::FLOAT: - TemplatedLoopHash(input, result, rsel, count); - break; + return OP::Operation(left.value_.float_, right.value_.float_); case PhysicalType::DOUBLE: - TemplatedLoopHash(input, result, rsel, count); - break; + return OP::Operation(left.value_.double_, right.value_.double_); case PhysicalType::INTERVAL: - TemplatedLoopHash(input, result, rsel, count); - break; + return OP::Operation(left.value_.interval, right.value_.interval); case 
PhysicalType::VARCHAR: - TemplatedLoopHash(input, result, rsel, count); - break; + return OP::Operation(left.str_value, right.str_value); + case PhysicalType::STRUCT: { + // this should be enforced by the type + D_ASSERT(left.struct_value.size() == right.struct_value.size()); + idx_t i = 0; + for (; i < left.struct_value.size() - 1; ++i) { + if (ValuePositionComparator::Definite(left.struct_value[i], right.struct_value[i])) { + return true; + } + if (!ValuePositionComparator::Possible(left.struct_value[i], right.struct_value[i])) { + return false; + } + } + return ValuePositionComparator::Final(left.struct_value[i], right.struct_value[i]); + } + case PhysicalType::LIST: { + for (idx_t pos = 0;; ++pos) { + if (pos == left.list_value.size() || pos == right.list_value.size()) { + return ValuePositionComparator::TieBreak(left.list_value.size(), right.list_value.size()); + } + if (ValuePositionComparator::Definite(left.list_value[pos], right.list_value[pos])) { + return true; + } + if (!ValuePositionComparator::Possible(left.list_value[pos], right.list_value[pos])) { + return false; + } + } + return false; + } default: - throw InvalidTypeException(input.GetType(), "Invalid type for hash"); + throw InternalException("Unimplemented type for value comparison"); } } -void VectorOperations::Hash(Vector &input, Vector &result, idx_t count) { - HashTypeSwitch(input, result, nullptr, count); +bool ValueOperations::Equals(const Value &left, const Value &right) { + if (left.is_null || right.is_null) { + throw InternalException("Comparison on NULL values"); + } + return TemplatedBooleanOperation(left, right); } -void VectorOperations::Hash(Vector &input, Vector &result, const SelectionVector &sel, idx_t count) { - HashTypeSwitch(input, result, &sel, count); +bool ValueOperations::NotEquals(const Value &left, const Value &right) { + return !ValueOperations::Equals(left, right); } -static inline hash_t CombineHashScalar(hash_t a, hash_t b) { - return (a * 
UINT64_C(0xbf58476d1ce4e5b9)) ^ b; +bool ValueOperations::GreaterThan(const Value &left, const Value &right) { + if (left.is_null || right.is_null) { + throw InternalException("Comparison on NULL values"); + } + return TemplatedBooleanOperation(left, right); } -template -static inline void TightLoopCombineHashConstant(T *__restrict ldata, hash_t constant_hash, hash_t *__restrict hash_data, - const SelectionVector *rsel, idx_t count, - const SelectionVector *__restrict sel_vector, ValidityMask &mask) { - if (!mask.AllValid()) { - for (idx_t i = 0; i < count; i++) { - auto ridx = HAS_RSEL ? rsel->get_index(i) : i; - auto idx = sel_vector->get_index(ridx); - auto other_hash = HashOp::Operation(ldata[idx], !mask.RowIsValid(idx)); - hash_data[ridx] = CombineHashScalar(constant_hash, other_hash); - } - } else { - for (idx_t i = 0; i < count; i++) { - auto ridx = HAS_RSEL ? rsel->get_index(i) : i; - auto idx = sel_vector->get_index(ridx); - auto other_hash = duckdb::Hash(ldata[idx]); - hash_data[ridx] = CombineHashScalar(constant_hash, other_hash); - } +bool ValueOperations::GreaterThanEquals(const Value &left, const Value &right) { + if (left.is_null || right.is_null) { + throw InternalException("Comparison on NULL values"); } + return TemplatedBooleanOperation(left, right); } -template -static inline void TightLoopCombineHash(T *__restrict ldata, hash_t *__restrict hash_data, const SelectionVector *rsel, - idx_t count, const SelectionVector *__restrict sel_vector, ValidityMask &mask) { - if (!mask.AllValid()) { - for (idx_t i = 0; i < count; i++) { - auto ridx = HAS_RSEL ? rsel->get_index(i) : i; - auto idx = sel_vector->get_index(ridx); - auto other_hash = HashOp::Operation(ldata[idx], !mask.RowIsValid(idx)); - hash_data[ridx] = CombineHashScalar(hash_data[ridx], other_hash); - } - } else { - for (idx_t i = 0; i < count; i++) { - auto ridx = HAS_RSEL ? 
rsel->get_index(i) : i; - auto idx = sel_vector->get_index(ridx); - auto other_hash = duckdb::Hash(ldata[idx]); - hash_data[ridx] = CombineHashScalar(hash_data[ridx], other_hash); - } +bool ValueOperations::LessThan(const Value &left, const Value &right) { + return ValueOperations::GreaterThan(right, left); +} + +bool ValueOperations::LessThanEquals(const Value &left, const Value &right) { + return ValueOperations::GreaterThanEquals(right, left); +} + +bool ValueOperations::NotDistinctFrom(const Value &left, const Value &right) { + if (left.is_null && right.is_null) { + return true; } + if (left.is_null != right.is_null) { + return false; + } + return TemplatedBooleanOperation(left, right); } -template -void TemplatedLoopCombineHash(Vector &input, Vector &hashes, const SelectionVector *rsel, idx_t count) { - if (input.GetVectorType() == VectorType::CONSTANT_VECTOR && hashes.GetVectorType() == VectorType::CONSTANT_VECTOR) { - auto ldata = ConstantVector::GetData(input); - auto hash_data = ConstantVector::GetData(hashes); +bool ValueOperations::DistinctFrom(const Value &left, const Value &right) { + return !ValueOperations::NotDistinctFrom(left, right); +} - auto other_hash = HashOp::Operation(*ldata, ConstantVector::IsNull(input)); - *hash_data = CombineHashScalar(*hash_data, other_hash); - } else { - VectorData idata; - input.Orrify(count, idata); - if (hashes.GetVectorType() == VectorType::CONSTANT_VECTOR) { - // mix constant with non-constant, first get the constant value - auto constant_hash = *ConstantVector::GetData(hashes); - // now re-initialize the hashes vector to an empty flat vector - hashes.Initialize(hashes.GetType()); - TightLoopCombineHashConstant((T *)idata.data, constant_hash, - FlatVector::GetData(hashes), rsel, count, idata.sel, - idata.validity); - } else { - D_ASSERT(hashes.GetVectorType() == VectorType::FLAT_VECTOR); - TightLoopCombineHash((T *)idata.data, FlatVector::GetData(hashes), rsel, count, - idata.sel, idata.validity); - } +bool 
ValueOperations::DistinctGreaterThan(const Value &left, const Value &right) { + if (left.is_null && right.is_null) { + return false; + } else if (right.is_null) { + return false; + } else if (left.is_null) { + return true; } + return TemplatedBooleanOperation(left, right); } -template -static inline void CombineHashTypeSwitch(Vector &hashes, Vector &input, const SelectionVector *rsel, idx_t count) { - D_ASSERT(hashes.GetType().id() == LogicalTypeId::HASH); - switch (input.GetType().InternalType()) { - case PhysicalType::BOOL: - case PhysicalType::INT8: - TemplatedLoopCombineHash(input, hashes, rsel, count); - break; - case PhysicalType::INT16: - TemplatedLoopCombineHash(input, hashes, rsel, count); - break; - case PhysicalType::INT32: - TemplatedLoopCombineHash(input, hashes, rsel, count); - break; - case PhysicalType::INT64: - TemplatedLoopCombineHash(input, hashes, rsel, count); - break; - case PhysicalType::UINT8: - TemplatedLoopCombineHash(input, hashes, rsel, count); - break; - case PhysicalType::UINT16: - TemplatedLoopCombineHash(input, hashes, rsel, count); - break; - case PhysicalType::UINT32: - TemplatedLoopCombineHash(input, hashes, rsel, count); - break; - case PhysicalType::UINT64: - TemplatedLoopCombineHash(input, hashes, rsel, count); - break; - case PhysicalType::INT128: - TemplatedLoopCombineHash(input, hashes, rsel, count); - break; - case PhysicalType::FLOAT: - TemplatedLoopCombineHash(input, hashes, rsel, count); - break; - case PhysicalType::DOUBLE: - TemplatedLoopCombineHash(input, hashes, rsel, count); - break; - case PhysicalType::INTERVAL: - TemplatedLoopCombineHash(input, hashes, rsel, count); - break; - case PhysicalType::VARCHAR: - TemplatedLoopCombineHash(input, hashes, rsel, count); - break; - default: - throw InvalidTypeException(input.GetType(), "Invalid type for hash"); +bool ValueOperations::DistinctGreaterThanEquals(const Value &left, const Value &right) { + if (left.is_null) { + return true; + } else if (right.is_null) { + return 
false; } + return TemplatedBooleanOperation(left, right); } -void VectorOperations::CombineHash(Vector &hashes, Vector &input, idx_t count) { - CombineHashTypeSwitch(hashes, input, nullptr, count); +bool ValueOperations::DistinctLessThan(const Value &left, const Value &right) { + return ValueOperations::DistinctGreaterThan(right, left); } -void VectorOperations::CombineHash(Vector &hashes, Vector &input, const SelectionVector &rsel, idx_t count) { - CombineHashTypeSwitch(hashes, input, &rsel, count); +bool ValueOperations::DistinctLessThanEquals(const Value &left, const Value &right) { + return ValueOperations::DistinctGreaterThanEquals(right, left); } } // namespace duckdb @@ -29750,140 +35976,58 @@ void VectorOperations::CombineHash(Vector &hashes, Vector &input, const Selectio -namespace duckdb { -template -static void CopyToStorageLoop(VectorData &vdata, idx_t count, data_ptr_t target) { - auto ldata = (T *)vdata.data; - auto result_data = (T *)target; - for (idx_t i = 0; i < count; i++) { - auto idx = vdata.sel->get_index(i); - if (!vdata.validity.RowIsValid(idx)) { - result_data[i] = NullValue(); - } else { - result_data[i] = ldata[idx]; - } - } -} +namespace duckdb { -void VectorOperations::WriteToStorage(Vector &source, idx_t count, data_ptr_t target) { - if (count == 0) { - return; +hash_t ValueOperations::Hash(const Value &op) { + if (op.is_null) { + return 0; } - VectorData vdata; - source.Orrify(count, vdata); - - switch (source.GetType().InternalType()) { + switch (op.type().InternalType()) { case PhysicalType::BOOL: + return duckdb::Hash(op.value_.boolean); case PhysicalType::INT8: - CopyToStorageLoop(vdata, count, target); - break; + return duckdb::Hash(op.value_.tinyint); case PhysicalType::INT16: - CopyToStorageLoop(vdata, count, target); - break; + return duckdb::Hash(op.value_.smallint); case PhysicalType::INT32: - CopyToStorageLoop(vdata, count, target); - break; + return duckdb::Hash(op.value_.integer); case PhysicalType::INT64: - 
CopyToStorageLoop(vdata, count, target); - break; + return duckdb::Hash(op.value_.bigint); case PhysicalType::UINT8: - CopyToStorageLoop(vdata, count, target); - break; + return duckdb::Hash(op.value_.utinyint); case PhysicalType::UINT16: - CopyToStorageLoop(vdata, count, target); - break; + return duckdb::Hash(op.value_.usmallint); case PhysicalType::UINT32: - CopyToStorageLoop(vdata, count, target); - break; + return duckdb::Hash(op.value_.uinteger); case PhysicalType::UINT64: - CopyToStorageLoop(vdata, count, target); - break; + return duckdb::Hash(op.value_.ubigint); case PhysicalType::INT128: - CopyToStorageLoop(vdata, count, target); - break; - case PhysicalType::HASH: - CopyToStorageLoop(vdata, count, target); - break; - case PhysicalType::POINTER: - CopyToStorageLoop(vdata, count, target); - break; + return duckdb::Hash(op.value_.hugeint); case PhysicalType::FLOAT: - CopyToStorageLoop(vdata, count, target); - break; + return duckdb::Hash(op.value_.float_); case PhysicalType::DOUBLE: - CopyToStorageLoop(vdata, count, target); - break; + return duckdb::Hash(op.value_.double_); case PhysicalType::INTERVAL: - CopyToStorageLoop(vdata, count, target); - break; - default: - throw NotImplementedException("Unimplemented type for CopyToStorage"); + return duckdb::Hash(op.value_.interval); + case PhysicalType::VARCHAR: + return duckdb::Hash(op.str_value.c_str()); + case PhysicalType::LIST: { + hash_t hash = 0; + for (auto &entry : op.list_value) { + hash ^= ValueOperations::Hash(entry); + } + return hash; } -} - -template -static void ReadFromStorageLoop(data_ptr_t source, idx_t count, Vector &result) { - auto ldata = (T *)source; - auto result_data = FlatVector::GetData(result); - auto &mask = FlatVector::Validity(result); - for (idx_t i = 0; i < count; i++) { - if (IsNullValue(ldata[i])) { - mask.SetInvalid(i); - } else { - result_data[i] = ldata[i]; + case PhysicalType::STRUCT: { + hash_t hash = 0; + for (auto &entry : op.struct_value) { + hash ^= 
ValueOperations::Hash(entry); } + return hash; } -} - -void VectorOperations::ReadFromStorage(data_ptr_t source, idx_t count, Vector &result) { - result.SetVectorType(VectorType::FLAT_VECTOR); - switch (result.GetType().InternalType()) { - case PhysicalType::BOOL: - case PhysicalType::INT8: - ReadFromStorageLoop(source, count, result); - break; - case PhysicalType::INT16: - ReadFromStorageLoop(source, count, result); - break; - case PhysicalType::INT32: - ReadFromStorageLoop(source, count, result); - break; - case PhysicalType::INT64: - ReadFromStorageLoop(source, count, result); - break; - case PhysicalType::UINT8: - ReadFromStorageLoop(source, count, result); - break; - case PhysicalType::UINT16: - ReadFromStorageLoop(source, count, result); - break; - case PhysicalType::UINT32: - ReadFromStorageLoop(source, count, result); - break; - case PhysicalType::UINT64: - ReadFromStorageLoop(source, count, result); - break; - case PhysicalType::INT128: - ReadFromStorageLoop(source, count, result); - break; - case PhysicalType::HASH: - ReadFromStorageLoop(source, count, result); - break; - case PhysicalType::POINTER: - ReadFromStorageLoop(source, count, result); - break; - case PhysicalType::FLOAT: - ReadFromStorageLoop(source, count, result); - break; - case PhysicalType::DOUBLE: - ReadFromStorageLoop(source, count, result); - break; - case PhysicalType::INTERVAL: - ReadFromStorageLoop(source, count, result); - break; default: - throw NotImplementedException("Unimplemented type for CopyToStorage"); + throw InternalException("Unimplemented type for value hash"); } } @@ -29891,1485 +36035,2685 @@ void VectorOperations::ReadFromStorage(data_ptr_t source, idx_t count, Vector &r -#include + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/operator/subtract.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + namespace duckdb { 
-AdaptiveFilter::AdaptiveFilter(Expression &expr) - : iteration_count(0), observe_interval(10), execute_interval(20), warmup(true) { - auto &conj_expr = (BoundConjunctionExpression &)expr; - D_ASSERT(conj_expr.children.size() > 1); - for (idx_t idx = 0; idx < conj_expr.children.size(); idx++) { - permutation.push_back(idx); - if (idx != conj_expr.children.size() - 1) { - swap_likeliness.push_back(100); +struct SubtractOperator { + template + static inline TR Operation(TA left, TB right) { + return left - right; + } +}; + +template <> +float SubtractOperator::Operation(float left, float right); +template <> +double SubtractOperator::Operation(double left, double right); +template <> +interval_t SubtractOperator::Operation(interval_t left, interval_t right); +template <> +int64_t SubtractOperator::Operation(date_t left, date_t right); +template <> +date_t SubtractOperator::Operation(date_t left, int32_t right); +template <> +date_t SubtractOperator::Operation(date_t left, interval_t right); +template <> +timestamp_t SubtractOperator::Operation(timestamp_t left, interval_t right); +template <> +interval_t SubtractOperator::Operation(timestamp_t left, timestamp_t right); + +struct TrySubtractOperator { + template + static inline bool Operation(TA left, TB right, TR &result) { + throw InternalException("Unimplemented type for TrySubtractOperator"); + } +}; + +template <> +bool TrySubtractOperator::Operation(uint8_t left, uint8_t right, uint8_t &result); +template <> +bool TrySubtractOperator::Operation(uint16_t left, uint16_t right, uint16_t &result); +template <> +bool TrySubtractOperator::Operation(uint32_t left, uint32_t right, uint32_t &result); +template <> +bool TrySubtractOperator::Operation(uint64_t left, uint64_t right, uint64_t &result); + +template <> +bool TrySubtractOperator::Operation(int8_t left, int8_t right, int8_t &result); +template <> +bool TrySubtractOperator::Operation(int16_t left, int16_t right, int16_t &result); +template <> +bool 
TrySubtractOperator::Operation(int32_t left, int32_t right, int32_t &result); +template <> +bool TrySubtractOperator::Operation(int64_t left, int64_t right, int64_t &result); + +struct SubtractOperatorOverflowCheck { + template + static inline TR Operation(TA left, TB right) { + TR result; + if (!TrySubtractOperator::Operation(left, right, result)) { + throw OutOfRangeException("Overflow in subtraction of %s (%d - %d)!", TypeIdToString(GetTypeId()), left, + right); } + return result; } - right_random_border = 100 * (conj_expr.children.size() - 1); -} +}; -AdaptiveFilter::AdaptiveFilter(TableFilterSet *table_filters) - : iteration_count(0), observe_interval(10), execute_interval(20), warmup(true) { - for (auto &table_filter : table_filters->filters) { - permutation.push_back(table_filter.first); - swap_likeliness.push_back(100); +struct TryDecimalSubtract { + template + static inline bool Operation(TA left, TB right, TR &result) { + throw InternalException("Unimplemented type for TryDecimalSubtract"); } - swap_likeliness.pop_back(); - right_random_border = 100 * (table_filters->filters.size() - 1); -} -void AdaptiveFilter::AdaptRuntimeStatistics(double duration) { - iteration_count++; - runtime_sum += duration; +}; - if (!warmup) { - // the last swap was observed - if (observe && iteration_count == observe_interval) { - // keep swap if runtime decreased, else reverse swap - if (prev_mean - (runtime_sum / iteration_count) <= 0) { - // reverse swap because runtime didn't decrease - std::swap(permutation[swap_idx], permutation[swap_idx + 1]); +template <> +bool TryDecimalSubtract::Operation(int16_t left, int16_t right, int16_t &result); +template <> +bool TryDecimalSubtract::Operation(int32_t left, int32_t right, int32_t &result); +template <> +bool TryDecimalSubtract::Operation(int64_t left, int64_t right, int64_t &result); +template <> +bool TryDecimalSubtract::Operation(hugeint_t left, hugeint_t right, hugeint_t &result); - // decrease swap likeliness, but make sure 
there is always a small likeliness left - if (swap_likeliness[swap_idx] > 1) { - swap_likeliness[swap_idx] /= 2; - } - } else { - // keep swap because runtime decreased, reset likeliness - swap_likeliness[swap_idx] = 100; - } - observe = false; +struct DecimalSubtractOverflowCheck { + template + static inline TR Operation(TA left, TB right) { + TR result; + if (!TryDecimalSubtract::Operation(left, right, result)) { + throw OutOfRangeException("Overflow in subtract of DECIMAL(18) (%d - %d). You might want to add an " + "explicit cast to a bigger decimal.", + left, right); + } + return result; + } +}; - // reset values - iteration_count = 0; - runtime_sum = 0.0; - } else if (!observe && iteration_count == execute_interval) { - // save old mean to evaluate swap - prev_mean = runtime_sum / iteration_count; +template <> +hugeint_t DecimalSubtractOverflowCheck::Operation(hugeint_t left, hugeint_t right); - // get swap index and swap likeliness - std::uniform_int_distribution distribution(1, right_random_border); // a <= i <= b - idx_t random_number = distribution(generator) - 1; +struct SubtractTimeOperator { + template + static TR Operation(TA left, TB right); +}; - swap_idx = random_number / 100; // index to be swapped - idx_t likeliness = random_number - 100 * swap_idx; // random number between [0, 100) +template <> +dtime_t SubtractTimeOperator::Operation(dtime_t left, interval_t right); - // check if swap is going to happen - if (swap_likeliness[swap_idx] > likeliness) { // always true for the first swap of an index - // swap - std::swap(permutation[swap_idx], permutation[swap_idx + 1]); +} // namespace duckdb - // observe whether swap will be applied - observe = true; - } - // reset values - iteration_count = 0; - runtime_sum = 0.0; +namespace duckdb { + +template +static Value BinaryValueOperation(const Value &left, const Value &right) { + auto left_type = left.type(); + auto right_type = right.type(); + LogicalType result_type = left_type; + if (left_type != 
right_type) { + result_type = LogicalType::MaxLogicalType(left.type(), right.type()); + Value left_cast = left.CastAs(result_type); + Value right_cast = right.CastAs(result_type); + return BinaryValueOperation(left_cast, right_cast); + } + if (left.is_null || right.is_null) { + return Value().CastAs(result_type); + } + if (TypeIsIntegral(result_type.InternalType())) { + hugeint_t left_hugeint; + hugeint_t right_hugeint; + switch (result_type.InternalType()) { + case PhysicalType::INT8: + left_hugeint = Hugeint::Convert(left.value_.tinyint); + right_hugeint = Hugeint::Convert(right.value_.tinyint); + break; + case PhysicalType::INT16: + left_hugeint = Hugeint::Convert(left.value_.smallint); + right_hugeint = Hugeint::Convert(right.value_.smallint); + break; + case PhysicalType::INT32: + left_hugeint = Hugeint::Convert(left.value_.integer); + right_hugeint = Hugeint::Convert(right.value_.integer); + break; + case PhysicalType::INT64: + left_hugeint = Hugeint::Convert(left.value_.bigint); + right_hugeint = Hugeint::Convert(right.value_.bigint); + break; + case PhysicalType::INT128: + left_hugeint = left.value_.hugeint; + right_hugeint = right.value_.hugeint; + break; + default: + throw NotImplementedException("Unimplemented type for value binary op"); } + // integer addition + return Value::Numeric(result_type, + OP::template Operation(left_hugeint, right_hugeint)); + } else if (result_type.InternalType() == PhysicalType::FLOAT) { + return Value::FLOAT( + OP::template Operation(left.GetValue(), right.GetValue())); + } else if (result_type.InternalType() == PhysicalType::DOUBLE) { + return Value::DOUBLE( + OP::template Operation(left.GetValue(), right.GetValue())); } else { - if (iteration_count == 5) { - // initially set all values - iteration_count = 0; - runtime_sum = 0.0; - observe = false; - warmup = false; - } + throw NotImplementedException("Unimplemented type for value binary op"); + } +} + 
+//===--------------------------------------------------------------------===// +// Numeric Operations +//===--------------------------------------------------------------------===// +Value ValueOperations::Add(const Value &left, const Value &right) { + return BinaryValueOperation(left, right); +} + +Value ValueOperations::Subtract(const Value &left, const Value &right) { + return BinaryValueOperation(left, right); +} + +Value ValueOperations::Multiply(const Value &left, const Value &right) { + return BinaryValueOperation(left, right); +} + +Value ValueOperations::Modulo(const Value &left, const Value &right) { + if (right == 0) { + return Value(right.type()); + } else { + return BinaryValueOperation(left, right); + } +} + +Value ValueOperations::Divide(const Value &left, const Value &right) { + if (right == 0) { + return Value(right.type()); + } else { + return BinaryValueOperation(left, right); } } } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/execution/aggregate_hashtable.hpp -// -// -//===----------------------------------------------------------------------===// +//===--------------------------------------------------------------------===// +// boolean_operators.cpp +// Description: This file contains the implementation of the boolean +// operations AND OR ! 
+//===--------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/execution/base_aggregate_hashtable.hpp -// -// -//===----------------------------------------------------------------------===// +namespace duckdb { + +//===--------------------------------------------------------------------===// +// AND/OR +//===--------------------------------------------------------------------===// +template +static void TemplatedBooleanNullmask(Vector &left, Vector &right, Vector &result, idx_t count) { + D_ASSERT(left.GetType().id() == LogicalTypeId::BOOLEAN && right.GetType().id() == LogicalTypeId::BOOLEAN && + result.GetType().id() == LogicalTypeId::BOOLEAN); + + if (left.GetVectorType() == VectorType::CONSTANT_VECTOR && right.GetVectorType() == VectorType::CONSTANT_VECTOR) { + // operation on two constants, result is constant vector + result.SetVectorType(VectorType::CONSTANT_VECTOR); + auto ldata = ConstantVector::GetData(left); + auto rdata = ConstantVector::GetData(right); + auto result_data = ConstantVector::GetData(result); + + bool is_null = OP::Operation(*ldata > 0, *rdata > 0, ConstantVector::IsNull(left), + ConstantVector::IsNull(right), *result_data); + ConstantVector::SetNull(result, is_null); + } else { + // perform generic loop + VectorData ldata, rdata; + left.Orrify(count, ldata); + right.Orrify(count, rdata); + + result.SetVectorType(VectorType::FLAT_VECTOR); + auto left_data = (uint8_t *)ldata.data; // we use uint8 to avoid load of gunk bools + auto right_data = (uint8_t *)rdata.data; + auto result_data = FlatVector::GetData(result); + auto &result_mask = FlatVector::Validity(result); + if (!ldata.validity.AllValid() || !rdata.validity.AllValid()) { + for (idx_t i = 0; i < count; i++) { + auto lidx = ldata.sel->get_index(i); + auto ridx = rdata.sel->get_index(i); + bool is_null = + OP::Operation(left_data[lidx] > 0, right_data[ridx] > 0, 
!ldata.validity.RowIsValid(lidx), + !rdata.validity.RowIsValid(ridx), result_data[i]); + result_mask.Set(i, !is_null); + } + } else { + for (idx_t i = 0; i < count; i++) { + auto lidx = ldata.sel->get_index(i); + auto ridx = rdata.sel->get_index(i); + result_data[i] = OP::SimpleOperation(left_data[lidx], right_data[ridx]); + } + } + } +} +/* +SQL AND Rules: +TRUE AND TRUE = TRUE +TRUE AND FALSE = FALSE +TRUE AND NULL = NULL +FALSE AND TRUE = FALSE +FALSE AND FALSE = FALSE +FALSE AND NULL = FALSE +NULL AND TRUE = NULL +NULL AND FALSE = FALSE +NULL AND NULL = NULL +Basically: +- Only true if both are true +- False if either is false (regardless of NULLs) +- NULL otherwise +*/ +struct TernaryAnd { + static bool SimpleOperation(bool left, bool right) { + return left && right; + } + static bool Operation(bool left, bool right, bool left_null, bool right_null, bool &result) { + if (left_null && right_null) { + // both NULL: + // result is NULL + return true; + } else if (left_null) { + // left is NULL: + // result is FALSE if right is false + // result is NULL if right is true + result = right; + return right; + } else if (right_null) { + // right is NULL: + // result is FALSE if left is false + // result is NULL if left is true + result = left; + return left; + } else { + // no NULL: perform the AND + result = left && right; + return false; + } + } +}; +void VectorOperations::And(Vector &left, Vector &right, Vector &result, idx_t count) { + TemplatedBooleanNullmask(left, right, result, count); +} +/* +SQL OR Rules: +OR +TRUE OR TRUE = TRUE +TRUE OR FALSE = TRUE +TRUE OR NULL = TRUE +FALSE OR TRUE = TRUE +FALSE OR FALSE = FALSE +FALSE OR NULL = NULL +NULL OR TRUE = TRUE +NULL OR FALSE = NULL +NULL OR NULL = NULL -namespace duckdb { -class BoundAggregateExpression; -class BufferManager; +Basically: +- Only false if both are false +- True if either is true (regardless of NULLs) +- NULL otherwise +*/ -struct AggregateObject { - AggregateObject(AggregateFunction function, 
FunctionData *bind_data, idx_t child_count, idx_t payload_size, - bool distinct, PhysicalType return_type, Expression *filter = nullptr) - : function(move(function)), bind_data(bind_data), child_count(child_count), payload_size(payload_size), - distinct(distinct), return_type(return_type), filter(filter) { +struct TernaryOr { + static bool SimpleOperation(bool left, bool right) { + return left || right; + } + static bool Operation(bool left, bool right, bool left_null, bool right_null, bool &result) { + if (left_null && right_null) { + // both NULL: + // result is NULL + return true; + } else if (left_null) { + // left is NULL: + // result is TRUE if right is true + // result is NULL if right is false + result = right; + return !right; + } else if (right_null) { + // right is NULL: + // result is TRUE if left is true + // result is NULL if left is false + result = left; + return !left; + } else { + // no NULL: perform the OR + result = left || right; + return false; + } } - - AggregateFunction function; - FunctionData *bind_data; - idx_t child_count; - idx_t payload_size; - bool distinct; - PhysicalType return_type; - Expression *filter = nullptr; - - static vector CreateAggregateObjects(const vector &bindings); }; -class BaseAggregateHashTable { -public: - BaseAggregateHashTable(BufferManager &buffer_manager, vector group_types, - vector payload_types, vector aggregate_objects); - virtual ~BaseAggregateHashTable() { - } +void VectorOperations::Or(Vector &left, Vector &right, Vector &result, idx_t count) { + TemplatedBooleanNullmask(left, right, result, count); +} - static idx_t Align(idx_t n) { - return ((n + 7) / 8) * 8; +struct NotOperator { + template + static inline TR Operation(TA left) { + return !left; } - -protected: - BufferManager &buffer_manager; - //! The aggregates to be computed - vector aggregates; - //! The types of the group columns stored in the hashtable - vector group_types; - //! 
The types of the payload columns stored in the hashtable - vector payload_types; - //! The size of the groups in bytes - idx_t group_width; - //! some optional padding to align payload - idx_t group_padding; - //! The size of the payload (aggregations) in bytes - idx_t payload_width; - - //! The empty payload data - unique_ptr empty_payload_data; - -protected: - void CallDestructors(Vector &state_vector, idx_t count); }; +void VectorOperations::Not(Vector &input, Vector &result, idx_t count) { + D_ASSERT(input.GetType() == LogicalType::BOOLEAN && result.GetType() == LogicalType::BOOLEAN); + UnaryExecutor::Execute(input, result, count); +} + } // namespace duckdb +//===--------------------------------------------------------------------===// +// comparison_operators.cpp +// Description: This file contains the implementation of the comparison +// operations == != >= <= > < +//===--------------------------------------------------------------------===// -namespace duckdb { -class BlockHandle; -class BufferHandle; -//! GroupedAggregateHashTable is a linear probing HT that is used for computing -//! aggregates -/*! - GroupedAggregateHashTable is a HT that is used for computing aggregates. It takes - as input the set of groups and the types of the aggregates to compute and - stores them in the HT. It uses linear probing for collision resolution. -*/ -// two part hash table -// hashes and payload -// hashes layout: -// [SALT][PAGE_NR][PAGE_OFFSET] -// [SALT] are the high bits of the hash value, e.g. 
16 for 64 bit hashes -// [PAGE_NR] is the buffer managed payload page index -// [PAGE_OFFSET] is the logical entry offset into said payload page -// NOTE: PAGE_NR and PAGE_OFFSET are reversed for 64 bit HTs because struct packing -// payload layout -// [HASH][GROUPS][PADDING][PAYLOAD] -// [HASH] is the hash of the groups -// [GROUPS] is the group data, could be multiple values, fixed size, strings are elsewhere -// [PADDING] is gunk data to align payload properly -// [PAYLOAD] is the payload (i.e. the aggregate states) -struct aggr_ht_entry_64 { - uint16_t salt; - uint16_t page_offset; - uint32_t page_nr; // this has to come last because alignment -}; +namespace duckdb { -struct aggr_ht_entry_32 { - uint8_t salt; - uint8_t page_nr; - uint16_t page_offset; +struct ComparisonSelector { + template + static idx_t Select(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, SelectionVector *true_sel, + SelectionVector *false_sel) { + throw NotImplementedException("Unknown comparison operation!"); + } }; -enum HtEntryType { HT_WIDTH_32, HT_WIDTH_64 }; - -class GroupedAggregateHashTable : public BaseAggregateHashTable { -public: - GroupedAggregateHashTable(BufferManager &buffer_manager, vector group_types, - vector payload_types, const vector &aggregates, - HtEntryType entry_type = HtEntryType::HT_WIDTH_64); - GroupedAggregateHashTable(BufferManager &buffer_manager, vector group_types, - vector payload_types, vector aggregates, - HtEntryType entry_type = HtEntryType::HT_WIDTH_64); - GroupedAggregateHashTable(BufferManager &buffer_manager, vector group_types); - ~GroupedAggregateHashTable() override; +template <> +inline idx_t ComparisonSelector::Select(Vector &left, Vector &right, const SelectionVector *sel, + idx_t count, SelectionVector *true_sel, + SelectionVector *false_sel) { + return VectorOperations::Equals(left, right, sel, count, true_sel, false_sel); +} - //! Add the given data to the HT, computing the aggregates grouped by the - //! 
data in the group chunk. When resize = true, aggregates will not be - //! computed but instead just assigned. - idx_t AddChunk(DataChunk &groups, DataChunk &payload); - idx_t AddChunk(DataChunk &groups, Vector &group_hashes, DataChunk &payload); +template <> +inline idx_t ComparisonSelector::Select(Vector &left, Vector &right, const SelectionVector *sel, + idx_t count, SelectionVector *true_sel, + SelectionVector *false_sel) { + return VectorOperations::NotEquals(left, right, sel, count, true_sel, false_sel); +} - //! Scan the HT starting from the scan_position until the result and group - //! chunks are filled. scan_position will be updated by this function. - //! Returns the amount of elements found. - idx_t Scan(idx_t &scan_position, DataChunk &result); +template <> +inline idx_t ComparisonSelector::Select(Vector &left, Vector &right, const SelectionVector *sel, + idx_t count, SelectionVector *true_sel, + SelectionVector *false_sel) { + return VectorOperations::GreaterThan(left, right, sel, count, true_sel, false_sel); +} - //! Fetch the aggregates for specific groups from the HT and place them in the result - void FetchAggregates(DataChunk &groups, DataChunk &result); +template <> +inline idx_t ComparisonSelector::Select(Vector &left, Vector &right, + const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, + SelectionVector *false_sel) { + return VectorOperations::GreaterThanEquals(left, right, sel, count, true_sel, false_sel); +} - //! Finds or creates groups in the hashtable using the specified group keys. The addresses vector will be filled - //! with pointers to the groups in the hash table, and the new_groups selection vector will point to the newly - //! created groups. The return value is the amount of newly created groups. 
- idx_t FindOrCreateGroups(DataChunk &groups, Vector &group_hashes, Vector &addresses_out, - SelectionVector &new_groups_out); - idx_t FindOrCreateGroups(DataChunk &groups, Vector &addresses_out, SelectionVector &new_groups_out); - void FindOrCreateGroups(DataChunk &groups, Vector &addresses_out); +template <> +inline idx_t ComparisonSelector::Select(Vector &left, Vector &right, const SelectionVector *sel, + idx_t count, SelectionVector *true_sel, + SelectionVector *false_sel) { + return VectorOperations::LessThan(left, right, sel, count, true_sel, false_sel); +} - //! Executes the filter(if any) and update the aggregates - static void UpdateAggregate(AggregateObject &aggr, DataChunk &payload, Vector &distinct_addresses, - idx_t input_count, idx_t payload_idx); - void Combine(GroupedAggregateHashTable &other); +template <> +inline idx_t ComparisonSelector::Select(Vector &left, Vector &right, const SelectionVector *sel, + idx_t count, SelectionVector *true_sel, + SelectionVector *false_sel) { + return VectorOperations::LessThanEquals(left, right, sel, count, true_sel, false_sel); +} - idx_t Size() { - return entries; +static idx_t ComparesNotNull(ValidityMask &vleft, ValidityMask &vright, ValidityMask &vresult, idx_t count, + SelectionVector ¬_null) { + idx_t valid = 0; + for (idx_t i = 0; i < count; ++i) { + if (vleft.RowIsValid(i) && vright.RowIsValid(i)) { + not_null.set_index(valid++, i); + } else { + vresult.SetInvalid(i); + } } + return valid; +} - idx_t MaxCapacity(); +template +static void NestedComparisonExecutor(Vector &left, Vector &right, Vector &result, idx_t count) { + const auto left_constant = left.GetVectorType() == VectorType::CONSTANT_VECTOR; + const auto right_constant = right.GetVectorType() == VectorType::CONSTANT_VECTOR; - void Partition(vector &partition_hts, hash_t mask, idx_t shift); + if ((left_constant && ConstantVector::IsNull(left)) || (right_constant && ConstantVector::IsNull(right))) { + // either left or right is constant NULL: 
result is constant NULL + result.SetVectorType(VectorType::CONSTANT_VECTOR); + ConstantVector::SetNull(result, true); + return; + } - void Finalize(); + if (left_constant && right_constant) { + // both sides are constant, and neither is NULL so just compare one element. + result.SetVectorType(VectorType::CONSTANT_VECTOR); + SelectionVector true_sel(1); + auto match_count = ComparisonSelector::Select(left, right, nullptr, 1, &true_sel, nullptr); + auto result_data = ConstantVector::GetData(result); + result_data[0] = match_count > 0; + return; + } - //! The stringheap of the AggregateHashTable - StringHeap string_heap; + result.SetVectorType(VectorType::FLAT_VECTOR); + auto result_data = FlatVector::GetData(result); + auto &validity = FlatVector::Validity(result); - //! The hash table load factor, when a resize is triggered - constexpr static float LOAD_FACTOR = 1.5; - constexpr static uint8_t HASH_WIDTH = sizeof(hash_t); + VectorData leftv, rightv; + left.Orrify(count, leftv); + right.Orrify(count, rightv); -private: - HtEntryType entry_type; + SelectionVector true_sel(count); + SelectionVector false_sel(count); - //! The total tuple size - idx_t tuple_size; - //! The amount of tuples that fit in a single block - idx_t tuples_per_block; - //! The capacity of the HT. This can be increased using - //! GroupedAggregateHashTable::Resize - idx_t capacity; - //! The amount of entries stored in the HT currently - idx_t entries; - //! The data of the HT - vector> payload_hds; - vector payload_hds_ptrs; + idx_t match_count = 0; + if (leftv.validity.AllValid() && rightv.validity.AllValid()) { + match_count = ComparisonSelector::Select(left, right, nullptr, count, &true_sel, &false_sel); + } else { + SelectionVector not_null(count); + count = ComparesNotNull(leftv.validity, rightv.validity, validity, count, not_null); + match_count = ComparisonSelector::Select(left, right, ¬_null, count, &true_sel, &false_sel); + } - //! 
The hashes of the HT - unique_ptr hashes_hdl; - data_ptr_t hashes_hdl_ptr; - data_ptr_t hashes_end_ptr; // of hashes + for (idx_t i = 0; i < match_count; ++i) { + const auto idx = true_sel.get_index(i); + result_data[idx] = true; + } - idx_t hash_prefix_shift; - idx_t payload_page_offset; + const idx_t no_match_count = count - match_count; + for (idx_t i = 0; i < no_match_count; ++i) { + const auto idx = false_sel.get_index(i); + result_data[idx] = false; + } +} - //! Bitmask for getting relevant bits from the hashes to determine the position - hash_t bitmask; +struct ComparisonExecutor { +private: + template + static inline void TemplatedExecute(Vector &left, Vector &right, Vector &result, idx_t count) { + BinaryExecutor::Execute(left, right, result, count); + } - //! Pointer vector for Scan() - Vector addresses; +public: + template + static inline void Execute(Vector &left, Vector &right, Vector &result, idx_t count) { + D_ASSERT(left.GetType() == right.GetType() && result.GetType() == LogicalType::BOOLEAN); + // the inplace loops take the result as the last parameter + switch (left.GetType().InternalType()) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + TemplatedExecute(left, right, result, count); + break; + case PhysicalType::INT16: + TemplatedExecute(left, right, result, count); + break; + case PhysicalType::INT32: + TemplatedExecute(left, right, result, count); + break; + case PhysicalType::INT64: + TemplatedExecute(left, right, result, count); + break; + case PhysicalType::UINT8: + TemplatedExecute(left, right, result, count); + break; + case PhysicalType::UINT16: + TemplatedExecute(left, right, result, count); + break; + case PhysicalType::UINT32: + TemplatedExecute(left, right, result, count); + break; + case PhysicalType::UINT64: + TemplatedExecute(left, right, result, count); + break; + case PhysicalType::INT128: + TemplatedExecute(left, right, result, count); + break; + case PhysicalType::FLOAT: + TemplatedExecute(left, right, result, 
count); + break; + case PhysicalType::DOUBLE: + TemplatedExecute(left, right, result, count); + break; + case PhysicalType::INTERVAL: + TemplatedExecute(left, right, result, count); + break; + case PhysicalType::VARCHAR: + TemplatedExecute(left, right, result, count); + break; + case PhysicalType::LIST: + case PhysicalType::MAP: + case PhysicalType::STRUCT: + NestedComparisonExecutor(left, right, result, count); + break; + default: + throw InternalException("Invalid type for comparison"); + } + } +}; - vector> distinct_hashes; +void VectorOperations::Equals(Vector &left, Vector &right, Vector &result, idx_t count) { + ComparisonExecutor::Execute(left, right, result, count); +} - bool is_finalized; +void VectorOperations::NotEquals(Vector &left, Vector &right, Vector &result, idx_t count) { + ComparisonExecutor::Execute(left, right, result, count); +} - // some stuff from FindOrCreateGroupsInternal() to avoid allocation there - Vector ht_offsets; - Vector hash_salts; - SelectionVector group_compare_vector; - SelectionVector no_match_vector; - SelectionVector empty_vector; +void VectorOperations::GreaterThanEquals(Vector &left, Vector &right, Vector &result, idx_t count) { + ComparisonExecutor::Execute(left, right, result, count); +} -private: - GroupedAggregateHashTable(const GroupedAggregateHashTable &) = delete; +void VectorOperations::LessThanEquals(Vector &left, Vector &right, Vector &result, idx_t count) { + ComparisonExecutor::Execute(left, right, result, count); +} - //! Resize the HT to the specified size. Must be larger than the current - //! size. 
- void Destroy(); - void ScatterGroups(DataChunk &groups, unique_ptr &group_data, Vector &addresses, - const SelectionVector &sel, idx_t count); +void VectorOperations::GreaterThan(Vector &left, Vector &right, Vector &result, idx_t count) { + ComparisonExecutor::Execute(left, right, result, count); +} - void Verify(); +void VectorOperations::LessThan(Vector &left, Vector &right, Vector &result, idx_t count) { + ComparisonExecutor::Execute(left, right, result, count); +} - void FlushMove(Vector &source_addresses, Vector &source_hashes, idx_t count); - void NewBlock(); +} // namespace duckdb +//===--------------------------------------------------------------------===// +// generators.cpp +// Description: This file contains the implementation of different generators +//===--------------------------------------------------------------------===// - template - void VerifyInternal(); - template - void Resize(idx_t size); - template - idx_t FindOrCreateGroupsInternal(DataChunk &groups, Vector &group_hashes, Vector &addresses, - SelectionVector &new_groups); - template > - void PayloadApply(FUNC fun); -}; -} // namespace duckdb +namespace duckdb { + +template +void TemplatedGenerateSequence(Vector &result, idx_t count, int64_t start, int64_t increment) { + D_ASSERT(result.GetType().IsNumeric()); + if (start > NumericLimits::Maximum() || increment > NumericLimits::Maximum()) { + throw Exception("Sequence start or increment out of type range"); + } + result.SetVectorType(VectorType::FLAT_VECTOR); + auto result_data = FlatVector::GetData(result); + auto value = (T)start; + for (idx_t i = 0; i < count; i++) { + result_data[i] = value; + value += increment; + } +} +void VectorOperations::GenerateSequence(Vector &result, idx_t count, int64_t start, int64_t increment) { + if (!result.GetType().IsNumeric()) { + throw InvalidTypeException(result.GetType(), "Can only generate sequences for numeric values!"); + } + switch (result.GetType().InternalType()) { + case PhysicalType::INT8: 
+ TemplatedGenerateSequence(result, count, start, increment); + break; + case PhysicalType::INT16: + TemplatedGenerateSequence(result, count, start, increment); + break; + case PhysicalType::INT32: + TemplatedGenerateSequence(result, count, start, increment); + break; + case PhysicalType::INT64: + TemplatedGenerateSequence(result, count, start, increment); + break; + case PhysicalType::FLOAT: + TemplatedGenerateSequence(result, count, start, increment); + break; + case PhysicalType::DOUBLE: + TemplatedGenerateSequence(result, count, start, increment); + break; + default: + throw NotImplementedException("Unimplemented type for generate sequence"); + } +} +template +void TemplatedGenerateSequence(Vector &result, idx_t count, const SelectionVector &sel, int64_t start, + int64_t increment) { + D_ASSERT(result.GetType().IsNumeric()); + if (start > NumericLimits::Maximum() || increment > NumericLimits::Maximum()) { + throw Exception("Sequence start or increment out of type range"); + } + result.SetVectorType(VectorType::FLAT_VECTOR); + auto result_data = FlatVector::GetData(result); + auto value = (T)start; + for (idx_t i = 0; i < count; i++) { + auto idx = sel.get_index(i); + result_data[idx] = value + increment * idx; + } +} +void VectorOperations::GenerateSequence(Vector &result, idx_t count, const SelectionVector &sel, int64_t start, + int64_t increment) { + if (!result.GetType().IsNumeric()) { + throw InvalidTypeException(result.GetType(), "Can only generate sequences for numeric values!"); + } + switch (result.GetType().InternalType()) { + case PhysicalType::INT8: + TemplatedGenerateSequence(result, count, sel, start, increment); + break; + case PhysicalType::INT16: + TemplatedGenerateSequence(result, count, sel, start, increment); + break; + case PhysicalType::INT32: + TemplatedGenerateSequence(result, count, sel, start, increment); + break; + case PhysicalType::INT64: + TemplatedGenerateSequence(result, count, sel, start, increment); + break; + case 
PhysicalType::FLOAT: + TemplatedGenerateSequence(result, count, sel, start, increment); + break; + case PhysicalType::DOUBLE: + TemplatedGenerateSequence(result, count, sel, start, increment); + break; + default: + throw NotImplementedException("Unimplemented type for generate sequence"); + } +} +} // namespace duckdb +namespace duckdb { +struct DistinctBinaryLambdaWrapper { + template + static inline RESULT_TYPE Operation(LEFT_TYPE left, RIGHT_TYPE right, bool is_left_null, bool is_right_null) { + return OP::template Operation(left, right, is_left_null, is_right_null); + } +}; +template +static void DistinctExecuteGenericLoop(LEFT_TYPE *__restrict ldata, RIGHT_TYPE *__restrict rdata, + RESULT_TYPE *__restrict result_data, const SelectionVector *__restrict lsel, + const SelectionVector *__restrict rsel, idx_t count, ValidityMask &lmask, + ValidityMask &rmask, ValidityMask &result_mask) { + for (idx_t i = 0; i < count; i++) { + auto lindex = lsel->get_index(i); + auto rindex = rsel->get_index(i); + auto lentry = ldata[lindex]; + auto rentry = rdata[rindex]; + result_data[i] = + OP::template Operation(lentry, rentry, !lmask.RowIsValid(lindex), !rmask.RowIsValid(rindex)); + } +} +template +static void DistinctExecuteConstant(Vector &left, Vector &right, Vector &result) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + auto ldata = ConstantVector::GetData(left); + auto rdata = ConstantVector::GetData(right); + auto result_data = ConstantVector::GetData(result); + *result_data = + OP::template Operation(*ldata, *rdata, ConstantVector::IsNull(left), ConstantVector::IsNull(right)); +} -#include -#include +template +static void DistinctExecuteGeneric(Vector &left, Vector &right, Vector &result, idx_t count) { + if (left.GetVectorType() == VectorType::CONSTANT_VECTOR && right.GetVectorType() == VectorType::CONSTANT_VECTOR) { + DistinctExecuteConstant(left, right, result); + } else { + VectorData ldata, rdata; -namespace duckdb { + left.Orrify(count, ldata); + 
right.Orrify(count, rdata); -GroupedAggregateHashTable::GroupedAggregateHashTable(BufferManager &buffer_manager, vector group_types, - vector payload_types, - const vector &bindings, - HtEntryType entry_type) - : GroupedAggregateHashTable(buffer_manager, move(group_types), move(payload_types), - AggregateObject::CreateAggregateObjects(bindings), entry_type) { + result.SetVectorType(VectorType::FLAT_VECTOR); + auto result_data = FlatVector::GetData(result); + DistinctExecuteGenericLoop( + (LEFT_TYPE *)ldata.data, (RIGHT_TYPE *)rdata.data, result_data, ldata.sel, rdata.sel, count, ldata.validity, + rdata.validity, FlatVector::Validity(result)); + } } -GroupedAggregateHashTable::GroupedAggregateHashTable(BufferManager &buffer_manager, vector group_types) - : GroupedAggregateHashTable(buffer_manager, move(group_types), {}, vector()) { +template +static void DistinctExecuteSwitch(Vector &left, Vector &right, Vector &result, idx_t count) { + DistinctExecuteGeneric(left, right, result, count); } -GroupedAggregateHashTable::GroupedAggregateHashTable(BufferManager &buffer_manager, vector group_types_p, - vector payload_types_p, - vector aggregate_objects_p, - HtEntryType entry_type) - : BaseAggregateHashTable(buffer_manager, move(group_types_p), move(payload_types_p), move(aggregate_objects_p)), - entry_type(entry_type), capacity(0), entries(0), payload_page_offset(0), is_finalized(false), - ht_offsets(LogicalTypeId::BIGINT), hash_salts(LogicalTypeId::SMALLINT), - group_compare_vector(STANDARD_VECTOR_SIZE), no_match_vector(STANDARD_VECTOR_SIZE), - empty_vector(STANDARD_VECTOR_SIZE) { - - // HT layout - tuple_size = HASH_WIDTH + group_width + payload_width; -#ifndef DUCKDB_ALLOW_UNDEFINED - tuple_size = BaseAggregateHashTable::Align(tuple_size); -#endif - - D_ASSERT(tuple_size <= Storage::BLOCK_ALLOC_SIZE); - tuples_per_block = Storage::BLOCK_ALLOC_SIZE / tuple_size; - hashes_hdl = buffer_manager.Allocate(Storage::BLOCK_ALLOC_SIZE); - hashes_hdl_ptr = hashes_hdl->Ptr(); 
+template +static void DistinctExecute(Vector &left, Vector &right, Vector &result, idx_t count) { + DistinctExecuteSwitch(left, right, result, count); +} - switch (entry_type) { - case HtEntryType::HT_WIDTH_64: { - hash_prefix_shift = (HASH_WIDTH - sizeof(aggr_ht_entry_64::salt)) * 8; - Resize(STANDARD_VECTOR_SIZE * 2); - break; +template +static inline idx_t +DistinctSelectGenericLoop(LEFT_TYPE *__restrict ldata, RIGHT_TYPE *__restrict rdata, + const SelectionVector *__restrict lsel, const SelectionVector *__restrict rsel, + const SelectionVector *__restrict result_sel, idx_t count, ValidityMask &lmask, + ValidityMask &rmask, SelectionVector *true_sel, SelectionVector *false_sel) { + idx_t true_count = 0, false_count = 0; + for (idx_t i = 0; i < count; i++) { + auto result_idx = result_sel->get_index(i); + const auto idx = DENSE ? i : result_idx; + auto lindex = lsel->get_index(idx); + auto rindex = rsel->get_index(idx); + if (NO_NULL) { + if (OP::Operation(ldata[lindex], rdata[rindex], false, false)) { + if (HAS_TRUE_SEL) { + true_sel->set_index(true_count++, result_idx); + } + } else { + if (HAS_FALSE_SEL) { + false_sel->set_index(false_count++, result_idx); + } + } + } else { + if (OP::Operation(ldata[lindex], rdata[rindex], !lmask.RowIsValid(lindex), !rmask.RowIsValid(rindex))) { + if (HAS_TRUE_SEL) { + true_sel->set_index(true_count++, result_idx); + } + } else { + if (HAS_FALSE_SEL) { + false_sel->set_index(false_count++, result_idx); + } + } + } } - case HtEntryType::HT_WIDTH_32: { - hash_prefix_shift = (HASH_WIDTH - sizeof(aggr_ht_entry_32::salt)) * 8; - Resize(STANDARD_VECTOR_SIZE * 2); - break; + if (HAS_TRUE_SEL) { + return true_count; + } else { + return count - false_count; } - default: - throw NotImplementedException("Unknown HT entry width"); +} +template +static inline idx_t +DistinctSelectGenericLoopSelSwitch(LEFT_TYPE *__restrict ldata, RIGHT_TYPE *__restrict rdata, + const SelectionVector *__restrict lsel, const SelectionVector *__restrict 
rsel, + const SelectionVector *__restrict result_sel, idx_t count, ValidityMask &lmask, + ValidityMask &rmask, SelectionVector *true_sel, SelectionVector *false_sel) { + if (true_sel && false_sel) { + return DistinctSelectGenericLoop( + ldata, rdata, lsel, rsel, result_sel, count, lmask, rmask, true_sel, false_sel); + } else if (true_sel) { + return DistinctSelectGenericLoop( + ldata, rdata, lsel, rsel, result_sel, count, lmask, rmask, true_sel, false_sel); + } else { + D_ASSERT(false_sel); + return DistinctSelectGenericLoop( + ldata, rdata, lsel, rsel, result_sel, count, lmask, rmask, true_sel, false_sel); } +} - // create additional hash tables for distinct aggrs - distinct_hashes.resize(aggregates.size()); +template +static inline idx_t +DistinctSelectGenericLoopSwitch(LEFT_TYPE *__restrict ldata, RIGHT_TYPE *__restrict rdata, + const SelectionVector *__restrict lsel, const SelectionVector *__restrict rsel, + const SelectionVector *__restrict result_sel, idx_t count, ValidityMask &lmask, + ValidityMask &rmask, SelectionVector *true_sel, SelectionVector *false_sel) { + if (!lmask.AllValid() || rmask.AllValid()) { + return DistinctSelectGenericLoopSelSwitch( + ldata, rdata, lsel, rsel, result_sel, count, lmask, rmask, true_sel, false_sel); + } else { + return DistinctSelectGenericLoopSelSwitch( + ldata, rdata, lsel, rsel, result_sel, count, lmask, rmask, true_sel, false_sel); + } +} - idx_t payload_idx = 0; - for (idx_t i = 0; i < aggregates.size(); i++) { - auto &aggr = aggregates[i]; - if (aggr.distinct) { - // group types plus aggr return type - vector distinct_group_types(group_types); - for (idx_t child_idx = 0; child_idx < aggr.child_count; child_idx++) { - distinct_group_types.push_back(payload_types[payload_idx]); - } - distinct_hashes[i] = make_unique(buffer_manager, distinct_group_types); +template +static idx_t DistinctSelectGeneric(Vector &left, Vector &right, idx_t vcount, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, 
SelectionVector *false_sel) { + VectorData ldata, rdata; + + left.Orrify(vcount, ldata); + right.Orrify(vcount, rdata); + + return DistinctSelectGenericLoopSwitch( + (LEFT_TYPE *)ldata.data, (RIGHT_TYPE *)rdata.data, ldata.sel, rdata.sel, sel, count, ldata.validity, + rdata.validity, true_sel, false_sel); +} +template +static inline idx_t DistinctSelectFlatLoop(LEFT_TYPE *__restrict ldata, RIGHT_TYPE *__restrict rdata, + const SelectionVector *sel, idx_t count, ValidityMask &lmask, + ValidityMask &rmask, SelectionVector *true_sel, SelectionVector *false_sel) { + idx_t true_count = 0, false_count = 0; + for (idx_t i = 0; i < count; i++) { + idx_t result_idx = sel->get_index(i); + const auto idx = DENSE ? i : result_idx; + idx_t lidx = LEFT_CONSTANT ? 0 : idx; + idx_t ridx = RIGHT_CONSTANT ? 0 : idx; + const bool lnull = !lmask.RowIsValid(lidx); + const bool rnull = !rmask.RowIsValid(ridx); + bool comparison_result = OP::Operation(ldata[lidx], rdata[ridx], lnull, rnull); + if (HAS_TRUE_SEL) { + true_sel->set_index(true_count, result_idx); + true_count += comparison_result; } - payload_idx += aggr.child_count; + if (HAS_FALSE_SEL) { + false_sel->set_index(false_count, result_idx); + false_count += !comparison_result; + } + } + if (HAS_TRUE_SEL) { + return true_count; + } else { + return count - false_count; } - addresses.Initialize(LogicalType::POINTER); } -GroupedAggregateHashTable::~GroupedAggregateHashTable() { - Destroy(); +template +static inline idx_t DistinctSelectFlatLoopSelSwitch(LEFT_TYPE *__restrict ldata, RIGHT_TYPE *__restrict rdata, + const SelectionVector *sel, idx_t count, ValidityMask &lmask, + ValidityMask &rmask, SelectionVector *true_sel, + SelectionVector *false_sel) { + if (true_sel && false_sel) { + return DistinctSelectFlatLoop(ldata, rdata, sel, count, lmask, rmask, true_sel, false_sel); + } else if (true_sel) { + return DistinctSelectFlatLoop(ldata, rdata, sel, count, lmask, rmask, true_sel, false_sel); + } else { + D_ASSERT(false_sel); + 
return DistinctSelectFlatLoop(ldata, rdata, sel, count, lmask, rmask, true_sel, false_sel); + } } -template -void GroupedAggregateHashTable::PayloadApply(FUNC fun) { - if (entries == 0) { - return; +template +static inline idx_t DistinctSelectFlatLoopSwitch(LEFT_TYPE *__restrict ldata, RIGHT_TYPE *__restrict rdata, + const SelectionVector *sel, idx_t count, ValidityMask &lmask, + ValidityMask &rmask, SelectionVector *true_sel, + SelectionVector *false_sel) { + return DistinctSelectFlatLoopSelSwitch( + ldata, rdata, sel, count, lmask, rmask, true_sel, false_sel); +} +template +static idx_t DistinctSelectFlat(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + auto ldata = FlatVector::GetData(left); + auto rdata = FlatVector::GetData(right); + if (LEFT_CONSTANT) { + ValidityMask validity; + if (ConstantVector::IsNull(left)) { + validity.SetAllInvalid(1); + } + return DistinctSelectFlatLoopSwitch( + ldata, rdata, sel, count, validity, FlatVector::Validity(right), true_sel, false_sel); + } else if (RIGHT_CONSTANT) { + ValidityMask validity; + if (ConstantVector::IsNull(right)) { + validity.SetAllInvalid(1); + } + return DistinctSelectFlatLoopSwitch( + ldata, rdata, sel, count, FlatVector::Validity(left), validity, true_sel, false_sel); + } else { + return DistinctSelectFlatLoopSwitch( + ldata, rdata, sel, count, FlatVector::Validity(left), FlatVector::Validity(right), true_sel, false_sel); } - idx_t apply_entries = entries; - idx_t page_nr = 0; - idx_t page_offset = 0; +} +template +static idx_t DistinctSelectConstant(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + auto ldata = ConstantVector::GetData(left); + auto rdata = ConstantVector::GetData(right); - for (auto &payload_chunk_ptr : payload_hds_ptrs) { - auto this_entries = MinValue(tuples_per_block, apply_entries); - page_offset = 0; - for (data_ptr_t ptr 
= payload_chunk_ptr, end = payload_chunk_ptr + this_entries * tuple_size; ptr < end; - ptr += tuple_size) { - fun(page_nr, page_offset++, ptr); + // both sides are constant, return either 0 or the count + // in this case we do not fill in the result selection vector at all + if (!OP::Operation(*ldata, *rdata, ConstantVector::IsNull(left), ConstantVector::IsNull(right))) { + if (false_sel) { + for (idx_t i = 0; i < count; i++) { + false_sel->set_index(i, sel->get_index(i)); + } } - apply_entries -= this_entries; - page_nr++; + return 0; + } else { + if (true_sel) { + for (idx_t i = 0; i < count; i++) { + true_sel->set_index(i, sel->get_index(i)); + } + } + return count; } - D_ASSERT(apply_entries == 0); } -void GroupedAggregateHashTable::NewBlock() { - auto pin = buffer_manager.Allocate(Storage::BLOCK_ALLOC_SIZE); - payload_hds.push_back(move(pin)); - payload_hds_ptrs.push_back(payload_hds.back()->Ptr()); - payload_page_offset = 0; +template +static idx_t DistinctSelect(Vector &left, Vector &right, idx_t vcount, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + if (!sel) { + sel = &FlatVector::INCREMENTAL_SELECTION_VECTOR; + } + if (left.GetVectorType() == VectorType::CONSTANT_VECTOR && right.GetVectorType() == VectorType::CONSTANT_VECTOR) { + return DistinctSelectConstant(left, right, sel, count, true_sel, false_sel); + } else if (left.GetVectorType() == VectorType::CONSTANT_VECTOR && + right.GetVectorType() == VectorType::FLAT_VECTOR) { + return DistinctSelectFlat(left, right, sel, count, true_sel, + false_sel); + } else if (left.GetVectorType() == VectorType::FLAT_VECTOR && + right.GetVectorType() == VectorType::CONSTANT_VECTOR) { + return DistinctSelectFlat(left, right, sel, count, true_sel, + false_sel); + } else if (left.GetVectorType() == VectorType::FLAT_VECTOR && right.GetVectorType() == VectorType::FLAT_VECTOR) { + return DistinctSelectFlat(left, right, sel, count, true_sel, + false_sel); + } else { + 
return DistinctSelectGeneric(left, right, vcount, sel, count, true_sel, + false_sel); + } } -void GroupedAggregateHashTable::Destroy() { - // check if there is a destructor - bool has_destructor = false; - for (idx_t i = 0; i < aggregates.size(); i++) { - if (aggregates[i].function.destructor) { - has_destructor = true; +template +static idx_t DistinctSelectNotNull(VectorData &lvdata, VectorData &rvdata, const idx_t count, idx_t &true_count, + const SelectionVector &sel, SelectionVector &maybe_vec, OptionalSelection &true_vec, + OptionalSelection &false_vec) { + auto &lmask = lvdata.validity; + auto &rmask = rvdata.validity; + + idx_t remaining = 0; + if (lmask.AllValid() && rmask.AllValid()) { + // None are NULL, distinguish values. + for (idx_t i = 0; i < count; ++i) { + const auto idx = sel.get_index(i); + maybe_vec.set_index(remaining++, idx); + } + return remaining; + } + + idx_t false_count = 0; + for (idx_t i = 0; i < count; ++i) { + const auto idx = sel.get_index(i); + const auto lidx = lvdata.sel->get_index(idx); + const auto ridx = rvdata.sel->get_index(idx); + const auto lnull = !lmask.RowIsValid(lidx); + const auto rnull = !rmask.RowIsValid(ridx); + if (lnull || rnull) { + // If either is NULL then we can major distinguish them + if (!OP::Operation(lnull, rnull, false, false)) { + false_vec.Append(false_count, idx); + } else { + true_vec.Append(true_count, idx); + } + } else { + // Neither is NULL, distinguish values. 
+ maybe_vec.set_index(remaining++, idx); } } - if (!has_destructor) { - return; - } - // there are aggregates with destructors: loop over the hash table - // and call the destructor method for each of the aggregates - data_ptr_t data_pointers[STANDARD_VECTOR_SIZE]; - Vector state_vector(LogicalType::POINTER, (data_ptr_t)data_pointers); - idx_t count = 0; + true_vec.Advance(true_count); + false_vec.Advance(false_count); - PayloadApply([&](idx_t page_nr, idx_t page_offset, data_ptr_t ptr) { - data_pointers[count++] = ptr + HASH_WIDTH + group_width; - if (count == STANDARD_VECTOR_SIZE) { - CallDestructors(state_vector, count); - count = 0; - } - }); - CallDestructors(state_vector, count); + return remaining; } -template -void GroupedAggregateHashTable::VerifyInternal() { - auto hashes_ptr = (T *)hashes_hdl_ptr; - D_ASSERT(payload_hds.size() == payload_hds_ptrs.size()); - idx_t count = 0; - for (idx_t i = 0; i < capacity; i++) { - if (hashes_ptr[i].page_nr > 0) { - D_ASSERT(hashes_ptr[i].page_offset < tuples_per_block); - D_ASSERT(hashes_ptr[i].page_nr <= payload_hds.size()); - auto ptr = payload_hds_ptrs[hashes_ptr[i].page_nr - 1] + ((hashes_ptr[i].page_offset) * tuple_size); - auto hash = Load(ptr); - D_ASSERT((hashes_ptr[i].salt) == (hash >> hash_prefix_shift)); - - count++; +template +static inline void ScatterSelection(SelectionVector *target, const idx_t count, const SelectionVector &sel, + const SelectionVector &dense_vec) { + if (DENSE && target) { + for (idx_t i = 0; i < count; ++i) { + target->set_index(i, sel.get_index(dense_vec.get_index(i))); } } - D_ASSERT(count == entries); } -idx_t GroupedAggregateHashTable::MaxCapacity() { - idx_t max_pages = 0; - idx_t max_tuples = 0; +struct PositionComparator { + // Select the rows that definitely match. 
+ // Default to the same as the final row + template + static idx_t Definite(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, idx_t count, + SelectionVector *true_sel, SelectionVector &false_sel) { + return Final(left, right, vcount, sel, count, true_sel, &false_sel); + } - switch (entry_type) { - case HtEntryType::HT_WIDTH_32: - max_pages = NumericLimits::Maximum(); - max_tuples = NumericLimits::Maximum(); - break; - default: - D_ASSERT(entry_type == HtEntryType::HT_WIDTH_64); - max_pages = NumericLimits::Maximum(); - max_tuples = NumericLimits::Maximum(); - break; + // Select the possible rows that need further testing. + // Usually this means Is Not Distinct, as those are the semantics used by Postges + template + static idx_t Possible(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, idx_t count, + SelectionVector &true_sel, SelectionVector *false_sel) { + return VectorOperations::NestedEquals(left, right, vcount, sel, count, &true_sel, false_sel); } - return max_pages * MinValue(max_tuples, (idx_t)Storage::BLOCK_ALLOC_SIZE / tuple_size); -} + // Select the matching rows for the final position. + // This needs to be specialised. + template + static idx_t Final(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + return 0; + } -void GroupedAggregateHashTable::Verify() { -#ifdef DEBUG - switch (entry_type) { - case HtEntryType::HT_WIDTH_32: - VerifyInternal(); - break; - case HtEntryType::HT_WIDTH_64: - VerifyInternal(); - break; + // Tie-break based on length when one of the sides has been exhausted, returning true if the LHS matches. + // This essentially means that the existing positions compare equal. + // Default to the same semantics as the OP for idx_t. This works in most cases. 
+ template + static bool TieBreak(const idx_t lpos, const idx_t rpos) { + return OP::Operation(lpos, rpos, false, false); } -#endif -} +}; -template -void GroupedAggregateHashTable::Resize(idx_t size) { - Verify(); +// NotDistinctFrom must always check every column +template <> +idx_t PositionComparator::Definite(Vector &left, Vector &right, idx_t vcount, + const SelectionVector &sel, idx_t count, + SelectionVector *true_sel, SelectionVector &false_sel) { + return 0; +} - D_ASSERT(!is_finalized); +template <> +idx_t PositionComparator::Final(Vector &left, Vector &right, idx_t vcount, + const SelectionVector &sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + return VectorOperations::NestedEquals(left, right, vcount, sel, count, true_sel, false_sel); +} - if (size <= capacity) { - throw InternalException("Cannot downsize a hash table!"); - } - if (size < STANDARD_VECTOR_SIZE) { - size = STANDARD_VECTOR_SIZE; - } +// DistinctFrom must check everything that matched +template <> +idx_t PositionComparator::Possible(Vector &left, Vector &right, idx_t vcount, + const SelectionVector &sel, idx_t count, + SelectionVector &true_sel, SelectionVector *false_sel) { + return count; +} - // size needs to be a power of 2 - D_ASSERT((size & (size - 1)) == 0); - bitmask = size - 1; +template <> +idx_t PositionComparator::Final(Vector &left, Vector &right, idx_t vcount, + const SelectionVector &sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + return VectorOperations::NestedNotEquals(left, right, vcount, sel, count, true_sel, false_sel); +} - auto byte_size = size * sizeof(T); - if (byte_size > (idx_t)Storage::BLOCK_ALLOC_SIZE) { - hashes_hdl = buffer_manager.Allocate(byte_size); - hashes_hdl_ptr = hashes_hdl->Ptr(); - } - memset(hashes_hdl_ptr, 0, byte_size); - hashes_end_ptr = hashes_hdl_ptr + byte_size; - capacity = size; +// Non-strict inequalities must use strict comparisons for Definite +template <> +idx_t 
PositionComparator::Definite(Vector &left, Vector &right, idx_t vcount, + const SelectionVector &sel, idx_t count, + SelectionVector *true_sel, + SelectionVector &false_sel) { + return VectorOperations::NestedLessThan(left, right, vcount, sel, count, true_sel, &false_sel); +} - auto hashes_arr = (T *)hashes_hdl_ptr; +template <> +idx_t PositionComparator::Final(Vector &left, Vector &right, idx_t vcount, + const SelectionVector &sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + return VectorOperations::NestedLessThanEquals(left, right, vcount, sel, count, true_sel, false_sel); +} - PayloadApply([&](idx_t page_nr, idx_t page_offset, data_ptr_t ptr) { - auto hash = Load(ptr); - D_ASSERT((hash & bitmask) == (hash % capacity)); - auto entry_idx = (idx_t)hash & bitmask; - while (hashes_arr[entry_idx].page_nr > 0) { - entry_idx++; - if (entry_idx >= capacity) { - entry_idx = 0; - } - } +template <> +idx_t PositionComparator::Definite(Vector &left, Vector &right, idx_t vcount, + const SelectionVector &sel, idx_t count, + SelectionVector *true_sel, + SelectionVector &false_sel) { + return VectorOperations::NestedGreaterThan(left, right, vcount, sel, count, true_sel, &false_sel); +} - D_ASSERT(!hashes_arr[entry_idx].page_nr); - D_ASSERT(hash >> hash_prefix_shift <= NumericLimits::Maximum()); +template <> +idx_t PositionComparator::Final(Vector &left, Vector &right, idx_t vcount, + const SelectionVector &sel, idx_t count, + SelectionVector *true_sel, + SelectionVector *false_sel) { + return VectorOperations::NestedGreaterThanEquals(left, right, vcount, sel, count, true_sel, false_sel); +} - hashes_arr[entry_idx].salt = hash >> hash_prefix_shift; - hashes_arr[entry_idx].page_nr = page_nr + 1; - hashes_arr[entry_idx].page_offset = page_offset; - }); +// Strict inequalities just use strict for both Definite and Final +template <> +idx_t PositionComparator::Final(Vector &left, Vector &right, idx_t vcount, + const SelectionVector &sel, idx_t count, + 
SelectionVector *true_sel, SelectionVector *false_sel) { + return VectorOperations::NestedLessThan(left, right, vcount, sel, count, true_sel, false_sel); +} - Verify(); +template <> +idx_t PositionComparator::Final(Vector &left, Vector &right, idx_t vcount, + const SelectionVector &sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + return VectorOperations::NestedGreaterThan(left, right, vcount, sel, count, true_sel, false_sel); } -idx_t GroupedAggregateHashTable::AddChunk(DataChunk &groups, DataChunk &payload) { - Vector hashes(LogicalType::HASH); - groups.Hash(hashes); +using StructEntries = vector>; - return AddChunk(groups, hashes, payload); +static StructEntries &StructVectorGetSlicedEntries(Vector &parent, StructEntries &sliced, const idx_t vcount) { + // We have to manually slice STRUCT dictionaries. + auto &children = StructVector::GetEntries(parent); + if (parent.GetVectorType() == VectorType::DICTIONARY_VECTOR) { + auto &dict_sel = DictionaryVector::SelVector(parent); + for (auto &child : children) { + auto v = make_unique(*child, dict_sel, vcount); + sliced.push_back(move(v)); + } + + return sliced; + } + + return children; } -void GroupedAggregateHashTable::UpdateAggregate(AggregateObject &aggr, DataChunk &payload, Vector &distinct_addresses, - idx_t input_count, idx_t payload_idx) { - ExpressionExecutor filter_execution(aggr.filter); - SelectionVector true_sel(STANDARD_VECTOR_SIZE); - auto count = filter_execution.SelectExpression(payload, true_sel); - DataChunk filtered_payload; - auto pay_types = payload.GetTypes(); - filtered_payload.Initialize(pay_types); - filtered_payload.Slice(payload, true_sel, count); - Vector filtered_addresses; - filtered_addresses.Slice(distinct_addresses, true_sel, count); - filtered_addresses.Normalify(count); - aggr.function.update(input_count == 0 ? 
nullptr : &filtered_payload.data[payload_idx], nullptr, input_count, - filtered_addresses, filtered_payload.size()); +template +static idx_t DistinctSelectStruct(Vector &left, Vector &right, idx_t vcount, VectorData &lvdata, VectorData &rvdata, + idx_t count, SelectionVector &maybe_vec, OptionalSelection &true_opt, + OptionalSelection &false_opt) { + // Avoid allocating in the 99% of the cases where we don't need to. + StructEntries lsliced, rsliced; + auto &lchildren = StructVectorGetSlicedEntries(left, lsliced, vcount); + auto &rchildren = StructVectorGetSlicedEntries(right, rsliced, vcount); + D_ASSERT(lchildren.size() == rchildren.size()); + + idx_t match_count = 0; + for (idx_t col_no = 0; col_no < lchildren.size(); ++col_no) { + auto &lchild = *lchildren[col_no]; + auto &rchild = *rchildren[col_no]; + + // Find everything that definitely matches + auto true_count = + PositionComparator::Definite(lchild, rchild, vcount, maybe_vec, count, true_opt, maybe_vec); + if (true_count > 0) { + true_opt.Advance(true_count); + match_count += true_count; + count -= true_count; + } + + if (col_no != lchildren.size() - 1) { + // Find what might match on the next position + true_count = + PositionComparator::Possible(lchild, rchild, vcount, maybe_vec, count, maybe_vec, false_opt); + auto false_count = count - true_count; + false_opt.Advance(false_count); + count = true_count; + } else { + true_count = PositionComparator::Final(lchild, rchild, vcount, maybe_vec, count, true_opt, false_opt); + match_count += true_count; + } + } + + return match_count; } -idx_t GroupedAggregateHashTable::AddChunk(DataChunk &groups, Vector &group_hashes, DataChunk &payload) { - D_ASSERT(!is_finalized); +static void PositionListCursor(SelectionVector &cursor, VectorData &vdata, const idx_t pos, + const SelectionVector &maybe_vec, const idx_t count) { + const auto data = (const list_entry_t *)vdata.data; + for (idx_t i = 0; i < count; ++i) { + const auto idx = maybe_vec.get_index(i); - if 
(groups.size() == 0) { - return 0; + const auto lidx = vdata.sel->get_index(idx); + const auto &entry = data[lidx]; + cursor.set_index(idx, entry.offset + pos); } - // dummy - SelectionVector new_groups(STANDARD_VECTOR_SIZE); +} - D_ASSERT(groups.ColumnCount() == group_types.size()); -#ifdef DEBUG - for (idx_t i = 0; i < group_types.size(); i++) { - D_ASSERT(groups.GetTypes()[i] == group_types[i]); +template +static idx_t DistinctSelectList(Vector &left, Vector &right, idx_t vcount, VectorData &lvdata, VectorData &rvdata, + idx_t count, SelectionVector &maybe_vec, OptionalSelection &true_opt, + OptionalSelection &false_opt) { + if (count == 0) { + return count; } -#endif - Vector addresses(LogicalType::POINTER); - auto new_group_count = FindOrCreateGroups(groups, group_hashes, addresses, new_groups); + // We use them to create dictionary views of the children so we can vectorise the positional comparisons. + SelectionVector lcursor(vcount); + SelectionVector rcursor(vcount); - // now every cell has an entry - // update the aggregates - idx_t payload_idx = 0; + Vector lchild(ListVector::GetEntry(left), lcursor, count); + Vector rchild(ListVector::GetEntry(right), rcursor, count); - for (idx_t aggr_idx = 0; aggr_idx < aggregates.size(); aggr_idx++) { - // for any entries for which a group was found, update the aggregate - auto &aggr = aggregates[aggr_idx]; - auto input_count = (idx_t)aggr.child_count; - if (aggr.distinct) { - // construct chunk for secondary hash table probing - vector probe_types(group_types); - for (idx_t i = 0; i < aggr.child_count; i++) { - probe_types.push_back(payload_types[payload_idx]); - } - DataChunk probe_chunk; - probe_chunk.Initialize(probe_types); - for (idx_t group_idx = 0; group_idx < group_types.size(); group_idx++) { - probe_chunk.data[group_idx].Reference(groups.data[group_idx]); - } - for (idx_t i = 0; i < aggr.child_count; i++) { - probe_chunk.data[group_types.size() + i].Reference(payload.data[payload_idx + i]); - } - 
probe_chunk.SetCardinality(groups); - probe_chunk.Verify(); + // To perform the positional comparison, we use a vectorisation of the following algorithm: + // bool CompareLists(T *left, idx_t nleft, T *right, nright) { + // for (idx_t pos = 0; ; ++pos) { + // if (nleft == pos || nright == pos) + // return OP::TieBreak(nleft, nright); + // if (OP::Definite(*left, *right)) + // return true; + // if (!OP::Maybe(*left, *right)) + // return false; + // } + // ++left, ++right; + // } + // } + const auto ldata = (const list_entry_t *)lvdata.data; + const auto rdata = (const list_entry_t *)rvdata.data; - Vector dummy_addresses(LogicalType::POINTER); - // this is the actual meat, find out which groups plus payload - // value have not been seen yet - idx_t new_group_count = - distinct_hashes[aggr_idx]->FindOrCreateGroups(probe_chunk, dummy_addresses, new_groups); + idx_t match_count = 0; + for (idx_t pos = 0; count > 0; ++pos) { + // Set up the cursors for the current position + PositionListCursor(lcursor, lvdata, pos, maybe_vec, count); + PositionListCursor(rcursor, rvdata, pos, maybe_vec, count); - // now fix up the payload and addresses accordingly by creating - // a selection vector - if (new_group_count > 0) { - if (aggr.filter) { - Vector distinct_addresses; - DataChunk distinct_payload; - distinct_addresses.Slice(addresses, new_groups, new_group_count); - auto pay_types = payload.GetTypes(); - distinct_payload.Initialize(pay_types); - distinct_payload.Slice(payload, new_groups, new_group_count); - distinct_addresses.Verify(new_group_count); - distinct_addresses.Normalify(new_group_count); - UpdateAggregate(aggr, distinct_payload, distinct_addresses, input_count, payload_idx); + // Tie-break the pairs where one of the LISTs is exhausted. 
+ idx_t true_count = 0; + idx_t false_count = 0; + idx_t maybe_count = 0; + for (idx_t i = 0; i < count; ++i) { + const auto idx = maybe_vec.get_index(i); + const auto lidx = lvdata.sel->get_index(idx); + const auto &lentry = ldata[lidx]; + const auto ridx = rvdata.sel->get_index(idx); + const auto &rentry = rdata[ridx]; + if (lentry.length == pos || rentry.length == pos) { + if (PositionComparator::TieBreak(lentry.length, rentry.length)) { + true_opt.Append(true_count, idx); } else { - Vector distinct_addresses; - distinct_addresses.Slice(addresses, new_groups, new_group_count); - for (idx_t i = 0; i < aggr.child_count; i++) { - payload.data[payload_idx + i].Slice(new_groups, new_group_count); - payload.data[payload_idx + i].Verify(new_group_count); - } - distinct_addresses.Verify(new_group_count); - - aggr.function.update(input_count == 0 ? nullptr : &payload.data[payload_idx], nullptr, input_count, - distinct_addresses, new_group_count); + false_opt.Append(false_count, idx); } + } else { + maybe_vec.set_index(maybe_count++, idx); } - } else if (aggr.filter) { - UpdateAggregate(aggr, payload, addresses, input_count, payload_idx); - } else { - aggr.function.update(input_count == 0 ? 
nullptr : &payload.data[payload_idx], aggr.bind_data, input_count, - addresses, payload.size()); } + true_opt.Advance(true_count); + false_opt.Advance(false_count); + count = maybe_count; + match_count += true_count; - // move to the next aggregate - payload_idx += input_count; - VectorOperations::AddInPlace(addresses, aggr.payload_size, payload.size()); + // Find everything that definitely matches + true_count = PositionComparator::Definite(lchild, rchild, vcount, maybe_vec, count, true_opt, maybe_vec); + true_opt.Advance(true_count); + match_count += true_count; + count -= true_count; + + // Find what might match on the next position + maybe_count = PositionComparator::Possible(lchild, rchild, vcount, maybe_vec, count, maybe_vec, false_opt); + false_count = count - maybe_count; + false_opt.Advance(false_count); + count = maybe_count; } - Verify(); - return new_group_count; + return match_count; } -void GroupedAggregateHashTable::FetchAggregates(DataChunk &groups, DataChunk &result) { - groups.Verify(); - D_ASSERT(groups.ColumnCount() == group_types.size()); - for (idx_t i = 0; i < result.ColumnCount(); i++) { - D_ASSERT(result.data[i].GetType() == payload_types[i]); - } - result.SetCardinality(groups); - if (groups.size() == 0) { - return; +template +static idx_t DistinctSelectNested(Vector &left, Vector &right, idx_t vcount, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + // We need multiple, real selections + if (!sel) { + sel = &FlatVector::INCREMENTAL_SELECTION_VECTOR; } - // find the groups associated with the addresses - // FIXME: this should not use the FindOrCreateGroups, creating them is unnecessary - Vector addresses(LogicalType::POINTER); - FindOrCreateGroups(groups, addresses); - // now fetch the aggregates - for (idx_t aggr_idx = 0; aggr_idx < aggregates.size(); aggr_idx++) { - D_ASSERT(result.ColumnCount() > aggr_idx); - VectorOperations::Gather::Set(addresses, result.data[aggr_idx], 
groups.size()); + SelectionVector true_vec; + SelectionVector false_vec; + if (DENSE) { + true_vec.Initialize(vcount); + false_vec.Initialize(vcount); } -} -template -static void TemplatedScatter(VectorData &gdata, Vector &addresses, const SelectionVector &sel, idx_t count, - idx_t type_size) { - auto data = (T *)gdata.data; - auto pointers = FlatVector::GetData(addresses); - if (!gdata.validity.AllValid()) { - for (idx_t i = 0; i < count; i++) { - auto pointer_idx = sel.get_index(i); - auto group_idx = gdata.sel->get_index(pointer_idx); - auto ptr = (T *)pointers[pointer_idx]; + OptionalSelection true_opt(DENSE ? &true_vec : true_sel); + OptionalSelection false_opt(DENSE ? &false_vec : false_sel); - T store_value = !gdata.validity.RowIsValid(group_idx) ? NullValue() : data[group_idx]; - Store(store_value, (data_ptr_t)ptr); - pointers[pointer_idx] += type_size; - } - } else { - for (idx_t i = 0; i < count; i++) { - auto pointer_idx = sel.get_index(i); - auto group_idx = gdata.sel->get_index(pointer_idx); - auto ptr = (T *)pointers[pointer_idx]; + // Handle NULL STRUCTs + VectorData lvdata, rvdata; + left.Orrify(vcount, lvdata); + right.Orrify(vcount, rvdata); - Store(data[group_idx], (data_ptr_t)ptr); - pointers[pointer_idx] += type_size; - } + sel_t maybe_one; + SelectionVector maybe_vec(&maybe_one); + if (count > 1) { + maybe_vec.Initialize(count); } -} + idx_t match_count = 0; + idx_t no_match_count = count; + count = DistinctSelectNotNull(lvdata, rvdata, count, match_count, *sel, maybe_vec, true_opt, false_opt); + no_match_count -= (count + match_count); -void GroupedAggregateHashTable::ScatterGroups(DataChunk &groups, unique_ptr &group_data, - Vector &addresses, const SelectionVector &sel, idx_t count) { - if (count == 0) { - return; + idx_t true_count = 0; + if (PhysicalType::LIST == left.GetType().InternalType()) { + true_count = DistinctSelectList(left, right, vcount, lvdata, rvdata, count, maybe_vec, true_opt, false_opt); + } else { + true_count = + 
DistinctSelectStruct(left, right, vcount, lvdata, rvdata, count, maybe_vec, true_opt, false_opt); } - for (idx_t grp_idx = 0; grp_idx < groups.ColumnCount(); grp_idx++) { - auto &data = groups.data[grp_idx]; - auto &gdata = group_data[grp_idx]; - - auto type_size = GetTypeIdSize(data.GetType().InternalType()); - switch (data.GetType().InternalType()) { - case PhysicalType::BOOL: - case PhysicalType::INT8: - TemplatedScatter(gdata, addresses, sel, count, type_size); - break; - case PhysicalType::INT16: - TemplatedScatter(gdata, addresses, sel, count, type_size); - break; - case PhysicalType::INT32: - TemplatedScatter(gdata, addresses, sel, count, type_size); - break; - case PhysicalType::INT64: - TemplatedScatter(gdata, addresses, sel, count, type_size); - break; - case PhysicalType::UINT8: - TemplatedScatter(gdata, addresses, sel, count, type_size); - break; - case PhysicalType::UINT16: - TemplatedScatter(gdata, addresses, sel, count, type_size); - break; - case PhysicalType::UINT32: - TemplatedScatter(gdata, addresses, sel, count, type_size); - break; - case PhysicalType::UINT64: - TemplatedScatter(gdata, addresses, sel, count, type_size); - break; - case PhysicalType::INT128: - TemplatedScatter(gdata, addresses, sel, count, type_size); - break; - case PhysicalType::FLOAT: - TemplatedScatter(gdata, addresses, sel, count, type_size); - break; - case PhysicalType::DOUBLE: - TemplatedScatter(gdata, addresses, sel, count, type_size); - break; - case PhysicalType::INTERVAL: - TemplatedScatter(gdata, addresses, sel, count, type_size); - break; - case PhysicalType::VARCHAR: { - auto string_data = (string_t *)gdata.data; - auto pointers = FlatVector::GetData(addresses); + auto false_count = count - true_count; + match_count += true_count; + no_match_count += false_count; - for (idx_t i = 0; i < count; i++) { - auto pointer_idx = sel.get_index(i); - auto group_idx = gdata.sel->get_index(pointer_idx); - auto ptr = pointers[pointer_idx]; - if 
(!gdata.validity.RowIsValid(group_idx)) { - Store(NullValue(), ptr); - } else if (string_data[group_idx].IsInlined()) { - Store(string_data[group_idx], ptr); - } else { - Store( - string_heap.AddBlob(string_data[group_idx].GetDataUnsafe(), string_data[group_idx].GetSize()), - ptr); - } + ScatterSelection(true_sel, match_count, *sel, true_vec); + ScatterSelection(false_sel, no_match_count, *sel, false_vec); - pointers[pointer_idx] += type_size; - } - break; - } - default: - throw Exception("Unsupported type for group vector"); - } - } + return match_count; } -template -static void TemplatedCompareGroups(VectorData &gdata, Vector &addresses, SelectionVector &sel, idx_t &count, - idx_t type_size, SelectionVector &no_match, idx_t &no_match_count) { - auto data = (T *)gdata.data; - auto pointers = FlatVector::GetData(addresses); - idx_t match_count = 0; - if (!gdata.validity.AllValid()) { - for (idx_t i = 0; i < count; i++) { - auto idx = sel.get_index(i); - auto group_idx = gdata.sel->get_index(idx); - auto value = Load((data_ptr_t)pointers[idx]); - - if (!gdata.validity.RowIsValid(group_idx)) { - if (IsNullValue(value)) { - // match: move to next value to compare - sel.set_index(match_count++, idx); - pointers[idx] += type_size; - } else { - no_match.set_index(no_match_count++, idx); - } - } else { - if (Equals::Operation(data[group_idx], value)) { - sel.set_index(match_count++, idx); - pointers[idx] += type_size; - } else { - no_match.set_index(no_match_count++, idx); - } - } - } - } else { - for (idx_t i = 0; i < count; i++) { - auto idx = sel.get_index(i); - auto group_idx = gdata.sel->get_index(idx); - auto value = Load((data_ptr_t)pointers[idx]); +template +static void NestedDistinctExecute(Vector &left, Vector &right, Vector &result, idx_t count); - if (Equals::Operation(data[group_idx], value)) { - sel.set_index(match_count++, idx); - pointers[idx] += type_size; - } else { - no_match.set_index(no_match_count++, idx); - } - } +template +static inline void 
TemplatedDistinctExecute(Vector &left, Vector &right, Vector &result, idx_t count) { + DistinctExecute(left, right, result, count); +} +template +static void ExecuteDistinct(Vector &left, Vector &right, Vector &result, idx_t count) { + D_ASSERT(left.GetType() == right.GetType() && result.GetType() == LogicalType::BOOLEAN); + // the inplace loops take the result as the last parameter + switch (left.GetType().InternalType()) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + TemplatedDistinctExecute(left, right, result, count); + break; + case PhysicalType::INT16: + TemplatedDistinctExecute(left, right, result, count); + break; + case PhysicalType::INT32: + TemplatedDistinctExecute(left, right, result, count); + break; + case PhysicalType::INT64: + TemplatedDistinctExecute(left, right, result, count); + break; + case PhysicalType::UINT8: + TemplatedDistinctExecute(left, right, result, count); + break; + case PhysicalType::UINT16: + TemplatedDistinctExecute(left, right, result, count); + break; + case PhysicalType::UINT32: + TemplatedDistinctExecute(left, right, result, count); + break; + case PhysicalType::UINT64: + TemplatedDistinctExecute(left, right, result, count); + break; + case PhysicalType::INT128: + TemplatedDistinctExecute(left, right, result, count); + break; + case PhysicalType::FLOAT: + TemplatedDistinctExecute(left, right, result, count); + break; + case PhysicalType::DOUBLE: + TemplatedDistinctExecute(left, right, result, count); + break; + case PhysicalType::INTERVAL: + TemplatedDistinctExecute(left, right, result, count); + break; + case PhysicalType::VARCHAR: + TemplatedDistinctExecute(left, right, result, count); + break; + case PhysicalType::LIST: + case PhysicalType::MAP: + case PhysicalType::STRUCT: + NestedDistinctExecute(left, right, result, count); + break; + default: + throw InternalException("Invalid type for distinct comparison"); } - count = match_count; } -static void CompareGroups(DataChunk &groups, unique_ptr &group_data, Vector 
&addresses, - SelectionVector &sel, idx_t count, SelectionVector &no_match, idx_t &no_match_count) { - for (idx_t group_idx = 0; group_idx < groups.ColumnCount(); group_idx++) { - auto &data = groups.data[group_idx]; - auto &gdata = group_data[group_idx]; - auto type_size = GetTypeIdSize(data.GetType().InternalType()); - switch (data.GetType().InternalType()) { - case PhysicalType::BOOL: - case PhysicalType::INT8: - TemplatedCompareGroups(gdata, addresses, sel, count, type_size, no_match, no_match_count); - break; - case PhysicalType::INT16: - TemplatedCompareGroups(gdata, addresses, sel, count, type_size, no_match, no_match_count); - break; - case PhysicalType::INT32: - TemplatedCompareGroups(gdata, addresses, sel, count, type_size, no_match, no_match_count); - break; - case PhysicalType::INT64: - TemplatedCompareGroups(gdata, addresses, sel, count, type_size, no_match, no_match_count); - break; - case PhysicalType::UINT8: - TemplatedCompareGroups(gdata, addresses, sel, count, type_size, no_match, no_match_count); - break; - case PhysicalType::UINT16: - TemplatedCompareGroups(gdata, addresses, sel, count, type_size, no_match, no_match_count); - break; - case PhysicalType::UINT32: - TemplatedCompareGroups(gdata, addresses, sel, count, type_size, no_match, no_match_count); - break; - case PhysicalType::UINT64: - TemplatedCompareGroups(gdata, addresses, sel, count, type_size, no_match, no_match_count); - break; - case PhysicalType::INT128: - TemplatedCompareGroups(gdata, addresses, sel, count, type_size, no_match, no_match_count); - break; - case PhysicalType::FLOAT: - TemplatedCompareGroups(gdata, addresses, sel, count, type_size, no_match, no_match_count); - break; - case PhysicalType::DOUBLE: - TemplatedCompareGroups(gdata, addresses, sel, count, type_size, no_match, no_match_count); - break; - case PhysicalType::INTERVAL: - TemplatedCompareGroups(gdata, addresses, sel, count, type_size, no_match, no_match_count); - break; - case PhysicalType::VARCHAR: - 
TemplatedCompareGroups(gdata, addresses, sel, count, type_size, no_match, no_match_count); - break; - default: - throw Exception("Unsupported type for group vector"); - } +template +static idx_t TemplatedDistinctSelectOperation(Vector &left, Vector &right, idx_t vcount, const SelectionVector *sel, + idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { + // the inplace loops take the result as the last parameter + switch (left.GetType().InternalType()) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + return DistinctSelect(left, right, vcount, sel, count, true_sel, false_sel); + case PhysicalType::INT16: + return DistinctSelect(left, right, vcount, sel, count, true_sel, false_sel); + case PhysicalType::INT32: + return DistinctSelect(left, right, vcount, sel, count, true_sel, false_sel); + case PhysicalType::INT64: + return DistinctSelect(left, right, vcount, sel, count, true_sel, false_sel); + case PhysicalType::UINT8: + return DistinctSelect(left, right, vcount, sel, count, true_sel, false_sel); + case PhysicalType::UINT16: + return DistinctSelect(left, right, vcount, sel, count, true_sel, false_sel); + case PhysicalType::UINT32: + return DistinctSelect(left, right, vcount, sel, count, true_sel, false_sel); + case PhysicalType::UINT64: + return DistinctSelect(left, right, vcount, sel, count, true_sel, false_sel); + case PhysicalType::INT128: + return DistinctSelect(left, right, vcount, sel, count, true_sel, false_sel); + case PhysicalType::FLOAT: + return DistinctSelect(left, right, vcount, sel, count, true_sel, false_sel); + case PhysicalType::DOUBLE: + return DistinctSelect(left, right, vcount, sel, count, true_sel, false_sel); + case PhysicalType::INTERVAL: + return DistinctSelect(left, right, vcount, sel, count, true_sel, false_sel); + case PhysicalType::VARCHAR: + return DistinctSelect(left, right, vcount, sel, count, true_sel, false_sel); + case PhysicalType::MAP: + case PhysicalType::STRUCT: + case PhysicalType::LIST: + return 
DistinctSelectNested(left, right, vcount, sel, count, true_sel, false_sel); + default: + throw InternalException("Invalid type for distinct selection"); } } -template -idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, Vector &group_hashes, Vector &addresses, - SelectionVector &new_groups_out) { - - D_ASSERT(!is_finalized); - - if (entries + groups.size() > MaxCapacity()) { - throw InternalException("Hash table capacity reached"); - } +template +static void NestedDistinctExecute(Vector &left, Vector &right, Vector &result, idx_t count) { + const auto left_constant = left.GetVectorType() == VectorType::CONSTANT_VECTOR; + const auto right_constant = right.GetVectorType() == VectorType::CONSTANT_VECTOR; - // resize at 50% capacity, also need to fit the entire vector - if (capacity - entries <= groups.size() || entries > capacity / LOAD_FACTOR) { - Resize(capacity * 2); + if (left_constant && right_constant) { + // both sides are constant, so just compare one element. 
+ result.SetVectorType(VectorType::CONSTANT_VECTOR); + auto result_data = ConstantVector::GetData(result); + SelectionVector true_sel(1); + auto match_count = TemplatedDistinctSelectOperation(left, right, 1, nullptr, 1, &true_sel, nullptr); + result_data[0] = match_count > 0; + return; } - D_ASSERT(capacity - entries >= groups.size()); - D_ASSERT(groups.ColumnCount() == group_types.size()); - // we need to be able to fit at least one vector of data - D_ASSERT(capacity - entries >= groups.size()); - D_ASSERT(group_hashes.GetType() == LogicalType::HASH); - - group_hashes.Normalify(groups.size()); - auto group_hashes_ptr = FlatVector::GetData(group_hashes); - - D_ASSERT(ht_offsets.GetVectorType() == VectorType::FLAT_VECTOR); - D_ASSERT(ht_offsets.GetType() == LogicalType::BIGINT); + SelectionVector true_sel(count); + SelectionVector false_sel(count); - D_ASSERT(addresses.GetType() == LogicalType::POINTER); - addresses.Normalify(groups.size()); - auto addresses_ptr = FlatVector::GetData(addresses); + // DISTINCT is either true or false + idx_t match_count = + TemplatedDistinctSelectOperation(left, right, count, nullptr, count, &true_sel, &false_sel); - // now compute the entry in the table based on the hash using a modulo - UnaryExecutor::Execute(group_hashes, ht_offsets, groups.size(), [&](hash_t element) { - D_ASSERT((element & bitmask) == (element % capacity)); - return (element & bitmask); - }); - auto ht_offsets_ptr = FlatVector::GetData(ht_offsets); + result.SetVectorType(VectorType::FLAT_VECTOR); + auto result_data = FlatVector::GetData(result); - // precompute the hash salts for faster comparison below - D_ASSERT(hash_salts.GetType() == LogicalType::SMALLINT); - UnaryExecutor::Execute(group_hashes, hash_salts, groups.size(), - [&](hash_t element) { return (element >> hash_prefix_shift); }); - auto hash_salts_ptr = FlatVector::GetData(hash_salts); + for (idx_t i = 0; i < match_count; ++i) { + const auto idx = true_sel.get_index(i); + result_data[idx] = true; + } 
- // we start out with all entries [0, 1, 2, ..., groups.size()] - const SelectionVector *sel_vector = &FlatVector::INCREMENTAL_SELECTION_VECTOR; + const idx_t no_match_count = count - match_count; + for (idx_t i = 0; i < no_match_count; ++i) { + const auto idx = false_sel.get_index(i); + result_data[idx] = false; + } +} - idx_t remaining_entries = groups.size(); +void VectorOperations::DistinctFrom(Vector &left, Vector &right, Vector &result, idx_t count) { + ExecuteDistinct(left, right, result, count); +} - // orrify all the groups - auto group_data = unique_ptr(new VectorData[groups.ColumnCount()]); - for (idx_t grp_idx = 0; grp_idx < groups.ColumnCount(); grp_idx++) { - groups.data[grp_idx].Orrify(groups.size(), group_data[grp_idx]); - } +void VectorOperations::NotDistinctFrom(Vector &left, Vector &right, Vector &result, idx_t count) { + ExecuteDistinct(left, right, result, count); +} - idx_t new_group_count = 0; - while (remaining_entries > 0) { - idx_t new_entry_count = 0; - idx_t need_compare_count = 0; - idx_t no_match_count = 0; +// true := A != B with nulls being equal +idx_t VectorOperations::DistinctFrom(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + return TemplatedDistinctSelectOperation(left, right, count, sel, count, true_sel, + false_sel); +} +// true := A == B with nulls being equal +idx_t VectorOperations::NotDistinctFrom(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + return TemplatedDistinctSelectOperation(left, right, count, sel, count, true_sel, + false_sel); +} - // first figure out for each remaining whether or not it belongs to a full or empty group - for (idx_t i = 0; i < remaining_entries; i++) { - const idx_t index = sel_vector->get_index(i); - const auto ht_entry_ptr = ((T *)this->hashes_hdl_ptr) + ht_offsets_ptr[index]; - if (ht_entry_ptr->page_nr == 0) { // we use page 
number 0 as a "unused marker" - // cell is empty; setup the new entry - if (payload_page_offset == tuples_per_block || payload_hds.empty()) { - NewBlock(); - } +// true := A > B with nulls being maximal +idx_t VectorOperations::DistinctGreaterThan(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + return TemplatedDistinctSelectOperation(left, right, count, sel, count, true_sel, + false_sel); +} +// true := A >= B with nulls being maximal +idx_t VectorOperations::DistinctGreaterThanEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + return TemplatedDistinctSelectOperation(left, right, count, sel, count, + true_sel, false_sel); +} +// true := A < B with nulls being maximal +idx_t VectorOperations::DistinctLessThan(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + return TemplatedDistinctSelectOperation(left, right, count, sel, count, true_sel, + false_sel); +} +// true := A <= B with nulls being maximal +idx_t VectorOperations::DistinctLessThanEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + return TemplatedDistinctSelectOperation(left, right, count, sel, count, + true_sel, false_sel); +} - auto entry_payload_ptr = payload_hds_ptrs.back() + (payload_page_offset * tuple_size); +// true := A != B with nulls being equal, inputs selected +idx_t VectorOperations::NestedNotEquals(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, + idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { + return TemplatedDistinctSelectOperation(left, right, vcount, &sel, count, true_sel, + false_sel); +} +// true := A == B with nulls being equal, inputs selected +idx_t VectorOperations::NestedEquals(Vector &left, Vector 
&right, idx_t vcount, const SelectionVector &sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + return TemplatedDistinctSelectOperation(left, right, vcount, &sel, count, true_sel, + false_sel); +} +// true := A > B with nulls being maximal, inputs selected +idx_t VectorOperations::NestedGreaterThan(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, + idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { + return TemplatedDistinctSelectOperation(left, right, vcount, &sel, count, + true_sel, false_sel); +} +// true := A >= B with nulls being maximal, inputs selected +idx_t VectorOperations::NestedGreaterThanEquals(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, + idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { + return TemplatedDistinctSelectOperation(left, right, vcount, &sel, count, + true_sel, false_sel); +} +// true := A < B with nulls being maximal, inputs selected +idx_t VectorOperations::NestedLessThan(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, + idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { + return TemplatedDistinctSelectOperation(left, right, vcount, &sel, count, true_sel, + false_sel); +} +// true := A <= B with nulls being maximal, inputs selected +idx_t VectorOperations::NestedLessThanEquals(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, + idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { + return TemplatedDistinctSelectOperation(left, right, vcount, &sel, count, + true_sel, false_sel); +} - // copy the group hash to the payload for use in resize - memcpy(entry_payload_ptr, &group_hashes_ptr[index], HASH_WIDTH); - D_ASSERT((*(hash_t *)entry_payload_ptr) == group_hashes_ptr[index]); +} // namespace duckdb +//===--------------------------------------------------------------------===// +// null_operators.cpp +// Description: This file contains the 
implementation of the +// IS NULL/NOT IS NULL operators +//===--------------------------------------------------------------------===// - // initialize the payload info for the column - memcpy(entry_payload_ptr + HASH_WIDTH + group_width, empty_payload_data.get(), payload_width); - D_ASSERT(group_hashes_ptr[index] >> hash_prefix_shift <= NumericLimits::Maximum()); - D_ASSERT(payload_page_offset < tuples_per_block); - D_ASSERT(payload_hds.size() < NumericLimits::Maximum()); - D_ASSERT(payload_page_offset + 1 < NumericLimits::Maximum()); - ht_entry_ptr->salt = group_hashes_ptr[index] >> hash_prefix_shift; - D_ASSERT(((*(hash_t *)entry_payload_ptr) >> hash_prefix_shift) == ht_entry_ptr->salt); +namespace duckdb { - // page numbers start at one so we can use 0 as empty flag - // GetPtr undoes this - ht_entry_ptr->page_nr = payload_hds.size(); - ht_entry_ptr->page_offset = payload_page_offset++; +template +void IsNullLoop(Vector &input, Vector &result, idx_t count) { + D_ASSERT(result.GetType() == LogicalType::BOOLEAN); - // update selection lists for outer loops - empty_vector.set_index(new_entry_count++, index); - new_groups_out.set_index(new_group_count++, index); - entries++; + if (input.GetVectorType() == VectorType::CONSTANT_VECTOR) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + auto result_data = ConstantVector::GetData(result); + *result_data = INVERSE ? !ConstantVector::IsNull(input) : ConstantVector::IsNull(input); + } else { + VectorData data; + input.Orrify(count, data); - addresses_ptr[index] = entry_payload_ptr + HASH_WIDTH; + result.SetVectorType(VectorType::FLAT_VECTOR); + auto result_data = FlatVector::GetData(result); + for (idx_t i = 0; i < count; i++) { + auto idx = data.sel->get_index(i); + result_data[i] = INVERSE ? 
data.validity.RowIsValid(idx) : !data.validity.RowIsValid(idx); + } + } +} - } else { - // cell is occupied: add to check list - // only need to check if hash salt in ptr == prefix of hash in payload - if (ht_entry_ptr->salt == hash_salts_ptr[index]) { - group_compare_vector.set_index(need_compare_count++, index); +void VectorOperations::IsNotNull(Vector &input, Vector &result, idx_t count) { + IsNullLoop(input, result, count); +} - auto page_ptr = payload_hds_ptrs[ht_entry_ptr->page_nr - 1]; - auto page_offset = ht_entry_ptr->page_offset * tuple_size; - addresses_ptr[index] = page_ptr + page_offset + HASH_WIDTH; +void VectorOperations::IsNull(Vector &input, Vector &result, idx_t count) { + IsNullLoop(input, result, count); +} - } else { - no_match_vector.set_index(no_match_count++, index); - } - } - } +bool VectorOperations::HasNotNull(Vector &input, idx_t count) { + if (count == 0) { + return false; + } + if (input.GetVectorType() == VectorType::CONSTANT_VECTOR) { + return !ConstantVector::IsNull(input); + } else { + VectorData data; + input.Orrify(count, data); - if (new_entry_count > 0) { - // for each of the locations that are empty, serialize the group columns to the locations - ScatterGroups(groups, group_data, addresses, empty_vector, new_entry_count); - } - // now we have only the tuples remaining that might match to an existing group - // start performing comparisons with each of the groups - if (need_compare_count > 0) { - CompareGroups(groups, group_data, addresses, group_compare_vector, need_compare_count, no_match_vector, - no_match_count); + if (data.validity.AllValid()) { + return true; } - - // each of the entries that do not match we move them to the next entry in the HT - for (idx_t i = 0; i < no_match_count; i++) { - idx_t index = no_match_vector.get_index(i); - ht_offsets_ptr[index]++; - if (ht_offsets_ptr[index] >= capacity) { - ht_offsets_ptr[index] = 0; + for (idx_t i = 0; i < count; i++) { + auto idx = data.sel->get_index(i); + if 
(data.validity.RowIsValid(idx)) { + return true; } } - sel_vector = &no_match_vector; - remaining_entries = no_match_count; + return false; } - // pointers in addresses now were moved behind the grousp by CompareGroups/ScatterGroups but we may have to add - // padding still to point at the payload. - VectorOperations::AddInPlace(addresses, group_padding, groups.size()); - return new_group_count; } -// this is to support distinct aggregations where we need to record whether we -// have already seen a value for a group -idx_t GroupedAggregateHashTable::FindOrCreateGroups(DataChunk &groups, Vector &group_hashes, Vector &addresses_out, - SelectionVector &new_groups_out) { - switch (entry_type) { - case HtEntryType::HT_WIDTH_64: - return FindOrCreateGroupsInternal(groups, group_hashes, addresses_out, new_groups_out); - case HtEntryType::HT_WIDTH_32: - return FindOrCreateGroupsInternal(groups, group_hashes, addresses_out, new_groups_out); - default: - throw NotImplementedException("Unknown HT entry width"); +bool VectorOperations::HasNull(Vector &input, idx_t count) { + if (count == 0) { + return false; } -} + if (input.GetVectorType() == VectorType::CONSTANT_VECTOR) { + return ConstantVector::IsNull(input); + } else { + VectorData data; + input.Orrify(count, data); -void GroupedAggregateHashTable::FindOrCreateGroups(DataChunk &groups, Vector &addresses) { - // create a dummy new_groups sel vector - SelectionVector new_groups(STANDARD_VECTOR_SIZE); - FindOrCreateGroups(groups, addresses, new_groups); + if (data.validity.AllValid()) { + return false; + } + for (idx_t i = 0; i < count; i++) { + auto idx = data.sel->get_index(i); + if (!data.validity.RowIsValid(idx)) { + return true; + } + } + return false; + } } -idx_t GroupedAggregateHashTable::FindOrCreateGroups(DataChunk &groups, Vector &addresses_out, - SelectionVector &new_groups_out) { - Vector hashes(LogicalType::HASH); - groups.Hash(hashes); - return FindOrCreateGroups(groups, hashes, addresses_out, 
new_groups_out); -} +} // namespace duckdb +//===--------------------------------------------------------------------===// +// numeric_inplace_operators.cpp +// Description: This file contains the implementation of numeric inplace ops +// += *= /= -= %= +//===--------------------------------------------------------------------===// -void GroupedAggregateHashTable::FlushMove(Vector &source_addresses, Vector &source_hashes, idx_t count) { - D_ASSERT(source_addresses.GetType() == LogicalType::POINTER); - D_ASSERT(source_hashes.GetType() == LogicalType::HASH); - DataChunk groups; - groups.Initialize(group_types); - groups.SetCardinality(count); - for (idx_t i = 0; i < groups.ColumnCount(); i++) { - auto &column = groups.data[i]; - VectorOperations::Gather::Set(source_addresses, column, count); - } - SelectionVector new_groups(STANDARD_VECTOR_SIZE); - Vector group_addresses(LogicalType::POINTER); - SelectionVector new_groups_sel(STANDARD_VECTOR_SIZE); +#include - FindOrCreateGroups(groups, source_hashes, group_addresses, new_groups_sel); +namespace duckdb { - VectorOperations::AddInPlace(source_addresses, group_padding, count); +//===--------------------------------------------------------------------===// +// In-Place Addition +//===--------------------------------------------------------------------===// - for (auto &aggr : aggregates) { - // for any entries for which a group was found, update the aggregate - D_ASSERT(aggr.function.combine); - aggr.function.combine(source_addresses, group_addresses, count); - VectorOperations::AddInPlace(source_addresses, aggr.payload_size, count); - VectorOperations::AddInPlace(group_addresses, aggr.payload_size, count); +void VectorOperations::AddInPlace(Vector &input, int64_t right, idx_t count) { + D_ASSERT(input.GetType().id() == LogicalTypeId::POINTER); + if (right == 0) { + return; + } + switch (input.GetVectorType()) { + case VectorType::CONSTANT_VECTOR: { + D_ASSERT(!ConstantVector::IsNull(input)); + auto data = 
ConstantVector::GetData(input); + *data += right; + break; + } + default: { + D_ASSERT(input.GetVectorType() == VectorType::FLAT_VECTOR); + auto data = FlatVector::GetData(input); + for (idx_t i = 0; i < count; i++) { + data[i] += right; + } + break; + } } } -void GroupedAggregateHashTable::Combine(GroupedAggregateHashTable &other) { +} // namespace duckdb - D_ASSERT(!is_finalized); - D_ASSERT(other.payload_width == payload_width); - D_ASSERT(other.group_width == group_width); - D_ASSERT(other.tuple_size == tuple_size); - D_ASSERT(other.tuples_per_block == tuples_per_block); - if (other.entries == 0) { - return; - } - Vector addresses(LogicalType::POINTER); - auto addresses_ptr = FlatVector::GetData(addresses); - Vector hashes(LogicalType::HASH); - auto hashes_ptr = FlatVector::GetData(hashes); - idx_t group_idx = 0; - other.PayloadApply([&](idx_t page_nr, idx_t page_offset, data_ptr_t ptr) { - auto hash = Load(ptr); - hashes_ptr[group_idx] = hash; - addresses_ptr[group_idx] = ptr + HASH_WIDTH; - group_idx++; - if (group_idx == STANDARD_VECTOR_SIZE) { - FlushMove(addresses, hashes, group_idx); - group_idx = 0; - } - }); - FlushMove(addresses, hashes, group_idx); - string_heap.MergeHeap(other.string_heap); - Verify(); -} +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/vector_operations/decimal_cast.hpp +// +// +//===----------------------------------------------------------------------===// -struct PartitionInfo { - PartitionInfo() : addresses(LogicalType::POINTER), hashes(LogicalType::HASH), group_count(0) { - addresses_ptr = FlatVector::GetData(addresses); - hashes_ptr = FlatVector::GetData(hashes); - }; - Vector addresses; - Vector hashes; - idx_t group_count; - data_ptr_t *addresses_ptr; - hash_t *hashes_ptr; -}; -void GroupedAggregateHashTable::Partition(vector &partition_hts, hash_t mask, - idx_t shift) { - D_ASSERT(partition_hts.size() > 1); - vector partition_info(partition_hts.size()); - 
PayloadApply([&](idx_t page_nr, idx_t page_offset, data_ptr_t ptr) { - auto hash = Load(ptr); +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/vector_operations/general_cast.hpp +// +// +//===----------------------------------------------------------------------===// - idx_t partition = (hash & mask) >> shift; - D_ASSERT(partition < partition_hts.size()); - auto &info = partition_info[partition]; - info.hashes_ptr[info.group_count] = hash; - info.addresses_ptr[info.group_count] = ptr + HASH_WIDTH; - info.group_count++; - if (info.group_count == STANDARD_VECTOR_SIZE) { - D_ASSERT(partition_hts[partition]); - partition_hts[partition]->FlushMove(info.addresses, info.hashes, info.group_count); - info.group_count = 0; - } - }); - idx_t info_idx = 0; - idx_t total_count = 0; - for (auto &partition_entry : partition_hts) { - auto &info = partition_info[info_idx++]; - partition_entry->FlushMove(info.addresses, info.hashes, info.group_count); - partition_entry->string_heap.MergeHeap(string_heap); - partition_entry->Verify(); - total_count += partition_entry->Size(); - } - D_ASSERT(total_count == entries); - // mark the ht as empty so finalizers are not run - entries = 0; -} -idx_t GroupedAggregateHashTable::Scan(idx_t &scan_position, DataChunk &result) { - auto data_pointers = FlatVector::GetData(addresses); +namespace duckdb { - auto remaining = entries - scan_position; - if (remaining == 0) { - return 0; +struct HandleVectorCastError { + template + static RESULT_TYPE Operation(string error_message, ValidityMask &mask, idx_t idx, string *error_message_ptr, + bool &all_converted) { + HandleCastError::AssignError(error_message, error_message_ptr); + all_converted = false; + mask.SetInvalid(idx); + return NullValue(); } - auto this_n = MinValue((idx_t)STANDARD_VECTOR_SIZE, remaining); +}; - auto chunk_idx = scan_position / tuples_per_block; - auto chunk_offset = (scan_position % tuples_per_block) * tuple_size; 
- D_ASSERT(chunk_offset + tuple_size <= Storage::BLOCK_ALLOC_SIZE); +static string UnimplementedCastMessage(const LogicalType &source_type, const LogicalType &target_type) { + return StringUtil::Format("Unimplemented type for cast (%s -> %s)", source_type.ToString(), target_type.ToString()); +} - auto read_ptr = payload_hds_ptrs[chunk_idx++]; - for (idx_t i = 0; i < this_n; i++) { - data_pointers[i] = read_ptr + chunk_offset + HASH_WIDTH; - chunk_offset += tuple_size; - if (chunk_offset >= tuples_per_block * tuple_size) { - read_ptr = payload_hds_ptrs[chunk_idx++]; - chunk_offset = 0; - } +// NULL cast only works if all values in source are NULL, otherwise an unimplemented cast exception is thrown +static bool TryVectorNullCast(Vector &source, Vector &result, idx_t count, string *error_message) { + bool success = true; + if (VectorOperations::HasNotNull(source, count)) { + HandleCastError::AssignError(UnimplementedCastMessage(source.GetType(), result.GetType()), error_message); + success = false; } + result.SetVectorType(VectorType::CONSTANT_VECTOR); + ConstantVector::SetNull(result, true); + return success; +} - result.SetCardinality(this_n); - // fetch the group columns - for (idx_t i = 0; i < group_types.size(); i++) { - auto &column = result.data[i]; - VectorOperations::Gather::Set(addresses, column, result.size()); +} // namespace duckdb + + + + +namespace duckdb { + +struct VectorDecimalCastData { + VectorDecimalCastData(string *error_message_p, uint8_t width_p, uint8_t scale_p) + : error_message(error_message_p), width(width_p), scale(scale_p) { } - VectorOperations::AddInPlace(addresses, group_padding, result.size()); + string *error_message; + uint8_t width; + uint8_t scale; + bool all_converted = true; +}; - for (idx_t i = 0; i < aggregates.size(); i++) { - auto &target = result.data[group_types.size() + i]; - auto &aggr = aggregates[i]; - aggr.function.finalize(addresses, aggr.bind_data, target, result.size()); - VectorOperations::AddInPlace(addresses, 
aggr.payload_size, result.size()); +template +struct VectorDecimalCastOperator { + template + static RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) { + auto data = (VectorDecimalCastData *)dataptr; + RESULT_TYPE result_value; + if (!OP::template Operation(input, result_value, data->error_message, data->width, + data->scale)) { + return HandleVectorCastError::Operation("Failed to cast decimal value", mask, idx, + data->error_message, data->all_converted); + } + return result_value; } - scan_position += this_n; - return this_n; +}; + +template +bool TemplatedVectorDecimalCast(Vector &source, Vector &result, idx_t count, string *error_message, uint8_t width, + uint8_t scale) { + VectorDecimalCastData input(error_message, width, scale); + UnaryExecutor::GenericExecute>(source, result, count, (void *)&input, + error_message); + return input.all_converted; } -void GroupedAggregateHashTable::Finalize() { - D_ASSERT(!is_finalized); +template +static bool ToDecimalCast(Vector &source, Vector &result, idx_t count, string *error_message) { + auto &result_type = result.GetType(); + auto width = DecimalType::GetWidth(result_type); + auto scale = DecimalType::GetScale(result_type); + switch (result_type.InternalType()) { + case PhysicalType::INT16: + return TemplatedVectorDecimalCast(source, result, count, error_message, width, + scale); + case PhysicalType::INT32: + return TemplatedVectorDecimalCast(source, result, count, error_message, width, + scale); + case PhysicalType::INT64: + return TemplatedVectorDecimalCast(source, result, count, error_message, width, + scale); + case PhysicalType::INT128: + return TemplatedVectorDecimalCast(source, result, count, error_message, width, + scale); + default: + throw InternalException("Unimplemented internal type for decimal"); + } +} - // early release hashes, not needed for partition/scan - hashes_hdl.reset(); - is_finalized = true; +template +static bool FromDecimalCast(Vector &source, Vector 
&result, idx_t count, string *error_message) { + auto &source_type = source.GetType(); + auto width = DecimalType::GetWidth(source_type); + auto scale = DecimalType::GetScale(source_type); + switch (source_type.InternalType()) { + case PhysicalType::INT16: + return TemplatedVectorDecimalCast(source, result, count, error_message, width, + scale); + case PhysicalType::INT32: + return TemplatedVectorDecimalCast(source, result, count, error_message, width, + scale); + case PhysicalType::INT64: + return TemplatedVectorDecimalCast(source, result, count, error_message, width, + scale); + case PhysicalType::INT128: + return TemplatedVectorDecimalCast(source, result, count, error_message, width, + scale); + default: + throw InternalException("Unimplemented internal type for decimal"); + } } -} // namespace duckdb +template +struct DecimalScaleInput { + DecimalScaleInput(Vector &result_p, FACTOR_TYPE factor_p) : result(result_p), factor(factor_p) { + } + DecimalScaleInput(Vector &result_p, LIMIT_TYPE limit_p, FACTOR_TYPE factor_p, string *error_message_p, + uint8_t source_scale_p) + : result(result_p), limit(limit_p), factor(factor_p), error_message(error_message_p), + source_scale(source_scale_p) { + } + Vector &result; + LIMIT_TYPE limit; + FACTOR_TYPE factor; + bool all_converted = true; + string *error_message; + uint8_t source_scale; +}; +struct DecimalScaleUpOperator { + template + static RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) { + auto data = (DecimalScaleInput *)dataptr; + return Cast::Operation(input) * data->factor; + } +}; -namespace duckdb { +struct DecimalScaleUpCheckOperator { + template + static RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) { + auto data = (DecimalScaleInput *)dataptr; + if (input >= data->limit || input <= -data->limit) { + auto error = + StringUtil::Format("Casting value \"%s\" to type %s failed: value is out of range!", + Decimal::ToString(input, 
data->source_scale), data->result.GetType().ToString()); + return HandleVectorCastError::Operation(move(error), mask, idx, data->error_message, + data->all_converted); + } + return Cast::Operation(input) * data->factor; + } +}; -vector AggregateObject::CreateAggregateObjects(const vector &bindings) { - vector aggregates; - for (auto &binding : bindings) { - auto payload_size = binding->function.state_size(); -#ifndef DUCKDB_ALLOW_UNDEFINED - payload_size = BaseAggregateHashTable::Align(payload_size); -#endif - aggregates.emplace_back(binding->function, binding->bind_info.get(), binding->children.size(), payload_size, - binding->distinct, binding->return_type.InternalType(), binding->filter.get()); +template +bool TemplatedDecimalScaleUp(Vector &source, Vector &result, idx_t count, string *error_message) { + auto source_scale = DecimalType::GetScale(source.GetType()); + auto source_width = DecimalType::GetWidth(source.GetType()); + auto result_scale = DecimalType::GetScale(result.GetType()); + auto result_width = DecimalType::GetWidth(result.GetType()); + D_ASSERT(result_scale >= source_scale); + idx_t scale_difference = result_scale - source_scale; + DEST multiply_factor = POWERS_DEST::POWERS_OF_TEN[scale_difference]; + idx_t target_width = result_width - scale_difference; + if (source_width < target_width) { + DecimalScaleInput input(result, multiply_factor); + // type will always fit: no need to check limit + UnaryExecutor::GenericExecute(source, result, count, &input); + return true; + } else { + // type might not fit: check limit + auto limit = POWERS_SOURCE::POWERS_OF_TEN[target_width]; + DecimalScaleInput input(result, limit, multiply_factor, error_message, source_scale); + UnaryExecutor::GenericExecute(source, result, count, &input, + error_message); + return input.all_converted; } - return aggregates; } -BaseAggregateHashTable::BaseAggregateHashTable(BufferManager &buffer_manager, vector group_types_p, - vector payload_types_p, - vector aggregate_objects) - 
: buffer_manager(buffer_manager), aggregates(move(aggregate_objects)), group_types(move(group_types_p)), - payload_types(move(payload_types_p)), group_width(0), group_padding(0), payload_width(0) { - - for (idx_t i = 0; i < group_types.size(); i++) { - group_width += GetTypeIdSize(group_types[i].InternalType()); +struct DecimalScaleDownOperator { + template + static RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) { + auto data = (DecimalScaleInput *)dataptr; + return Cast::Operation(input / data->factor); } - for (idx_t i = 0; i < aggregates.size(); i++) { - payload_width += aggregates[i].payload_size; -#ifndef DUCKDB_ALLOW_UNDEFINED - D_ASSERT(aggregates[i].payload_size == BaseAggregateHashTable::Align(aggregates[i].payload_size)); -#endif +}; + +struct DecimalScaleDownCheckOperator { + template + static RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) { + auto data = (DecimalScaleInput *)dataptr; + if (input >= data->limit || input <= -data->limit) { + auto error = + StringUtil::Format("Casting value \"%s\" to type %s failed: value is out of range!", + Decimal::ToString(input, data->source_scale), data->result.GetType().ToString()); + return HandleVectorCastError::Operation(move(error), mask, idx, data->error_message, + data->all_converted); + } + return Cast::Operation(input / data->factor); } +}; - empty_payload_data = unique_ptr(new data_t[payload_width]); - // initialize the aggregates to the NULL value - auto pointer = empty_payload_data.get(); - for (idx_t i = 0; i < aggregates.size(); i++) { - auto &aggr = aggregates[i]; - aggr.function.initialize(pointer); - pointer += aggr.payload_size; +template +bool TemplatedDecimalScaleDown(Vector &source, Vector &result, idx_t count, string *error_message) { + auto source_scale = DecimalType::GetScale(source.GetType()); + auto source_width = DecimalType::GetWidth(source.GetType()); + auto result_scale = 
DecimalType::GetScale(result.GetType()); + auto result_width = DecimalType::GetWidth(result.GetType()); + D_ASSERT(result_scale < source_scale); + idx_t scale_difference = source_scale - result_scale; + idx_t target_width = result_width + scale_difference; + SOURCE divide_factor = POWERS_SOURCE::POWERS_OF_TEN[scale_difference]; + if (source_width < target_width) { + DecimalScaleInput input(result, divide_factor); + // type will always fit: no need to check limit + UnaryExecutor::GenericExecute(source, result, count, &input); + return true; + } else { + // type might not fit: check limit + auto limit = POWERS_SOURCE::POWERS_OF_TEN[target_width]; + DecimalScaleInput input(result, limit, divide_factor, error_message, source_scale); + UnaryExecutor::GenericExecute(source, result, count, &input, + error_message); + return input.all_converted; } +} - D_ASSERT(group_width > 0); +template +static bool DecimalDecimalCastSwitch(Vector &source, Vector &result, idx_t count, string *error_message) { + auto source_scale = DecimalType::GetScale(source.GetType()); + auto result_scale = DecimalType::GetScale(result.GetType()); + source.GetType().Verify(); + result.GetType().Verify(); -#ifndef DUCKDB_ALLOW_UNDEFINED - auto aligned_group_width = BaseAggregateHashTable::Align(group_width); - group_padding = aligned_group_width - group_width; - group_width += group_padding; -#endif + // we need to either multiply or divide by the difference in scales + if (result_scale >= source_scale) { + // multiply + switch (result.GetType().InternalType()) { + case PhysicalType::INT16: + return TemplatedDecimalScaleUp(source, result, count, + error_message); + case PhysicalType::INT32: + return TemplatedDecimalScaleUp(source, result, count, + error_message); + case PhysicalType::INT64: + return TemplatedDecimalScaleUp(source, result, count, + error_message); + case PhysicalType::INT128: + return TemplatedDecimalScaleUp(source, result, count, + error_message); + default: + throw 
NotImplementedException("Unimplemented internal type for decimal"); + } + } else { + // divide + switch (result.GetType().InternalType()) { + case PhysicalType::INT16: + return TemplatedDecimalScaleDown(source, result, count, error_message); + case PhysicalType::INT32: + return TemplatedDecimalScaleDown(source, result, count, error_message); + case PhysicalType::INT64: + return TemplatedDecimalScaleDown(source, result, count, error_message); + case PhysicalType::INT128: + return TemplatedDecimalScaleDown(source, result, count, error_message); + default: + throw NotImplementedException("Unimplemented internal type for decimal"); + } + } } -void BaseAggregateHashTable::CallDestructors(Vector &state_vector, idx_t count) { - if (count == 0) { - return; +struct DecimalCastInput { + DecimalCastInput(Vector &result_p, uint8_t width_p, uint8_t scale_p) + : result(result_p), width(width_p), scale(scale_p) { } - for (idx_t i = 0; i < aggregates.size(); i++) { - auto &aggr = aggregates[i]; - if (aggr.function.destructor) { - aggr.function.destructor(state_vector, count); + + Vector &result; + uint8_t width; + uint8_t scale; +}; + +struct StringCastFromDecimalOperator { + template + static RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) { + auto data = (DecimalCastInput *)dataptr; + return StringCastFromDecimal::Operation(input, data->width, data->scale, data->result); + } +}; + +static bool DecimalCastSwitch(Vector &source, Vector &result, idx_t count, string *error_message) { + // now switch on the result type + switch (result.GetType().id()) { + case LogicalTypeId::BOOLEAN: + return FromDecimalCast(source, result, count, error_message); + case LogicalTypeId::TINYINT: + return FromDecimalCast(source, result, count, error_message); + case LogicalTypeId::SMALLINT: + return FromDecimalCast(source, result, count, error_message); + case LogicalTypeId::INTEGER: + return FromDecimalCast(source, result, count, error_message); + case 
LogicalTypeId::BIGINT: + return FromDecimalCast(source, result, count, error_message); + case LogicalTypeId::UTINYINT: + return FromDecimalCast(source, result, count, error_message); + case LogicalTypeId::USMALLINT: + return FromDecimalCast(source, result, count, error_message); + case LogicalTypeId::UINTEGER: + return FromDecimalCast(source, result, count, error_message); + case LogicalTypeId::UBIGINT: + return FromDecimalCast(source, result, count, error_message); + case LogicalTypeId::HUGEINT: + return FromDecimalCast(source, result, count, error_message); + case LogicalTypeId::DECIMAL: { + // decimal to decimal cast + // first we need to figure out the source and target internal types + switch (source.GetType().InternalType()) { + case PhysicalType::INT16: + return DecimalDecimalCastSwitch(source, result, count, error_message); + case PhysicalType::INT32: + return DecimalDecimalCastSwitch(source, result, count, error_message); + case PhysicalType::INT64: + return DecimalDecimalCastSwitch(source, result, count, error_message); + case PhysicalType::INT128: + return DecimalDecimalCastSwitch(source, result, count, error_message); + default: + throw NotImplementedException("Unimplemented internal type for decimal in decimal_decimal cast"); + } + break; + } + case LogicalTypeId::FLOAT: + return FromDecimalCast(source, result, count, error_message); + case LogicalTypeId::DOUBLE: + return FromDecimalCast(source, result, count, error_message); + case LogicalTypeId::VARCHAR: { + auto &source_type = source.GetType(); + auto width = DecimalType::GetWidth(source_type); + auto scale = DecimalType::GetScale(source_type); + DecimalCastInput input(result, width, scale); + switch (source_type.InternalType()) { + case PhysicalType::INT16: + UnaryExecutor::GenericExecute(source, result, count, + (void *)&input); + break; + case PhysicalType::INT32: + UnaryExecutor::GenericExecute(source, result, count, + (void *)&input); + break; + case PhysicalType::INT64: + 
UnaryExecutor::GenericExecute(source, result, count, + (void *)&input); + break; + case PhysicalType::INT128: + UnaryExecutor::GenericExecute(source, result, count, + (void *)&input); + break; + default: + throw InternalException("Unimplemented internal decimal type"); } - // move to the next aggregate state - VectorOperations::AddInPlace(state_vector, aggr.payload_size, count); + return true; + } + default: + return TryVectorNullCast(source, result, count, error_message); } } } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/execution/column_binding_resolver.hpp -// -// -//===----------------------------------------------------------------------===// - - //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/planner/column_binding_map.hpp +// duckdb/common/likely.hpp // // //===----------------------------------------------------------------------===// +#if __GNUC__ +#define DUCKDB_BUILTIN_EXPECT(cond, expected_value) (__builtin_expect(cond, expected_value)) +#else +#define DUCKDB_BUILTIN_EXPECT(cond, expected_value) (cond) +#endif - - +#define DUCKDB_LIKELY(...) DUCKDB_BUILTIN_EXPECT((__VA_ARGS__), 1) +#define DUCKDB_UNLIKELY(...) 
DUCKDB_BUILTIN_EXPECT((__VA_ARGS__), 0) namespace duckdb { -struct ColumnBindingHashFunction { - uint64_t operator()(const ColumnBinding &a) const { - return CombineHash(Hash(a.table_index), Hash(a.column_index)); +template +struct VectorStringCastOperator { + template + static RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) { + auto result = (Vector *)dataptr; + return OP::template Operation(input, *result); } }; -struct ColumnBindingEquality { - bool operator()(const ColumnBinding &a, const ColumnBinding &b) const { - return a == b; +struct VectorTryCastData { + VectorTryCastData(Vector &result_p, string *error_message_p, bool strict_p) + : result(result_p), error_message(error_message_p), strict(strict_p) { } -}; - -template -using column_binding_map_t = unordered_map; -using column_binding_set_t = unordered_set; + Vector &result; + string *error_message; + bool strict; + bool all_converted = true; +}; -} // namespace duckdb +template +struct VectorTryCastOperator { + template + static RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) { + RESULT_TYPE output; + if (DUCKDB_LIKELY(OP::template Operation(input, output))) { + return output; + } + auto data = (VectorTryCastData *)dataptr; + return HandleVectorCastError::Operation(CastExceptionText(input), mask, + idx, data->error_message, data->all_converted); + } +}; +template +struct VectorTryCastStrictOperator { + template + static RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) { + auto data = (VectorTryCastData *)dataptr; + RESULT_TYPE output; + if (DUCKDB_LIKELY(OP::template Operation(input, output, data->strict))) { + return output; + } + return HandleVectorCastError::Operation(CastExceptionText(input), mask, + idx, data->error_message, data->all_converted); + } +}; +template +struct VectorTryCastErrorOperator { + template + static RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, 
void *dataptr) { + auto data = (VectorTryCastData *)dataptr; + RESULT_TYPE output; + if (DUCKDB_LIKELY( + OP::template Operation(input, output, data->error_message, data->strict))) { + return output; + } + bool has_error = data->error_message && !data->error_message->empty(); + return HandleVectorCastError::Operation( + has_error ? *data->error_message : CastExceptionText(input), mask, idx, + data->error_message, data->all_converted); + } +}; -namespace duckdb { +template +struct VectorTryCastStringOperator { + template + static RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) { + auto data = (VectorTryCastData *)dataptr; + RESULT_TYPE output; + if (DUCKDB_LIKELY(OP::template Operation(input, output, data->result, + data->error_message, data->strict))) { + return output; + } + return HandleVectorCastError::Operation(CastExceptionText(input), mask, + idx, data->error_message, data->all_converted); + } +}; -//! The ColumnBindingResolver resolves ColumnBindings into base tables -//! (table_index, column_index) into physical indices into the DataChunks that -//! 
are used within the execution engine -class ColumnBindingResolver : public LogicalOperatorVisitor { -public: - ColumnBindingResolver(); +template +static bool TemplatedVectorTryCastLoop(Vector &source, Vector &result, idx_t count, bool strict, + string *error_message) { + VectorTryCastData input(result, error_message, strict); + UnaryExecutor::GenericExecute(source, result, count, &input, error_message); + return input.all_converted; +} - void VisitOperator(LogicalOperator &op) override; +template +static bool VectorTryCastLoop(Vector &source, Vector &result, idx_t count, string *error_message) { + return TemplatedVectorTryCastLoop>(source, result, count, false, error_message); +} -protected: - vector bindings; +template +static bool VectorTryCastStrictLoop(Vector &source, Vector &result, idx_t count, bool strict, string *error_message) { + return TemplatedVectorTryCastLoop>(source, result, count, strict, + error_message); +} - unique_ptr VisitReplace(BoundColumnRefExpression &expr, unique_ptr *expr_ptr) override; -}; -} // namespace duckdb +template +static bool VectorTryCastErrorLoop(Vector &source, Vector &result, idx_t count, bool strict, string *error_message) { + return TemplatedVectorTryCastLoop>(source, result, count, strict, + error_message); +} +template +static bool VectorTryCastStringLoop(Vector &source, Vector &result, idx_t count, bool strict, string *error_message) { + return TemplatedVectorTryCastLoop>(source, result, count, strict, + error_message); +} -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/planner/operator/logical_comparison_join.hpp -// -// -//===----------------------------------------------------------------------===// +template +static void VectorStringCast(Vector &source, Vector &result, idx_t count) { + D_ASSERT(result.GetType().InternalType() == PhysicalType::VARCHAR); + UnaryExecutor::GenericExecute>(source, result, count, (void *)&result); +} +template +static bool 
NumericCastSwitch(Vector &source, Vector &result, idx_t count, string *error_message) { + // now switch on the result type + switch (result.GetType().id()) { + case LogicalTypeId::BOOLEAN: + return VectorTryCastLoop(source, result, count, error_message); + case LogicalTypeId::TINYINT: + return VectorTryCastLoop(source, result, count, error_message); + case LogicalTypeId::SMALLINT: + return VectorTryCastLoop(source, result, count, error_message); + case LogicalTypeId::INTEGER: + return VectorTryCastLoop(source, result, count, error_message); + case LogicalTypeId::BIGINT: + return VectorTryCastLoop(source, result, count, error_message); + case LogicalTypeId::UTINYINT: + return VectorTryCastLoop(source, result, count, error_message); + case LogicalTypeId::USMALLINT: + return VectorTryCastLoop(source, result, count, error_message); + case LogicalTypeId::UINTEGER: + return VectorTryCastLoop(source, result, count, error_message); + case LogicalTypeId::UBIGINT: + return VectorTryCastLoop(source, result, count, error_message); + case LogicalTypeId::HUGEINT: + return VectorTryCastLoop(source, result, count, error_message); + case LogicalTypeId::FLOAT: + return VectorTryCastLoop(source, result, count, error_message); + case LogicalTypeId::DOUBLE: + return VectorTryCastLoop(source, result, count, error_message); + case LogicalTypeId::DECIMAL: + return ToDecimalCast(source, result, count, error_message); + case LogicalTypeId::VARCHAR: { + VectorStringCast(source, result, count); + return true; + } + default: + return TryVectorNullCast(source, result, count, error_message); + } +} +static bool VectorStringCastNumericSwitch(Vector &source, Vector &result, idx_t count, bool strict, + string *error_message) { + // now switch on the result type + switch (result.GetType().id()) { + case LogicalTypeId::BOOLEAN: + return VectorTryCastStrictLoop(source, result, count, strict, error_message); + case LogicalTypeId::TINYINT: + return VectorTryCastStrictLoop(source, result, count, strict, 
error_message); + case LogicalTypeId::SMALLINT: + return VectorTryCastStrictLoop(source, result, count, strict, + error_message); + case LogicalTypeId::INTEGER: + return VectorTryCastStrictLoop(source, result, count, strict, + error_message); + case LogicalTypeId::BIGINT: + return VectorTryCastStrictLoop(source, result, count, strict, + error_message); + case LogicalTypeId::UTINYINT: + return VectorTryCastStrictLoop(source, result, count, strict, + error_message); + case LogicalTypeId::USMALLINT: + return VectorTryCastStrictLoop(source, result, count, strict, + error_message); + case LogicalTypeId::UINTEGER: + return VectorTryCastStrictLoop(source, result, count, strict, + error_message); + case LogicalTypeId::UBIGINT: + return VectorTryCastStrictLoop(source, result, count, strict, + error_message); + case LogicalTypeId::HUGEINT: + return VectorTryCastStrictLoop(source, result, count, strict, + error_message); + case LogicalTypeId::FLOAT: + return VectorTryCastStrictLoop(source, result, count, strict, error_message); + case LogicalTypeId::DOUBLE: + return VectorTryCastStrictLoop(source, result, count, strict, error_message); + case LogicalTypeId::INTERVAL: + return VectorTryCastErrorLoop(source, result, count, strict, + error_message); + case LogicalTypeId::DECIMAL: + return ToDecimalCast(source, result, count, error_message); + default: + return TryVectorNullCast(source, result, count, error_message); + } +} +static bool StringCastSwitch(Vector &source, Vector &result, idx_t count, bool strict, string *error_message) { + // now switch on the result type + switch (result.GetType().id()) { + case LogicalTypeId::DATE: + return VectorTryCastErrorLoop(source, result, count, strict, + error_message); + case LogicalTypeId::TIME: + return VectorTryCastErrorLoop(source, result, count, strict, + error_message); + case LogicalTypeId::TIMESTAMP: + return VectorTryCastErrorLoop(source, result, count, strict, + error_message); + case LogicalTypeId::TIMESTAMP_NS: + return 
VectorTryCastStrictLoop(source, result, count, + strict, error_message); + case LogicalTypeId::TIMESTAMP_SEC: + return VectorTryCastStrictLoop(source, result, count, + strict, error_message); + case LogicalTypeId::TIMESTAMP_MS: + return VectorTryCastStrictLoop(source, result, count, + strict, error_message); + case LogicalTypeId::BLOB: + return VectorTryCastStringLoop(source, result, count, strict, + error_message); + case LogicalTypeId::SQLNULL: + return TryVectorNullCast(source, result, count, error_message); + default: + return VectorStringCastNumericSwitch(source, result, count, strict, error_message); + } +} +static bool DateCastSwitch(Vector &source, Vector &result, idx_t count, string *error_message) { + // now switch on the result type + switch (result.GetType().id()) { + case LogicalTypeId::VARCHAR: + // date to varchar + VectorStringCast(source, result, count); + return true; + case LogicalTypeId::TIMESTAMP: + // date to timestamp + return VectorTryCastLoop(source, result, count, error_message); + default: + return TryVectorNullCast(source, result, count, error_message); + } +} -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/planner/joinside.hpp -// -// -//===----------------------------------------------------------------------===// +static bool TimeCastSwitch(Vector &source, Vector &result, idx_t count, string *error_message) { + // now switch on the result type + switch (result.GetType().id()) { + case LogicalTypeId::VARCHAR: + // time to varchar + VectorStringCast(source, result, count); + return true; + default: + return TryVectorNullCast(source, result, count, error_message); + } +} +static bool TimestampCastSwitch(Vector &source, Vector &result, idx_t count, string *error_message) { + // now switch on the result type + switch (result.GetType().id()) { + case LogicalTypeId::VARCHAR: + // timestamp to varchar + VectorStringCast(source, result, count); + break; + case LogicalTypeId::DATE: + 
// timestamp to date + UnaryExecutor::Execute(source, result, count); + break; + case LogicalTypeId::TIME: + // timestamp to time + UnaryExecutor::Execute(source, result, count); + break; + case LogicalTypeId::TIMESTAMP_NS: + // timestamp (us) to timestamp (ns) + UnaryExecutor::Execute(source, result, count); + break; + case LogicalTypeId::TIMESTAMP_MS: + // timestamp (us) to timestamp (ms) + UnaryExecutor::Execute(source, result, count); + break; + case LogicalTypeId::TIMESTAMP_SEC: + // timestamp (us) to timestamp (s) + UnaryExecutor::Execute(source, result, count); + break; + default: + return TryVectorNullCast(source, result, count, error_message); + } + return true; +} +static bool TimestampNsCastSwitch(Vector &source, Vector &result, idx_t count, string *error_message) { + // now switch on the result type + switch (result.GetType().id()) { + case LogicalTypeId::VARCHAR: + // timestamp (ns) to varchar + VectorStringCast(source, result, count); + break; + case LogicalTypeId::TIMESTAMP: + // timestamp (ns) to timestamp (us) + UnaryExecutor::Execute(source, result, count); + break; + default: + return TryVectorNullCast(source, result, count, error_message); + } + return true; +} +static bool TimestampMsCastSwitch(Vector &source, Vector &result, idx_t count, string *error_message) { + // now switch on the result type + switch (result.GetType().id()) { + case LogicalTypeId::VARCHAR: + // timestamp (ms) to varchar + VectorStringCast(source, result, count); + break; + case LogicalTypeId::TIMESTAMP: + // timestamp (ms) to timestamp (us) + UnaryExecutor::Execute(source, result, count); + break; + default: + return TryVectorNullCast(source, result, count, error_message); + } + return true; +} +static bool TimestampSecCastSwitch(Vector &source, Vector &result, idx_t count, string *error_message) { + // now switch on the result type + switch (result.GetType().id()) { + case LogicalTypeId::VARCHAR: + // timestamp (sec) to varchar + VectorStringCast(source, result, count); 
+ break; + case LogicalTypeId::TIMESTAMP: + // timestamp (s) to timestamp (us) + UnaryExecutor::Execute(source, result, count); + break; + default: + return TryVectorNullCast(source, result, count, error_message); + } + return true; +} +static bool IntervalCastSwitch(Vector &source, Vector &result, idx_t count, string *error_message) { + // now switch on the result type + switch (result.GetType().id()) { + case LogicalTypeId::VARCHAR: + // time to varchar + VectorStringCast(source, result, count); + break; + default: + return TryVectorNullCast(source, result, count, error_message); + } + return true; +} -namespace duckdb { +static bool BlobCastSwitch(Vector &source, Vector &result, idx_t count, string *error_message) { + // now switch on the result type + switch (result.GetType().id()) { + case LogicalTypeId::VARCHAR: + // blob to varchar + VectorStringCast(source, result, count); + break; + default: + return TryVectorNullCast(source, result, count, error_message); + } + return true; +} -//! JoinCondition represents a left-right comparison join condition -struct JoinCondition { -public: - JoinCondition() : null_values_are_equal(false) { +static bool ValueStringCastSwitch(Vector &source, Vector &result, idx_t count, string *error_message) { + switch (result.GetType().id()) { + case LogicalTypeId::VARCHAR: + if (source.GetVectorType() == VectorType::CONSTANT_VECTOR) { + result.SetVectorType(source.GetVectorType()); + } else { + result.SetVectorType(VectorType::FLAT_VECTOR); + } + for (idx_t i = 0; i < count; i++) { + auto src_val = source.GetValue(i); + auto str_val = src_val.ToString(); + result.SetValue(i, Value(str_val)); + } + return true; + default: + return TryVectorNullCast(source, result, count, error_message); } +} - //! Turns the JoinCondition into an expression; note that this destroys the JoinCondition as the expression inherits - //! 
the left/right expressions - static unique_ptr CreateExpression(JoinCondition cond); +static bool ListCastSwitch(Vector &source, Vector &result, idx_t count, string *error_message) { + switch (result.GetType().id()) { + case LogicalTypeId::LIST: { + // only handle constant and flat vectors here for now + if (source.GetVectorType() == VectorType::CONSTANT_VECTOR) { + result.SetVectorType(source.GetVectorType()); + ConstantVector::SetNull(result, ConstantVector::IsNull(source)); -public: - unique_ptr left; - unique_ptr right; - ExpressionType comparison; - //! NULL values are equal for just THIS JoinCondition (instead of the entire join). - //! This is only supported by the HashJoin and can only be used in equality comparisons. - bool null_values_are_equal = false; -}; + auto ldata = ConstantVector::GetData(source); + auto tdata = ConstantVector::GetData(result); + *tdata = *ldata; + } else { + source.Normalify(count); + result.SetVectorType(VectorType::FLAT_VECTOR); + FlatVector::SetValidity(result, FlatVector::Validity(source)); -class JoinSide { -public: - enum join_value : uint8_t { NONE, LEFT, RIGHT, BOTH }; + auto ldata = FlatVector::GetData(source); + auto tdata = FlatVector::GetData(result); + for (idx_t i = 0; i < count; i++) { + tdata[i] = ldata[i]; + } + } + auto &source_cc = ListVector::GetEntry(source); + auto source_size = ListVector::GetListSize(source); - JoinSide() = default; - constexpr JoinSide(join_value val) : value(val) { // NOLINT: Allow implicit conversion from `join_value` + ListVector::Reserve(result, source_size); + auto &append_vector = ListVector::GetEntry(result); + + VectorOperations::Cast(source_cc, append_vector, source_size); + ListVector::SetListSize(result, source_size); + D_ASSERT(ListVector::GetListSize(result) == source_size); + return true; } + default: + return ValueStringCastSwitch(source, result, count, error_message); + } +} - bool operator==(JoinSide a) const { - return value == a.value; +static bool 
StructCastSwitch(Vector &source, Vector &result, idx_t count, string *error_message) { + switch (result.GetType().id()) { + case LogicalTypeId::STRUCT: + case LogicalTypeId::MAP: { + auto &source_child_types = StructType::GetChildTypes(source.GetType()); + auto &result_child_types = StructType::GetChildTypes(result.GetType()); + if (source_child_types.size() != result_child_types.size()) { + throw TypeMismatchException(source.GetType(), result.GetType(), "Cannot cast STRUCTs of different size"); + } + auto &source_children = StructVector::GetEntries(source); + D_ASSERT(source_children.size() == source_child_types.size()); + + auto &result_children = StructVector::GetEntries(result); + for (idx_t c_idx = 0; c_idx < result_child_types.size(); c_idx++) { + auto &result_child_vector = result_children[c_idx]; + auto &source_child_vector = *source_children[c_idx]; + if (result_child_vector->GetType() != source_child_vector.GetType()) { + VectorOperations::Cast(source_child_vector, *result_child_vector, count, false); + } else { + result_child_vector->Reference(source_child_vector); + } + } + if (source.GetVectorType() == VectorType::CONSTANT_VECTOR) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + ConstantVector::SetNull(result, ConstantVector::IsNull(source)); + } else { + source.Normalify(count); + FlatVector::Validity(result) = FlatVector::Validity(source); + } + return true; } - bool operator!=(JoinSide a) const { - return value != a.value; + case LogicalTypeId::VARCHAR: + if (source.GetVectorType() == VectorType::CONSTANT_VECTOR) { + result.SetVectorType(source.GetVectorType()); + } else { + result.SetVectorType(VectorType::FLAT_VECTOR); + } + for (idx_t i = 0; i < count; i++) { + auto src_val = source.GetValue(i); + auto str_val = src_val.ToString(); + result.SetValue(i, Value(str_val)); + } + return true; + default: + return TryVectorNullCast(source, result, count, error_message); } +} - static JoinSide CombineJoinSide(JoinSide left, JoinSide right); - 
static JoinSide GetJoinSide(idx_t table_binding, unordered_set &left_bindings, - unordered_set &right_bindings); - static JoinSide GetJoinSide(Expression &expression, unordered_set &left_bindings, - unordered_set &right_bindings); - static JoinSide GetJoinSide(const unordered_set &bindings, unordered_set &left_bindings, - unordered_set &right_bindings); +bool VectorOperations::TryCast(Vector &source, Vector &result, idx_t count, string *error_message, bool strict) { + D_ASSERT(source.GetType() != result.GetType()); + // first switch on source type + switch (source.GetType().id()) { + case LogicalTypeId::BOOLEAN: + return NumericCastSwitch(source, result, count, error_message); + case LogicalTypeId::TINYINT: + return NumericCastSwitch(source, result, count, error_message); + case LogicalTypeId::SMALLINT: + return NumericCastSwitch(source, result, count, error_message); + case LogicalTypeId::INTEGER: + return NumericCastSwitch(source, result, count, error_message); + case LogicalTypeId::BIGINT: + return NumericCastSwitch(source, result, count, error_message); + case LogicalTypeId::UTINYINT: + return NumericCastSwitch(source, result, count, error_message); + case LogicalTypeId::USMALLINT: + return NumericCastSwitch(source, result, count, error_message); + case LogicalTypeId::UINTEGER: + return NumericCastSwitch(source, result, count, error_message); + case LogicalTypeId::UBIGINT: + return NumericCastSwitch(source, result, count, error_message); + case LogicalTypeId::HUGEINT: + return NumericCastSwitch(source, result, count, error_message); + case LogicalTypeId::DECIMAL: + return DecimalCastSwitch(source, result, count, error_message); + case LogicalTypeId::FLOAT: + return NumericCastSwitch(source, result, count, error_message); + case LogicalTypeId::DOUBLE: + return NumericCastSwitch(source, result, count, error_message); + case LogicalTypeId::DATE: + return DateCastSwitch(source, result, count, error_message); + case LogicalTypeId::TIME: + return 
TimeCastSwitch(source, result, count, error_message); + case LogicalTypeId::TIMESTAMP: + return TimestampCastSwitch(source, result, count, error_message); + case LogicalTypeId::TIMESTAMP_NS: + return TimestampNsCastSwitch(source, result, count, error_message); + case LogicalTypeId::TIMESTAMP_MS: + return TimestampMsCastSwitch(source, result, count, error_message); + case LogicalTypeId::TIMESTAMP_SEC: + return TimestampSecCastSwitch(source, result, count, error_message); + case LogicalTypeId::INTERVAL: + return IntervalCastSwitch(source, result, count, error_message); + case LogicalTypeId::VARCHAR: + return StringCastSwitch(source, result, count, strict, error_message); + case LogicalTypeId::BLOB: + return BlobCastSwitch(source, result, count, error_message); + case LogicalTypeId::SQLNULL: { + // cast a NULL to another type, just copy the properties and change the type + result.SetVectorType(VectorType::CONSTANT_VECTOR); + ConstantVector::SetNull(result, true); + return true; + } + case LogicalTypeId::MAP: + case LogicalTypeId::STRUCT: + return StructCastSwitch(source, result, count, error_message); + case LogicalTypeId::LIST: + return ListCastSwitch(source, result, count, error_message); + default: + return TryVectorNullCast(source, result, count, error_message); + } + return true; +} -private: - join_value value; -}; +void VectorOperations::Cast(Vector &source, Vector &result, idx_t count, bool strict) { + VectorOperations::TryCast(source, result, count, nullptr, strict); +} } // namespace duckdb - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/planner/operator/logical_join.hpp -// -// -//===----------------------------------------------------------------------===// +//===--------------------------------------------------------------------===// +// copy.cpp +// Description: This file contains the implementation of the different copy +// functions 
+//===--------------------------------------------------------------------===// @@ -31379,67 +38723,257 @@ class JoinSide { namespace duckdb { -//! LogicalJoin represents a join between two relations -class LogicalJoin : public LogicalOperator { -public: - explicit LogicalJoin(JoinType type, LogicalOperatorType logical_type = LogicalOperatorType::LOGICAL_JOIN); +template +static void TemplatedCopy(const Vector &source, const SelectionVector &sel, Vector &target, idx_t source_offset, + idx_t target_offset, idx_t copy_count) { + auto ldata = FlatVector::GetData(source); + auto tdata = FlatVector::GetData(target); + for (idx_t i = 0; i < copy_count; i++) { + auto source_idx = sel.get_index(source_offset + i); + tdata[target_offset + i] = ldata[source_idx]; + } +} + +void VectorOperations::Copy(const Vector &source, Vector &target, const SelectionVector &sel_p, idx_t source_count, + idx_t source_offset, idx_t target_offset) { + D_ASSERT(source_offset <= source_count); + D_ASSERT(source.GetType() == target.GetType()); + idx_t copy_count = source_count - source_offset; + + SelectionVector owned_sel; + const SelectionVector *sel = &sel_p; + switch (source.GetVectorType()) { + case VectorType::DICTIONARY_VECTOR: { + // dictionary vector: merge selection vectors + auto &child = DictionaryVector::Child(source); + auto &dict_sel = DictionaryVector::SelVector(source); + // merge the selection vectors and verify the child + auto new_buffer = dict_sel.Slice(*sel, source_count); + SelectionVector merged_sel(new_buffer); + VectorOperations::Copy(child, target, merged_sel, source_count, source_offset, target_offset); + return; + } + case VectorType::SEQUENCE_VECTOR: { + int64_t start, increment; + Vector seq(source.GetType()); + SequenceVector::GetSequence(source, start, increment); + VectorOperations::GenerateSequence(seq, source_count, *sel, start, increment); + VectorOperations::Copy(seq, target, *sel, source_count, source_offset, target_offset); + return; + } + case 
VectorType::CONSTANT_VECTOR: + sel = ConstantVector::ZeroSelectionVector(copy_count, owned_sel); + break; // carry on with below code + case VectorType::FLAT_VECTOR: + break; + default: + throw NotImplementedException("FIXME unimplemented vector type for VectorOperations::Copy"); + } + + if (copy_count == 0) { + return; + } + + // Allow copying of a single value to constant vectors + const auto target_vector_type = target.GetVectorType(); + if (copy_count == 1 && target_vector_type == VectorType::CONSTANT_VECTOR) { + target_offset = 0; + target.SetVectorType(VectorType::FLAT_VECTOR); + } + D_ASSERT(target.GetVectorType() == VectorType::FLAT_VECTOR); + + // first copy the nullmask + auto &tmask = FlatVector::Validity(target); + if (source.GetVectorType() == VectorType::CONSTANT_VECTOR) { + const bool valid = !ConstantVector::IsNull(source); + for (idx_t i = 0; i < copy_count; i++) { + tmask.Set(target_offset + i, valid); + } + } else { + auto &smask = FlatVector::Validity(source); + if (smask.IsMaskSet()) { + for (idx_t i = 0; i < copy_count; i++) { + auto idx = sel->get_index(source_offset + i); + tmask.Set(target_offset + i, smask.RowIsValid(idx)); + } + } + } + + D_ASSERT(sel); + + // now copy over the data + switch (source.GetType().InternalType()) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); + break; + case PhysicalType::INT16: + TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); + break; + case PhysicalType::INT32: + TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); + break; + case PhysicalType::INT64: + TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); + break; + case PhysicalType::UINT8: + TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); + break; + case PhysicalType::UINT16: + TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); 
+ break; + case PhysicalType::UINT32: + TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); + break; + case PhysicalType::UINT64: + TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); + break; + case PhysicalType::INT128: + TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); + break; + case PhysicalType::FLOAT: + TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); + break; + case PhysicalType::DOUBLE: + TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); + break; + case PhysicalType::INTERVAL: + TemplatedCopy(source, *sel, target, source_offset, target_offset, copy_count); + break; + case PhysicalType::VARCHAR: { + auto ldata = FlatVector::GetData(source); + auto tdata = FlatVector::GetData(target); + for (idx_t i = 0; i < copy_count; i++) { + auto source_idx = sel->get_index(source_offset + i); + auto target_idx = target_offset + i; + if (tmask.RowIsValid(target_idx)) { + tdata[target_idx] = StringVector::AddStringOrBlob(target, ldata[source_idx]); + } + } + break; + } + case PhysicalType::STRUCT: { + auto &source_children = StructVector::GetEntries(source); + auto &target_children = StructVector::GetEntries(target); + D_ASSERT(source_children.size() == target_children.size()); + for (idx_t i = 0; i < source_children.size(); i++) { + VectorOperations::Copy(*source_children[i], *target_children[i], *sel, source_count, source_offset, + target_offset); + } + break; + } + case PhysicalType::LIST: { + D_ASSERT(target.GetType().InternalType() == PhysicalType::LIST); - // Gets the set of table references that are reachable from this node - static void GetTableReferences(LogicalOperator &op, unordered_set &bindings); - static void GetExpressionBindings(Expression &expr, unordered_set &bindings); + auto &source_child = ListVector::GetEntry(source); + auto sdata = FlatVector::GetData(source); + auto tdata = FlatVector::GetData(target); - //! 
The type of the join (INNER, OUTER, etc...) - JoinType join_type; - //! Table index used to refer to the MARK column (in case of a MARK join) - idx_t mark_index; - //! The columns of the LHS that are output by the join - vector left_projection_map; - //! The columns of the RHS that are output by the join - vector right_projection_map; + if (target_vector_type == VectorType::CONSTANT_VECTOR) { + // If we are only writing one value, then the copied values (if any) are contiguous + // and we can just Append from the offset position + if (!tmask.RowIsValid(target_offset)) { + break; + } + auto source_idx = sel->get_index(source_offset); + auto &source_entry = sdata[source_idx]; + const idx_t source_child_size = source_entry.length + source_entry.offset; -public: - vector GetColumnBindings() override; + //! overwrite constant target vectors. + ListVector::SetListSize(target, 0); + ListVector::Append(target, source_child, source_child_size, source_entry.offset); -protected: - void ResolveTypes() override; -}; + auto &target_entry = tdata[target_offset]; + target_entry.length = source_entry.length; + target_entry.offset = 0; + } else { + //! if the source has list offsets, we need to append them to the target + //! build a selection vector for the copied child elements + vector child_rows; + for (idx_t i = 0; i < copy_count; ++i) { + if (tmask.RowIsValid(target_offset + i)) { + auto source_idx = sel->get_index(source_offset + i); + auto &source_entry = sdata[source_idx]; + for (idx_t j = 0; j < source_entry.length; ++j) { + child_rows.emplace_back(source_entry.offset + j); + } + } + } + idx_t source_child_size = child_rows.size(); + SelectionVector child_sel(child_rows.data()); -} // namespace duckdb + idx_t old_target_child_len = ListVector::GetListSize(target); + //! append to list itself + ListVector::Append(target, source_child, child_sel, source_child_size); -namespace duckdb { + //! 
now write the list offsets + for (idx_t i = 0; i < copy_count; i++) { + auto source_idx = sel->get_index(source_offset + i); + auto &source_entry = sdata[source_idx]; + auto &target_entry = tdata[target_offset + i]; -//! LogicalComparisonJoin represents a join that involves comparisons between the LHS and RHS -class LogicalComparisonJoin : public LogicalJoin { -public: - explicit LogicalComparisonJoin(JoinType type, - LogicalOperatorType logical_type = LogicalOperatorType::LOGICAL_COMPARISON_JOIN); + target_entry.length = source_entry.length; + target_entry.offset = old_target_child_len; + if (tmask.RowIsValid(target_offset + i)) { + old_target_child_len += target_entry.length; + } + } + } + break; + } + default: + throw NotImplementedException("Unimplemented type '%s' for copy!", + TypeIdToString(source.GetType().InternalType())); + } - //! The conditions of the join - vector conditions; - //! Used for duplicate-eliminated joins - vector delim_types; + if (target_vector_type != VectorType::FLAT_VECTOR) { + target.SetVectorType(target_vector_type); + } +} -public: - string ParamsToString() const override; +void VectorOperations::Copy(const Vector &source, Vector &target, idx_t source_count, idx_t source_offset, + idx_t target_offset) { + switch (source.GetVectorType()) { + case VectorType::DICTIONARY_VECTOR: { + // dictionary: continue into child with selection vector + auto &child = DictionaryVector::Child(source); + auto &dict_sel = DictionaryVector::SelVector(source); + VectorOperations::Copy(child, target, dict_sel, source_count, source_offset, target_offset); + break; + } + case VectorType::CONSTANT_VECTOR: { + SelectionVector owned_sel; + auto sel = ConstantVector::ZeroSelectionVector(source_count, owned_sel); + VectorOperations::Copy(source, target, *sel, source_count, source_offset, target_offset); + break; + } + case VectorType::FLAT_VECTOR: { + VectorOperations::Copy(source, target, FlatVector::INCREMENTAL_SELECTION_VECTOR, source_count, source_offset, + 
target_offset); + break; + } + case VectorType::SEQUENCE_VECTOR: { + int64_t start, increment; + SequenceVector::GetSequence(source, start, increment); + Vector flattened(source.GetType()); + VectorOperations::GenerateSequence(flattened, source_count, start, increment); -public: - static unique_ptr CreateJoin(JoinType type, unique_ptr left_child, - unique_ptr right_child, - unordered_set &left_bindings, - unordered_set &right_bindings, - vector> &expressions); -}; + VectorOperations::Copy(flattened, target, FlatVector::INCREMENTAL_SELECTION_VECTOR, source_count, source_offset, + target_offset); + break; + } + default: + throw NotImplementedException("FIXME: unimplemented vector type for VectorOperations::Copy"); + } +} } // namespace duckdb +//===--------------------------------------------------------------------===// +// hash.cpp +// Description: This file contains the vectorized hash implementations +//===--------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/planner/operator/logical_create_index.hpp -// -// -//===----------------------------------------------------------------------===// @@ -31448,2153 +38982,1984 @@ class LogicalComparisonJoin : public LogicalJoin { namespace duckdb { -class LogicalCreateIndex : public LogicalOperator { -public: - LogicalCreateIndex(TableCatalogEntry &table, vector column_ids, - vector> expressions, unique_ptr info) - : LogicalOperator(LogicalOperatorType::LOGICAL_CREATE_INDEX), table(table), column_ids(column_ids), - info(std::move(info)) { - for (auto &expr : expressions) { - this->unbound_expressions.push_back(expr->Copy()); - } - this->expressions = move(expressions); - } - - //! The table to create the index for - TableCatalogEntry &table; - //! Column IDs needed for index creation - vector column_ids; - // Info for index creation - unique_ptr info; - //! 
Unbound expressions to be used in the optimizer - vector> unbound_expressions; - -protected: - void ResolveTypes() override { - types.push_back(LogicalType::BIGINT); +struct HashOp { + template + static inline hash_t Operation(T input, bool is_null) { + return duckdb::Hash(is_null ? duckdb::NullValue() : input); } }; -} // namespace duckdb - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/planner/operator/logical_delim_join.hpp -// -// -//===----------------------------------------------------------------------===// - - - - -namespace duckdb { - -//! LogicalDelimJoin represents a special "duplicate eliminated" join. This join type is only used for subquery -//! flattening, and involves performing duplicate elimination on the LEFT side which is then pushed into the RIGHT side. -class LogicalDelimJoin : public LogicalComparisonJoin { -public: - explicit LogicalDelimJoin(JoinType type) : LogicalComparisonJoin(type, LogicalOperatorType::LOGICAL_DELIM_JOIN) { +template +static inline void TightLoopHash(T *__restrict ldata, hash_t *__restrict result_data, const SelectionVector *rsel, + idx_t count, const SelectionVector *__restrict sel_vector, ValidityMask &mask) { + if (!mask.AllValid()) { + for (idx_t i = 0; i < count; i++) { + auto ridx = HAS_RSEL ? rsel->get_index(i) : i; + auto idx = sel_vector->get_index(ridx); + result_data[ridx] = HashOp::Operation(ldata[idx], !mask.RowIsValid(idx)); + } + } else { + for (idx_t i = 0; i < count; i++) { + auto ridx = HAS_RSEL ? rsel->get_index(i) : i; + auto idx = sel_vector->get_index(ridx); + result_data[ridx] = duckdb::Hash(ldata[idx]); + } } +} - //! 
The set of columns that will be duplicate eliminated from the LHS and pushed into the RHS - vector> duplicate_eliminated_columns; -}; - -} // namespace duckdb - - - - - - +template +static inline void TemplatedLoopHash(Vector &input, Vector &result, const SelectionVector *rsel, idx_t count) { + if (input.GetVectorType() == VectorType::CONSTANT_VECTOR) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + auto ldata = ConstantVector::GetData(input); + auto result_data = ConstantVector::GetData(result); + *result_data = HashOp::Operation(*ldata, ConstantVector::IsNull(input)); + } else { + result.SetVectorType(VectorType::FLAT_VECTOR); -namespace duckdb { + VectorData idata; + input.Orrify(count, idata); -ColumnBindingResolver::ColumnBindingResolver() { + TightLoopHash((T *)idata.data, FlatVector::GetData(result), rsel, count, idata.sel, + idata.validity); + } } -void ColumnBindingResolver::VisitOperator(LogicalOperator &op) { - if (op.type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN || op.type == LogicalOperatorType::LOGICAL_DELIM_JOIN) { - // special case: comparison join - auto &comp_join = (LogicalComparisonJoin &)op; - // first get the bindings of the LHS and resolve the LHS expressions - VisitOperator(*comp_join.children[0]); - for (auto &cond : comp_join.conditions) { - VisitExpression(&cond.left); +template +static inline void StructLoopHash(Vector &input, Vector &hashes, const SelectionVector *rsel, idx_t count) { + auto &children = StructVector::GetEntries(input); + + D_ASSERT(children.size() > 0); + idx_t col_no = 0; + if (HAS_RSEL) { + if (FIRST_HASH) { + VectorOperations::Hash(*children[col_no++], hashes, *rsel, count); + } else { + VectorOperations::CombineHash(hashes, *children[col_no++], *rsel, count); } - if (op.type == LogicalOperatorType::LOGICAL_DELIM_JOIN) { - // visit the duplicate eliminated columns on the LHS, if any - auto &delim_join = (LogicalDelimJoin &)op; - for (auto &expr : delim_join.duplicate_eliminated_columns) { - 
VisitExpression(&expr); - } + while (col_no < children.size()) { + VectorOperations::CombineHash(hashes, *children[col_no++], *rsel, count); } - // then get the bindings of the RHS and resolve the RHS expressions - VisitOperator(*comp_join.children[1]); - for (auto &cond : comp_join.conditions) { - VisitExpression(&cond.right); + } else { + if (FIRST_HASH) { + VectorOperations::Hash(*children[col_no++], hashes, count); + } else { + VectorOperations::CombineHash(hashes, *children[col_no++], count); + } + while (col_no < children.size()) { + VectorOperations::CombineHash(hashes, *children[col_no++], count); } - // finally update the bindings with the result bindings of the join - bindings = op.GetColumnBindings(); - return; - } else if (op.type == LogicalOperatorType::LOGICAL_ANY_JOIN) { - // ANY join, this join is different because we evaluate the expression on the bindings of BOTH join sides at - // once i.e. we set the bindings first to the bindings of the entire join, and then resolve the expressions of - // this operator - VisitOperatorChildren(op); - bindings = op.GetColumnBindings(); - VisitOperatorExpressions(op); - return; - } else if (op.type == LogicalOperatorType::LOGICAL_CREATE_INDEX) { - // CREATE INDEX statement, add the columns of the table with table index 0 to the binding set - // afterwards bind the expressions of the CREATE INDEX statement - auto &create_index = (LogicalCreateIndex &)op; - bindings = LogicalOperator::GenerateColumnBindings(0, create_index.table.columns.size()); - VisitOperatorExpressions(op); - return; - } else if (op.type == LogicalOperatorType::LOGICAL_GET) { - //! 
We first need to update the current set of bindings and then visit operator expressions - bindings = op.GetColumnBindings(); - VisitOperatorExpressions(op); - return; } - // general case - // first visit the children of this operator - VisitOperatorChildren(op); - // now visit the expressions of this operator to resolve any bound column references - VisitOperatorExpressions(op); - // finally update the current set of bindings to the current set of column bindings - bindings = op.GetColumnBindings(); } -unique_ptr ColumnBindingResolver::VisitReplace(BoundColumnRefExpression &expr, - unique_ptr *expr_ptr) { - D_ASSERT(expr.depth == 0); - // check the current set of column bindings to see which index corresponds to the column reference - for (idx_t i = 0; i < bindings.size(); i++) { - if (expr.binding == bindings[i]) { - return make_unique(expr.alias, expr.return_type, i); +template +static inline void ListLoopHash(Vector &input, Vector &hashes, const SelectionVector *rsel, idx_t count) { + auto hdata = FlatVector::GetData(hashes); + + VectorData idata; + input.Orrify(count, idata); + const auto ldata = (const list_entry_t *)idata.data; + + // Slice the child into a dictionary so we can iterate through the positions + // We only need one entry per position in the parent, but we have to index by + // the rsel so we need the full vector size. + SelectionVector cursor(HAS_RSEL ? STANDARD_VECTOR_SIZE : count); + + // Set up the cursor for the first position + SelectionVector unprocessed(count); + idx_t remaining = 0; + for (idx_t i = 0; i < count; ++i) { + const idx_t ridx = HAS_RSEL ? 
rsel->get_index(i) : i; + const auto lidx = idata.sel->get_index(ridx); + const auto &entry = ldata[lidx]; + if (idata.validity.RowIsValid(lidx) && entry.length > 0) { + cursor.set_index(ridx, entry.offset); + unprocessed.set_index(remaining++, ridx); + } else { + hdata[ridx] = 0; } } - // could not bind the column reference, this should never happen and indicates a bug in the code - // generate an error message - string bound_columns = "["; - for (idx_t i = 0; i < bindings.size(); i++) { - if (i != 0) { - bound_columns += " "; - } - bound_columns += to_string(bindings[i].table_index) + "." + to_string(bindings[i].column_index); + count = remaining; + if (count == 0) { + return; } - bound_columns += "]"; - - throw InternalException("Failed to bind column reference \"%s\" [%d.%d] (bindings: %s)", expr.alias, - expr.binding.table_index, expr.binding.column_index, bound_columns); -} - -} // namespace duckdb - - - - - -namespace duckdb { - -struct BothInclusiveBetweenOperator { - template - static inline bool Operation(T input, T lower, T upper) { - return GreaterThanEquals::Operation(input, lower) && LessThanEquals::Operation(input, upper); - } -}; + // Compute the first round of hashes + Vector child(ListVector::GetEntry(input), cursor, count); -struct LowerInclusiveBetweenOperator { - template - static inline bool Operation(T input, T lower, T upper) { - return GreaterThanEquals::Operation(input, lower) && LessThan::Operation(input, upper); + if (FIRST_HASH) { + VectorOperations::Hash(child, hashes, unprocessed, count); + } else { + VectorOperations::CombineHash(hashes, child, unprocessed, count); } -}; -struct UpperInclusiveBetweenOperator { - template - static inline bool Operation(T input, T lower, T upper) { - return GreaterThan::Operation(input, lower) && LessThanEquals::Operation(input, upper); - } -}; + // Combine the hashes for the remaining positions until there are none left + for (idx_t position = 1;; ++position) { + idx_t remaining = 0; + for (idx_t i = 
0; i < count; ++i) { + const auto ridx = unprocessed.get_index(i); + const auto lidx = idata.sel->get_index(ridx); + const auto &entry = ldata[lidx]; + if (entry.length > position) { + // Entry still has values to hash + cursor.set_index(ridx, cursor.get_index(ridx) + 1); + unprocessed.set_index(remaining++, ridx); + } + } + if (remaining == 0) { + break; + } -struct ExclusiveBetweenOperator { - template - static inline bool Operation(T input, T lower, T upper) { - return GreaterThan::Operation(input, lower) && LessThan::Operation(input, upper); + VectorOperations::CombineHash(hashes, child, unprocessed, remaining); + count = remaining; } -}; +} -template -static idx_t BetweenLoopTypeSwitch(Vector &input, Vector &lower, Vector &upper, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel) { +template +static inline void HashTypeSwitch(Vector &input, Vector &result, const SelectionVector *rsel, idx_t count) { + D_ASSERT(result.GetType().id() == LogicalTypeId::HASH); switch (input.GetType().InternalType()) { case PhysicalType::BOOL: case PhysicalType::INT8: - return TernaryExecutor::Select(input, lower, upper, sel, count, true_sel, - false_sel); + TemplatedLoopHash(input, result, rsel, count); + break; case PhysicalType::INT16: - return TernaryExecutor::Select(input, lower, upper, sel, count, true_sel, - false_sel); + TemplatedLoopHash(input, result, rsel, count); + break; case PhysicalType::INT32: - return TernaryExecutor::Select(input, lower, upper, sel, count, true_sel, - false_sel); + TemplatedLoopHash(input, result, rsel, count); + break; case PhysicalType::INT64: - return TernaryExecutor::Select(input, lower, upper, sel, count, true_sel, - false_sel); - case PhysicalType::INT128: - return TernaryExecutor::Select(input, lower, upper, sel, count, true_sel, - false_sel); + TemplatedLoopHash(input, result, rsel, count); + break; case PhysicalType::UINT8: - return TernaryExecutor::Select(input, lower, upper, sel, count, 
true_sel, - false_sel); + TemplatedLoopHash(input, result, rsel, count); + break; case PhysicalType::UINT16: - return TernaryExecutor::Select(input, lower, upper, sel, count, true_sel, - false_sel); + TemplatedLoopHash(input, result, rsel, count); + break; case PhysicalType::UINT32: - return TernaryExecutor::Select(input, lower, upper, sel, count, true_sel, - false_sel); + TemplatedLoopHash(input, result, rsel, count); + break; case PhysicalType::UINT64: - return TernaryExecutor::Select(input, lower, upper, sel, count, true_sel, - false_sel); + TemplatedLoopHash(input, result, rsel, count); + break; + case PhysicalType::INT128: + TemplatedLoopHash(input, result, rsel, count); + break; case PhysicalType::FLOAT: - return TernaryExecutor::Select(input, lower, upper, sel, count, true_sel, false_sel); + TemplatedLoopHash(input, result, rsel, count); + break; case PhysicalType::DOUBLE: - return TernaryExecutor::Select(input, lower, upper, sel, count, true_sel, - false_sel); + TemplatedLoopHash(input, result, rsel, count); + break; + case PhysicalType::INTERVAL: + TemplatedLoopHash(input, result, rsel, count); + break; case PhysicalType::VARCHAR: - return TernaryExecutor::Select(input, lower, upper, sel, count, true_sel, - false_sel); + TemplatedLoopHash(input, result, rsel, count); + break; + case PhysicalType::MAP: + case PhysicalType::STRUCT: + StructLoopHash(input, result, rsel, count); + break; + case PhysicalType::LIST: + ListLoopHash(input, result, rsel, count); + break; default: - throw InvalidTypeException(input.GetType(), "Invalid type for BETWEEN"); + throw InvalidTypeException(input.GetType(), "Invalid type for hash"); } } -unique_ptr ExpressionExecutor::InitializeState(BoundBetweenExpression &expr, - ExpressionExecutorState &root) { - auto result = make_unique(expr, root); - result->AddChild(expr.input.get()); - result->AddChild(expr.lower.get()); - result->AddChild(expr.upper.get()); - result->Finalize(); - return result; +void VectorOperations::Hash(Vector 
&input, Vector &result, idx_t count) { + HashTypeSwitch(input, result, nullptr, count); } -void ExpressionExecutor::Execute(BoundBetweenExpression &expr, ExpressionState *state, const SelectionVector *sel, - idx_t count, Vector &result) { - // resolve the children - Vector input, lower, upper; - input.Reference(state->intermediate_chunk.data[0]); - lower.Reference(state->intermediate_chunk.data[1]); - upper.Reference(state->intermediate_chunk.data[2]); - - Execute(*expr.input, state->child_states[0].get(), sel, count, input); - Execute(*expr.lower, state->child_states[1].get(), sel, count, lower); - Execute(*expr.upper, state->child_states[2].get(), sel, count, upper); +void VectorOperations::Hash(Vector &input, Vector &result, const SelectionVector &sel, idx_t count) { + HashTypeSwitch(input, result, &sel, count); +} - Vector intermediate1(LogicalType::BOOLEAN); - Vector intermediate2(LogicalType::BOOLEAN); +static inline hash_t CombineHashScalar(hash_t a, hash_t b) { + return (a * UINT64_C(0xbf58476d1ce4e5b9)) ^ b; +} - if (expr.upper_inclusive && expr.lower_inclusive) { - VectorOperations::GreaterThanEquals(input, lower, intermediate1, count); - VectorOperations::LessThanEquals(input, upper, intermediate2, count); - } else if (expr.lower_inclusive) { - VectorOperations::GreaterThanEquals(input, lower, intermediate1, count); - VectorOperations::LessThan(input, upper, intermediate2, count); - } else if (expr.upper_inclusive) { - VectorOperations::GreaterThan(input, lower, intermediate1, count); - VectorOperations::LessThanEquals(input, upper, intermediate2, count); +template +static inline void TightLoopCombineHashConstant(T *__restrict ldata, hash_t constant_hash, hash_t *__restrict hash_data, + const SelectionVector *rsel, idx_t count, + const SelectionVector *__restrict sel_vector, ValidityMask &mask) { + if (!mask.AllValid()) { + for (idx_t i = 0; i < count; i++) { + auto ridx = HAS_RSEL ? 
rsel->get_index(i) : i; + auto idx = sel_vector->get_index(ridx); + auto other_hash = HashOp::Operation(ldata[idx], !mask.RowIsValid(idx)); + hash_data[ridx] = CombineHashScalar(constant_hash, other_hash); + } } else { - VectorOperations::GreaterThan(input, lower, intermediate1, count); - VectorOperations::LessThan(input, upper, intermediate2, count); + for (idx_t i = 0; i < count; i++) { + auto ridx = HAS_RSEL ? rsel->get_index(i) : i; + auto idx = sel_vector->get_index(ridx); + auto other_hash = duckdb::Hash(ldata[idx]); + hash_data[ridx] = CombineHashScalar(constant_hash, other_hash); + } } - VectorOperations::And(intermediate1, intermediate2, result, count); } -idx_t ExpressionExecutor::Select(BoundBetweenExpression &expr, ExpressionState *state, const SelectionVector *sel, - idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { - // resolve the children - Vector input, lower, upper; - input.Reference(state->intermediate_chunk.data[0]); - lower.Reference(state->intermediate_chunk.data[1]); - upper.Reference(state->intermediate_chunk.data[2]); - - Execute(*expr.input, state->child_states[0].get(), sel, count, input); - Execute(*expr.lower, state->child_states[1].get(), sel, count, lower); - Execute(*expr.upper, state->child_states[2].get(), sel, count, upper); - - if (expr.upper_inclusive && expr.lower_inclusive) { - return BetweenLoopTypeSwitch(input, lower, upper, sel, count, true_sel, - false_sel); - } else if (expr.lower_inclusive) { - return BetweenLoopTypeSwitch(input, lower, upper, sel, count, true_sel, - false_sel); - } else if (expr.upper_inclusive) { - return BetweenLoopTypeSwitch(input, lower, upper, sel, count, true_sel, - false_sel); +template +static inline void TightLoopCombineHash(T *__restrict ldata, hash_t *__restrict hash_data, const SelectionVector *rsel, + idx_t count, const SelectionVector *__restrict sel_vector, ValidityMask &mask) { + if (!mask.AllValid()) { + for (idx_t i = 0; i < count; i++) { + auto ridx = HAS_RSEL ? 
rsel->get_index(i) : i; + auto idx = sel_vector->get_index(ridx); + auto other_hash = HashOp::Operation(ldata[idx], !mask.RowIsValid(idx)); + hash_data[ridx] = CombineHashScalar(hash_data[ridx], other_hash); + } } else { - return BetweenLoopTypeSwitch(input, lower, upper, sel, count, true_sel, false_sel); + for (idx_t i = 0; i < count; i++) { + auto ridx = HAS_RSEL ? rsel->get_index(i) : i; + auto idx = sel_vector->get_index(ridx); + auto other_hash = duckdb::Hash(ldata[idx]); + hash_data[ridx] = CombineHashScalar(hash_data[ridx], other_hash); + } } } -} // namespace duckdb - +template +void TemplatedLoopCombineHash(Vector &input, Vector &hashes, const SelectionVector *rsel, idx_t count) { + if (input.GetVectorType() == VectorType::CONSTANT_VECTOR && hashes.GetVectorType() == VectorType::CONSTANT_VECTOR) { + auto ldata = ConstantVector::GetData(input); + auto hash_data = ConstantVector::GetData(hashes); + auto other_hash = HashOp::Operation(*ldata, ConstantVector::IsNull(input)); + *hash_data = CombineHashScalar(*hash_data, other_hash); + } else { + VectorData idata; + input.Orrify(count, idata); + if (hashes.GetVectorType() == VectorType::CONSTANT_VECTOR) { + // mix constant with non-constant, first get the constant value + auto constant_hash = *ConstantVector::GetData(hashes); + // now re-initialize the hashes vector to an empty flat vector + hashes.SetVectorType(VectorType::FLAT_VECTOR); + TightLoopCombineHashConstant((T *)idata.data, constant_hash, + FlatVector::GetData(hashes), rsel, count, idata.sel, + idata.validity); + } else { + D_ASSERT(hashes.GetVectorType() == VectorType::FLAT_VECTOR); + TightLoopCombineHash((T *)idata.data, FlatVector::GetData(hashes), rsel, count, + idata.sel, idata.validity); + } + } +} +template +static inline void CombineHashTypeSwitch(Vector &hashes, Vector &input, const SelectionVector *rsel, idx_t count) { + D_ASSERT(hashes.GetType().id() == LogicalTypeId::HASH); + switch (input.GetType().InternalType()) { + case 
PhysicalType::BOOL: + case PhysicalType::INT8: + TemplatedLoopCombineHash(input, hashes, rsel, count); + break; + case PhysicalType::INT16: + TemplatedLoopCombineHash(input, hashes, rsel, count); + break; + case PhysicalType::INT32: + TemplatedLoopCombineHash(input, hashes, rsel, count); + break; + case PhysicalType::INT64: + TemplatedLoopCombineHash(input, hashes, rsel, count); + break; + case PhysicalType::UINT8: + TemplatedLoopCombineHash(input, hashes, rsel, count); + break; + case PhysicalType::UINT16: + TemplatedLoopCombineHash(input, hashes, rsel, count); + break; + case PhysicalType::UINT32: + TemplatedLoopCombineHash(input, hashes, rsel, count); + break; + case PhysicalType::UINT64: + TemplatedLoopCombineHash(input, hashes, rsel, count); + break; + case PhysicalType::INT128: + TemplatedLoopCombineHash(input, hashes, rsel, count); + break; + case PhysicalType::FLOAT: + TemplatedLoopCombineHash(input, hashes, rsel, count); + break; + case PhysicalType::DOUBLE: + TemplatedLoopCombineHash(input, hashes, rsel, count); + break; + case PhysicalType::INTERVAL: + TemplatedLoopCombineHash(input, hashes, rsel, count); + break; + case PhysicalType::VARCHAR: + TemplatedLoopCombineHash(input, hashes, rsel, count); + break; + case PhysicalType::MAP: + case PhysicalType::STRUCT: + StructLoopHash(input, hashes, rsel, count); + break; + case PhysicalType::LIST: + ListLoopHash(input, hashes, rsel, count); + break; + default: + throw InvalidTypeException(input.GetType(), "Invalid type for hash"); + } +} +void VectorOperations::CombineHash(Vector &hashes, Vector &input, idx_t count) { + CombineHashTypeSwitch(hashes, input, nullptr, count); +} -namespace duckdb { +void VectorOperations::CombineHash(Vector &hashes, Vector &input, const SelectionVector &rsel, idx_t count) { + CombineHashTypeSwitch(hashes, input, &rsel, count); +} -void Case(Vector &res_true, Vector &res_false, Vector &result, SelectionVector &tside, idx_t tcount, - SelectionVector &fside, idx_t fcount); +} // 
namespace duckdb -unique_ptr ExpressionExecutor::InitializeState(BoundCaseExpression &expr, - ExpressionExecutorState &root) { - auto result = make_unique(expr, root); - result->AddChild(expr.check.get()); - result->AddChild(expr.result_if_true.get()); - result->AddChild(expr.result_if_false.get()); - result->Finalize(); - return result; -} -void ExpressionExecutor::Execute(BoundCaseExpression &expr, ExpressionState *state, const SelectionVector *sel, - idx_t count, Vector &result) { - Vector res_true, res_false; - res_true.Reference(state->intermediate_chunk.data[1]); - res_false.Reference(state->intermediate_chunk.data[2]); - auto check_state = state->child_states[0].get(); - auto res_true_state = state->child_states[1].get(); - auto res_false_state = state->child_states[2].get(); - // first execute the check expression - SelectionVector true_sel(STANDARD_VECTOR_SIZE), false_sel(STANDARD_VECTOR_SIZE); - idx_t tcount = Select(*expr.check, check_state, sel, count, &true_sel, &false_sel); - idx_t fcount = count - tcount; - if (fcount == 0) { - // everything is true, only execute TRUE side - Execute(*expr.result_if_true, res_true_state, sel, count, result); - } else if (tcount == 0) { - // everything is false, only execute FALSE side - Execute(*expr.result_if_false, res_false_state, sel, count, result); - } else { - // have to execute both and mix and match - Execute(*expr.result_if_true, res_true_state, &true_sel, tcount, res_true); - Execute(*expr.result_if_false, res_false_state, &false_sel, fcount, res_false); +namespace duckdb { - Case(res_true, res_false, result, true_sel, tcount, false_sel, fcount); - if (sel) { - result.Slice(*sel, count); +template +static void CopyToStorageLoop(VectorData &vdata, idx_t count, data_ptr_t target) { + auto ldata = (T *)vdata.data; + auto result_data = (T *)target; + for (idx_t i = 0; i < count; i++) { + auto idx = vdata.sel->get_index(i); + if (!vdata.validity.RowIsValid(idx)) { + result_data[i] = NullValue(); + } else { + 
result_data[i] = ldata[idx]; } } } -template -void TemplatedFillLoop(Vector &vector, Vector &result, SelectionVector &sel, sel_t count) { - result.SetVectorType(VectorType::FLAT_VECTOR); - auto res = FlatVector::GetData(result); - auto &result_mask = FlatVector::Validity(result); - if (vector.GetVectorType() == VectorType::CONSTANT_VECTOR) { - auto data = ConstantVector::GetData(vector); - if (ConstantVector::IsNull(vector)) { - for (idx_t i = 0; i < count; i++) { - result_mask.SetInvalid(sel.get_index(i)); - } - } else { - for (idx_t i = 0; i < count; i++) { - res[sel.get_index(i)] = *data; - } - } - } else { - VectorData vdata; - vector.Orrify(count, vdata); - auto data = (T *)vdata.data; - for (idx_t i = 0; i < count; i++) { - auto source_idx = vdata.sel->get_index(i); - auto res_idx = sel.get_index(i); +void VectorOperations::WriteToStorage(Vector &source, idx_t count, data_ptr_t target) { + if (count == 0) { + return; + } + VectorData vdata; + source.Orrify(count, vdata); - res[res_idx] = data[source_idx]; - result_mask.Set(res_idx, vdata.validity.RowIsValid(source_idx)); - } + switch (source.GetType().InternalType()) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + CopyToStorageLoop(vdata, count, target); + break; + case PhysicalType::INT16: + CopyToStorageLoop(vdata, count, target); + break; + case PhysicalType::INT32: + CopyToStorageLoop(vdata, count, target); + break; + case PhysicalType::INT64: + CopyToStorageLoop(vdata, count, target); + break; + case PhysicalType::UINT8: + CopyToStorageLoop(vdata, count, target); + break; + case PhysicalType::UINT16: + CopyToStorageLoop(vdata, count, target); + break; + case PhysicalType::UINT32: + CopyToStorageLoop(vdata, count, target); + break; + case PhysicalType::UINT64: + CopyToStorageLoop(vdata, count, target); + break; + case PhysicalType::INT128: + CopyToStorageLoop(vdata, count, target); + break; + case PhysicalType::FLOAT: + CopyToStorageLoop(vdata, count, target); + break; + case 
PhysicalType::DOUBLE: + CopyToStorageLoop(vdata, count, target); + break; + case PhysicalType::INTERVAL: + CopyToStorageLoop(vdata, count, target); + break; + default: + throw NotImplementedException("Unimplemented type for WriteToStorage"); } } template -void TemplatedCaseLoop(Vector &res_true, Vector &res_false, Vector &result, SelectionVector &tside, idx_t tcount, - SelectionVector &fside, idx_t fcount) { - TemplatedFillLoop(res_true, result, tside, tcount); - TemplatedFillLoop(res_false, result, fside, fcount); +static void ReadFromStorageLoop(data_ptr_t source, idx_t count, Vector &result) { + auto ldata = (T *)source; + auto result_data = FlatVector::GetData(result); + for (idx_t i = 0; i < count; i++) { + result_data[i] = ldata[i]; + } } -void Case(Vector &res_true, Vector &res_false, Vector &result, SelectionVector &tside, idx_t tcount, - SelectionVector &fside, idx_t fcount) { - D_ASSERT(res_true.GetType() == res_false.GetType() && res_true.GetType() == result.GetType()); - +void VectorOperations::ReadFromStorage(data_ptr_t source, idx_t count, Vector &result) { + result.SetVectorType(VectorType::FLAT_VECTOR); switch (result.GetType().InternalType()) { case PhysicalType::BOOL: case PhysicalType::INT8: - TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + ReadFromStorageLoop(source, count, result); break; case PhysicalType::INT16: - TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + ReadFromStorageLoop(source, count, result); break; case PhysicalType::INT32: - TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + ReadFromStorageLoop(source, count, result); break; case PhysicalType::INT64: - TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + ReadFromStorageLoop(source, count, result); break; case PhysicalType::UINT8: - TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + ReadFromStorageLoop(source, count, result); break; 
case PhysicalType::UINT16: - TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + ReadFromStorageLoop(source, count, result); break; case PhysicalType::UINT32: - TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + ReadFromStorageLoop(source, count, result); break; case PhysicalType::UINT64: - TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + ReadFromStorageLoop(source, count, result); break; case PhysicalType::INT128: - TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + ReadFromStorageLoop(source, count, result); break; case PhysicalType::FLOAT: - TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + ReadFromStorageLoop(source, count, result); break; case PhysicalType::DOUBLE: - TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + ReadFromStorageLoop(source, count, result); break; - case PhysicalType::VARCHAR: - TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); - StringVector::AddHeapReference(result, res_true); - StringVector::AddHeapReference(result, res_false); + case PhysicalType::INTERVAL: + ReadFromStorageLoop(source, count, result); break; - case PhysicalType::LIST: { - auto result_vector = make_unique(result.GetType().child_types()[0].second); - ListVector::SetEntry(result, move(result_vector)); + default: + throw NotImplementedException("Unimplemented type for ReadFromStorage"); + } +} - idx_t offset = 0; - if (ListVector::HasEntry(res_true)) { - auto &true_child = ListVector::GetEntry(res_true); - offset += ListVector::GetListSize(res_true); - ListVector::Append(result, true_child, ListVector::GetListSize(res_true)); - } - if (ListVector::HasEntry(res_false)) { - auto &false_child = ListVector::GetEntry(res_false); - ListVector::Append(result, false_child, ListVector::GetListSize(res_false)); - } +} // namespace duckdb - // all the false offsets need to be 
incremented by true_child.count - TemplatedFillLoop(res_true, result, tside, tcount); - // FIXME the nullmask here is likely borked - // TODO uuugly - VectorData fdata; - res_false.Orrify(fcount, fdata); - auto data = (list_entry_t *)fdata.data; - auto res = FlatVector::GetData(result); - auto &mask = FlatVector::Validity(result); +#include - for (idx_t i = 0; i < fcount; i++) { - auto fidx = fdata.sel->get_index(i); - auto res_idx = fside.get_index(i); - auto list_entry = data[fidx]; - list_entry.offset += offset; - res[res_idx] = list_entry; - mask.Set(res_idx, fdata.validity.RowIsValid(fidx)); +namespace duckdb { + +AdaptiveFilter::AdaptiveFilter(const Expression &expr) + : iteration_count(0), observe_interval(10), execute_interval(20), warmup(true) { + auto &conj_expr = (const BoundConjunctionExpression &)expr; + D_ASSERT(conj_expr.children.size() > 1); + for (idx_t idx = 0; idx < conj_expr.children.size(); idx++) { + permutation.push_back(idx); + if (idx != conj_expr.children.size() - 1) { + swap_likeliness.push_back(100); } + } + right_random_border = 100 * (conj_expr.children.size() - 1); +} - result.Verify(tside, tcount); - result.Verify(fside, fcount); - break; +AdaptiveFilter::AdaptiveFilter(TableFilterSet *table_filters) + : iteration_count(0), observe_interval(10), execute_interval(20), warmup(true) { + for (auto &table_filter : table_filters->filters) { + permutation.push_back(table_filter.first); + swap_likeliness.push_back(100); } - default: - throw NotImplementedException("Unimplemented type for case expression: %s", result.GetType().ToString()); + swap_likeliness.pop_back(); + right_random_border = 100 * (table_filters->filters.size() - 1); +} +void AdaptiveFilter::AdaptRuntimeStatistics(double duration) { + iteration_count++; + runtime_sum += duration; + + if (!warmup) { + // the last swap was observed + if (observe && iteration_count == observe_interval) { + // keep swap if runtime decreased, else reverse swap + if (prev_mean - (runtime_sum / 
iteration_count) <= 0) { + // reverse swap because runtime didn't decrease + std::swap(permutation[swap_idx], permutation[swap_idx + 1]); + + // decrease swap likeliness, but make sure there is always a small likeliness left + if (swap_likeliness[swap_idx] > 1) { + swap_likeliness[swap_idx] /= 2; + } + } else { + // keep swap because runtime decreased, reset likeliness + swap_likeliness[swap_idx] = 100; + } + observe = false; + + // reset values + iteration_count = 0; + runtime_sum = 0.0; + } else if (!observe && iteration_count == execute_interval) { + // save old mean to evaluate swap + prev_mean = runtime_sum / iteration_count; + + // get swap index and swap likeliness + std::uniform_int_distribution distribution(1, right_random_border); // a <= i <= b + idx_t random_number = distribution(generator) - 1; + + swap_idx = random_number / 100; // index to be swapped + idx_t likeliness = random_number - 100 * swap_idx; // random number between [0, 100) + + // check if swap is going to happen + if (swap_likeliness[swap_idx] > likeliness) { // always true for the first swap of an index + // swap + std::swap(permutation[swap_idx], permutation[swap_idx + 1]); + + // observe whether swap will be applied + observe = true; + } + + // reset values + iteration_count = 0; + runtime_sum = 0.0; + } + } else { + if (iteration_count == 5) { + // initially set all values + iteration_count = 0; + runtime_sum = 0.0; + observe = false; + warmup = false; + } } } } // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/aggregate_hashtable.hpp +// +// +//===----------------------------------------------------------------------===// + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/base_aggregate_hashtable.hpp +// +// +//===----------------------------------------------------------------------===// + + + namespace duckdb { +class 
BufferManager; + +class BaseAggregateHashTable { +public: + BaseAggregateHashTable(BufferManager &buffer_manager, vector payload_types); + virtual ~BaseAggregateHashTable() { + } + +protected: + BufferManager &buffer_manager; + //! A helper for managing offsets into the data buffers + RowLayout layout; + //! The types of the payload columns stored in the hashtable + vector payload_types; +}; + +} // namespace duckdb + + +namespace duckdb { +class BlockHandle; +class BufferHandle; +class RowDataCollection; + +//! GroupedAggregateHashTable is a linear probing HT that is used for computing +//! aggregates +/*! + GroupedAggregateHashTable is a HT that is used for computing aggregates. It takes + as input the set of groups and the types of the aggregates to compute and + stores them in the HT. It uses linear probing for collision resolution. +*/ + +// two part hash table +// hashes and payload +// hashes layout: +// [SALT][PAGE_NR][PAGE_OFFSET] +// [SALT] are the high bits of the hash value, e.g. 16 for 64 bit hashes +// [PAGE_NR] is the buffer managed payload page index +// [PAGE_OFFSET] is the logical entry offset into said payload page + +// NOTE: PAGE_NR and PAGE_OFFSET are reversed for 64 bit HTs because struct packing + +// payload layout +// [VALIDITY][GROUPS][HASH][PADDING][PAYLOAD] +// [VALIDITY] is the validity bits of the data columns (including the HASH) +// [GROUPS] is the group data, could be multiple values, fixed size, strings are elsewhere +// [HASH] is the hash data of the groups +// [PADDING] is gunk data to align payload properly +// [PAYLOAD] is the payload (i.e. 
the aggregate states) +struct aggr_ht_entry_64 { + uint16_t salt; + uint16_t page_offset; + uint32_t page_nr; // this has to come last because alignment +}; + +struct aggr_ht_entry_32 { + uint8_t salt; + uint8_t page_nr; + uint16_t page_offset; +}; + +enum HtEntryType { HT_WIDTH_32, HT_WIDTH_64 }; + +class GroupedAggregateHashTable : public BaseAggregateHashTable { +public: + GroupedAggregateHashTable(BufferManager &buffer_manager, vector group_types, + vector payload_types, const vector &aggregates, + HtEntryType entry_type = HtEntryType::HT_WIDTH_64); + GroupedAggregateHashTable(BufferManager &buffer_manager, vector group_types, + vector payload_types, vector aggregates, + HtEntryType entry_type = HtEntryType::HT_WIDTH_64); + GroupedAggregateHashTable(BufferManager &buffer_manager, vector group_types); + ~GroupedAggregateHashTable() override; + + //! Add the given data to the HT, computing the aggregates grouped by the + //! data in the group chunk. When resize = true, aggregates will not be + //! computed but instead just assigned. + idx_t AddChunk(DataChunk &groups, DataChunk &payload); + idx_t AddChunk(DataChunk &groups, Vector &group_hashes, DataChunk &payload); + + //! Scan the HT starting from the scan_position until the result and group + //! chunks are filled. scan_position will be updated by this function. + //! Returns the amount of elements found. + idx_t Scan(idx_t &scan_position, DataChunk &result); + + //! Fetch the aggregates for specific groups from the HT and place them in the result + void FetchAggregates(DataChunk &groups, DataChunk &result); + + //! Finds or creates groups in the hashtable using the specified group keys. The addresses vector will be filled + //! with pointers to the groups in the hash table, and the new_groups selection vector will point to the newly + //! created groups. The return value is the amount of newly created groups. 
+ idx_t FindOrCreateGroups(DataChunk &groups, Vector &group_hashes, Vector &addresses_out, + SelectionVector &new_groups_out); + idx_t FindOrCreateGroups(DataChunk &groups, Vector &addresses_out, SelectionVector &new_groups_out); + void FindOrCreateGroups(DataChunk &groups, Vector &addresses_out); + + //! Executes the filter(if any) and update the aggregates + void Combine(GroupedAggregateHashTable &other); + + idx_t Size() { + return entries; + } + + idx_t MaxCapacity(); + + void Partition(vector &partition_hts, hash_t mask, idx_t shift); + + void Finalize(); + + //! The stringheap of the AggregateHashTable + unique_ptr string_heap; + + //! The hash table load factor, when a resize is triggered + constexpr static float LOAD_FACTOR = 1.5; + constexpr static uint8_t HASH_WIDTH = sizeof(hash_t); + +private: + HtEntryType entry_type; + + //! The total tuple size + idx_t tuple_size; + //! The amount of tuples that fit in a single block + idx_t tuples_per_block; + //! The capacity of the HT. This can be increased using + //! GroupedAggregateHashTable::Resize + idx_t capacity; + //! The amount of entries stored in the HT currently + idx_t entries; + //! The data of the HT + vector> payload_hds; + vector payload_hds_ptrs; + + //! The hashes of the HT + unique_ptr hashes_hdl; + data_ptr_t hashes_hdl_ptr; + data_ptr_t hashes_end_ptr; // of hashes + idx_t hash_offset; // Offset into the layout of the hash column + + hash_t hash_prefix_shift; + idx_t payload_page_offset; + + //! Bitmask for getting relevant bits from the hashes to determine the position + hash_t bitmask; + + //! 
Pointer vector for Scan() + Vector addresses; + + vector> distinct_hashes; + + bool is_finalized; + + // some stuff from FindOrCreateGroupsInternal() to avoid allocation there + Vector ht_offsets; + Vector hash_salts; + SelectionVector group_compare_vector; + SelectionVector no_match_vector; + SelectionVector empty_vector; + vector predicates; + +private: + GroupedAggregateHashTable(const GroupedAggregateHashTable &) = delete; + + //! Resize the HT to the specified size. Must be larger than the current + //! size. + void Destroy(); + + void Verify(); + + void FlushMove(Vector &source_addresses, Vector &source_hashes, idx_t count); + void NewBlock(); + + template + void VerifyInternal(); + template + void Resize(idx_t size); + template + idx_t FindOrCreateGroupsInternal(DataChunk &groups, Vector &group_hashes, Vector &addresses, + SelectionVector &new_groups); + + template > + void PayloadApply(FUNC fun); +}; + +} // namespace duckdb + + + + + + -unique_ptr ExpressionExecutor::InitializeState(BoundCastExpression &expr, - ExpressionExecutorState &root) { - auto result = make_unique(expr, root); - result->AddChild(expr.child.get()); - result->Finalize(); - return result; -} -void ExpressionExecutor::Execute(BoundCastExpression &expr, ExpressionState *state, const SelectionVector *sel, - idx_t count, Vector &result) { - // resolve the child - Vector child; - child.Reference(state->intermediate_chunk.data[0]); - auto child_state = state->child_states[0].get(); - Execute(*expr.child, child_state, sel, count, child); - if (expr.child->return_type == expr.return_type) { - // NOP cast - result.Reference(child); - } else { - // cast it to the type specified by the cast expression - D_ASSERT(result.GetType() == expr.return_type); - VectorOperations::Cast(child, result, count); - } -} -} // namespace duckdb +#include +#include namespace duckdb { -unique_ptr ExpressionExecutor::InitializeState(BoundComparisonExpression &expr, - ExpressionExecutorState &root) { - auto result = 
make_unique(expr, root); - result->AddChild(expr.left.get()); - result->AddChild(expr.right.get()); - result->Finalize(); - return result; +using ValidityBytes = RowLayout::ValidityBytes; + +GroupedAggregateHashTable::GroupedAggregateHashTable(BufferManager &buffer_manager, vector group_types, + vector payload_types, + const vector &bindings, + HtEntryType entry_type) + : GroupedAggregateHashTable(buffer_manager, move(group_types), move(payload_types), + AggregateObject::CreateAggregateObjects(bindings), entry_type) { } -void ExpressionExecutor::Execute(BoundComparisonExpression &expr, ExpressionState *state, const SelectionVector *sel, - idx_t count, Vector &result) { - // resolve the children - Vector left, right; - left.Reference(state->intermediate_chunk.data[0]); - right.Reference(state->intermediate_chunk.data[1]); +GroupedAggregateHashTable::GroupedAggregateHashTable(BufferManager &buffer_manager, vector group_types) + : GroupedAggregateHashTable(buffer_manager, move(group_types), {}, vector()) { +} - Execute(*expr.left, state->child_states[0].get(), sel, count, left); - Execute(*expr.right, state->child_states[1].get(), sel, count, right); +GroupedAggregateHashTable::GroupedAggregateHashTable(BufferManager &buffer_manager, vector group_types_p, + vector payload_types_p, + vector aggregate_objects_p, + HtEntryType entry_type) + : BaseAggregateHashTable(buffer_manager, move(payload_types_p)), entry_type(entry_type), capacity(0), entries(0), + payload_page_offset(0), addresses(LogicalType::POINTER), is_finalized(false), ht_offsets(LogicalTypeId::BIGINT), + hash_salts(LogicalTypeId::SMALLINT), group_compare_vector(STANDARD_VECTOR_SIZE), + no_match_vector(STANDARD_VECTOR_SIZE), empty_vector(STANDARD_VECTOR_SIZE) { - switch (expr.type) { - case ExpressionType::COMPARE_EQUAL: - VectorOperations::Equals(left, right, result, count); - break; - case ExpressionType::COMPARE_NOTEQUAL: - VectorOperations::NotEquals(left, right, result, count); - break; - case 
ExpressionType::COMPARE_LESSTHAN: - VectorOperations::LessThan(left, right, result, count); - break; - case ExpressionType::COMPARE_GREATERTHAN: - VectorOperations::GreaterThan(left, right, result, count); - break; - case ExpressionType::COMPARE_LESSTHANOREQUALTO: - VectorOperations::LessThanEquals(left, right, result, count); - break; - case ExpressionType::COMPARE_GREATERTHANOREQUALTO: - VectorOperations::GreaterThanEquals(left, right, result, count); - break; - case ExpressionType::COMPARE_DISTINCT_FROM: - VectorOperations::DistinctFrom(left, right, result, count); + // Append hash column to the end and initialise the row layout + group_types_p.emplace_back(LogicalType::HASH); + layout.Initialize(move(group_types_p), move(aggregate_objects_p)); + + // HT layout + hash_offset = layout.GetOffsets()[layout.ColumnCount() - 1]; + + tuple_size = layout.GetRowWidth(); + + D_ASSERT(tuple_size <= Storage::BLOCK_SIZE); + tuples_per_block = Storage::BLOCK_SIZE / tuple_size; + hashes_hdl = buffer_manager.Allocate(Storage::BLOCK_SIZE); + hashes_hdl_ptr = hashes_hdl->Ptr(); + + switch (entry_type) { + case HtEntryType::HT_WIDTH_64: { + hash_prefix_shift = (HASH_WIDTH - sizeof(aggr_ht_entry_64::salt)) * 8; + Resize(STANDARD_VECTOR_SIZE * 2); break; - case ExpressionType::COMPARE_NOT_DISTINCT_FROM: - VectorOperations::NotDistinctFrom(left, right, result, count); + } + case HtEntryType::HT_WIDTH_32: { + hash_prefix_shift = (HASH_WIDTH - sizeof(aggr_ht_entry_32::salt)) * 8; + Resize(STANDARD_VECTOR_SIZE * 2); break; + } default: - throw NotImplementedException("Unknown comparison type!"); + throw InternalException("Unknown HT entry width"); } -} -template -static idx_t TemplatedSelectOperation(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel) { - // the inplace loops take the result as the last parameter - switch (left.GetType().InternalType()) { - case PhysicalType::BOOL: - case PhysicalType::INT8: - 
return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); - case PhysicalType::INT16: - return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); - case PhysicalType::INT32: - return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); - case PhysicalType::INT64: - return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); - case PhysicalType::UINT8: - return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); - case PhysicalType::UINT16: - return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); - case PhysicalType::UINT32: - return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); - case PhysicalType::UINT64: - return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); - case PhysicalType::INT128: - return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); - case PhysicalType::POINTER: - return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); - case PhysicalType::FLOAT: - return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); - case PhysicalType::DOUBLE: - return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); - case PhysicalType::INTERVAL: - return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); - case PhysicalType::VARCHAR: - return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); - default: - throw InvalidTypeException(left.GetType(), "Invalid type for comparison"); + // create additional hash tables for distinct aggrs + auto &aggregates = layout.GetAggregates(); + distinct_hashes.resize(aggregates.size()); + + idx_t payload_idx = 0; + for (idx_t i = 0; i < aggregates.size(); i++) { + auto &aggr = aggregates[i]; + if (aggr.distinct) { + // layout types minus hash column plus aggr return type + vector distinct_group_types(layout.GetTypes()); + (void)distinct_group_types.pop_back(); + for 
(idx_t child_idx = 0; child_idx < aggr.child_count; child_idx++) { + distinct_group_types.push_back(payload_types[payload_idx]); + } + distinct_hashes[i] = make_unique(buffer_manager, distinct_group_types); + } + payload_idx += aggr.child_count; } + predicates.resize(layout.ColumnCount() - 1, ExpressionType::COMPARE_EQUAL); + string_heap = make_unique(buffer_manager, (idx_t)Storage::BLOCK_SIZE, 1, true); } -idx_t VectorOperations::Equals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel) { - return TemplatedSelectOperation(left, right, sel, count, true_sel, false_sel); +GroupedAggregateHashTable::~GroupedAggregateHashTable() { + Destroy(); } -idx_t VectorOperations::NotEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel) { - return TemplatedSelectOperation(left, right, sel, count, true_sel, false_sel); -} +template +void GroupedAggregateHashTable::PayloadApply(FUNC fun) { + if (entries == 0) { + return; + } + idx_t apply_entries = entries; + idx_t page_nr = 0; + idx_t page_offset = 0; -idx_t VectorOperations::GreaterThan(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel) { - return TemplatedSelectOperation(left, right, sel, count, true_sel, false_sel); + for (auto &payload_chunk_ptr : payload_hds_ptrs) { + auto this_entries = MinValue(tuples_per_block, apply_entries); + page_offset = 0; + for (data_ptr_t ptr = payload_chunk_ptr, end = payload_chunk_ptr + this_entries * tuple_size; ptr < end; + ptr += tuple_size) { + fun(page_nr, page_offset++, ptr); + } + apply_entries -= this_entries; + page_nr++; + } + D_ASSERT(apply_entries == 0); } -idx_t VectorOperations::GreaterThanEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel) { - return 
TemplatedSelectOperation(left, right, sel, count, true_sel, false_sel); +void GroupedAggregateHashTable::NewBlock() { + auto pin = buffer_manager.Allocate(Storage::BLOCK_SIZE); + payload_hds.push_back(move(pin)); + payload_hds_ptrs.push_back(payload_hds.back()->Ptr()); + payload_page_offset = 0; } -idx_t VectorOperations::LessThan(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel) { - return TemplatedSelectOperation(left, right, sel, count, true_sel, false_sel); -} +void GroupedAggregateHashTable::Destroy() { + // check if there is a destructor + bool has_destructor = false; + for (auto &aggr : layout.GetAggregates()) { + if (aggr.function.destructor) { + has_destructor = true; + } + } + if (!has_destructor) { + return; + } + // there are aggregates with destructors: loop over the hash table + // and call the destructor method for each of the aggregates + data_ptr_t data_pointers[STANDARD_VECTOR_SIZE]; + Vector state_vector(LogicalType::POINTER, (data_ptr_t)data_pointers); + idx_t count = 0; -idx_t VectorOperations::LessThanEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel) { - return TemplatedSelectOperation(left, right, sel, count, true_sel, false_sel); + PayloadApply([&](idx_t page_nr, idx_t page_offset, data_ptr_t ptr) { + data_pointers[count++] = ptr; + if (count == STANDARD_VECTOR_SIZE) { + RowOperations::DestroyStates(layout, state_vector, count); + count = 0; + } + }); + RowOperations::DestroyStates(layout, state_vector, count); } -idx_t ExpressionExecutor::Select(BoundComparisonExpression &expr, ExpressionState *state, const SelectionVector *sel, - idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { - // resolve the children - Vector left, right; - left.Reference(state->intermediate_chunk.data[0]); - right.Reference(state->intermediate_chunk.data[1]); - - Execute(*expr.left, 
state->child_states[0].get(), sel, count, left); - Execute(*expr.right, state->child_states[1].get(), sel, count, right); +template +void GroupedAggregateHashTable::VerifyInternal() { + auto hashes_ptr = (ENTRY *)hashes_hdl_ptr; + D_ASSERT(payload_hds.size() == payload_hds_ptrs.size()); + idx_t count = 0; + for (idx_t i = 0; i < capacity; i++) { + if (hashes_ptr[i].page_nr > 0) { + D_ASSERT(hashes_ptr[i].page_offset < tuples_per_block); + D_ASSERT(hashes_ptr[i].page_nr <= payload_hds.size()); + auto ptr = payload_hds_ptrs[hashes_ptr[i].page_nr - 1] + ((hashes_ptr[i].page_offset) * tuple_size); + auto hash = Load(ptr + hash_offset); + D_ASSERT((hashes_ptr[i].salt) == (hash >> hash_prefix_shift)); - switch (expr.type) { - case ExpressionType::COMPARE_EQUAL: - return VectorOperations::Equals(left, right, sel, count, true_sel, false_sel); - case ExpressionType::COMPARE_NOTEQUAL: - return VectorOperations::NotEquals(left, right, sel, count, true_sel, false_sel); - case ExpressionType::COMPARE_LESSTHAN: - return VectorOperations::GreaterThan(left, right, sel, count, true_sel, false_sel); - case ExpressionType::COMPARE_GREATERTHAN: - return VectorOperations::GreaterThanEquals(left, right, sel, count, true_sel, false_sel); - case ExpressionType::COMPARE_LESSTHANOREQUALTO: - return VectorOperations::LessThan(left, right, sel, count, true_sel, false_sel); - case ExpressionType::COMPARE_GREATERTHANOREQUALTO: - return VectorOperations::LessThanEquals(left, right, sel, count, true_sel, false_sel); - case ExpressionType::COMPARE_DISTINCT_FROM: - return VectorOperations::SelectDistinctFrom(left, right, sel, count, true_sel, false_sel); - case ExpressionType::COMPARE_NOT_DISTINCT_FROM: - return VectorOperations::SelectNotDistinctFrom(left, right, sel, count, true_sel, false_sel); - default: - throw NotImplementedException("Unknown comparison type!"); + count++; + } } + D_ASSERT(count == entries); } -} // namespace duckdb +idx_t GroupedAggregateHashTable::MaxCapacity() { + idx_t 
max_pages = 0; + idx_t max_tuples = 0; + switch (entry_type) { + case HtEntryType::HT_WIDTH_32: + max_pages = NumericLimits::Maximum(); + max_tuples = NumericLimits::Maximum(); + break; + default: + D_ASSERT(entry_type == HtEntryType::HT_WIDTH_64); + max_pages = NumericLimits::Maximum(); + max_tuples = NumericLimits::Maximum(); + break; + } + return max_pages * MinValue(max_tuples, (idx_t)Storage::BLOCK_SIZE / tuple_size); +} +void GroupedAggregateHashTable::Verify() { +#ifdef DEBUG + switch (entry_type) { + case HtEntryType::HT_WIDTH_32: + VerifyInternal(); + break; + case HtEntryType::HT_WIDTH_64: + VerifyInternal(); + break; + } +#endif +} +template +void GroupedAggregateHashTable::Resize(idx_t size) { + Verify(); + D_ASSERT(!is_finalized); -#include + if (size <= capacity) { + throw InternalException("Cannot downsize a hash table!"); + } + D_ASSERT(size >= STANDARD_VECTOR_SIZE); -namespace duckdb { + // size needs to be a power of 2 + D_ASSERT((size & (size - 1)) == 0); + bitmask = size - 1; -struct ConjunctionState : public ExpressionState { - ConjunctionState(Expression &expr, ExpressionExecutorState &root) : ExpressionState(expr, root) { - adaptive_filter = make_unique(expr); + auto byte_size = size * sizeof(ENTRY); + if (byte_size > (idx_t)Storage::BLOCK_SIZE) { + hashes_hdl = buffer_manager.Allocate(byte_size); + hashes_hdl_ptr = hashes_hdl->Ptr(); } - unique_ptr adaptive_filter; -}; + memset(hashes_hdl_ptr, 0, byte_size); + hashes_end_ptr = hashes_hdl_ptr + byte_size; + capacity = size; -unique_ptr ExpressionExecutor::InitializeState(BoundConjunctionExpression &expr, - ExpressionExecutorState &root) { - auto result = make_unique(expr, root); - for (auto &child : expr.children) { - result->AddChild(child.get()); - } - result->Finalize(); - return move(result); -} + auto hashes_arr = (ENTRY *)hashes_hdl_ptr; -void ExpressionExecutor::Execute(BoundConjunctionExpression &expr, ExpressionState *state, const SelectionVector *sel, - idx_t count, Vector &result) 
{ - // execute the children - for (idx_t i = 0; i < expr.children.size(); i++) { - Vector current_result; - current_result.Reference(state->intermediate_chunk.data[i]); - Execute(*expr.children[i], state->child_states[i].get(), sel, count, current_result); - if (i == 0) { - // move the result - result.Reference(current_result); - } else { - Vector intermediate(LogicalType::BOOLEAN); - // AND/OR together - switch (expr.type) { - case ExpressionType::CONJUNCTION_AND: - VectorOperations::And(current_result, result, intermediate, count); - break; - case ExpressionType::CONJUNCTION_OR: - VectorOperations::Or(current_result, result, intermediate, count); - break; - default: - throw NotImplementedException("Unknown conjunction type!"); + PayloadApply([&](idx_t page_nr, idx_t page_offset, data_ptr_t ptr) { + auto hash = Load(ptr + hash_offset); + D_ASSERT((hash & bitmask) == (hash % capacity)); + auto entry_idx = (idx_t)hash & bitmask; + while (hashes_arr[entry_idx].page_nr > 0) { + entry_idx++; + if (entry_idx >= capacity) { + entry_idx = 0; } - result.Reference(intermediate); } - } + + D_ASSERT(!hashes_arr[entry_idx].page_nr); + D_ASSERT(hash >> hash_prefix_shift <= NumericLimits::Maximum()); + + hashes_arr[entry_idx].salt = hash >> hash_prefix_shift; + hashes_arr[entry_idx].page_nr = page_nr + 1; + hashes_arr[entry_idx].page_offset = page_offset; + }); + + Verify(); } -idx_t ExpressionExecutor::Select(BoundConjunctionExpression &expr, ExpressionState *state_p, const SelectionVector *sel, - idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { - auto state = (ConjunctionState *)state_p; +idx_t GroupedAggregateHashTable::AddChunk(DataChunk &groups, DataChunk &payload) { + Vector hashes(LogicalType::HASH); + groups.Hash(hashes); - if (expr.type == ExpressionType::CONJUNCTION_AND) { - // get runtime statistics - auto start_time = high_resolution_clock::now(); + return AddChunk(groups, hashes, payload); +} - const SelectionVector *current_sel = sel; - idx_t 
current_count = count; - idx_t false_count = 0; +idx_t GroupedAggregateHashTable::AddChunk(DataChunk &groups, Vector &group_hashes, DataChunk &payload) { + D_ASSERT(!is_finalized); - unique_ptr temp_true, temp_false; - if (false_sel) { - temp_false = make_unique(STANDARD_VECTOR_SIZE); - } - if (!true_sel) { - temp_true = make_unique(STANDARD_VECTOR_SIZE); - true_sel = temp_true.get(); - } - for (idx_t i = 0; i < expr.children.size(); i++) { - idx_t tcount = Select(*expr.children[state->adaptive_filter->permutation[i]], - state->child_states[state->adaptive_filter->permutation[i]].get(), current_sel, - current_count, true_sel, temp_false.get()); - idx_t fcount = current_count - tcount; - if (fcount > 0 && false_sel) { - // move failing tuples into the false_sel - // tuples passed, move them into the actual result vector - for (idx_t i = 0; i < fcount; i++) { - false_sel->set_index(false_count++, temp_false->get_index(i)); - } + if (groups.size() == 0) { + return 0; + } + // dummy + SelectionVector new_groups(STANDARD_VECTOR_SIZE); + + D_ASSERT(groups.ColumnCount() + 1 == layout.ColumnCount()); + for (idx_t i = 0; i < groups.ColumnCount(); i++) { + D_ASSERT(groups.GetTypes()[i] == layout.GetTypes()[i]); + } + + Vector addresses(LogicalType::POINTER); + auto new_group_count = FindOrCreateGroups(groups, group_hashes, addresses, new_groups); + VectorOperations::AddInPlace(addresses, layout.GetAggrOffset(), payload.size()); + + // now every cell has an entry + // update the aggregates + idx_t payload_idx = 0; + + auto &aggregates = layout.GetAggregates(); + for (idx_t aggr_idx = 0; aggr_idx < aggregates.size(); aggr_idx++) { + // for any entries for which a group was found, update the aggregate + auto &aggr = aggregates[aggr_idx]; + if (aggr.distinct) { + // construct chunk for secondary hash table probing + vector probe_types(groups.GetTypes()); + for (idx_t i = 0; i < aggr.child_count; i++) { + probe_types.push_back(payload_types[payload_idx]); } - current_count = 
tcount; - if (current_count == 0) { - break; + DataChunk probe_chunk; + probe_chunk.Initialize(probe_types); + for (idx_t group_idx = 0; group_idx < groups.ColumnCount(); group_idx++) { + probe_chunk.data[group_idx].Reference(groups.data[group_idx]); } - if (current_count < count) { - // tuples were filtered out: move on to using the true_sel to only evaluate passing tuples in subsequent - // iterations - current_sel = true_sel; + for (idx_t i = 0; i < aggr.child_count; i++) { + probe_chunk.data[groups.ColumnCount() + i].Reference(payload.data[payload_idx + i]); } - } - - // adapt runtime statistics - auto end_time = high_resolution_clock::now(); - state->adaptive_filter->AdaptRuntimeStatistics(duration_cast>(end_time - start_time).count()); - return current_count; - } else { - // get runtime statistics - auto start_time = high_resolution_clock::now(); + probe_chunk.SetCardinality(groups); + probe_chunk.Verify(); - const SelectionVector *current_sel = sel; - idx_t current_count = count; - idx_t result_count = 0; + Vector dummy_addresses(LogicalType::POINTER); + // this is the actual meat, find out which groups plus payload + // value have not been seen yet + idx_t new_group_count = + distinct_hashes[aggr_idx]->FindOrCreateGroups(probe_chunk, dummy_addresses, new_groups); - unique_ptr temp_true, temp_false; - if (true_sel) { - temp_true = make_unique(STANDARD_VECTOR_SIZE); - } - if (!false_sel) { - temp_false = make_unique(STANDARD_VECTOR_SIZE); - false_sel = temp_false.get(); - } - for (idx_t i = 0; i < expr.children.size(); i++) { - idx_t tcount = Select(*expr.children[state->adaptive_filter->permutation[i]], - state->child_states[state->adaptive_filter->permutation[i]].get(), current_sel, - current_count, temp_true.get(), false_sel); - if (tcount > 0) { - if (true_sel) { - // tuples passed, move them into the actual result vector - for (idx_t i = 0; i < tcount; i++) { - true_sel->set_index(result_count++, temp_true->get_index(i)); + // now fix up the payload and 
addresses accordingly by creating + // a selection vector + if (new_group_count > 0) { + if (aggr.filter) { + Vector distinct_addresses(addresses, new_groups, new_group_count); + DataChunk distinct_payload; + auto pay_types = payload.GetTypes(); + distinct_payload.Initialize(pay_types); + distinct_payload.Slice(payload, new_groups, new_group_count); + distinct_addresses.Verify(new_group_count); + distinct_addresses.Normalify(new_group_count); + RowOperations::UpdateFilteredStates(aggr, distinct_addresses, distinct_payload, payload_idx); + } else { + Vector distinct_addresses(addresses, new_groups, new_group_count); + for (idx_t i = 0; i < aggr.child_count; i++) { + payload.data[payload_idx + i].Slice(new_groups, new_group_count); + payload.data[payload_idx + i].Verify(new_group_count); } + distinct_addresses.Verify(new_group_count); + + RowOperations::UpdateStates(aggr, distinct_addresses, payload, payload_idx, new_group_count); } - // now move on to check only the non-passing tuples - current_count -= tcount; - current_sel = false_sel; } + } else if (aggr.filter) { + RowOperations::UpdateFilteredStates(aggr, addresses, payload, payload_idx); + } else { + RowOperations::UpdateStates(aggr, addresses, payload, payload_idx, payload.size()); } - // adapt runtime statistics - auto end_time = high_resolution_clock::now(); - state->adaptive_filter->AdaptRuntimeStatistics(duration_cast>(end_time - start_time).count()); - return result_count; + // move to the next aggregate + payload_idx += aggr.child_count; + VectorOperations::AddInPlace(addresses, aggr.payload_size, payload.size()); } + + Verify(); + return new_group_count; } -} // namespace duckdb +void GroupedAggregateHashTable::FetchAggregates(DataChunk &groups, DataChunk &result) { + groups.Verify(); + D_ASSERT(groups.ColumnCount() + 1 == layout.ColumnCount()); + for (idx_t i = 0; i < result.ColumnCount(); i++) { + D_ASSERT(result.data[i].GetType() == payload_types[i]); + } + result.SetCardinality(groups); + if 
(groups.size() == 0) { + return; + } + // find the groups associated with the addresses + // FIXME: this should not use the FindOrCreateGroups, creating them is unnecessary + Vector addresses(LogicalType::POINTER); + FindOrCreateGroups(groups, addresses); + // now fetch the aggregates + RowOperations::FinalizeStates(layout, addresses, result, 0); +} +template +idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, Vector &group_hashes, Vector &addresses, + SelectionVector &new_groups_out) { + D_ASSERT(!is_finalized); + if (entries + groups.size() > MaxCapacity()) { + throw InternalException("Hash table capacity reached"); + } -namespace duckdb { + // resize at 50% capacity, also need to fit the entire vector + if (capacity - entries <= groups.size() || entries > capacity / LOAD_FACTOR) { + Resize(capacity * 2); + } -unique_ptr ExpressionExecutor::InitializeState(BoundConstantExpression &expr, - ExpressionExecutorState &root) { - auto result = make_unique(expr, root); - result->Finalize(); - return result; -} + D_ASSERT(capacity - entries >= groups.size()); + D_ASSERT(groups.ColumnCount() + 1 == layout.ColumnCount()); + // we need to be able to fit at least one vector of data + D_ASSERT(capacity - entries >= groups.size()); + D_ASSERT(group_hashes.GetType() == LogicalType::HASH); -void ExpressionExecutor::Execute(BoundConstantExpression &expr, ExpressionState *state, const SelectionVector *sel, - idx_t count, Vector &result) { - D_ASSERT(expr.value.type() == expr.return_type); - result.Reference(expr.value); -} + group_hashes.Normalify(groups.size()); + auto group_hashes_ptr = FlatVector::GetData(group_hashes); -} // namespace duckdb + D_ASSERT(ht_offsets.GetVectorType() == VectorType::FLAT_VECTOR); + D_ASSERT(ht_offsets.GetType() == LogicalType::BIGINT); + D_ASSERT(addresses.GetType() == LogicalType::POINTER); + addresses.Normalify(groups.size()); + auto addresses_ptr = FlatVector::GetData(addresses); + // now compute the entry in the table 
based on the hash using a modulo + UnaryExecutor::Execute(group_hashes, ht_offsets, groups.size(), [&](hash_t element) { + D_ASSERT((element & bitmask) == (element % capacity)); + return (element & bitmask); + }); + auto ht_offsets_ptr = FlatVector::GetData(ht_offsets); -namespace duckdb { + // precompute the hash salts for faster comparison below + D_ASSERT(hash_salts.GetType() == LogicalType::SMALLINT); + UnaryExecutor::Execute(group_hashes, hash_salts, groups.size(), + [&](hash_t element) { return (element >> hash_prefix_shift); }); + auto hash_salts_ptr = FlatVector::GetData(hash_salts); -struct FunctionExpressionState : public ExpressionState { - FunctionExpressionState(Expression &expr, ExpressionExecutorState &root) : ExpressionState(expr, root) { - } + // we start out with all entries [0, 1, 2, ..., groups.size()] + const SelectionVector *sel_vector = &FlatVector::INCREMENTAL_SELECTION_VECTOR; - DataChunk arguments; -}; -unique_ptr ExpressionExecutor::InitializeState(BoundFunctionExpression &expr, - ExpressionExecutorState &root) { - auto result = make_unique(expr, root); - for (auto &child : expr.children) { - result->AddChild(child.get()); - } - result->Finalize(); - if (!result->types.empty()) { - result->arguments.InitializeEmpty(result->types); - } - return move(result); -} + idx_t remaining_entries = groups.size(); -void ExpressionExecutor::Execute(BoundFunctionExpression &expr, ExpressionState *state, const SelectionVector *sel, - idx_t count, Vector &result) { - auto &fstate = (FunctionExpressionState &)*state; - auto &arguments = fstate.arguments; - if (!state->types.empty()) { - arguments.Reference(state->intermediate_chunk); - for (idx_t i = 0; i < expr.children.size(); i++) { - D_ASSERT(state->types[i] == expr.children[i]->return_type); - Execute(*expr.children[i], state->child_states[i].get(), sel, count, arguments.data[i]); -#ifdef DEBUG - if (expr.children[i]->return_type.id() == LogicalTypeId::VARCHAR) { - arguments.data[i].UTFVerify(count); 
- } -#endif - } - arguments.Verify(); - } - arguments.SetCardinality(count); - if (current_count >= next_sample) { - state->profiler.Start(); - } - expr.function.function(arguments, *state, result); - if (current_count >= next_sample) { - state->profiler.End(); - state->time += state->profiler.Elapsed(); - } - if (result.GetType() != expr.return_type) { - throw TypeMismatchException(expr.return_type, result.GetType(), - "expected function to return the former " - "but the function returned the latter"); + // make a chunk that references the groups and the hashes + DataChunk group_chunk; + group_chunk.InitializeEmpty(layout.GetTypes()); + for (idx_t grp_idx = 0; grp_idx < groups.ColumnCount(); grp_idx++) { + group_chunk.data[grp_idx].Reference(groups.data[grp_idx]); } -} - -} // namespace duckdb + group_chunk.data[groups.ColumnCount()].Reference(group_hashes); + group_chunk.SetCardinality(groups); + // orrify all the groups + auto group_data = group_chunk.Orrify(); + idx_t new_group_count = 0; + while (remaining_entries > 0) { + idx_t new_entry_count = 0; + idx_t need_compare_count = 0; + idx_t no_match_count = 0; + // first figure out for each remaining whether or not it belongs to a full or empty group + for (idx_t i = 0; i < remaining_entries; i++) { + const idx_t index = sel_vector->get_index(i); + const auto ht_entry_ptr = ((ENTRY *)this->hashes_hdl_ptr) + ht_offsets_ptr[index]; + if (ht_entry_ptr->page_nr == 0) { // we use page number 0 as a "unused marker" + // cell is empty; setup the new entry + if (payload_page_offset == tuples_per_block || payload_hds.empty()) { + NewBlock(); + } -namespace duckdb { + auto entry_payload_ptr = payload_hds_ptrs.back() + (payload_page_offset * tuple_size); -unique_ptr ExpressionExecutor::InitializeState(BoundOperatorExpression &expr, - ExpressionExecutorState &root) { - auto result = make_unique(expr, root); - for (auto &child : expr.children) { - result->AddChild(child.get()); - } - result->Finalize(); - return result; -} + 
D_ASSERT(group_hashes_ptr[index] >> hash_prefix_shift <= NumericLimits::Maximum()); + D_ASSERT(payload_page_offset < tuples_per_block); + D_ASSERT(payload_hds.size() < NumericLimits::Maximum()); + D_ASSERT(payload_page_offset + 1 < NumericLimits::Maximum()); -void ExpressionExecutor::Execute(BoundOperatorExpression &expr, ExpressionState *state, const SelectionVector *sel, - idx_t count, Vector &result) { - // special handling for special snowflake 'IN' - // IN has n children - if (expr.type == ExpressionType::COMPARE_IN || expr.type == ExpressionType::COMPARE_NOT_IN) { - if (expr.children.size() < 2) { - throw Exception("IN needs at least two children"); - } - Vector left(expr.children[0]->return_type); - // eval left side - Execute(*expr.children[0], state->child_states[0].get(), sel, count, left); + ht_entry_ptr->salt = group_hashes_ptr[index] >> hash_prefix_shift; - // init result to false - Vector intermediate(LogicalType::BOOLEAN); - Value false_val = Value::BOOLEAN(false); - intermediate.Reference(false_val); + // page numbers start at one so we can use 0 as empty flag + // GetPtr undoes this + ht_entry_ptr->page_nr = payload_hds.size(); + ht_entry_ptr->page_offset = payload_page_offset++; - // in rhs is a list of constants - // for every child, OR the result of the comparision with the left - // to get the overall result. 
- for (idx_t child = 1; child < expr.children.size(); child++) { - Vector vector_to_check(expr.children[child]->return_type); - Vector comp_res(LogicalType::BOOLEAN); + // update selection lists for outer loops + empty_vector.set_index(new_entry_count++, index); + new_groups_out.set_index(new_group_count++, index); + entries++; - Execute(*expr.children[child], state->child_states[child].get(), sel, count, vector_to_check); - VectorOperations::Equals(left, vector_to_check, comp_res, count); + addresses_ptr[index] = entry_payload_ptr; - if (child == 1) { - // first child: move to result - intermediate.Reference(comp_res); } else { - // otherwise OR together - Vector new_result(LogicalType::BOOLEAN, true, false); - VectorOperations::Or(intermediate, comp_res, new_result, count); - intermediate.Reference(new_result); + // cell is occupied: add to check list + // only need to check if hash salt in ptr == prefix of hash in payload + if (ht_entry_ptr->salt == hash_salts_ptr[index]) { + group_compare_vector.set_index(need_compare_count++, index); + + auto page_ptr = payload_hds_ptrs[ht_entry_ptr->page_nr - 1]; + auto page_offset = ht_entry_ptr->page_offset * tuple_size; + addresses_ptr[index] = page_ptr + page_offset; + + } else { + no_match_vector.set_index(no_match_count++, index); + } } } - if (expr.type == ExpressionType::COMPARE_NOT_IN) { - // NOT IN: invert result - VectorOperations::Not(intermediate, result, count); - } else { - // directly use the result - result.Reference(intermediate); - } - } else if (expr.children.size() == 1) { - Vector child; - child.Reference(state->intermediate_chunk.data[0]); - Execute(*expr.children[0], state->child_states[0].get(), sel, count, child); - switch (expr.type) { - case ExpressionType::OPERATOR_NOT: { - VectorOperations::Not(child, result, count); - break; - } - case ExpressionType::OPERATOR_IS_NULL: { - VectorOperations::IsNull(child, result, count); - break; - } - case ExpressionType::OPERATOR_IS_NOT_NULL: { - 
VectorOperations::IsNotNull(child, result, count); - break; - } - default: - throw NotImplementedException("Unsupported operator type with 1 child!"); + // for each of the locations that are empty, serialize the group columns to the locations + RowOperations::Scatter(group_chunk, group_data.get(), layout, addresses, *string_heap, empty_vector, + new_entry_count); + RowOperations::InitializeStates(layout, addresses, empty_vector, new_entry_count); + + // now we have only the tuples remaining that might match to an existing group + // start performing comparisons with each of the groups + RowOperations::Match(group_chunk, group_data.get(), layout, addresses, predicates, group_compare_vector, + need_compare_count, &no_match_vector, no_match_count); + + // each of the entries that do not match we move them to the next entry in the HT + for (idx_t i = 0; i < no_match_count; i++) { + idx_t index = no_match_vector.get_index(i); + ht_offsets_ptr[index]++; + if (ht_offsets_ptr[index] >= capacity) { + ht_offsets_ptr[index] = 0; + } } - } else { - throw NotImplementedException("operator"); + sel_vector = &no_match_vector; + remaining_entries = no_match_count; } + + return new_group_count; } -} // namespace duckdb +// this is to support distinct aggregations where we need to record whether we +// have already seen a value for a group +idx_t GroupedAggregateHashTable::FindOrCreateGroups(DataChunk &groups, Vector &group_hashes, Vector &addresses_out, + SelectionVector &new_groups_out) { + switch (entry_type) { + case HtEntryType::HT_WIDTH_64: + return FindOrCreateGroupsInternal(groups, group_hashes, addresses_out, new_groups_out); + case HtEntryType::HT_WIDTH_32: + return FindOrCreateGroupsInternal(groups, group_hashes, addresses_out, new_groups_out); + default: + throw InternalException("Unknown HT entry width"); + } +} +void GroupedAggregateHashTable::FindOrCreateGroups(DataChunk &groups, Vector &addresses) { + // create a dummy new_groups sel vector + SelectionVector 
new_groups(STANDARD_VECTOR_SIZE); + FindOrCreateGroups(groups, addresses, new_groups); +} +idx_t GroupedAggregateHashTable::FindOrCreateGroups(DataChunk &groups, Vector &addresses_out, + SelectionVector &new_groups_out) { + Vector hashes(LogicalType::HASH); + groups.Hash(hashes); + return FindOrCreateGroups(groups, hashes, addresses_out, new_groups_out); +} +void GroupedAggregateHashTable::FlushMove(Vector &source_addresses, Vector &source_hashes, idx_t count) { + D_ASSERT(source_addresses.GetType() == LogicalType::POINTER); + D_ASSERT(source_hashes.GetType() == LogicalType::HASH); -namespace duckdb { + DataChunk groups; + groups.Initialize(vector(layout.GetTypes().begin(), layout.GetTypes().end() - 1)); + groups.SetCardinality(count); + for (idx_t i = 0; i < groups.ColumnCount(); i++) { + auto &column = groups.data[i]; + const auto col_offset = layout.GetOffsets()[i]; + RowOperations::Gather(source_addresses, FlatVector::INCREMENTAL_SELECTION_VECTOR, column, + FlatVector::INCREMENTAL_SELECTION_VECTOR, count, col_offset, i); + } -unique_ptr ExpressionExecutor::InitializeState(BoundParameterExpression &expr, - ExpressionExecutorState &root) { - auto result = make_unique(expr, root); - result->Finalize(); - return result; -} + SelectionVector new_groups(STANDARD_VECTOR_SIZE); + Vector group_addresses(LogicalType::POINTER); + SelectionVector new_groups_sel(STANDARD_VECTOR_SIZE); -void ExpressionExecutor::Execute(BoundParameterExpression &expr, ExpressionState *state, const SelectionVector *sel, - idx_t count, Vector &result) { - D_ASSERT(expr.value); - D_ASSERT(expr.value->type() == expr.return_type); - result.Reference(*expr.value); + FindOrCreateGroups(groups, source_hashes, group_addresses, new_groups_sel); + + RowOperations::CombineStates(layout, source_addresses, group_addresses, count); } -} // namespace duckdb +void GroupedAggregateHashTable::Combine(GroupedAggregateHashTable &other) { + D_ASSERT(!is_finalized); + D_ASSERT(other.layout.GetAggrWidth() == 
layout.GetAggrWidth()); + D_ASSERT(other.layout.GetDataWidth() == layout.GetDataWidth()); + D_ASSERT(other.layout.GetRowWidth() == layout.GetRowWidth()); + D_ASSERT(other.tuples_per_block == tuples_per_block); -namespace duckdb { + if (other.entries == 0) { + return; + } -unique_ptr ExpressionExecutor::InitializeState(BoundReferenceExpression &expr, - ExpressionExecutorState &root) { - auto result = make_unique(expr, root); - result->Finalize(); - return result; -} + Vector addresses(LogicalType::POINTER); + auto addresses_ptr = FlatVector::GetData(addresses); -void ExpressionExecutor::Execute(BoundReferenceExpression &expr, ExpressionState *state, const SelectionVector *sel, - idx_t count, Vector &result) { - D_ASSERT(expr.index != INVALID_INDEX); - D_ASSERT(expr.index < chunk->ColumnCount()); - if (sel) { - result.Slice(chunk->data[expr.index], *sel, count); - } else { - result.Reference(chunk->data[expr.index]); - } -} + Vector hashes(LogicalType::HASH); + auto hashes_ptr = FlatVector::GetData(hashes); -} // namespace duckdb + idx_t group_idx = 0; + other.PayloadApply([&](idx_t page_nr, idx_t page_offset, data_ptr_t ptr) { + auto hash = Load(ptr + hash_offset); + hashes_ptr[group_idx] = hash; + addresses_ptr[group_idx] = ptr; + group_idx++; + if (group_idx == STANDARD_VECTOR_SIZE) { + FlushMove(addresses, hashes, group_idx); + group_idx = 0; + } + }); + FlushMove(addresses, hashes, group_idx); + string_heap->Merge(*other.string_heap); + Verify(); +} +struct PartitionInfo { + PartitionInfo() : addresses(LogicalType::POINTER), hashes(LogicalType::HASH), group_count(0) { + addresses_ptr = FlatVector::GetData(addresses); + hashes_ptr = FlatVector::GetData(hashes); + }; + Vector addresses; + Vector hashes; + idx_t group_count; + data_ptr_t *addresses_ptr; + hash_t *hashes_ptr; +}; +void GroupedAggregateHashTable::Partition(vector &partition_hts, hash_t mask, + idx_t shift) { + D_ASSERT(partition_hts.size() > 1); + vector partition_info(partition_hts.size()); 
-namespace duckdb { + PayloadApply([&](idx_t page_nr, idx_t page_offset, data_ptr_t ptr) { + auto hash = Load(ptr + hash_offset); -ExpressionExecutor::ExpressionExecutor() : random(0) { -} + idx_t partition = (hash & mask) >> shift; + D_ASSERT(partition < partition_hts.size()); -ExpressionExecutor::ExpressionExecutor(Expression *expression) : ExpressionExecutor() { - D_ASSERT(expression); - AddExpression(*expression); -} + auto &info = partition_info[partition]; -ExpressionExecutor::ExpressionExecutor(Expression &expression) : ExpressionExecutor() { - AddExpression(expression); -} + info.hashes_ptr[info.group_count] = hash; + info.addresses_ptr[info.group_count] = ptr; + info.group_count++; + if (info.group_count == STANDARD_VECTOR_SIZE) { + D_ASSERT(partition_hts[partition]); + partition_hts[partition]->FlushMove(info.addresses, info.hashes, info.group_count); + info.group_count = 0; + } + }); -ExpressionExecutor::ExpressionExecutor(vector> &exprs) : ExpressionExecutor() { - D_ASSERT(exprs.size() > 0); - for (auto &expr : exprs) { - AddExpression(*expr); + idx_t info_idx = 0; + idx_t total_count = 0; + for (auto &partition_entry : partition_hts) { + auto &info = partition_info[info_idx++]; + partition_entry->FlushMove(info.addresses, info.hashes, info.group_count); + + partition_entry->string_heap->Merge(*string_heap); + partition_entry->Verify(); + total_count += partition_entry->Size(); } + D_ASSERT(total_count == entries); } -void ExpressionExecutor::AddExpression(Expression &expr) { - expressions.push_back(&expr); - auto state = make_unique(); - Initialize(expr, *state); - states.push_back(move(state)); -} +idx_t GroupedAggregateHashTable::Scan(idx_t &scan_position, DataChunk &result) { + auto data_pointers = FlatVector::GetData(addresses); -void ExpressionExecutor::Initialize(Expression &expression, ExpressionExecutorState &state) { - state.root_state = InitializeState(expression, state); - state.executor = this; -} + auto remaining = entries - scan_position; 
+ if (remaining == 0) { + return 0; + } + auto this_n = MinValue((idx_t)STANDARD_VECTOR_SIZE, remaining); -void ExpressionExecutor::Execute(DataChunk *input, DataChunk &result) { - SetChunk(input); + auto chunk_idx = scan_position / tuples_per_block; + auto chunk_offset = (scan_position % tuples_per_block) * tuple_size; + D_ASSERT(chunk_offset + tuple_size <= Storage::BLOCK_SIZE); - D_ASSERT(expressions.size() == result.ColumnCount()); - D_ASSERT(!expressions.empty()); - for (idx_t i = 0; i < expressions.size(); i++) { - ExecuteExpression(i, result.data[i]); - if (current_count >= next_sample) { - next_sample = 50 + random.NextRandomInteger() % 100; - ++sample_count; - sample_tuples_count += input->size(); - current_count = 0; - } else { - ++current_count; + auto read_ptr = payload_hds_ptrs[chunk_idx++]; + for (idx_t i = 0; i < this_n; i++) { + data_pointers[i] = read_ptr + chunk_offset; + chunk_offset += tuple_size; + if (chunk_offset >= tuples_per_block * tuple_size) { + read_ptr = payload_hds_ptrs[chunk_idx++]; + chunk_offset = 0; } } - result.SetCardinality(input ? 
input->size() : 1); - result.Verify(); - ++total_count; - tuples_count += input->size(); -} -void ExpressionExecutor::ExecuteExpression(DataChunk &input, Vector &result) { - SetChunk(&input); - ExecuteExpression(result); -} + result.SetCardinality(this_n); + // fetch the group columns (ignoring the final hash column + const auto group_cols = layout.ColumnCount() - 1; + for (idx_t i = 0; i < group_cols; i++) { + auto &column = result.data[i]; + const auto col_offset = layout.GetOffsets()[i]; + RowOperations::Gather(addresses, FlatVector::INCREMENTAL_SELECTION_VECTOR, column, + FlatVector::INCREMENTAL_SELECTION_VECTOR, result.size(), col_offset, i); + } -idx_t ExpressionExecutor::SelectExpression(DataChunk &input, SelectionVector &sel) { - D_ASSERT(expressions.size() == 1); - SetChunk(&input); - return Select(*expressions[0], states[0]->root_state.get(), nullptr, input.size(), &sel, nullptr); -} + RowOperations::FinalizeStates(layout, addresses, result, group_cols); -void ExpressionExecutor::ExecuteExpression(Vector &result) { - D_ASSERT(expressions.size() == 1); - ExecuteExpression(0, result); + scan_position += this_n; + return this_n; } -void ExpressionExecutor::ExecuteExpression(idx_t expr_idx, Vector &result) { - D_ASSERT(expr_idx < expressions.size()); - D_ASSERT(result.GetType() == expressions[expr_idx]->return_type); - Execute(*expressions[expr_idx], states[expr_idx]->root_state.get(), nullptr, chunk ? 
chunk->size() : 1, result); +void GroupedAggregateHashTable::Finalize() { + D_ASSERT(!is_finalized); + + // early release hashes, not needed for partition/scan + hashes_hdl.reset(); + is_finalized = true; } -Value ExpressionExecutor::EvaluateScalar(Expression &expr) { - D_ASSERT(expr.IsFoldable()); - // use an ExpressionExecutor to execute the expression - ExpressionExecutor executor(expr); +} // namespace duckdb - Vector result(expr.return_type); - executor.ExecuteExpression(result); - D_ASSERT(result.GetVectorType() == VectorType::CONSTANT_VECTOR); - auto result_value = result.GetValue(0); - D_ASSERT(result_value.type() == expr.return_type); - return result_value; -} -void ExpressionExecutor::Verify(Expression &expr, Vector &vector, idx_t count) { - D_ASSERT(expr.return_type == vector.GetType()); - vector.Verify(count); - if (expr.stats) { - expr.stats->Verify(vector, count); - } -} +namespace duckdb { -unique_ptr ExpressionExecutor::InitializeState(Expression &expr, ExpressionExecutorState &state) { - switch (expr.expression_class) { - case ExpressionClass::BOUND_REF: - return InitializeState((BoundReferenceExpression &)expr, state); - case ExpressionClass::BOUND_BETWEEN: - return InitializeState((BoundBetweenExpression &)expr, state); - case ExpressionClass::BOUND_CASE: - return InitializeState((BoundCaseExpression &)expr, state); - case ExpressionClass::BOUND_CAST: - return InitializeState((BoundCastExpression &)expr, state); - case ExpressionClass::BOUND_COMPARISON: - return InitializeState((BoundComparisonExpression &)expr, state); - case ExpressionClass::BOUND_CONJUNCTION: - return InitializeState((BoundConjunctionExpression &)expr, state); - case ExpressionClass::BOUND_CONSTANT: - return InitializeState((BoundConstantExpression &)expr, state); - case ExpressionClass::BOUND_FUNCTION: - return InitializeState((BoundFunctionExpression &)expr, state); - case ExpressionClass::BOUND_OPERATOR: - return InitializeState((BoundOperatorExpression &)expr, state); - 
case ExpressionClass::BOUND_PARAMETER: - return InitializeState((BoundParameterExpression &)expr, state); - default: - throw NotImplementedException("Attempting to initialize state of expression of unknown type!"); - } +BaseAggregateHashTable::BaseAggregateHashTable(BufferManager &buffer_manager, vector payload_types_p) + : buffer_manager(buffer_manager), payload_types(move(payload_types_p)) { } -void ExpressionExecutor::Execute(Expression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, - Vector &result) { - if (count == 0) { - return; - } - switch (expr.expression_class) { - case ExpressionClass::BOUND_BETWEEN: - Execute((BoundBetweenExpression &)expr, state, sel, count, result); - break; - case ExpressionClass::BOUND_REF: - Execute((BoundReferenceExpression &)expr, state, sel, count, result); - break; - case ExpressionClass::BOUND_CASE: - Execute((BoundCaseExpression &)expr, state, sel, count, result); - break; - case ExpressionClass::BOUND_CAST: - Execute((BoundCastExpression &)expr, state, sel, count, result); - break; - case ExpressionClass::BOUND_COMPARISON: - Execute((BoundComparisonExpression &)expr, state, sel, count, result); - break; - case ExpressionClass::BOUND_CONJUNCTION: - Execute((BoundConjunctionExpression &)expr, state, sel, count, result); - break; - case ExpressionClass::BOUND_CONSTANT: - Execute((BoundConstantExpression &)expr, state, sel, count, result); - break; - case ExpressionClass::BOUND_FUNCTION: - Execute((BoundFunctionExpression &)expr, state, sel, count, result); - break; - case ExpressionClass::BOUND_OPERATOR: - Execute((BoundOperatorExpression &)expr, state, sel, count, result); - break; - case ExpressionClass::BOUND_PARAMETER: - Execute((BoundParameterExpression &)expr, state, sel, count, result); - break; - default: - throw NotImplementedException("Attempting to execute expression of unknown type!"); - } - Verify(expr, result, count); -} +} // namespace duckdb 
+//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/column_binding_resolver.hpp +// +// +//===----------------------------------------------------------------------===// -idx_t ExpressionExecutor::Select(Expression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel) { - if (count == 0) { - return 0; - } - D_ASSERT(true_sel || false_sel); - D_ASSERT(expr.return_type.id() == LogicalTypeId::BOOLEAN); - switch (expr.expression_class) { - case ExpressionClass::BOUND_BETWEEN: - return Select((BoundBetweenExpression &)expr, state, sel, count, true_sel, false_sel); - case ExpressionClass::BOUND_COMPARISON: - return Select((BoundComparisonExpression &)expr, state, sel, count, true_sel, false_sel); - case ExpressionClass::BOUND_CONJUNCTION: - return Select((BoundConjunctionExpression &)expr, state, sel, count, true_sel, false_sel); - default: - return DefaultSelect(expr, state, sel, count, true_sel, false_sel); - } -} -template -static inline idx_t DefaultSelectLoop(const SelectionVector *bsel, uint8_t *__restrict bdata, ValidityMask &mask, - const SelectionVector *sel, idx_t count, SelectionVector *true_sel, - SelectionVector *false_sel) { - idx_t true_count = 0, false_count = 0; - for (idx_t i = 0; i < count; i++) { - auto bidx = bsel->get_index(i); - auto result_idx = sel->get_index(i); - if (bdata[bidx] > 0 && (NO_NULL || mask.RowIsValid(bidx))) { - if (HAS_TRUE_SEL) { - true_sel->set_index(true_count++, result_idx); - } - } else { - if (HAS_FALSE_SEL) { - false_sel->set_index(false_count++, result_idx); - } - } - } - if (HAS_TRUE_SEL) { - return true_count; - } else { - return count - false_count; - } -} -template -static inline idx_t DefaultSelectSwitch(VectorData &idata, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel) { - if (true_sel && false_sel) { - return 
DefaultSelectLoop(idata.sel, (uint8_t *)idata.data, idata.validity, sel, count, - true_sel, false_sel); - } else if (true_sel) { - return DefaultSelectLoop(idata.sel, (uint8_t *)idata.data, idata.validity, sel, count, - true_sel, false_sel); - } else { - D_ASSERT(false_sel); - return DefaultSelectLoop(idata.sel, (uint8_t *)idata.data, idata.validity, sel, count, - true_sel, false_sel); - } -} -idx_t ExpressionExecutor::DefaultSelect(Expression &expr, ExpressionState *state, const SelectionVector *sel, - idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { - // generic selection of boolean expression: - // resolve the true/false expression first - // then use that to generate the selection vector - bool intermediate_bools[STANDARD_VECTOR_SIZE]; - Vector intermediate(LogicalType::BOOLEAN, (data_ptr_t)intermediate_bools); - Execute(expr, state, sel, count, intermediate); +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/planner/column_binding_map.hpp +// +// +//===----------------------------------------------------------------------===// - VectorData idata; - intermediate.Orrify(count, idata); - if (!sel) { - sel = &FlatVector::INCREMENTAL_SELECTION_VECTOR; + + + + + + + +namespace duckdb { + +struct ColumnBindingHashFunction { + uint64_t operator()(const ColumnBinding &a) const { + return CombineHash(Hash(a.table_index), Hash(a.column_index)); } - if (!idata.validity.AllValid()) { - return DefaultSelectSwitch(idata, sel, count, true_sel, false_sel); - } else { - return DefaultSelectSwitch(idata, sel, count, true_sel, false_sel); +}; + +struct ColumnBindingEquality { + bool operator()(const ColumnBinding &a, const ColumnBinding &b) const { + return a == b; } -} +}; -vector> &ExpressionExecutor::GetStates() { - return states; -} +template +using column_binding_map_t = unordered_map; -} // namespace duckdb +using column_binding_set_t = unordered_set; +} // namespace duckdb namespace duckdb { 
-void ExpressionState::AddChild(Expression *expr) { - types.push_back(expr->return_type); - child_states.push_back(ExpressionExecutor::InitializeState(*expr, root)); -} +//! The ColumnBindingResolver resolves ColumnBindings into base tables +//! (table_index, column_index) into physical indices into the DataChunks that +//! are used within the execution engine +class ColumnBindingResolver : public LogicalOperatorVisitor { +public: + ColumnBindingResolver(); -void ExpressionState::Finalize() { - if (!types.empty()) { - intermediate_chunk.Initialize(types); - } -} -ExpressionState::ExpressionState(Expression &expr, ExpressionExecutorState &root) - : expr(expr), root(root), name(expr.ToString()) { -} + void VisitOperator(LogicalOperator &op) override; + +protected: + vector bindings; + unique_ptr VisitReplace(BoundColumnRefExpression &expr, unique_ptr *expr_ptr) override; +}; } // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/planner/operator/logical_comparison_join.hpp +// +// +//===----------------------------------------------------------------------===// -#include -#include -#include -namespace duckdb { -ART::ART(vector column_ids, vector> unbound_expressions, bool is_unique) - : Index(IndexType::ART, move(column_ids), move(unbound_expressions)), is_unique(is_unique) { - tree = nullptr; - expression_result.Initialize(logical_types); - is_little_endian = IsLittleEndian(); - switch (types[0]) { - case PhysicalType::BOOL: - case PhysicalType::INT8: - case PhysicalType::INT16: - case PhysicalType::INT32: - case PhysicalType::INT64: - case PhysicalType::UINT8: - case PhysicalType::UINT16: - case PhysicalType::UINT32: - case PhysicalType::UINT64: - case PhysicalType::FLOAT: - case PhysicalType::DOUBLE: - case PhysicalType::VARCHAR: - break; - default: - throw InvalidTypeException(types[0], "Invalid type for index"); - } -} -ART::~ART() { -} 
+//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/planner/joinside.hpp +// +// +//===----------------------------------------------------------------------===// -bool ART::LeafMatches(Node *node, Key &key, unsigned depth) { - auto leaf = static_cast(node); - Key &leaf_key = *leaf->value; - for (idx_t i = depth; i < leaf_key.len; i++) { - if (leaf_key[i] != key[i]) { - return false; - } - } - return true; -} -unique_ptr ART::InitializeScanSinglePredicate(Transaction &transaction, Value value, - ExpressionType expression_type) { - auto result = make_unique(); - result->values[0] = value; - result->expressions[0] = expression_type; - return move(result); -} -unique_ptr ART::InitializeScanTwoPredicates(Transaction &transaction, Value low_value, - ExpressionType low_expression_type, Value high_value, - ExpressionType high_expression_type) { - auto result = make_unique(); - result->values[0] = low_value; - result->expressions[0] = low_expression_type; - result->values[1] = high_value; - result->expressions[1] = high_expression_type; - return move(result); -} -//===--------------------------------------------------------------------===// -// Insert -//===--------------------------------------------------------------------===// -template -static void TemplatedGenerateKeys(Vector &input, idx_t count, vector> &keys, bool is_little_endian) { - VectorData idata; - input.Orrify(count, idata); - auto input_data = (T *)idata.data; - for (idx_t i = 0; i < count; i++) { - auto idx = idata.sel->get_index(i); - if (idata.validity.RowIsValid(idx)) { - keys.push_back(Key::CreateKey(input_data[idx], is_little_endian)); - } else { - keys.push_back(nullptr); - } +namespace duckdb { + +//! 
JoinCondition represents a left-right comparison join condition +struct JoinCondition { +public: + JoinCondition() : null_values_are_equal(false) { } -} -template -static void ConcatenateKeys(Vector &input, idx_t count, vector> &keys, bool is_little_endian) { - VectorData idata; - input.Orrify(count, idata); + //! Turns the JoinCondition into an expression; note that this destroys the JoinCondition as the expression inherits + //! the left/right expressions + static unique_ptr CreateExpression(JoinCondition cond); - auto input_data = (T *)idata.data; - for (idx_t i = 0; i < count; i++) { - auto idx = idata.sel->get_index(i); - if (!idata.validity.RowIsValid(idx) || !keys[i]) { - // either this column is NULL, or the previous column is NULL! - keys[i] = nullptr; - } else { - // concatenate the keys - auto old_key = move(keys[i]); - auto new_key = Key::CreateKey(input_data[idx], is_little_endian); - auto key_len = old_key->len + new_key->len; - auto compound_data = unique_ptr(new data_t[key_len]); - memcpy(compound_data.get(), old_key->data.get(), old_key->len); - memcpy(compound_data.get() + old_key->len, new_key->data.get(), new_key->len); - keys[i] = make_unique(move(compound_data), key_len); - } +public: + unique_ptr left; + unique_ptr right; + ExpressionType comparison; + //! NULL values are equal for just THIS JoinCondition (instead of the entire join). + //! This is only supported by the HashJoin and can only be used in equality comparisons. 
+ bool null_values_are_equal = false; +}; + +class JoinSide { +public: + enum join_value : uint8_t { NONE, LEFT, RIGHT, BOTH }; + + JoinSide() = default; + constexpr JoinSide(join_value val) : value(val) { // NOLINT: Allow implicit conversion from `join_value` } -} -void ART::GenerateKeys(DataChunk &input, vector> &keys) { - keys.reserve(STANDARD_VECTOR_SIZE); - // generate keys for the first input column - switch (input.data[0].GetType().InternalType()) { - case PhysicalType::BOOL: - TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); - break; - case PhysicalType::INT8: - TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); - break; - case PhysicalType::INT16: - TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); - break; - case PhysicalType::INT32: - TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); - break; - case PhysicalType::INT64: - TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); - break; - case PhysicalType::UINT8: - TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); - break; - case PhysicalType::UINT16: - TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); - break; - case PhysicalType::UINT32: - TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); - break; - case PhysicalType::UINT64: - TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); - break; - case PhysicalType::FLOAT: - TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); - break; - case PhysicalType::DOUBLE: - TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); - break; - case PhysicalType::VARCHAR: - TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); - break; - default: - throw InvalidTypeException(input.data[0].GetType(), "Invalid type for index"); + bool operator==(JoinSide a) const { + return value == 
a.value; } - for (idx_t i = 1; i < input.ColumnCount(); i++) { - // for each of the remaining columns, concatenate - switch (input.data[i].GetType().InternalType()) { - case PhysicalType::BOOL: - ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); - break; - case PhysicalType::INT8: - ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); - break; - case PhysicalType::INT16: - ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); - break; - case PhysicalType::INT32: - ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); - break; - case PhysicalType::INT64: - ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); - break; - case PhysicalType::UINT8: - ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); - break; - case PhysicalType::UINT16: - ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); - break; - case PhysicalType::UINT32: - ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); - break; - case PhysicalType::UINT64: - ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); - break; - case PhysicalType::FLOAT: - ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); - break; - case PhysicalType::DOUBLE: - ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); - break; - case PhysicalType::VARCHAR: - ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); - break; - default: - throw InvalidTypeException(input.data[0].GetType(), "Invalid type for index"); - } + bool operator!=(JoinSide a) const { + return value != a.value; } -} -bool ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) { - D_ASSERT(row_ids.GetType().InternalType() == ROW_TYPE); - D_ASSERT(logical_types[0] == input.data[0].GetType()); + static JoinSide CombineJoinSide(JoinSide left, JoinSide right); + static JoinSide GetJoinSide(idx_t table_binding, unordered_set &left_bindings, + unordered_set 
&right_bindings); + static JoinSide GetJoinSide(Expression &expression, unordered_set &left_bindings, + unordered_set &right_bindings); + static JoinSide GetJoinSide(const unordered_set &bindings, unordered_set &left_bindings, + unordered_set &right_bindings); - // generate the keys for the given input - vector> keys; - GenerateKeys(input, keys); +private: + join_value value; +}; - // now insert the elements into the index - row_ids.Normalify(input.size()); - auto row_identifiers = FlatVector::GetData(row_ids); - idx_t failed_index = INVALID_INDEX; - for (idx_t i = 0; i < input.size(); i++) { - if (!keys[i]) { - continue; - } +} // namespace duckdb - row_t row_id = row_identifiers[i]; - if (!Insert(tree, move(keys[i]), 0, row_id)) { - // failed to insert because of constraint violation - failed_index = i; - break; - } - } - if (failed_index != INVALID_INDEX) { - // failed to insert because of constraint violation: remove previously inserted entries - // generate keys again - keys.clear(); - GenerateKeys(input, keys); - unique_ptr key; +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/planner/operator/logical_join.hpp +// +// +//===----------------------------------------------------------------------===// - // now erase the entries - for (idx_t i = 0; i < failed_index; i++) { - if (!keys[i]) { - continue; - } - row_t row_id = row_identifiers[i]; - Erase(tree, *keys[i], 0, row_id); - } - return false; - } - return true; -} -bool ART::Append(IndexLock &lock, DataChunk &appended_data, Vector &row_identifiers) { - DataChunk expression_result; - expression_result.Initialize(logical_types); - // first resolve the expressions for the index - ExecuteExpressions(appended_data, expression_result); - // now insert into the index - return Insert(lock, expression_result, row_identifiers); -} -void ART::VerifyAppend(DataChunk &chunk) { - if (!is_unique) { - return; - } - DataChunk expression_result; - 
expression_result.Initialize(logical_types); - // unique index, check - lock_guard l(lock); - // first resolve the expressions for the index - ExecuteExpressions(chunk, expression_result); +namespace duckdb { - // generate the keys for the given input - vector> keys; - GenerateKeys(expression_result, keys); +//! LogicalJoin represents a join between two relations +class LogicalJoin : public LogicalOperator { +public: + explicit LogicalJoin(JoinType type, LogicalOperatorType logical_type = LogicalOperatorType::LOGICAL_JOIN); - for (idx_t i = 0; i < chunk.size(); i++) { - if (!keys[i]) { - continue; - } - if (Lookup(tree, *keys[i], 0) != nullptr) { - // node already exists in tree - throw ConstraintException("duplicate key value violates primary key or unique constraint"); - } - } -} + // Gets the set of table references that are reachable from this node + static void GetTableReferences(LogicalOperator &op, unordered_set &bindings); + static void GetExpressionBindings(Expression &expr, unordered_set &bindings); -bool ART::InsertToLeaf(Leaf &leaf, row_t row_id) { - if (is_unique && leaf.num_elements != 0) { - return false; - } - leaf.Insert(row_id); - return true; -} + //! The type of the join (INNER, OUTER, etc...) + JoinType join_type; + //! Table index used to refer to the MARK column (in case of a MARK join) + idx_t mark_index; + //! The columns of the LHS that are output by the join + vector left_projection_map; + //! 
The columns of the RHS that are output by the join + vector right_projection_map; -bool ART::Insert(unique_ptr &node, unique_ptr value, unsigned depth, row_t row_id) { - Key &key = *value; - if (!node) { - // node is currently empty, create a leaf here with the key - node = make_unique(*this, move(value), row_id); - return true; - } +public: + vector GetColumnBindings() override; - if (node->type == NodeType::NLeaf) { - // Replace leaf with Node4 and store both leaves in it - auto leaf = static_cast(node.get()); +protected: + void ResolveTypes() override; +}; - Key &existing_key = *leaf->value; - uint32_t new_prefix_length = 0; - // Leaf node is already there, update row_id vector - if (depth + new_prefix_length == existing_key.len && existing_key.len == key.len) { - return InsertToLeaf(*leaf, row_id); - } - while (existing_key[depth + new_prefix_length] == key[depth + new_prefix_length]) { - new_prefix_length++; - // Leaf node is already there, update row_id vector - if (depth + new_prefix_length == existing_key.len && existing_key.len == key.len) { - return InsertToLeaf(*leaf, row_id); - } - } +} // namespace duckdb - unique_ptr new_node = make_unique(*this, new_prefix_length); - new_node->prefix_length = new_prefix_length; - memcpy(new_node->prefix.get(), &key[depth], new_prefix_length); - Node4::Insert(*this, new_node, existing_key[depth + new_prefix_length], node); - unique_ptr leaf_node = make_unique(*this, move(value), row_id); - Node4::Insert(*this, new_node, key[depth + new_prefix_length], leaf_node); - node = move(new_node); - return true; - } - // Handle prefix of inner node - if (node->prefix_length) { - uint32_t mismatch_pos = Node::PrefixMismatch(*this, node.get(), key, depth); - if (mismatch_pos != node->prefix_length) { - // Prefix differs, create new node - unique_ptr new_node = make_unique(*this, mismatch_pos); - new_node->prefix_length = mismatch_pos; - memcpy(new_node->prefix.get(), node->prefix.get(), mismatch_pos); - // Break up prefix - auto 
node_ptr = node.get(); - Node4::Insert(*this, new_node, node->prefix[mismatch_pos], node); - node_ptr->prefix_length -= (mismatch_pos + 1); - memmove(node_ptr->prefix.get(), node_ptr->prefix.get() + mismatch_pos + 1, node_ptr->prefix_length); - unique_ptr leaf_node = make_unique(*this, move(value), row_id); - Node4::Insert(*this, new_node, key[depth + mismatch_pos], leaf_node); - node = move(new_node); - return true; - } - depth += node->prefix_length; - } +namespace duckdb { - // Recurse - idx_t pos = node->GetChildPos(key[depth]); - if (pos != INVALID_INDEX) { - auto child = node->GetChild(pos); - return Insert(*child, move(value), depth + 1, row_id); - } - unique_ptr new_node = make_unique(*this, move(value), row_id); - Node::InsertLeaf(*this, node, key[depth], new_node); - return true; -} +//! LogicalComparisonJoin represents a join that involves comparisons between the LHS and RHS +class LogicalComparisonJoin : public LogicalJoin { +public: + explicit LogicalComparisonJoin(JoinType type, + LogicalOperatorType logical_type = LogicalOperatorType::LOGICAL_COMPARISON_JOIN); -//===--------------------------------------------------------------------===// -// Delete -//===--------------------------------------------------------------------===// -void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) { - DataChunk expression_result; - expression_result.Initialize(logical_types); + //! The conditions of the join + vector conditions; + //! 
Used for duplicate-eliminated joins + vector delim_types; - // first resolve the expressions - ExecuteExpressions(input, expression_result); +public: + string ParamsToString() const override; - // then generate the keys for the given input - vector> keys; - GenerateKeys(expression_result, keys); +public: + static unique_ptr CreateJoin(JoinType type, unique_ptr left_child, + unique_ptr right_child, + unordered_set &left_bindings, + unordered_set &right_bindings, + vector> &expressions); +}; - // now erase the elements from the database - row_ids.Normalify(input.size()); - auto row_identifiers = FlatVector::GetData(row_ids); +} // namespace duckdb - for (idx_t i = 0; i < input.size(); i++) { - if (!keys[i]) { - continue; - } - Erase(tree, *keys[i], 0, row_identifiers[i]); - } -} +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/planner/operator/logical_create_index.hpp +// +// +//===----------------------------------------------------------------------===// -void ART::Erase(unique_ptr &node, Key &key, unsigned depth, row_t row_id) { - if (!node) { - return; - } - // Delete a leaf from a tree - if (node->type == NodeType::NLeaf) { - // Make sure we have the right leaf - if (ART::LeafMatches(node.get(), key, depth)) { - auto leaf = static_cast(node.get()); - leaf->Remove(row_id); - if (leaf->num_elements == 0) { - node.reset(); - } - } - return; - } - // Handle prefix - if (node->prefix_length) { - if (Node::PrefixMismatch(*this, node.get(), key, depth) != node->prefix_length) { - return; - } - depth += node->prefix_length; - } - idx_t pos = node->GetChildPos(key[depth]); - if (pos != INVALID_INDEX) { - auto child = node->GetChild(pos); - D_ASSERT(child); - unique_ptr &child_ref = *child; - if (child_ref->type == NodeType::NLeaf && LeafMatches(child_ref.get(), key, depth)) { - // Leaf found, remove entry - auto leaf = static_cast(child_ref.get()); - leaf->Remove(row_id); - if (leaf->num_elements == 0) { - // 
Leaf is empty, delete leaf, decrement node counter and maybe shrink node - Node::Erase(*this, node, pos); - } - } else { - // Recurse - Erase(*child, key, depth + 1, row_id); + + + +namespace duckdb { + +class LogicalCreateIndex : public LogicalOperator { +public: + LogicalCreateIndex(TableCatalogEntry &table, vector column_ids, + vector> expressions, unique_ptr info) + : LogicalOperator(LogicalOperatorType::LOGICAL_CREATE_INDEX), table(table), column_ids(column_ids), + info(std::move(info)) { + for (auto &expr : expressions) { + this->unbound_expressions.push_back(expr->Copy()); } + this->expressions = move(expressions); } -} -//===--------------------------------------------------------------------===// -// Point Query -//===--------------------------------------------------------------------===// -static unique_ptr CreateKey(ART &art, PhysicalType type, Value &value) { - D_ASSERT(type == value.type().InternalType()); - switch (type) { - case PhysicalType::BOOL: - return Key::CreateKey(value.value_.boolean, art.is_little_endian); - case PhysicalType::INT8: - return Key::CreateKey(value.value_.tinyint, art.is_little_endian); - case PhysicalType::INT16: - return Key::CreateKey(value.value_.smallint, art.is_little_endian); - case PhysicalType::INT32: - return Key::CreateKey(value.value_.integer, art.is_little_endian); - case PhysicalType::INT64: - return Key::CreateKey(value.value_.bigint, art.is_little_endian); - case PhysicalType::UINT8: - return Key::CreateKey(value.value_.utinyint, art.is_little_endian); - case PhysicalType::UINT16: - return Key::CreateKey(value.value_.usmallint, art.is_little_endian); - case PhysicalType::UINT32: - return Key::CreateKey(value.value_.uinteger, art.is_little_endian); - case PhysicalType::UINT64: - return Key::CreateKey(value.value_.ubigint, art.is_little_endian); - case PhysicalType::INT128: - return Key::CreateKey(value.value_.hugeint, art.is_little_endian); - case PhysicalType::FLOAT: - return 
Key::CreateKey(value.value_.float_, art.is_little_endian); - case PhysicalType::DOUBLE: - return Key::CreateKey(value.value_.double_, art.is_little_endian); - case PhysicalType::VARCHAR: - return Key::CreateKey(string_t(value.str_value.c_str(), value.str_value.size()), - art.is_little_endian); - default: - throw InvalidTypeException(type, "Invalid type for index"); - } -} + //! The table to create the index for + TableCatalogEntry &table; + //! Column IDs needed for index creation + vector column_ids; + // Info for index creation + unique_ptr info; + //! Unbound expressions to be used in the optimizer + vector> unbound_expressions; -bool ART::SearchEqual(ARTIndexScanState *state, idx_t max_count, vector &result_ids) { - auto key = CreateKey(*this, types[0], state->values[0]); - auto leaf = static_cast(Lookup(tree, *key, 0)); - if (!leaf) { - return true; - } - if (leaf->num_elements > max_count) { - return false; - } - for (idx_t i = 0; i < leaf->num_elements; i++) { - row_t row_id = leaf->GetRowId(i); - result_ids.push_back(row_id); +protected: + void ResolveTypes() override { + types.push_back(LogicalType::BIGINT); } - return true; -} +}; +} // namespace duckdb -void ART::SearchEqualJoinNoFetch(Value &equal_value, idx_t &result_size) { - //! We need to look for a leaf - auto key = CreateKey(*this, types[0], equal_value); - auto leaf = static_cast(Lookup(tree, *key, 0)); - if (!leaf) { - return; - } - result_size = leaf->num_elements; -} +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/planner/operator/logical_delim_join.hpp +// +// +//===----------------------------------------------------------------------===// -Node *ART::Lookup(unique_ptr &node, Key &key, unsigned depth) { - auto node_val = node.get(); - while (node_val) { - if (node_val->type == NodeType::NLeaf) { - auto leaf = static_cast(node_val); - Key &leaf_key = *leaf->value; - //! 
Check leaf - for (idx_t i = depth; i < leaf_key.len; i++) { - if (leaf_key[i] != key[i]) { - return nullptr; - } - } - return node_val; - } - if (node_val->prefix_length) { - for (idx_t pos = 0; pos < node_val->prefix_length; pos++) { - if (key[depth + pos] != node_val->prefix[pos]) { - return nullptr; - } - } - depth += node_val->prefix_length; - } - idx_t pos = node_val->GetChildPos(key[depth]); - if (pos == INVALID_INDEX) { - return nullptr; - } - node_val = node_val->GetChild(pos)->get(); - D_ASSERT(node_val); - depth++; - } - return nullptr; -} -//===--------------------------------------------------------------------===// -// Iterator scans -//===--------------------------------------------------------------------===// -template -bool ART::IteratorScan(ARTIndexScanState *state, Iterator *it, Key *bound, idx_t max_count, vector &result_ids) { - bool has_next; - do { - if (HAS_BOUND) { - D_ASSERT(bound); - if (INCLUSIVE) { - if (*it->node->value > *bound) { - break; - } - } else { - if (*it->node->value >= *bound) { - break; - } - } - } - if (result_ids.size() + it->node->num_elements > max_count) { - // adding these elements would exceed the max count - return false; - } - for (idx_t i = 0; i < it->node->num_elements; i++) { - row_t row_id = it->node->GetRowId(i); - result_ids.push_back(row_id); - } - has_next = ART::IteratorNext(*it); - } while (has_next); - return true; -} +namespace duckdb { -void Iterator::SetEntry(idx_t entry_depth, IteratorEntry entry) { - if (stack.size() < entry_depth + 1) { - stack.resize(MaxValue(8, MaxValue(entry_depth + 1, stack.size() * 2))); +//! LogicalDelimJoin represents a special "duplicate eliminated" join. This join type is only used for subquery +//! flattening, and involves performing duplicate elimination on the LEFT side which is then pushed into the RIGHT side. 
+class LogicalDelimJoin : public LogicalComparisonJoin { +public: + explicit LogicalDelimJoin(JoinType type) : LogicalComparisonJoin(type, LogicalOperatorType::LOGICAL_DELIM_JOIN) { } - stack[entry_depth] = entry; -} -bool ART::IteratorNext(Iterator &it) { - // Skip leaf - if ((it.depth) && ((it.stack[it.depth - 1].node)->type == NodeType::NLeaf)) { - it.depth--; - } + //! The set of columns that will be duplicate eliminated from the LHS and pushed into the RHS + vector> duplicate_eliminated_columns; +}; - // Look for the next leaf - while (it.depth > 0) { - auto &top = it.stack[it.depth - 1]; - Node *node = top.node; +} // namespace duckdb - if (node->type == NodeType::NLeaf) { - // found a leaf: move to next node - it.node = (Leaf *)node; - return true; - } - // Find next node - top.pos = node->GetNextPos(top.pos); - if (top.pos != INVALID_INDEX) { - // next node found: go there - it.SetEntry(it.depth, IteratorEntry(node->GetChild(top.pos)->get(), INVALID_INDEX)); - it.depth++; - } else { - // no node found: move up the tree - it.depth--; - } - } - return false; -} -//===--------------------------------------------------------------------===// -// Greater Than -// Returns: True (If found leaf >= key) -// False (Otherwise) -//===--------------------------------------------------------------------===// -bool ART::Bound(unique_ptr &n, Key &key, Iterator &it, bool inclusive) { - it.depth = 0; - bool equal = false; - if (!n) { - return false; - } - Node *node = n.get(); - idx_t depth = 0; - while (true) { - it.SetEntry(it.depth, IteratorEntry(node, 0)); - auto &top = it.stack[it.depth]; - it.depth++; - if (!equal) { - while (node->type != NodeType::NLeaf) { - node = node->GetChild(node->GetMin())->get(); - auto &c_top = it.stack[it.depth]; - c_top.node = node; - it.depth++; - } - } - if (node->type == NodeType::NLeaf) { - // found a leaf node: check if it is bigger or equal than the current key - auto leaf = static_cast(node); - it.node = leaf; - // if the search is 
not inclusive the leaf node could still be equal to the current value - // check if leaf is equal to the current key - if (*leaf->value == key) { - // if its not inclusive check if there is a next leaf - if (!inclusive && !IteratorNext(it)) { - return false; - } else { - return true; - } - } - if (*leaf->value > key) { - return true; - } - // Leaf is lower than key - // Check if next leaf is still lower than key - while (IteratorNext(it)) { - if (*it.node->value == key) { - // if its not inclusive check if there is a next leaf - if (!inclusive && !IteratorNext(it)) { - return false; - } else { - return true; - } - } else if (*it.node->value > key) { - // if its not inclusive check if there is a next leaf - return true; - } - } - return false; - } - uint32_t mismatch_pos = Node::PrefixMismatch(*this, node, key, depth); - if (mismatch_pos != node->prefix_length) { - if (node->prefix[mismatch_pos] < key[depth + mismatch_pos]) { - // Less - it.depth--; - return IteratorNext(it); - } else { - // Greater - top.pos = INVALID_INDEX; - return IteratorNext(it); - } - } - // prefix matches, search inside the child for the key - depth += node->prefix_length; - top.pos = node->GetChildGreaterEqual(key[depth], equal); - if (top.pos == INVALID_INDEX) { - // Find min leaf - top.pos = node->GetMin(); - } - node = node->GetChild(top.pos)->get(); - //! 
This means all children of this node qualify as geq - depth++; - } -} -bool ART::SearchGreater(ARTIndexScanState *state, bool inclusive, idx_t max_count, vector &result_ids) { - Iterator *it = &state->iterator; - auto key = CreateKey(*this, types[0], state->values[0]); +namespace duckdb { - // greater than scan: first set the iterator to the node at which we will start our scan by finding the lowest node - // that satisfies our requirement - if (!it->start) { - bool found = ART::Bound(tree, *key, *it, inclusive); - if (!found) { - return true; +ColumnBindingResolver::ColumnBindingResolver() { +} + +void ColumnBindingResolver::VisitOperator(LogicalOperator &op) { + if (op.type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN || op.type == LogicalOperatorType::LOGICAL_DELIM_JOIN) { + // special case: comparison join + auto &comp_join = (LogicalComparisonJoin &)op; + // first get the bindings of the LHS and resolve the LHS expressions + VisitOperator(*comp_join.children[0]); + for (auto &cond : comp_join.conditions) { + VisitExpression(&cond.left); } - it->start = true; + if (op.type == LogicalOperatorType::LOGICAL_DELIM_JOIN) { + // visit the duplicate eliminated columns on the LHS, if any + auto &delim_join = (LogicalDelimJoin &)op; + for (auto &expr : delim_join.duplicate_eliminated_columns) { + VisitExpression(&expr); + } + } + // then get the bindings of the RHS and resolve the RHS expressions + VisitOperator(*comp_join.children[1]); + for (auto &cond : comp_join.conditions) { + VisitExpression(&cond.right); + } + // finally update the bindings with the result bindings of the join + bindings = op.GetColumnBindings(); + return; + } else if (op.type == LogicalOperatorType::LOGICAL_ANY_JOIN) { + // ANY join, this join is different because we evaluate the expression on the bindings of BOTH join sides at + // once i.e. 
we set the bindings first to the bindings of the entire join, and then resolve the expressions of + // this operator + VisitOperatorChildren(op); + bindings = op.GetColumnBindings(); + VisitOperatorExpressions(op); + return; + } else if (op.type == LogicalOperatorType::LOGICAL_CREATE_INDEX) { + // CREATE INDEX statement, add the columns of the table with table index 0 to the binding set + // afterwards bind the expressions of the CREATE INDEX statement + auto &create_index = (LogicalCreateIndex &)op; + bindings = LogicalOperator::GenerateColumnBindings(0, create_index.table.columns.size()); + VisitOperatorExpressions(op); + return; + } else if (op.type == LogicalOperatorType::LOGICAL_GET) { + //! We first need to update the current set of bindings and then visit operator expressions + bindings = op.GetColumnBindings(); + VisitOperatorExpressions(op); + return; } - // after that we continue the scan; we don't need to check the bounds as any value following this value is - // automatically bigger and hence satisfies our predicate - return IteratorScan(state, it, nullptr, max_count, result_ids); + // general case + // first visit the children of this operator + VisitOperatorChildren(op); + // now visit the expressions of this operator to resolve any bound column references + VisitOperatorExpressions(op); + // finally update the current set of bindings to the current set of column bindings + bindings = op.GetColumnBindings(); } -//===--------------------------------------------------------------------===// -// Less Than -//===--------------------------------------------------------------------===// -static Leaf &FindMinimum(Iterator &it, Node &node) { - Node *next = nullptr; - idx_t pos = 0; - switch (node.type) { - case NodeType::NLeaf: - it.node = (Leaf *)&node; - return (Leaf &)node; - case NodeType::N4: - next = ((Node4 &)node).child[0].get(); - break; - case NodeType::N16: - next = ((Node16 &)node).child[0].get(); - break; - case NodeType::N48: { - auto &n48 = 
(Node48 &)node; - while (n48.child_index[pos] == Node::EMPTY_MARKER) { - pos++; +unique_ptr ColumnBindingResolver::VisitReplace(BoundColumnRefExpression &expr, + unique_ptr *expr_ptr) { + D_ASSERT(expr.depth == 0); + // check the current set of column bindings to see which index corresponds to the column reference + for (idx_t i = 0; i < bindings.size(); i++) { + if (expr.binding == bindings[i]) { + return make_unique(expr.alias, expr.return_type, i); } - next = n48.child[n48.child_index[pos]].get(); - break; } - case NodeType::N256: { - auto &n256 = (Node256 &)node; - while (!n256.child[pos]) { - pos++; + // LCOV_EXCL_START + // could not bind the column reference, this should never happen and indicates a bug in the code + // generate an error message + string bound_columns = "["; + for (idx_t i = 0; i < bindings.size(); i++) { + if (i != 0) { + bound_columns += " "; } - next = n256.child[pos].get(); - break; - } + bound_columns += to_string(bindings[i].table_index) + "." + to_string(bindings[i].column_index); } - it.SetEntry(it.depth, IteratorEntry(&node, pos)); - it.depth++; - return FindMinimum(it, *next); + bound_columns += "]"; + + throw InternalException("Failed to bind column reference \"%s\" [%d.%d] (bindings: %s)", expr.alias, + expr.binding.table_index, expr.binding.column_index, bound_columns); + // LCOV_EXCL_STOP } -bool ART::SearchLess(ARTIndexScanState *state, bool inclusive, idx_t max_count, vector &result_ids) { - if (!tree) { - return true; - } +} // namespace duckdb - Iterator *it = &state->iterator; - auto upper_bound = CreateKey(*this, types[0], state->values[0]); - if (!it->start) { - // first find the minimum value in the ART: we start scanning from this value - auto &minimum = FindMinimum(state->iterator, *tree); - // early out min value higher than upper bound query - if (*minimum.value > *upper_bound) { - return true; - } - it->start = true; + + + + +namespace duckdb { + +struct BothInclusiveBetweenOperator { + template + static inline 
bool Operation(T input, T lower, T upper) { + return GreaterThanEquals::Operation(input, lower) && LessThanEquals::Operation(input, upper); } - // now continue the scan until we reach the upper bound - if (inclusive) { - return IteratorScan(state, it, upper_bound.get(), max_count, result_ids); - } else { - return IteratorScan(state, it, upper_bound.get(), max_count, result_ids); +}; + +struct LowerInclusiveBetweenOperator { + template + static inline bool Operation(T input, T lower, T upper) { + return GreaterThanEquals::Operation(input, lower) && LessThan::Operation(input, upper); } -} +}; -//===--------------------------------------------------------------------===// -// Closed Range Query -//===--------------------------------------------------------------------===// -bool ART::SearchCloseRange(ARTIndexScanState *state, bool left_inclusive, bool right_inclusive, idx_t max_count, - vector &result_ids) { - auto lower_bound = CreateKey(*this, types[0], state->values[0]); - auto upper_bound = CreateKey(*this, types[0], state->values[1]); - Iterator *it = &state->iterator; - // first find the first node that satisfies the left predicate - if (!it->start) { - bool found = ART::Bound(tree, *lower_bound, *it, left_inclusive); - if (!found) { - return true; - } - it->start = true; +struct UpperInclusiveBetweenOperator { + template + static inline bool Operation(T input, T lower, T upper) { + return GreaterThan::Operation(input, lower) && LessThanEquals::Operation(input, upper); } - // now continue the scan until we reach the upper bound - if (right_inclusive) { - return IteratorScan(state, it, upper_bound.get(), max_count, result_ids); - } else { - return IteratorScan(state, it, upper_bound.get(), max_count, result_ids); +}; + +struct ExclusiveBetweenOperator { + template + static inline bool Operation(T input, T lower, T upper) { + return GreaterThan::Operation(input, lower) && LessThan::Operation(input, upper); + } +}; + +template +static idx_t 
BetweenLoopTypeSwitch(Vector &input, Vector &lower, Vector &upper, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + switch (input.GetType().InternalType()) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + return TernaryExecutor::Select(input, lower, upper, sel, count, true_sel, + false_sel); + case PhysicalType::INT16: + return TernaryExecutor::Select(input, lower, upper, sel, count, true_sel, + false_sel); + case PhysicalType::INT32: + return TernaryExecutor::Select(input, lower, upper, sel, count, true_sel, + false_sel); + case PhysicalType::INT64: + return TernaryExecutor::Select(input, lower, upper, sel, count, true_sel, + false_sel); + case PhysicalType::INT128: + return TernaryExecutor::Select(input, lower, upper, sel, count, true_sel, + false_sel); + case PhysicalType::UINT8: + return TernaryExecutor::Select(input, lower, upper, sel, count, true_sel, + false_sel); + case PhysicalType::UINT16: + return TernaryExecutor::Select(input, lower, upper, sel, count, true_sel, + false_sel); + case PhysicalType::UINT32: + return TernaryExecutor::Select(input, lower, upper, sel, count, true_sel, + false_sel); + case PhysicalType::UINT64: + return TernaryExecutor::Select(input, lower, upper, sel, count, true_sel, + false_sel); + case PhysicalType::FLOAT: + return TernaryExecutor::Select(input, lower, upper, sel, count, true_sel, false_sel); + case PhysicalType::DOUBLE: + return TernaryExecutor::Select(input, lower, upper, sel, count, true_sel, + false_sel); + case PhysicalType::VARCHAR: + return TernaryExecutor::Select(input, lower, upper, sel, count, true_sel, + false_sel); + default: + throw InvalidTypeException(input.GetType(), "Invalid type for BETWEEN"); } } -bool ART::Scan(Transaction &transaction, DataTable &table, IndexScanState &table_state, idx_t max_count, - vector &result_ids) { - auto state = (ARTIndexScanState *)&table_state; +unique_ptr ExpressionExecutor::InitializeState(const 
BoundBetweenExpression &expr, + ExpressionExecutorState &root) { + auto result = make_unique(expr, root); + result->AddChild(expr.input.get()); + result->AddChild(expr.lower.get()); + result->AddChild(expr.upper.get()); + result->Finalize(); + return result; +} + +void ExpressionExecutor::Execute(const BoundBetweenExpression &expr, ExpressionState *state, const SelectionVector *sel, + idx_t count, Vector &result) { + // resolve the children + state->intermediate_chunk.Reset(); - D_ASSERT(state->values[0].type().InternalType() == types[0]); + auto &input = state->intermediate_chunk.data[0]; + auto &lower = state->intermediate_chunk.data[1]; + auto &upper = state->intermediate_chunk.data[2]; - vector row_ids; - bool success = true; - if (state->values[1].is_null) { - lock_guard l(lock); - // single predicate - switch (state->expressions[0]) { - case ExpressionType::COMPARE_EQUAL: - success = SearchEqual(state, max_count, row_ids); - break; - case ExpressionType::COMPARE_GREATERTHANOREQUALTO: - success = SearchGreater(state, true, max_count, row_ids); - break; - case ExpressionType::COMPARE_GREATERTHAN: - success = SearchGreater(state, false, max_count, row_ids); - break; - case ExpressionType::COMPARE_LESSTHANOREQUALTO: - success = SearchLess(state, true, max_count, row_ids); - break; - case ExpressionType::COMPARE_LESSTHAN: - success = SearchLess(state, false, max_count, row_ids); - break; - default: - throw NotImplementedException("Operation not implemented"); - } + Execute(*expr.input, state->child_states[0].get(), sel, count, input); + Execute(*expr.lower, state->child_states[1].get(), sel, count, lower); + Execute(*expr.upper, state->child_states[2].get(), sel, count, upper); + + Vector intermediate1(LogicalType::BOOLEAN); + Vector intermediate2(LogicalType::BOOLEAN); + + if (expr.upper_inclusive && expr.lower_inclusive) { + VectorOperations::GreaterThanEquals(input, lower, intermediate1, count); + VectorOperations::LessThanEquals(input, upper, intermediate2, 
count); + } else if (expr.lower_inclusive) { + VectorOperations::GreaterThanEquals(input, lower, intermediate1, count); + VectorOperations::LessThan(input, upper, intermediate2, count); + } else if (expr.upper_inclusive) { + VectorOperations::GreaterThan(input, lower, intermediate1, count); + VectorOperations::LessThanEquals(input, upper, intermediate2, count); } else { - lock_guard l(lock); - // two predicates - D_ASSERT(state->values[1].type().InternalType() == types[0]); - bool left_inclusive = state->expressions[0] == ExpressionType ::COMPARE_GREATERTHANOREQUALTO; - bool right_inclusive = state->expressions[1] == ExpressionType ::COMPARE_LESSTHANOREQUALTO; - success = SearchCloseRange(state, left_inclusive, right_inclusive, max_count, row_ids); - } - if (!success) { - return false; - } - if (row_ids.empty()) { - return true; + VectorOperations::GreaterThan(input, lower, intermediate1, count); + VectorOperations::LessThan(input, upper, intermediate2, count); } - // sort the row ids - sort(row_ids.begin(), row_ids.end()); - // duplicate eliminate the row ids and append them to the row ids of the state - result_ids.reserve(row_ids.size()); + VectorOperations::And(intermediate1, intermediate2, result, count); +} - result_ids.push_back(row_ids[0]); - for (idx_t i = 1; i < row_ids.size(); i++) { - if (row_ids[i] != row_ids[i - 1]) { - result_ids.push_back(row_ids[i]); - } +idx_t ExpressionExecutor::Select(const BoundBetweenExpression &expr, ExpressionState *state, const SelectionVector *sel, + idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { + // resolve the children + Vector input(state->intermediate_chunk.data[0]); + Vector lower(state->intermediate_chunk.data[1]); + Vector upper(state->intermediate_chunk.data[2]); + + Execute(*expr.input, state->child_states[0].get(), sel, count, input); + Execute(*expr.lower, state->child_states[1].get(), sel, count, lower); + Execute(*expr.upper, state->child_states[2].get(), sel, count, upper); + + if 
(expr.upper_inclusive && expr.lower_inclusive) { + return BetweenLoopTypeSwitch(input, lower, upper, sel, count, true_sel, + false_sel); + } else if (expr.lower_inclusive) { + return BetweenLoopTypeSwitch(input, lower, upper, sel, count, true_sel, + false_sel); + } else if (expr.upper_inclusive) { + return BetweenLoopTypeSwitch(input, lower, upper, sel, count, true_sel, + false_sel); + } else { + return BetweenLoopTypeSwitch(input, lower, upper, sel, count, true_sel, false_sel); } - return true; } } // namespace duckdb @@ -33602,111 +40967,260 @@ bool ART::Scan(Transaction &transaction, DataTable &table, IndexScanState &table + namespace duckdb { -Key::Key(unique_ptr data, idx_t len) : len(len), data(move(data)) { -} +void Case(Vector &res_true, Vector &res_false, Vector &result, SelectionVector &tside, idx_t tcount, + SelectionVector &fside, idx_t fcount); -template <> -unique_ptr Key::CreateKey(string_t value, bool is_little_endian) { - idx_t len = value.GetSize() + 1; - auto data = unique_ptr(new data_t[len]); - memcpy(data.get(), value.GetDataUnsafe(), len - 1); - data[len - 1] = '\0'; - return make_unique(move(data), len); -} +struct CaseExpressionState : public ExpressionState { + CaseExpressionState(const Expression &expr, ExpressionExecutorState &root) + : ExpressionState(expr, root), true_sel(STANDARD_VECTOR_SIZE), false_sel(STANDARD_VECTOR_SIZE) { + } -template <> -unique_ptr Key::CreateKey(const char *value, bool is_little_endian) { - return Key::CreateKey(string_t(value, strlen(value)), is_little_endian); + SelectionVector true_sel; + SelectionVector false_sel; +}; + +unique_ptr ExpressionExecutor::InitializeState(const BoundCaseExpression &expr, + ExpressionExecutorState &root) { + auto result = make_unique(expr, root); + result->AddChild(expr.check.get()); + result->AddChild(expr.result_if_true.get()); + result->AddChild(expr.result_if_false.get()); + result->Finalize(); + return move(result); } -bool Key::operator>(const Key &k) const { - for (idx_t 
i = 0; i < MinValue(len, k.len); i++) { - if (data[i] > k.data[i]) { - return true; - } else if (data[i] < k.data[i]) { - return false; +void ExpressionExecutor::Execute(const BoundCaseExpression &expr, ExpressionState *state_p, const SelectionVector *sel, + idx_t count, Vector &result) { + auto state = (CaseExpressionState *)state_p; + + state->intermediate_chunk.Reset(); + auto &res_true = state->intermediate_chunk.data[1]; + auto &res_false = state->intermediate_chunk.data[2]; + + auto check_state = state->child_states[0].get(); + auto res_true_state = state->child_states[1].get(); + auto res_false_state = state->child_states[2].get(); + + // first execute the check expression + auto &true_sel = state->true_sel; + auto &false_sel = state->false_sel; + idx_t tcount = Select(*expr.check, check_state, sel, count, &true_sel, &false_sel); + idx_t fcount = count - tcount; + if (fcount == 0) { + // everything is true, only execute TRUE side + Execute(*expr.result_if_true, res_true_state, sel, count, result); + } else if (tcount == 0) { + // everything is false, only execute FALSE side + Execute(*expr.result_if_false, res_false_state, sel, count, result); + } else { + // have to execute both and mix and match + Execute(*expr.result_if_true, res_true_state, &true_sel, tcount, res_true); + Execute(*expr.result_if_false, res_false_state, &false_sel, fcount, res_false); + + Case(res_true, res_false, result, true_sel, tcount, false_sel, fcount); + if (sel) { + result.Slice(*sel, count); } } - return len > k.len; } -bool Key::operator<(const Key &k) const { - for (idx_t i = 0; i < MinValue(len, k.len); i++) { - if (data[i] < k.data[i]) { - return true; - } else if (data[i] > k.data[i]) { - return false; +template +void TemplatedFillLoop(Vector &vector, Vector &result, SelectionVector &sel, sel_t count) { + result.SetVectorType(VectorType::FLAT_VECTOR); + auto res = FlatVector::GetData(result); + auto &result_mask = FlatVector::Validity(result); + if (vector.GetVectorType() == 
VectorType::CONSTANT_VECTOR) { + auto data = ConstantVector::GetData(vector); + if (ConstantVector::IsNull(vector)) { + for (idx_t i = 0; i < count; i++) { + result_mask.SetInvalid(sel.get_index(i)); + } + } else { + for (idx_t i = 0; i < count; i++) { + res[sel.get_index(i)] = *data; + } + } + } else { + VectorData vdata; + vector.Orrify(count, vdata); + auto data = (T *)vdata.data; + for (idx_t i = 0; i < count; i++) { + auto source_idx = vdata.sel->get_index(i); + auto res_idx = sel.get_index(i); + + res[res_idx] = data[source_idx]; + result_mask.Set(res_idx, vdata.validity.RowIsValid(source_idx)); } } - return len < k.len; } -bool Key::operator>=(const Key &k) const { - for (idx_t i = 0; i < MinValue(len, k.len); i++) { - if (data[i] > k.data[i]) { - return true; - } else if (data[i] < k.data[i]) { - return false; +void ValidityFillLoop(Vector &vector, Vector &result, SelectionVector &sel, sel_t count) { + result.SetVectorType(VectorType::FLAT_VECTOR); + auto &result_mask = FlatVector::Validity(result); + if (vector.GetVectorType() == VectorType::CONSTANT_VECTOR) { + if (ConstantVector::IsNull(vector)) { + for (idx_t i = 0; i < count; i++) { + result_mask.SetInvalid(sel.get_index(i)); + } + } + } else { + VectorData vdata; + vector.Orrify(count, vdata); + for (idx_t i = 0; i < count; i++) { + auto source_idx = vdata.sel->get_index(i); + auto res_idx = sel.get_index(i); + + result_mask.Set(res_idx, vdata.validity.RowIsValid(source_idx)); } } - return len >= k.len; } -bool Key::operator==(const Key &k) const { - if (len != k.len) { - return false; +template +void TemplatedCaseLoop(Vector &res_true, Vector &res_false, Vector &result, SelectionVector &tside, idx_t tcount, + SelectionVector &fside, idx_t fcount) { + TemplatedFillLoop(res_true, result, tside, tcount); + TemplatedFillLoop(res_false, result, fside, fcount); +} + +void ValidityCaseLoop(Vector &res_true, Vector &res_false, Vector &result, SelectionVector &tside, idx_t tcount, + SelectionVector &fside, 
idx_t fcount) { + ValidityFillLoop(res_true, result, tside, tcount); + ValidityFillLoop(res_false, result, fside, fcount); +} + +void Case(Vector &res_true, Vector &res_false, Vector &result, SelectionVector &tside, idx_t tcount, + SelectionVector &fside, idx_t fcount) { + D_ASSERT(res_true.GetType() == res_false.GetType() && res_true.GetType() == result.GetType()); + + switch (result.GetType().InternalType()) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + break; + case PhysicalType::INT16: + TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + break; + case PhysicalType::INT32: + TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + break; + case PhysicalType::INT64: + TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + break; + case PhysicalType::UINT8: + TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + break; + case PhysicalType::UINT16: + TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + break; + case PhysicalType::UINT32: + TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + break; + case PhysicalType::UINT64: + TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + break; + case PhysicalType::INT128: + TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + break; + case PhysicalType::FLOAT: + TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + break; + case PhysicalType::DOUBLE: + TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + break; + case PhysicalType::INTERVAL: + TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + break; + case PhysicalType::VARCHAR: + TemplatedCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + 
StringVector::AddHeapReference(result, res_true); + StringVector::AddHeapReference(result, res_false); + break; + case PhysicalType::STRUCT: { + auto &res_true_entries = StructVector::GetEntries(res_true); + auto &res_false_entries = StructVector::GetEntries(res_false); + auto &result_entries = StructVector::GetEntries(result); + D_ASSERT(res_true_entries.size() == res_false_entries.size() && + res_true_entries.size() == result_entries.size()); + ValidityCaseLoop(res_true, res_false, result, tside, tcount, fside, fcount); + for (idx_t i = 0; i < res_true_entries.size(); i++) { + Case(*res_true_entries[i], *res_false_entries[i], *result_entries[i], tside, tcount, fside, fcount); + } + break; } - for (idx_t i = 0; i < len; i++) { - if (data[i] != k.data[i]) { - return false; + case PhysicalType::LIST: { + idx_t offset = 0; + + auto &true_child = ListVector::GetEntry(res_true); + offset += ListVector::GetListSize(res_true); + ListVector::Append(result, true_child, ListVector::GetListSize(res_true)); + + auto &false_child = ListVector::GetEntry(res_false); + ListVector::Append(result, false_child, ListVector::GetListSize(res_false)); + + // all the false offsets need to be incremented by true_child.count + TemplatedFillLoop(res_true, result, tside, tcount); + + // FIXME the nullmask here is likely borked + // TODO uuugly + VectorData fdata; + res_false.Orrify(fcount, fdata); + + auto data = (list_entry_t *)fdata.data; + auto res = FlatVector::GetData(result); + auto &mask = FlatVector::Validity(result); + + for (idx_t i = 0; i < fcount; i++) { + auto fidx = fdata.sel->get_index(i); + auto res_idx = fside.get_index(i); + auto list_entry = data[fidx]; + list_entry.offset += offset; + res[res_idx] = list_entry; + mask.Set(res_idx, fdata.validity.RowIsValid(fidx)); } + + result.Verify(tside, tcount); + result.Verify(fside, fcount); + break; + } + default: + throw NotImplementedException("Unimplemented type for case expression: %s", result.GetType().ToString()); } - return 
true; } + } // namespace duckdb -#include namespace duckdb { -Leaf::Leaf(ART &art, unique_ptr value, row_t row_id) : Node(art, NodeType::NLeaf, 0) { - this->value = move(value); - this->capacity = 1; - this->row_ids = unique_ptr(new row_t[this->capacity]); - this->row_ids[0] = row_id; - this->num_elements = 1; +unique_ptr ExpressionExecutor::InitializeState(const BoundCastExpression &expr, + ExpressionExecutorState &root) { + auto result = make_unique(expr, root); + result->AddChild(expr.child.get()); + result->Finalize(); + return result; } -void Leaf::Insert(row_t row_id) { - // Grow array - if (num_elements == capacity) { - auto new_row_id = unique_ptr(new row_t[capacity * 2]); - memcpy(new_row_id.get(), row_ids.get(), capacity * sizeof(row_t)); - capacity *= 2; - row_ids = move(new_row_id); - } - row_ids[num_elements++] = row_id; -} +void ExpressionExecutor::Execute(const BoundCastExpression &expr, ExpressionState *state, const SelectionVector *sel, + idx_t count, Vector &result) { + // resolve the child + state->intermediate_chunk.Reset(); -//! TODO: Maybe shrink array dynamically? 
-void Leaf::Remove(row_t row_id) { - idx_t entry_offset = INVALID_INDEX; - for (idx_t i = 0; i < num_elements; i++) { - if (row_ids[i] == row_id) { - entry_offset = i; - break; - } - } - if (entry_offset == INVALID_INDEX) { - return; - } - num_elements--; - for (idx_t j = entry_offset; j < num_elements; j++) { - row_ids[j] = row_ids[j + 1]; + auto &child = state->intermediate_chunk.data[0]; + auto child_state = state->child_states[0].get(); + + Execute(*expr.child, child_state, sel, count, child); + if (expr.try_cast) { + string error_message; + VectorOperations::TryCast(child, result, count, &error_message); + } else { + // cast it to the type specified by the cast expression + D_ASSERT(result.GetType() == expr.return_type); + VectorOperations::Cast(child, result, count); } } @@ -33715,266 +41229,302 @@ void Leaf::Remove(row_t row_id) { -namespace duckdb { -Node::Node(ART &art, NodeType type, size_t compressed_prefix_size) : prefix_length(0), count(0), type(type) { - this->prefix = unique_ptr(new uint8_t[compressed_prefix_size]); -} -void Node::CopyPrefix(ART &art, Node *src, Node *dst) { - dst->prefix_length = src->prefix_length; - memcpy(dst->prefix.get(), src->prefix.get(), src->prefix_length); -} +#include -unique_ptr *Node::GetChild(idx_t pos) { - D_ASSERT(0); - return nullptr; -} +namespace duckdb { -idx_t Node::GetMin() { - D_ASSERT(0); - return 0; +unique_ptr ExpressionExecutor::InitializeState(const BoundComparisonExpression &expr, + ExpressionExecutorState &root) { + auto result = make_unique(expr, root); + result->AddChild(expr.left.get()); + result->AddChild(expr.right.get()); + result->Finalize(); + return result; } -uint32_t Node::PrefixMismatch(ART &art, Node *node, Key &key, uint64_t depth) { - uint64_t pos; - for (pos = 0; pos < node->prefix_length; pos++) { - if (key[depth + pos] != node->prefix[pos]) { - return pos; - } - } - return pos; -} +void ExpressionExecutor::Execute(const BoundComparisonExpression &expr, ExpressionState *state, + const 
SelectionVector *sel, idx_t count, Vector &result) { + // resolve the children + state->intermediate_chunk.Reset(); + auto &left = state->intermediate_chunk.data[0]; + auto &right = state->intermediate_chunk.data[1]; -void Node::InsertLeaf(ART &art, unique_ptr &node, uint8_t key, unique_ptr &new_node) { - switch (node->type) { - case NodeType::N4: - Node4::Insert(art, node, key, new_node); + Execute(*expr.left, state->child_states[0].get(), sel, count, left); + Execute(*expr.right, state->child_states[1].get(), sel, count, right); + + switch (expr.type) { + case ExpressionType::COMPARE_EQUAL: + VectorOperations::Equals(left, right, result, count); break; - case NodeType::N16: - Node16::Insert(art, node, key, new_node); + case ExpressionType::COMPARE_NOTEQUAL: + VectorOperations::NotEquals(left, right, result, count); break; - case NodeType::N48: - Node48::Insert(art, node, key, new_node); + case ExpressionType::COMPARE_LESSTHAN: + VectorOperations::LessThan(left, right, result, count); break; - case NodeType::N256: - Node256::Insert(art, node, key, new_node); + case ExpressionType::COMPARE_GREATERTHAN: + VectorOperations::GreaterThan(left, right, result, count); break; - default: - D_ASSERT(0); - } -} - -void Node::Erase(ART &art, unique_ptr &node, idx_t pos) { - switch (node->type) { - case NodeType::N4: { - Node4::Erase(art, node, pos); + case ExpressionType::COMPARE_LESSTHANOREQUALTO: + VectorOperations::LessThanEquals(left, right, result, count); break; - } - case NodeType::N16: { - Node16::Erase(art, node, pos); + case ExpressionType::COMPARE_GREATERTHANOREQUALTO: + VectorOperations::GreaterThanEquals(left, right, result, count); break; - } - case NodeType::N48: { - Node48::Erase(art, node, pos); + case ExpressionType::COMPARE_DISTINCT_FROM: + VectorOperations::DistinctFrom(left, right, result, count); break; - } - case NodeType::N256: - Node256::Erase(art, node, pos); + case ExpressionType::COMPARE_NOT_DISTINCT_FROM: + VectorOperations::NotDistinctFrom(left, 
right, result, count); break; default: - D_ASSERT(0); - break; + throw InternalException("Unknown comparison type!"); } } -} // namespace duckdb - - - - -#include - -namespace duckdb { +template +static idx_t NestedSelectOperation(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel); -Node16::Node16(ART &art, size_t compression_length) : Node(art, NodeType::N16, compression_length) { - memset(key, 16, sizeof(key)); +template +static idx_t TemplatedSelectOperation(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + // the inplace loops take the result as the last parameter + switch (left.GetType().InternalType()) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); + case PhysicalType::INT16: + return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); + case PhysicalType::INT32: + return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); + case PhysicalType::INT64: + return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); + case PhysicalType::UINT8: + return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); + case PhysicalType::UINT16: + return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); + case PhysicalType::UINT32: + return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); + case PhysicalType::UINT64: + return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); + case PhysicalType::INT128: + return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); + case PhysicalType::FLOAT: + return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); + case PhysicalType::DOUBLE: + return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); + case 
PhysicalType::INTERVAL: + return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); + case PhysicalType::VARCHAR: + return BinaryExecutor::Select(left, right, sel, count, true_sel, false_sel); + case PhysicalType::LIST: + case PhysicalType::MAP: + case PhysicalType::STRUCT: + return NestedSelectOperation(left, right, sel, count, true_sel, false_sel); + default: + throw InternalException("Invalid type for comparison"); + } } -// TODO : In the future this can be performed using SIMD (#include x86 SSE intrinsics) -idx_t Node16::GetChildPos(uint8_t k) { - for (idx_t pos = 0; pos < count; pos++) { - if (key[pos] == k) { - return pos; - } +struct NestedSelector { + // Select the matching rows for the values of a nested type that are not both NULL. + // Those semantics are the same as the corresponding non-distinct comparator + template + static idx_t Select(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + throw InvalidTypeException(left.GetType(), "Invalid operation for nested SELECT"); } - return Node::GetChildPos(k); +}; + +template <> +idx_t NestedSelector::Select(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, + idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { + return VectorOperations::NestedEquals(left, right, vcount, sel, count, true_sel, false_sel); } -idx_t Node16::GetChildGreaterEqual(uint8_t k, bool &equal) { - for (idx_t pos = 0; pos < count; pos++) { - if (key[pos] >= k) { - if (key[pos] == k) { - equal = true; - } else { - equal = false; - } +template <> +idx_t NestedSelector::Select(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, + idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { + return VectorOperations::NestedNotEquals(left, right, vcount, sel, count, true_sel, false_sel); +} - return pos; - } - } - return Node::GetChildGreaterEqual(k, equal); +template 
<> +idx_t NestedSelector::Select(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, + idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { + return VectorOperations::NestedLessThan(left, right, vcount, sel, count, true_sel, false_sel); } -idx_t Node16::GetNextPos(idx_t pos) { - if (pos == INVALID_INDEX) { - return 0; - } - pos++; - return pos < count ? pos : INVALID_INDEX; +template <> +idx_t NestedSelector::Select(Vector &left, Vector &right, idx_t vcount, + const SelectionVector &sel, idx_t count, SelectionVector *true_sel, + SelectionVector *false_sel) { + return VectorOperations::NestedLessThanEquals(left, right, vcount, sel, count, true_sel, false_sel); } -unique_ptr *Node16::GetChild(idx_t pos) { - D_ASSERT(pos < count); - return &child[pos]; +template <> +idx_t NestedSelector::Select(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, + idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { + return VectorOperations::NestedGreaterThan(left, right, vcount, sel, count, true_sel, false_sel); } -idx_t Node16::GetMin() { - return 0; +template <> +idx_t NestedSelector::Select(Vector &left, Vector &right, idx_t vcount, + const SelectionVector &sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + return VectorOperations::NestedGreaterThanEquals(left, right, vcount, sel, count, true_sel, false_sel); } -void Node16::Insert(ART &art, unique_ptr &node, uint8_t key_byte, unique_ptr &child) { - Node16 *n = static_cast(node.get()); +static inline idx_t SelectNotNull(VectorData &lvdata, VectorData &rvdata, const idx_t count, + OptionalSelection &false_vec, SelectionVector &maybe_vec) { - if (n->count < 16) { - // Insert element - idx_t pos = 0; - while (pos < node->count && n->key[pos] < key_byte) { - pos++; - } - if (n->child[pos] != nullptr) { - for (idx_t i = n->count; i > pos; i--) { - n->key[i] = n->key[i - 1]; - n->child[i] = move(n->child[i - 1]); + // For top-level 
comparisons, NULL semantics are in effect, + // so filter out any NULLs + if (!lvdata.validity.AllValid() || !rvdata.validity.AllValid()) { + idx_t true_count = 0; + idx_t false_count = 0; + for (idx_t i = 0; i < count; ++i) { + const auto lidx = lvdata.sel->get_index(i); + const auto ridx = rvdata.sel->get_index(i); + if (!lvdata.validity.RowIsValid(lidx) || !rvdata.validity.RowIsValid(ridx)) { + false_vec.Append(false_count, i); + } else { + maybe_vec.set_index(true_count++, i); } } - n->key[pos] = key_byte; - n->child[pos] = move(child); - n->count++; + false_vec.Advance(false_count); + + return true_count; } else { - // Grow to Node48 - auto new_node = make_unique(art, n->prefix_length); - for (idx_t i = 0; i < node->count; i++) { - new_node->child_index[n->key[i]] = i; - new_node->child[i] = move(n->child[i]); + for (idx_t i = 0; i < count; ++i) { + maybe_vec.set_index(i, i); } - CopyPrefix(art, n, new_node.get()); - new_node->count = node->count; - node = move(new_node); - - Node48::Insert(art, node, key_byte, child); + return count; } } -void Node16::Erase(ART &art, unique_ptr &node, int pos) { - Node16 *n = static_cast(node.get()); - // erase the child and decrease the count - n->child[pos].reset(); - n->count--; - // potentially move any children backwards - for (; pos < n->count; pos++) { - n->key[pos] = n->key[pos + 1]; - n->child[pos] = move(n->child[pos + 1]); - } - if (node->count <= 3) { - // Shrink node - auto new_node = make_unique(art, n->prefix_length); - for (unsigned i = 0; i < n->count; i++) { - new_node->key[new_node->count] = n->key[i]; - new_node->child[new_node->count++] = move(n->child[i]); +static void ScatterSelection(SelectionVector *target, const idx_t count, const SelectionVector *sel, + const SelectionVector &dense_vec) { + if (target) { + for (idx_t i = 0; i < count; ++i) { + target->set_index(i, sel->get_index(dense_vec.get_index(i))); } - CopyPrefix(art, n, new_node.get()); - node = move(new_node); } } -} // namespace duckdb 
+template +static idx_t NestedSelectOperation(Vector &left, Vector &right, const SelectionVector *sel, idx_t vcount, + SelectionVector *true_sel, SelectionVector *false_sel) { + // The Select operations all use a dense pair of input vectors to partition + // a selection vector in a single pass. But to implement progressive comparisons, + // we have to make multiple passes, so we need to keep track of the original input positions + // and then scatter the output selections when we are done. + if (!sel) { + sel = &FlatVector::INCREMENTAL_SELECTION_VECTOR; + } + VectorData lvdata, rvdata; + left.Orrify(vcount, lvdata); + right.Orrify(vcount, rvdata); + // Make real selections for progressive comparisons + SelectionVector true_vec(vcount); + OptionalSelection true_opt(&true_vec); -namespace duckdb { + SelectionVector false_vec(vcount); + OptionalSelection false_opt(&false_vec); -Node256::Node256(ART &art, size_t compression_length) : Node(art, NodeType::N256, compression_length) { -} + SelectionVector maybe_vec(vcount); + auto count = SelectNotNull(lvdata, rvdata, vcount, false_opt, maybe_vec); + auto no_match_count = vcount - count; -idx_t Node256::GetChildPos(uint8_t k) { - if (child[k]) { - return k; - } else { - return INVALID_INDEX; + // If everything was NULL, fill in false_sel with sel + if (count == 0) { + ScatterSelection(false_sel, no_match_count, sel, FlatVector::INCREMENTAL_SELECTION_VECTOR); + return count; } -} -idx_t Node256::GetChildGreaterEqual(uint8_t k, bool &equal) { - for (idx_t pos = k; pos < 256; pos++) { - if (child[pos]) { - if (pos == k) { - equal = true; - } else { - equal = false; - } - return pos; - } + // Now that we have handled the NULLs, we can use the recursive nested comparator for the rest. + auto match_count = NestedSelector::Select(left, right, vcount, maybe_vec, count, true_opt, false_opt); + no_match_count += (count - match_count); + + // Sort the optional selections if we would overwrite. 
+ if (true_sel == sel) { + std::sort(true_vec.data(), true_vec.data() + match_count); } - return INVALID_INDEX; + if (false_sel == sel) { + std::sort(false_vec.data(), false_vec.data() + no_match_count); + } + + // Scatter the original selection to the output selections + ScatterSelection(true_sel, match_count, sel, true_vec); + ScatterSelection(false_sel, no_match_count, sel, false_vec); + + return match_count; } -idx_t Node256::GetMin() { - for (idx_t i = 0; i < 256; i++) { - if (child[i]) { - return i; - } - } - return INVALID_INDEX; +idx_t VectorOperations::Equals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + return TemplatedSelectOperation(left, right, sel, count, true_sel, false_sel); } -idx_t Node256::GetNextPos(idx_t pos) { - for (pos == INVALID_INDEX ? pos = 0 : pos++; pos < 256; pos++) { - if (child[pos]) { - return pos; - } - } - return Node::GetNextPos(pos); +idx_t VectorOperations::NotEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + return TemplatedSelectOperation(left, right, sel, count, true_sel, false_sel); } -unique_ptr *Node256::GetChild(idx_t pos) { - D_ASSERT(child[pos]); - return &child[pos]; +idx_t VectorOperations::GreaterThan(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + return TemplatedSelectOperation(left, right, sel, count, true_sel, false_sel); } -void Node256::Insert(ART &art, unique_ptr &node, uint8_t key_byte, unique_ptr &child) { - Node256 *n = static_cast(node.get()); +idx_t VectorOperations::GreaterThanEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + return TemplatedSelectOperation(left, right, sel, count, true_sel, false_sel); +} - n->count++; - n->child[key_byte] = move(child); +idx_t 
VectorOperations::LessThan(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + return TemplatedSelectOperation(left, right, sel, count, true_sel, false_sel); } -void Node256::Erase(ART &art, unique_ptr &node, int pos) { - Node256 *n = static_cast(node.get()); +idx_t VectorOperations::LessThanEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + return TemplatedSelectOperation(left, right, sel, count, true_sel, false_sel); +} - n->child[pos].reset(); - n->count--; - if (node->count <= 36) { - auto new_node = make_unique(art, n->prefix_length); - CopyPrefix(art, n, new_node.get()); - for (idx_t i = 0; i < 256; i++) { - if (n->child[i]) { - new_node->child_index[i] = new_node->count; - new_node->child[new_node->count] = move(n->child[i]); - new_node->count++; - } - } - node = move(new_node); +idx_t ExpressionExecutor::Select(const BoundComparisonExpression &expr, ExpressionState *state, + const SelectionVector *sel, idx_t count, SelectionVector *true_sel, + SelectionVector *false_sel) { + // resolve the children + state->intermediate_chunk.Reset(); + auto &left = state->intermediate_chunk.data[0]; + auto &right = state->intermediate_chunk.data[1]; + + Execute(*expr.left, state->child_states[0].get(), sel, count, left); + Execute(*expr.right, state->child_states[1].get(), sel, count, right); + + switch (expr.type) { + case ExpressionType::COMPARE_EQUAL: + return VectorOperations::Equals(left, right, sel, count, true_sel, false_sel); + case ExpressionType::COMPARE_NOTEQUAL: + return VectorOperations::NotEquals(left, right, sel, count, true_sel, false_sel); + case ExpressionType::COMPARE_LESSTHAN: + return VectorOperations::LessThan(left, right, sel, count, true_sel, false_sel); + case ExpressionType::COMPARE_GREATERTHAN: + return VectorOperations::GreaterThan(left, right, sel, count, true_sel, false_sel); + 
case ExpressionType::COMPARE_LESSTHANOREQUALTO: + return VectorOperations::LessThanEquals(left, right, sel, count, true_sel, false_sel); + case ExpressionType::COMPARE_GREATERTHANOREQUALTO: + return VectorOperations::GreaterThanEquals(left, right, sel, count, true_sel, false_sel); + case ExpressionType::COMPARE_DISTINCT_FROM: + return VectorOperations::DistinctFrom(left, right, sel, count, true_sel, false_sel); + case ExpressionType::COMPARE_NOT_DISTINCT_FROM: + return VectorOperations::NotDistinctFrom(left, right, sel, count, true_sel, false_sel); + default: + throw InternalException("Unknown comparison type!"); } } @@ -33983,121 +41533,142 @@ void Node256::Erase(ART &art, unique_ptr &node, int pos) { + + +#include + namespace duckdb { -Node4::Node4(ART &art, size_t compression_length) : Node(art, NodeType::N4, compression_length) { - memset(key, 0, sizeof(key)); -} +struct ConjunctionState : public ExpressionState { + ConjunctionState(const Expression &expr, ExpressionExecutorState &root) : ExpressionState(expr, root) { + adaptive_filter = make_unique(expr); + } + unique_ptr adaptive_filter; +}; -idx_t Node4::GetChildPos(uint8_t k) { - for (idx_t pos = 0; pos < count; pos++) { - if (key[pos] == k) { - return pos; - } +unique_ptr ExpressionExecutor::InitializeState(const BoundConjunctionExpression &expr, + ExpressionExecutorState &root) { + auto result = make_unique(expr, root); + for (auto &child : expr.children) { + result->AddChild(child.get()); } - return Node::GetChildPos(k); + result->Finalize(); + return move(result); } -idx_t Node4::GetChildGreaterEqual(uint8_t k, bool &equal) { - for (idx_t pos = 0; pos < count; pos++) { - if (key[pos] >= k) { - if (key[pos] == k) { - equal = true; - } else { - equal = false; +void ExpressionExecutor::Execute(const BoundConjunctionExpression &expr, ExpressionState *state, + const SelectionVector *sel, idx_t count, Vector &result) { + // execute the children + state->intermediate_chunk.Reset(); + for (idx_t i = 0; i < 
expr.children.size(); i++) { + auto ¤t_result = state->intermediate_chunk.data[i]; + Execute(*expr.children[i], state->child_states[i].get(), sel, count, current_result); + if (i == 0) { + // move the result + result.Reference(current_result); + } else { + Vector intermediate(LogicalType::BOOLEAN); + // AND/OR together + switch (expr.type) { + case ExpressionType::CONJUNCTION_AND: + VectorOperations::And(current_result, result, intermediate, count); + break; + case ExpressionType::CONJUNCTION_OR: + VectorOperations::Or(current_result, result, intermediate, count); + break; + default: + throw InternalException("Unknown conjunction type!"); } - return pos; + result.Reference(intermediate); } } - return Node::GetChildGreaterEqual(k, equal); } -idx_t Node4::GetMin() { - return 0; -} - -idx_t Node4::GetNextPos(idx_t pos) { - if (pos == INVALID_INDEX) { - return 0; - } - pos++; - return pos < count ? pos : INVALID_INDEX; -} +idx_t ExpressionExecutor::Select(const BoundConjunctionExpression &expr, ExpressionState *state_p, + const SelectionVector *sel, idx_t count, SelectionVector *true_sel, + SelectionVector *false_sel) { + auto state = (ConjunctionState *)state_p; -unique_ptr *Node4::GetChild(idx_t pos) { - D_ASSERT(pos < count); - return &child[pos]; -} + if (expr.type == ExpressionType::CONJUNCTION_AND) { + // get runtime statistics + auto start_time = high_resolution_clock::now(); -void Node4::Insert(ART &art, unique_ptr &node, uint8_t key_byte, unique_ptr &child) { - Node4 *n = static_cast(node.get()); + const SelectionVector *current_sel = sel; + idx_t current_count = count; + idx_t false_count = 0; - // Insert leaf into inner node - if (node->count < 4) { - // Insert element - idx_t pos = 0; - while ((pos < node->count) && (n->key[pos] < key_byte)) { - pos++; + unique_ptr temp_true, temp_false; + if (false_sel) { + temp_false = make_unique(STANDARD_VECTOR_SIZE); } - if (n->child[pos] != nullptr) { - for (idx_t i = n->count; i > pos; i--) { - n->key[i] = n->key[i - 
1]; - n->child[i] = move(n->child[i - 1]); - } + if (!true_sel) { + temp_true = make_unique(STANDARD_VECTOR_SIZE); + true_sel = temp_true.get(); } - n->key[pos] = key_byte; - n->child[pos] = move(child); - n->count++; - } else { - // Grow to Node16 - auto new_node = make_unique(art, n->prefix_length); - new_node->count = 4; - CopyPrefix(art, node.get(), new_node.get()); - for (idx_t i = 0; i < 4; i++) { - new_node->key[i] = n->key[i]; - new_node->child[i] = move(n->child[i]); + for (idx_t i = 0; i < expr.children.size(); i++) { + idx_t tcount = Select(*expr.children[state->adaptive_filter->permutation[i]], + state->child_states[state->adaptive_filter->permutation[i]].get(), current_sel, + current_count, true_sel, temp_false.get()); + idx_t fcount = current_count - tcount; + if (fcount > 0 && false_sel) { + // move failing tuples into the false_sel + // tuples passed, move them into the actual result vector + for (idx_t i = 0; i < fcount; i++) { + false_sel->set_index(false_count++, temp_false->get_index(i)); + } + } + current_count = tcount; + if (current_count == 0) { + break; + } + if (current_count < count) { + // tuples were filtered out: move on to using the true_sel to only evaluate passing tuples in subsequent + // iterations + current_sel = true_sel; + } } - node = move(new_node); - Node16::Insert(art, node, key_byte, child); - } -} - -void Node4::Erase(ART &art, unique_ptr &node, int pos) { - Node4 *n = static_cast(node.get()); - D_ASSERT(pos < n->count); - // erase the child and decrease the count - n->child[pos].reset(); - n->count--; - // potentially move any children backwards - for (; pos < n->count; pos++) { - n->key[pos] = n->key[pos + 1]; - n->child[pos] = move(n->child[pos + 1]); - } + // adapt runtime statistics + auto end_time = high_resolution_clock::now(); + state->adaptive_filter->AdaptRuntimeStatistics(duration_cast>(end_time - start_time).count()); + return current_count; + } else { + // get runtime statistics + auto start_time = 
high_resolution_clock::now(); - // This is a one way node - if (n->count == 1) { - auto childref = n->child[0].get(); - //! concatenate prefixes - auto new_length = node->prefix_length + childref->prefix_length + 1; - //! have to allocate space in our prefix array - unique_ptr new_prefix = unique_ptr(new uint8_t[new_length]); - ; + const SelectionVector *current_sel = sel; + idx_t current_count = count; + idx_t result_count = 0; - //! first move the existing prefix (if any) - for (uint32_t i = 0; i < childref->prefix_length; i++) { - new_prefix[new_length - (i + 1)] = childref->prefix[childref->prefix_length - (i + 1)]; + unique_ptr temp_true, temp_false; + if (true_sel) { + temp_true = make_unique(STANDARD_VECTOR_SIZE); } - //! now move the current key as part of the prefix - new_prefix[node->prefix_length] = n->key[0]; - //! finally add the old prefix - for (uint32_t i = 0; i < node->prefix_length; i++) { - new_prefix[i] = node->prefix[i]; + if (!false_sel) { + temp_false = make_unique(STANDARD_VECTOR_SIZE); + false_sel = temp_false.get(); } - //! 
set new prefix and move the child - childref->prefix = move(new_prefix); - childref->prefix_length = new_length; - node = move(n->child[0]); + for (idx_t i = 0; i < expr.children.size(); i++) { + idx_t tcount = Select(*expr.children[state->adaptive_filter->permutation[i]], + state->child_states[state->adaptive_filter->permutation[i]].get(), current_sel, + current_count, temp_true.get(), false_sel); + if (tcount > 0) { + if (true_sel) { + // tuples passed, move them into the actual result vector + for (idx_t i = 0; i < tcount; i++) { + true_sel->set_index(result_count++, temp_true->get_index(i)); + } + } + // now move on to check only the non-passing tuples + current_count -= tcount; + current_sel = false_sel; + } + } + + // adapt runtime statistics + auto end_time = high_resolution_clock::now(); + state->adaptive_filter->AdaptRuntimeStatistics(duration_cast>(end_time - start_time).count()); + return result_count; } } @@ -34108,1625 +41679,1461 @@ void Node4::Erase(ART &art, unique_ptr &node, int pos) { namespace duckdb { -Node48::Node48(ART &art, size_t compression_length) : Node(art, NodeType::N48, compression_length) { - for (idx_t i = 0; i < 256; i++) { - child_index[i] = Node::EMPTY_MARKER; - } +unique_ptr ExpressionExecutor::InitializeState(const BoundConstantExpression &expr, + ExpressionExecutorState &root) { + auto result = make_unique(expr, root); + result->Finalize(); + return result; } -idx_t Node48::GetChildPos(uint8_t k) { - if (child_index[k] == Node::EMPTY_MARKER) { - return INVALID_INDEX; - } else { - return k; - } +void ExpressionExecutor::Execute(const BoundConstantExpression &expr, ExpressionState *state, + const SelectionVector *sel, idx_t count, Vector &result) { + D_ASSERT(expr.value.type() == expr.return_type); + result.Reference(expr.value); } -idx_t Node48::GetChildGreaterEqual(uint8_t k, bool &equal) { - for (idx_t pos = k; pos < 256; pos++) { - if (child_index[pos] != Node::EMPTY_MARKER) { - if (pos == k) { - equal = true; - } else { - 
equal = false; - } - return pos; - } +} // namespace duckdb + + + +namespace duckdb { + +unique_ptr ExpressionExecutor::InitializeState(const BoundFunctionExpression &expr, + ExpressionExecutorState &root) { + auto result = make_unique(expr, root); + for (auto &child : expr.children) { + result->AddChild(child.get()); } - return Node::GetChildGreaterEqual(k, equal); + result->Finalize(); + return result; } -idx_t Node48::GetNextPos(idx_t pos) { - for (pos == INVALID_INDEX ? pos = 0 : pos++; pos < 256; pos++) { - if (child_index[pos] != Node::EMPTY_MARKER) { - return pos; +void ExpressionExecutor::Execute(const BoundFunctionExpression &expr, ExpressionState *state, + const SelectionVector *sel, idx_t count, Vector &result) { + state->intermediate_chunk.Reset(); + auto &arguments = state->intermediate_chunk; + if (!state->types.empty()) { + for (idx_t i = 0; i < expr.children.size(); i++) { + D_ASSERT(state->types[i] == expr.children[i]->return_type); + Execute(*expr.children[i], state->child_states[i].get(), sel, count, arguments.data[i]); +#ifdef DEBUG + if (expr.children[i]->return_type.id() == LogicalTypeId::VARCHAR) { + arguments.data[i].UTFVerify(count); + } +#endif } + arguments.Verify(); } - return Node::GetNextPos(pos); + arguments.SetCardinality(count); + state->profiler.BeginSample(); + expr.function.function(arguments, *state, result); + state->profiler.EndSample(count); + D_ASSERT(result.GetType() == expr.return_type); } -unique_ptr *Node48::GetChild(idx_t pos) { - D_ASSERT(child_index[pos] != Node::EMPTY_MARKER); - return &child[child_index[pos]]; -} +} // namespace duckdb -idx_t Node48::GetMin() { - for (idx_t i = 0; i < 256; i++) { - if (child_index[i] != Node::EMPTY_MARKER) { - return i; - } - } - return INVALID_INDEX; -} -void Node48::Insert(ART &art, unique_ptr &node, uint8_t key_byte, unique_ptr &child) { - Node48 *n = static_cast(node.get()); - // Insert leaf into inner node - if (node->count < 48) { - // Insert element - idx_t pos = n->count; - 
if (n->child[pos]) { - // find an empty position in the node list if the current position is occupied - pos = 0; - while (n->child[pos]) { - pos++; - } - } - n->child[pos] = move(child); - n->child_index[key_byte] = pos; - n->count++; - } else { - // Grow to Node256 - auto new_node = make_unique(art, n->prefix_length); - for (idx_t i = 0; i < 256; i++) { - if (n->child_index[i] != Node::EMPTY_MARKER) { - new_node->child[i] = move(n->child[n->child_index[i]]); - } - } - new_node->count = n->count; - CopyPrefix(art, n, new_node.get()); - node = move(new_node); - Node256::Insert(art, node, key_byte, child); + +namespace duckdb { + +unique_ptr ExpressionExecutor::InitializeState(const BoundOperatorExpression &expr, + ExpressionExecutorState &root) { + auto result = make_unique(expr, root); + for (auto &child : expr.children) { + result->AddChild(child.get()); } + result->Finalize(); + return result; } -void Node48::Erase(ART &art, unique_ptr &node, int pos) { - Node48 *n = static_cast(node.get()); +void ExpressionExecutor::Execute(const BoundOperatorExpression &expr, ExpressionState *state, + const SelectionVector *sel, idx_t count, Vector &result) { + // special handling for special snowflake 'IN' + // IN has n children + if (expr.type == ExpressionType::COMPARE_IN || expr.type == ExpressionType::COMPARE_NOT_IN) { + if (expr.children.size() < 2) { + throw Exception("IN needs at least two children"); + } - n->child[n->child_index[pos]].reset(); - n->child_index[pos] = Node::EMPTY_MARKER; - n->count--; - if (node->count <= 12) { - auto new_node = make_unique(art, n->prefix_length); - CopyPrefix(art, n, new_node.get()); - for (idx_t i = 0; i < 256; i++) { - if (n->child_index[i] != Node::EMPTY_MARKER) { - new_node->key[new_node->count] = i; - new_node->child[new_node->count++] = move(n->child[n->child_index[i]]); + Vector left(expr.children[0]->return_type); + // eval left side + Execute(*expr.children[0], state->child_states[0].get(), sel, count, left); + + // init 
result to false + Vector intermediate(LogicalType::BOOLEAN); + Value false_val = Value::BOOLEAN(false); + intermediate.Reference(false_val); + + // in rhs is a list of constants + // for every child, OR the result of the comparision with the left + // to get the overall result. + for (idx_t child = 1; child < expr.children.size(); child++) { + Vector vector_to_check(expr.children[child]->return_type); + Vector comp_res(LogicalType::BOOLEAN); + + Execute(*expr.children[child], state->child_states[child].get(), sel, count, vector_to_check); + VectorOperations::Equals(left, vector_to_check, comp_res, count); + + if (child == 1) { + // first child: move to result + intermediate.Reference(comp_res); + } else { + // otherwise OR together + Vector new_result(LogicalType::BOOLEAN, true, false); + VectorOperations::Or(intermediate, comp_res, new_result, count); + intermediate.Reference(new_result); } } - node = move(new_node); + if (expr.type == ExpressionType::COMPARE_NOT_IN) { + // NOT IN: invert result + VectorOperations::Not(intermediate, result, count); + } else { + // directly use the result + result.Reference(intermediate); + } + } else if (expr.children.size() == 1) { + state->intermediate_chunk.Reset(); + auto &child = state->intermediate_chunk.data[0]; + + Execute(*expr.children[0], state->child_states[0].get(), sel, count, child); + switch (expr.type) { + case ExpressionType::OPERATOR_NOT: { + VectorOperations::Not(child, result, count); + break; + } + case ExpressionType::OPERATOR_IS_NULL: { + VectorOperations::IsNull(child, result, count); + break; + } + case ExpressionType::OPERATOR_IS_NOT_NULL: { + VectorOperations::IsNotNull(child, result, count); + break; + } + default: + throw NotImplementedException("Unsupported operator type with 1 child!"); + } + } else { + throw NotImplementedException("operator"); } } } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// 
duckdb/execution/join_hashtable.hpp -// -// -//===----------------------------------------------------------------------===// +namespace duckdb { +unique_ptr ExpressionExecutor::InitializeState(const BoundParameterExpression &expr, + ExpressionExecutorState &root) { + auto result = make_unique(expr, root); + result->Finalize(); + return result; +} +void ExpressionExecutor::Execute(const BoundParameterExpression &expr, ExpressionState *state, + const SelectionVector *sel, idx_t count, Vector &result) { + D_ASSERT(expr.value); + D_ASSERT(expr.value->type() == expr.return_type); + result.Reference(*expr.value); +} +} // namespace duckdb namespace duckdb { -class BufferManager; -class BufferHandle; -struct JoinHTScanState { - JoinHTScanState() : position(0), block_position(0) { +unique_ptr ExpressionExecutor::InitializeState(const BoundReferenceExpression &expr, + ExpressionExecutorState &root) { + auto result = make_unique(expr, root); + result->Finalize(); + return result; +} + +void ExpressionExecutor::Execute(const BoundReferenceExpression &expr, ExpressionState *state, + const SelectionVector *sel, idx_t count, Vector &result) { + D_ASSERT(expr.index != INVALID_INDEX); + D_ASSERT(expr.index < chunk->ColumnCount()); + if (sel) { + result.Slice(chunk->data[expr.index], *sel, count); + } else { + result.Reference(chunk->data[expr.index]); } +} - idx_t position; - idx_t block_position; -}; +} // namespace duckdb -//! JoinHashTable is a linear probing HT that is used for computing joins -/*! - The JoinHashTable concatenates incoming chunks inside a linked list of - data ptrs. The storage looks like this internally. - [SERIALIZED ROW][NEXT POINTER] - [SERIALIZED ROW][NEXT POINTER] - There is a separate hash map of pointers that point into this table. - This is what is used to resolve the hashes. - [POINTER] - [POINTER] - [POINTER] - The pointers are either NULL -*/ -class JoinHashTable { -public: - //! 
Scan structure that can be used to resume scans, as a single probe can - //! return 1024*N values (where N is the size of the HT). This is - //! returned by the JoinHashTable::Scan function and can be used to resume a - //! probe. - struct ScanStructure { - unique_ptr key_data; - Vector pointers; - idx_t count; - SelectionVector sel_vector; - // whether or not the given tuple has found a match - unique_ptr found_match; - JoinHashTable &ht; - bool finished; - explicit ScanStructure(JoinHashTable &ht); - //! Get the next batch of data from the scan structure - void Next(DataChunk &keys, DataChunk &left, DataChunk &result); - private: - void AdvancePointers(); - void AdvancePointers(const SelectionVector &sel, idx_t sel_count); - //! Next operator for the inner join - void NextInnerJoin(DataChunk &keys, DataChunk &left, DataChunk &result); - //! Next operator for the semi join - void NextSemiJoin(DataChunk &keys, DataChunk &left, DataChunk &result); - //! Next operator for the anti join - void NextAntiJoin(DataChunk &keys, DataChunk &left, DataChunk &result); - //! Next operator for the left outer join - void NextLeftJoin(DataChunk &keys, DataChunk &left, DataChunk &result); - //! Next operator for the mark join - void NextMarkJoin(DataChunk &keys, DataChunk &left, DataChunk &result); - //! Next operator for the single join - void NextSingleJoin(DataChunk &keys, DataChunk &left, DataChunk &result); - //! Scan the hashtable for matches of the specified keys, setting the found_match[] array to true or false for - //! 
every tuple - void ScanKeyMatches(DataChunk &keys); - template - void NextSemiOrAntiJoin(DataChunk &keys, DataChunk &left, DataChunk &result); +namespace duckdb { - void ConstructMarkJoinResult(DataChunk &join_keys, DataChunk &child, DataChunk &result); +ExpressionExecutor::ExpressionExecutor() { +} - idx_t ScanInnerJoin(DataChunk &keys, SelectionVector &result_vector); +ExpressionExecutor::ExpressionExecutor(const Expression *expression) : ExpressionExecutor() { + D_ASSERT(expression); + AddExpression(*expression); +} - idx_t ResolvePredicates(DataChunk &keys, SelectionVector &match_sel); - idx_t ResolvePredicates(DataChunk &keys, SelectionVector &match_sel, SelectionVector &no_match_sel); - void GatherResult(Vector &result, const SelectionVector &result_vector, const SelectionVector &sel_vector, - idx_t count, idx_t &offset); - void GatherResult(Vector &result, const SelectionVector &sel_vector, idx_t count, idx_t &offset); +ExpressionExecutor::ExpressionExecutor(const Expression &expression) : ExpressionExecutor() { + AddExpression(expression); +} - template - idx_t ResolvePredicates(DataChunk &keys, SelectionVector *match_sel, SelectionVector *no_match_sel); - }; +ExpressionExecutor::ExpressionExecutor(const vector> &exprs) : ExpressionExecutor() { + D_ASSERT(exprs.size() > 0); + for (auto &expr : exprs) { + AddExpression(*expr); + } +} -private: - std::mutex ht_lock; +void ExpressionExecutor::AddExpression(const Expression &expr) { + expressions.push_back(&expr); + auto state = make_unique(expr.ToString()); + Initialize(expr, *state); + states.push_back(move(state)); +} - //! 
Nodes store the actual data of the tuples inside the HT as a linked list - struct HTDataBlock { - idx_t count; - idx_t capacity; - shared_ptr block; - }; +void ExpressionExecutor::Initialize(const Expression &expression, ExpressionExecutorState &state) { + state.root_state = InitializeState(expression, state); + state.executor = this; +} - struct BlockAppendEntry { - BlockAppendEntry(data_ptr_t baseptr_, idx_t count_) : baseptr(baseptr_), count(count_) { - } +void ExpressionExecutor::Execute(DataChunk *input, DataChunk &result) { + SetChunk(input); + D_ASSERT(expressions.size() == result.ColumnCount()); + D_ASSERT(!expressions.empty()); - data_ptr_t baseptr; - idx_t count; - }; + for (idx_t i = 0; i < expressions.size(); i++) { + ExecuteExpression(i, result.data[i]); + } + result.SetCardinality(input ? input->size() : 1); + result.Verify(); +} - idx_t AppendToBlock(HTDataBlock &block, BufferHandle &handle, vector &append_entries, - idx_t remaining); +void ExpressionExecutor::ExecuteExpression(DataChunk &input, Vector &result) { + SetChunk(&input); + ExecuteExpression(result); +} - void Hash(DataChunk &keys, const SelectionVector &sel, idx_t count, Vector &hashes); +idx_t ExpressionExecutor::SelectExpression(DataChunk &input, SelectionVector &sel) { + D_ASSERT(expressions.size() == 1); + SetChunk(&input); + states[0]->profiler.BeginSample(); + idx_t selected_tuples = Select(*expressions[0], states[0]->root_state.get(), nullptr, input.size(), &sel, nullptr); + states[0]->profiler.EndSample(chunk ? chunk->size() : 0); + return selected_tuples; +} -public: - JoinHashTable(BufferManager &buffer_manager, vector &conditions, vector build_types, - JoinType type); - ~JoinHashTable(); +void ExpressionExecutor::ExecuteExpression(Vector &result) { + D_ASSERT(expressions.size() == 1); + ExecuteExpression(0, result); +} - //! Add the given data to the HT - void Build(DataChunk &keys, DataChunk &input); - //! 
Finalize the build of the HT, constructing the actual hash table and making the HT ready for probing. Finalize - //! must be called before any call to Probe, and after Finalize is called Build should no longer be ever called. - void Finalize(); - //! Probe the HT with the given input chunk, resulting in the given result - unique_ptr Probe(DataChunk &keys); - //! Scan the HT to construct the final full outer join result after - void ScanFullOuter(DataChunk &result, JoinHTScanState &state); +void ExpressionExecutor::ExecuteExpression(idx_t expr_idx, Vector &result) { + D_ASSERT(expr_idx < expressions.size()); + D_ASSERT(result.GetType() == expressions[expr_idx]->return_type); + states[expr_idx]->profiler.BeginSample(); + Execute(*expressions[expr_idx], states[expr_idx]->root_state.get(), nullptr, chunk ? chunk->size() : 1, result); + states[expr_idx]->profiler.EndSample(chunk ? chunk->size() : 0); +} - idx_t size() { - return count; +Value ExpressionExecutor::EvaluateScalar(const Expression &expr) { + D_ASSERT(expr.IsFoldable()); + // use an ExpressionExecutor to execute the expression + ExpressionExecutor executor(expr); + + Vector result(expr.return_type); + executor.ExecuteExpression(result); + + D_ASSERT(result.GetVectorType() == VectorType::CONSTANT_VECTOR); + auto result_value = result.GetValue(0); + D_ASSERT(result_value.type() == expr.return_type); + return result_value; +} + +void ExpressionExecutor::Verify(const Expression &expr, Vector &vector, idx_t count) { + D_ASSERT(expr.return_type == vector.GetType()); + vector.Verify(count); + if (expr.stats) { + expr.stats->Verify(vector, count); } +} - //! The stringheap of the JoinHashTable - StringHeap string_heap; - //! BufferManager - BufferManager &buffer_manager; - //! The types of the keys used in equality comparison - vector equality_types; - //! The types of the keys - vector condition_types; - //! The types of all conditions - vector build_types; - //! The comparison predicates - vector predicates; - //! 
Size of condition keys - idx_t equality_size; - //! Size of condition keys - idx_t condition_size; - //! Size of build tuple - idx_t build_size; - //! The size of an entry as stored in the HashTable - idx_t entry_size; - //! The total tuple size - idx_t tuple_size; - //! Next pointer offset in tuple - idx_t pointer_offset; - //! The join type of the HT - JoinType join_type; - //! Whether or not the HT has been finalized - bool finalized; - //! Whether or not any of the key elements contain NULL - bool has_null; - //! Bitmask for getting relevant bits from the hashes to determine the position - uint64_t bitmask; - //! The amount of entries stored per block - idx_t block_capacity; +unique_ptr ExpressionExecutor::InitializeState(const Expression &expr, + ExpressionExecutorState &state) { + switch (expr.expression_class) { + case ExpressionClass::BOUND_REF: + return InitializeState((const BoundReferenceExpression &)expr, state); + case ExpressionClass::BOUND_BETWEEN: + return InitializeState((const BoundBetweenExpression &)expr, state); + case ExpressionClass::BOUND_CASE: + return InitializeState((const BoundCaseExpression &)expr, state); + case ExpressionClass::BOUND_CAST: + return InitializeState((const BoundCastExpression &)expr, state); + case ExpressionClass::BOUND_COMPARISON: + return InitializeState((const BoundComparisonExpression &)expr, state); + case ExpressionClass::BOUND_CONJUNCTION: + return InitializeState((const BoundConjunctionExpression &)expr, state); + case ExpressionClass::BOUND_CONSTANT: + return InitializeState((const BoundConstantExpression &)expr, state); + case ExpressionClass::BOUND_FUNCTION: + return InitializeState((const BoundFunctionExpression &)expr, state); + case ExpressionClass::BOUND_OPERATOR: + return InitializeState((const BoundOperatorExpression &)expr, state); + case ExpressionClass::BOUND_PARAMETER: + return InitializeState((const BoundParameterExpression &)expr, state); + default: + throw InternalException("Attempting to 
initialize state of expression of unknown type!"); + } +} - struct { - std::mutex mj_lock; - //! The types of the duplicate eliminated columns, only used in correlated MARK JOIN for flattening ANY()/ALL() - //! expressions - vector correlated_types; - //! The aggregate expression nodes used by the HT - vector> correlated_aggregates; - //! The HT that holds the group counts for every correlated column - unique_ptr correlated_counts; - //! Group chunk used for aggregating into correlated_counts - DataChunk group_chunk; - //! Payload chunk used for aggregating into correlated_counts - DataChunk payload_chunk; - //! Result chunk used for aggregating into correlated_counts - DataChunk result_chunk; - } correlated_mark_join_info; +void ExpressionExecutor::Execute(const Expression &expr, ExpressionState *state, const SelectionVector *sel, + idx_t count, Vector &result) { + if (count == 0) { + return; + } + switch (expr.expression_class) { + case ExpressionClass::BOUND_BETWEEN: + Execute((const BoundBetweenExpression &)expr, state, sel, count, result); + break; + case ExpressionClass::BOUND_REF: + Execute((const BoundReferenceExpression &)expr, state, sel, count, result); + break; + case ExpressionClass::BOUND_CASE: + Execute((const BoundCaseExpression &)expr, state, sel, count, result); + break; + case ExpressionClass::BOUND_CAST: + Execute((const BoundCastExpression &)expr, state, sel, count, result); + break; + case ExpressionClass::BOUND_COMPARISON: + Execute((const BoundComparisonExpression &)expr, state, sel, count, result); + break; + case ExpressionClass::BOUND_CONJUNCTION: + Execute((const BoundConjunctionExpression &)expr, state, sel, count, result); + break; + case ExpressionClass::BOUND_CONSTANT: + Execute((const BoundConstantExpression &)expr, state, sel, count, result); + break; + case ExpressionClass::BOUND_FUNCTION: + Execute((const BoundFunctionExpression &)expr, state, sel, count, result); + break; + case ExpressionClass::BOUND_OPERATOR: + Execute((const 
BoundOperatorExpression &)expr, state, sel, count, result); + break; + case ExpressionClass::BOUND_PARAMETER: + Execute((const BoundParameterExpression &)expr, state, sel, count, result); + break; + default: + throw InternalException("Attempting to execute expression of unknown type!"); + } + Verify(expr, result, count); +} -private: - //! Apply a bitmask to the hashes - void ApplyBitmask(Vector &hashes, idx_t count); - void ApplyBitmask(Vector &hashes, const SelectionVector &sel, idx_t count, Vector &pointers); - //! Insert the given set of locations into the HT with the given set of - //! hashes. Caller should hold lock in parallel HT. - void InsertHashes(Vector &hashes, idx_t count, data_ptr_t key_locations[]); +idx_t ExpressionExecutor::Select(const Expression &expr, ExpressionState *state, const SelectionVector *sel, + idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { + if (count == 0) { + return 0; + } + D_ASSERT(true_sel || false_sel); + D_ASSERT(expr.return_type.id() == LogicalTypeId::BOOLEAN); + switch (expr.expression_class) { + case ExpressionClass::BOUND_BETWEEN: + return Select((BoundBetweenExpression &)expr, state, sel, count, true_sel, false_sel); + case ExpressionClass::BOUND_COMPARISON: + return Select((BoundComparisonExpression &)expr, state, sel, count, true_sel, false_sel); + case ExpressionClass::BOUND_CONJUNCTION: + return Select((BoundConjunctionExpression &)expr, state, sel, count, true_sel, false_sel); + default: + return DefaultSelect(expr, state, sel, count, true_sel, false_sel); + } +} - idx_t PrepareKeys(DataChunk &keys, unique_ptr &key_data, const SelectionVector *¤t_sel, - SelectionVector &sel, bool build_side); - void SerializeVectorData(VectorData &vdata, PhysicalType type, const SelectionVector &sel, idx_t count, - data_ptr_t key_locations[]); - void SerializeVector(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t count, data_ptr_t key_locations[]); +template +static inline idx_t 
DefaultSelectLoop(const SelectionVector *bsel, uint8_t *__restrict bdata, ValidityMask &mask, + const SelectionVector *sel, idx_t count, SelectionVector *true_sel, + SelectionVector *false_sel) { + idx_t true_count = 0, false_count = 0; + for (idx_t i = 0; i < count; i++) { + auto bidx = bsel->get_index(i); + auto result_idx = sel->get_index(i); + if (bdata[bidx] > 0 && (NO_NULL || mask.RowIsValid(bidx))) { + if (HAS_TRUE_SEL) { + true_sel->set_index(true_count++, result_idx); + } + } else { + if (HAS_FALSE_SEL) { + false_sel->set_index(false_count++, result_idx); + } + } + } + if (HAS_TRUE_SEL) { + return true_count; + } else { + return count - false_count; + } +} - //! The amount of entries stored in the HT currently - idx_t count; - //! The blocks holding the main data of the hash table - vector blocks; - //! Pinned handles, these are pinned during finalization only - vector> pinned_handles; - //! The hash map of the HT, created after finalization - unique_ptr hash_map; - //! Whether or not NULL values are considered equal in each of the comparisons - vector null_values_are_equal; +template +static inline idx_t DefaultSelectSwitch(VectorData &idata, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel) { + if (true_sel && false_sel) { + return DefaultSelectLoop(idata.sel, (uint8_t *)idata.data, idata.validity, sel, count, + true_sel, false_sel); + } else if (true_sel) { + return DefaultSelectLoop(idata.sel, (uint8_t *)idata.data, idata.validity, sel, count, + true_sel, false_sel); + } else { + D_ASSERT(false_sel); + return DefaultSelectLoop(idata.sel, (uint8_t *)idata.data, idata.validity, sel, count, + true_sel, false_sel); + } +} - //! 
Copying not allowed - JoinHashTable(const JoinHashTable &) = delete; -}; +idx_t ExpressionExecutor::DefaultSelect(const Expression &expr, ExpressionState *state, const SelectionVector *sel, + idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { + // generic selection of boolean expression: + // resolve the true/false expression first + // then use that to generate the selection vector + bool intermediate_bools[STANDARD_VECTOR_SIZE]; + Vector intermediate(LogicalType::BOOLEAN, (data_ptr_t)intermediate_bools); + Execute(expr, state, sel, count, intermediate); + + VectorData idata; + intermediate.Orrify(count, idata); + if (!sel) { + sel = &FlatVector::INCREMENTAL_SELECTION_VECTOR; + } + if (!idata.validity.AllValid()) { + return DefaultSelectSwitch(idata, sel, count, true_sel, false_sel); + } else { + return DefaultSelectSwitch(idata, sel, count, true_sel, false_sel); + } +} + +vector> &ExpressionExecutor::GetStates() { + return states; +} } // namespace duckdb +namespace duckdb { +void ExpressionState::AddChild(Expression *expr) { + types.push_back(expr->return_type); + child_states.push_back(ExpressionExecutor::InitializeState(*expr, root)); +} +void ExpressionState::Finalize() { + if (!types.empty()) { + intermediate_chunk.Initialize(types); + } +} +ExpressionState::ExpressionState(const Expression &expr, ExpressionExecutorState &root) + : expr(expr), root(root), name(expr.ToString()) { +} +ExpressionExecutorState::ExpressionExecutorState(const string &name) : profiler(), name(name) { +} +} // namespace duckdb -namespace duckdb { -using ScanStructure = JoinHashTable::ScanStructure; -JoinHashTable::JoinHashTable(BufferManager &buffer_manager, vector &conditions, - vector btypes, JoinType type) - : buffer_manager(buffer_manager), build_types(move(btypes)), equality_size(0), condition_size(0), build_size(0), - entry_size(0), tuple_size(0), join_type(type), finalized(false), has_null(false), count(0) { - for (auto &condition : conditions) { - 
D_ASSERT(condition.left->return_type == condition.right->return_type); - auto type = condition.left->return_type; - auto type_size = GetTypeIdSize(type.InternalType()); - if (condition.comparison == ExpressionType::COMPARE_EQUAL) { - // all equality conditions should be at the front - // all other conditions at the back - // this assert checks that - D_ASSERT(equality_types.size() == condition_types.size()); - equality_types.push_back(type); - equality_size += type_size; - } - predicates.push_back(condition.comparison); - null_values_are_equal.push_back(condition.null_values_are_equal); - D_ASSERT(!condition.null_values_are_equal || - (condition.null_values_are_equal && condition.comparison == ExpressionType::COMPARE_EQUAL)); - condition_types.push_back(type); - condition_size += type_size; - } - // at least one equality is necessary - D_ASSERT(equality_types.size() > 0); - for (idx_t i = 0; i < build_types.size(); i++) { - build_size += GetTypeIdSize(build_types[i].InternalType()); - } - tuple_size = condition_size + build_size; - pointer_offset = tuple_size; - // entry size is the tuple size and the size of the hash/next pointer - entry_size = tuple_size + MaxValue(sizeof(hash_t), sizeof(uintptr_t)); - if (IsRightOuterJoin(join_type)) { - // full/right outer joins need an extra bool to keep track of whether or not a tuple has found a matching entry - // we place the bool before the NEXT pointer - entry_size += sizeof(bool); - pointer_offset += sizeof(bool); +#include +#include +#include + +namespace duckdb { + +ART::ART(const vector &column_ids, const vector> &unbound_expressions, bool is_unique, + bool is_primary) + : Index(IndexType::ART, column_ids, unbound_expressions, is_unique, is_primary) { + tree = nullptr; + expression_result.Initialize(logical_types); + is_little_endian = IsLittleEndian(); + for (idx_t i = 0; i < types.size(); i++) { + switch (types[i]) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + case PhysicalType::INT16: + case 
PhysicalType::INT32: + case PhysicalType::INT64: + case PhysicalType::INT128: + case PhysicalType::UINT8: + case PhysicalType::UINT16: + case PhysicalType::UINT32: + case PhysicalType::UINT64: + case PhysicalType::FLOAT: + case PhysicalType::DOUBLE: + case PhysicalType::VARCHAR: + break; + default: + throw InvalidTypeException(logical_types[i], "Invalid type for index"); + } } - // compute the per-block capacity of this HT - block_capacity = MaxValue(STANDARD_VECTOR_SIZE, (Storage::BLOCK_ALLOC_SIZE / entry_size) + 1); } -JoinHashTable::~JoinHashTable() { +ART::~ART() { } -void JoinHashTable::ApplyBitmask(Vector &hashes, idx_t count) { - if (hashes.GetVectorType() == VectorType::CONSTANT_VECTOR) { - D_ASSERT(!ConstantVector::IsNull(hashes)); - auto indices = ConstantVector::GetData(hashes); - *indices = *indices & bitmask; - } else { - hashes.Normalify(count); - auto indices = FlatVector::GetData(hashes); - for (idx_t i = 0; i < count; i++) { - indices[i] &= bitmask; +bool ART::LeafMatches(Node *node, Key &key, unsigned depth) { + auto leaf = static_cast(node); + Key &leaf_key = *leaf->value; + for (idx_t i = depth; i < leaf_key.len; i++) { + if (leaf_key[i] != key[i]) { + return false; } } -} -void JoinHashTable::ApplyBitmask(Vector &hashes, const SelectionVector &sel, idx_t count, Vector &pointers) { - VectorData hdata; - hashes.Orrify(count, hdata); + return true; +} - auto hash_data = (hash_t *)hdata.data; - auto result_data = FlatVector::GetData(pointers); - auto main_ht = (data_ptr_t *)hash_map->node->buffer; - for (idx_t i = 0; i < count; i++) { - auto rindex = sel.get_index(i); - auto hindex = hdata.sel->get_index(rindex); - auto hash = hash_data[hindex]; - result_data[rindex] = main_ht + (hash & bitmask); - } +unique_ptr ART::InitializeScanSinglePredicate(Transaction &transaction, Value value, + ExpressionType expression_type) { + auto result = make_unique(); + result->values[0] = value; + result->expressions[0] = expression_type; + return move(result); } 
-void JoinHashTable::Hash(DataChunk &keys, const SelectionVector &sel, idx_t count, Vector &hashes) { - if (count == keys.size()) { - // no null values are filtered: use regular hash functions - VectorOperations::Hash(keys.data[0], hashes, keys.size()); - for (idx_t i = 1; i < equality_types.size(); i++) { - VectorOperations::CombineHash(hashes, keys.data[i], keys.size()); - } - } else { - // null values were filtered: use selection vector - VectorOperations::Hash(keys.data[0], hashes, sel, count); - for (idx_t i = 1; i < equality_types.size(); i++) { - VectorOperations::CombineHash(hashes, keys.data[i], sel, count); - } - } +unique_ptr ART::InitializeScanTwoPredicates(Transaction &transaction, Value low_value, + ExpressionType low_expression_type, Value high_value, + ExpressionType high_expression_type) { + auto result = make_unique(); + result->values[0] = low_value; + result->expressions[0] = low_expression_type; + result->values[1] = high_value; + result->expressions[1] = high_expression_type; + return move(result); } -template -static void TemplatedSerializeVData(VectorData &vdata, const SelectionVector &sel, idx_t count, - data_ptr_t key_locations[]) { - auto source = (T *)vdata.data; - if (!vdata.validity.AllValid()) { - for (idx_t i = 0; i < count; i++) { - auto idx = sel.get_index(i); - auto source_idx = vdata.sel->get_index(idx); - auto target = (T *)key_locations[i]; - T value = !vdata.validity.RowIsValid(source_idx) ? 
NullValue() : source[source_idx]; - Store(value, (data_ptr_t)target); - key_locations[i] += sizeof(T); - } - } else { - for (idx_t i = 0; i < count; i++) { - auto idx = sel.get_index(i); - auto source_idx = vdata.sel->get_index(idx); +//===--------------------------------------------------------------------===// +// Insert +//===--------------------------------------------------------------------===// +template +static void TemplatedGenerateKeys(Vector &input, idx_t count, vector> &keys, bool is_little_endian) { + VectorData idata; + input.Orrify(count, idata); - auto target = (T *)key_locations[i]; - Store(source[source_idx], (data_ptr_t)target); - key_locations[i] += sizeof(T); + auto input_data = (T *)idata.data; + for (idx_t i = 0; i < count; i++) { + auto idx = idata.sel->get_index(i); + if (idata.validity.RowIsValid(idx)) { + keys.push_back(Key::CreateKey(input_data[idx], is_little_endian)); + } else { + keys.push_back(nullptr); } } } -static void InitializeOuterJoin(idx_t count, data_ptr_t key_locations[]) { +template +static void ConcatenateKeys(Vector &input, idx_t count, vector> &keys, bool is_little_endian) { + VectorData idata; + input.Orrify(count, idata); + + auto input_data = (T *)idata.data; for (idx_t i = 0; i < count; i++) { - auto target = (bool *)key_locations[i]; - *target = false; - key_locations[i] += sizeof(bool); + auto idx = idata.sel->get_index(i); + if (!idata.validity.RowIsValid(idx) || !keys[i]) { + // either this column is NULL, or the previous column is NULL! 
+ keys[i] = nullptr; + } else { + // concatenate the keys + auto old_key = move(keys[i]); + auto new_key = Key::CreateKey(input_data[idx], is_little_endian); + auto key_len = old_key->len + new_key->len; + auto compound_data = unique_ptr(new data_t[key_len]); + memcpy(compound_data.get(), old_key->data.get(), old_key->len); + memcpy(compound_data.get() + old_key->len, new_key->data.get(), new_key->len); + keys[i] = make_unique(move(compound_data), key_len); + } } } -void JoinHashTable::SerializeVectorData(VectorData &vdata, PhysicalType type, const SelectionVector &sel, idx_t count, - data_ptr_t key_locations[]) { - switch (type) { +void ART::GenerateKeys(DataChunk &input, vector> &keys) { + keys.reserve(STANDARD_VECTOR_SIZE); + // generate keys for the first input column + switch (input.data[0].GetType().InternalType()) { case PhysicalType::BOOL: + TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); + break; case PhysicalType::INT8: - TemplatedSerializeVData(vdata, sel, count, key_locations); + TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); break; case PhysicalType::INT16: - TemplatedSerializeVData(vdata, sel, count, key_locations); + TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); break; case PhysicalType::INT32: - TemplatedSerializeVData(vdata, sel, count, key_locations); + TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); break; case PhysicalType::INT64: - TemplatedSerializeVData(vdata, sel, count, key_locations); + TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); + break; + case PhysicalType::INT128: + TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); break; case PhysicalType::UINT8: - TemplatedSerializeVData(vdata, sel, count, key_locations); + TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); break; case PhysicalType::UINT16: - TemplatedSerializeVData(vdata, sel, count, 
key_locations); + TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); break; case PhysicalType::UINT32: - TemplatedSerializeVData(vdata, sel, count, key_locations); + TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); break; case PhysicalType::UINT64: - TemplatedSerializeVData(vdata, sel, count, key_locations); - break; - case PhysicalType::INT128: - TemplatedSerializeVData(vdata, sel, count, key_locations); + TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); break; case PhysicalType::FLOAT: - TemplatedSerializeVData(vdata, sel, count, key_locations); + TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); break; case PhysicalType::DOUBLE: - TemplatedSerializeVData(vdata, sel, count, key_locations); - break; - case PhysicalType::HASH: - TemplatedSerializeVData(vdata, sel, count, key_locations); - break; - case PhysicalType::INTERVAL: - TemplatedSerializeVData(vdata, sel, count, key_locations); + TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); break; - case PhysicalType::VARCHAR: { - StringHeap local_heap; - auto source = (string_t *)vdata.data; - for (idx_t i = 0; i < count; i++) { - auto idx = sel.get_index(i); - auto source_idx = vdata.sel->get_index(idx); - - string_t new_val; - if (!vdata.validity.RowIsValid(source_idx)) { - new_val = NullValue(); - } else if (source[source_idx].IsInlined()) { - new_val = source[source_idx]; - } else { - new_val = local_heap.AddBlob(source[source_idx].GetDataUnsafe(), source[source_idx].GetSize()); - } - Store(new_val, key_locations[i]); - key_locations[i] += sizeof(string_t); - } - lock_guard append_lock(ht_lock); - string_heap.MergeHeap(local_heap); + case PhysicalType::VARCHAR: + TemplatedGenerateKeys(input.data[0], input.size(), keys, is_little_endian); break; - } default: - throw NotImplementedException("FIXME: unimplemented serialize"); - } -} - -void JoinHashTable::SerializeVector(Vector &v, idx_t 
vcount, const SelectionVector &sel, idx_t count, - data_ptr_t key_locations[]) { - VectorData vdata; - v.Orrify(vcount, vdata); - - SerializeVectorData(vdata, v.GetType().InternalType(), sel, count, key_locations); -} - -idx_t JoinHashTable::AppendToBlock(HTDataBlock &block, BufferHandle &handle, vector &append_entries, - idx_t remaining) { - idx_t append_count = MinValue(remaining, block.capacity - block.count); - auto dataptr = handle.node->buffer + block.count * entry_size; - append_entries.emplace_back(dataptr, append_count); - block.count += append_count; - return append_count; -} - -static idx_t FilterNullValues(VectorData &vdata, const SelectionVector &sel, idx_t count, SelectionVector &result) { - idx_t result_count = 0; - for (idx_t i = 0; i < count; i++) { - auto idx = sel.get_index(i); - auto key_idx = vdata.sel->get_index(idx); - if (vdata.validity.RowIsValid(key_idx)) { - result.set_index(result_count++, idx); - } + throw InternalException("Invalid type for index"); } - return result_count; -} -idx_t JoinHashTable::PrepareKeys(DataChunk &keys, unique_ptr &key_data, - const SelectionVector *¤t_sel, SelectionVector &sel, bool build_side) { - key_data = keys.Orrify(); - - // figure out which keys are NULL, and create a selection vector out of them - current_sel = &FlatVector::INCREMENTAL_SELECTION_VECTOR; - idx_t added_count = keys.size(); - if (build_side && IsRightOuterJoin(join_type)) { - // in case of a right or full outer join, we cannot remove NULL keys from the build side - return added_count; - } - for (idx_t i = 0; i < keys.ColumnCount(); i++) { - if (!null_values_are_equal[i]) { - if (key_data[i].validity.AllValid()) { - continue; - } - added_count = FilterNullValues(key_data[i], *current_sel, added_count, sel); - // null values are NOT equal for this column, filter them out - current_sel = &sel; + for (idx_t i = 1; i < input.ColumnCount(); i++) { + // for each of the remaining columns, concatenate + switch 
(input.data[i].GetType().InternalType()) { + case PhysicalType::BOOL: + ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); + break; + case PhysicalType::INT8: + ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); + break; + case PhysicalType::INT16: + ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); + break; + case PhysicalType::INT32: + ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); + break; + case PhysicalType::INT64: + ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); + break; + case PhysicalType::INT128: + ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); + break; + case PhysicalType::UINT8: + ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); + break; + case PhysicalType::UINT16: + ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); + break; + case PhysicalType::UINT32: + ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); + break; + case PhysicalType::UINT64: + ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); + break; + case PhysicalType::FLOAT: + ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); + break; + case PhysicalType::DOUBLE: + ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); + break; + case PhysicalType::VARCHAR: + ConcatenateKeys(input.data[i], input.size(), keys, is_little_endian); + break; + default: + throw InternalException("Invalid type for index"); } } - return added_count; } -void JoinHashTable::Build(DataChunk &keys, DataChunk &payload) { - D_ASSERT(!finalized); - D_ASSERT(keys.size() == payload.size()); - if (keys.size() == 0) { - return; - } - // special case: correlated mark join - if (join_type == JoinType::MARK && !correlated_mark_join_info.correlated_types.empty()) { - auto &info = correlated_mark_join_info; - lock_guard mj_lock(info.mj_lock); - // Correlated MARK join - // for the correlated mark join 
we need to keep track of COUNT(*) and COUNT(COLUMN) for each of the correlated - // columns push into the aggregate hash table - D_ASSERT(info.correlated_counts); - info.group_chunk.SetCardinality(keys); - for (idx_t i = 0; i < info.correlated_types.size(); i++) { - info.group_chunk.data[i].Reference(keys.data[i]); - } - info.payload_chunk.SetCardinality(keys); - info.payload_chunk.data[0].Reference(keys.data[info.correlated_types.size()]); - info.correlated_counts->AddChunk(info.group_chunk, info.payload_chunk); - } +bool ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) { + D_ASSERT(row_ids.GetType().InternalType() == ROW_TYPE); + D_ASSERT(logical_types[0] == input.data[0].GetType()); - // prepare the keys for processing - unique_ptr key_data; - const SelectionVector *current_sel; - SelectionVector sel(STANDARD_VECTOR_SIZE); - idx_t added_count = PrepareKeys(keys, key_data, current_sel, sel, true); - if (added_count < keys.size()) { - has_null = true; - } - if (added_count == 0) { - return; - } - count += added_count; + // generate the keys for the given input + vector> keys; + GenerateKeys(input, keys); - vector> handles; - vector append_entries; - data_ptr_t key_locations[STANDARD_VECTOR_SIZE]; - // first allocate space of where to serialize the keys and payload columns - idx_t remaining = added_count; - { - // first append to the last block (if any) - lock_guard append_lock(ht_lock); - if (!blocks.empty()) { - auto &last_block = blocks.back(); - if (last_block.count < last_block.capacity) { - // last block has space: pin the buffer of this block - auto handle = buffer_manager.Pin(last_block.block); - // now append to the block - idx_t append_count = AppendToBlock(last_block, *handle, append_entries, remaining); - remaining -= append_count; - handles.push_back(move(handle)); - } + // now insert the elements into the index + row_ids.Normalify(input.size()); + auto row_identifiers = FlatVector::GetData(row_ids); + idx_t failed_index = INVALID_INDEX; 
+ for (idx_t i = 0; i < input.size(); i++) { + if (!keys[i]) { + continue; } - while (remaining > 0) { - // now for the remaining data, allocate new buffers to store the data and append there - auto block = buffer_manager.RegisterMemory(block_capacity * entry_size, false); - auto handle = buffer_manager.Pin(block); - - HTDataBlock new_block; - new_block.count = 0; - new_block.capacity = block_capacity; - new_block.block = move(block); - idx_t append_count = AppendToBlock(new_block, *handle, append_entries, remaining); - remaining -= append_count; - handles.push_back(move(handle)); - blocks.push_back(move(new_block)); - } - } - // now set up the key_locations based on the append entries - idx_t append_idx = 0; - for (auto &append_entry : append_entries) { - idx_t next = append_idx + append_entry.count; - for (; append_idx < next; append_idx++) { - key_locations[append_idx] = append_entry.baseptr; - append_entry.baseptr += entry_size; + row_t row_id = row_identifiers[i]; + if (!Insert(tree, move(keys[i]), 0, row_id)) { + // failed to insert because of constraint violation + failed_index = i; + break; } } + if (failed_index != INVALID_INDEX) { + // failed to insert because of constraint violation: remove previously inserted entries + // generate keys again + keys.clear(); + GenerateKeys(input, keys); + unique_ptr key; - // hash the keys and obtain an entry in the list - // note that we only hash the keys used in the equality comparison - Vector hash_values(LogicalType::HASH); - Hash(keys, *current_sel, added_count, hash_values); - - // serialize the keys to the key locations - for (idx_t i = 0; i < keys.ColumnCount(); i++) { - SerializeVectorData(key_data[i], keys.data[i].GetType().InternalType(), *current_sel, added_count, - key_locations); - } - // now serialize the payload - if (!build_types.empty()) { - for (idx_t i = 0; i < payload.ColumnCount(); i++) { - SerializeVector(payload.data[i], payload.size(), *current_sel, added_count, key_locations); + // now erase 
the entries + for (idx_t i = 0; i < failed_index; i++) { + if (!keys[i]) { + continue; + } + row_t row_id = row_identifiers[i]; + Erase(tree, *keys[i], 0, row_id); } + return false; } - if (IsRightOuterJoin(join_type)) { - // for FULL/RIGHT OUTER joins initialize the "found" boolean to false - InitializeOuterJoin(added_count, key_locations); - } - SerializeVector(hash_values, payload.size(), *current_sel, added_count, key_locations); -} - -void JoinHashTable::InsertHashes(Vector &hashes, idx_t count, data_ptr_t key_locations[]) { - D_ASSERT(hashes.GetType().id() == LogicalTypeId::HASH); - - // use bitmask to get position in array - ApplyBitmask(hashes, count); - - hashes.Normalify(count); - - D_ASSERT(hashes.GetVectorType() == VectorType::FLAT_VECTOR); - auto pointers = (data_ptr_t *)hash_map->node->buffer; - auto indices = FlatVector::GetData(hashes); - for (idx_t i = 0; i < count; i++) { - auto index = indices[i]; - // set prev in current key to the value (NOTE: this will be nullptr if - // there is none) - auto prev_pointer = (data_ptr_t *)(key_locations[i] + pointer_offset); - Store(pointers[index], (data_ptr_t)prev_pointer); - - // set pointer to current tuple - pointers[index] = key_locations[i]; - } + return true; } -void JoinHashTable::Finalize() { - // the build has finished, now iterate over all the nodes and construct the final hash table - // select a HT that has at least 50% empty space - idx_t capacity = NextPowerOfTwo(MaxValue(count * 2, (Storage::BLOCK_ALLOC_SIZE / sizeof(data_ptr_t)) + 1)); - // size needs to be a power of 2 - D_ASSERT((capacity & (capacity - 1)) == 0); - bitmask = capacity - 1; - - // allocate the HT and initialize it with all-zero entries - hash_map = buffer_manager.Allocate(capacity * sizeof(data_ptr_t)); - memset(hash_map->node->buffer, 0, capacity * sizeof(data_ptr_t)); - - Vector hashes(LogicalType::HASH); - auto hash_data = FlatVector::GetData(hashes); - data_ptr_t key_locations[STANDARD_VECTOR_SIZE]; - // now construct the 
actual hash table; scan the nodes - // as we can the nodes we pin all the blocks of the HT and keep them pinned until the HT is destroyed - // this is so that we can keep pointers around to the blocks - // FIXME: if we cannot keep everything pinned in memory, we could switch to an out-of-memory merge join or so - for (auto &block : blocks) { - auto handle = buffer_manager.Pin(block.block); - data_ptr_t dataptr = handle->node->buffer; - idx_t entry = 0; - while (entry < block.count) { - // fetch the next vector of entries from the blocks - idx_t next = MinValue(STANDARD_VECTOR_SIZE, block.count - entry); - for (idx_t i = 0; i < next; i++) { - hash_data[i] = Load((data_ptr_t)(dataptr + pointer_offset)); - key_locations[i] = dataptr; - dataptr += entry_size; - } - // now insert into the hash table - InsertHashes(hashes, next, key_locations); +bool ART::Append(IndexLock &lock, DataChunk &appended_data, Vector &row_identifiers) { + DataChunk expression_result; + expression_result.Initialize(logical_types); - entry += next; - } - pinned_handles.push_back(move(handle)); - } + // first resolve the expressions for the index + ExecuteExpressions(appended_data, expression_result); - finalized = true; + // now insert into the index + return Insert(lock, expression_result, row_identifiers); } -unique_ptr JoinHashTable::Probe(DataChunk &keys) { - D_ASSERT(count > 0); // should be handled before - D_ASSERT(finalized); - - // set up the scan structure - auto ss = make_unique(*this); - - if (join_type != JoinType::INNER) { - ss->found_match = unique_ptr(new bool[STANDARD_VECTOR_SIZE]); - memset(ss->found_match.get(), 0, sizeof(bool) * STANDARD_VECTOR_SIZE); +void ART::VerifyAppend(DataChunk &chunk) { + if (!is_unique) { + return; } - // first prepare the keys for probing - const SelectionVector *current_sel; - ss->count = PrepareKeys(keys, ss->key_data, current_sel, ss->sel_vector, false); - if (ss->count == 0) { - return ss; - } + DataChunk expression_result; + 
expression_result.Initialize(logical_types); - // hash all the keys - Vector hashes(LogicalType::HASH); - Hash(keys, *current_sel, ss->count, hashes); + // unique index, check + lock_guard l(lock); + // first resolve the expressions for the index + ExecuteExpressions(chunk, expression_result); - // now initialize the pointers of the scan structure based on the hashes - ApplyBitmask(hashes, *current_sel, ss->count, ss->pointers); + // generate the keys for the given input + vector> keys; + GenerateKeys(expression_result, keys); - // create the selection vector linking to only non-empty entries - idx_t count = 0; - auto pointers = FlatVector::GetData(ss->pointers); - for (idx_t i = 0; i < ss->count; i++) { - auto idx = current_sel->get_index(i); - auto chain_pointer = (data_ptr_t *)(pointers[idx]); - pointers[idx] = *chain_pointer; - if (pointers[idx]) { - ss->sel_vector.set_index(count++, idx); + for (idx_t i = 0; i < chunk.size(); i++) { + if (!keys[i]) { + continue; + } + if (Lookup(tree, *keys[i], 0) != nullptr) { + string key_name; + for (idx_t k = 0; k < expression_result.ColumnCount(); k++) { + if (k > 0) { + key_name += ", "; + } + key_name += unbound_expressions[k]->GetName() + ": " + expression_result.data[k].GetValue(i).ToString(); + } + // node already exists in tree + throw ConstraintException("duplicate key \"%s\" violates %s constraint", key_name, + is_primary ? 
"primary key" : "unique"); } } - ss->count = count; - return ss; } -ScanStructure::ScanStructure(JoinHashTable &ht) : sel_vector(STANDARD_VECTOR_SIZE), ht(ht), finished(false) { - pointers.Initialize(LogicalType::POINTER); +bool ART::InsertToLeaf(Leaf &leaf, row_t row_id) { + if (is_unique && leaf.num_elements != 0) { + return false; + } + leaf.Insert(row_id); + return true; } -void ScanStructure::Next(DataChunk &keys, DataChunk &left, DataChunk &result) { - if (finished) { - return; +bool ART::Insert(unique_ptr &node, unique_ptr value, unsigned depth, row_t row_id) { + Key &key = *value; + if (!node) { + // node is currently empty, create a leaf here with the key + node = make_unique(*this, move(value), row_id); + return true; } - switch (ht.join_type) { - case JoinType::INNER: - case JoinType::RIGHT: - NextInnerJoin(keys, left, result); - break; - case JoinType::SEMI: - NextSemiJoin(keys, left, result); - break; - case JoinType::MARK: - NextMarkJoin(keys, left, result); - break; - case JoinType::ANTI: - NextAntiJoin(keys, left, result); - break; - case JoinType::OUTER: - case JoinType::LEFT: - NextLeftJoin(keys, left, result); - break; - case JoinType::SINGLE: - NextSingleJoin(keys, left, result); - break; - default: - throw Exception("Unhandled join type in JoinHashTable"); - } -} + if (node->type == NodeType::NLeaf) { + // Replace leaf with Node4 and store both leaves in it + auto leaf = static_cast(node.get()); -template -static idx_t TemplatedGather(VectorData &vdata, Vector &pointers, const SelectionVector ¤t_sel, idx_t count, - idx_t offset, SelectionVector *match_sel, SelectionVector *no_match_sel, - idx_t &no_match_count) { - idx_t result_count = 0; - auto data = (T *)vdata.data; - auto ptrs = FlatVector::GetData(pointers); - for (idx_t i = 0; i < count; i++) { - auto idx = current_sel.get_index(i); - auto kidx = vdata.sel->get_index(idx); - auto gdata = (T *)(ptrs[idx] + offset); - T val = Load((data_ptr_t)gdata); - if (!vdata.validity.RowIsValid(kidx)) 
{ - if (IsNullValue(val)) { - match_sel->set_index(result_count++, idx); - } else { - if (NO_MATCH_SEL) { - no_match_sel->set_index(no_match_count++, idx); - } - } - } else { - if (OP::template Operation(data[kidx], val)) { - match_sel->set_index(result_count++, idx); - } else { - if (NO_MATCH_SEL) { - no_match_sel->set_index(no_match_count++, idx); - } + Key &existing_key = *leaf->value; + uint32_t new_prefix_length = 0; + // Leaf node is already there, update row_id vector + if (depth + new_prefix_length == existing_key.len && existing_key.len == key.len) { + return InsertToLeaf(*leaf, row_id); + } + while (existing_key[depth + new_prefix_length] == key[depth + new_prefix_length]) { + new_prefix_length++; + // Leaf node is already there, update row_id vector + if (depth + new_prefix_length == existing_key.len && existing_key.len == key.len) { + return InsertToLeaf(*leaf, row_id); } } - } - return result_count; -} -template -static idx_t GatherSwitch(VectorData &data, PhysicalType type, Vector &pointers, const SelectionVector ¤t_sel, - idx_t count, idx_t offset, SelectionVector *match_sel, SelectionVector *no_match_sel, - idx_t &no_match_count) { - switch (type) { - case PhysicalType::UINT8: - return TemplatedGather(data, pointers, current_sel, count, offset, match_sel, - no_match_sel, no_match_count); - case PhysicalType::UINT16: - return TemplatedGather(data, pointers, current_sel, count, offset, match_sel, - no_match_sel, no_match_count); - case PhysicalType::UINT32: - return TemplatedGather(data, pointers, current_sel, count, offset, match_sel, - no_match_sel, no_match_count); - case PhysicalType::UINT64: - return TemplatedGather(data, pointers, current_sel, count, offset, match_sel, - no_match_sel, no_match_count); - case PhysicalType::BOOL: - case PhysicalType::INT8: - return TemplatedGather(data, pointers, current_sel, count, offset, match_sel, - no_match_sel, no_match_count); - case PhysicalType::INT16: - return TemplatedGather(data, pointers, current_sel, 
count, offset, match_sel, - no_match_sel, no_match_count); - case PhysicalType::INT32: - return TemplatedGather(data, pointers, current_sel, count, offset, match_sel, - no_match_sel, no_match_count); - case PhysicalType::INT64: - return TemplatedGather(data, pointers, current_sel, count, offset, match_sel, - no_match_sel, no_match_count); - case PhysicalType::INT128: - return TemplatedGather(data, pointers, current_sel, count, offset, match_sel, - no_match_sel, no_match_count); - case PhysicalType::FLOAT: - return TemplatedGather(data, pointers, current_sel, count, offset, match_sel, - no_match_sel, no_match_count); - case PhysicalType::DOUBLE: - return TemplatedGather(data, pointers, current_sel, count, offset, match_sel, - no_match_sel, no_match_count); - case PhysicalType::INTERVAL: - return TemplatedGather(data, pointers, current_sel, count, offset, match_sel, - no_match_sel, no_match_count); - case PhysicalType::VARCHAR: - return TemplatedGather(data, pointers, current_sel, count, offset, match_sel, - no_match_sel, no_match_count); - default: - throw NotImplementedException("Unimplemented type for GatherSwitch"); + unique_ptr new_node = make_unique(*this, new_prefix_length); + new_node->prefix_length = new_prefix_length; + memcpy(new_node->prefix.get(), &key[depth], new_prefix_length); + Node4::Insert(*this, new_node, existing_key[depth + new_prefix_length], node); + unique_ptr leaf_node = make_unique(*this, move(value), row_id); + Node4::Insert(*this, new_node, key[depth + new_prefix_length], leaf_node); + node = move(new_node); + return true; } -} -template -idx_t ScanStructure::ResolvePredicates(DataChunk &keys, SelectionVector *match_sel, SelectionVector *no_match_sel) { - SelectionVector *current_sel = &this->sel_vector; - idx_t remaining_count = this->count; - idx_t offset = 0; - idx_t no_match_count = 0; - for (idx_t i = 0; i < ht.predicates.size(); i++) { - auto internal_type = keys.data[i].GetType().InternalType(); - switch (ht.predicates[i]) { - case 
ExpressionType::COMPARE_EQUAL: - remaining_count = - GatherSwitch(key_data[i], internal_type, this->pointers, *current_sel, - remaining_count, offset, match_sel, no_match_sel, no_match_count); - break; - case ExpressionType::COMPARE_NOTEQUAL: - remaining_count = - GatherSwitch(key_data[i], internal_type, this->pointers, *current_sel, - remaining_count, offset, match_sel, no_match_sel, no_match_count); - break; - case ExpressionType::COMPARE_GREATERTHAN: - remaining_count = GatherSwitch(key_data[i], internal_type, this->pointers, - *current_sel, remaining_count, offset, match_sel, - no_match_sel, no_match_count); - break; - case ExpressionType::COMPARE_GREATERTHANOREQUALTO: - remaining_count = GatherSwitch(key_data[i], internal_type, this->pointers, - *current_sel, remaining_count, offset, - match_sel, no_match_sel, no_match_count); - break; - case ExpressionType::COMPARE_LESSTHAN: - remaining_count = - GatherSwitch(key_data[i], internal_type, this->pointers, *current_sel, - remaining_count, offset, match_sel, no_match_sel, no_match_count); - break; - case ExpressionType::COMPARE_LESSTHANOREQUALTO: - remaining_count = GatherSwitch(key_data[i], internal_type, this->pointers, - *current_sel, remaining_count, offset, - match_sel, no_match_sel, no_match_count); - break; - default: - throw NotImplementedException("Unimplemented comparison type for join"); - } - if (remaining_count == 0) { - break; + // Handle prefix of inner node + if (node->prefix_length) { + uint32_t mismatch_pos = Node::PrefixMismatch(*this, node.get(), key, depth); + if (mismatch_pos != node->prefix_length) { + // Prefix differs, create new node + unique_ptr new_node = make_unique(*this, mismatch_pos); + new_node->prefix_length = mismatch_pos; + memcpy(new_node->prefix.get(), node->prefix.get(), mismatch_pos); + // Break up prefix + auto node_ptr = node.get(); + Node4::Insert(*this, new_node, node->prefix[mismatch_pos], node); + node_ptr->prefix_length -= (mismatch_pos + 1); + 
memmove(node_ptr->prefix.get(), node_ptr->prefix.get() + mismatch_pos + 1, node_ptr->prefix_length); + unique_ptr leaf_node = make_unique(*this, move(value), row_id); + Node4::Insert(*this, new_node, key[depth + mismatch_pos], leaf_node); + node = move(new_node); + return true; } - current_sel = match_sel; - offset += GetTypeIdSize(internal_type); + depth += node->prefix_length; } - return remaining_count; -} -idx_t ScanStructure::ResolvePredicates(DataChunk &keys, SelectionVector &match_sel, SelectionVector &no_match_sel) { - return ResolvePredicates(keys, &match_sel, &no_match_sel); + // Recurse + idx_t pos = node->GetChildPos(key[depth]); + if (pos != INVALID_INDEX) { + auto child = node->GetChild(pos); + return Insert(*child, move(value), depth + 1, row_id); + } + unique_ptr new_node = make_unique(*this, move(value), row_id); + Node::InsertLeaf(*this, node, key[depth], new_node); + return true; } -idx_t ScanStructure::ResolvePredicates(DataChunk &keys, SelectionVector &match_sel) { - return ResolvePredicates(keys, &match_sel, nullptr); -} +//===--------------------------------------------------------------------===// +// Delete +//===--------------------------------------------------------------------===// +void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) { + DataChunk expression_result; + expression_result.Initialize(logical_types); -idx_t ScanStructure::ScanInnerJoin(DataChunk &keys, SelectionVector &result_vector) { - while (true) { - // resolve the predicates for this set of keys - idx_t result_count = ResolvePredicates(keys, result_vector); + // first resolve the expressions + ExecuteExpressions(input, expression_result); - // after doing all the comparisons set the found_match vector - if (found_match) { - for (idx_t i = 0; i < result_count; i++) { - auto idx = result_vector.get_index(i); - found_match[idx] = true; - } - } - if (result_count > 0) { - return result_count; + // then generate the keys for the given input + vector> keys; 
+ GenerateKeys(expression_result, keys); + + // now erase the elements from the database + row_ids.Normalify(input.size()); + auto row_identifiers = FlatVector::GetData(row_ids); + + for (idx_t i = 0; i < input.size(); i++) { + if (!keys[i]) { + continue; } - // no matches found: check the next set of pointers - AdvancePointers(); - if (this->count == 0) { - return 0; + Erase(tree, *keys[i], 0, row_identifiers[i]); +#ifdef DEBUG + auto node = Lookup(tree, *keys[i], 0); + if (node) { + auto leaf = static_cast(node); + for (idx_t k = 0; k < leaf->num_elements; k++) { + D_ASSERT(leaf->GetRowId(k) != row_identifiers[i]); + } } +#endif } } -void ScanStructure::AdvancePointers(const SelectionVector &sel, idx_t sel_count) { - // now for all the pointers, we move on to the next set of pointers - idx_t new_count = 0; - auto ptrs = FlatVector::GetData(this->pointers); - for (idx_t i = 0; i < sel_count; i++) { - auto idx = sel.get_index(i); - auto chain_pointer = (data_ptr_t *)(ptrs[idx] + ht.pointer_offset); - ptrs[idx] = Load((data_ptr_t)chain_pointer); - if (ptrs[idx]) { - this->sel_vector.set_index(new_count++, idx); +void ART::Erase(unique_ptr &node, Key &key, unsigned depth, row_t row_id) { + if (!node) { + return; + } + // Delete a leaf from a tree + if (node->type == NodeType::NLeaf) { + // Make sure we have the right leaf + if (ART::LeafMatches(node.get(), key, depth)) { + auto leaf = static_cast(node.get()); + leaf->Remove(row_id); + if (leaf->num_elements == 0) { + node.reset(); + } } + return; } - this->count = new_count; -} -void ScanStructure::AdvancePointers() { - AdvancePointers(this->sel_vector, this->count); -} + // Handle prefix + if (node->prefix_length) { + if (Node::PrefixMismatch(*this, node.get(), key, depth) != node->prefix_length) { + return; + } + depth += node->prefix_length; + } + idx_t pos = node->GetChildPos(key[depth]); + if (pos != INVALID_INDEX) { + auto child = node->GetChild(pos); + D_ASSERT(child); -template -static void 
TemplatedGatherResult(Vector &result, uintptr_t *pointers, const SelectionVector &result_vector, - const SelectionVector &sel_vector, idx_t count, idx_t offset) { - auto rdata = FlatVector::GetData(result); - auto &mask = FlatVector::Validity(result); - for (idx_t i = 0; i < count; i++) { - auto ridx = result_vector.get_index(i); - auto pidx = sel_vector.get_index(i); - T hdata = Load((data_ptr_t)(pointers[pidx] + offset)); - if (IsNullValue(hdata)) { - mask.SetInvalid(ridx); + unique_ptr &child_ref = *child; + if (child_ref->type == NodeType::NLeaf && LeafMatches(child_ref.get(), key, depth)) { + // Leaf found, remove entry + auto leaf = static_cast(child_ref.get()); + leaf->Remove(row_id); + if (leaf->num_elements == 0) { + // Leaf is empty, delete leaf, decrement node counter and maybe shrink node + Node::Erase(*this, node, pos); + } } else { - rdata[ridx] = hdata; + // Recurse + Erase(*child, key, depth + 1, row_id); } } } -static void GatherResultVector(Vector &result, const SelectionVector &result_vector, uintptr_t *ptrs, - const SelectionVector &sel_vector, idx_t count, idx_t &offset) { - result.SetVectorType(VectorType::FLAT_VECTOR); - switch (result.GetType().InternalType()) { +//===--------------------------------------------------------------------===// +// Point Query +//===--------------------------------------------------------------------===// +static unique_ptr CreateKey(ART &art, PhysicalType type, Value &value) { + D_ASSERT(type == value.type().InternalType()); + switch (type) { case PhysicalType::BOOL: + return Key::CreateKey(value.value_.boolean, art.is_little_endian); case PhysicalType::INT8: - TemplatedGatherResult(result, ptrs, result_vector, sel_vector, count, offset); - break; + return Key::CreateKey(value.value_.tinyint, art.is_little_endian); case PhysicalType::INT16: - TemplatedGatherResult(result, ptrs, result_vector, sel_vector, count, offset); - break; + return Key::CreateKey(value.value_.smallint, art.is_little_endian); case 
PhysicalType::INT32: - TemplatedGatherResult(result, ptrs, result_vector, sel_vector, count, offset); - break; + return Key::CreateKey(value.value_.integer, art.is_little_endian); case PhysicalType::INT64: - TemplatedGatherResult(result, ptrs, result_vector, sel_vector, count, offset); - break; + return Key::CreateKey(value.value_.bigint, art.is_little_endian); case PhysicalType::UINT8: - TemplatedGatherResult(result, ptrs, result_vector, sel_vector, count, offset); - break; + return Key::CreateKey(value.value_.utinyint, art.is_little_endian); case PhysicalType::UINT16: - TemplatedGatherResult(result, ptrs, result_vector, sel_vector, count, offset); - break; + return Key::CreateKey(value.value_.usmallint, art.is_little_endian); case PhysicalType::UINT32: - TemplatedGatherResult(result, ptrs, result_vector, sel_vector, count, offset); - break; + return Key::CreateKey(value.value_.uinteger, art.is_little_endian); case PhysicalType::UINT64: - TemplatedGatherResult(result, ptrs, result_vector, sel_vector, count, offset); - break; + return Key::CreateKey(value.value_.ubigint, art.is_little_endian); case PhysicalType::INT128: - TemplatedGatherResult(result, ptrs, result_vector, sel_vector, count, offset); - break; + return Key::CreateKey(value.value_.hugeint, art.is_little_endian); case PhysicalType::FLOAT: - TemplatedGatherResult(result, ptrs, result_vector, sel_vector, count, offset); - break; + return Key::CreateKey(value.value_.float_, art.is_little_endian); case PhysicalType::DOUBLE: - TemplatedGatherResult(result, ptrs, result_vector, sel_vector, count, offset); - break; - case PhysicalType::INTERVAL: - TemplatedGatherResult(result, ptrs, result_vector, sel_vector, count, offset); - break; + return Key::CreateKey(value.value_.double_, art.is_little_endian); case PhysicalType::VARCHAR: - TemplatedGatherResult(result, ptrs, result_vector, sel_vector, count, offset); - break; + return Key::CreateKey(string_t(value.str_value.c_str(), value.str_value.size()), + 
art.is_little_endian); default: - throw NotImplementedException("Unimplemented type for ScanStructure::GatherResult"); + throw InternalException("Invalid type for index"); } - offset += GetTypeIdSize(result.GetType().InternalType()); -} - -void ScanStructure::GatherResult(Vector &result, const SelectionVector &result_vector, - const SelectionVector &sel_vector, idx_t count, idx_t &offset) { - auto ptrs = FlatVector::GetData(pointers); - GatherResultVector(result, result_vector, ptrs, sel_vector, count, offset); } -void ScanStructure::GatherResult(Vector &result, const SelectionVector &sel_vector, idx_t count, idx_t &offset) { - GatherResult(result, FlatVector::INCREMENTAL_SELECTION_VECTOR, sel_vector, count, offset); +bool ART::SearchEqual(ARTIndexScanState *state, idx_t max_count, vector &result_ids) { + auto key = CreateKey(*this, types[0], state->values[0]); + auto leaf = static_cast(Lookup(tree, *key, 0)); + if (!leaf) { + return true; + } + if (leaf->num_elements > max_count) { + return false; + } + for (idx_t i = 0; i < leaf->num_elements; i++) { + row_t row_id = leaf->GetRowId(i); + result_ids.push_back(row_id); + } + return true; } -void ScanStructure::NextInnerJoin(DataChunk &keys, DataChunk &left, DataChunk &result) { - D_ASSERT(result.ColumnCount() == left.ColumnCount() + ht.build_types.size()); - if (this->count == 0) { - // no pointers left to chase +void ART::SearchEqualJoinNoFetch(Value &equal_value, idx_t &result_size) { + //! 
We need to look for a leaf + auto key = CreateKey(*this, types[0], equal_value); + auto leaf = static_cast(Lookup(tree, *key, 0)); + if (!leaf) { return; } + result_size = leaf->num_elements; +} - SelectionVector result_vector(STANDARD_VECTOR_SIZE); +Node *ART::Lookup(unique_ptr &node, Key &key, unsigned depth) { + auto node_val = node.get(); - idx_t result_count = ScanInnerJoin(keys, result_vector); - if (result_count > 0) { - if (IsRightOuterJoin(ht.join_type)) { - // full/right outer join: mark join matches as FOUND in the HT - auto ptrs = FlatVector::GetData(pointers); - for (idx_t i = 0; i < result_count; i++) { - auto idx = result_vector.get_index(i); - auto chain_pointer = (data_ptr_t *)(ptrs[idx] + ht.tuple_size); - auto target = (bool *)chain_pointer; - *target = true; + while (node_val) { + if (node_val->type == NodeType::NLeaf) { + auto leaf = static_cast(node_val); + Key &leaf_key = *leaf->value; + //! Check leaf + for (idx_t i = depth; i < leaf_key.len; i++) { + if (leaf_key[i] != key[i]) { + return nullptr; + } } + return node_val; } - // matches were found - // construct the result - // on the LHS, we create a slice using the result vector - result.Slice(left, result_vector, result_count); - - // on the RHS, we need to fetch the data from the hash table - idx_t offset = ht.condition_size; - for (idx_t i = 0; i < ht.build_types.size(); i++) { - auto &vector = result.data[left.ColumnCount() + i]; - D_ASSERT(vector.GetType() == ht.build_types[i]); - GatherResult(vector, result_vector, result_count, offset); + if (node_val->prefix_length) { + for (idx_t pos = 0; pos < node_val->prefix_length; pos++) { + if (key[depth + pos] != node_val->prefix[pos]) { + return nullptr; + } + } + depth += node_val->prefix_length; } - AdvancePointers(); - } -} - -void ScanStructure::ScanKeyMatches(DataChunk &keys) { - // the semi-join, anti-join and mark-join we handle a differently from the inner join - // since there can be at most STANDARD_VECTOR_SIZE results - // we 
handle the entire chunk in one call to Next(). - // for every pointer, we keep chasing pointers and doing comparisons. - // this results in a boolean array indicating whether or not the tuple has a match - SelectionVector match_sel(STANDARD_VECTOR_SIZE), no_match_sel(STANDARD_VECTOR_SIZE); - while (this->count > 0) { - // resolve the predicates for the current set of pointers - idx_t match_count = ResolvePredicates(keys, match_sel, no_match_sel); - idx_t no_match_count = this->count - match_count; - - // mark each of the matches as found - for (idx_t i = 0; i < match_count; i++) { - found_match[match_sel.get_index(i)] = true; + idx_t pos = node_val->GetChildPos(key[depth]); + if (pos == INVALID_INDEX) { + return nullptr; } - // continue searching for the ones where we did not find a match yet - AdvancePointers(no_match_sel, no_match_count); + node_val = node_val->GetChild(pos)->get(); + D_ASSERT(node_val); + + depth++; } + + return nullptr; } -template -void ScanStructure::NextSemiOrAntiJoin(DataChunk &keys, DataChunk &left, DataChunk &result) { - D_ASSERT(left.ColumnCount() == result.ColumnCount()); - D_ASSERT(keys.size() == left.size()); - // create the selection vector from the matches that were found - SelectionVector sel(STANDARD_VECTOR_SIZE); - idx_t result_count = 0; - for (idx_t i = 0; i < keys.size(); i++) { - if (found_match[i] == MATCH) { - // part of the result - sel.set_index(result_count++, i); +//===--------------------------------------------------------------------===// +// Iterator scans +//===--------------------------------------------------------------------===// +template +bool ART::IteratorScan(ARTIndexScanState *state, Iterator *it, Key *bound, idx_t max_count, vector &result_ids) { + bool has_next; + do { + if (HAS_BOUND) { + D_ASSERT(bound); + if (INCLUSIVE) { + if (*it->node->value > *bound) { + break; + } + } else { + if (*it->node->value >= *bound) { + break; + } + } } - } - // construct the final result - if (result_count > 0) { - // 
we only return the columns on the left side - // reference the columns of the left side from the result - result.Slice(left, sel, result_count); - } else { - D_ASSERT(result.size() == 0); - } + if (result_ids.size() + it->node->num_elements > max_count) { + // adding these elements would exceed the max count + return false; + } + for (idx_t i = 0; i < it->node->num_elements; i++) { + row_t row_id = it->node->GetRowId(i); + result_ids.push_back(row_id); + } + has_next = ART::IteratorNext(*it); + } while (has_next); + return true; } -void ScanStructure::NextSemiJoin(DataChunk &keys, DataChunk &left, DataChunk &result) { - // first scan for key matches - ScanKeyMatches(keys); - // then construct the result from all tuples with a match - NextSemiOrAntiJoin(keys, left, result); - - finished = true; +void Iterator::SetEntry(idx_t entry_depth, IteratorEntry entry) { + if (stack.size() < entry_depth + 1) { + stack.resize(MaxValue(8, MaxValue(entry_depth + 1, stack.size() * 2))); + } + stack[entry_depth] = entry; } -void ScanStructure::NextAntiJoin(DataChunk &keys, DataChunk &left, DataChunk &result) { - // first scan for key matches - ScanKeyMatches(keys); - // then construct the result from all tuples that did not find a match - NextSemiOrAntiJoin(keys, left, result); +bool ART::IteratorNext(Iterator &it) { + // Skip leaf + if ((it.depth) && ((it.stack[it.depth - 1].node)->type == NodeType::NLeaf)) { + it.depth--; + } - finished = true; -} + // Look for the next leaf + while (it.depth > 0) { + auto &top = it.stack[it.depth - 1]; + Node *node = top.node; -void ScanStructure::ConstructMarkJoinResult(DataChunk &join_keys, DataChunk &child, DataChunk &result) { - // for the initial set of columns we just reference the left side - result.SetCardinality(child); - for (idx_t i = 0; i < child.ColumnCount(); i++) { - result.data[i].Reference(child.data[i]); - } - auto &mark_vector = result.data.back(); - mark_vector.SetVectorType(VectorType::FLAT_VECTOR); - // first we set the 
NULL values from the join keys - // if there is any NULL in the keys, the result is NULL - auto bool_result = FlatVector::GetData(mark_vector); - auto &mask = FlatVector::Validity(mark_vector); - for (idx_t col_idx = 0; col_idx < join_keys.ColumnCount(); col_idx++) { - if (ht.null_values_are_equal[col_idx]) { - continue; - } - VectorData jdata; - join_keys.data[col_idx].Orrify(join_keys.size(), jdata); - if (!jdata.validity.AllValid()) { - for (idx_t i = 0; i < join_keys.size(); i++) { - auto jidx = jdata.sel->get_index(i); - mask.Set(i, jdata.validity.RowIsValidUnsafe(jidx)); - } - } - } - // now set the remaining entries to either true or false based on whether a match was found - if (found_match) { - for (idx_t i = 0; i < child.size(); i++) { - bool_result[i] = found_match[i]; + if (node->type == NodeType::NLeaf) { + // found a leaf: move to next node + it.node = (Leaf *)node; + return true; } - } else { - memset(bool_result, 0, sizeof(bool) * child.size()); - } - // if the right side contains NULL values, the result of any FALSE becomes NULL - if (ht.has_null) { - for (idx_t i = 0; i < child.size(); i++) { - if (!bool_result[i]) { - mask.SetInvalid(i); - } + + // Find next node + top.pos = node->GetNextPos(top.pos); + if (top.pos != INVALID_INDEX) { + // next node found: go there + it.SetEntry(it.depth, IteratorEntry(node->GetChild(top.pos)->get(), INVALID_INDEX)); + it.depth++; + } else { + // no node found: move up the tree + it.depth--; } } + return false; } -void ScanStructure::NextMarkJoin(DataChunk &keys, DataChunk &input, DataChunk &result) { - D_ASSERT(result.ColumnCount() == input.ColumnCount() + 1); - D_ASSERT(result.data.back().GetType() == LogicalType::BOOLEAN); - // this method should only be called for a non-empty HT - D_ASSERT(ht.count > 0); +//===--------------------------------------------------------------------===// +// Greater Than +// Returns: True (If found leaf >= key) +// False (Otherwise) 
+//===--------------------------------------------------------------------===// +bool ART::Bound(unique_ptr &n, Key &key, Iterator &it, bool inclusive) { + it.depth = 0; + bool equal = false; + if (!n) { + return false; + } + Node *node = n.get(); - ScanKeyMatches(keys); - if (ht.correlated_mark_join_info.correlated_types.empty()) { - ConstructMarkJoinResult(keys, input, result); - } else { - auto &info = ht.correlated_mark_join_info; - // there are correlated columns - // first we fetch the counts from the aggregate hashtable corresponding to these entries - D_ASSERT(keys.ColumnCount() == info.group_chunk.ColumnCount() + 1); - info.group_chunk.SetCardinality(keys); - for (idx_t i = 0; i < info.group_chunk.ColumnCount(); i++) { - info.group_chunk.data[i].Reference(keys.data[i]); + idx_t depth = 0; + while (true) { + it.SetEntry(it.depth, IteratorEntry(node, 0)); + auto &top = it.stack[it.depth]; + it.depth++; + if (!equal) { + while (node->type != NodeType::NLeaf) { + node = node->GetChild(node->GetMin())->get(); + auto &c_top = it.stack[it.depth]; + c_top.node = node; + it.depth++; + } } - info.correlated_counts->FetchAggregates(info.group_chunk, info.result_chunk); + if (node->type == NodeType::NLeaf) { + // found a leaf node: check if it is bigger or equal than the current key + auto leaf = static_cast(node); + it.node = leaf; + // if the search is not inclusive the leaf node could still be equal to the current value + // check if leaf is equal to the current key + if (*leaf->value == key) { + // if its not inclusive check if there is a next leaf + if (!inclusive && !IteratorNext(it)) { + return false; + } else { + return true; + } + } - // for the initial set of columns we just reference the left side - result.SetCardinality(input); - for (idx_t i = 0; i < input.ColumnCount(); i++) { - result.data[i].Reference(input.data[i]); - } - // create the result matching vector - auto &last_key = keys.data.back(); - auto &result_vector = result.data.back(); - // first 
set the nullmask based on whether or not there were NULL values in the join key - result_vector.SetVectorType(VectorType::FLAT_VECTOR); - auto bool_result = FlatVector::GetData(result_vector); - auto &mask = FlatVector::Validity(result_vector); - switch (last_key.GetVectorType()) { - case VectorType::CONSTANT_VECTOR: - if (ConstantVector::IsNull(last_key)) { - mask.SetAllInvalid(input.size()); + if (*leaf->value > key) { + return true; } - break; - case VectorType::FLAT_VECTOR: - mask.Copy(FlatVector::Validity(last_key), input.size()); - break; - default: { - VectorData kdata; - last_key.Orrify(keys.size(), kdata); - for (idx_t i = 0; i < input.size(); i++) { - auto kidx = kdata.sel->get_index(i); - mask.Set(i, kdata.validity.RowIsValid(kidx)); + // Leaf is lower than key + // Check if next leaf is still lower than key + while (IteratorNext(it)) { + if (*it.node->value == key) { + // if its not inclusive check if there is a next leaf + if (!inclusive && !IteratorNext(it)) { + return false; + } else { + return true; + } + } else if (*it.node->value > key) { + // if its not inclusive check if there is a next leaf + return true; + } } - break; + return false; } + uint32_t mismatch_pos = Node::PrefixMismatch(*this, node, key, depth); + if (mismatch_pos != node->prefix_length) { + if (node->prefix[mismatch_pos] < key[depth + mismatch_pos]) { + // Less + it.depth--; + return IteratorNext(it); + } else { + // Greater + top.pos = INVALID_INDEX; + return IteratorNext(it); + } } + // prefix matches, search inside the child for the key + depth += node->prefix_length; - auto count_star = FlatVector::GetData(info.result_chunk.data[0]); - auto count = FlatVector::GetData(info.result_chunk.data[1]); - // set the entries to either true or false based on whether a match was found - for (idx_t i = 0; i < input.size(); i++) { - D_ASSERT(count_star[i] >= count[i]); - bool_result[i] = found_match ? 
found_match[i] : false; - if (!bool_result[i] && count_star[i] > count[i]) { - // RHS has NULL value and result is false: set to null - mask.SetInvalid(i); - } - if (count_star[i] == 0) { - // count == 0, set nullmask to false (we know the result is false now) - mask.SetValid(i); - } + top.pos = node->GetChildGreaterEqual(key[depth], equal); + if (top.pos == INVALID_INDEX) { + // Find min leaf + top.pos = node->GetMin(); } + node = node->GetChild(top.pos)->get(); + //! This means all children of this node qualify as geq + + depth++; } - finished = true; } -void ScanStructure::NextLeftJoin(DataChunk &keys, DataChunk &left, DataChunk &result) { - // a LEFT OUTER JOIN is identical to an INNER JOIN except all tuples that do - // not have a match must return at least one tuple (with the right side set - // to NULL in every column) - NextInnerJoin(keys, left, result); - if (result.size() == 0) { - // no entries left from the normal join - // fill in the result of the remaining left tuples - // together with NULL values on the right-hand side - idx_t remaining_count = 0; - SelectionVector sel(STANDARD_VECTOR_SIZE); - for (idx_t i = 0; i < left.size(); i++) { - if (!found_match[i]) { - sel.set_index(remaining_count++, i); - } - } - if (remaining_count > 0) { - // have remaining tuples - // slice the left side with tuples that did not find a match - result.Slice(left, sel, remaining_count); +bool ART::SearchGreater(ARTIndexScanState *state, bool inclusive, idx_t max_count, vector &result_ids) { + Iterator *it = &state->iterator; + auto key = CreateKey(*this, types[0], state->values[0]); - // now set the right side to NULL - for (idx_t i = left.ColumnCount(); i < result.ColumnCount(); i++) { - result.data[i].SetVectorType(VectorType::CONSTANT_VECTOR); - ConstantVector::SetNull(result.data[i], true); - } + // greater than scan: first set the iterator to the node at which we will start our scan by finding the lowest node + // that satisfies our requirement + if (!it->start) { 
+ bool found = ART::Bound(tree, *key, *it, inclusive); + if (!found) { + return true; } - finished = true; + it->start = true; } + // after that we continue the scan; we don't need to check the bounds as any value following this value is + // automatically bigger and hence satisfies our predicate + return IteratorScan(state, it, nullptr, max_count, result_ids); } -void ScanStructure::NextSingleJoin(DataChunk &keys, DataChunk &input, DataChunk &result) { - // single join - // this join is similar to the semi join except that - // (1) we actually return data from the RHS and - // (2) we return NULL for that data if there is no match - idx_t result_count = 0; - SelectionVector result_sel(STANDARD_VECTOR_SIZE); - SelectionVector match_sel(STANDARD_VECTOR_SIZE), no_match_sel(STANDARD_VECTOR_SIZE); - while (this->count > 0) { - // resolve the predicates for the current set of pointers - idx_t match_count = ResolvePredicates(keys, match_sel, no_match_sel); - idx_t no_match_count = this->count - match_count; - - // mark each of the matches as found - for (idx_t i = 0; i < match_count; i++) { - // found a match for this index - auto index = match_sel.get_index(i); - found_match[index] = true; - result_sel.set_index(result_count++, index); +//===--------------------------------------------------------------------===// +// Less Than +//===--------------------------------------------------------------------===// +static Leaf &FindMinimum(Iterator &it, Node &node) { + Node *next = nullptr; + idx_t pos = 0; + switch (node.type) { + case NodeType::NLeaf: + it.node = (Leaf *)&node; + return (Leaf &)node; + case NodeType::N4: + next = ((Node4 &)node).child[0].get(); + break; + case NodeType::N16: + next = ((Node16 &)node).child[0].get(); + break; + case NodeType::N48: { + auto &n48 = (Node48 &)node; + while (n48.child_index[pos] == Node::EMPTY_MARKER) { + pos++; } - // continue searching for the ones where we did not find a match yet - AdvancePointers(no_match_sel, no_match_count); 
- } - // reference the columns of the left side from the result - D_ASSERT(input.ColumnCount() > 0); - for (idx_t i = 0; i < input.ColumnCount(); i++) { - result.data[i].Reference(input.data[i]); + next = n48.child[n48.child_index[pos]].get(); + break; } - // now fetch the data from the RHS - idx_t offset = ht.condition_size; - for (idx_t i = 0; i < ht.build_types.size(); i++) { - auto &vector = result.data[input.ColumnCount() + i]; - // set NULL entries for every entry that was not found - auto &mask = FlatVector::Validity(vector); - mask.SetAllInvalid(input.size()); - for (idx_t j = 0; j < result_count; j++) { - mask.SetValid(result_sel.get_index(j)); + case NodeType::N256: { + auto &n256 = (Node256 &)node; + while (!n256.child[pos]) { + pos++; } - // for the remaining values we fetch the values - GatherResult(vector, result_sel, result_sel, result_count, offset); + next = n256.child[pos].get(); + break; } - result.SetCardinality(input.size()); - - // like the SEMI, ANTI and MARK join types, the SINGLE join only ever does one pass over the HT per input chunk - finished = true; + } + it.SetEntry(it.depth, IteratorEntry(&node, pos)); + it.depth++; + return FindMinimum(it, *next); } -void JoinHashTable::ScanFullOuter(DataChunk &result, JoinHTScanState &state) { - // scan the HT starting from the current position and check which rows from the build side did not find a match - data_ptr_t key_locations[STANDARD_VECTOR_SIZE]; - idx_t found_entries = 0; - for (; state.block_position < blocks.size(); state.block_position++, state.position = 0) { - auto &block = blocks[state.block_position]; - auto &handle = pinned_handles[state.block_position]; - auto baseptr = handle->node->buffer; - for (; state.position < block.count; state.position++) { - auto tuple_base = baseptr + state.position * entry_size; - auto found_match = (bool *)(tuple_base + tuple_size); - if (!*found_match) { - key_locations[found_entries++] = tuple_base; - if (found_entries == STANDARD_VECTOR_SIZE) { - 
state.position++; - break; - } - } - } - if (found_entries == STANDARD_VECTOR_SIZE) { - break; - } +bool ART::SearchLess(ARTIndexScanState *state, bool inclusive, idx_t max_count, vector &result_ids) { + if (!tree) { + return true; } - result.SetCardinality(found_entries); - if (found_entries > 0) { - idx_t left_column_count = result.ColumnCount() - build_types.size(); - // set the left side as a constant NULL - for (idx_t i = 0; i < left_column_count; i++) { - result.data[i].SetVectorType(VectorType::CONSTANT_VECTOR); - ConstantVector::SetNull(result.data[i], true); + + Iterator *it = &state->iterator; + auto upper_bound = CreateKey(*this, types[0], state->values[0]); + + if (!it->start) { + // first find the minimum value in the ART: we start scanning from this value + auto &minimum = FindMinimum(state->iterator, *tree); + // early out min value higher than upper bound query + if (*minimum.value > *upper_bound) { + return true; } - // gather the values from the RHS - idx_t offset = condition_size; - for (idx_t i = 0; i < build_types.size(); i++) { - auto &vector = result.data[left_column_count + i]; - D_ASSERT(vector.GetType() == build_types[i]); - GatherResultVector(vector, FlatVector::INCREMENTAL_SELECTION_VECTOR, (uintptr_t *)key_locations, - FlatVector::INCREMENTAL_SELECTION_VECTOR, found_entries, offset); + it->start = true; + } + // now continue the scan until we reach the upper bound + if (inclusive) { + return IteratorScan(state, it, upper_bound.get(), max_count, result_ids); + } else { + return IteratorScan(state, it, upper_bound.get(), max_count, result_ids); + } +} + +//===--------------------------------------------------------------------===// +// Closed Range Query +//===--------------------------------------------------------------------===// +bool ART::SearchCloseRange(ARTIndexScanState *state, bool left_inclusive, bool right_inclusive, idx_t max_count, + vector &result_ids) { + auto lower_bound = CreateKey(*this, types[0], state->values[0]); + 
auto upper_bound = CreateKey(*this, types[0], state->values[1]); + Iterator *it = &state->iterator; + // first find the first node that satisfies the left predicate + if (!it->start) { + bool found = ART::Bound(tree, *lower_bound, *it, left_inclusive); + if (!found) { + return true; } + it->start = true; + } + // now continue the scan until we reach the upper bound + if (right_inclusive) { + return IteratorScan(state, it, upper_bound.get(), max_count, result_ids); + } else { + return IteratorScan(state, it, upper_bound.get(), max_count, result_ids); } } -} // namespace duckdb//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/execution/merge_join.hpp -// -// -//===----------------------------------------------------------------------===// +bool ART::Scan(Transaction &transaction, DataTable &table, IndexScanState &table_state, idx_t max_count, + vector &result_ids) { + auto state = (ARTIndexScanState *)&table_state; + D_ASSERT(state->values[0].type().InternalType() == types[0]); + vector row_ids; + bool success = true; + if (state->values[1].is_null) { + lock_guard l(lock); + // single predicate + switch (state->expressions[0]) { + case ExpressionType::COMPARE_EQUAL: + success = SearchEqual(state, max_count, row_ids); + break; + case ExpressionType::COMPARE_GREATERTHANOREQUALTO: + success = SearchGreater(state, true, max_count, row_ids); + break; + case ExpressionType::COMPARE_GREATERTHAN: + success = SearchGreater(state, false, max_count, row_ids); + break; + case ExpressionType::COMPARE_LESSTHANOREQUALTO: + success = SearchLess(state, true, max_count, row_ids); + break; + case ExpressionType::COMPARE_LESSTHAN: + success = SearchLess(state, false, max_count, row_ids); + break; + default: + throw InternalException("Operation not implemented"); + } + } else { + lock_guard l(lock); + // two predicates + D_ASSERT(state->values[1].type().InternalType() == types[0]); + bool left_inclusive = state->expressions[0] == 
ExpressionType ::COMPARE_GREATERTHANOREQUALTO; + bool right_inclusive = state->expressions[1] == ExpressionType ::COMPARE_LESSTHANOREQUALTO; + success = SearchCloseRange(state, left_inclusive, right_inclusive, max_count, row_ids); + } + if (!success) { + return false; + } + if (row_ids.empty()) { + return true; + } + // sort the row ids + sort(row_ids.begin(), row_ids.end()); + // duplicate eliminate the row ids and append them to the row ids of the state + result_ids.reserve(row_ids.size()); + result_ids.push_back(row_ids[0]); + for (idx_t i = 1; i < row_ids.size(); i++) { + if (row_ids[i] != row_ids[i - 1]) { + result_ids.push_back(row_ids[i]); + } + } + return true; +} +} // namespace duckdb namespace duckdb { -struct MergeOrder { - SelectionVector order; - idx_t count; - VectorData vdata; -}; - -enum MergeInfoType : uint8_t { SCALAR_MERGE_INFO = 1, CHUNK_MERGE_INFO = 2 }; +Key::Key(unique_ptr data, idx_t len) : len(len), data(move(data)) { +} -struct MergeInfo { - MergeInfo(MergeInfoType info_type, LogicalType type) : info_type(info_type), type(type) { - } - MergeInfoType info_type; - LogicalType type; -}; +template <> +unique_ptr Key::CreateKey(string_t value, bool is_little_endian) { + idx_t len = value.GetSize() + 1; + auto data = unique_ptr(new data_t[len]); + memcpy(data.get(), value.GetDataUnsafe(), len - 1); + data[len - 1] = '\0'; + return make_unique(move(data), len); +} -struct ScalarMergeInfo : public MergeInfo { - MergeOrder ℴ - idx_t &pos; - SelectionVector result; +template <> +unique_ptr Key::CreateKey(const char *value, bool is_little_endian) { + return Key::CreateKey(string_t(value, strlen(value)), is_little_endian); +} - ScalarMergeInfo(MergeOrder &order, LogicalType type, idx_t &pos) - : MergeInfo(MergeInfoType::SCALAR_MERGE_INFO, type), order(order), pos(pos), result(STANDARD_VECTOR_SIZE) { +bool Key::operator>(const Key &k) const { + for (idx_t i = 0; i < MinValue(len, k.len); i++) { + if (data[i] > k.data[i]) { + return true; + } else if 
(data[i] < k.data[i]) { + return false; + } } -}; - -struct ChunkMergeInfo : public MergeInfo { - ChunkCollection &data_chunks; - vector &order_info; - bool found_match[STANDARD_VECTOR_SIZE]; + return len > k.len; +} - ChunkMergeInfo(ChunkCollection &data_chunks, vector &order_info) - : MergeInfo(MergeInfoType::CHUNK_MERGE_INFO, data_chunks.Types()[0]), data_chunks(data_chunks), - order_info(order_info) { - memset(found_match, 0, sizeof(found_match)); +bool Key::operator<(const Key &k) const { + for (idx_t i = 0; i < MinValue(len, k.len); i++) { + if (data[i] < k.data[i]) { + return true; + } else if (data[i] > k.data[i]) { + return false; + } } -}; + return len < k.len; +} -struct MergeJoinComplex { - struct LessThan { - template - static idx_t Operation(ScalarMergeInfo &l, ScalarMergeInfo &r); - }; - struct LessThanEquals { - template - static idx_t Operation(ScalarMergeInfo &l, ScalarMergeInfo &r); - }; - struct GreaterThan { - template - static idx_t Operation(ScalarMergeInfo &l, ScalarMergeInfo &r) { - return LessThan::Operation(r, l); - } - }; - struct GreaterThanEquals { - template - static idx_t Operation(ScalarMergeInfo &l, ScalarMergeInfo &r) { - return LessThanEquals::Operation(r, l); +bool Key::operator>=(const Key &k) const { + for (idx_t i = 0; i < MinValue(len, k.len); i++) { + if (data[i] > k.data[i]) { + return true; + } else if (data[i] < k.data[i]) { + return false; } - }; - - static idx_t Perform(MergeInfo &l, MergeInfo &r, ExpressionType comparison_type); -}; - -struct MergeJoinSimple { - struct LessThan { - template - static idx_t Operation(ScalarMergeInfo &l, ChunkMergeInfo &r); - }; - struct LessThanEquals { - template - static idx_t Operation(ScalarMergeInfo &l, ChunkMergeInfo &r); - }; - struct GreaterThan { - template - static idx_t Operation(ScalarMergeInfo &l, ChunkMergeInfo &r); - }; - struct GreaterThanEquals { - template - static idx_t Operation(ScalarMergeInfo &l, ChunkMergeInfo &r); - }; - - static idx_t Perform(MergeInfo &l, 
MergeInfo &r, ExpressionType comparison); -}; - -#define INSTANTIATE_MERGEJOIN_TEMPLATES(MJCLASS, OPNAME, L, R) \ - template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); \ - template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); \ - template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); \ - template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); \ - template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); \ - template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); \ - template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); \ - template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); \ - template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); \ - template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); \ - template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); \ - template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); \ - template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); + } + return len >= k.len; +} +bool Key::operator==(const Key &k) const { + if (len != k.len) { + return false; + } + for (idx_t i = 0; i < len; i++) { + if (data[i] != k.data[i]) { + return false; + } + } + return true; +} } // namespace duckdb +#include namespace duckdb { -template -static idx_t MergeJoinSwitch(L_ARG &l, R_ARG &r) { - switch (l.type.InternalType()) { - case PhysicalType::BOOL: - case PhysicalType::INT8: - return MJ::template Operation(l, r); - case PhysicalType::INT16: - return MJ::template Operation(l, r); - case PhysicalType::INT32: - return MJ::template Operation(l, r); - case PhysicalType::INT64: - return MJ::template Operation(l, r); - case PhysicalType::UINT8: - return MJ::template Operation(l, r); - case PhysicalType::UINT16: - return MJ::template Operation(l, r); - case PhysicalType::UINT32: - return MJ::template Operation(l, r); - case PhysicalType::UINT64: - return MJ::template Operation(l, r); - case PhysicalType::INT128: - return MJ::template Operation(l, r); - case PhysicalType::FLOAT: - return MJ::template Operation(l, r); - case 
PhysicalType::DOUBLE: - return MJ::template Operation(l, r); - case PhysicalType::INTERVAL: - return MJ::template Operation(l, r); - case PhysicalType::VARCHAR: - return MJ::template Operation(l, r); - default: - throw NotImplementedException("Type not implemented for merge join!"); - } +Leaf::Leaf(ART &art, unique_ptr value, row_t row_id) : Node(art, NodeType::NLeaf, 0) { + this->value = move(value); + this->capacity = 1; + this->row_ids = unique_ptr(new row_t[this->capacity]); + this->row_ids[0] = row_id; + this->num_elements = 1; } -template -static idx_t MergeJoinComparisonSwitch(L_ARG &l, R_ARG &r, ExpressionType comparison_type) { - switch (comparison_type) { - case ExpressionType::COMPARE_LESSTHAN: - return MergeJoinSwitch(l, r); - case ExpressionType::COMPARE_LESSTHANOREQUALTO: - return MergeJoinSwitch(l, r); - case ExpressionType::COMPARE_GREATERTHAN: - return MergeJoinSwitch(l, r); - case ExpressionType::COMPARE_GREATERTHANOREQUALTO: - return MergeJoinSwitch(l, r); - default: - throw NotImplementedException("Unimplemented comparison type for merge join!"); +void Leaf::Insert(row_t row_id) { + // Grow array + if (num_elements == capacity) { + auto new_row_id = unique_ptr(new row_t[capacity * 2]); + memcpy(new_row_id.get(), row_ids.get(), capacity * sizeof(row_t)); + capacity *= 2; + row_ids = move(new_row_id); } + row_ids[num_elements++] = row_id; } -idx_t MergeJoinComplex::Perform(MergeInfo &l, MergeInfo &r, ExpressionType comparison_type) { - D_ASSERT(l.info_type == MergeInfoType::SCALAR_MERGE_INFO && r.info_type == MergeInfoType::SCALAR_MERGE_INFO); - auto &left = (ScalarMergeInfo &)l; - auto &right = (ScalarMergeInfo &)r; - D_ASSERT(left.type == right.type); - if (left.order.count == 0 || right.order.count == 0) { - return 0; +//! TODO: Maybe shrink array dynamically? 
+void Leaf::Remove(row_t row_id) { + idx_t entry_offset = INVALID_INDEX; + for (idx_t i = 0; i < num_elements; i++) { + if (row_ids[i] == row_id) { + entry_offset = i; + break; + } } - return MergeJoinComparisonSwitch(left, right, comparison_type); -} - -idx_t MergeJoinSimple::Perform(MergeInfo &l, MergeInfo &r, ExpressionType comparison_type) { - D_ASSERT(l.info_type == MergeInfoType::SCALAR_MERGE_INFO && r.info_type == MergeInfoType::CHUNK_MERGE_INFO); - auto &left = (ScalarMergeInfo &)l; - auto &right = (ChunkMergeInfo &)r; - D_ASSERT(left.type == right.type); - if (left.order.count == 0 || right.data_chunks.Count() == 0) { - return 0; + if (entry_offset == INVALID_INDEX) { + return; + } + num_elements--; + for (idx_t j = entry_offset; j < num_elements; j++) { + row_ids[j] = row_ids[j + 1]; } - return MergeJoinComparisonSwitch(left, right, comparison_type); } } // namespace duckdb @@ -35734,490 +43141,268 @@ idx_t MergeJoinSimple::Perform(MergeInfo &l, MergeInfo &r, ExpressionType compar - namespace duckdb { -template -idx_t MergeJoinComplexLessThan(ScalarMergeInfo &l, ScalarMergeInfo &r) { - if (r.pos >= r.order.count) { - return 0; - } - auto ldata = (T *)l.order.vdata.data; - auto rdata = (T *)r.order.vdata.data; - auto &lorder = l.order.order; - auto &rorder = r.order.order; - idx_t result_count = 0; - while (true) { - if (l.pos < l.order.count) { - auto lidx = lorder.get_index(l.pos); - auto ridx = rorder.get_index(r.pos); - auto dlidx = l.order.vdata.sel->get_index(lidx); - auto dridx = r.order.vdata.sel->get_index(ridx); - if (OP::Operation(ldata[dlidx], rdata[dridx])) { - // left side smaller: found match - l.result.set_index(result_count, lidx); - r.result.set_index(result_count, ridx); - result_count++; - // move left side forward - l.pos++; - if (result_count == STANDARD_VECTOR_SIZE) { - // out of space! 
- break; - } - continue; - } - } - // right side smaller or equal, or left side exhausted: move - // right pointer forward reset left side to start - l.pos = 0; - r.pos++; - if (r.pos == r.order.count) { - break; +Node::Node(ART &art, NodeType type, size_t compressed_prefix_size) : prefix_length(0), count(0), type(type) { + this->prefix = unique_ptr(new uint8_t[compressed_prefix_size]); +} + +void Node::CopyPrefix(ART &art, Node *src, Node *dst) { + dst->prefix_length = src->prefix_length; + memcpy(dst->prefix.get(), src->prefix.get(), src->prefix_length); +} + +// LCOV_EXCL_START +unique_ptr *Node::GetChild(idx_t pos) { + D_ASSERT(0); + return nullptr; +} + +idx_t Node::GetMin() { + D_ASSERT(0); + return 0; +} +// LCOV_EXCL_STOP + +uint32_t Node::PrefixMismatch(ART &art, Node *node, Key &key, uint64_t depth) { + uint64_t pos; + for (pos = 0; pos < node->prefix_length; pos++) { + if (key[depth + pos] != node->prefix[pos]) { + return pos; } } - return result_count; + return pos; } -template -idx_t MergeJoinComplex::LessThan::Operation(ScalarMergeInfo &l, ScalarMergeInfo &r) { - return MergeJoinComplexLessThan(l, r); +void Node::InsertLeaf(ART &art, unique_ptr &node, uint8_t key, unique_ptr &new_node) { + switch (node->type) { + case NodeType::N4: + Node4::Insert(art, node, key, new_node); + break; + case NodeType::N16: + Node16::Insert(art, node, key, new_node); + break; + case NodeType::N48: + Node48::Insert(art, node, key, new_node); + break; + case NodeType::N256: + Node256::Insert(art, node, key, new_node); + break; + default: + throw InternalException("Unrecognized leaf type for insert"); + } } -template -idx_t MergeJoinComplex::LessThanEquals::Operation(ScalarMergeInfo &l, ScalarMergeInfo &r) { - return MergeJoinComplexLessThan(l, r); +void Node::Erase(ART &art, unique_ptr &node, idx_t pos) { + switch (node->type) { + case NodeType::N4: { + Node4::Erase(art, node, pos); + break; + } + case NodeType::N16: { + Node16::Erase(art, node, pos); + break; + } + case 
NodeType::N48: { + Node48::Erase(art, node, pos); + break; + } + case NodeType::N256: + Node256::Erase(art, node, pos); + break; + default: + throw InternalException("Unrecognized leaf type for erase"); + } } -INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinComplex, LessThan, ScalarMergeInfo, ScalarMergeInfo) -INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinComplex, LessThanEquals, ScalarMergeInfo, ScalarMergeInfo) - } // namespace duckdb +#include namespace duckdb { -template -static idx_t MergeJoinSimpleGreaterThan(ScalarMergeInfo &l, ChunkMergeInfo &r) { - auto ldata = (T *)l.order.vdata.data; - auto &lorder = l.order.order; - l.pos = l.order.count; - for (idx_t chunk_idx = 0; chunk_idx < r.order_info.size(); chunk_idx++) { - // we only care about the SMALLEST value in each of the RHS - // because we want to figure out if they are greater than [or equal] to ANY value - // get the smallest value from the RHS - auto &rorder = r.order_info[chunk_idx]; - auto rdata = (T *)rorder.vdata.data; - auto min_r_value = rdata[rorder.vdata.sel->get_index(rorder.order.get_index(0))]; - // now we start from the current lpos value and check if we found a new value that is [>= OR >] the min RHS - // value - while (true) { - auto lidx = lorder.get_index(l.pos - 1); - auto dlidx = l.order.vdata.sel->get_index(lidx); - if (OP::Operation(ldata[dlidx], min_r_value)) { - // found a match for lpos, set it in the found_match vector - r.found_match[lidx] = true; - l.pos--; - if (l.pos == 0) { - // early out: we exhausted the entire LHS and they all match - return 0; - } - } else { - // we found no match: any subsequent value from the LHS we scan now will be smaller and thus also not - // match move to the next RHS chunk - break; - } - } - } - return 0; -} -template -idx_t MergeJoinSimple::GreaterThan::Operation(ScalarMergeInfo &l, ChunkMergeInfo &r) { - return MergeJoinSimpleGreaterThan(l, r); +Node16::Node16(ART &art, size_t compression_length) : Node(art, NodeType::N16, compression_length) { + 
memset(key, 16, sizeof(key)); } -template -idx_t MergeJoinSimple::GreaterThanEquals::Operation(ScalarMergeInfo &l, ChunkMergeInfo &r) { - return MergeJoinSimpleGreaterThan(l, r); +// TODO : In the future this can be performed using SIMD (#include x86 SSE intrinsics) +idx_t Node16::GetChildPos(uint8_t k) { + for (idx_t pos = 0; pos < count; pos++) { + if (key[pos] == k) { + return pos; + } + } + return Node::GetChildPos(k); } -template -static idx_t MergeJoinSimpleLessThan(ScalarMergeInfo &l, ChunkMergeInfo &r) { - auto ldata = (T *)l.order.vdata.data; - auto &lorder = l.order.order; - l.pos = 0; - for (idx_t chunk_idx = 0; chunk_idx < r.order_info.size(); chunk_idx++) { - // we only care about the BIGGEST value in each of the RHS - // because we want to figure out if they are less than [or equal] to ANY value - // get the biggest value from the RHS - auto &rorder = r.order_info[chunk_idx]; - auto rdata = (T *)rorder.vdata.data; - auto max_r_value = rdata[rorder.vdata.sel->get_index(rorder.order.get_index(rorder.count - 1))]; - // now we start from the current lpos value and check if we found a new value that is [<= OR <] the max RHS - // value - while (true) { - auto lidx = lorder.get_index(l.pos); - auto dlidx = l.order.vdata.sel->get_index(lidx); - if (OP::Operation(ldata[dlidx], max_r_value)) { - // found a match for lpos, set it in the found_match vector - r.found_match[lidx] = true; - l.pos++; - if (l.pos == l.order.count) { - // early out: we exhausted the entire LHS and they all match - return 0; - } +idx_t Node16::GetChildGreaterEqual(uint8_t k, bool &equal) { + for (idx_t pos = 0; pos < count; pos++) { + if (key[pos] >= k) { + if (key[pos] == k) { + equal = true; } else { - // we found no match: any subsequent value from the LHS we scan now will be bigger and thus also not - // match move to the next RHS chunk - break; + equal = false; } + + return pos; } } - return 0; + return Node::GetChildGreaterEqual(k, equal); } -template -idx_t 
MergeJoinSimple::LessThan::Operation(ScalarMergeInfo &l, ChunkMergeInfo &r) { - return MergeJoinSimpleLessThan(l, r); +idx_t Node16::GetNextPos(idx_t pos) { + if (pos == INVALID_INDEX) { + return 0; + } + pos++; + return pos < count ? pos : INVALID_INDEX; } -template -idx_t MergeJoinSimple::LessThanEquals::Operation(ScalarMergeInfo &l, ChunkMergeInfo &r) { - return MergeJoinSimpleLessThan(l, r); +unique_ptr *Node16::GetChild(idx_t pos) { + D_ASSERT(pos < count); + return &child[pos]; } -INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinSimple, LessThan, ScalarMergeInfo, ChunkMergeInfo) -INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinSimple, LessThanEquals, ScalarMergeInfo, ChunkMergeInfo) -INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinSimple, GreaterThan, ScalarMergeInfo, ChunkMergeInfo) -INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinSimple, GreaterThanEquals, ScalarMergeInfo, ChunkMergeInfo) - -} // namespace duckdb - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/execution/nested_loop_join.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - +idx_t Node16::GetMin() { + return 0; +} +void Node16::Insert(ART &art, unique_ptr &node, uint8_t key_byte, unique_ptr &child) { + Node16 *n = static_cast(node.get()); -namespace duckdb { + if (n->count < 16) { + // Insert element + idx_t pos = 0; + while (pos < node->count && n->key[pos] < key_byte) { + pos++; + } + if (n->child[pos] != nullptr) { + for (idx_t i = n->count; i > pos; i--) { + n->key[i] = n->key[i - 1]; + n->child[i] = move(n->child[i - 1]); + } + } + n->key[pos] = key_byte; + n->child[pos] = move(child); + n->count++; + } else { + // Grow to Node48 + auto new_node = make_unique(art, n->prefix_length); + for (idx_t i = 0; i < node->count; i++) { + new_node->child_index[n->key[i]] = i; + new_node->child[i] = move(n->child[i]); + } + CopyPrefix(art, n, new_node.get()); + new_node->count = node->count; + node = 
move(new_node); -struct NestedLoopJoinInner { - static idx_t Perform(idx_t <uple, idx_t &rtuple, DataChunk &left_conditions, DataChunk &right_conditions, - SelectionVector &lvector, SelectionVector &rvector, vector &conditions); -}; + Node48::Insert(art, node, key_byte, child); + } +} -struct NestedLoopJoinMark { - static void Perform(DataChunk &left, ChunkCollection &right, bool found_match[], vector &conditions); -}; +void Node16::Erase(ART &art, unique_ptr &node, int pos) { + Node16 *n = static_cast(node.get()); + // erase the child and decrease the count + n->child[pos].reset(); + n->count--; + // potentially move any children backwards + for (; pos < n->count; pos++) { + n->key[pos] = n->key[pos + 1]; + n->child[pos] = move(n->child[pos + 1]); + } + if (node->count <= 3) { + // Shrink node + auto new_node = make_unique(art, n->prefix_length); + for (unsigned i = 0; i < n->count; i++) { + new_node->key[new_node->count] = n->key[i]; + new_node->child[new_node->count++] = move(n->child[i]); + } + CopyPrefix(art, n, new_node.get()); + node = move(new_node); + } +} } // namespace duckdb + namespace duckdb { -struct InitialNestedLoopJoin { - template - static idx_t Operation(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, idx_t &rpos, - SelectionVector &lvector, SelectionVector &rvector, idx_t current_match_count) { - // initialize phase of nested loop join - // fill lvector and rvector with matches from the base vectors - VectorData left_data, right_data; - left.Orrify(left_size, left_data); - right.Orrify(right_size, right_data); +Node256::Node256(ART &art, size_t compression_length) : Node(art, NodeType::N256, compression_length) { +} - auto ldata = (T *)left_data.data; - auto rdata = (T *)right_data.data; - idx_t result_count = 0; - for (; rpos < right_size; rpos++) { - idx_t right_position = right_data.sel->get_index(rpos); - if (!right_data.validity.RowIsValid(right_position)) { - continue; - } - for (; lpos < left_size; lpos++) { 
- if (result_count == STANDARD_VECTOR_SIZE) { - // out of space! - return result_count; - } - idx_t left_position = left_data.sel->get_index(lpos); - if (!left_data.validity.RowIsValid(left_position)) { - continue; - } - if (OP::Operation(ldata[left_position], rdata[right_position])) { - // emit tuple - lvector.set_index(result_count, lpos); - rvector.set_index(result_count, rpos); - result_count++; - } - } - lpos = 0; - } - return result_count; +idx_t Node256::GetChildPos(uint8_t k) { + if (child[k]) { + return k; + } else { + return INVALID_INDEX; } +} - template - static idx_t DistinctOperation(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, - idx_t &rpos, SelectionVector &lvector, SelectionVector &rvector, - idx_t current_match_count) { - // initialize phase of nested loop join - // fill lvector and rvector with matches from the base vectors - VectorData left_data, right_data; - left.Orrify(left_size, left_data); - right.Orrify(right_size, right_data); - - auto ldata = (T *)left_data.data; - auto rdata = (T *)right_data.data; - idx_t result_count = 0; - for (; rpos < right_size; rpos++) { - idx_t right_position = right_data.sel->get_index(rpos); - for (; lpos < left_size; lpos++) { - if (result_count == STANDARD_VECTOR_SIZE) { - // out of space! 
- return result_count; - } - idx_t left_position = left_data.sel->get_index(lpos); - if (OP::Operation(ldata[left_position], rdata[right_position], - !left_data.validity.RowIsValid(left_position), - !right_data.validity.RowIsValid(right_position))) { - // emit tuple - lvector.set_index(result_count, lpos); - rvector.set_index(result_count, rpos); - result_count++; - } +idx_t Node256::GetChildGreaterEqual(uint8_t k, bool &equal) { + for (idx_t pos = k; pos < 256; pos++) { + if (child[pos]) { + if (pos == k) { + equal = true; + } else { + equal = false; } - lpos = 0; + return pos; } - return result_count; } -}; - -struct RefineNestedLoopJoin { - template - static idx_t Operation(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, idx_t &rpos, - SelectionVector &lvector, SelectionVector &rvector, idx_t current_match_count) { - VectorData left_data, right_data; - left.Orrify(left_size, left_data); - right.Orrify(right_size, right_data); + return INVALID_INDEX; +} - // refine phase of the nested loop join - // refine lvector and rvector based on matches of subsequent conditions (in case there are multiple conditions - // in the join) - D_ASSERT(current_match_count > 0); - auto ldata = (T *)left_data.data; - auto rdata = (T *)right_data.data; - idx_t result_count = 0; - for (idx_t i = 0; i < current_match_count; i++) { - auto lidx = lvector.get_index(i); - auto ridx = rvector.get_index(i); - auto left_idx = left_data.sel->get_index(lidx); - auto right_idx = right_data.sel->get_index(ridx); - if (!left_data.validity.RowIsValid(left_idx) || !right_data.validity.RowIsValid(right_idx)) { - continue; - } - if (OP::Operation(ldata[left_idx], rdata[right_idx])) { - lvector.set_index(result_count, lidx); - rvector.set_index(result_count, ridx); - result_count++; - } +idx_t Node256::GetMin() { + for (idx_t i = 0; i < 256; i++) { + if (child[i]) { + return i; } - return result_count; } + return INVALID_INDEX; +} - template - static idx_t 
DistinctOperation(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, - idx_t &rpos, SelectionVector &lvector, SelectionVector &rvector, - idx_t current_match_count) { - VectorData left_data, right_data; - left.Orrify(left_size, left_data); - right.Orrify(right_size, right_data); - - // refine phase of the nested loop join - // refine lvector and rvector based on matches of subsequent conditions (in case there are multiple conditions - // in the join) - D_ASSERT(current_match_count > 0); - auto ldata = (T *)left_data.data; - auto rdata = (T *)right_data.data; - idx_t result_count = 0; - for (idx_t i = 0; i < current_match_count; i++) { - auto lidx = lvector.get_index(i); - auto ridx = rvector.get_index(i); - auto left_idx = left_data.sel->get_index(lidx); - auto right_idx = right_data.sel->get_index(ridx); - // null values should be filtered out before - if (OP::Operation(ldata[left_idx], rdata[right_idx], !left_data.validity.RowIsValid(left_idx), - !right_data.validity.RowIsValid(right_idx))) { - lvector.set_index(result_count, lidx); - rvector.set_index(result_count, ridx); - result_count++; - } +idx_t Node256::GetNextPos(idx_t pos) { + for (pos == INVALID_INDEX ? 
pos = 0 : pos++; pos < 256; pos++) { + if (child[pos]) { + return pos; } - return result_count; - } -}; - -template -static idx_t NestedLoopJoinTypeSwitch(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, - idx_t &rpos, SelectionVector &lvector, SelectionVector &rvector, - idx_t current_match_count) { - switch (left.GetType().InternalType()) { - case PhysicalType::BOOL: - case PhysicalType::INT8: - return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, rvector, - current_match_count); - case PhysicalType::INT16: - return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, rvector, - current_match_count); - case PhysicalType::INT32: - return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, rvector, - current_match_count); - case PhysicalType::INT64: - return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, rvector, - current_match_count); - case PhysicalType::UINT8: - return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, rvector, - current_match_count); - case PhysicalType::UINT16: - return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, - rvector, current_match_count); - case PhysicalType::UINT32: - return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, - rvector, current_match_count); - case PhysicalType::UINT64: - return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, - rvector, current_match_count); - case PhysicalType::INT128: - return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, - rvector, current_match_count); - case PhysicalType::FLOAT: - return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, rvector, - current_match_count); - case PhysicalType::DOUBLE: - return NLTYPE::template 
Operation(left, right, left_size, right_size, lpos, rpos, lvector, rvector, - current_match_count); - case PhysicalType::INTERVAL: - return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, - rvector, current_match_count); - case PhysicalType::VARCHAR: - return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, - rvector, current_match_count); - default: - throw NotImplementedException("Unimplemented type for join!"); } + return Node::GetNextPos(pos); } -template -static idx_t DistinctNestedLoopJoinTypeSwitch(Vector &left, Vector &right, idx_t left_size, idx_t right_size, - idx_t &lpos, idx_t &rpos, SelectionVector &lvector, - SelectionVector &rvector, idx_t current_match_count) { - switch (left.GetType().InternalType()) { - case PhysicalType::BOOL: - case PhysicalType::INT8: - return NLTYPE::template DistinctOperation(left, right, left_size, right_size, lpos, rpos, lvector, - rvector, current_match_count); - case PhysicalType::INT16: - return NLTYPE::template DistinctOperation(left, right, left_size, right_size, lpos, rpos, lvector, - rvector, current_match_count); - case PhysicalType::INT32: - return NLTYPE::template DistinctOperation(left, right, left_size, right_size, lpos, rpos, lvector, - rvector, current_match_count); - case PhysicalType::INT64: - return NLTYPE::template DistinctOperation(left, right, left_size, right_size, lpos, rpos, lvector, - rvector, current_match_count); - case PhysicalType::UINT8: - return NLTYPE::template DistinctOperation(left, right, left_size, right_size, lpos, rpos, lvector, - rvector, current_match_count); - case PhysicalType::UINT16: - return NLTYPE::template DistinctOperation(left, right, left_size, right_size, lpos, rpos, lvector, - rvector, current_match_count); - case PhysicalType::UINT32: - return NLTYPE::template DistinctOperation(left, right, left_size, right_size, lpos, rpos, lvector, - rvector, current_match_count); - case PhysicalType::UINT64: - return 
NLTYPE::template DistinctOperation(left, right, left_size, right_size, lpos, rpos, lvector, - rvector, current_match_count); - case PhysicalType::INT128: - return NLTYPE::template DistinctOperation(left, right, left_size, right_size, lpos, rpos, - lvector, rvector, current_match_count); - case PhysicalType::FLOAT: - return NLTYPE::template DistinctOperation(left, right, left_size, right_size, lpos, rpos, lvector, - rvector, current_match_count); - case PhysicalType::DOUBLE: - return NLTYPE::template DistinctOperation(left, right, left_size, right_size, lpos, rpos, lvector, - rvector, current_match_count); - default: - throw NotImplementedException("Unimplemented type for join!"); - } +unique_ptr *Node256::GetChild(idx_t pos) { + D_ASSERT(child[pos]); + return &child[pos]; } -template -idx_t NestedLoopJoinComparisonSwitch(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, - idx_t &rpos, SelectionVector &lvector, SelectionVector &rvector, - idx_t current_match_count, ExpressionType comparison_type) { - D_ASSERT(left.GetType() == right.GetType()); - switch (comparison_type) { - case ExpressionType::COMPARE_EQUAL: - return NestedLoopJoinTypeSwitch(left, right, left_size, right_size, lpos, rpos, lvector, - rvector, current_match_count); - case ExpressionType::COMPARE_NOTEQUAL: - return NestedLoopJoinTypeSwitch(left, right, left_size, right_size, lpos, rpos, - lvector, rvector, current_match_count); - case ExpressionType::COMPARE_LESSTHAN: - return NestedLoopJoinTypeSwitch(left, right, left_size, right_size, lpos, rpos, - lvector, rvector, current_match_count); - case ExpressionType::COMPARE_GREATERTHAN: - return NestedLoopJoinTypeSwitch(left, right, left_size, right_size, lpos, rpos, - lvector, rvector, current_match_count); - case ExpressionType::COMPARE_LESSTHANOREQUALTO: - return NestedLoopJoinTypeSwitch(left, right, left_size, right_size, lpos, rpos, - lvector, rvector, current_match_count); - case 
ExpressionType::COMPARE_GREATERTHANOREQUALTO: - return NestedLoopJoinTypeSwitch(left, right, left_size, right_size, lpos, - rpos, lvector, rvector, current_match_count); - case ExpressionType::COMPARE_DISTINCT_FROM: - return DistinctNestedLoopJoinTypeSwitch( - left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count); - default: - throw NotImplementedException("Unimplemented comparison type for join!"); - } +void Node256::Insert(ART &art, unique_ptr &node, uint8_t key_byte, unique_ptr &child) { + Node256 *n = static_cast(node.get()); + + n->count++; + n->child[key_byte] = move(child); } -idx_t NestedLoopJoinInner::Perform(idx_t &lpos, idx_t &rpos, DataChunk &left_conditions, DataChunk &right_conditions, - SelectionVector &lvector, SelectionVector &rvector, - vector &conditions) { - D_ASSERT(left_conditions.ColumnCount() == right_conditions.ColumnCount()); - if (lpos >= left_conditions.size() || rpos >= right_conditions.size()) { - return 0; - } - // for the first condition, lvector and rvector are not set yet - // we initialize them using the InitialNestedLoopJoin - idx_t match_count = NestedLoopJoinComparisonSwitch( - left_conditions.data[0], right_conditions.data[0], left_conditions.size(), right_conditions.size(), lpos, rpos, - lvector, rvector, 0, conditions[0].comparison); - // now resolve the rest of the conditions - for (idx_t i = 1; i < conditions.size(); i++) { - // check if we have run out of tuples to compare - if (match_count == 0) { - return 0; +void Node256::Erase(ART &art, unique_ptr &node, int pos) { + Node256 *n = static_cast(node.get()); + + n->child[pos].reset(); + n->count--; + if (node->count <= 36) { + auto new_node = make_unique(art, n->prefix_length); + CopyPrefix(art, n, new_node.get()); + for (idx_t i = 0; i < 256; i++) { + if (n->child[i]) { + new_node->child_index[i] = new_node->count; + new_node->child[new_node->count] = move(n->child[i]); + new_node->count++; + } } - // if not, get the vectors to compare - 
Vector &l = left_conditions.data[i]; - Vector &r = right_conditions.data[i]; - // then we refine the currently obtained results using the RefineNestedLoopJoin - match_count = NestedLoopJoinComparisonSwitch( - l, r, left_conditions.size(), right_conditions.size(), lpos, rpos, lvector, rvector, match_count, - conditions[i].comparison); + node = move(new_node); } - return match_count; } } // namespace duckdb @@ -36227,188 +43412,236 @@ idx_t NestedLoopJoinInner::Perform(idx_t &lpos, idx_t &rpos, DataChunk &left_con namespace duckdb { -template -static void TemplatedMarkJoin(Vector &left, Vector &right, idx_t lcount, idx_t rcount, bool found_match[]) { - VectorData left_data, right_data; - left.Orrify(lcount, left_data); - right.Orrify(rcount, right_data); +Node4::Node4(ART &art, size_t compression_length) : Node(art, NodeType::N4, compression_length) { + memset(key, 0, sizeof(key)); +} - auto ldata = (T *)left_data.data; - auto rdata = (T *)right_data.data; - for (idx_t i = 0; i < lcount; i++) { - if (found_match[i]) { - continue; - } - auto lidx = left_data.sel->get_index(i); - if (!left_data.validity.RowIsValid(lidx)) { - continue; +idx_t Node4::GetChildPos(uint8_t k) { + for (idx_t pos = 0; pos < count; pos++) { + if (key[pos] == k) { + return pos; } - for (idx_t j = 0; j < rcount; j++) { - auto ridx = right_data.sel->get_index(j); - if (!right_data.validity.RowIsValid(ridx)) { - continue; - } - if (OP::Operation(ldata[lidx], rdata[ridx])) { - found_match[i] = true; - break; + } + return Node::GetChildPos(k); +} + +idx_t Node4::GetChildGreaterEqual(uint8_t k, bool &equal) { + for (idx_t pos = 0; pos < count; pos++) { + if (key[pos] >= k) { + if (key[pos] == k) { + equal = true; + } else { + equal = false; } + return pos; } } + return Node::GetChildGreaterEqual(k, equal); } -template -static void MarkJoinSwitch(Vector &left, Vector &right, idx_t lcount, idx_t rcount, bool found_match[]) { - switch (left.GetType().InternalType()) { - case PhysicalType::BOOL: - case 
PhysicalType::INT8: - return TemplatedMarkJoin(left, right, lcount, rcount, found_match); - case PhysicalType::INT16: - return TemplatedMarkJoin(left, right, lcount, rcount, found_match); - case PhysicalType::INT32: - return TemplatedMarkJoin(left, right, lcount, rcount, found_match); - case PhysicalType::INT64: - return TemplatedMarkJoin(left, right, lcount, rcount, found_match); - case PhysicalType::UINT8: - return TemplatedMarkJoin(left, right, lcount, rcount, found_match); - case PhysicalType::UINT16: - return TemplatedMarkJoin(left, right, lcount, rcount, found_match); - case PhysicalType::UINT32: - return TemplatedMarkJoin(left, right, lcount, rcount, found_match); - case PhysicalType::UINT64: - return TemplatedMarkJoin(left, right, lcount, rcount, found_match); - case PhysicalType::FLOAT: - return TemplatedMarkJoin(left, right, lcount, rcount, found_match); - case PhysicalType::DOUBLE: - return TemplatedMarkJoin(left, right, lcount, rcount, found_match); - case PhysicalType::VARCHAR: - return TemplatedMarkJoin(left, right, lcount, rcount, found_match); - default: - throw NotImplementedException("Unimplemented type for join!"); - } +idx_t Node4::GetMin() { + return 0; } -static void MarkJoinComparisonSwitch(Vector &left, Vector &right, idx_t lcount, idx_t rcount, bool found_match[], - ExpressionType comparison_type) { - D_ASSERT(left.GetType() == right.GetType()); - switch (comparison_type) { - case ExpressionType::COMPARE_EQUAL: - return MarkJoinSwitch(left, right, lcount, rcount, found_match); - case ExpressionType::COMPARE_NOTEQUAL: - return MarkJoinSwitch(left, right, lcount, rcount, found_match); - case ExpressionType::COMPARE_LESSTHAN: - return MarkJoinSwitch(left, right, lcount, rcount, found_match); - case ExpressionType::COMPARE_GREATERTHAN: - return MarkJoinSwitch(left, right, lcount, rcount, found_match); - case ExpressionType::COMPARE_LESSTHANOREQUALTO: - return MarkJoinSwitch(left, right, lcount, rcount, found_match); - case 
ExpressionType::COMPARE_GREATERTHANOREQUALTO: - return MarkJoinSwitch(left, right, lcount, rcount, found_match); - default: - throw NotImplementedException("Unimplemented comparison type for join!"); +idx_t Node4::GetNextPos(idx_t pos) { + if (pos == INVALID_INDEX) { + return 0; } + pos++; + return pos < count ? pos : INVALID_INDEX; } -void NestedLoopJoinMark::Perform(DataChunk &left, ChunkCollection &right, bool found_match[], - vector &conditions) { - // initialize a new temporary selection vector for the left chunk - // loop over all chunks in the RHS - for (idx_t chunk_idx = 0; chunk_idx < right.ChunkCount(); chunk_idx++) { - DataChunk &right_chunk = right.GetChunk(chunk_idx); - for (idx_t i = 0; i < conditions.size(); i++) { - MarkJoinComparisonSwitch(left.data[i], right_chunk.data[i], left.size(), right_chunk.size(), found_match, - conditions[i].comparison); - } - } +unique_ptr *Node4::GetChild(idx_t pos) { + D_ASSERT(pos < count); + return &child[pos]; } -} // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp -// -// -//===----------------------------------------------------------------------===// +void Node4::Insert(ART &art, unique_ptr &node, uint8_t key_byte, unique_ptr &child) { + Node4 *n = static_cast(node.get()); + // Insert leaf into inner node + if (node->count < 4) { + // Insert element + idx_t pos = 0; + while ((pos < node->count) && (n->key[pos] < key_byte)) { + pos++; + } + if (n->child[pos] != nullptr) { + for (idx_t i = n->count; i > pos; i--) { + n->key[i] = n->key[i - 1]; + n->child[i] = move(n->child[i - 1]); + } + } + n->key[pos] = key_byte; + n->child[pos] = move(child); + n->count++; + } else { + // Grow to Node16 + auto new_node = make_unique(art, n->prefix_length); + new_node->count = 4; + CopyPrefix(art, node.get(), new_node.get()); + for (idx_t i = 0; i < 4; i++) { + new_node->key[i] = n->key[i]; + 
new_node->child[i] = move(n->child[i]); + } + node = move(new_node); + Node16::Insert(art, node, key_byte, child); + } +} +void Node4::Erase(ART &art, unique_ptr &node, int pos) { + Node4 *n = static_cast(node.get()); + D_ASSERT(pos < n->count); + // erase the child and decrease the count + n->child[pos].reset(); + n->count--; + // potentially move any children backwards + for (; pos < n->count; pos++) { + n->key[pos] = n->key[pos + 1]; + n->child[pos] = move(n->child[pos + 1]); + } + // This is a one way node + if (n->count == 1) { + auto childref = n->child[0].get(); + //! concatenate prefixes + auto new_length = node->prefix_length + childref->prefix_length + 1; + //! have to allocate space in our prefix array + unique_ptr new_prefix = unique_ptr(new uint8_t[new_length]); + ; + //! first move the existing prefix (if any) + for (uint32_t i = 0; i < childref->prefix_length; i++) { + new_prefix[new_length - (i + 1)] = childref->prefix[childref->prefix_length - (i + 1)]; + } + //! now move the current key as part of the prefix + new_prefix[node->prefix_length] = n->key[0]; + //! finally add the old prefix + for (uint32_t i = 0; i < node->prefix_length; i++) { + new_prefix[i] = node->prefix[i]; + } + //! set new prefix and move the child + childref->prefix = move(new_prefix); + childref->prefix_length = new_length; + node = move(n->child[0]); + } +} -namespace duckdb { +} // namespace duckdb -class ClientContext; -class BufferManager; -//! PhysicalHashAggregate is an group-by and aggregate implementation that uses -//! 
a hash table to perform the grouping -class PhysicalHashAggregate : public PhysicalSink { -public: - PhysicalHashAggregate(ClientContext &context, vector types, vector> expressions, - idx_t estimated_cardinality, PhysicalOperatorType type = PhysicalOperatorType::HASH_GROUP_BY); - PhysicalHashAggregate(ClientContext &context, vector types, vector> expressions, - vector> groups, idx_t estimated_cardinality, - PhysicalOperatorType type = PhysicalOperatorType::HASH_GROUP_BY); - //! The groups - vector> groups; - //! The aggregates that have to be computed - vector> aggregates; - //! Whether or not the aggregate is an implicit (i.e. ungrouped) aggregate - bool is_implicit_aggr; - //! Whether or not all aggregates are combinable - bool all_combinable; - //! Whether or not any aggregation is DISTINCT - bool any_distinct; +namespace duckdb { - //! The group types - vector group_types; - //! The payload types - vector payload_types; - //! The aggregate return types - vector aggregate_return_types; +Node48::Node48(ART &art, size_t compression_length) : Node(art, NodeType::N48, compression_length) { + for (idx_t i = 0; i < 256; i++) { + child_index[i] = Node::EMPTY_MARKER; + } +} - //! 
Pointers to the aggregates - vector bindings; +idx_t Node48::GetChildPos(uint8_t k) { + if (child_index[k] == Node::EMPTY_MARKER) { + return INVALID_INDEX; + } else { + return k; + } +} - unordered_map ht; +idx_t Node48::GetChildGreaterEqual(uint8_t k, bool &equal) { + for (idx_t pos = k; pos < 256; pos++) { + if (child_index[pos] != Node::EMPTY_MARKER) { + if (pos == k) { + equal = true; + } else { + equal = false; + } + return pos; + } + } + return Node::GetChildGreaterEqual(k, equal); +} -public: - void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, DataChunk &input) override; - void Combine(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate) override; - void Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr gstate) override; +idx_t Node48::GetNextPos(idx_t pos) { + for (pos == INVALID_INDEX ? pos = 0 : pos++; pos < 256; pos++) { + if (child_index[pos] != Node::EMPTY_MARKER) { + return pos; + } + } + return Node::GetNextPos(pos); +} - void FinalizeImmediate(ClientContext &context, unique_ptr gstate); +unique_ptr *Node48::GetChild(idx_t pos) { + D_ASSERT(child_index[pos] != Node::EMPTY_MARKER); + return &child[child_index[pos]]; +} - unique_ptr GetLocalSinkState(ExecutionContext &context) override; - unique_ptr GetGlobalState(ClientContext &context) override; +idx_t Node48::GetMin() { + for (idx_t i = 0; i < 256; i++) { + if (child_index[i] != Node::EMPTY_MARKER) { + return i; + } + } + return INVALID_INDEX; +} - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; - unique_ptr GetOperatorState() override; +void Node48::Insert(ART &art, unique_ptr &node, uint8_t key_byte, unique_ptr &child) { + Node48 *n = static_cast(node.get()); - string ParamsToString() const override; + // Insert leaf into inner node + if (node->count < 48) { + // Insert element + idx_t pos = n->count; + if (n->child[pos]) { + // find an empty position in the 
node list if the current position is occupied + pos = 0; + while (n->child[pos]) { + pos++; + } + } + n->child[pos] = move(child); + n->child_index[key_byte] = pos; + n->count++; + } else { + // Grow to Node256 + auto new_node = make_unique(art, n->prefix_length); + for (idx_t i = 0; i < 256; i++) { + if (n->child_index[i] != Node::EMPTY_MARKER) { + new_node->child[i] = move(n->child[n->child_index[i]]); + } + } + new_node->count = n->count; + CopyPrefix(art, n, new_node.get()); + node = move(new_node); + Node256::Insert(art, node, key_byte, child); + } +} -private: - //! how many groups can we have in the operator before we switch to radix partitioning - idx_t radix_limit; +void Node48::Erase(ART &art, unique_ptr &node, int pos) { + Node48 *n = static_cast(node.get()); -private: - void FinalizeInternal(ClientContext &context, unique_ptr gstate, bool immediate, - Pipeline *pipeline); - bool ForceSingleHT(GlobalOperatorState &state); -}; + n->child[n->child_index[pos]].reset(); + n->child_index[pos] = Node::EMPTY_MARKER; + n->count--; + if (node->count <= 12) { + auto new_node = make_unique(art, n->prefix_length); + CopyPrefix(art, n, new_node.get()); + for (idx_t i = 0; i < 256; i++) { + if (n->child_index[i] != Node::EMPTY_MARKER) { + new_node->key[new_node->count] = i; + new_node->child[new_node->count++] = move(n->child[n->child_index[i]]); + } + } + node = move(new_node); + } +} } // namespace duckdb - - - - - //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/execution/partitionable_hashtable.hpp +// duckdb/execution/join_hashtable.hpp // // //===----------------------------------------------------------------------===// @@ -36417,53 +43650,6 @@ class PhysicalHashAggregate : public PhysicalSink { -namespace duckdb { - -struct RadixPartitionInfo { - explicit RadixPartitionInfo(idx_t _n_partitions_upper_bound); - idx_t n_partitions; - idx_t radix_bits; - hash_t radix_mask; - constexpr static idx_t RADIX_SHIFT 
= 40; -}; - -typedef vector> HashTableList; - -class PartitionableHashTable { -public: - PartitionableHashTable(BufferManager &buffer_manager_p, RadixPartitionInfo &partition_info_p, - vector group_types_p, vector payload_types_p, - vector bindings_p); - - idx_t AddChunk(DataChunk &groups, DataChunk &payload, bool do_partition); - void Partition(); - bool IsPartitioned(); - - HashTableList GetPartition(idx_t partition); - HashTableList GetUnpartitioned(); - - void Finalize(); - -private: - BufferManager &buffer_manager; - vector group_types; - vector payload_types; - vector bindings; - - bool is_partitioned; - RadixPartitionInfo &partition_info; - vector sel_vectors; - vector sel_vector_sizes; - DataChunk group_subset, payload_subset; - Vector hashes, hashes_subset; - - HashTableList unpartitioned_hts; - unordered_map radix_partitioned_hts; - -private: - idx_t ListAddChunk(HashTableList &list, DataChunk &groups, Vector &group_hashes, DataChunk &payload); -}; -} // namespace duckdb @@ -36472,786 +43658,961 @@ class PartitionableHashTable { namespace duckdb { +class BufferManager; +class BufferHandle; -PhysicalHashAggregate::PhysicalHashAggregate(ClientContext &context, vector types, - vector> expressions, idx_t estimated_cardinality, - PhysicalOperatorType type) - : PhysicalHashAggregate(context, move(types), move(expressions), {}, estimated_cardinality, type) { -} - -PhysicalHashAggregate::PhysicalHashAggregate(ClientContext &context, vector types, - vector> expressions, - vector> groups_p, idx_t estimated_cardinality, - PhysicalOperatorType type) - : PhysicalSink(type, move(types), estimated_cardinality), groups(move(groups_p)), all_combinable(true), - any_distinct(false) { - // get a list of all aggregates to be computed - // fake a single group with a constant value for aggregation without groups - if (this->groups.empty()) { - group_types.push_back(LogicalType::TINYINT); - is_implicit_aggr = true; - } else { - is_implicit_aggr = false; - } - for (auto &expr : 
groups) { - group_types.push_back(expr->return_type); - } - vector payload_types_filters; - for (auto &expr : expressions) { - D_ASSERT(expr->expression_class == ExpressionClass::BOUND_AGGREGATE); - D_ASSERT(expr->IsAggregate()); - auto &aggr = (BoundAggregateExpression &)*expr; - bindings.push_back(&aggr); - - if (aggr.distinct) { - any_distinct = true; - } - - aggregate_return_types.push_back(aggr.return_type); - for (auto &child : aggr.children) { - payload_types.push_back(child->return_type); - } - if (aggr.filter) { - payload_types_filters.push_back(aggr.filter->return_type); - } - if (!aggr.function.combine) { - all_combinable = false; - } - aggregates.push_back(move(expr)); - } - - for (const auto &pay_filters : payload_types_filters) { - payload_types.push_back(pay_filters); - } - - // 10000 seems like a good compromise here - radix_limit = 10000; -} - -//===--------------------------------------------------------------------===// -// Sink -//===--------------------------------------------------------------------===// -class HashAggregateGlobalState : public GlobalOperatorState { -public: - HashAggregateGlobalState(PhysicalHashAggregate &op_p, ClientContext &context) - : op(op_p), is_empty(true), lossy_total_groups(0), - partition_info((idx_t)TaskScheduler::GetScheduler(context).NumberOfThreads()) { +struct JoinHTScanState { + JoinHTScanState() : position(0), block_position(0) { } - PhysicalHashAggregate &op; - vector> intermediate_hts; - vector> finalized_hts; - - //! Whether or not any tuples were added to the HT - bool is_empty; - //! The lock for updating the global aggregate state - std::mutex lock; - //! a counter to determine if we should switch over to p - idx_t lossy_total_groups; - - RadixPartitionInfo partition_info; + idx_t position; + idx_t block_position; + mutex lock; }; -class HashAggregateLocalState : public LocalSinkState { +//! JoinHashTable is a linear probing HT that is used for computing joins +/*! 
+ The JoinHashTable concatenates incoming chunks inside a linked list of + data ptrs. The storage looks like this internally. + [SERIALIZED ROW][NEXT POINTER] + [SERIALIZED ROW][NEXT POINTER] + There is a separate hash map of pointers that point into this table. + This is what is used to resolve the hashes. + [POINTER] + [POINTER] + [POINTER] + The pointers are either NULL +*/ +class JoinHashTable { public: - explicit HashAggregateLocalState(PhysicalHashAggregate &op_p) : op(op_p), is_empty(true) { - group_chunk.InitializeEmpty(op.group_types); - if (!op.payload_types.empty()) { - aggregate_input_chunk.InitializeEmpty(op.payload_types); - } + using ValidityBytes = TemplatedValidityMask; - // if there are no groups we create a fake group so everything has the same group - if (op.groups.empty()) { - group_chunk.data[0].Reference(Value::TINYINT(42)); - } - } + //! Scan structure that can be used to resume scans, as a single probe can + //! return 1024*N values (where N is the size of the HT). This is + //! returned by the JoinHashTable::Scan function and can be used to resume a + //! probe. + struct ScanStructure { + unique_ptr key_data; + Vector pointers; + idx_t count; + SelectionVector sel_vector; + // whether or not the given tuple has found a match + unique_ptr found_match; + JoinHashTable &ht; + bool finished; - PhysicalHashAggregate &op; + explicit ScanStructure(JoinHashTable &ht); + //! Get the next batch of data from the scan structure + void Next(DataChunk &keys, DataChunk &left, DataChunk &result); - DataChunk group_chunk; - DataChunk aggregate_input_chunk; - //! The aggregate HT - unique_ptr ht; + private: + void AdvancePointers(); + void AdvancePointers(const SelectionVector &sel, idx_t sel_count); - //! Whether or not any tuples were added to the HT - bool is_empty; -}; + //! Next operator for the inner join + void NextInnerJoin(DataChunk &keys, DataChunk &left, DataChunk &result); + //! 
Next operator for the semi join + void NextSemiJoin(DataChunk &keys, DataChunk &left, DataChunk &result); + //! Next operator for the anti join + void NextAntiJoin(DataChunk &keys, DataChunk &left, DataChunk &result); + //! Next operator for the left outer join + void NextLeftJoin(DataChunk &keys, DataChunk &left, DataChunk &result); + //! Next operator for the mark join + void NextMarkJoin(DataChunk &keys, DataChunk &left, DataChunk &result); + //! Next operator for the single join + void NextSingleJoin(DataChunk &keys, DataChunk &left, DataChunk &result); -unique_ptr PhysicalHashAggregate::GetGlobalState(ClientContext &context) { - return make_unique(*this, context); -} + //! Scan the hashtable for matches of the specified keys, setting the found_match[] array to true or false for + //! every tuple + void ScanKeyMatches(DataChunk &keys); + template + void NextSemiOrAntiJoin(DataChunk &keys, DataChunk &left, DataChunk &result); -unique_ptr PhysicalHashAggregate::GetLocalSinkState(ExecutionContext &context) { - return make_unique(*this); -} + void ConstructMarkJoinResult(DataChunk &join_keys, DataChunk &child, DataChunk &result); -void PhysicalHashAggregate::Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, - DataChunk &input) { - auto &llstate = (HashAggregateLocalState &)lstate; - auto &gstate = (HashAggregateGlobalState &)state; + idx_t ScanInnerJoin(DataChunk &keys, SelectionVector &result_vector); - DataChunk &group_chunk = llstate.group_chunk; - DataChunk &aggregate_input_chunk = llstate.aggregate_input_chunk; + void GatherResult(Vector &result, const SelectionVector &result_vector, const SelectionVector &sel_vector, + const idx_t count, const idx_t col_idx); + void GatherResult(Vector &result, const SelectionVector &sel_vector, const idx_t count, const idx_t col_idx); - for (idx_t group_idx = 0; group_idx < groups.size(); group_idx++) { - auto &group = groups[group_idx]; - D_ASSERT(group->type == ExpressionType::BOUND_REF); 
- auto &bound_ref_expr = (BoundReferenceExpression &)*group; - group_chunk.data[group_idx].Reference(input.data[bound_ref_expr.index]); - } - idx_t aggregate_input_idx = 0; - for (auto &aggregate : aggregates) { - auto &aggr = (BoundAggregateExpression &)*aggregate; - for (auto &child_expr : aggr.children) { - D_ASSERT(child_expr->type == ExpressionType::BOUND_REF); - auto &bound_ref_expr = (BoundReferenceExpression &)*child_expr; - aggregate_input_chunk.data[aggregate_input_idx++].Reference(input.data[bound_ref_expr.index]); - } - } - for (auto &aggregate : aggregates) { - auto &aggr = (BoundAggregateExpression &)*aggregate; - if (aggr.filter) { - auto &bound_ref_expr = (BoundReferenceExpression &)*aggr.filter; - auto it = ht.find(aggr.filter.get()); - if (it == ht.end()) { - aggregate_input_chunk.data[aggregate_input_idx].Reference(input.data[bound_ref_expr.index]); - ht[aggr.filter.get()] = bound_ref_expr.index; - bound_ref_expr.index = aggregate_input_idx++; - } else { - aggregate_input_chunk.data[aggregate_input_idx++].Reference(input.data[it->second]); - } - } - } + idx_t ResolvePredicates(DataChunk &keys, SelectionVector &match_sel, SelectionVector *no_match_sel); + }; - group_chunk.SetCardinality(input.size()); - aggregate_input_chunk.SetCardinality(input.size()); +private: +public: + JoinHashTable(BufferManager &buffer_manager, vector &conditions, vector build_types, + JoinType type); + ~JoinHashTable(); - group_chunk.Verify(); - aggregate_input_chunk.Verify(); - D_ASSERT(aggregate_input_chunk.ColumnCount() == 0 || group_chunk.size() == aggregate_input_chunk.size()); + //! Add the given data to the HT + void Build(DataChunk &keys, DataChunk &input); + //! Finalize the build of the HT, constructing the actual hash table and making the HT ready for probing. Finalize + //! must be called before any call to Probe, and after Finalize is called Build should no longer be ever called. + void Finalize(); + //! 
Probe the HT with the given input chunk, resulting in the given result + unique_ptr Probe(DataChunk &keys); + //! Scan the HT to construct the final full outer join result after + void ScanFullOuter(DataChunk &result, JoinHTScanState &state); - // if we have non-combinable aggregates (e.g. string_agg) or any distinct aggregates we cannot keep parallel hash - // tables - if (ForceSingleHT(state)) { - lock_guard glock(gstate.lock); - gstate.is_empty = gstate.is_empty && group_chunk.size() == 0; - if (gstate.finalized_hts.empty()) { - gstate.finalized_hts.push_back( - make_unique(BufferManager::GetBufferManager(context.client), group_types, - payload_types, bindings, HtEntryType::HT_WIDTH_64)); - } - D_ASSERT(gstate.finalized_hts.size() == 1); - gstate.lossy_total_groups += gstate.finalized_hts[0]->AddChunk(group_chunk, aggregate_input_chunk); - return; + idx_t Count() { + return block_collection->count; } - D_ASSERT(all_combinable); - D_ASSERT(!any_distinct); + //! BufferManager + BufferManager &buffer_manager; + //! The types of the keys used in equality comparison + vector equality_types; + //! The types of the keys + vector condition_types; + //! The types of all conditions + vector build_types; + //! The comparison predicates + vector predicates; + //! Data column layout + RowLayout layout; + //! The size of an entry as stored in the HashTable + idx_t entry_size; + //! The total tuple size + idx_t tuple_size; + //! Next pointer offset in tuple + idx_t pointer_offset; + //! A constant false column for initialising right outer joins + Vector vfound; + //! The join type of the HT + JoinType join_type; + //! Whether or not the HT has been finalized + bool finalized; + //! Whether or not any of the key elements contain NULL + bool has_null; + //! Bitmask for getting relevant bits from the hashes to determine the position + uint64_t bitmask; - if (group_chunk.size() > 0) { - llstate.is_empty = false; - } + struct { + mutex mj_lock; + //! 
The types of the duplicate eliminated columns, only used in correlated MARK JOIN for flattening ANY()/ALL() + //! expressions + vector correlated_types; + //! The aggregate expression nodes used by the HT + vector> correlated_aggregates; + //! The HT that holds the group counts for every correlated column + unique_ptr correlated_counts; + //! Group chunk used for aggregating into correlated_counts + DataChunk group_chunk; + //! Payload chunk used for aggregating into correlated_counts + DataChunk correlated_payload; + //! Result chunk used for aggregating into correlated_counts + DataChunk result_chunk; + } correlated_mark_join_info; - if (!llstate.ht) { - llstate.ht = make_unique(BufferManager::GetBufferManager(context.client), - gstate.partition_info, group_types, payload_types, bindings); - } +private: + void Hash(DataChunk &keys, const SelectionVector &sel, idx_t count, Vector &hashes); - gstate.lossy_total_groups += - llstate.ht->AddChunk(group_chunk, aggregate_input_chunk, - gstate.lossy_total_groups > radix_limit && gstate.partition_info.n_partitions > 1); -} + //! Apply a bitmask to the hashes + void ApplyBitmask(Vector &hashes, idx_t count); + void ApplyBitmask(Vector &hashes, const SelectionVector &sel, idx_t count, Vector &pointers); + //! Insert the given set of locations into the HT with the given set of + //! hashes. Caller should hold lock in parallel HT. 
+ void InsertHashes(Vector &hashes, idx_t count, data_ptr_t key_locations[]); -class PhysicalHashAggregateState : public PhysicalOperatorState { -public: - PhysicalHashAggregateState(PhysicalOperator &op, vector &group_types, - vector &aggregate_types, PhysicalOperator *child) - : PhysicalOperatorState(op, child), ht_index(0), ht_scan_position(0) { - auto scan_chunk_types = group_types; - for (auto &aggr_type : aggregate_types) { - scan_chunk_types.push_back(aggr_type); - } - scan_chunk.Initialize(scan_chunk_types); - } + idx_t PrepareKeys(DataChunk &keys, unique_ptr &key_data, const SelectionVector *¤t_sel, + SelectionVector &sel, bool build_side); - //! Materialized GROUP BY expressions & aggregates - DataChunk scan_chunk; + //! The RowDataCollection holding the main data of the hash table + unique_ptr block_collection; + //! The stringheap of the JoinHashTable + unique_ptr string_heap; + //! Pinned handles, these are pinned during finalization only + vector> pinned_handles; + //! The hash map of the HT, created after finalization + unique_ptr hash_map; + //! Whether or not NULL values are considered equal in each of the comparisons + vector null_values_are_equal; - //! The current position to scan the HT for output tuples - idx_t ht_index; - idx_t ht_scan_position; + //! 
Copying not allowed + JoinHashTable(const JoinHashTable &) = delete; }; -void PhysicalHashAggregate::Combine(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate) { - auto &gstate = (HashAggregateGlobalState &)state; - auto &llstate = (HashAggregateLocalState &)lstate; +} // namespace duckdb - // this actually does not do a lot but just pushes the local HTs into the global state so we can later combine them - // in parallel - if (ForceSingleHT(state)) { - D_ASSERT(gstate.finalized_hts.size() <= 1); - return; - } - if (!llstate.ht) { - return; // no data - } - if (!llstate.ht->IsPartitioned() && gstate.partition_info.n_partitions > 1 && - gstate.lossy_total_groups > radix_limit) { - llstate.ht->Partition(); - } - lock_guard glock(gstate.lock); - D_ASSERT(all_combinable); - D_ASSERT(!any_distinct); - if (!llstate.is_empty) { - gstate.is_empty = false; - } - // we will never add new values to these HTs so we can drop the first part of the HT - llstate.ht->Finalize(); - // at this point we just collect them the PhysicalHashAggregateFinalizeTask (below) will merge them in parallel - gstate.intermediate_hts.push_back(move(llstate.ht)); -} -// this task is run in multiple threads and combines the radix-partitioned hash tables into a single onen and then -// folds them into the global ht finally. 
-class PhysicalHashAggregateFinalizeTask : public Task { -public: - PhysicalHashAggregateFinalizeTask(Pipeline &parent_p, HashAggregateGlobalState &state_p, idx_t radix_p) - : parent(parent_p), state(state_p), radix(radix_p) { - } - static void FinalizeHT(HashAggregateGlobalState &gstate, idx_t radix) { - D_ASSERT(gstate.finalized_hts[radix]); - for (auto &pht : gstate.intermediate_hts) { - for (auto &ht : pht->GetPartition(radix)) { - gstate.finalized_hts[radix]->Combine(*ht); - ht.reset(); - } +namespace duckdb { + +using ValidityBytes = JoinHashTable::ValidityBytes; +using ScanStructure = JoinHashTable::ScanStructure; + +JoinHashTable::JoinHashTable(BufferManager &buffer_manager, vector &conditions, + vector btypes, JoinType type) + : buffer_manager(buffer_manager), build_types(move(btypes)), entry_size(0), tuple_size(0), + vfound(Value::BOOLEAN(false)), join_type(type), finalized(false), has_null(false) { + for (auto &condition : conditions) { + D_ASSERT(condition.left->return_type == condition.right->return_type); + auto type = condition.left->return_type; + if (condition.comparison == ExpressionType::COMPARE_EQUAL) { + // all equality conditions should be at the front + // all other conditions at the back + // this assert checks that + D_ASSERT(equality_types.size() == condition_types.size()); + equality_types.push_back(type); } - gstate.finalized_hts[radix]->Finalize(); + predicates.push_back(condition.comparison); + null_values_are_equal.push_back(condition.null_values_are_equal); + D_ASSERT(!condition.null_values_are_equal || + (condition.null_values_are_equal && condition.comparison == ExpressionType::COMPARE_EQUAL)); + + condition_types.push_back(type); } + // at least one equality is necessary + D_ASSERT(!equality_types.empty()); - void Execute() override { - FinalizeHT(state, radix); - lock_guard glock(state.lock); - parent.finished_tasks++; - // finish the whole pipeline - if (parent.total_tasks == parent.finished_tasks) { - parent.Finish(); - } + // 
Types for the layout + vector layout_types(condition_types); + layout_types.insert(layout_types.end(), build_types.begin(), build_types.end()); + if (IsRightOuterJoin(join_type)) { + // full/right outer joins need an extra bool to keep track of whether or not a tuple has found a matching entry + // we place the bool before the NEXT pointer + layout_types.emplace_back(LogicalType::BOOLEAN); } + layout_types.emplace_back(LogicalType::HASH); + layout.Initialize(layout_types, false); -private: - Pipeline &parent; - HashAggregateGlobalState &state; - idx_t radix; -}; + const auto &offsets = layout.GetOffsets(); + tuple_size = offsets[condition_types.size() + build_types.size()]; + pointer_offset = offsets.back(); + entry_size = layout.GetRowWidth(); -void PhysicalHashAggregate::Finalize(Pipeline &pipeline, ClientContext &context, - unique_ptr state) { - FinalizeInternal(context, move(state), false, &pipeline); + // compute the per-block capacity of this HT + idx_t block_capacity = MaxValue(STANDARD_VECTOR_SIZE, (Storage::BLOCK_SIZE / entry_size) + 1); + block_collection = make_unique(buffer_manager, block_capacity, entry_size); + string_heap = make_unique(buffer_manager, (idx_t)Storage::BLOCK_SIZE, 1, true); } -void PhysicalHashAggregate::FinalizeImmediate(ClientContext &context, unique_ptr state) { - FinalizeInternal(context, move(state), true, nullptr); +JoinHashTable::~JoinHashTable() { } -void PhysicalHashAggregate::FinalizeInternal(ClientContext &context, unique_ptr state, - bool immediate, Pipeline *pipeline) { - this->sink_state = move(state); - auto &gstate = (HashAggregateGlobalState &)*this->sink_state; - - // special case if we have non-combinable aggregates - // we have already aggreagted into a global shared HT that does not require any additional finalization steps - if (ForceSingleHT(gstate)) { - D_ASSERT(gstate.finalized_hts.size() <= 1); - return; +void JoinHashTable::ApplyBitmask(Vector &hashes, idx_t count) { + if (hashes.GetVectorType() == 
VectorType::CONSTANT_VECTOR) { + D_ASSERT(!ConstantVector::IsNull(hashes)); + auto indices = ConstantVector::GetData(hashes); + *indices = *indices & bitmask; + } else { + hashes.Normalify(count); + auto indices = FlatVector::GetData(hashes); + for (idx_t i = 0; i < count; i++) { + indices[i] &= bitmask; + } } +} - // we can have two cases now, non-partitioned for few groups and radix-partitioned for very many groups. - // go through all of the child hts and see if we ever called partition() on any of them - // if we did, its the latter case. - bool any_partitioned = false; - for (auto &pht : gstate.intermediate_hts) { - if (pht->IsPartitioned()) { - any_partitioned = true; - break; - } +void JoinHashTable::ApplyBitmask(Vector &hashes, const SelectionVector &sel, idx_t count, Vector &pointers) { + VectorData hdata; + hashes.Orrify(count, hdata); + + auto hash_data = (hash_t *)hdata.data; + auto result_data = FlatVector::GetData(pointers); + auto main_ht = (data_ptr_t *)hash_map->node->buffer; + for (idx_t i = 0; i < count; i++) { + auto rindex = sel.get_index(i); + auto hindex = hdata.sel->get_index(rindex); + auto hash = hash_data[hindex]; + result_data[rindex] = main_ht + (hash & bitmask); } +} - if (any_partitioned) { - // if one is partitioned, all have to be - // this should mostly have already happened in Combine, but if not we do it here - for (auto &pht : gstate.intermediate_hts) { - if (!pht->IsPartitioned()) { - pht->Partition(); - } - } - // schedule additional tasks to combine the partial HTs - if (!immediate) { - D_ASSERT(pipeline); - pipeline->total_tasks += gstate.partition_info.n_partitions; +void JoinHashTable::Hash(DataChunk &keys, const SelectionVector &sel, idx_t count, Vector &hashes) { + if (count == keys.size()) { + // no null values are filtered: use regular hash functions + VectorOperations::Hash(keys.data[0], hashes, keys.size()); + for (idx_t i = 1; i < equality_types.size(); i++) { + VectorOperations::CombineHash(hashes, keys.data[i], 
keys.size()); } - gstate.finalized_hts.resize(gstate.partition_info.n_partitions); - for (idx_t r = 0; r < gstate.partition_info.n_partitions; r++) { - gstate.finalized_hts[r] = - make_unique(BufferManager::GetBufferManager(context), group_types, - payload_types, bindings, HtEntryType::HT_WIDTH_64); - if (immediate) { - PhysicalHashAggregateFinalizeTask::FinalizeHT(gstate, r); - } else { - D_ASSERT(pipeline); - auto new_task = make_unique(*pipeline, gstate, r); - TaskScheduler::GetScheduler(context).ScheduleTask(pipeline->token, move(new_task)); - } + } else { + // null values were filtered: use selection vector + VectorOperations::Hash(keys.data[0], hashes, sel, count); + for (idx_t i = 1; i < equality_types.size(); i++) { + VectorOperations::CombineHash(hashes, keys.data[i], sel, count); } - } else { // in the non-partitioned case we immediately combine all the unpartitioned hts created by the threads. - // TODO possible optimization, if total count < limit for 32 bit ht, use that one - // create this ht here so finalize needs no lock on gstate + } +} - gstate.finalized_hts.push_back(make_unique( - BufferManager::GetBufferManager(context), group_types, payload_types, bindings, HtEntryType::HT_WIDTH_64)); - for (auto &pht : gstate.intermediate_hts) { - auto unpartitioned = pht->GetUnpartitioned(); - for (auto &unpartitioned_ht : unpartitioned) { - D_ASSERT(unpartitioned_ht); - gstate.finalized_hts[0]->Combine(*unpartitioned_ht); - unpartitioned_ht.reset(); - } - unpartitioned.clear(); +static idx_t FilterNullValues(VectorData &vdata, const SelectionVector &sel, idx_t count, SelectionVector &result) { + idx_t result_count = 0; + for (idx_t i = 0; i < count; i++) { + auto idx = sel.get_index(i); + auto key_idx = vdata.sel->get_index(idx); + if (vdata.validity.RowIsValid(key_idx)) { + result.set_index(result_count++, idx); } - gstate.finalized_hts[0]->Finalize(); } + return result_count; } -void PhysicalHashAggregate::GetChunkInternal(ExecutionContext &context, 
DataChunk &chunk, - PhysicalOperatorState *state_p) { - auto &gstate = (HashAggregateGlobalState &)*sink_state; - auto &state = (PhysicalHashAggregateState &)*state_p; - - state.scan_chunk.Reset(); - - // special case hack to sort out aggregating from empty intermediates - // for aggregations without groups - if (gstate.is_empty && is_implicit_aggr) { - D_ASSERT(chunk.ColumnCount() == aggregates.size()); - // for each column in the aggregates, set to initial state - chunk.SetCardinality(1); - for (idx_t i = 0; i < chunk.ColumnCount(); i++) { - D_ASSERT(aggregates[i]->GetExpressionClass() == ExpressionClass::BOUND_AGGREGATE); - auto &aggr = (BoundAggregateExpression &)*aggregates[i]; - auto aggr_state = unique_ptr(new data_t[aggr.function.state_size()]); - aggr.function.initialize(aggr_state.get()); +idx_t JoinHashTable::PrepareKeys(DataChunk &keys, unique_ptr &key_data, + const SelectionVector *¤t_sel, SelectionVector &sel, bool build_side) { + key_data = keys.Orrify(); - Vector state_vector(Value::POINTER((uintptr_t)aggr_state.get())); - aggr.function.finalize(state_vector, aggr.bind_info.get(), chunk.data[i], 1); - if (aggr.function.destructor) { - aggr.function.destructor(state_vector, 1); + // figure out which keys are NULL, and create a selection vector out of them + current_sel = &FlatVector::INCREMENTAL_SELECTION_VECTOR; + idx_t added_count = keys.size(); + if (build_side && IsRightOuterJoin(join_type)) { + // in case of a right or full outer join, we cannot remove NULL keys from the build side + return added_count; + } + for (idx_t i = 0; i < keys.ColumnCount(); i++) { + if (!null_values_are_equal[i]) { + if (key_data[i].validity.AllValid()) { + continue; } + added_count = FilterNullValues(key_data[i], *current_sel, added_count, sel); + // null values are NOT equal for this column, filter them out + current_sel = &sel; } - state.finished = true; - return; } - if (gstate.is_empty && !state.finished) { - state.finished = true; + return added_count; +} + +void 
JoinHashTable::Build(DataChunk &keys, DataChunk &payload) { + D_ASSERT(!finalized); + D_ASSERT(keys.size() == payload.size()); + if (keys.size() == 0) { return; } - idx_t elements_found = 0; - - while (true) { - if (state.ht_index == gstate.finalized_hts.size()) { - state.finished = true; - return; + // special case: correlated mark join + if (join_type == JoinType::MARK && !correlated_mark_join_info.correlated_types.empty()) { + auto &info = correlated_mark_join_info; + lock_guard mj_lock(info.mj_lock); + // Correlated MARK join + // for the correlated mark join we need to keep track of COUNT(*) and COUNT(COLUMN) for each of the correlated + // columns push into the aggregate hash table + D_ASSERT(info.correlated_counts); + info.group_chunk.SetCardinality(keys); + for (idx_t i = 0; i < info.correlated_types.size(); i++) { + info.group_chunk.data[i].Reference(keys.data[i]); } - elements_found = gstate.finalized_hts[state.ht_index]->Scan(state.ht_scan_position, state.scan_chunk); - - if (elements_found > 0) { - break; + if (info.correlated_payload.data.empty()) { + vector types; + types.push_back(keys.data[info.correlated_types.size()].GetType()); + info.correlated_payload.InitializeEmpty(types); } - gstate.finalized_hts[state.ht_index].reset(); - state.ht_index++; - state.ht_scan_position = 0; + info.correlated_payload.SetCardinality(keys); + info.correlated_payload.data[0].Reference(keys.data[info.correlated_types.size()]); + info.correlated_counts->AddChunk(info.group_chunk, info.correlated_payload); } - // compute the final projection list - idx_t chunk_index = 0; - chunk.SetCardinality(elements_found); - if (group_types.size() + aggregates.size() == chunk.ColumnCount()) { - for (idx_t col_idx = 0; col_idx < group_types.size(); col_idx++) { - chunk.data[chunk_index++].Reference(state.scan_chunk.data[col_idx]); - } - } else { - D_ASSERT(aggregates.size() == chunk.ColumnCount()); + // prepare the keys for processing + unique_ptr key_data; + const SelectionVector 
*current_sel; + SelectionVector sel(STANDARD_VECTOR_SIZE); + idx_t added_count = PrepareKeys(keys, key_data, current_sel, sel, true); + if (added_count < keys.size()) { + has_null = true; } - - for (idx_t col_idx = 0; col_idx < aggregates.size(); col_idx++) { - chunk.data[chunk_index++].Reference(state.scan_chunk.data[group_types.size() + col_idx]); + if (added_count == 0) { + return; } -} -unique_ptr PhysicalHashAggregate::GetOperatorState() { - return make_unique(*this, group_types, aggregate_return_types, - children.empty() ? nullptr : children[0].get()); -} + // build out the buffer space + Vector addresses(LogicalType::POINTER); + auto key_locations = FlatVector::GetData(addresses); + auto handles = block_collection->Build(added_count, key_locations, nullptr, current_sel); -bool PhysicalHashAggregate::ForceSingleHT(GlobalOperatorState &state) { - auto &gstate = (HashAggregateGlobalState &)state; + // hash the keys and obtain an entry in the list + // note that we only hash the keys used in the equality comparison + Vector hash_values(LogicalType::HASH); + Hash(keys, *current_sel, added_count, hash_values); - return !all_combinable || any_distinct || gstate.partition_info.n_partitions < 2; -} + // build a chunk so we can handle nested types that need more than Orrification + DataChunk source_chunk; + source_chunk.InitializeEmpty(layout.GetTypes()); -string PhysicalHashAggregate::ParamsToString() const { - string result; - for (idx_t i = 0; i < groups.size(); i++) { - if (i > 0) { - result += "\n"; - } - result += groups[i]->GetName(); + vector source_data; + source_data.reserve(layout.ColumnCount()); + + // serialize the keys to the key locations + for (idx_t i = 0; i < keys.ColumnCount(); i++) { + source_chunk.data[i].Reference(keys.data[i]); + source_data.emplace_back(move(key_data[i])); } - for (idx_t i = 0; i < aggregates.size(); i++) { - auto &aggregate = (BoundAggregateExpression &)*aggregates[i]; - if (i > 0 || !groups.empty()) { - result += "\n"; - } - 
result += aggregates[i]->GetName(); - if (aggregate.filter) { - result += " Filter: " + aggregate.filter->GetName(); - } + // now serialize the payload + D_ASSERT(build_types.size() == payload.ColumnCount()); + for (idx_t i = 0; i < payload.ColumnCount(); i++) { + source_chunk.data[source_data.size()].Reference(payload.data[i]); + VectorData pdata; + payload.data[i].Orrify(payload.size(), pdata); + source_data.emplace_back(move(pdata)); + } + if (IsRightOuterJoin(join_type)) { + // for FULL/RIGHT OUTER joins initialize the "found" boolean to false + source_chunk.data[source_data.size()].Reference(vfound); + VectorData fdata; + vfound.Orrify(keys.size(), fdata); + source_data.emplace_back(move(fdata)); } - return result; -} - -} // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - -namespace duckdb { -class ClientContext; -class PerfectAggregateHashTable; - -//! PhysicalPerfectHashAggregate performs a group-by and aggregation using a perfect hash table -class PhysicalPerfectHashAggregate : public PhysicalSink { -public: - PhysicalPerfectHashAggregate(ClientContext &context, vector types, - vector> aggregates, vector> groups, - vector> group_stats, vector required_bits, - idx_t estimated_cardinality); - //! The groups - vector> groups; - //! 
The aggregates that have to be computed - vector> aggregates; + // serialise the hashes at the end + source_chunk.data[source_data.size()].Reference(hash_values); + VectorData hdata; + hash_values.Orrify(keys.size(), hdata); + source_data.emplace_back(move(hdata)); -public: - void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, DataChunk &input) override; - void Combine(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate) override; + source_chunk.SetCardinality(keys); - unique_ptr GetLocalSinkState(ExecutionContext &context) override; - unique_ptr GetGlobalState(ClientContext &context) override; + RowOperations::Scatter(source_chunk, source_data.data(), layout, addresses, *string_heap, *current_sel, + added_count); +} - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; - unique_ptr GetOperatorState() override; +void JoinHashTable::InsertHashes(Vector &hashes, idx_t count, data_ptr_t key_locations[]) { + D_ASSERT(hashes.GetType().id() == LogicalTypeId::HASH); - string ParamsToString() const override; + // use bitmask to get position in array + ApplyBitmask(hashes, count); - //! Create a perfect aggregate hash table for this node - unique_ptr CreateHT(ClientContext &context); + hashes.Normalify(count); -public: - //! The group types - vector group_types; - //! The payload types - vector payload_types; - //! The aggregates to be computed - vector aggregate_objects; - //! The minimum value of each of the groups - vector group_minima; - //! 
The number of bits we need to completely cover each of the groups - vector required_bits; + D_ASSERT(hashes.GetVectorType() == VectorType::FLAT_VECTOR); + auto pointers = (data_ptr_t *)hash_map->node->buffer; + auto indices = FlatVector::GetData(hashes); + for (idx_t i = 0; i < count; i++) { + auto index = indices[i]; + // set prev in current key to the value (NOTE: this will be nullptr if + // there is none) + Store(pointers[index], key_locations[i] + pointer_offset); - unordered_map ht; -}; + // set pointer to current tuple + pointers[index] = key_locations[i]; + } +} -} // namespace duckdb +void JoinHashTable::Finalize() { + // the build has finished, now iterate over all the nodes and construct the final hash table + // select a HT that has at least 50% empty space + idx_t capacity = NextPowerOfTwo(MaxValue(Count() * 2, (Storage::BLOCK_SIZE / sizeof(data_ptr_t)) + 1)); + // size needs to be a power of 2 + D_ASSERT((capacity & (capacity - 1)) == 0); + bitmask = capacity - 1; + // allocate the HT and initialize it with all-zero entries + hash_map = buffer_manager.Allocate(capacity * sizeof(data_ptr_t)); + memset(hash_map->node->buffer, 0, capacity * sizeof(data_ptr_t)); -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/execution/perfect_aggregate_hashtable.hpp -// -// -//===----------------------------------------------------------------------===// + Vector hashes(LogicalType::HASH); + auto hash_data = FlatVector::GetData(hashes); + data_ptr_t key_locations[STANDARD_VECTOR_SIZE]; + // now construct the actual hash table; scan the nodes + // as we can the nodes we pin all the blocks of the HT and keep them pinned until the HT is destroyed + // this is so that we can keep pointers around to the blocks + // FIXME: if we cannot keep everything pinned in memory, we could switch to an out-of-memory merge join or so + for (auto &block : block_collection->blocks) { + auto handle = buffer_manager.Pin(block.block); 
+ data_ptr_t dataptr = handle->node->buffer; + idx_t entry = 0; + while (entry < block.count) { + // fetch the next vector of entries from the blocks + idx_t next = MinValue(STANDARD_VECTOR_SIZE, block.count - entry); + for (idx_t i = 0; i < next; i++) { + hash_data[i] = Load((data_ptr_t)(dataptr + pointer_offset)); + key_locations[i] = dataptr; + dataptr += entry_size; + } + // now insert into the hash table + InsertHashes(hashes, next, key_locations); + entry += next; + } + pinned_handles.push_back(move(handle)); + } + finalized = true; +} +unique_ptr JoinHashTable::Probe(DataChunk &keys) { + D_ASSERT(Count() > 0); // should be handled before + D_ASSERT(finalized); + // set up the scan structure + auto ss = make_unique(*this); -namespace duckdb { + if (join_type != JoinType::INNER) { + ss->found_match = unique_ptr(new bool[STANDARD_VECTOR_SIZE]); + memset(ss->found_match.get(), 0, sizeof(bool) * STANDARD_VECTOR_SIZE); + } -class PerfectAggregateHashTable : public BaseAggregateHashTable { -public: - PerfectAggregateHashTable(BufferManager &buffer_manager, vector group_types, - vector payload_types_p, vector aggregate_objects, - vector group_minima, vector required_bits); - ~PerfectAggregateHashTable() override; + // first prepare the keys for probing + const SelectionVector *current_sel; + ss->count = PrepareKeys(keys, ss->key_data, current_sel, ss->sel_vector, false); + if (ss->count == 0) { + return ss; + } -public: - //! Add the given data to the HT - void AddChunk(DataChunk &groups, DataChunk &payload); + // hash all the keys + Vector hashes(LogicalType::HASH); + Hash(keys, *current_sel, ss->count, hashes); - //! Combines the target perfect aggregate HT into this one - void Combine(PerfectAggregateHashTable &other); + // now initialize the pointers of the scan structure based on the hashes + ApplyBitmask(hashes, *current_sel, ss->count, ss->pointers); - //! 
Scan the HT starting from the scan_position - void Scan(idx_t &scan_position, DataChunk &result); + // create the selection vector linking to only non-empty entries + idx_t count = 0; + auto pointers = FlatVector::GetData(ss->pointers); + for (idx_t i = 0; i < ss->count; i++) { + auto idx = current_sel->get_index(i); + pointers[idx] = Load(pointers[idx]); + if (pointers[idx]) { + ss->sel_vector.set_index(count++, idx); + } + } + ss->count = count; + return ss; +} -protected: - Vector addresses; - //! The required bits per group - vector required_bits; - //! The total required bits for the HT (this determines the max capacity) - idx_t total_required_bits; - //! The total amount of groups - idx_t total_groups; - //! The tuple size - idx_t tuple_size; +ScanStructure::ScanStructure(JoinHashTable &ht) + : pointers(LogicalType::POINTER), sel_vector(STANDARD_VECTOR_SIZE), ht(ht), finished(false) { +} - // The actual pointer to the data - data_ptr_t data; - //! The owned data of the HT - unique_ptr owned_data; - //! Information on whether or not a specific group has any entries - unique_ptr group_is_set; +void ScanStructure::Next(DataChunk &keys, DataChunk &left, DataChunk &result) { + if (finished) { + return; + } - //! The minimum values for each of the group columns - vector group_minima; + switch (ht.join_type) { + case JoinType::INNER: + case JoinType::RIGHT: + NextInnerJoin(keys, left, result); + break; + case JoinType::SEMI: + NextSemiJoin(keys, left, result); + break; + case JoinType::MARK: + NextMarkJoin(keys, left, result); + break; + case JoinType::ANTI: + NextAntiJoin(keys, left, result); + break; + case JoinType::OUTER: + case JoinType::LEFT: + NextLeftJoin(keys, left, result); + break; + case JoinType::SINGLE: + NextSingleJoin(keys, left, result); + break; + default: + throw InternalException("Unhandled join type in JoinHashTable"); + } +} -private: - void Combine(Vector &source_addresses, Vector &target_addresses, idx_t combine_count); - //! 
Destroy the perfect aggregate HT (called automatically by the destructor) - void Destroy(); -}; +idx_t ScanStructure::ResolvePredicates(DataChunk &keys, SelectionVector &match_sel, SelectionVector *no_match_sel) { + // Start with the scan selection + for (idx_t i = 0; i < this->count; ++i) { + match_sel.set_index(i, this->sel_vector.get_index(i)); + } + idx_t no_match_count = 0; -} // namespace duckdb + return RowOperations::Match(keys, key_data.get(), ht.layout, pointers, ht.predicates, match_sel, this->count, + no_match_sel, no_match_count); +} +idx_t ScanStructure::ScanInnerJoin(DataChunk &keys, SelectionVector &result_vector) { + while (true) { + // resolve the predicates for this set of keys + idx_t result_count = ResolvePredicates(keys, result_vector, nullptr); + // after doing all the comparisons set the found_match vector + if (found_match) { + for (idx_t i = 0; i < result_count; i++) { + auto idx = result_vector.get_index(i); + found_match[idx] = true; + } + } + if (result_count > 0) { + return result_count; + } + // no matches found: check the next set of pointers + AdvancePointers(); + if (this->count == 0) { + return 0; + } + } +} +void ScanStructure::AdvancePointers(const SelectionVector &sel, idx_t sel_count) { + // now for all the pointers, we move on to the next set of pointers + idx_t new_count = 0; + auto ptrs = FlatVector::GetData(this->pointers); + for (idx_t i = 0; i < sel_count; i++) { + auto idx = sel.get_index(i); + ptrs[idx] = Load(ptrs[idx] + ht.pointer_offset); + if (ptrs[idx]) { + this->sel_vector.set_index(new_count++, idx); + } + } + this->count = new_count; +} +void ScanStructure::AdvancePointers() { + AdvancePointers(this->sel_vector, this->count); +} +void ScanStructure::GatherResult(Vector &result, const SelectionVector &result_vector, + const SelectionVector &sel_vector, const idx_t count, const idx_t col_no) { + const auto col_offset = ht.layout.GetOffsets()[col_no]; + RowOperations::Gather(pointers, sel_vector, result, 
result_vector, count, col_offset, col_no); +} -namespace duckdb { +void ScanStructure::GatherResult(Vector &result, const SelectionVector &sel_vector, const idx_t count, + const idx_t col_idx) { + GatherResult(result, FlatVector::INCREMENTAL_SELECTION_VECTOR, sel_vector, count, col_idx); +} -PhysicalPerfectHashAggregate::PhysicalPerfectHashAggregate(ClientContext &context, vector types_p, - vector> aggregates_p, - vector> groups_p, - vector> group_stats, - vector required_bits_p, idx_t estimated_cardinality) - : PhysicalSink(PhysicalOperatorType::PERFECT_HASH_GROUP_BY, move(types_p), estimated_cardinality), - groups(move(groups_p)), aggregates(move(aggregates_p)), required_bits(move(required_bits_p)) { - D_ASSERT(groups.size() == group_stats.size()); - group_minima.reserve(group_stats.size()); - for (auto &stats : group_stats) { - D_ASSERT(stats); - auto &nstats = (NumericStatistics &)*stats; - D_ASSERT(!nstats.min.is_null); - group_minima.push_back(move(nstats.min)); - } - for (auto &expr : groups) { - group_types.push_back(expr->return_type); +void ScanStructure::NextInnerJoin(DataChunk &keys, DataChunk &left, DataChunk &result) { + D_ASSERT(result.ColumnCount() == left.ColumnCount() + ht.build_types.size()); + if (this->count == 0) { + // no pointers left to chase + return; } - vector bindings; - vector payload_types_filters; - for (auto &expr : aggregates) { - D_ASSERT(expr->expression_class == ExpressionClass::BOUND_AGGREGATE); - D_ASSERT(expr->IsAggregate()); - auto &aggr = (BoundAggregateExpression &)*expr; - bindings.push_back(&aggr); + SelectionVector result_vector(STANDARD_VECTOR_SIZE); - D_ASSERT(!aggr.distinct); - D_ASSERT(aggr.function.combine); - for (auto &child : aggr.children) { - payload_types.push_back(child->return_type); + idx_t result_count = ScanInnerJoin(keys, result_vector); + if (result_count > 0) { + if (IsRightOuterJoin(ht.join_type)) { + // full/right outer join: mark join matches as FOUND in the HT + auto ptrs = 
FlatVector::GetData(pointers); + for (idx_t i = 0; i < result_count; i++) { + auto idx = result_vector.get_index(i); + // NOTE: threadsan reports this as a data race because this can be set concurrently by separate threads + // Technically it is, but it does not matter, since the only value that can be written is "true" + Store(true, ptrs[idx] + ht.tuple_size); + } } - if (aggr.filter) { - payload_types_filters.push_back(aggr.filter->return_type); + // matches were found + // construct the result + // on the LHS, we create a slice using the result vector + result.Slice(left, result_vector, result_count); + + // on the RHS, we need to fetch the data from the hash table + for (idx_t i = 0; i < ht.build_types.size(); i++) { + auto &vector = result.data[left.ColumnCount() + i]; + D_ASSERT(vector.GetType() == ht.build_types[i]); + GatherResult(vector, result_vector, result_count, i + ht.condition_types.size()); } + AdvancePointers(); } - for (const auto &pay_filters : payload_types_filters) { - payload_types.push_back(pay_filters); - } - aggregate_objects = AggregateObject::CreateAggregateObjects(bindings); } -unique_ptr PhysicalPerfectHashAggregate::CreateHT(ClientContext &context) { - return make_unique(BufferManager::GetBufferManager(context), group_types, payload_types, - aggregate_objects, group_minima, required_bits); -} +void ScanStructure::ScanKeyMatches(DataChunk &keys) { + // the semi-join, anti-join and mark-join we handle a differently from the inner join + // since there can be at most STANDARD_VECTOR_SIZE results + // we handle the entire chunk in one call to Next(). + // for every pointer, we keep chasing pointers and doing comparisons. 
+ // this results in a boolean array indicating whether or not the tuple has a match + SelectionVector match_sel(STANDARD_VECTOR_SIZE), no_match_sel(STANDARD_VECTOR_SIZE); + while (this->count > 0) { + // resolve the predicates for the current set of pointers + idx_t match_count = ResolvePredicates(keys, match_sel, &no_match_sel); + idx_t no_match_count = this->count - match_count; -//===--------------------------------------------------------------------===// -// Sink -//===--------------------------------------------------------------------===// -class PerfectHashAggregateGlobalState : public GlobalOperatorState { -public: - PerfectHashAggregateGlobalState(PhysicalPerfectHashAggregate &op, ClientContext &context) - : ht(op.CreateHT(context)) { + // mark each of the matches as found + for (idx_t i = 0; i < match_count; i++) { + found_match[match_sel.get_index(i)] = true; + } + // continue searching for the ones where we did not find a match yet + AdvancePointers(no_match_sel, no_match_count); } +} - //! The lock for updating the global aggregate state - std::mutex lock; - //! 
The global aggregate hash table - unique_ptr ht; -}; - -class PerfectHashAggregateLocalState : public LocalSinkState { -public: - PerfectHashAggregateLocalState(PhysicalPerfectHashAggregate &op, ClientContext &context) - : ht(op.CreateHT(context)) { - group_chunk.InitializeEmpty(op.group_types); - if (!op.payload_types.empty()) { - aggregate_input_chunk.InitializeEmpty(op.payload_types); +template +void ScanStructure::NextSemiOrAntiJoin(DataChunk &keys, DataChunk &left, DataChunk &result) { + D_ASSERT(left.ColumnCount() == result.ColumnCount()); + D_ASSERT(keys.size() == left.size()); + // create the selection vector from the matches that were found + SelectionVector sel(STANDARD_VECTOR_SIZE); + idx_t result_count = 0; + for (idx_t i = 0; i < keys.size(); i++) { + if (found_match[i] == MATCH) { + // part of the result + sel.set_index(result_count++, i); } } + // construct the final result + if (result_count > 0) { + // we only return the columns on the left side + // reference the columns of the left side from the result + result.Slice(left, sel, result_count); + } else { + D_ASSERT(result.size() == 0); + } +} - //! 
The local aggregate hash table - unique_ptr ht; - DataChunk group_chunk; - DataChunk aggregate_input_chunk; -}; +void ScanStructure::NextSemiJoin(DataChunk &keys, DataChunk &left, DataChunk &result) { + // first scan for key matches + ScanKeyMatches(keys); + // then construct the result from all tuples with a match + NextSemiOrAntiJoin(keys, left, result); -unique_ptr PhysicalPerfectHashAggregate::GetGlobalState(ClientContext &context) { - return make_unique(*this, context); + finished = true; } -unique_ptr PhysicalPerfectHashAggregate::GetLocalSinkState(ExecutionContext &context) { - return make_unique(*this, context.client); -} +void ScanStructure::NextAntiJoin(DataChunk &keys, DataChunk &left, DataChunk &result) { + // first scan for key matches + ScanKeyMatches(keys); + // then construct the result from all tuples that did not find a match + NextSemiOrAntiJoin(keys, left, result); -void PhysicalPerfectHashAggregate::Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate_p, - DataChunk &input) { - auto &lstate = (PerfectHashAggregateLocalState &)lstate_p; - DataChunk &group_chunk = lstate.group_chunk; - DataChunk &aggregate_input_chunk = lstate.aggregate_input_chunk; + finished = true; +} - for (idx_t group_idx = 0; group_idx < groups.size(); group_idx++) { - auto &group = groups[group_idx]; - D_ASSERT(group->type == ExpressionType::BOUND_REF); - auto &bound_ref_expr = (BoundReferenceExpression &)*group; - group_chunk.data[group_idx].Reference(input.data[bound_ref_expr.index]); +void ScanStructure::ConstructMarkJoinResult(DataChunk &join_keys, DataChunk &child, DataChunk &result) { + // for the initial set of columns we just reference the left side + result.SetCardinality(child); + for (idx_t i = 0; i < child.ColumnCount(); i++) { + result.data[i].Reference(child.data[i]); } - idx_t aggregate_input_idx = 0; - for (auto &aggregate : aggregates) { - auto &aggr = (BoundAggregateExpression &)*aggregate; - for (auto &child_expr : 
aggr.children) { - D_ASSERT(child_expr->type == ExpressionType::BOUND_REF); - auto &bound_ref_expr = (BoundReferenceExpression &)*child_expr; - aggregate_input_chunk.data[aggregate_input_idx++].Reference(input.data[bound_ref_expr.index]); + auto &mark_vector = result.data.back(); + mark_vector.SetVectorType(VectorType::FLAT_VECTOR); + // first we set the NULL values from the join keys + // if there is any NULL in the keys, the result is NULL + auto bool_result = FlatVector::GetData(mark_vector); + auto &mask = FlatVector::Validity(mark_vector); + for (idx_t col_idx = 0; col_idx < join_keys.ColumnCount(); col_idx++) { + if (ht.null_values_are_equal[col_idx]) { + continue; + } + VectorData jdata; + join_keys.data[col_idx].Orrify(join_keys.size(), jdata); + if (!jdata.validity.AllValid()) { + for (idx_t i = 0; i < join_keys.size(); i++) { + auto jidx = jdata.sel->get_index(i); + mask.Set(i, jdata.validity.RowIsValidUnsafe(jidx)); + } } } - for (auto &aggregate : aggregates) { - auto &aggr = (BoundAggregateExpression &)*aggregate; - if (aggr.filter) { - auto &bound_ref_expr = (BoundReferenceExpression &)*aggr.filter; - auto it = ht.find(aggr.filter.get()); - if (it == ht.end()) { - aggregate_input_chunk.data[aggregate_input_idx].Reference(input.data[bound_ref_expr.index]); - ht[aggr.filter.get()] = bound_ref_expr.index; - bound_ref_expr.index = aggregate_input_idx++; - } else { - aggregate_input_chunk.data[aggregate_input_idx++].Reference(input.data[it->second]); + // now set the remaining entries to either true or false based on whether a match was found + if (found_match) { + for (idx_t i = 0; i < child.size(); i++) { + bool_result[i] = found_match[i]; + } + } else { + memset(bool_result, 0, sizeof(bool) * child.size()); + } + // if the right side contains NULL values, the result of any FALSE becomes NULL + if (ht.has_null) { + for (idx_t i = 0; i < child.size(); i++) { + if (!bool_result[i]) { + mask.SetInvalid(i); } } } +} - 
group_chunk.SetCardinality(input.size()); +void ScanStructure::NextMarkJoin(DataChunk &keys, DataChunk &input, DataChunk &result) { + D_ASSERT(result.ColumnCount() == input.ColumnCount() + 1); + D_ASSERT(result.data.back().GetType() == LogicalType::BOOLEAN); + // this method should only be called for a non-empty HT + D_ASSERT(ht.Count() > 0); - aggregate_input_chunk.SetCardinality(input.size()); + ScanKeyMatches(keys); + if (ht.correlated_mark_join_info.correlated_types.empty()) { + ConstructMarkJoinResult(keys, input, result); + } else { + auto &info = ht.correlated_mark_join_info; + // there are correlated columns + // first we fetch the counts from the aggregate hashtable corresponding to these entries + D_ASSERT(keys.ColumnCount() == info.group_chunk.ColumnCount() + 1); + info.group_chunk.SetCardinality(keys); + for (idx_t i = 0; i < info.group_chunk.ColumnCount(); i++) { + info.group_chunk.data[i].Reference(keys.data[i]); + } + info.correlated_counts->FetchAggregates(info.group_chunk, info.result_chunk); - group_chunk.Verify(); - aggregate_input_chunk.Verify(); - D_ASSERT(aggregate_input_chunk.ColumnCount() == 0 || group_chunk.size() == aggregate_input_chunk.size()); + // for the initial set of columns we just reference the left side + result.SetCardinality(input); + for (idx_t i = 0; i < input.ColumnCount(); i++) { + result.data[i].Reference(input.data[i]); + } + // create the result matching vector + auto &last_key = keys.data.back(); + auto &result_vector = result.data.back(); + // first set the nullmask based on whether or not there were NULL values in the join key + result_vector.SetVectorType(VectorType::FLAT_VECTOR); + auto bool_result = FlatVector::GetData(result_vector); + auto &mask = FlatVector::Validity(result_vector); + switch (last_key.GetVectorType()) { + case VectorType::CONSTANT_VECTOR: + if (ConstantVector::IsNull(last_key)) { + mask.SetAllInvalid(input.size()); + } + break; + case VectorType::FLAT_VECTOR: + 
mask.Copy(FlatVector::Validity(last_key), input.size()); + break; + default: { + VectorData kdata; + last_key.Orrify(keys.size(), kdata); + for (idx_t i = 0; i < input.size(); i++) { + auto kidx = kdata.sel->get_index(i); + mask.Set(i, kdata.validity.RowIsValid(kidx)); + } + break; + } + } - lstate.ht->AddChunk(group_chunk, aggregate_input_chunk); + auto count_star = FlatVector::GetData(info.result_chunk.data[0]); + auto count = FlatVector::GetData(info.result_chunk.data[1]); + // set the entries to either true or false based on whether a match was found + for (idx_t i = 0; i < input.size(); i++) { + D_ASSERT(count_star[i] >= count[i]); + bool_result[i] = found_match ? found_match[i] : false; + if (!bool_result[i] && count_star[i] > count[i]) { + // RHS has NULL value and result is false: set to null + mask.SetInvalid(i); + } + if (count_star[i] == 0) { + // count == 0, set nullmask to false (we know the result is false now) + mask.SetValid(i); + } + } + } + finished = true; } -//===--------------------------------------------------------------------===// -// Combine -//===--------------------------------------------------------------------===// -void PhysicalPerfectHashAggregate::Combine(ExecutionContext &context, GlobalOperatorState &gstate_p, - LocalSinkState &lstate_p) { - auto &lstate = (PerfectHashAggregateLocalState &)lstate_p; - auto &gstate = (PerfectHashAggregateGlobalState &)gstate_p; - - lock_guard l(gstate.lock); - gstate.ht->Combine(*lstate.ht); -} +void ScanStructure::NextLeftJoin(DataChunk &keys, DataChunk &left, DataChunk &result) { + // a LEFT OUTER JOIN is identical to an INNER JOIN except all tuples that do + // not have a match must return at least one tuple (with the right side set + // to NULL in every column) + NextInnerJoin(keys, left, result); + if (result.size() == 0) { + // no entries left from the normal join + // fill in the result of the remaining left tuples + // together with NULL values on the right-hand side + idx_t 
remaining_count = 0; + SelectionVector sel(STANDARD_VECTOR_SIZE); + for (idx_t i = 0; i < left.size(); i++) { + if (!found_match[i]) { + sel.set_index(remaining_count++, i); + } + } + if (remaining_count > 0) { + // have remaining tuples + // slice the left side with tuples that did not find a match + result.Slice(left, sel, remaining_count); -//===--------------------------------------------------------------------===// -// GetChunk -//===--------------------------------------------------------------------===// -class PerfectHashAggregateState : public PhysicalOperatorState { -public: - PerfectHashAggregateState(PhysicalOperator &op, PhysicalOperator *child) - : PhysicalOperatorState(op, child), ht_scan_position(0) { + // now set the right side to NULL + for (idx_t i = left.ColumnCount(); i < result.ColumnCount(); i++) { + Vector &vec = result.data[i]; + vec.SetVectorType(VectorType::CONSTANT_VECTOR); + ConstantVector::SetNull(vec, true); + } + } + finished = true; } - //! The current position to scan the HT for output tuples - idx_t ht_scan_position; -}; +} -void PhysicalPerfectHashAggregate::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, - PhysicalOperatorState *state_p) { - auto &state = (PerfectHashAggregateState &)*state_p; - auto &gstate = (PerfectHashAggregateGlobalState &)*sink_state; +void ScanStructure::NextSingleJoin(DataChunk &keys, DataChunk &input, DataChunk &result) { + // single join + // this join is similar to the semi join except that + // (1) we actually return data from the RHS and + // (2) we return NULL for that data if there is no match + idx_t result_count = 0; + SelectionVector result_sel(STANDARD_VECTOR_SIZE); + SelectionVector match_sel(STANDARD_VECTOR_SIZE), no_match_sel(STANDARD_VECTOR_SIZE); + while (this->count > 0) { + // resolve the predicates for the current set of pointers + idx_t match_count = ResolvePredicates(keys, match_sel, &no_match_sel); + idx_t no_match_count = this->count - match_count; - 
gstate.ht->Scan(state.ht_scan_position, chunk); -} + // mark each of the matches as found + for (idx_t i = 0; i < match_count; i++) { + // found a match for this index + auto index = match_sel.get_index(i); + found_match[index] = true; + result_sel.set_index(result_count++, index); + } + // continue searching for the ones where we did not find a match yet + AdvancePointers(no_match_sel, no_match_count); + } + // reference the columns of the left side from the result + D_ASSERT(input.ColumnCount() > 0); + for (idx_t i = 0; i < input.ColumnCount(); i++) { + result.data[i].Reference(input.data[i]); + } + // now fetch the data from the RHS + for (idx_t i = 0; i < ht.build_types.size(); i++) { + auto &vector = result.data[input.ColumnCount() + i]; + // set NULL entries for every entry that was not found + auto &mask = FlatVector::Validity(vector); + mask.SetAllInvalid(input.size()); + for (idx_t j = 0; j < result_count; j++) { + mask.SetValid(result_sel.get_index(j)); + } + // for the remaining values we fetch the values + GatherResult(vector, result_sel, result_sel, result_count, i + ht.condition_types.size()); + } + result.SetCardinality(input.size()); -unique_ptr PhysicalPerfectHashAggregate::GetOperatorState() { - return make_unique(*this, children[0].get()); + // like the SEMI, ANTI and MARK join types, the SINGLE join only ever does one pass over the HT per input chunk + finished = true; } -string PhysicalPerfectHashAggregate::ParamsToString() const { - string result; - for (idx_t i = 0; i < groups.size(); i++) { - if (i > 0) { - result += "\n"; +void JoinHashTable::ScanFullOuter(DataChunk &result, JoinHTScanState &state) { + // scan the HT starting from the current position and check which rows from the build side did not find a match + Vector addresses(LogicalType::POINTER); + auto key_locations = FlatVector::GetData(addresses); + idx_t found_entries = 0; + { + lock_guard state_lock(state.lock); + for (; state.block_position < block_collection->blocks.size(); 
state.block_position++, state.position = 0) { + auto &block = block_collection->blocks[state.block_position]; + auto &handle = pinned_handles[state.block_position]; + auto baseptr = handle->node->buffer; + for (; state.position < block.count; state.position++) { + auto tuple_base = baseptr + state.position * entry_size; + auto found_match = Load(tuple_base + tuple_size); + if (!found_match) { + key_locations[found_entries++] = tuple_base; + if (found_entries == STANDARD_VECTOR_SIZE) { + state.position++; + break; + } + } + } + if (found_entries == STANDARD_VECTOR_SIZE) { + break; + } } - result += groups[i]->GetName(); } - for (idx_t i = 0; i < aggregates.size(); i++) { - if (i > 0 || !groups.empty()) { - result += "\n"; + result.SetCardinality(found_entries); + if (found_entries > 0) { + idx_t left_column_count = result.ColumnCount() - build_types.size(); + const auto &sel_vector = FlatVector::INCREMENTAL_SELECTION_VECTOR; + // set the left side as a constant NULL + for (idx_t i = 0; i < left_column_count; i++) { + Vector &vec = result.data[i]; + vec.SetVectorType(VectorType::CONSTANT_VECTOR); + ConstantVector::SetNull(vec, true); } - result += aggregates[i]->GetName(); - auto &aggregate = (BoundAggregateExpression &)*aggregates[i]; - if (aggregate.filter) { - result += " Filter: " + aggregate.filter->GetName(); + // gather the values from the RHS + for (idx_t i = 0; i < build_types.size(); i++) { + auto &vector = result.data[left_column_count + i]; + D_ASSERT(vector.GetType() == build_types[i]); + const auto col_no = condition_types.size() + i; + const auto col_offset = layout.GetOffsets()[col_no]; + RowOperations::Gather(addresses, sel_vector, vector, sel_vector, found_entries, col_offset, col_no); } } - return result; } } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/execution/operator/aggregate/physical_simple_aggregate.hpp +// duckdb/execution/merge_join.hpp // // 
//===----------------------------------------------------------------------===// @@ -37259,254 +44620,191 @@ string PhysicalPerfectHashAggregate::ParamsToString() const { -namespace duckdb { - -//! PhysicalSimpleAggregate is an aggregate operator that can only perform aggregates (1) without any groups, and (2) -//! without any DISTINCT aggregates -class PhysicalSimpleAggregate : public PhysicalSink { -public: - PhysicalSimpleAggregate(vector types, vector> expressions, bool all_combinable, - idx_t estimated_cardinality); - - //! The aggregates that have to be computed - vector> aggregates; - //! Whether or not all aggregates are trivially combinable. Aggregates that are trivially combinable can be - //! parallelized. - bool all_combinable; - -public: - void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, DataChunk &input) override; - void Combine(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate) override; - - unique_ptr GetLocalSinkState(ExecutionContext &context) override; - unique_ptr GetGlobalState(ClientContext &context) override; - - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; - - string ParamsToString() const override; -}; - -} // namespace duckdb - - - namespace duckdb { -PhysicalSimpleAggregate::PhysicalSimpleAggregate(vector types, vector> expressions, - bool all_combinable, idx_t estimated_cardinality) - : PhysicalSink(PhysicalOperatorType::SIMPLE_AGGREGATE, move(types), estimated_cardinality), - aggregates(move(expressions)), all_combinable(all_combinable) { -} +struct MergeOrder { + SelectionVector order; + idx_t count; + VectorData vdata; +}; -//===--------------------------------------------------------------------===// -// Sink -//===--------------------------------------------------------------------===// +enum MergeInfoType : uint8_t { SCALAR_MERGE_INFO = 1, CHUNK_MERGE_INFO = 2 }; -struct AggregateState { - explicit 
AggregateState(vector> &aggregate_expressions) { - for (auto &aggregate : aggregate_expressions) { - D_ASSERT(aggregate->GetExpressionClass() == ExpressionClass::BOUND_AGGREGATE); - auto &aggr = (BoundAggregateExpression &)*aggregate; - auto state = unique_ptr(new data_t[aggr.function.state_size()]); - aggr.function.initialize(state.get()); - aggregates.push_back(move(state)); - destructors.push_back(aggr.function.destructor); - } +struct MergeInfo { + MergeInfo(MergeInfoType info_type, LogicalType type) : info_type(info_type), type(type) { } - ~AggregateState() { - D_ASSERT(destructors.size() == aggregates.size()); - for (idx_t i = 0; i < destructors.size(); i++) { - if (!destructors[i]) { - continue; - } - Vector state_vector(Value::POINTER((uintptr_t)aggregates[i].get())); - state_vector.SetVectorType(VectorType::FLAT_VECTOR); + MergeInfoType info_type; + LogicalType type; +}; - destructors[i](state_vector, 1); - } - } +struct ScalarMergeInfo : public MergeInfo { + MergeOrder ℴ + idx_t &pos; + SelectionVector result; - void Move(AggregateState &other) { - other.aggregates = move(aggregates); - other.destructors = move(destructors); + ScalarMergeInfo(MergeOrder &order, LogicalType type, idx_t &pos) + : MergeInfo(MergeInfoType::SCALAR_MERGE_INFO, type), order(order), pos(pos), result(STANDARD_VECTOR_SIZE) { } - - //! The aggregate values - vector> aggregates; - // The destructors - vector destructors; }; -class SimpleAggregateGlobalState : public GlobalOperatorState { -public: - explicit SimpleAggregateGlobalState(vector> &aggregates) : state(aggregates) { - } +struct ChunkMergeInfo : public MergeInfo { + ChunkCollection &data_chunks; + vector &order_info; + bool found_match[STANDARD_VECTOR_SIZE]; - //! The lock for updating the global aggregate state - std::mutex lock; - //! 
The global aggregate state - AggregateState state; + ChunkMergeInfo(ChunkCollection &data_chunks, vector &order_info) + : MergeInfo(MergeInfoType::CHUNK_MERGE_INFO, data_chunks.Types()[0]), data_chunks(data_chunks), + order_info(order_info) { + memset(found_match, 0, sizeof(found_match)); + } }; -class SimpleAggregateLocalState : public LocalSinkState { -public: - explicit SimpleAggregateLocalState(vector> &aggregates) : state(aggregates) { - vector payload_types; - for (auto &aggregate : aggregates) { - D_ASSERT(aggregate->GetExpressionClass() == ExpressionClass::BOUND_AGGREGATE); - auto &aggr = (BoundAggregateExpression &)*aggregate; - // initialize the payload chunk - if (!aggr.children.empty()) { - for (auto &child : aggr.children) { - payload_types.push_back(child->return_type); - child_executor.AddExpression(*child); - } - } +struct MergeJoinComplex { + struct LessThan { + template + static idx_t Operation(ScalarMergeInfo &l, ScalarMergeInfo &r); + }; + struct LessThanEquals { + template + static idx_t Operation(ScalarMergeInfo &l, ScalarMergeInfo &r); + }; + struct GreaterThan { + template + static idx_t Operation(ScalarMergeInfo &l, ScalarMergeInfo &r) { + return LessThan::Operation(r, l); } - if (!payload_types.empty()) { // for select count(*) from t; there is no payload at all - payload_chunk_base.Initialize(payload_types); - payload_chunk.InitializeEmpty(payload_types); + }; + struct GreaterThanEquals { + template + static idx_t Operation(ScalarMergeInfo &l, ScalarMergeInfo &r) { + return LessThanEquals::Operation(r, l); } - } - void Reset() { - payload_chunk.Reference(payload_chunk_base); - } + }; - //! The local aggregate state - AggregateState state; - //! The executor - ExpressionExecutor child_executor; - //! The payload chunk - DataChunk payload_chunk; - //! 
The payload chunk - DataChunk payload_chunk_base; + static idx_t Perform(MergeInfo &l, MergeInfo &r, ExpressionType comparison_type); }; -unique_ptr PhysicalSimpleAggregate::GetGlobalState(ClientContext &context) { - return make_unique(aggregates); -} +struct MergeJoinSimple { + struct LessThan { + template + static idx_t Operation(ScalarMergeInfo &l, ChunkMergeInfo &r); + }; + struct LessThanEquals { + template + static idx_t Operation(ScalarMergeInfo &l, ChunkMergeInfo &r); + }; + struct GreaterThan { + template + static idx_t Operation(ScalarMergeInfo &l, ChunkMergeInfo &r); + }; + struct GreaterThanEquals { + template + static idx_t Operation(ScalarMergeInfo &l, ChunkMergeInfo &r); + }; -unique_ptr PhysicalSimpleAggregate::GetLocalSinkState(ExecutionContext &context) { - return make_unique(aggregates); -} + static idx_t Perform(MergeInfo &l, MergeInfo &r, ExpressionType comparison); +}; -void PhysicalSimpleAggregate::Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, - DataChunk &input) { - auto &sink = (SimpleAggregateLocalState &)lstate; - // perform the aggregation inside the local state - idx_t payload_idx = 0, payload_expr_idx = 0; - sink.Reset(); +#define INSTANTIATE_MERGEJOIN_TEMPLATES(MJCLASS, OPNAME, L, R) \ + template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); \ + template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); \ + template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); \ + template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); \ + template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); \ + template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); \ + template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); \ + template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); \ + template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); \ + template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); \ + template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); \ + template idx_t MJCLASS::OPNAME::Operation(L & l, R & 
r); \ + template idx_t MJCLASS::OPNAME::Operation(L & l, R & r); - DataChunk &payload_chunk = sink.payload_chunk; - sink.child_executor.SetChunk(input); - payload_chunk.SetCardinality(input); - for (idx_t aggr_idx = 0; aggr_idx < aggregates.size(); aggr_idx++) { - DataChunk filtered_input; - auto &aggregate = (BoundAggregateExpression &)*aggregates[aggr_idx]; - idx_t payload_cnt = 0; - // resolve the filter (if any) - if (aggregate.filter) { - ExpressionExecutor filter_execution(aggregate.filter.get()); - SelectionVector true_sel(STANDARD_VECTOR_SIZE); - auto count = filter_execution.SelectExpression(input, true_sel); - auto input_types = input.GetTypes(); - filtered_input.Initialize(input_types); - filtered_input.Slice(input, true_sel, count); - sink.child_executor.SetChunk(filtered_input); - payload_chunk.SetCardinality(count); - } - // resolve the child expressions of the aggregate (if any) - if (!aggregate.children.empty()) { - for (idx_t i = 0; i < aggregate.children.size(); ++i) { - sink.child_executor.ExecuteExpression(payload_expr_idx, payload_chunk.data[payload_idx + payload_cnt]); - payload_expr_idx++; - payload_cnt++; - } - } +} // namespace duckdb - // perform the actual aggregation - aggregate.function.simple_update(payload_cnt == 0 ? 
nullptr : &payload_chunk.data[payload_idx], - aggregate.bind_info.get(), payload_cnt, sink.state.aggregates[aggr_idx].get(), - payload_chunk.size()); - payload_idx += payload_cnt; - } -} -//===--------------------------------------------------------------------===// -// Finalize -//===--------------------------------------------------------------------===// -void PhysicalSimpleAggregate::Combine(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate) { - auto &gstate = (SimpleAggregateGlobalState &)state; - auto &source = (SimpleAggregateLocalState &)lstate; - // finalize: combine the local state into the global state - if (all_combinable) { - // all aggregates are combinable: we might be doing a parallel aggregate - // use the combine method to combine the partial aggregates - lock_guard glock(gstate.lock); - for (idx_t aggr_idx = 0; aggr_idx < aggregates.size(); aggr_idx++) { - auto &aggregate = (BoundAggregateExpression &)*aggregates[aggr_idx]; - Vector source_state(Value::POINTER((uintptr_t)source.state.aggregates[aggr_idx].get())); - Vector dest_state(Value::POINTER((uintptr_t)gstate.state.aggregates[aggr_idx].get())); - aggregate.function.combine(source_state, dest_state, 1); - } - } else { - // complex aggregates: this is necessarily a non-parallel aggregate - // simply move over the source state into the global state - source.state.Move(gstate.state); +namespace duckdb { + +template +static idx_t MergeJoinSwitch(L_ARG &l, R_ARG &r) { + switch (l.type.InternalType()) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + return MJ::template Operation(l, r); + case PhysicalType::INT16: + return MJ::template Operation(l, r); + case PhysicalType::INT32: + return MJ::template Operation(l, r); + case PhysicalType::INT64: + return MJ::template Operation(l, r); + case PhysicalType::UINT8: + return MJ::template Operation(l, r); + case PhysicalType::UINT16: + return MJ::template Operation(l, r); + case PhysicalType::UINT32: + return 
MJ::template Operation(l, r); + case PhysicalType::UINT64: + return MJ::template Operation(l, r); + case PhysicalType::INT128: + return MJ::template Operation(l, r); + case PhysicalType::FLOAT: + return MJ::template Operation(l, r); + case PhysicalType::DOUBLE: + return MJ::template Operation(l, r); + case PhysicalType::INTERVAL: + return MJ::template Operation(l, r); + case PhysicalType::VARCHAR: + return MJ::template Operation(l, r); + default: + throw InternalException("Type not implemented for merge join!"); } } -//===--------------------------------------------------------------------===// -// GetChunkInternal -//===--------------------------------------------------------------------===// -void PhysicalSimpleAggregate::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, - PhysicalOperatorState *state) { - auto &gstate = (SimpleAggregateGlobalState &)*sink_state; - if (state->finished) { - return; +template +static idx_t MergeJoinComparisonSwitch(L_ARG &l, R_ARG &r, ExpressionType comparison_type) { + switch (comparison_type) { + case ExpressionType::COMPARE_LESSTHAN: + return MergeJoinSwitch(l, r); + case ExpressionType::COMPARE_LESSTHANOREQUALTO: + return MergeJoinSwitch(l, r); + case ExpressionType::COMPARE_GREATERTHAN: + return MergeJoinSwitch(l, r); + case ExpressionType::COMPARE_GREATERTHANOREQUALTO: + return MergeJoinSwitch(l, r); + default: + throw InternalException("Unimplemented comparison type for merge join!"); } - // initialize the result chunk with the aggregate values - chunk.SetCardinality(1); - for (idx_t aggr_idx = 0; aggr_idx < aggregates.size(); aggr_idx++) { - auto &aggregate = (BoundAggregateExpression &)*aggregates[aggr_idx]; +} - Vector state_vector(Value::POINTER((uintptr_t)gstate.state.aggregates[aggr_idx].get())); - aggregate.function.finalize(state_vector, aggregate.bind_info.get(), chunk.data[aggr_idx], 1); +idx_t MergeJoinComplex::Perform(MergeInfo &l, MergeInfo &r, ExpressionType comparison_type) { + D_ASSERT(l.info_type 
== MergeInfoType::SCALAR_MERGE_INFO && r.info_type == MergeInfoType::SCALAR_MERGE_INFO); + auto &left = (ScalarMergeInfo &)l; + auto &right = (ScalarMergeInfo &)r; + D_ASSERT(left.type == right.type); + if (left.order.count == 0 || right.order.count == 0) { + return 0; } - state->finished = true; + return MergeJoinComparisonSwitch(left, right, comparison_type); } -string PhysicalSimpleAggregate::ParamsToString() const { - string result; - for (idx_t i = 0; i < aggregates.size(); i++) { - auto &aggregate = (BoundAggregateExpression &)*aggregates[i]; - if (i > 0) { - result += "\n"; - } - result += aggregates[i]->GetName(); - if (aggregate.filter) { - result += " Filter: " + aggregate.filter->GetName(); - } +idx_t MergeJoinSimple::Perform(MergeInfo &l, MergeInfo &r, ExpressionType comparison_type) { + D_ASSERT(l.info_type == MergeInfoType::SCALAR_MERGE_INFO && r.info_type == MergeInfoType::CHUNK_MERGE_INFO); + auto &left = (ScalarMergeInfo &)l; + auto &right = (ChunkMergeInfo &)r; + D_ASSERT(left.type == right.type); + if (left.order.count == 0 || right.data_chunks.Count() == 0) { + return 0; } - return result; + return MergeJoinComparisonSwitch(left, right, comparison_type); } } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/execution/operator/aggregate/physical_window.hpp -// -// -//===----------------------------------------------------------------------===// - - @@ -37514,33 +44812,59 @@ string PhysicalSimpleAggregate::ParamsToString() const { namespace duckdb { -//! PhysicalWindow implements window functions -//! 
It assumes that all functions have a common partitioning and ordering -class PhysicalWindow : public PhysicalSink { -public: - PhysicalWindow(vector types, vector> select_list, idx_t estimated_cardinality, - PhysicalOperatorType type = PhysicalOperatorType::WINDOW); - - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; - unique_ptr GetOperatorState() override; - - // sink stuff - void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, DataChunk &input) override; - void Combine(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate) override; - void Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr gstate) override; - - unique_ptr GetLocalSinkState(ExecutionContext &context) override; - unique_ptr GetGlobalState(ClientContext &context) override; +template +idx_t MergeJoinComplexLessThan(ScalarMergeInfo &l, ScalarMergeInfo &r) { + if (r.pos >= r.order.count) { + return 0; + } + auto ldata = (T *)l.order.vdata.data; + auto rdata = (T *)r.order.vdata.data; + auto &lorder = l.order.order; + auto &rorder = r.order.order; + idx_t result_count = 0; + while (true) { + if (l.pos < l.order.count) { + auto lidx = lorder.get_index(l.pos); + auto ridx = rorder.get_index(r.pos); + auto dlidx = l.order.vdata.sel->get_index(lidx); + auto dridx = r.order.vdata.sel->get_index(ridx); + if (OP::Operation(ldata[dlidx], rdata[dridx])) { + // left side smaller: found match + l.result.set_index(result_count, lidx); + r.result.set_index(result_count, ridx); + result_count++; + // move left side forward + l.pos++; + if (result_count == STANDARD_VECTOR_SIZE) { + // out of space! 
+ break; + } + continue; + } + } + // right side smaller or equal, or left side exhausted: move + // right pointer forward reset left side to start + l.pos = 0; + r.pos++; + if (r.pos == r.order.count) { + break; + } + } + return result_count; +} - idx_t MaxThreads(ClientContext &context); - unique_ptr GetParallelState(); +template +idx_t MergeJoinComplex::LessThan::Operation(ScalarMergeInfo &l, ScalarMergeInfo &r) { + return MergeJoinComplexLessThan(l, r); +} - string ParamsToString() const override; +template +idx_t MergeJoinComplex::LessThanEquals::Operation(ScalarMergeInfo &l, ScalarMergeInfo &r) { + return MergeJoinComplexLessThan(l, r); +} -public: - //! The projection list of the WINDOW statement (may contain aggregates) - vector> select_list; -}; +INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinComplex, LessThan, ScalarMergeInfo, ScalarMergeInfo) +INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinComplex, LessThanEquals, ScalarMergeInfo, ScalarMergeInfo) } // namespace duckdb @@ -37548,72 +44872,108 @@ class PhysicalWindow : public PhysicalSink { - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/execution/window_segment_tree.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - - namespace duckdb { -class WindowSegmentTree { -public: - WindowSegmentTree(AggregateFunction &aggregate, FunctionData *bind_info, LogicalType result_type, - ChunkCollection *input); - ~WindowSegmentTree(); - - Value Compute(idx_t start, idx_t end); - -private: - void ConstructTree(); - void WindowSegmentValue(idx_t l_idx, idx_t begin, idx_t end); - void AggregateInit(); - Value AggegateFinal(); - - //! The aggregate that the window function is computed over - AggregateFunction aggregate; - //! The bind info of the aggregate - FunctionData *bind_info; - //! 
The result type of the window function - LogicalType result_type; +template +static idx_t MergeJoinSimpleGreaterThan(ScalarMergeInfo &l, ChunkMergeInfo &r) { + auto ldata = (T *)l.order.vdata.data; + auto &lorder = l.order.order; + l.pos = l.order.count; + for (idx_t chunk_idx = 0; chunk_idx < r.order_info.size(); chunk_idx++) { + // we only care about the SMALLEST value in each of the RHS + // because we want to figure out if they are greater than [or equal] to ANY value + // get the smallest value from the RHS + auto &rorder = r.order_info[chunk_idx]; + auto rdata = (T *)rorder.vdata.data; + auto min_r_value = rdata[rorder.vdata.sel->get_index(rorder.order.get_index(0))]; + // now we start from the current lpos value and check if we found a new value that is [>= OR >] the min RHS + // value + while (true) { + auto lidx = lorder.get_index(l.pos - 1); + auto dlidx = l.order.vdata.sel->get_index(lidx); + if (OP::Operation(ldata[dlidx], min_r_value)) { + // found a match for lpos, set it in the found_match vector + r.found_match[lidx] = true; + l.pos--; + if (l.pos == 0) { + // early out: we exhausted the entire LHS and they all match + return 0; + } + } else { + // we found no match: any subsequent value from the LHS we scan now will be smaller and thus also not + // match move to the next RHS chunk + break; + } + } + } + return 0; +} +template +idx_t MergeJoinSimple::GreaterThan::Operation(ScalarMergeInfo &l, ChunkMergeInfo &r) { + return MergeJoinSimpleGreaterThan(l, r); +} - //! Data pointer that contains a single state, used for intermediate window segment aggregation - vector state; - //! Input data chunk, used for intermediate window segment aggregation - DataChunk inputs; - //! A vector of pointers to "state", used for intermediate window segment aggregation - Vector statep; +template +idx_t MergeJoinSimple::GreaterThanEquals::Operation(ScalarMergeInfo &l, ChunkMergeInfo &r) { + return MergeJoinSimpleGreaterThan(l, r); +} - //! 
The actual window segment tree: an array of aggregate states that represent all the intermediate nodes - unique_ptr levels_flat_native; - //! For each level, the starting location in the levels_flat_native array - vector levels_flat_start; +template +static idx_t MergeJoinSimpleLessThan(ScalarMergeInfo &l, ChunkMergeInfo &r) { + auto ldata = (T *)l.order.vdata.data; + auto &lorder = l.order.order; + l.pos = 0; + for (idx_t chunk_idx = 0; chunk_idx < r.order_info.size(); chunk_idx++) { + // we only care about the BIGGEST value in each of the RHS + // because we want to figure out if they are less than [or equal] to ANY value + // get the biggest value from the RHS + auto &rorder = r.order_info[chunk_idx]; + auto rdata = (T *)rorder.vdata.data; + auto max_r_value = rdata[rorder.vdata.sel->get_index(rorder.order.get_index(rorder.count - 1))]; + // now we start from the current lpos value and check if we found a new value that is [<= OR <] the max RHS + // value + while (true) { + auto lidx = lorder.get_index(l.pos); + auto dlidx = l.order.vdata.sel->get_index(lidx); + if (OP::Operation(ldata[dlidx], max_r_value)) { + // found a match for lpos, set it in the found_match vector + r.found_match[lidx] = true; + l.pos++; + if (l.pos == l.order.count) { + // early out: we exhausted the entire LHS and they all match + return 0; + } + } else { + // we found no match: any subsequent value from the LHS we scan now will be bigger and thus also not + // match move to the next RHS chunk + break; + } + } + } + return 0; +} - //! The total number of internal nodes of the tree, stored in levels_flat_native - idx_t internal_nodes; +template +idx_t MergeJoinSimple::LessThan::Operation(ScalarMergeInfo &l, ChunkMergeInfo &r) { + return MergeJoinSimpleLessThan(l, r); +} - //! 
The (sorted) input chunk collection on which the tree is built - ChunkCollection *input_ref; +template +idx_t MergeJoinSimple::LessThanEquals::Operation(ScalarMergeInfo &l, ChunkMergeInfo &r) { + return MergeJoinSimpleLessThan(l, r); +} - // TREE_FANOUT needs to cleanly divide STANDARD_VECTOR_SIZE - static constexpr idx_t TREE_FANOUT = 64; -}; +INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinSimple, LessThan, ScalarMergeInfo, ChunkMergeInfo) +INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinSimple, LessThanEquals, ScalarMergeInfo, ChunkMergeInfo) +INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinSimple, GreaterThan, ScalarMergeInfo, ChunkMergeInfo) +INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinSimple, GreaterThanEquals, ScalarMergeInfo, ChunkMergeInfo) } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parallel/task_context.hpp +// duckdb/execution/nested_loop_join.hpp // // //===----------------------------------------------------------------------===// @@ -37624,1139 +44984,936 @@ class WindowSegmentTree { + namespace duckdb { -class PhysicalOperator; -//! TaskContext holds task specific information relating to the excution -class TaskContext { -public: - TaskContext() { - } +struct NestedLoopJoinInner { + static idx_t Perform(idx_t <uple, idx_t &rtuple, DataChunk &left_conditions, DataChunk &right_conditions, + SelectionVector &lvector, SelectionVector &rvector, const vector &conditions); +}; - //! 
Per-operator task info - unordered_map task_info; +struct NestedLoopJoinMark { + static void Perform(DataChunk &left, ChunkCollection &right, bool found_match[], + const vector &conditions); }; } // namespace duckdb - - - -#include -#include - namespace duckdb { -using counts_t = std::vector; - -// Global sink state -class WindowGlobalState : public GlobalOperatorState { -public: - WindowGlobalState(PhysicalWindow &op_p, ClientContext &context) : op(op_p) { - } - - PhysicalWindow &op; - std::mutex lock; - ChunkCollection chunks; - ChunkCollection over_collection; - ChunkCollection hash_collection; - ChunkCollection window_results; - counts_t counts; -}; - -// Per-thread sink state -class WindowLocalState : public LocalSinkState { -public: - explicit WindowLocalState(PhysicalWindow &op_p, const unsigned partition_bits = 10) - : op(op_p), partition_count(size_t(1) << partition_bits) { +template +struct ComparisonOperationWrapper { + template + static inline bool Operation(T left, T right, bool left_is_null, bool right_is_null) { + if (left_is_null || right_is_null) { + return false; + } + return OP::Operation(left, right); } - - PhysicalWindow &op; - ChunkCollection chunks; - ChunkCollection over_collection; - ChunkCollection hash_collection; - const size_t partition_count; - counts_t counts; }; -// this implements a sorted window functions variant -PhysicalWindow::PhysicalWindow(vector types, vector> select_list, - idx_t estimated_cardinality, PhysicalOperatorType type) - : PhysicalSink(type, move(types), estimated_cardinality), select_list(move(select_list)) { -} - -template -class BitArray { -public: - using bits_t = std::vector; - - static const auto BITS_PER_WORD = std::numeric_limits::digits; - static const auto ZEROS = std::numeric_limits::min(); - static const auto ONES = std::numeric_limits::max(); - - class reference { // NOLINT - public: - friend BitArray; +struct InitialNestedLoopJoin { + template + static idx_t Operation(Vector &left, Vector &right, 
idx_t left_size, idx_t right_size, idx_t &lpos, idx_t &rpos, + SelectionVector &lvector, SelectionVector &rvector, idx_t current_match_count) { + // initialize phase of nested loop join + // fill lvector and rvector with matches from the base vectors + VectorData left_data, right_data; + left.Orrify(left_size, left_data); + right.Orrify(right_size, right_data); - reference &operator=(bool x) noexcept { - auto b = parent.Block(pos); - auto s = parent.Shift(pos); - auto w = parent.GetBlock(b); - if (parent.TestBit(w, s) != x) { - parent.SetBlock(b, parent.FlipBit(w, s)); + auto ldata = (T *)left_data.data; + auto rdata = (T *)right_data.data; + idx_t result_count = 0; + for (; rpos < right_size; rpos++) { + idx_t right_position = right_data.sel->get_index(rpos); + bool right_is_valid = right_data.validity.RowIsValid(right_position); + for (; lpos < left_size; lpos++) { + if (result_count == STANDARD_VECTOR_SIZE) { + // out of space! + return result_count; + } + idx_t left_position = left_data.sel->get_index(lpos); + bool left_is_valid = left_data.validity.RowIsValid(left_position); + if (OP::Operation(ldata[left_position], rdata[right_position], !left_is_valid, !right_is_valid)) { + // emit tuple + lvector.set_index(result_count, lpos); + rvector.set_index(result_count, rpos); + result_count++; + } } - return *this; - } - - reference &operator=(const reference &r) noexcept { - return *this = bool(r); - } - - explicit operator bool() const noexcept { - return parent[pos]; - } - - bool operator~() const noexcept { - return !parent[pos]; - } - - private: - explicit reference(BitArray &parent_p, size_t pos_p) : parent(parent_p), pos(pos_p) { + lpos = 0; } - - BitArray &parent; - size_t pos; - }; - - static size_t Block(const size_t &pos) { - return pos / BITS_PER_WORD; - } - - static unsigned Shift(const size_t &pos) { - return pos % BITS_PER_WORD; - } - - static bool TestBit(W w, unsigned s) { - return (w >> s) & 0x01; - } - - static W SetBit(W w, unsigned s) { - return 
w | (W(1) << s); - } - - static W ClearBit(W w, unsigned s) { - return w & ~(W(1) << s); - } - - static W FlipBit(W w, unsigned s) { - return w ^ (W(1) << s); - } - - explicit BitArray(const size_t &count, const W &init = 0) - : bits(count ? Block(count - 1) + 1 : 0, init), count(count) { - } - - size_t Count() const { - return count; - } - - const W &GetBlock(size_t b) const { - return bits[b]; - } - - W &GetBlock(size_t b) { - return bits[b]; - } - - void SetBlock(size_t b, const W &block) { - GetBlock(b) = block; + return result_count; } +}; - bool operator[](size_t pos) const { - return TestBit(GetBlock(Block(pos)), Shift(pos)); - } +struct RefineNestedLoopJoin { + template + static idx_t Operation(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, idx_t &rpos, + SelectionVector &lvector, SelectionVector &rvector, idx_t current_match_count) { + VectorData left_data, right_data; + left.Orrify(left_size, left_data); + right.Orrify(right_size, right_data); - reference operator[](size_t pos) { - return reference(*this, pos); + // refine phase of the nested loop join + // refine lvector and rvector based on matches of subsequent conditions (in case there are multiple conditions + // in the join) + D_ASSERT(current_match_count > 0); + auto ldata = (T *)left_data.data; + auto rdata = (T *)right_data.data; + idx_t result_count = 0; + for (idx_t i = 0; i < current_match_count; i++) { + auto lidx = lvector.get_index(i); + auto ridx = rvector.get_index(i); + auto left_idx = left_data.sel->get_index(lidx); + auto right_idx = right_data.sel->get_index(ridx); + bool left_is_valid = left_data.validity.RowIsValid(left_idx); + bool right_is_valid = right_data.validity.RowIsValid(right_idx); + if (OP::Operation(ldata[left_idx], rdata[right_idx], !left_is_valid, !right_is_valid)) { + lvector.set_index(result_count, lidx); + rvector.set_index(result_count, ridx); + result_count++; + } + } + return result_count; } - -private: - bits_t bits; - size_t count; 
}; -template -struct ChunkIterator { - - ChunkIterator(ChunkCollection &collection, const idx_t col_idx) - : collection(collection), col_idx(col_idx), chunk_begin(0), chunk_end(0), ch_idx(0), data(nullptr), - validity(nullptr) { - Update(0); +template +static idx_t NestedLoopJoinTypeSwitch(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, + idx_t &rpos, SelectionVector &lvector, SelectionVector &rvector, + idx_t current_match_count) { + switch (left.GetType().InternalType()) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, rvector, + current_match_count); + case PhysicalType::INT16: + return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, rvector, + current_match_count); + case PhysicalType::INT32: + return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, rvector, + current_match_count); + case PhysicalType::INT64: + return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, rvector, + current_match_count); + case PhysicalType::UINT8: + return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, rvector, + current_match_count); + case PhysicalType::UINT16: + return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, + rvector, current_match_count); + case PhysicalType::UINT32: + return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, + rvector, current_match_count); + case PhysicalType::UINT64: + return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, + rvector, current_match_count); + case PhysicalType::INT128: + return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, + rvector, current_match_count); + case PhysicalType::FLOAT: + return NLTYPE::template Operation(left, 
right, left_size, right_size, lpos, rpos, lvector, rvector, + current_match_count); + case PhysicalType::DOUBLE: + return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, rvector, + current_match_count); + case PhysicalType::INTERVAL: + return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, + rvector, current_match_count); + case PhysicalType::VARCHAR: + return NLTYPE::template Operation(left, right, left_size, right_size, lpos, rpos, lvector, + rvector, current_match_count); + default: + throw InternalException("Unimplemented type for join!"); } +} - void Update(idx_t r) { - if (r >= chunk_end) { - ch_idx = collection.LocateChunk(r); - auto &ch = collection.GetChunk(ch_idx); - chunk_begin = ch_idx * STANDARD_VECTOR_SIZE; - chunk_end = chunk_begin + ch.size(); - auto &vector = ch.data[col_idx]; - data = FlatVector::GetData(vector); - validity = &FlatVector::Validity(vector); - } +template +idx_t NestedLoopJoinComparisonSwitch(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, + idx_t &rpos, SelectionVector &lvector, SelectionVector &rvector, + idx_t current_match_count, ExpressionType comparison_type) { + D_ASSERT(left.GetType() == right.GetType()); + switch (comparison_type) { + case ExpressionType::COMPARE_EQUAL: + return NestedLoopJoinTypeSwitch>( + left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count); + case ExpressionType::COMPARE_NOTEQUAL: + return NestedLoopJoinTypeSwitch>( + left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count); + case ExpressionType::COMPARE_LESSTHAN: + return NestedLoopJoinTypeSwitch>( + left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count); + case ExpressionType::COMPARE_GREATERTHAN: + return NestedLoopJoinTypeSwitch>( + left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count); + case 
ExpressionType::COMPARE_LESSTHANOREQUALTO: + return NestedLoopJoinTypeSwitch>( + left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count); + case ExpressionType::COMPARE_GREATERTHANOREQUALTO: + return NestedLoopJoinTypeSwitch>( + left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count); + case ExpressionType::COMPARE_DISTINCT_FROM: + return NestedLoopJoinTypeSwitch(left, right, left_size, right_size, lpos, rpos, + lvector, rvector, current_match_count); + default: + throw NotImplementedException("Unimplemented comparison type for join!"); } +} - bool IsValid(idx_t r) { - return validity->RowIsValid(r - chunk_begin); +idx_t NestedLoopJoinInner::Perform(idx_t &lpos, idx_t &rpos, DataChunk &left_conditions, DataChunk &right_conditions, + SelectionVector &lvector, SelectionVector &rvector, + const vector &conditions) { + D_ASSERT(left_conditions.ColumnCount() == right_conditions.ColumnCount()); + if (lpos >= left_conditions.size() || rpos >= right_conditions.size()) { + return 0; } - - INPUT_TYPE GetValue(idx_t r) { - return data[r - chunk_begin]; + // for the first condition, lvector and rvector are not set yet + // we initialize them using the InitialNestedLoopJoin + idx_t match_count = NestedLoopJoinComparisonSwitch( + left_conditions.data[0], right_conditions.data[0], left_conditions.size(), right_conditions.size(), lpos, rpos, + lvector, rvector, 0, conditions[0].comparison); + // now resolve the rest of the conditions + for (idx_t i = 1; i < conditions.size(); i++) { + // check if we have run out of tuples to compare + if (match_count == 0) { + return 0; + } + // if not, get the vectors to compare + Vector &l = left_conditions.data[i]; + Vector &r = right_conditions.data[i]; + // then we refine the currently obtained results using the RefineNestedLoopJoin + match_count = NestedLoopJoinComparisonSwitch( + l, r, left_conditions.size(), right_conditions.size(), lpos, rpos, lvector, rvector, match_count, + 
conditions[i].comparison); } + return match_count; +} -private: - ChunkCollection &collection; - idx_t col_idx; - idx_t chunk_begin; - idx_t chunk_end; - idx_t ch_idx; - const INPUT_TYPE *data; - ValidityMask *validity; -}; - -template -static void MaskTypedColumn(MASK_TYPE &mask, ChunkCollection &over_collection, const idx_t c) { - ChunkIterator ci(over_collection, c); - - // Record the first value - idx_t r = 0; - auto prev_valid = ci.IsValid(r); - auto prev = ci.GetValue(r); - ++r; +} // namespace duckdb - // Process complete blocks - const auto row_count = over_collection.Count(); - const auto complete_block_count = mask.Block(row_count); - for (idx_t b = mask.Block(r); b < complete_block_count; ++b) { - auto block = mask.GetBlock(b); - // Skip the block if it is all boundaries. - if (block == mask.ONES) { - r -= (r % mask.BITS_PER_WORD); - r += mask.BITS_PER_WORD; - continue; - } - // Scan the rows in the complete block - for (unsigned shift = mask.Shift(r); shift < mask.BITS_PER_WORD; ++shift, ++r) { - // Update the chunk for this row - ci.Update(r); - auto curr_valid = ci.IsValid(r); - auto curr = ci.GetValue(r); - if (!mask.TestBit(block, shift)) { - if (curr_valid != prev_valid || (curr_valid && !Equals::Operation(curr, prev))) { - block = mask.SetBit(block, shift); - } - } - prev_valid = curr_valid; - prev = curr; - } - mask.SetBlock(b, block); - } +namespace duckdb { - // Finish last ragged block - if (r < row_count) { - auto block = mask.GetBlock(complete_block_count); - if (block != mask.ONES) { - for (unsigned shift = mask.Shift(r); r < row_count; ++shift, ++r) { - // Update the chunk for this row - ci.Update(r); +template +static void TemplatedMarkJoin(Vector &left, Vector &right, idx_t lcount, idx_t rcount, bool found_match[]) { + VectorData left_data, right_data; + left.Orrify(lcount, left_data); + right.Orrify(rcount, right_data); - auto curr_valid = ci.IsValid(r); - auto curr = ci.GetValue(r); - if (!mask.TestBit(block, shift)) { - if (curr_valid 
!= prev_valid || (curr_valid && !Equals::Operation(curr, prev))) { - block = mask.SetBit(block, shift); - } - } - prev_valid = curr_valid; - prev = curr; + auto ldata = (T *)left_data.data; + auto rdata = (T *)right_data.data; + for (idx_t i = 0; i < lcount; i++) { + if (found_match[i]) { + continue; + } + auto lidx = left_data.sel->get_index(i); + if (!left_data.validity.RowIsValid(lidx)) { + continue; + } + for (idx_t j = 0; j < rcount; j++) { + auto ridx = right_data.sel->get_index(j); + if (!right_data.validity.RowIsValid(ridx)) { + continue; + } + if (OP::Operation(ldata[lidx], rdata[ridx])) { + found_match[i] = true; + break; } - mask.SetBlock(complete_block_count, block); } } } -template -static void MaskColumn(BitArray &mask, ChunkCollection &over_collection, const idx_t c) { - using MASK_TYPE = BitArray; - - auto &vector = over_collection.GetChunk(0).data[c]; - switch (vector.GetType().InternalType()) { +template +static void MarkJoinSwitch(Vector &left, Vector &right, idx_t lcount, idx_t rcount, bool found_match[]) { + switch (left.GetType().InternalType()) { case PhysicalType::BOOL: case PhysicalType::INT8: - MaskTypedColumn(mask, over_collection, c); - break; + return TemplatedMarkJoin(left, right, lcount, rcount, found_match); case PhysicalType::INT16: - MaskTypedColumn(mask, over_collection, c); - break; + return TemplatedMarkJoin(left, right, lcount, rcount, found_match); case PhysicalType::INT32: - MaskTypedColumn(mask, over_collection, c); - break; + return TemplatedMarkJoin(left, right, lcount, rcount, found_match); case PhysicalType::INT64: - MaskTypedColumn(mask, over_collection, c); - break; + return TemplatedMarkJoin(left, right, lcount, rcount, found_match); + case PhysicalType::INT128: + return TemplatedMarkJoin(left, right, lcount, rcount, found_match); case PhysicalType::UINT8: - MaskTypedColumn(mask, over_collection, c); - break; + return TemplatedMarkJoin(left, right, lcount, rcount, found_match); case PhysicalType::UINT16: - 
MaskTypedColumn(mask, over_collection, c); - break; + return TemplatedMarkJoin(left, right, lcount, rcount, found_match); case PhysicalType::UINT32: - MaskTypedColumn(mask, over_collection, c); - break; + return TemplatedMarkJoin(left, right, lcount, rcount, found_match); case PhysicalType::UINT64: - MaskTypedColumn(mask, over_collection, c); - break; - case PhysicalType::INT128: - MaskTypedColumn(mask, over_collection, c); - break; + return TemplatedMarkJoin(left, right, lcount, rcount, found_match); case PhysicalType::FLOAT: - MaskTypedColumn(mask, over_collection, c); - break; + return TemplatedMarkJoin(left, right, lcount, rcount, found_match); case PhysicalType::DOUBLE: - MaskTypedColumn(mask, over_collection, c); - break; + return TemplatedMarkJoin(left, right, lcount, rcount, found_match); case PhysicalType::VARCHAR: - MaskTypedColumn(mask, over_collection, c); - break; - case PhysicalType::INTERVAL: - MaskTypedColumn(mask, over_collection, c); - break; + return TemplatedMarkJoin(left, right, lcount, rcount, found_match); default: - throw NotImplementedException("Type for comparison"); - break; + throw NotImplementedException("Unimplemented type for mark join!"); } } -template -static idx_t FindNextStart(const BitArray &mask, idx_t l, idx_t r) { - while (l < r) { - // If l is aligned with the start of a block, and the block is blank, then skip forward one block. 
- const auto block = mask.GetBlock(mask.Block(l)); - auto shift = mask.Shift(l); - if (!block && !shift) { - l += mask.BITS_PER_WORD; - continue; - } - - // Loop over the block - for (; shift < mask.BITS_PER_WORD; ++shift, ++l) { - if (mask.TestBit(block, shift)) { - return std::min(l, r); - } - } +static void MarkJoinComparisonSwitch(Vector &left, Vector &right, idx_t lcount, idx_t rcount, bool found_match[], + ExpressionType comparison_type) { + D_ASSERT(left.GetType() == right.GetType()); + switch (comparison_type) { + case ExpressionType::COMPARE_EQUAL: + return MarkJoinSwitch(left, right, lcount, rcount, found_match); + case ExpressionType::COMPARE_NOTEQUAL: + return MarkJoinSwitch(left, right, lcount, rcount, found_match); + case ExpressionType::COMPARE_LESSTHAN: + return MarkJoinSwitch(left, right, lcount, rcount, found_match); + case ExpressionType::COMPARE_GREATERTHAN: + return MarkJoinSwitch(left, right, lcount, rcount, found_match); + case ExpressionType::COMPARE_LESSTHANOREQUALTO: + return MarkJoinSwitch(left, right, lcount, rcount, found_match); + case ExpressionType::COMPARE_GREATERTHANOREQUALTO: + return MarkJoinSwitch(left, right, lcount, rcount, found_match); + default: + throw NotImplementedException("Unimplemented comparison type for join!"); } - - // Didn't find a start so return the end of the range - return r; } -static void MaterializeExpressions(Expression **exprs, idx_t expr_count, ChunkCollection &input, - ChunkCollection &output, bool scalar = false) { - if (expr_count == 0) { - return; +void NestedLoopJoinMark::Perform(DataChunk &left, ChunkCollection &right, bool found_match[], + const vector &conditions) { + // initialize a new temporary selection vector for the left chunk + // loop over all chunks in the RHS + for (idx_t chunk_idx = 0; chunk_idx < right.ChunkCount(); chunk_idx++) { + DataChunk &right_chunk = right.GetChunk(chunk_idx); + for (idx_t i = 0; i < conditions.size(); i++) { + MarkJoinComparisonSwitch(left.data[i], 
right_chunk.data[i], left.size(), right_chunk.size(), found_match, + conditions[i].comparison); + } } +} - vector types; - ExpressionExecutor executor; - for (idx_t expr_idx = 0; expr_idx < expr_count; ++expr_idx) { - types.push_back(exprs[expr_idx]->return_type); - executor.AddExpression(*exprs[expr_idx]); - } +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +// +// +//===----------------------------------------------------------------------===// - for (idx_t i = 0; i < input.ChunkCount(); i++) { - DataChunk chunk; - chunk.Initialize(types); - executor.Execute(input.GetChunk(i), chunk); - chunk.Verify(); - output.Append(chunk); - if (scalar) { - break; - } - } -} -static void MaterializeExpression(Expression *expr, ChunkCollection &input, ChunkCollection &output, - bool scalar = false) { - MaterializeExpressions(&expr, 1, input, output, scalar); -} -static void SortCollectionForPartition(BoundWindowExpression *wexpr, ChunkCollection &input, ChunkCollection &output, - ChunkCollection &sort_collection) { - if (input.Count() == 0) { - return; - } - vector orders; - vector null_order_types; +namespace duckdb { - // we sort by both 1) partition by expression list and 2) order by expressions - for (idx_t prt_idx = 0; prt_idx < wexpr->partitions.size(); prt_idx++) { - orders.push_back(OrderType::ASCENDING); - null_order_types.push_back(OrderByNullType::NULLS_FIRST); - } +class ClientContext; +class BufferManager; - for (idx_t ord_idx = 0; ord_idx < wexpr->orders.size(); ord_idx++) { - orders.push_back(wexpr->orders[ord_idx].type); - null_order_types.push_back(wexpr->orders[ord_idx].null_order); - } +//! PhysicalHashAggregate is an group-by and aggregate implementation that uses +//! 
a hash table to perform the grouping +class PhysicalHashAggregate : public PhysicalSink { +public: + PhysicalHashAggregate(ClientContext &context, vector types, vector> expressions, + idx_t estimated_cardinality, PhysicalOperatorType type = PhysicalOperatorType::HASH_GROUP_BY); + PhysicalHashAggregate(ClientContext &context, vector types, vector> expressions, + vector> groups, idx_t estimated_cardinality, + PhysicalOperatorType type = PhysicalOperatorType::HASH_GROUP_BY); - auto sorted_vector = unique_ptr(new idx_t[input.Count()]); - sort_collection.Sort(orders, null_order_types, sorted_vector.get()); + //! The groups + vector> groups; + //! The aggregates that have to be computed + vector> aggregates; + //! Whether or not the aggregate is an implicit (i.e. ungrouped) aggregate + bool is_implicit_aggr; + //! Whether or not all aggregates are combinable + bool all_combinable; - input.Reorder(sorted_vector.get()); - output.Reorder(sorted_vector.get()); - sort_collection.Reorder(sorted_vector.get()); -} + //! Whether or not any aggregation is DISTINCT + bool any_distinct; -static void HashChunk(counts_t &counts, DataChunk &hash_chunk, DataChunk &sort_chunk, const idx_t partition_cols) { - const vector hash_types(1, LogicalTypeId::HASH); - hash_chunk.Initialize(hash_types); - hash_chunk.SetCardinality(sort_chunk); - auto &hash_vector = hash_chunk.data[0]; + //! The group types + vector group_types; + //! The payload types + vector payload_types; + //! The aggregate return types + vector aggregate_return_types; - const auto count = sort_chunk.size(); - VectorOperations::Hash(sort_chunk.data[0], hash_vector, count); - for (idx_t prt_idx = 1; prt_idx < partition_cols; ++prt_idx) { - VectorOperations::CombineHash(hash_vector, sort_chunk.data[prt_idx], count); - } + //! 
Pointers to the aggregates + vector bindings; - const auto partition_mask = hash_t(counts.size() - 1); - auto hashes = FlatVector::GetData(hash_vector); - if (hash_vector.GetVectorType() == VectorType::CONSTANT_VECTOR) { - const auto bin = (hashes[0] & partition_mask); - counts[bin] += count; - } else { - for (idx_t i = 0; i < count; ++i) { - const auto bin = (hashes[i] & partition_mask); - ++counts[bin]; - } - } -} + unordered_map filter_indexes; -static void MaterializeOverForWindow(BoundWindowExpression *wexpr, DataChunk &input_chunk, DataChunk &over_chunk) { - vector over_types; - ExpressionExecutor executor; +public: + void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, + DataChunk &input) const override; + void Combine(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate) override; + bool Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr gstate) override; - // we sort by both 1) partition by expression list and 2) order by expressions - for (idx_t prt_idx = 0; prt_idx < wexpr->partitions.size(); prt_idx++) { - auto &pexpr = wexpr->partitions[prt_idx]; - over_types.push_back(pexpr->return_type); - executor.AddExpression(*pexpr); - } + void FinalizeImmediate(ClientContext &context, unique_ptr gstate); - for (idx_t ord_idx = 0; ord_idx < wexpr->orders.size(); ord_idx++) { - auto &oexpr = wexpr->orders[ord_idx].expression; - over_types.push_back(oexpr->return_type); - executor.AddExpression(*oexpr); - } + unique_ptr GetLocalSinkState(ExecutionContext &context) override; + unique_ptr GetGlobalState(ClientContext &context) override; - D_ASSERT(over_types.size() > 0); + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; + unique_ptr GetOperatorState() override; - over_chunk.Initialize(over_types); - executor.Execute(input_chunk, over_chunk); + string ParamsToString() const override; - over_chunk.Verify(); -} +private: + //! 
how many groups can we have in the operator before we switch to radix partitioning + idx_t radix_limit; -struct WindowBoundariesState { - idx_t partition_start = 0; - idx_t partition_end = 0; - idx_t peer_start = 0; - idx_t peer_end = 0; - int64_t window_start = -1; - int64_t window_end = -1; - bool is_same_partition = false; - bool is_peer = false; +private: + bool FinalizeInternal(ClientContext &context, unique_ptr gstate, bool immediate, + Pipeline *pipeline); + bool ForceSingleHT(GlobalOperatorState &state) const; }; -static bool WindowNeedsRank(BoundWindowExpression *wexpr) { - return wexpr->type == ExpressionType::WINDOW_PERCENT_RANK || wexpr->type == ExpressionType::WINDOW_RANK || - wexpr->type == ExpressionType::WINDOW_RANK_DENSE || wexpr->type == ExpressionType::WINDOW_CUME_DIST; -} +} // namespace duckdb -static void UpdateWindowBoundaries(BoundWindowExpression *wexpr, const idx_t input_size, const idx_t row_idx, - ChunkCollection &boundary_start_collection, ChunkCollection &boundary_end_collection, - const BitArray &partition_mask, const BitArray &order_mask, - WindowBoundariesState &bounds) { - if (wexpr->partitions.size() + wexpr->orders.size() > 0) { - // determine partition and peer group boundaries to ultimately figure out window size - bounds.is_same_partition = !partition_mask[row_idx]; - bounds.is_peer = !order_mask[row_idx]; - // when the partition changes, recompute the boundaries - if (!bounds.is_same_partition) { - bounds.partition_start = row_idx; - bounds.peer_start = row_idx; - // find end of partition - bounds.partition_end = input_size; - if (!wexpr->partitions.empty()) { - bounds.partition_end = FindNextStart(partition_mask, bounds.partition_start + 1, input_size); - } +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/partitionable_hashtable.hpp +// +// +//===----------------------------------------------------------------------===// - } else if (!bounds.is_peer) { - 
bounds.peer_start = row_idx; - } - if (wexpr->end == WindowBoundary::CURRENT_ROW_RANGE || wexpr->type == ExpressionType::WINDOW_CUME_DIST) { - bounds.peer_end = bounds.partition_end; - if (!wexpr->orders.empty()) { - bounds.peer_end = FindNextStart(order_mask, bounds.peer_start + 1, bounds.partition_end); - } - } - } else { - bounds.is_same_partition = false; - bounds.is_peer = true; - bounds.partition_end = input_size; - bounds.peer_end = bounds.partition_end; - } - // determine window boundaries depending on the type of expression - bounds.window_start = -1; - bounds.window_end = -1; - switch (wexpr->start) { - case WindowBoundary::UNBOUNDED_PRECEDING: - bounds.window_start = bounds.partition_start; - break; - case WindowBoundary::CURRENT_ROW_ROWS: - bounds.window_start = row_idx; - break; - case WindowBoundary::CURRENT_ROW_RANGE: - bounds.window_start = bounds.peer_start; - break; - case WindowBoundary::UNBOUNDED_FOLLOWING: - D_ASSERT(0); // disallowed - break; - case WindowBoundary::EXPR_PRECEDING: { - D_ASSERT(boundary_start_collection.ColumnCount() > 0); - bounds.window_start = - (int64_t)row_idx - - boundary_start_collection.GetValue(0, wexpr->start_expr->IsScalar() ? 0 : row_idx).GetValue(); - break; - } - case WindowBoundary::EXPR_FOLLOWING: { - D_ASSERT(boundary_start_collection.ColumnCount() > 0); - bounds.window_start = - row_idx + - boundary_start_collection.GetValue(0, wexpr->start_expr->IsScalar() ? 
0 : row_idx).GetValue(); - break; - } - default: - throw NotImplementedException("Unsupported boundary"); - } +namespace duckdb { - switch (wexpr->end) { - case WindowBoundary::UNBOUNDED_PRECEDING: - D_ASSERT(0); // disallowed - break; - case WindowBoundary::CURRENT_ROW_ROWS: - bounds.window_end = row_idx + 1; - break; - case WindowBoundary::CURRENT_ROW_RANGE: - bounds.window_end = bounds.peer_end; - break; - case WindowBoundary::UNBOUNDED_FOLLOWING: - bounds.window_end = bounds.partition_end; - break; - case WindowBoundary::EXPR_PRECEDING: - D_ASSERT(boundary_end_collection.ColumnCount() > 0); - bounds.window_end = - (int64_t)row_idx - - boundary_end_collection.GetValue(0, wexpr->end_expr->IsScalar() ? 0 : row_idx).GetValue() + 1; - break; - case WindowBoundary::EXPR_FOLLOWING: - D_ASSERT(boundary_end_collection.ColumnCount() > 0); - bounds.window_end = - row_idx + - boundary_end_collection.GetValue(0, wexpr->end_expr->IsScalar() ? 0 : row_idx).GetValue() + 1; +struct RadixPartitionInfo { + explicit RadixPartitionInfo(idx_t _n_partitions_upper_bound); + const idx_t n_partitions; + const idx_t radix_bits; + const hash_t radix_mask; + constexpr static idx_t RADIX_SHIFT = 40; +}; - break; - default: - throw NotImplementedException("Unsupported boundary"); - } +typedef vector> HashTableList; - // clamp windows to partitions if they should exceed - if (bounds.window_start < (int64_t)bounds.partition_start) { - bounds.window_start = bounds.partition_start; - } - if (bounds.window_start > (int64_t)bounds.partition_end) { - bounds.window_start = bounds.partition_end; - } - if (bounds.window_end < (int64_t)bounds.partition_start) { - bounds.window_end = bounds.partition_start; - } - if (bounds.window_end > (int64_t)bounds.partition_end) { - bounds.window_end = bounds.partition_end; - } +class PartitionableHashTable { +public: + PartitionableHashTable(BufferManager &buffer_manager_p, RadixPartitionInfo &partition_info_p, + vector group_types_p, vector payload_types_p, + 
vector bindings_p); - if (bounds.window_start < 0 || bounds.window_end < 0) { - throw Exception("Failed to compute window boundaries"); - } -} + idx_t AddChunk(DataChunk &groups, DataChunk &payload, bool do_partition); + void Partition(); + bool IsPartitioned(); -static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollection &input, ChunkCollection &output, - const BitArray &partition_mask, const BitArray &order_mask, - const idx_t output_col) { + HashTableList GetPartition(idx_t partition); + HashTableList GetUnpartitioned(); - // TODO we could evaluate those expressions in parallel + void Finalize(); - // evaluate inner expressions of window functions, could be more complex - ChunkCollection payload_collection; - vector exprs; - for (auto &child : wexpr->children) { - exprs.push_back(child.get()); - } - // TODO: child may be a scalar, don't need to materialize the whole collection then - MaterializeExpressions(exprs.data(), exprs.size(), input, payload_collection); +private: + BufferManager &buffer_manager; + vector group_types; + vector payload_types; + vector bindings; - ChunkCollection leadlag_offset_collection; - ChunkCollection leadlag_default_collection; - if (wexpr->type == ExpressionType::WINDOW_LEAD || wexpr->type == ExpressionType::WINDOW_LAG) { - if (wexpr->offset_expr) { - MaterializeExpression(wexpr->offset_expr.get(), input, leadlag_offset_collection, - wexpr->offset_expr->IsScalar()); - } - if (wexpr->default_expr) { - MaterializeExpression(wexpr->default_expr.get(), input, leadlag_default_collection, - wexpr->default_expr->IsScalar()); - } - } + bool is_partitioned; + RadixPartitionInfo &partition_info; + vector sel_vectors; + vector sel_vector_sizes; + DataChunk group_subset, payload_subset; + Vector hashes, hashes_subset; - // evaluate boundaries if present. 
- ChunkCollection boundary_start_collection; - if (wexpr->start_expr && - (wexpr->start == WindowBoundary::EXPR_PRECEDING || wexpr->start == WindowBoundary::EXPR_FOLLOWING)) { - MaterializeExpression(wexpr->start_expr.get(), input, boundary_start_collection, wexpr->start_expr->IsScalar()); - } - ChunkCollection boundary_end_collection; - if (wexpr->end_expr && - (wexpr->end == WindowBoundary::EXPR_PRECEDING || wexpr->end == WindowBoundary::EXPR_FOLLOWING)) { - MaterializeExpression(wexpr->end_expr.get(), input, boundary_end_collection, wexpr->end_expr->IsScalar()); - } + HashTableList unpartitioned_hts; + unordered_map radix_partitioned_hts; - // build a segment tree for frame-adhering aggregates - // see http://www.vldb.org/pvldb/vol8/p1058-leis.pdf - unique_ptr segment_tree = nullptr; +private: + idx_t ListAddChunk(HashTableList &list, DataChunk &groups, Vector &group_hashes, DataChunk &payload); +}; +} // namespace duckdb - if (wexpr->aggregate) { - segment_tree = make_unique(*(wexpr->aggregate), wexpr->bind_info.get(), wexpr->return_type, - &payload_collection); - } - WindowBoundariesState bounds; - uint64_t dense_rank = 1, rank_equal = 0, rank = 1; - // this is the main loop, go through all sorted rows and compute window function result - for (idx_t row_idx = 0; row_idx < input.Count(); row_idx++) { - // special case, OVER (), aggregate over everything - UpdateWindowBoundaries(wexpr, input.Count(), row_idx, boundary_start_collection, boundary_end_collection, - partition_mask, order_mask, bounds); - if (WindowNeedsRank(wexpr)) { - if (!bounds.is_same_partition || row_idx == 0) { // special case for first row, need to init - dense_rank = 1; - rank = 1; - rank_equal = 0; - } else if (!bounds.is_peer) { - dense_rank++; - rank += rank_equal; - rank_equal = 0; - } - rank_equal++; - } - Value res; - // if no values are read for window, result is NULL - if (bounds.window_start >= bounds.window_end) { - output.SetValue(output_col, row_idx, res); - continue; - } - 
switch (wexpr->type) { - case ExpressionType::WINDOW_AGGREGATE: { - res = segment_tree->Compute(bounds.window_start, bounds.window_end); - break; - } - case ExpressionType::WINDOW_ROW_NUMBER: { - res = Value::Numeric(wexpr->return_type, row_idx - bounds.partition_start + 1); - break; - } - case ExpressionType::WINDOW_RANK_DENSE: { - res = Value::Numeric(wexpr->return_type, dense_rank); - break; - } - case ExpressionType::WINDOW_RANK: { - res = Value::Numeric(wexpr->return_type, rank); - break; - } - case ExpressionType::WINDOW_PERCENT_RANK: { - int64_t denom = (int64_t)bounds.partition_end - bounds.partition_start - 1; - double percent_rank = denom > 0 ? ((double)rank - 1) / denom : 0; - res = Value(percent_rank); - break; - } - case ExpressionType::WINDOW_CUME_DIST: { - int64_t denom = (int64_t)bounds.partition_end - bounds.partition_start; - double cume_dist = denom > 0 ? ((double)(bounds.peer_end - bounds.partition_start)) / denom : 0; - res = Value(cume_dist); - break; - } - case ExpressionType::WINDOW_NTILE: { - if (payload_collection.ColumnCount() != 1) { - throw Exception("NTILE needs a parameter"); - } - auto n_param = payload_collection.GetValue(0, row_idx).GetValue(); - // With thanks from SQLite's ntileValueFunc() - int64_t n_total = bounds.partition_end - bounds.partition_start; - if (n_param > n_total) { - // more groups allowed than we have values - // map every entry to a unique group - n_param = n_total; - } - int64_t n_size = (n_total / n_param); - // find the row idx within the group - D_ASSERT(row_idx >= bounds.partition_start); - int64_t adjusted_row_idx = row_idx - bounds.partition_start; - // now compute the ntile - int64_t n_large = n_total - n_param * n_size; - int64_t i_small = n_large * (n_size + 1); - int64_t result_ntile; - D_ASSERT((n_large * (n_size + 1) + (n_param - n_large) * n_size) == n_total); - if (adjusted_row_idx < i_small) { - result_ntile = 1 + adjusted_row_idx / (n_size + 1); - } else { - result_ntile = 1 + n_large + 
(adjusted_row_idx - i_small) / n_size; - } - // result has to be between [1, NTILE] - D_ASSERT(result_ntile >= 1 && result_ntile <= n_param); - res = Value::Numeric(wexpr->return_type, result_ntile); - break; - } - case ExpressionType::WINDOW_LEAD: - case ExpressionType::WINDOW_LAG: { - Value def_val = Value(wexpr->return_type); - idx_t offset = 1; - if (wexpr->offset_expr) { - offset = leadlag_offset_collection.GetValue(0, wexpr->offset_expr->IsScalar() ? 0 : row_idx) - .GetValue(); - } - if (wexpr->default_expr) { - def_val = leadlag_default_collection.GetValue(0, wexpr->default_expr->IsScalar() ? 0 : row_idx); - } - if (wexpr->type == ExpressionType::WINDOW_LEAD) { - auto lead_idx = row_idx + offset; - if (lead_idx < bounds.partition_end) { - res = payload_collection.GetValue(0, lead_idx); - } else { - res = def_val; - } - } else { - int64_t lag_idx = (int64_t)row_idx - offset; - if (lag_idx >= 0 && (idx_t)lag_idx >= bounds.partition_start) { - res = payload_collection.GetValue(0, lag_idx); - } else { - res = def_val; - } - } +namespace duckdb { - break; +PhysicalHashAggregate::PhysicalHashAggregate(ClientContext &context, vector types, + vector> expressions, idx_t estimated_cardinality, + PhysicalOperatorType type) + : PhysicalHashAggregate(context, move(types), move(expressions), {}, estimated_cardinality, type) { +} + +PhysicalHashAggregate::PhysicalHashAggregate(ClientContext &context, vector types, + vector> expressions, + vector> groups_p, idx_t estimated_cardinality, + PhysicalOperatorType type) + : PhysicalSink(type, move(types), estimated_cardinality), groups(move(groups_p)), all_combinable(true), + any_distinct(false) { + // get a list of all aggregates to be computed + // fake a single group with a constant value for aggregation without groups + if (this->groups.empty()) { + group_types.push_back(LogicalType::TINYINT); + is_implicit_aggr = true; + } else { + is_implicit_aggr = false; + } + for (auto &expr : groups) { + 
group_types.push_back(expr->return_type); + } + vector payload_types_filters; + for (auto &expr : expressions) { + D_ASSERT(expr->expression_class == ExpressionClass::BOUND_AGGREGATE); + D_ASSERT(expr->IsAggregate()); + auto &aggr = (BoundAggregateExpression &)*expr; + bindings.push_back(&aggr); + + if (aggr.distinct) { + any_distinct = true; } - case ExpressionType::WINDOW_FIRST_VALUE: { - res = payload_collection.GetValue(0, bounds.window_start); - break; + + aggregate_return_types.push_back(aggr.return_type); + for (auto &child : aggr.children) { + payload_types.push_back(child->return_type); } - case ExpressionType::WINDOW_LAST_VALUE: { - res = payload_collection.GetValue(0, bounds.window_end - 1); - break; + if (aggr.filter) { + payload_types_filters.push_back(aggr.filter->return_type); } - default: - throw NotImplementedException("Window aggregate type %s", ExpressionTypeToString(wexpr->type)); + if (!aggr.function.combine) { + all_combinable = false; } + aggregates.push_back(move(expr)); + } - output.SetValue(output_col, row_idx, res); + for (const auto &pay_filters : payload_types_filters) { + payload_types.push_back(pay_filters); } -} -using WindowExpressions = vector; + // 10000 seems like a good compromise here + radix_limit = 10000; -static void ComputeWindowExpressions(WindowExpressions &window_exprs, ChunkCollection &big_data, - ChunkCollection &window_results, ChunkCollection &over_collection) { - // Idempotency - if (big_data.Count() == 0) { - return; + // filter_indexes must be pre-built, not lazily instantiated in parallel... 
+ idx_t aggregate_input_idx = 0; + for (auto &aggregate : aggregates) { + auto &aggr = (BoundAggregateExpression &)*aggregate; + aggregate_input_idx += aggr.children.size(); } - // Pick out a function for the OVER clause - auto over_expr = window_exprs[0]; + for (auto &aggregate : aggregates) { + auto &aggr = (BoundAggregateExpression &)*aggregate; + if (aggr.filter) { + auto &bound_ref_expr = (BoundReferenceExpression &)*aggr.filter; + auto it = filter_indexes.find(aggr.filter.get()); + if (it == filter_indexes.end()) { + filter_indexes[aggr.filter.get()] = bound_ref_expr.index; + bound_ref_expr.index = aggregate_input_idx++; + } else { + ++aggregate_input_idx; + } + } + } +} - // Sort the partition - const auto sort_col_count = over_expr->partitions.size() + over_expr->orders.size(); - if (sort_col_count > 0) { - SortCollectionForPartition(over_expr, big_data, window_results, over_collection); +//===--------------------------------------------------------------------===// +// Sink +//===--------------------------------------------------------------------===// +class HashAggregateGlobalState : public GlobalOperatorState { +public: + HashAggregateGlobalState(PhysicalHashAggregate &op_p, ClientContext &context) + : op(op_p), is_empty(true), total_groups(0), + partition_info((idx_t)TaskScheduler::GetScheduler(context).NumberOfThreads()) { } - // Set bits for the start of each partition - BitArray partition_bits(big_data.Count()); - partition_bits[0] = true; + PhysicalHashAggregate &op; + vector> intermediate_hts; + vector> finalized_hts; - for (idx_t c = 0; c < over_expr->partitions.size(); ++c) { - MaskColumn(partition_bits, over_collection, c); - } + //! Whether or not any tuples were added to the HT + bool is_empty; + //! The lock for updating the global aggregate state + mutex lock; + //! a counter to determine if we should switch over to p + atomic total_groups; - // Set bits for the start of each peer group. 
- // Partitions also break peer groups, so start with the partition bits. - auto order_bits = partition_bits; - for (idx_t c = over_expr->partitions.size(); c < sort_col_count; ++c) { - MaskColumn(order_bits, over_collection, c); - } + RadixPartitionInfo partition_info; +}; - // Compute the functions - for (idx_t expr_idx = 0; expr_idx < window_exprs.size(); ++expr_idx) { - ComputeWindowExpression(window_exprs[expr_idx], big_data, window_results, partition_bits, order_bits, expr_idx); +class HashAggregateLocalState : public LocalSinkState { +public: + explicit HashAggregateLocalState(PhysicalHashAggregate &op_p) : op(op_p), is_empty(true) { + group_chunk.InitializeEmpty(op.group_types); + if (!op.payload_types.empty()) { + aggregate_input_chunk.InitializeEmpty(op.payload_types); + } + + // if there are no groups we create a fake group so everything has the same group + if (op.groups.empty()) { + group_chunk.data[0].Reference(Value::TINYINT(42)); + } } -} -static void AppendCollection(const ChunkCollection &source, ChunkCollection &target, SelectionVector &sel, - const idx_t source_count, const idx_t chunk_idx) { + PhysicalHashAggregate &op; - DataChunk chunk; - chunk.Initialize(source.Types()); - source.GetChunk(chunk_idx).Copy(chunk, sel, source_count); - target.Append(chunk); -} + DataChunk group_chunk; + DataChunk aggregate_input_chunk; -static void ExtractPartition(WindowGlobalState &gstate, ChunkCollection &chunks, ChunkCollection &window_results, - ChunkCollection &over_collection, const hash_t hash_bin, const hash_t hash_mask) { + //! 
The aggregate HT + unique_ptr ht; - // Copy the partition data so we can work with it on this thread - ChunkCollection &hashes = gstate.hash_collection; - SelectionVector sel; - for (idx_t chunk_idx = 0; chunk_idx < hashes.ChunkCount(); ++chunk_idx) { - // Build a selection vector of matching hashes - auto &hash_chunk = hashes.GetChunk(chunk_idx); - auto hash_size = hash_chunk.size(); - auto hash_data = FlatVector::GetData(hash_chunk.data[0]); - sel.Initialize(hash_size); - idx_t bin_size = 0; - for (idx_t i = 0; i < hash_size; ++i) { - if ((hash_data[i] & hash_mask) == hash_bin) { - sel.set_index(bin_size++, i); - } - } + //! Whether or not any tuples were added to the HT + bool is_empty; +}; - // Copy the data for each collection - if (bin_size == 0) { - continue; - } +unique_ptr PhysicalHashAggregate::GetGlobalState(ClientContext &context) { + return make_unique(*this, context); +} - AppendCollection(gstate.chunks, chunks, sel, bin_size, chunk_idx); - AppendCollection(gstate.window_results, window_results, sel, bin_size, chunk_idx); - AppendCollection(gstate.over_collection, over_collection, sel, bin_size, chunk_idx); - } +unique_ptr PhysicalHashAggregate::GetLocalSinkState(ExecutionContext &context) { + return make_unique(*this); } -//===--------------------------------------------------------------------===// -// GetChunkInternal -//===--------------------------------------------------------------------===// -idx_t PhysicalWindow::MaxThreads(ClientContext &context) { - // Recursive CTE can cause us to be called befor Finalize, - // so we have to check and fall back to the cardinality estimate - // in that case - if (!this->sink_state.get()) { - return (estimated_cardinality + STANDARD_VECTOR_SIZE - 1) / STANDARD_VECTOR_SIZE + 1; - } - auto &state = (WindowGlobalState &)*this->sink_state; +void PhysicalHashAggregate::Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, + DataChunk &input) const { + auto &llstate = 
(HashAggregateLocalState &)lstate; + auto &gstate = (HashAggregateGlobalState &)state; - // If there is only one partition, we have to process it on one thread. - if (state.counts.empty()) { - return 1; + DataChunk &group_chunk = llstate.group_chunk; + DataChunk &aggregate_input_chunk = llstate.aggregate_input_chunk; + + for (idx_t group_idx = 0; group_idx < groups.size(); group_idx++) { + auto &group = groups[group_idx]; + D_ASSERT(group->type == ExpressionType::BOUND_REF); + auto &bound_ref_expr = (BoundReferenceExpression &)*group; + group_chunk.data[group_idx].Reference(input.data[bound_ref_expr.index]); + } + idx_t aggregate_input_idx = 0; + for (auto &aggregate : aggregates) { + auto &aggr = (BoundAggregateExpression &)*aggregate; + for (auto &child_expr : aggr.children) { + D_ASSERT(child_expr->type == ExpressionType::BOUND_REF); + auto &bound_ref_expr = (BoundReferenceExpression &)*child_expr; + aggregate_input_chunk.data[aggregate_input_idx++].Reference(input.data[bound_ref_expr.index]); + } + } + for (auto &aggregate : aggregates) { + auto &aggr = (BoundAggregateExpression &)*aggregate; + if (aggr.filter) { + auto it = filter_indexes.find(aggr.filter.get()); + D_ASSERT(it != filter_indexes.end()); + aggregate_input_chunk.data[aggregate_input_idx++].Reference(input.data[it->second]); + } } - idx_t max_threads = 0; - for (const auto count : state.counts) { - max_threads += int(count > 0); + group_chunk.SetCardinality(input.size()); + aggregate_input_chunk.SetCardinality(input.size()); + + group_chunk.Verify(); + aggregate_input_chunk.Verify(); + D_ASSERT(aggregate_input_chunk.ColumnCount() == 0 || group_chunk.size() == aggregate_input_chunk.size()); + + // if we have non-combinable aggregates (e.g. 
string_agg) or any distinct aggregates we cannot keep parallel hash + // tables + if (ForceSingleHT(state)) { + lock_guard glock(gstate.lock); + gstate.is_empty = gstate.is_empty && group_chunk.size() == 0; + if (gstate.finalized_hts.empty()) { + gstate.finalized_hts.push_back( + make_unique(BufferManager::GetBufferManager(context.client), group_types, + payload_types, bindings, HtEntryType::HT_WIDTH_64)); + } + D_ASSERT(gstate.finalized_hts.size() == 1); + gstate.total_groups += gstate.finalized_hts[0]->AddChunk(group_chunk, aggregate_input_chunk); + return; } - return max_threads; -} + D_ASSERT(all_combinable); + D_ASSERT(!any_distinct); -// Global read state -class WindowParallelState : public ParallelState { -public: - WindowParallelState() : next_part(0) { + if (group_chunk.size() > 0) { + llstate.is_empty = false; } - //! The output read position. - std::atomic next_part; -}; -unique_ptr PhysicalWindow::GetParallelState() { - auto result = make_unique(); - return move(result); + if (!llstate.ht) { + llstate.ht = make_unique(BufferManager::GetBufferManager(context.client), + gstate.partition_info, group_types, payload_types, bindings); + } + + gstate.total_groups += + llstate.ht->AddChunk(group_chunk, aggregate_input_chunk, + gstate.total_groups > radix_limit && gstate.partition_info.n_partitions > 1); } -// Per-thread read state -class PhysicalWindowOperatorState : public PhysicalOperatorState { +class PhysicalHashAggregateState : public PhysicalOperatorState { public: - PhysicalWindowOperatorState(PhysicalOperator &op, PhysicalOperator *child) - : PhysicalOperatorState(op, child), parallel_state(nullptr), initialized(false) { + PhysicalHashAggregateState(PhysicalOperator &op, vector &group_types, + vector &aggregate_types, PhysicalOperator *child) + : PhysicalOperatorState(op, child), ht_index(0), ht_scan_position(0) { + auto scan_chunk_types = group_types; + for (auto &aggr_type : aggregate_types) { + scan_chunk_types.push_back(aggr_type); + } + 
scan_chunk.Initialize(scan_chunk_types); } - ParallelState *parallel_state; - bool initialized; + //! Materialized GROUP BY expressions & aggregates + DataChunk scan_chunk; - //! The number of partitions to process (0 if there is no partitioning) - size_t partitions; - //! The output read position. - size_t next_part; - //! The generated input chunks - ChunkCollection chunks; - //! The generated output chunks - ChunkCollection window_results; - //! The read cursor - idx_t position; + //! The current position to scan the HT for output tuples + idx_t ht_index; + idx_t ht_scan_position; }; -unique_ptr PhysicalWindow::GetOperatorState() { - return make_unique(*this, children[0].get()); -} +void PhysicalHashAggregate::Combine(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate) { + auto &gstate = (HashAggregateGlobalState &)state; + auto &llstate = (HashAggregateLocalState &)lstate; -static void GeneratePartition(PhysicalWindowOperatorState &state, WindowGlobalState &gstate, const idx_t hash_bin) { - auto &op = (PhysicalWindow &)gstate.op; - WindowExpressions window_exprs; - for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) { - D_ASSERT(op.select_list[expr_idx]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW); - auto wexpr = reinterpret_cast(op.select_list[expr_idx].get()); - window_exprs.emplace_back(wexpr); + // this actually does not do a lot but just pushes the local HTs into the global state so we can later combine them + // in parallel + + if (ForceSingleHT(state)) { + D_ASSERT(gstate.finalized_hts.size() <= 1); + return; } - // Get rid of any stale data - state.chunks.Reset(); - state.window_results.Reset(); - state.position = 0; + if (!llstate.ht) { + return; // no data + } - if (gstate.counts.empty() && hash_bin == 0) { - ChunkCollection &big_data = gstate.chunks; - ChunkCollection &window_results = gstate.window_results; - ChunkCollection &over_collection = gstate.over_collection; - 
ComputeWindowExpressions(window_exprs, big_data, window_results, over_collection); - state.chunks.Merge(big_data); - state.window_results.Merge(window_results); - } else if (hash_bin < gstate.counts.size() && gstate.counts[hash_bin] > 0) { - ChunkCollection input; - ChunkCollection output; - ChunkCollection over; - const auto hash_mask = hash_t(gstate.counts.size() - 1); - ExtractPartition(gstate, input, output, over, hash_bin, hash_mask); - ComputeWindowExpressions(window_exprs, input, output, over); - state.chunks.Merge(input); - state.window_results.Merge(output); + if (!llstate.ht->IsPartitioned() && gstate.partition_info.n_partitions > 1 && gstate.total_groups > radix_limit) { + llstate.ht->Partition(); } -} -static void Scan(PhysicalWindowOperatorState &state, DataChunk &chunk) { - ChunkCollection &big_data = state.chunks; - ChunkCollection &window_results = state.window_results; + lock_guard glock(gstate.lock); + D_ASSERT(all_combinable); + D_ASSERT(!any_distinct); - if (state.position >= big_data.Count()) { - return; + if (!llstate.is_empty) { + gstate.is_empty = false; } - // just return what was computed before, appending the result cols of the window expressions at the end - auto &proj_ch = big_data.GetChunkForRow(state.position); - auto &wind_ch = window_results.GetChunkForRow(state.position); + // we will never add new values to these HTs so we can drop the first part of the HT + llstate.ht->Finalize(); - idx_t out_idx = 0; - D_ASSERT(proj_ch.size() == wind_ch.size()); - chunk.SetCardinality(proj_ch); - for (idx_t col_idx = 0; col_idx < proj_ch.ColumnCount(); col_idx++) { - chunk.data[out_idx++].Reference(proj_ch.data[col_idx]); + // at this point we just collect them the PhysicalHashAggregateFinalizeTask (below) will merge them in parallel + gstate.intermediate_hts.push_back(move(llstate.ht)); +} + +// this task is run in multiple threads and combines the radix-partitioned hash tables into a single one and then +// folds them into the global ht 
finally. +class PhysicalHashAggregateFinalizeTask : public Task { +public: + PhysicalHashAggregateFinalizeTask(Pipeline &parent_p, HashAggregateGlobalState &state_p, idx_t radix_p) + : parent(parent_p), state(state_p), radix(radix_p) { } - for (idx_t col_idx = 0; col_idx < wind_ch.ColumnCount(); col_idx++) { - chunk.data[out_idx++].Reference(wind_ch.data[col_idx]); + static void FinalizeHT(HashAggregateGlobalState &gstate, idx_t radix) { + D_ASSERT(gstate.finalized_hts[radix]); + for (auto &pht : gstate.intermediate_hts) { + for (auto &ht : pht->GetPartition(radix)) { + gstate.finalized_hts[radix]->Combine(*ht); + ht.reset(); + } + } + gstate.finalized_hts[radix]->Finalize(); } - chunk.Verify(); - state.position += STANDARD_VECTOR_SIZE; + void Execute() override { + FinalizeHT(state, radix); + auto total_tasks = parent.total_tasks.load(); + auto finished_tasks = ++parent.finished_tasks; + // finish the whole pipeline + if (total_tasks == finished_tasks) { + parent.Finish(); + } + } + +private: + Pipeline &parent; + HashAggregateGlobalState &state; + idx_t radix; +}; + +bool PhysicalHashAggregate::Finalize(Pipeline &pipeline, ClientContext &context, + unique_ptr state) { + return FinalizeInternal(context, move(state), false, &pipeline); } -void PhysicalWindow::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state_p) { - auto &state = *reinterpret_cast(state_p); - auto &gstate = (WindowGlobalState &)*sink_state; +void PhysicalHashAggregate::FinalizeImmediate(ClientContext &context, unique_ptr state) { + FinalizeInternal(context, move(state), true, nullptr); +} - if (!state.initialized) { - // initialize thread-local operator state - state.partitions = gstate.counts.size(); - state.next_part = 0; - // record parallel state (if any) - state.parallel_state = nullptr; - auto &task = context.task; - // check if there is any parallel state to fetch - state.parallel_state = nullptr; - auto task_info = task.task_info.find(this); - if 
(task_info != task.task_info.end()) { - // parallel scan init - state.parallel_state = task_info->second; +bool PhysicalHashAggregate::FinalizeInternal(ClientContext &context, unique_ptr state, + bool immediate, Pipeline *pipeline) { + this->sink_state = move(state); + auto &gstate = (HashAggregateGlobalState &)*this->sink_state; + + // special case if we have non-combinable aggregates + // we have already aggregated into a global shared HT that does not require any additional finalization steps + if (ForceSingleHT(gstate)) { + D_ASSERT(gstate.finalized_hts.size() <= 1); + return true; + } + + // we can have two cases now, non-partitioned for few groups and radix-partitioned for very many groups. + // go through all of the child hts and see if we ever called partition() on any of them + // if we did, it's the latter case. + bool any_partitioned = false; + for (auto &pht : gstate.intermediate_hts) { + if (pht->IsPartitioned()) { + any_partitioned = true; + break; } - state.initialized = true; } - if (!state.parallel_state) { - // sequential scan - if (state.position >= state.chunks.Count()) { - auto hash_bin = state.next_part++; - for (; hash_bin < state.partitions; hash_bin = state.next_part++) { - if (gstate.counts[hash_bin] > 0) { - break; - } + if (any_partitioned) { + // if one is partitioned, all have to be + // this should mostly have already happened in Combine, but if not we do it here + for (auto &pht : gstate.intermediate_hts) { + if (!pht->IsPartitioned()) { + pht->Partition(); } - GeneratePartition(state, gstate, hash_bin); } - Scan(state, chunk); - if (chunk.size() != 0) { - return; + // schedule additional tasks to combine the partial HTs + if (!immediate) { + D_ASSERT(pipeline); + pipeline->total_tasks += gstate.partition_info.n_partitions; } - } else { - // parallel scan - auto &parallel_state = *reinterpret_cast(state.parallel_state); - do { - if (state.position >= state.chunks.Count()) { - auto hash_bin = parallel_state.next_part++; - for (; hash_bin < 
state.partitions; hash_bin = parallel_state.next_part++) { - if (gstate.counts[hash_bin] > 0) { - break; - } - } - GeneratePartition(state, gstate, hash_bin); - } - Scan(state, chunk); - if (chunk.size() != 0) { - return; + gstate.finalized_hts.resize(gstate.partition_info.n_partitions); + for (idx_t r = 0; r < gstate.partition_info.n_partitions; r++) { + gstate.finalized_hts[r] = + make_unique(BufferManager::GetBufferManager(context), group_types, + payload_types, bindings, HtEntryType::HT_WIDTH_64); + if (immediate) { + PhysicalHashAggregateFinalizeTask::FinalizeHT(gstate, r); } else { - break; + D_ASSERT(pipeline); + auto new_task = make_unique(*pipeline, gstate, r); + TaskScheduler::GetScheduler(context).ScheduleTask(pipeline->token, move(new_task)); } - } while (true); - } - D_ASSERT(chunk.size() == 0); -} - -void PhysicalWindow::Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate_p, - DataChunk &input) { - auto &lstate = (WindowLocalState &)lstate_p; - lstate.chunks.Append(input); - - // Compute the over columns and the hash values for this block (if any) - const auto over_idx = 0; - auto over_expr = reinterpret_cast(select_list[over_idx].get()); - - const auto sort_col_count = over_expr->partitions.size() + over_expr->orders.size(); - if (sort_col_count > 0) { - DataChunk over_chunk; - MaterializeOverForWindow(over_expr, input, over_chunk); + } + return immediate; + } else { // in the non-partitioned case we immediately combine all the unpartitioned hts created by the threads. 
+ // TODO possible optimization, if total count < limit for 32 bit ht, use that one + // create this ht here so finalize needs no lock on gstate - if (!over_expr->partitions.empty()) { - if (lstate.counts.empty()) { - lstate.counts.resize(lstate.partition_count, 0); + gstate.finalized_hts.push_back(make_unique( + BufferManager::GetBufferManager(context), group_types, payload_types, bindings, HtEntryType::HT_WIDTH_64)); + for (auto &pht : gstate.intermediate_hts) { + auto unpartitioned = pht->GetUnpartitioned(); + for (auto &unpartitioned_ht : unpartitioned) { + D_ASSERT(unpartitioned_ht); + gstate.finalized_hts[0]->Combine(*unpartitioned_ht); + unpartitioned_ht.reset(); } - - DataChunk hash_chunk; - HashChunk(lstate.counts, hash_chunk, over_chunk, over_expr->partitions.size()); - lstate.hash_collection.Append(hash_chunk); - D_ASSERT(lstate.chunks.Count() == lstate.hash_collection.Count()); + unpartitioned.clear(); } - - lstate.over_collection.Append(over_chunk); - D_ASSERT(lstate.chunks.Count() == lstate.over_collection.Count()); + gstate.finalized_hts[0]->Finalize(); + return true; } } -void PhysicalWindow::Combine(ExecutionContext &context, GlobalOperatorState &gstate_p, LocalSinkState &lstate_p) { - auto &lstate = (WindowLocalState &)lstate_p; - if (lstate.chunks.Count() == 0) { - return; - } - auto &gstate = (WindowGlobalState &)gstate_p; - lock_guard glock(gstate.lock); - gstate.chunks.Merge(lstate.chunks); - gstate.over_collection.Merge(lstate.over_collection); - gstate.hash_collection.Merge(lstate.hash_collection); - if (gstate.counts.empty()) { - gstate.counts = lstate.counts; - } else { - D_ASSERT(gstate.counts.size() == lstate.counts.size()); - for (idx_t i = 0; i < gstate.counts.size(); ++i) { - gstate.counts[i] += lstate.counts[i]; - } - } -} +void PhysicalHashAggregate::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state_p) const { + auto &gstate = (HashAggregateGlobalState &)*sink_state; + auto &state = 
(PhysicalHashAggregateState &)*state_p; -void PhysicalWindow::Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr gstate_p) { - this->sink_state = move(gstate_p); - auto &gstate = (WindowGlobalState &)*this->sink_state; + state.scan_chunk.Reset(); - ChunkCollection &big_data = gstate.chunks; - ChunkCollection &window_results = gstate.window_results; + // special case hack to sort out aggregating from empty intermediates + // for aggregations without groups + if (gstate.is_empty && is_implicit_aggr) { + D_ASSERT(chunk.ColumnCount() == aggregates.size()); + // for each column in the aggregates, set to initial state + chunk.SetCardinality(1); + for (idx_t i = 0; i < chunk.ColumnCount(); i++) { + D_ASSERT(aggregates[i]->GetExpressionClass() == ExpressionClass::BOUND_AGGREGATE); + auto &aggr = (BoundAggregateExpression &)*aggregates[i]; + auto aggr_state = unique_ptr(new data_t[aggr.function.state_size()]); + aggr.function.initialize(aggr_state.get()); - if (big_data.Count() == 0) { + Vector state_vector(Value::POINTER((uintptr_t)aggr_state.get())); + aggr.function.finalize(state_vector, aggr.bind_info.get(), chunk.data[i], 1, 0); + if (aggr.function.destructor) { + aggr.function.destructor(state_vector, 1); + } + } + state.finished = true; return; } - - vector window_types; - for (idx_t expr_idx = 0; expr_idx < select_list.size(); expr_idx++) { - window_types.push_back(select_list[expr_idx]->return_type); + if (gstate.is_empty && !state.finished) { + state.finished = true; + return; } + idx_t elements_found = 0; - for (idx_t i = 0; i < big_data.ChunkCount(); i++) { - DataChunk window_chunk; - window_chunk.Initialize(window_types); - window_chunk.SetCardinality(big_data.GetChunk(i).size()); - for (idx_t col_idx = 0; col_idx < window_chunk.ColumnCount(); col_idx++) { - window_chunk.data[col_idx].SetVectorType(VectorType::CONSTANT_VECTOR); - ConstantVector::SetNull(window_chunk.data[col_idx], true); + while (true) { + if (state.ht_index == 
gstate.finalized_hts.size()) { + state.finished = true; + return; + } + elements_found = gstate.finalized_hts[state.ht_index]->Scan(state.ht_scan_position, state.scan_chunk); + + if (elements_found > 0) { + break; } + gstate.finalized_hts[state.ht_index].reset(); + state.ht_index++; + state.ht_scan_position = 0; + } - window_chunk.Verify(); - window_results.Append(window_chunk); + // compute the final projection list + idx_t chunk_index = 0; + chunk.SetCardinality(elements_found); + if (group_types.size() + aggregates.size() == chunk.ColumnCount()) { + for (idx_t col_idx = 0; col_idx < group_types.size(); col_idx++) { + chunk.data[chunk_index++].Reference(state.scan_chunk.data[col_idx]); + } + } else { + D_ASSERT(aggregates.size() == chunk.ColumnCount()); } - D_ASSERT(window_results.ColumnCount() == select_list.size()); + for (idx_t col_idx = 0; col_idx < aggregates.size(); col_idx++) { + chunk.data[chunk_index++].Reference(state.scan_chunk.data[group_types.size() + col_idx]); + } } -unique_ptr PhysicalWindow::GetLocalSinkState(ExecutionContext &context) { - return make_unique(*this); +unique_ptr PhysicalHashAggregate::GetOperatorState() { + return make_unique(*this, group_types, aggregate_return_types, + children.empty() ? 
nullptr : children[0].get()); } -unique_ptr PhysicalWindow::GetGlobalState(ClientContext &context) { - return make_unique(*this, context); +bool PhysicalHashAggregate::ForceSingleHT(GlobalOperatorState &state) const { + auto &gstate = (HashAggregateGlobalState &)state; + + return !all_combinable || any_distinct || gstate.partition_info.n_partitions < 2; } -string PhysicalWindow::ParamsToString() const { +string PhysicalHashAggregate::ParamsToString() const { string result; - for (idx_t i = 0; i < select_list.size(); i++) { + for (idx_t i = 0; i < groups.size(); i++) { if (i > 0) { result += "\n"; } - result += select_list[i]->GetName(); + result += groups[i]->GetName(); + } + for (idx_t i = 0; i < aggregates.size(); i++) { + auto &aggregate = (BoundAggregateExpression &)*aggregates[i]; + if (i > 0 || !groups.empty()) { + result += "\n"; + } + result += aggregates[i]->GetName(); + if (aggregate.filter) { + result += " Filter: " + aggregate.filter->GetName(); + } } return result; } @@ -38765,7 +45922,7 @@ string PhysicalWindow::ParamsToString() const { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/execution/operator/filter/physical_filter.hpp +// duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp // // //===----------------------------------------------------------------------===// @@ -38774,126 +45931,62 @@ string PhysicalWindow::ParamsToString() const { + namespace duckdb { +class ClientContext; +class PerfectAggregateHashTable; -//! PhysicalFilter represents a filter operator. It removes non-matching tuples -//! from the result. Note that it does not physically change the data, it only -//! adds a selection vector to the chunk. -class PhysicalFilter : public PhysicalOperator { +//! 
PhysicalPerfectHashAggregate performs a group-by and aggregation using a perfect hash table +class PhysicalPerfectHashAggregate : public PhysicalSink { public: - PhysicalFilter(vector types, vector> select_list, idx_t estimated_cardinality); + PhysicalPerfectHashAggregate(ClientContext &context, vector types, + vector> aggregates, vector> groups, + vector> group_stats, vector required_bits, + idx_t estimated_cardinality); - //! The filter expression - unique_ptr expression; + //! The groups + vector> groups; + //! The aggregates that have to be computed + vector> aggregates; public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; - - unique_ptr GetOperatorState() override; - string ParamsToString() const override; - void FinalizeOperatorState(PhysicalOperatorState &state_p, ExecutionContext &context) override; -}; -} // namespace duckdb - - - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/parallel/thread_context.hpp -// -// -//===----------------------------------------------------------------------===// - + void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, + DataChunk &input) const override; + void Combine(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate) override; + unique_ptr GetLocalSinkState(ExecutionContext &context) override; + unique_ptr GetGlobalState(ClientContext &context) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; + unique_ptr GetOperatorState() override; + string ParamsToString() const override; -namespace duckdb { -class ClientContext; + //! Create a perfect aggregate hash table for this node + unique_ptr CreateHT(ClientContext &context); -//! 
The ThreadContext holds thread-local info for parallel usage -class ThreadContext { public: - explicit ThreadContext(ClientContext &context); + //! The group types + vector group_types; + //! The payload types + vector payload_types; + //! The aggregates to be computed + vector aggregate_objects; + //! The minimum value of each of the groups + vector group_minima; + //! The number of bits we need to completely cover each of the groups + vector required_bits; - //! The operator profiler for the individual thread context - OperatorProfiler profiler; + unordered_map filter_indexes; }; } // namespace duckdb -namespace duckdb { - -class PhysicalFilterState : public PhysicalOperatorState { -public: - PhysicalFilterState(PhysicalOperator &op, PhysicalOperator *child, Expression &expr) - : PhysicalOperatorState(op, child), executor(expr) { - } - - ExpressionExecutor executor; -}; - -PhysicalFilter::PhysicalFilter(vector types, vector> select_list, - idx_t estimated_cardinality) - : PhysicalOperator(PhysicalOperatorType::FILTER, move(types), estimated_cardinality) { - D_ASSERT(select_list.size() > 0); - if (select_list.size() > 1) { - // create a big AND out of the expressions - auto conjunction = make_unique(ExpressionType::CONJUNCTION_AND); - for (auto &expr : select_list) { - conjunction->children.push_back(move(expr)); - } - expression = move(conjunction); - } else { - expression = move(select_list[0]); - } -} - -void PhysicalFilter::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state_p) { - auto state = reinterpret_cast(state_p); - SelectionVector sel(STANDARD_VECTOR_SIZE); - idx_t initial_count; - idx_t result_count; - do { - // fetch a chunk from the child and run the filter - // we repeat this process until either (1) passing tuples are found, or (2) the child is completely exhausted - children[0]->GetChunk(context, chunk, state->child_state.get()); - if (chunk.size() == 0) { - return; - } - initial_count = chunk.size(); - 
result_count = state->executor.SelectExpression(chunk, sel); - } while (result_count == 0); - - if (result_count == initial_count) { - // nothing was filtered: skip adding any selection vectors - return; - } - chunk.Slice(sel, result_count); -} - -unique_ptr PhysicalFilter::GetOperatorState() { - return make_unique(*this, children[0].get(), *expression); -} - -string PhysicalFilter::ParamsToString() const { - return expression->GetName(); -} - -void PhysicalFilter::FinalizeOperatorState(PhysicalOperatorState &state_p, ExecutionContext &context) { - auto &state = reinterpret_cast(state_p); - context.thread.profiler.Flush(this, &state.executor); - if (!children.empty() && state.child_state) { - children[0]->FinalizeOperatorState(*state.child_state, context); - } -} -} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/execution/operator/helper/physical_execute.hpp +// duckdb/execution/perfect_aggregate_hashtable.hpp // // //===----------------------------------------------------------------------===// @@ -38904,73 +45997,52 @@ void PhysicalFilter::FinalizeOperatorState(PhysicalOperatorState &state_p, Execu namespace duckdb { -class PhysicalExecute : public PhysicalOperator { +class PerfectAggregateHashTable : public BaseAggregateHashTable { public: - explicit PhysicalExecute(PhysicalOperator *plan) - : PhysicalOperator(PhysicalOperatorType::EXECUTE, plan->types, -1), plan(plan) { - } - - PhysicalOperator *plan; + PerfectAggregateHashTable(BufferManager &buffer_manager, const vector &group_types, + vector payload_types_p, vector aggregate_objects, + vector group_minima, vector required_bits); + ~PerfectAggregateHashTable() override; public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; - - unique_ptr GetOperatorState() override; - void FinalizeOperatorState(PhysicalOperatorState &state_p, ExecutionContext &context) override; -}; - -} 
// namespace duckdb - - -namespace duckdb { - -void PhysicalExecute::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state_p) { - D_ASSERT(plan); - plan->GetChunk(context, chunk, state_p); -} - -unique_ptr PhysicalExecute::GetOperatorState() { - return plan->GetOperatorState(); -} - -void PhysicalExecute::FinalizeOperatorState(PhysicalOperatorState &state_p, ExecutionContext &context) { - if (!children.empty() && state_p.child_state) { - plan->FinalizeOperatorState(state_p, context); - } -} - -} // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/execution/operator/helper/physical_limit.hpp -// -// -//===----------------------------------------------------------------------===// - + //! Add the given data to the HT + void AddChunk(DataChunk &groups, DataChunk &payload); + //! Combines the target perfect aggregate HT into this one + void Combine(PerfectAggregateHashTable &other); + //! Scan the HT starting from the scan_position + void Scan(idx_t &scan_position, DataChunk &result); -namespace duckdb { +protected: + Vector addresses; + //! The required bits per group + vector required_bits; + //! The total required bits for the HT (this determines the max capacity) + idx_t total_required_bits; + //! The total amount of groups + idx_t total_groups; + //! The tuple size + idx_t tuple_size; + //! The number of grouping columns + idx_t grouping_columns; -//! 
PhyisicalLimit represents the LIMIT operator -class PhysicalLimit : public PhysicalOperator { -public: - PhysicalLimit(vector types, idx_t limit, idx_t offset, unique_ptr limit_expression, - unique_ptr offset_expression, idx_t estimated_cardinality) - : PhysicalOperator(PhysicalOperatorType::LIMIT, move(types), estimated_cardinality), limit(limit), - offset(offset), limit_expression(move(limit_expression)), offset_expression(move(offset_expression)) { - } + // The actual pointer to the data + data_ptr_t data; + //! The owned data of the HT + unique_ptr owned_data; + //! Information on whether or not a specific group has any entries + unique_ptr group_is_set; - idx_t limit; - idx_t offset; - unique_ptr limit_expression; - unique_ptr offset_expression; + //! The minimum values for each of the group columns + vector group_minima; -public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + //! Reused selection vector + SelectionVector sel; - unique_ptr GetOperatorState() override; +private: + //! 
Destroy the perfect aggregate HT (called automatically by the destructor) + void Destroy(); }; } // namespace duckdb @@ -38982,203 +46054,215 @@ class PhysicalLimit : public PhysicalOperator { namespace duckdb { -class PhysicalLimitOperatorState : public PhysicalOperatorState { -public: - PhysicalLimitOperatorState(PhysicalOperator &op, PhysicalOperator *child, idx_t current_offset = 0) - : PhysicalOperatorState(op, child), current_offset(current_offset) { +PhysicalPerfectHashAggregate::PhysicalPerfectHashAggregate(ClientContext &context, vector types_p, + vector> aggregates_p, + vector> groups_p, + vector> group_stats, + vector required_bits_p, idx_t estimated_cardinality) + : PhysicalSink(PhysicalOperatorType::PERFECT_HASH_GROUP_BY, move(types_p), estimated_cardinality), + groups(move(groups_p)), aggregates(move(aggregates_p)), required_bits(move(required_bits_p)) { + D_ASSERT(groups.size() == group_stats.size()); + group_minima.reserve(group_stats.size()); + for (auto &stats : group_stats) { + D_ASSERT(stats); + auto &nstats = (NumericStatistics &)*stats; + D_ASSERT(!nstats.min.is_null); + group_minima.push_back(move(nstats.min)); } - - idx_t current_offset; -}; - -uint64_t GetDelimiter(DataChunk &input, Expression *expr, uint64_t original_value) { - DataChunk limit_chunk; - vector types {expr->return_type}; - limit_chunk.Initialize(types); - ExpressionExecutor limit_executor(expr); - auto input_size = input.size(); - input.SetCardinality(1); - limit_executor.Execute(input, limit_chunk); - input.SetCardinality(input_size); - auto limit_value = limit_chunk.GetValue(0, 0); - if (limit_value.is_null) { - return original_value; + for (auto &expr : groups) { + group_types.push_back(expr->return_type); } - return limit_value.value_.ubigint; -} - -void PhysicalLimit::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state_p) { - auto state = reinterpret_cast(state_p); - idx_t max_element = limit + offset; - if ((limit == 0 || 
state->current_offset >= max_element) && !(limit_expression || offset_expression)) { - return; - } + vector bindings; + vector payload_types_filters; + for (auto &expr : aggregates) { + D_ASSERT(expr->expression_class == ExpressionClass::BOUND_AGGREGATE); + D_ASSERT(expr->IsAggregate()); + auto &aggr = (BoundAggregateExpression &)*expr; + bindings.push_back(&aggr); - // get the next chunk from the child - do { - children[0]->GetChunk(context, state->child_chunk, state->child_state.get()); - if (limit_expression) { - limit = GetDelimiter(state->child_chunk, limit_expression.get(), limit); - limit_expression.reset(); - } - if (offset_expression) { - offset = GetDelimiter(state->child_chunk, offset_expression.get(), offset); - offset_expression.reset(); - } - max_element = limit + offset; - if (state->child_chunk.size() == 0) { - return; + D_ASSERT(!aggr.distinct); + D_ASSERT(aggr.function.combine); + for (auto &child : aggr.children) { + payload_types.push_back(child->return_type); } - if (limit == 0 || state->current_offset >= max_element) { - return; + if (aggr.filter) { + payload_types_filters.push_back(aggr.filter->return_type); } - if (state->current_offset < offset) { - // we are not yet at the offset point - if (state->current_offset + state->child_chunk.size() > offset) { - // however we will reach it in this chunk - // we have to copy part of the chunk with an offset - idx_t start_position = offset - state->current_offset; - auto chunk_count = MinValue(limit, state->child_chunk.size() - start_position); - SelectionVector sel(STANDARD_VECTOR_SIZE); - for (idx_t i = 0; i < chunk_count; i++) { - sel.set_index(i, start_position + i); - } - // set up a slice of the input chunks - chunk.Slice(state->child_chunk, sel, chunk_count); - } - } else { - // have to copy either the entire chunk or part of it - idx_t chunk_count; - if (state->current_offset + state->child_chunk.size() >= max_element) { - // have to limit the count of the chunk - chunk_count = max_element - 
state->current_offset; + } + for (const auto &pay_filters : payload_types_filters) { + payload_types.push_back(pay_filters); + } + aggregate_objects = AggregateObject::CreateAggregateObjects(bindings); + + // filter_indexes must be pre-built, not lazily instantiated in parallel... + idx_t aggregate_input_idx = 0; + for (auto &aggregate : aggregates) { + auto &aggr = (BoundAggregateExpression &)*aggregate; + aggregate_input_idx += aggr.children.size(); + } + for (auto &aggregate : aggregates) { + auto &aggr = (BoundAggregateExpression &)*aggregate; + if (aggr.filter) { + auto &bound_ref_expr = (BoundReferenceExpression &)*aggr.filter; + auto it = filter_indexes.find(aggr.filter.get()); + if (it == filter_indexes.end()) { + filter_indexes[aggr.filter.get()] = bound_ref_expr.index; + bound_ref_expr.index = aggregate_input_idx++; } else { - // we copy the entire chunk - chunk_count = state->child_chunk.size(); + ++aggregate_input_idx; } - // instead of copying we just change the pointer in the current chunk - chunk.Reference(state->child_chunk); - chunk.SetCardinality(chunk_count); } - - state->current_offset += state->child_chunk.size(); - } while (chunk.size() == 0); + } } -unique_ptr PhysicalLimit::GetOperatorState() { - return make_unique(*this, children[0].get(), 0); +unique_ptr PhysicalPerfectHashAggregate::CreateHT(ClientContext &context) { + return make_unique(BufferManager::GetBufferManager(context), group_types, payload_types, + aggregate_objects, group_minima, required_bits); } -} // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/execution/operator/helper/physical_vacuum.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - -namespace duckdb { - -//! 
PhysicalVacuum represents an etension LOAD operation -class PhysicalLoad : public PhysicalOperator { +//===--------------------------------------------------------------------===// +// Sink +//===--------------------------------------------------------------------===// +class PerfectHashAggregateGlobalState : public GlobalOperatorState { public: - explicit PhysicalLoad(unique_ptr info, idx_t estimated_cardinality) - : PhysicalOperator(PhysicalOperatorType::LOAD, {LogicalType::BOOLEAN}, estimated_cardinality), - info(move(info)) { + PerfectHashAggregateGlobalState(PhysicalPerfectHashAggregate &op, ClientContext &context) + : ht(op.CreateHT(context)) { } - unique_ptr info; - -public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + //! The lock for updating the global aggregate state + mutex lock; + //! The global aggregate hash table + unique_ptr ht; }; -} // namespace duckdb - +class PerfectHashAggregateLocalState : public LocalSinkState { +public: + PerfectHashAggregateLocalState(PhysicalPerfectHashAggregate &op, ClientContext &context) + : ht(op.CreateHT(context)) { + group_chunk.InitializeEmpty(op.group_types); + if (!op.payload_types.empty()) { + aggregate_input_chunk.InitializeEmpty(op.payload_types); + } + } + //! 
The local aggregate hash table + unique_ptr ht; + DataChunk group_chunk; + DataChunk aggregate_input_chunk; +}; +unique_ptr PhysicalPerfectHashAggregate::GetGlobalState(ClientContext &context) { + return make_unique(*this, context); +} +unique_ptr PhysicalPerfectHashAggregate::GetLocalSinkState(ExecutionContext &context) { + return make_unique(*this, context.client); +} -#ifndef _WIN32 -#include -#else -#include +void PhysicalPerfectHashAggregate::Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate_p, + DataChunk &input) const { + auto &lstate = (PerfectHashAggregateLocalState &)lstate_p; + DataChunk &group_chunk = lstate.group_chunk; + DataChunk &aggregate_input_chunk = lstate.aggregate_input_chunk; -#define RTLD_LAZY 0 -#define RTLD_LOCAL 0 + for (idx_t group_idx = 0; group_idx < groups.size(); group_idx++) { + auto &group = groups[group_idx]; + D_ASSERT(group->type == ExpressionType::BOUND_REF); + auto &bound_ref_expr = (BoundReferenceExpression &)*group; + group_chunk.data[group_idx].Reference(input.data[bound_ref_expr.index]); + } + idx_t aggregate_input_idx = 0; + for (auto &aggregate : aggregates) { + auto &aggr = (BoundAggregateExpression &)*aggregate; + for (auto &child_expr : aggr.children) { + D_ASSERT(child_expr->type == ExpressionType::BOUND_REF); + auto &bound_ref_expr = (BoundReferenceExpression &)*child_expr; + aggregate_input_chunk.data[aggregate_input_idx++].Reference(input.data[bound_ref_expr.index]); + } + } + for (auto &aggregate : aggregates) { + auto &aggr = (BoundAggregateExpression &)*aggregate; + if (aggr.filter) { + auto it = filter_indexes.find(aggr.filter.get()); + D_ASSERT(it != filter_indexes.end()); + aggregate_input_chunk.data[aggregate_input_idx++].Reference(input.data[it->second]); + } + } -#endif + group_chunk.SetCardinality(input.size()); -namespace duckdb { + aggregate_input_chunk.SetCardinality(input.size()); -#ifdef _WIN32 + group_chunk.Verify(); + aggregate_input_chunk.Verify(); + 
D_ASSERT(aggregate_input_chunk.ColumnCount() == 0 || group_chunk.size() == aggregate_input_chunk.size()); -void *dlopen(const char *file, int mode) { - D_ASSERT(file); - return (void *)LoadLibrary(file); + lstate.ht->AddChunk(group_chunk, aggregate_input_chunk); } -void *dlsym(void *handle, const char *name) { - D_ASSERT(handle); - return (void *)GetProcAddress((HINSTANCE)handle, name); +//===--------------------------------------------------------------------===// +// Combine +//===--------------------------------------------------------------------===// +void PhysicalPerfectHashAggregate::Combine(ExecutionContext &context, GlobalOperatorState &gstate_p, + LocalSinkState &lstate_p) { + auto &lstate = (PerfectHashAggregateLocalState &)lstate_p; + auto &gstate = (PerfectHashAggregateGlobalState &)gstate_p; + + lock_guard l(gstate.lock); + gstate.ht->Combine(*lstate.ht); } -#endif -void PhysicalLoad::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) { - auto &fs = FileSystem::GetFileSystem(context.client); - auto filename = fs.ConvertSeparators(info->filename); - if (!fs.FileExists(filename)) { - throw InvalidInputException("File %s not found", filename); - } - auto lib_hdl = dlopen(filename.c_str(), RTLD_LAZY | RTLD_LOCAL); - if (!lib_hdl) { - throw InvalidInputException("File %s could not be loaded", filename); +//===--------------------------------------------------------------------===// +// GetChunk +//===--------------------------------------------------------------------===// +class PerfectHashAggregateState : public PhysicalOperatorState { +public: + PerfectHashAggregateState(PhysicalOperator &op, PhysicalOperator *child) + : PhysicalOperatorState(op, child), ht_scan_position(0) { } + //! 
The current position to scan the HT for output tuples + idx_t ht_scan_position; +}; - auto basename = fs.ExtractBaseName(filename); - auto init_fun_name = basename + "_init"; - auto version_fun_name = basename + "_version"; +void PhysicalPerfectHashAggregate::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state_p) const { + auto &state = (PerfectHashAggregateState &)*state_p; + auto &gstate = (PerfectHashAggregateGlobalState &)*sink_state; - void (*init_fun)(DatabaseInstance &); - const char *(*version_fun)(void); + gstate.ht->Scan(state.ht_scan_position, chunk); +} - *(void **)(&init_fun) = dlsym(lib_hdl, init_fun_name.c_str()); - if (init_fun == nullptr) { - throw InvalidInputException("File %s did not contain initialization function %s", filename, init_fun_name); - } +unique_ptr PhysicalPerfectHashAggregate::GetOperatorState() { + return make_unique(*this, children[0].get()); +} - *(void **)(&version_fun) = dlsym(lib_hdl, version_fun_name.c_str()); - if (init_fun == nullptr) { - throw InvalidInputException("File %s did not contain version function %s", filename, version_fun_name); - } - auto extension_version = std::string((*version_fun)()); - auto engine_version = DuckDB::LibraryVersion(); - if (extension_version != engine_version) { - throw InvalidInputException("Extension %s version (%s) does not match DuckDB version (%s)", filename, - extension_version, engine_version); +string PhysicalPerfectHashAggregate::ParamsToString() const { + string result; + for (idx_t i = 0; i < groups.size(); i++) { + if (i > 0) { + result += "\n"; + } + result += groups[i]->GetName(); } - - try { - (*init_fun)(*context.client.db); - } catch (Exception &e) { - throw InvalidInputException("Initialization function %s from file %s threw an exception: %s", init_fun_name, - filename, e.what()); + for (idx_t i = 0; i < aggregates.size(); i++) { + if (i > 0 || !groups.empty()) { + result += "\n"; + } + result += aggregates[i]->GetName(); + auto 
&aggregate = (BoundAggregateExpression &)*aggregates[i]; + if (aggregate.filter) { + result += " Filter: " + aggregate.filter->GetName(); + } } - state->finished = true; + return result; } } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/execution/operator/helper/physical_pragma.hpp +// duckdb/execution/operator/aggregate/physical_simple_aggregate.hpp // // //===----------------------------------------------------------------------===// @@ -39186,56 +46270,41 @@ void PhysicalLoad::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, - - - namespace duckdb { -//! PhysicalPragma represents the PRAGMA operator -class PhysicalPragma : public PhysicalOperator { +//! PhysicalSimpleAggregate is an aggregate operator that can only perform aggregates (1) without any groups, and (2) +//! without any DISTINCT aggregates +class PhysicalSimpleAggregate : public PhysicalSink { public: - PhysicalPragma(PragmaFunction function_p, PragmaInfo info_p, idx_t estimated_cardinality) - : PhysicalOperator(PhysicalOperatorType::PRAGMA, {LogicalType::BOOLEAN}, estimated_cardinality), - function(move(function_p)), info(move(info_p)) { - } + PhysicalSimpleAggregate(vector types, vector> expressions, bool all_combinable, + idx_t estimated_cardinality); - //! The pragma function to call - PragmaFunction function; - //! The context of the call - PragmaInfo info; + //! The aggregates that have to be computed + vector> aggregates; + //! Whether or not all aggregates are trivially combinable. Aggregates that are trivially combinable can be + //! parallelized. 
+ bool all_combinable; public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; -}; - -} // namespace duckdb + void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, + DataChunk &input) const override; + void Combine(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate) override; + unique_ptr GetLocalSinkState(ExecutionContext &context) override; + unique_ptr GetGlobalState(ClientContext &context) override; -namespace duckdb { + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; + void FinalizeOperatorState(PhysicalOperatorState &state, ExecutionContext &context) override; -void PhysicalPragma::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) { - auto &client = context.client; - FunctionParameters parameters {info.parameters, info.named_parameters}; - function.function(client, parameters); -} + string ParamsToString() const override; +}; } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/execution/operator/helper/physical_prepare.hpp -// -// -//===----------------------------------------------------------------------===// - - - - //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/main/prepared_statement_data.hpp +// duckdb/parallel/thread_context.hpp // // //===----------------------------------------------------------------------===// @@ -39244,3663 +46313,2346 @@ void PhysicalPragma::GetChunkInternal(ExecutionContext &context, DataChunk &chun - - - - namespace duckdb { -class CatalogEntry; -class PhysicalOperator; -class SQLStatement; +class ClientContext; -class PreparedStatementData { +//! 
The ThreadContext holds thread-local info for parallel usage +class ThreadContext { public: - DUCKDB_API explicit PreparedStatementData(StatementType type); - DUCKDB_API ~PreparedStatementData(); - - StatementType statement_type; - //! The unbound SQL statement that was prepared - unique_ptr unbound_statement; - //! The fully prepared physical plan of the prepared statement - unique_ptr plan; - //! The map of parameter index to the actual value entry - unordered_map>> value_map; - - //! The result names of the transaction - vector names; - //! The result types of the transaction - vector types; - - //! Whether or not the statement is a read-only statement, or whether it can result in changes to the database - bool read_only; - //! Whether or not the statement requires a valid transaction. Almost all statements require this, with the - //! exception of - bool requires_valid_transaction; - //! Whether or not the result can be streamed to the client - bool allow_stream_result; - - //! The catalog version of when the prepared statement was bound - //! If this version is lower than the current catalog version, we have to rebind the prepared statement - idx_t catalog_version; + explicit ThreadContext(ClientContext &context); -public: - //! Bind a set of values to the prepared statement data - DUCKDB_API void Bind(vector values); - //! Get the expected SQL Type of the bound parameter - DUCKDB_API LogicalType GetType(idx_t param_index); + //! 
The operator profiler for the individual thread context + OperatorProfiler profiler; }; } // namespace duckdb -namespace duckdb { - -class PhysicalPrepare : public PhysicalOperator { -public: - PhysicalPrepare(string name, shared_ptr prepared, idx_t estimated_cardinality) - : PhysicalOperator(PhysicalOperatorType::PREPARE, {LogicalType::BOOLEAN}, estimated_cardinality), name(name), - prepared(move(prepared)) { - } - string name; - shared_ptr prepared; -public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; -}; -} // namespace duckdb namespace duckdb { -void PhysicalPrepare::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) { - auto &client = context.client; - - // store the prepared statement in the context - client.prepared_statements[name] = prepared; - state->finished = true; +PhysicalSimpleAggregate::PhysicalSimpleAggregate(vector types, vector> expressions, + bool all_combinable, idx_t estimated_cardinality) + : PhysicalSink(PhysicalOperatorType::SIMPLE_AGGREGATE, move(types), estimated_cardinality), + aggregates(move(expressions)), all_combinable(all_combinable) { } -} // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/execution/operator/helper/physical_reservoir_sample.hpp -// -// -//===----------------------------------------------------------------------===// - - +//===--------------------------------------------------------------------===// +// Sink +//===--------------------------------------------------------------------===// +struct AggregateState { + explicit AggregateState(vector> &aggregate_expressions) { + for (auto &aggregate : aggregate_expressions) { + D_ASSERT(aggregate->GetExpressionClass() == ExpressionClass::BOUND_AGGREGATE); + auto &aggr = (BoundAggregateExpression &)*aggregate; + auto state = unique_ptr(new data_t[aggr.function.state_size()]); + 
aggr.function.initialize(state.get()); + aggregates.push_back(move(state)); + destructors.push_back(aggr.function.destructor); + } + } + ~AggregateState() { + D_ASSERT(destructors.size() == aggregates.size()); + for (idx_t i = 0; i < destructors.size(); i++) { + if (!destructors[i]) { + continue; + } + Vector state_vector(Value::POINTER((uintptr_t)aggregates[i].get())); + state_vector.SetVectorType(VectorType::FLAT_VECTOR); + destructors[i](state_vector, 1); + } + } + void Move(AggregateState &other) { + other.aggregates = move(aggregates); + other.destructors = move(destructors); + } -namespace duckdb { + //! The aggregate values + vector> aggregates; + // The destructors + vector destructors; +}; -//! PhysicalReservoirSample represents a sample taken using reservoir sampling, which is a blocking sampling method -class PhysicalReservoirSample : public PhysicalSink { +class SimpleAggregateGlobalState : public GlobalOperatorState { public: - PhysicalReservoirSample(vector types, unique_ptr options, idx_t estimated_cardinality) - : PhysicalSink(PhysicalOperatorType::RESERVOIR_SAMPLE, move(types), estimated_cardinality), - options(move(options)) { + explicit SimpleAggregateGlobalState(vector> &aggregates) : state(aggregates) { } - unique_ptr options; + //! The lock for updating the global aggregate state + mutex lock; + //! 
The global aggregate state + AggregateState state; +}; +class SimpleAggregateLocalState : public LocalSinkState { public: - void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, DataChunk &input) override; - unique_ptr GetGlobalState(ClientContext &context) override; - - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; - unique_ptr GetOperatorState() override; + explicit SimpleAggregateLocalState(vector> &aggregates) : state(aggregates) { + vector payload_types; + for (auto &aggregate : aggregates) { + D_ASSERT(aggregate->GetExpressionClass() == ExpressionClass::BOUND_AGGREGATE); + auto &aggr = (BoundAggregateExpression &)*aggregate; + // initialize the payload chunk + if (!aggr.children.empty()) { + for (auto &child : aggr.children) { + payload_types.push_back(child->return_type); + child_executor.AddExpression(*child); + } + } + } + if (!payload_types.empty()) { // for select count(*) from t; there is no payload at all + payload_chunk.Initialize(payload_types); + } + } + void Reset() { + payload_chunk.Reset(); + } - string ParamsToString() const override; + //! The local aggregate state + AggregateState state; + //! The executor + ExpressionExecutor child_executor; + //! The payload chunk + DataChunk payload_chunk; }; -} // namespace duckdb - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/execution/reservoir_sample.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #5 -// See the end of this file for a list - -/* - * PCG Random Number Generation for C++ - * - * Copyright 2014-2019 Melissa O'Neill , - * and the PCG Project contributors. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - * - * Licensed under the Apache License, Version 2.0 (provided in - * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0) - * or under the MIT license (provided in LICENSE-MIT.txt and at - * http://opensource.org/licenses/MIT), at your option. This file may not - * be copied, modified, or distributed except according to those terms. - * - * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either - * express or implied. See your chosen license for details. - * - * For additional information about the PCG random number generation scheme, - * visit http://www.pcg-random.org/. - */ - -/* - * This code provides the reference implementation of the PCG family of - * random number generators. The code is complex because it implements - * - * - several members of the PCG family, specifically members corresponding - * to the output functions: - * - XSH RR (good for 64-bit state, 32-bit output) - * - XSH RS (good for 64-bit state, 32-bit output) - * - XSL RR (good for 128-bit state, 64-bit output) - * - RXS M XS (statistically most powerful generator) - * - XSL RR RR (good for 128-bit state, 128-bit output) - * - and RXS, RXS M, XSH, XSL (mostly for testing) - * - at potentially *arbitrary* bit sizes - * - with four different techniques for random streams (MCG, one-stream - * LCG, settable-stream LCG, unique-stream LCG) - * - and the extended generation schemes allowing arbitrary periods - * - with all features of C++11 random number generation (and more), - * some of which are somewhat painful, including - * - initializing with a SeedSequence which writes 32-bit values - * to memory, even though the state of the generator may not - * use 32-bit values (it might use smaller or larger integers) - * - I/O for RNGs and a prescribed format, which needs to handle - * the issue that 8-bit and 128-bit integers don't have working - * I/O routines (e.g., normally 8-bit = char, not integer) - * - 
equality and inequality for RNGs - * - and a number of convenience typedefs to mask all the complexity - * - * The code employes a fairly heavy level of abstraction, and has to deal - * with various C++ minutia. If you're looking to learn about how the PCG - * scheme works, you're probably best of starting with one of the other - * codebases (see www.pcg-random.org). But if you're curious about the - * constants for the various output functions used in those other, simpler, - * codebases, this code shows how they are calculated. - * - * On the positive side, at least there are convenience typedefs so that you - * can say - * - * pcg32 myRNG; - * - * rather than: - * - * pcg_detail::engine< - * uint32_t, // Output Type - * uint64_t, // State Type - * pcg_detail::xsh_rr_mixin, true, // Output Func - * pcg_detail::specific_stream, // Stream Kind - * pcg_detail::default_multiplier // LCG Mult - * > myRNG; - * - */ - -#ifndef PCG_RAND_HPP_INCLUDED -#define PCG_RAND_HPP_INCLUDED 1 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef _MSC_VER - #pragma warning(disable:4146) -#endif - -#ifdef _MSC_VER - #define PCG_ALWAYS_INLINE __forceinline -#elif __GNUC__ - #define PCG_ALWAYS_INLINE __attribute__((always_inline)) -#else - #define PCG_ALWAYS_INLINE inline -#endif - -#ifdef min -#undef min -#endif - -#ifdef max -#undef max -#endif - -/* - * The pcg_extras namespace contains some support code that is likley to - * be useful for a variety of RNGs, including: - * - 128-bit int support for platforms where it isn't available natively - * - bit twiddling operations - * - I/O of 128-bit and 8-bit integers - * - Handling the evilness of SeedSeq - * - Support for efficiently producing random numbers less than a given - * bound - */ - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #5 -// See the end of this file for a 
list - -/* - * PCG Random Number Generation for C++ - * - * Copyright 2014-2017 Melissa O'Neill , - * and the PCG Project contributors. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - * - * Licensed under the Apache License, Version 2.0 (provided in - * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0) - * or under the MIT license (provided in LICENSE-MIT.txt and at - * http://opensource.org/licenses/MIT), at your option. This file may not - * be copied, modified, or distributed except according to those terms. - * - * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either - * express or implied. See your chosen license for details. - * - * For additional information about the PCG random number generation scheme, - * visit http://www.pcg-random.org/. - */ - -/* - * This file provides support code that is useful for random-number generation - * but not specific to the PCG generation scheme, including: - * - 128-bit int support for platforms where it isn't available natively - * - bit twiddling operations - * - I/O of 128-bit and 8-bit integers - * - Handling the evilness of SeedSeq - * - Support for efficiently producing random numbers less than a given - * bound - */ - -#ifndef PCG_EXTRAS_HPP_INCLUDED -#define PCG_EXTRAS_HPP_INCLUDED 1 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef __GNUC__ - #include -#endif - -/* - * Abstractions for compiler-specific directives - */ - -#ifdef __GNUC__ - #define PCG_NOINLINE __attribute__((noinline)) -#else - #define PCG_NOINLINE -#endif - -/* - * Some members of the PCG library use 128-bit math. When compiling on 64-bit - * platforms, both GCC and Clang provide 128-bit integer types that are ideal - * for the job. - * - * On 32-bit platforms (or with other compilers), we fall back to a C++ - * class that provides 128-bit unsigned integers instead. 
It may seem - * like we're reinventing the wheel here, because libraries already exist - * that support large integers, but most existing libraries provide a very - * generic multiprecision code, but here we're operating at a fixed size. - * Also, most other libraries are fairly heavyweight. So we use a direct - * implementation. Sadly, it's much slower than hand-coded assembly or - * direct CPU support. - * - */ -#if __SIZEOF_INT128__ - namespace pcg_extras { - typedef __uint128_t pcg128_t; - } - #define PCG_128BIT_CONSTANT(high,low) \ - ((pcg_extras::pcg128_t(high) << 64) + low) -#else - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #5 -// See the end of this file for a list - -/* - * PCG Random Number Generation for C++ - * - * Copyright 2014-2017 Melissa O'Neill , - * and the PCG Project contributors. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - * - * Licensed under the Apache License, Version 2.0 (provided in - * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0) - * or under the MIT license (provided in LICENSE-MIT.txt and at - * http://opensource.org/licenses/MIT), at your option. This file may not - * be copied, modified, or distributed except according to those terms. - * - * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either - * express or implied. See your chosen license for details. - * - * For additional information about the PCG random number generation scheme, - * visit http://www.pcg-random.org/. - */ - -/* - * This code provides a a C++ class that can provide 128-bit (or higher) - * integers. To produce 2K-bit integers, it uses two K-bit integers, - * placed in a union that allowes the code to also see them as four K/2 bit - * integers (and access them either directly name, or by index). 
- * - * It may seem like we're reinventing the wheel here, because several - * libraries already exist that support large integers, but most existing - * libraries provide a very generic multiprecision code, but here we're - * operating at a fixed size. Also, most other libraries are fairly - * heavyweight. So we use a direct implementation. Sadly, it's much slower - * than hand-coded assembly or direct CPU support. - */ - -#ifndef PCG_UINT128_HPP_INCLUDED -#define PCG_UINT128_HPP_INCLUDED 1 - -#include -#include -#include -#include -#include -#include -#include - -#if defined(_MSC_VER) // Use MSVC++ intrinsics -#include -#endif +unique_ptr PhysicalSimpleAggregate::GetGlobalState(ClientContext &context) { + return make_unique(aggregates); +} -/* - * We want to lay the type out the same way that a native type would be laid - * out, which means we must know the machine's endian, at compile time. - * This ugliness attempts to do so. - */ +unique_ptr PhysicalSimpleAggregate::GetLocalSinkState(ExecutionContext &context) { + return make_unique(aggregates); +} -#ifndef PCG_LITTLE_ENDIAN - #if defined(__BYTE_ORDER__) - #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - #define PCG_LITTLE_ENDIAN 1 - #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - #define PCG_LITTLE_ENDIAN 0 - #else - #error __BYTE_ORDER__ does not match a standard endian, pick a side - #endif - #elif __LITTLE_ENDIAN__ || _LITTLE_ENDIAN - #define PCG_LITTLE_ENDIAN 1 - #elif __BIG_ENDIAN__ || _BIG_ENDIAN - #define PCG_LITTLE_ENDIAN 0 - #elif __x86_64 || __x86_64__ || _M_X64 || __i386 || __i386__ || _M_IX86 - #define PCG_LITTLE_ENDIAN 1 - #elif __powerpc__ || __POWERPC__ || __ppc__ || __PPC__ \ - || __m68k__ || __mc68000__ - #define PCG_LITTLE_ENDIAN 0 - #else - #error Unable to determine target endianness - #endif -#endif +void PhysicalSimpleAggregate::Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, + DataChunk &input) const { + auto &sink = (SimpleAggregateLocalState &)lstate; 
+ // perform the aggregation inside the local state + idx_t payload_idx = 0, payload_expr_idx = 0; + sink.Reset(); -namespace pcg_extras { + DataChunk &payload_chunk = sink.payload_chunk; + sink.child_executor.SetChunk(input); + payload_chunk.SetCardinality(input); + for (idx_t aggr_idx = 0; aggr_idx < aggregates.size(); aggr_idx++) { + DataChunk filtered_input; + auto &aggregate = (BoundAggregateExpression &)*aggregates[aggr_idx]; + idx_t payload_cnt = 0; + // resolve the filter (if any) + if (aggregate.filter) { + ExpressionExecutor filter_execution(aggregate.filter.get()); + SelectionVector true_sel(STANDARD_VECTOR_SIZE); + auto count = filter_execution.SelectExpression(input, true_sel); + auto input_types = input.GetTypes(); + filtered_input.Initialize(input_types); + filtered_input.Slice(input, true_sel, count); + sink.child_executor.SetChunk(filtered_input); + payload_chunk.SetCardinality(count); + } + // resolve the child expressions of the aggregate (if any) + if (!aggregate.children.empty()) { + for (idx_t i = 0; i < aggregate.children.size(); ++i) { + sink.child_executor.ExecuteExpression(payload_expr_idx, payload_chunk.data[payload_idx + payload_cnt]); + payload_expr_idx++; + payload_cnt++; + } + } -// Recent versions of GCC have intrinsics we can use to quickly calculate -// the number of leading and trailing zeros in a number. If possible, we -// use them, otherwise we fall back to old-fashioned bit twiddling to figure -// them out. + aggregate.function.simple_update(payload_cnt == 0 ? 
nullptr : &payload_chunk.data[payload_idx], + aggregate.bind_info.get(), payload_cnt, sink.state.aggregates[aggr_idx].get(), + payload_chunk.size()); + payload_idx += payload_cnt; + } +} -#ifndef PCG_BITCOUNT_T - typedef uint8_t bitcount_t; -#else - typedef PCG_BITCOUNT_T bitcount_t; -#endif +//===--------------------------------------------------------------------===// +// Finalize +//===--------------------------------------------------------------------===// +void PhysicalSimpleAggregate::Combine(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate) { + auto &gstate = (SimpleAggregateGlobalState &)state; + auto &source = (SimpleAggregateLocalState &)lstate; -/* - * Provide some useful helper functions - * * flog2 floor(log2(x)) - * * trailingzeros number of trailing zero bits - */ + // finalize: combine the local state into the global state + if (all_combinable) { + // all aggregates are combinable: we might be doing a parallel aggregate + // use the combine method to combine the partial aggregates + lock_guard glock(gstate.lock); + for (idx_t aggr_idx = 0; aggr_idx < aggregates.size(); aggr_idx++) { + auto &aggregate = (BoundAggregateExpression &)*aggregates[aggr_idx]; + Vector source_state(Value::POINTER((uintptr_t)source.state.aggregates[aggr_idx].get())); + Vector dest_state(Value::POINTER((uintptr_t)gstate.state.aggregates[aggr_idx].get())); -#if defined(__GNUC__) // Any GNU-compatible compiler supporting C++11 has - // some useful intrinsics we can use. 
+ aggregate.function.combine(source_state, dest_state, 1); + } + } else { + // complex aggregates: this is necessarily a non-parallel aggregate + // simply move over the source state into the global state + source.state.Move(gstate.state); + } -inline bitcount_t flog2(uint32_t v) -{ - return 31 - __builtin_clz(v); + context.thread.profiler.Flush(this, &source.child_executor, "child_executor", 0); + context.client.profiler->Flush(context.thread.profiler); } -inline bitcount_t trailingzeros(uint32_t v) -{ - return __builtin_ctz(v); +//===--------------------------------------------------------------------===// +// GetChunkInternal +//===--------------------------------------------------------------------===// +void PhysicalSimpleAggregate::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state) const { + auto &gstate = (SimpleAggregateGlobalState &)*sink_state; + // initialize the result chunk with the aggregate values + chunk.SetCardinality(1); + for (idx_t aggr_idx = 0; aggr_idx < aggregates.size(); aggr_idx++) { + auto &aggregate = (BoundAggregateExpression &)*aggregates[aggr_idx]; + + Vector state_vector(Value::POINTER((uintptr_t)gstate.state.aggregates[aggr_idx].get())); + aggregate.function.finalize(state_vector, aggregate.bind_info.get(), chunk.data[aggr_idx], 1, 0); + } + state->finished = true; } -inline bitcount_t flog2(uint64_t v) -{ -#if UINT64_MAX == ULONG_MAX - return 63 - __builtin_clzl(v); -#elif UINT64_MAX == ULLONG_MAX - return 63 - __builtin_clzll(v); -#else - #error Cannot find a function for uint64_t -#endif +string PhysicalSimpleAggregate::ParamsToString() const { + string result; + for (idx_t i = 0; i < aggregates.size(); i++) { + auto &aggregate = (BoundAggregateExpression &)*aggregates[i]; + if (i > 0) { + result += "\n"; + } + result += aggregates[i]->GetName(); + if (aggregate.filter) { + result += " Filter: " + aggregate.filter->GetName(); + } + } + return result; } - -inline bitcount_t 
trailingzeros(uint64_t v) -{ -#if UINT64_MAX == ULONG_MAX - return __builtin_ctzl(v); -#elif UINT64_MAX == ULLONG_MAX - return __builtin_ctzll(v); -#else - #error Cannot find a function for uint64_t -#endif +void PhysicalSimpleAggregate::FinalizeOperatorState(PhysicalOperatorState &state, ExecutionContext &context) { } -#elif defined(_MSC_VER) // Use MSVC++ intrinsics +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/operator/aggregate/physical_window.hpp +// +// +//===----------------------------------------------------------------------===// -#pragma intrinsic(_BitScanReverse, _BitScanForward) -#if defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64) -#pragma intrinsic(_BitScanReverse64, _BitScanForward64) -#endif -inline bitcount_t flog2(uint32_t v) -{ - unsigned long i; - _BitScanReverse(&i, v); - return bitcount_t(i); -} -inline bitcount_t trailingzeros(uint32_t v) -{ - unsigned long i; - _BitScanForward(&i, v); - return bitcount_t(i); -} -inline bitcount_t flog2(uint64_t v) -{ -#if defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64) - unsigned long i; - _BitScanReverse64(&i, v); - return bitcount_t(i); -#else - // 32-bit x86 - uint32_t high = v >> 32; - uint32_t low = uint32_t(v); - return high ? 32+flog2(high) : flog2(low); -#endif -} -inline bitcount_t trailingzeros(uint64_t v) -{ -#if defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64) - unsigned long i; - _BitScanForward64(&i, v); - return bitcount_t(i); -#else - // 32-bit x86 - uint32_t high = v >> 32; - uint32_t low = uint32_t(v); - return low ? 
trailingzeros(low) : trailingzeros(high)+32; -#endif -} -#else // Otherwise, we fall back to bit twiddling - // implementations -inline bitcount_t flog2(uint32_t v) -{ - // Based on code by Eric Cole and Mark Dickinson, which appears at - // https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn +namespace duckdb { - static const uint8_t multiplyDeBruijnBitPos[32] = { - 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, - 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 - }; +//! PhysicalWindow implements window functions +//! It assumes that all functions have a common partitioning and ordering +class PhysicalWindow : public PhysicalSink { +public: + PhysicalWindow(vector types, vector> select_list, idx_t estimated_cardinality, + PhysicalOperatorType type = PhysicalOperatorType::WINDOW); - v |= v >> 1; // first round down to one less than a power of 2 - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; + unique_ptr GetOperatorState() override; - return multiplyDeBruijnBitPos[(uint32_t)(v * 0x07C4ACDDU) >> 27]; -} + // sink stuff + void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, + DataChunk &input) const override; + void Combine(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate) override; + bool Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr gstate) override; + bool FinalizeInternal(ClientContext &context, unique_ptr gstate); -inline bitcount_t trailingzeros(uint32_t v) -{ - static const uint8_t multiplyDeBruijnBitPos[32] = { - 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, - 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 - }; + unique_ptr GetLocalSinkState(ExecutionContext &context) override; + unique_ptr GetGlobalState(ClientContext &context) override; - return multiplyDeBruijnBitPos[((uint32_t)((v & 
-v) * 0x077CB531U)) >> 27]; -} + idx_t MaxThreads(ClientContext &context); + unique_ptr GetParallelState(); -inline bitcount_t flog2(uint64_t v) -{ - uint32_t high = v >> 32; - uint32_t low = uint32_t(v); + string ParamsToString() const override; - return high ? 32+flog2(high) : flog2(low); -} +public: + //! The projection list of the WINDOW statement (may contain aggregates) + vector> select_list; +}; -inline bitcount_t trailingzeros(uint64_t v) -{ - uint32_t high = v >> 32; - uint32_t low = uint32_t(v); +} // namespace duckdb - return low ? trailingzeros(low) : trailingzeros(high)+32; -} -#endif -inline bitcount_t flog2(uint8_t v) -{ - return flog2(uint32_t(v)); -} -inline bitcount_t flog2(uint16_t v) -{ - return flog2(uint32_t(v)); -} -#if __SIZEOF_INT128__ -inline bitcount_t flog2(__uint128_t v) -{ - uint64_t high = uint64_t(v >> 64); - uint64_t low = uint64_t(v); - return high ? 64+flog2(high) : flog2(low); -} -#endif -inline bitcount_t trailingzeros(uint8_t v) -{ - return trailingzeros(uint32_t(v)); -} +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/window_segment_tree.hpp +// +// +//===----------------------------------------------------------------------===// -inline bitcount_t trailingzeros(uint16_t v) -{ - return trailingzeros(uint32_t(v)); -} -#if __SIZEOF_INT128__ -inline bitcount_t trailingzeros(__uint128_t v) -{ - uint64_t high = uint64_t(v >> 64); - uint64_t low = uint64_t(v); - return low ? 
trailingzeros(low) : trailingzeros(high)+64; -} -#endif -template -inline bitcount_t clog2(UInt v) -{ - return flog2(v) + ((v & (-v)) != v); -} -template -inline UInt addwithcarry(UInt x, UInt y, bool carryin, bool* carryout) -{ - UInt half_result = y + carryin; - UInt result = x + half_result; - *carryout = (half_result < y) || (result < x); - return result; -} -template -inline UInt subwithcarry(UInt x, UInt y, bool carryin, bool* carryout) -{ - UInt half_result = y + carryin; - UInt result = x - half_result; - *carryout = (half_result < y) || (result > x); - return result; -} -template -class uint_x4 { -// private: - static constexpr unsigned int UINT_BITS = sizeof(UInt) * CHAR_BIT; -public: - union { -#if PCG_LITTLE_ENDIAN - struct { - UInt v0, v1, v2, v3; - } w; - struct { - UIntX2 v01, v23; - } d; -#else - struct { - UInt v3, v2, v1, v0; - } w; - struct { - UIntX2 v23, v01; - } d; -#endif - // For the array access versions, the code that uses the array - // must handle endian itself. Yuck. - UInt wa[4]; - UIntX2 da[2]; - }; +namespace duckdb { +class WindowSegmentTree { public: - uint_x4() = default; + using FrameBounds = std::pair; - constexpr uint_x4(UInt v3, UInt v2, UInt v1, UInt v0) -#if PCG_LITTLE_ENDIAN - : w{v0, v1, v2, v3} -#else - : w{v3, v2, v1, v0} -#endif - { - // Nothing (else) to do - } + WindowSegmentTree(AggregateFunction &aggregate, FunctionData *bind_info, const LogicalType &result_type, + ChunkCollection *input); + ~WindowSegmentTree(); - constexpr uint_x4(UIntX2 v23, UIntX2 v01) -#if PCG_LITTLE_ENDIAN - : d{v01,v23} -#else - : d{v23,v01} -#endif - { - // Nothing (else) to do - } + //! First row contains the result. 
+ void Compute(Vector &result, idx_t rid, idx_t start, idx_t end); - constexpr uint_x4(UIntX2 v01) -#if PCG_LITTLE_ENDIAN - : d{v01, UIntX2(0)} -#else - : d{UIntX2(0),v01} -#endif - { - // Nothing (else) to do - } +private: + void ConstructTree(); + void ExtractFrame(idx_t begin, idx_t end); + void WindowSegmentValue(idx_t l_idx, idx_t begin, idx_t end); + void AggregateInit(); + void AggegateFinal(Vector &result, idx_t rid); - template::value - && sizeof(Integral) <= sizeof(UIntX2)) - >::type* = nullptr> - constexpr uint_x4(Integral v01) -#if PCG_LITTLE_ENDIAN - : d{UIntX2(v01), UIntX2(0)} -#else - : d{UIntX2(0), UIntX2(v01)} -#endif - { - // Nothing (else) to do - } + //! The aggregate that the window function is computed over + AggregateFunction aggregate; + //! The bind info of the aggregate + FunctionData *bind_info; + //! The result type of the window function + LogicalType result_type; - explicit constexpr operator UIntX2() const - { - return d.v01; - } + //! Data pointer that contains a single state, used for intermediate window segment aggregation + vector state; + //! Input data chunk, used for intermediate window segment aggregation + DataChunk inputs; + //! A vector of pointers to "state", used for intermediate window segment aggregation + Vector statep; + //! The frame boundaries, used for the window functions + FrameBounds frame; + //! Reused result state container for the window functions + Vector statev; - template::value - && sizeof(Integral) <= sizeof(UIntX2)) - >::type* = nullptr> - explicit constexpr operator Integral() const - { - return Integral(d.v01); - } + //! The actual window segment tree: an array of aggregate states that represent all the intermediate nodes + unique_ptr levels_flat_native; + //! For each level, the starting location in the levels_flat_native array + vector levels_flat_start; - explicit constexpr operator bool() const - { - return d.v01 || d.v23; - } + //! 
The total number of internal nodes of the tree, stored in levels_flat_native + idx_t internal_nodes; - template - friend uint_x4 operator*(const uint_x4&, const uint_x4&); + //! The (sorted) input chunk collection on which the tree is built + ChunkCollection *input_ref; - template - friend uint_x4 operator*(const uint_x4&, V); + // TREE_FANOUT needs to cleanly divide STANDARD_VECTOR_SIZE + static constexpr idx_t TREE_FANOUT = 64; +}; - template - friend std::pair< uint_x4,uint_x4 > - divmod(const uint_x4&, const uint_x4&); +} // namespace duckdb - template - friend uint_x4 operator+(const uint_x4&, const uint_x4&); +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/parallel/task_context.hpp +// +// +//===----------------------------------------------------------------------===// - template - friend uint_x4 operator-(const uint_x4&, const uint_x4&); - template - friend uint_x4 operator<<(const uint_x4&, const bitcount_t shift); - template - friend uint_x4 operator>>(const uint_x4&, const bitcount_t shift); - template - friend uint_x4 operator&(const uint_x4&, const uint_x4&); - template - friend uint_x4 operator|(const uint_x4&, const uint_x4&); - template - friend uint_x4 operator^(const uint_x4&, const uint_x4&); - template - friend bool operator==(const uint_x4&, const uint_x4&); +namespace duckdb { +class PhysicalOperator; - template - friend bool operator!=(const uint_x4&, const uint_x4&); +//! TaskContext holds task specific information relating to the excution +class TaskContext { +public: + TaskContext() { + } - template - friend bool operator<(const uint_x4&, const uint_x4&); + //! 
Per-operator task info + unordered_map task_info; +}; - template - friend bool operator<=(const uint_x4&, const uint_x4&); +} // namespace duckdb - template - friend bool operator>(const uint_x4&, const uint_x4&); - template - friend bool operator>=(const uint_x4&, const uint_x4&); - template - friend uint_x4 operator~(const uint_x4&); - template - friend uint_x4 operator-(const uint_x4&); - template - friend bitcount_t flog2(const uint_x4&); +#include +#include +#include - template - friend bitcount_t trailingzeros(const uint_x4&); +namespace duckdb { - uint_x4& operator*=(const uint_x4& rhs) - { - uint_x4 result = *this * rhs; - return *this = result; - } +using counts_t = std::vector; - uint_x4& operator*=(UIntX2 rhs) - { - uint_x4 result = *this * rhs; - return *this = result; - } +// Global sink state +class WindowGlobalState : public GlobalOperatorState { +public: + WindowGlobalState(PhysicalWindow &op_p, ClientContext &context) : op(op_p) { + } - uint_x4& operator/=(const uint_x4& rhs) - { - uint_x4 result = *this / rhs; - return *this = result; - } + PhysicalWindow &op; + mutex lock; + ChunkCollection chunks; + ChunkCollection over_collection; + ChunkCollection hash_collection; + counts_t counts; +}; - uint_x4& operator%=(const uint_x4& rhs) - { - uint_x4 result = *this % rhs; - return *this = result; - } +// Per-thread sink state +class WindowLocalState : public LocalSinkState { +public: + explicit WindowLocalState(PhysicalWindow &op_p, const unsigned partition_bits = 10) + : op(op_p), partition_count(size_t(1) << partition_bits) { + } - uint_x4& operator+=(const uint_x4& rhs) - { - uint_x4 result = *this + rhs; - return *this = result; - } + PhysicalWindow &op; + ChunkCollection chunks; + ChunkCollection over_collection; + ChunkCollection hash_collection; + const size_t partition_count; + counts_t counts; +}; - uint_x4& operator-=(const uint_x4& rhs) - { - uint_x4 result = *this - rhs; - return *this = result; - } +// this implements a sorted window 
functions variant +PhysicalWindow::PhysicalWindow(vector types, vector> select_list, + idx_t estimated_cardinality, PhysicalOperatorType type) + : PhysicalSink(type, move(types), estimated_cardinality), select_list(move(select_list)) { +} - uint_x4& operator&=(const uint_x4& rhs) - { - uint_x4 result = *this & rhs; - return *this = result; - } +template +class BitArray { +public: + using bits_t = std::vector; - uint_x4& operator|=(const uint_x4& rhs) - { - uint_x4 result = *this | rhs; - return *this = result; - } + static const auto BITS_PER_WORD = std::numeric_limits::digits; + static const auto ZEROS = std::numeric_limits::min(); + static const auto ONES = std::numeric_limits::max(); - uint_x4& operator^=(const uint_x4& rhs) - { - uint_x4 result = *this ^ rhs; - return *this = result; - } + class reference { // NOLINT + public: + friend BitArray; - uint_x4& operator>>=(bitcount_t shift) - { - uint_x4 result = *this >> shift; - return *this = result; - } + reference(const reference &r) = default; - uint_x4& operator<<=(bitcount_t shift) - { - uint_x4 result = *this << shift; - return *this = result; - } + reference &operator=(bool x) noexcept { + auto b = parent.Block(pos); + auto s = parent.Shift(pos); + auto w = parent.GetBlock(b); + if (parent.TestBit(w, s) != x) { + parent.SetBlock(b, parent.FlipBit(w, s)); + } + return *this; + } -}; + reference &operator=(const reference &r) noexcept { + return *this = bool(r); + } -template -bitcount_t flog2(const uint_x4& v) -{ -#if PCG_LITTLE_ENDIAN - for (uint8_t i = 4; i !=0; /* dec in loop */) { - --i; -#else - for (uint8_t i = 0; i < 4; ++i) { -#endif - if (v.wa[i] == 0) - continue; - return flog2(v.wa[i]) + uint_x4::UINT_BITS*i; - } - abort(); -} + explicit operator bool() const noexcept { + return parent[pos]; + } -template -bitcount_t trailingzeros(const uint_x4& v) -{ -#if PCG_LITTLE_ENDIAN - for (uint8_t i = 0; i < 4; ++i) { -#else - for (uint8_t i = 4; i !=0; /* dec in loop */) { - --i; -#endif - if (v.wa[i] != 
0) - return trailingzeros(v.wa[i]) + uint_x4::UINT_BITS*i; - } - return uint_x4::UINT_BITS*4; -} + bool operator~() const noexcept { + return !parent[pos]; + } -template -std::pair< uint_x4, uint_x4 > - divmod(const uint_x4& orig_dividend, - const uint_x4& divisor) -{ - // If the dividend is less than the divisor, the answer is always zero. - // This takes care of boundary cases like 0/x (which would otherwise be - // problematic because we can't take the log of zero. (The boundary case - // of division by zero is undefined.) - if (orig_dividend < divisor) - return { uint_x4(UIntX2(0)), orig_dividend }; + private: + explicit reference(BitArray &parent_p, size_t pos_p) : parent(parent_p), pos(pos_p) { + } - auto dividend = orig_dividend; + BitArray &parent; + size_t pos; + }; - auto log2_divisor = flog2(divisor); - auto log2_dividend = flog2(dividend); - // assert(log2_dividend >= log2_divisor); - bitcount_t logdiff = log2_dividend - log2_divisor; + static size_t Block(const size_t &pos) { + return pos / BITS_PER_WORD; + } - constexpr uint_x4 ONE(UIntX2(1)); - if (logdiff == 0) - return { ONE, dividend - divisor }; + static unsigned Shift(const size_t &pos) { + return pos % BITS_PER_WORD; + } - // Now we change the log difference to - // floor(log2(divisor)) - ceil(log2(dividend)) - // to ensure that we *underestimate* the result. 
- logdiff -= 1; + static bool TestBit(W w, unsigned s) { + return (w >> s) & 0x01; + } - uint_x4 quotient(UIntX2(0)); + static W SetBit(W w, unsigned s) { + return w | (W(1) << s); + } - auto qfactor = ONE << logdiff; - auto factor = divisor << logdiff; + static W ClearBit(W w, unsigned s) { + return w & ~(W(1) << s); + } - do { - dividend -= factor; - quotient += qfactor; - while (dividend < factor) { - factor >>= 1; - qfactor >>= 1; - } - } while (dividend >= divisor); + static W FlipBit(W w, unsigned s) { + return w ^ (W(1) << s); + } - return { quotient, dividend }; -} + explicit BitArray(const size_t &count, const W &init = 0) + : bits(count ? Block(count - 1) + 1 : 0, init), count(count) { + } -template -uint_x4 operator/(const uint_x4& dividend, - const uint_x4& divisor) -{ - return divmod(dividend, divisor).first; -} + size_t Count() const { + return count; + } -template -uint_x4 operator%(const uint_x4& dividend, - const uint_x4& divisor) -{ - return divmod(dividend, divisor).second; -} + const W &GetBlock(size_t b) const { + return bits[b]; + } + W &GetBlock(size_t b) { + return bits[b]; + } -template -uint_x4 operator*(const uint_x4& a, - const uint_x4& b) -{ - constexpr auto UINT_BITS = uint_x4::UINT_BITS; - uint_x4 r = {0U, 0U, 0U, 0U}; - bool carryin = false; - bool carryout; - UIntX2 a0b0 = UIntX2(a.w.v0) * UIntX2(b.w.v0); - r.w.v0 = UInt(a0b0); - r.w.v1 = UInt(a0b0 >> UINT_BITS); - - UIntX2 a1b0 = UIntX2(a.w.v1) * UIntX2(b.w.v0); - r.w.v2 = UInt(a1b0 >> UINT_BITS); - r.w.v1 = addwithcarry(r.w.v1, UInt(a1b0), carryin, &carryout); - carryin = carryout; - r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout); - carryin = carryout; - r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); - - UIntX2 a0b1 = UIntX2(a.w.v0) * UIntX2(b.w.v1); - carryin = false; - r.w.v2 = addwithcarry(r.w.v2, UInt(a0b1 >> UINT_BITS), carryin, &carryout); - carryin = carryout; - r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); - - carryin = false; - 
r.w.v1 = addwithcarry(r.w.v1, UInt(a0b1), carryin, &carryout); - carryin = carryout; - r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout); - carryin = carryout; - r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); - - UIntX2 a1b1 = UIntX2(a.w.v1) * UIntX2(b.w.v1); - carryin = false; - r.w.v2 = addwithcarry(r.w.v2, UInt(a1b1), carryin, &carryout); - carryin = carryout; - r.w.v3 = addwithcarry(r.w.v3, UInt(a1b1 >> UINT_BITS), carryin, &carryout); - - r.d.v23 += a.d.v01 * b.d.v23 + a.d.v23 * b.d.v01; + void SetBlock(size_t b, const W &block) { + GetBlock(b) = block; + } - return r; -} + bool operator[](size_t pos) const { + return TestBit(GetBlock(Block(pos)), Shift(pos)); + } + reference operator[](size_t pos) { + return reference(*this, pos); + } -template -uint_x4 operator*(const uint_x4& a, - UIntX2 b01) -{ - constexpr auto UINT_BITS = uint_x4::UINT_BITS; - uint_x4 r = {0U, 0U, 0U, 0U}; - bool carryin = false; - bool carryout; - UIntX2 a0b0 = UIntX2(a.w.v0) * UIntX2(UInt(b01)); - r.w.v0 = UInt(a0b0); - r.w.v1 = UInt(a0b0 >> UINT_BITS); - - UIntX2 a1b0 = UIntX2(a.w.v1) * UIntX2(UInt(b01)); - r.w.v2 = UInt(a1b0 >> UINT_BITS); - r.w.v1 = addwithcarry(r.w.v1, UInt(a1b0), carryin, &carryout); - carryin = carryout; - r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout); - carryin = carryout; - r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); - - UIntX2 a0b1 = UIntX2(a.w.v0) * UIntX2(b01 >> UINT_BITS); - carryin = false; - r.w.v2 = addwithcarry(r.w.v2, UInt(a0b1 >> UINT_BITS), carryin, &carryout); - carryin = carryout; - r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); - - carryin = false; - r.w.v1 = addwithcarry(r.w.v1, UInt(a0b1), carryin, &carryout); - carryin = carryout; - r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout); - carryin = carryout; - r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); - - UIntX2 a1b1 = UIntX2(a.w.v1) * UIntX2(b01 >> UINT_BITS); - carryin = false; - r.w.v2 = 
addwithcarry(r.w.v2, UInt(a1b1), carryin, &carryout); - carryin = carryout; - r.w.v3 = addwithcarry(r.w.v3, UInt(a1b1 >> UINT_BITS), carryin, &carryout); - - r.d.v23 += a.d.v23 * b01; +private: + bits_t bits; + size_t count; +}; - return r; -} +template +struct ChunkIterator { + ChunkIterator(ChunkCollection &collection, const idx_t col_idx) + : collection(collection), col_idx(col_idx), chunk_begin(0), chunk_end(0), ch_idx(0), data(nullptr), + validity(nullptr) { + Update(0); + } -template -uint_x4 operator+(const uint_x4& a, - const uint_x4& b) -{ - uint_x4 r = {0U, 0U, 0U, 0U}; - - bool carryin = false; - bool carryout; - r.w.v0 = addwithcarry(a.w.v0, b.w.v0, carryin, &carryout); - carryin = carryout; - r.w.v1 = addwithcarry(a.w.v1, b.w.v1, carryin, &carryout); - carryin = carryout; - r.w.v2 = addwithcarry(a.w.v2, b.w.v2, carryin, &carryout); - carryin = carryout; - r.w.v3 = addwithcarry(a.w.v3, b.w.v3, carryin, &carryout); + void Update(idx_t r) { + if (r >= chunk_end) { + ch_idx = collection.LocateChunk(r); + auto &ch = collection.GetChunk(ch_idx); + chunk_begin = ch_idx * STANDARD_VECTOR_SIZE; + chunk_end = chunk_begin + ch.size(); + auto &vector = ch.data[col_idx]; + data = FlatVector::GetData(vector); + validity = &FlatVector::Validity(vector); + } + } - return r; -} + bool IsValid(idx_t r) { + return validity->RowIsValid(r - chunk_begin); + } -template -uint_x4 operator-(const uint_x4& a, - const uint_x4& b) -{ - uint_x4 r = {0U, 0U, 0U, 0U}; - - bool carryin = false; - bool carryout; - r.w.v0 = subwithcarry(a.w.v0, b.w.v0, carryin, &carryout); - carryin = carryout; - r.w.v1 = subwithcarry(a.w.v1, b.w.v1, carryin, &carryout); - carryin = carryout; - r.w.v2 = subwithcarry(a.w.v2, b.w.v2, carryin, &carryout); - carryin = carryout; - r.w.v3 = subwithcarry(a.w.v3, b.w.v3, carryin, &carryout); + INPUT_TYPE GetValue(idx_t r) { + return data[r - chunk_begin]; + } - return r; -} +private: + ChunkCollection &collection; + idx_t col_idx; + idx_t chunk_begin; + idx_t 
chunk_end; + idx_t ch_idx; + const INPUT_TYPE *data; + ValidityMask *validity; +}; +template +static void MaskTypedColumn(MASK_TYPE &mask, ChunkCollection &over_collection, const idx_t c) { + ChunkIterator ci(over_collection, c); -template -uint_x4 operator&(const uint_x4& a, - const uint_x4& b) -{ - return uint_x4(a.d.v23 & b.d.v23, a.d.v01 & b.d.v01); -} + // Record the first value + idx_t r = 0; + auto prev_valid = ci.IsValid(r); + auto prev = ci.GetValue(r); + ++r; -template -uint_x4 operator|(const uint_x4& a, - const uint_x4& b) -{ - return uint_x4(a.d.v23 | b.d.v23, a.d.v01 | b.d.v01); -} + // Process complete blocks + const auto row_count = over_collection.Count(); + const auto complete_block_count = mask.Block(row_count); + for (idx_t b = mask.Block(r); b < complete_block_count; ++b) { + auto block = mask.GetBlock(b); -template -uint_x4 operator^(const uint_x4& a, - const uint_x4& b) -{ - return uint_x4(a.d.v23 ^ b.d.v23, a.d.v01 ^ b.d.v01); -} + // Skip the block if it is all boundaries. 
+ if (block == mask.ONES) { + r -= (r % mask.BITS_PER_WORD); + r += mask.BITS_PER_WORD; + continue; + } -template -uint_x4 operator~(const uint_x4& v) -{ - return uint_x4(~v.d.v23, ~v.d.v01); -} + // Scan the rows in the complete block + for (unsigned shift = mask.Shift(r); shift < mask.BITS_PER_WORD; ++shift, ++r) { + // Update the chunk for this row + ci.Update(r); -template -uint_x4 operator-(const uint_x4& v) -{ - return uint_x4(0UL,0UL) - v; -} + auto curr_valid = ci.IsValid(r); + auto curr = ci.GetValue(r); + if (!mask.TestBit(block, shift)) { + if (curr_valid != prev_valid || (curr_valid && !Equals::Operation(curr, prev))) { + block = mask.SetBit(block, shift); + } + } + prev_valid = curr_valid; + prev = curr; + } + mask.SetBlock(b, block); + } -template -bool operator==(const uint_x4& a, const uint_x4& b) -{ - return (a.d.v01 == b.d.v01) && (a.d.v23 == b.d.v23); -} + // Finish last ragged block + if (r < row_count) { + auto block = mask.GetBlock(complete_block_count); + if (block != mask.ONES) { + for (unsigned shift = mask.Shift(r); r < row_count; ++shift, ++r) { + // Update the chunk for this row + ci.Update(r); -template -bool operator!=(const uint_x4& a, const uint_x4& b) -{ - return !operator==(a,b); + auto curr_valid = ci.IsValid(r); + auto curr = ci.GetValue(r); + if (!mask.TestBit(block, shift)) { + if (curr_valid != prev_valid || (curr_valid && !Equals::Operation(curr, prev))) { + block = mask.SetBit(block, shift); + } + } + prev_valid = curr_valid; + prev = curr; + } + mask.SetBlock(complete_block_count, block); + } + } } +template +static void MaskColumn(BitArray &mask, ChunkCollection &over_collection, const idx_t c) { + using MASK_TYPE = BitArray; -template -bool operator<(const uint_x4& a, const uint_x4& b) -{ - return (a.d.v23 < b.d.v23) - || ((a.d.v23 == b.d.v23) && (a.d.v01 < b.d.v01)); + auto &vector = over_collection.GetChunk(0).data[c]; + switch (vector.GetType().InternalType()) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + 
MaskTypedColumn(mask, over_collection, c); + break; + case PhysicalType::INT16: + MaskTypedColumn(mask, over_collection, c); + break; + case PhysicalType::INT32: + MaskTypedColumn(mask, over_collection, c); + break; + case PhysicalType::INT64: + MaskTypedColumn(mask, over_collection, c); + break; + case PhysicalType::UINT8: + MaskTypedColumn(mask, over_collection, c); + break; + case PhysicalType::UINT16: + MaskTypedColumn(mask, over_collection, c); + break; + case PhysicalType::UINT32: + MaskTypedColumn(mask, over_collection, c); + break; + case PhysicalType::UINT64: + MaskTypedColumn(mask, over_collection, c); + break; + case PhysicalType::INT128: + MaskTypedColumn(mask, over_collection, c); + break; + case PhysicalType::FLOAT: + MaskTypedColumn(mask, over_collection, c); + break; + case PhysicalType::DOUBLE: + MaskTypedColumn(mask, over_collection, c); + break; + case PhysicalType::VARCHAR: + MaskTypedColumn(mask, over_collection, c); + break; + case PhysicalType::INTERVAL: + MaskTypedColumn(mask, over_collection, c); + break; + default: + throw NotImplementedException("Type for comparison"); + break; + } } -template -bool operator>(const uint_x4& a, const uint_x4& b) -{ - return operator<(b,a); -} +template +static idx_t FindNextStart(const BitArray &mask, idx_t l, const idx_t r) { + while (l < r) { + // If l is aligned with the start of a block, and the block is blank, then skip forward one block. 
+ const auto block = mask.GetBlock(mask.Block(l)); + auto shift = mask.Shift(l); + if (!block && !shift) { + l += mask.BITS_PER_WORD; + continue; + } -template -bool operator<=(const uint_x4& a, const uint_x4& b) -{ - return !(operator<(b,a)); -} + // Loop over the block + for (; shift < mask.BITS_PER_WORD; ++shift, ++l) { + if (mask.TestBit(block, shift)) { + return MinValue(l, r); + } + } + } -template -bool operator>=(const uint_x4& a, const uint_x4& b) -{ - return !(operator<(a,b)); + // Didn't find a start so return the end of the range + return r; } +template +static idx_t FindPrevStart(const BitArray &mask, const idx_t l, idx_t r) { + while (l < r) { + // If r is aligned with the start of a block, and the previous block is blank, + // then skip backwards one block. + const auto block = mask.GetBlock(mask.Block(r - 1)); + auto shift = mask.Shift(r); + if (!block && !shift) { + // r is nonzero (> l) and word aligned, so this will not underflow. + r -= mask.BITS_PER_WORD; + continue; + } + // Loop backwards over the block + // shift is probing r-1 >= l >= 0 + for (shift = mask.Shift(r - 1) + 1; shift-- > 0; --r) { + if (mask.TestBit(block, shift)) { + return MaxValue(l, r - 1); + } + } + } -template -uint_x4 operator<<(const uint_x4& v, - const bitcount_t shift) -{ - uint_x4 r = {0U, 0U, 0U, 0U}; - const bitcount_t bits = uint_x4::UINT_BITS; - const bitcount_t bitmask = bits - 1; - const bitcount_t shiftdiv = shift / bits; - const bitcount_t shiftmod = shift & bitmask; - - if (shiftmod) { - UInt carryover = 0; -#if PCG_LITTLE_ENDIAN - for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) { -#else - for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) { - --out, --in; -#endif - r.wa[out] = (v.wa[in] << shiftmod) | carryover; - carryover = (v.wa[in] >> (bits - shiftmod)); - } - } else { -#if PCG_LITTLE_ENDIAN - for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) { -#else - for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop 
*/) { - --out, --in; -#endif - r.wa[out] = v.wa[in]; - } - } - - return r; -} - -template -uint_x4 operator>>(const uint_x4& v, - const bitcount_t shift) -{ - uint_x4 r = {0U, 0U, 0U, 0U}; - const bitcount_t bits = uint_x4::UINT_BITS; - const bitcount_t bitmask = bits - 1; - const bitcount_t shiftdiv = shift / bits; - const bitcount_t shiftmod = shift & bitmask; - - if (shiftmod) { - UInt carryover = 0; -#if PCG_LITTLE_ENDIAN - for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) { - --out, --in; -#else - for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) { -#endif - r.wa[out] = (v.wa[in] >> shiftmod) | carryover; - carryover = (v.wa[in] << (bits - shiftmod)); - } - } else { -#if PCG_LITTLE_ENDIAN - for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) { - --out, --in; -#else - for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) { -#endif - r.wa[out] = v.wa[in]; - } - } - - return r; + // Didn't find a start so return the start of the range + return l; } -} // namespace pcg_extras - -#endif // PCG_UINT128_HPP_INCLUDED - -// LICENSE_CHANGE_END +static void MaterializeExpressions(Expression **exprs, idx_t expr_count, ChunkCollection &input, + ChunkCollection &output, bool scalar = false) { + if (expr_count == 0) { + return; + } - namespace pcg_extras { - typedef pcg_extras::uint_x4 pcg128_t; - } - #define PCG_128BIT_CONSTANT(high,low) \ - pcg_extras::pcg128_t(high,low) - #define PCG_EMULATED_128BIT_MATH 1 -#endif + vector types; + ExpressionExecutor executor; + for (idx_t expr_idx = 0; expr_idx < expr_count; ++expr_idx) { + types.push_back(exprs[expr_idx]->return_type); + executor.AddExpression(*exprs[expr_idx]); + } + for (idx_t i = 0; i < input.ChunkCount(); i++) { + DataChunk chunk; + chunk.Initialize(types); -namespace pcg_extras { + executor.Execute(input.GetChunk(i), chunk); -/* - * We often need to represent a "number of bits". 
When used normally, these - * numbers are never greater than 128, so an unsigned char is plenty. - * If you're using a nonstandard generator of a larger size, you can set - * PCG_BITCOUNT_T to have it define it as a larger size. (Some compilers - * might produce faster code if you set it to an unsigned int.) - */ + chunk.Verify(); + output.Append(chunk); -#ifndef PCG_BITCOUNT_T - typedef uint8_t bitcount_t; -#else - typedef PCG_BITCOUNT_T bitcount_t; -#endif + if (scalar) { + break; + } + } +} -/* - * C++ requires us to be able to serialize RNG state by printing or reading - * it from a stream. Because we use 128-bit ints, we also need to be able - * ot print them, so here is code to do so. - * - * This code provides enough functionality to print 128-bit ints in decimal - * and zero-padded in hex. It's not a full-featured implementation. - */ +static void MaterializeExpression(Expression *expr, ChunkCollection &input, ChunkCollection &output, + bool scalar = false) { + MaterializeExpressions(&expr, 1, input, output, scalar); +} -template -std::basic_ostream& -operator<<(std::basic_ostream& out, pcg128_t value) -{ - auto desired_base = out.flags() & out.basefield; - bool want_hex = desired_base == out.hex; - - if (want_hex) { - uint64_t highpart = uint64_t(value >> 64); - uint64_t lowpart = uint64_t(value); - auto desired_width = out.width(); - if (desired_width > 16) { - out.width(desired_width - 16); - } - if (highpart != 0 || desired_width > 16) - out << highpart; - CharT oldfill = '\0'; - if (highpart != 0) { - out.width(16); - oldfill = out.fill('0'); - } - auto oldflags = out.setf(decltype(desired_base){}, out.showbase); - out << lowpart; - out.setf(oldflags); - if (highpart != 0) { - out.fill(oldfill); - } - return out; - } - constexpr size_t MAX_CHARS_128BIT = 40; +static OrderByNullType NormaliseNullOrder(OrderType type, OrderByNullType null_order) { + if (type != OrderType::DESCENDING) { + return null_order; + } - char buffer[MAX_CHARS_128BIT]; - char* pos 
= buffer+sizeof(buffer); - *(--pos) = '\0'; - constexpr auto BASE = pcg128_t(10ULL); - do { - auto div = value / BASE; - auto mod = uint32_t(value - (div * BASE)); - *(--pos) = '0' + char(mod); - value = div; - } while(value != pcg128_t(0ULL)); - return out << pos; + switch (null_order) { + case OrderByNullType::NULLS_FIRST: + return OrderByNullType::NULLS_LAST; + case OrderByNullType::NULLS_LAST: + return OrderByNullType::NULLS_FIRST; + default: + throw InternalException("Unknown NULL order type"); + } } -template -std::basic_istream& -operator>>(std::basic_istream& in, pcg128_t& value) -{ - typename std::basic_istream::sentry s(in); - - if (!s) - return in; - - constexpr auto BASE = pcg128_t(10ULL); - pcg128_t current(0ULL); - bool did_nothing = true; - bool overflow = false; - for(;;) { - CharT wide_ch = in.get(); - if (!in.good()) - break; - auto ch = in.narrow(wide_ch, '\0'); - if (ch < '0' || ch > '9') { - in.unget(); - break; - } - did_nothing = false; - pcg128_t digit(uint32_t(ch - '0')); - pcg128_t timesbase = current*BASE; - overflow = overflow || timesbase < current; - current = timesbase + digit; - overflow = overflow || current < digit; - } - - if (did_nothing || overflow) { - in.setstate(std::ios::failbit); - if (overflow) - current = ~pcg128_t(0ULL); - } +static void SortCollectionForPartition(BoundWindowExpression *wexpr, ChunkCollection &input, + ChunkCollection &sort_collection) { + if (input.Count() == 0) { + return; + } + vector orders; + vector null_order_types; - value = current; + // we sort by both 1) partition by expression list and 2) order by expressions + for (idx_t prt_idx = 0; prt_idx < wexpr->partitions.size(); prt_idx++) { + orders.push_back(OrderType::ASCENDING); + null_order_types.push_back(OrderByNullType::NULLS_FIRST); + } - return in; -} + for (const auto &order : wexpr->orders) { + orders.push_back(order.type); + null_order_types.push_back(NormaliseNullOrder(order.type, order.null_order)); + } -/* - * Likewise, if people use 
tiny rngs, we'll be serializing uint8_t. - * If we just used the provided IO operators, they'd read/write chars, - * not ints, so we need to define our own. We *can* redefine this operator - * here because we're in our own namespace. - */ + auto sorted_vector = unique_ptr(new idx_t[input.Count()]); + sort_collection.Sort(orders, null_order_types, sorted_vector.get()); -template -std::basic_ostream& -operator<<(std::basic_ostream&out, uint8_t value) -{ - return out << uint32_t(value); + input.Reorder(sorted_vector.get()); + sort_collection.Reorder(sorted_vector.get()); } -template -std::basic_istream& -operator>>(std::basic_istream& in, uint8_t& target) -{ - uint32_t value = 0xdecea5edU; - in >> value; - if (!in && value == 0xdecea5edU) - return in; - if (value > uint8_t(~0)) { - in.setstate(std::ios::failbit); - value = ~0U; - } - target = uint8_t(value); - return in; -} +static void HashChunk(counts_t &counts, DataChunk &hash_chunk, DataChunk &sort_chunk, const idx_t partition_cols) { + const vector hash_types(1, LogicalTypeId::HASH); + hash_chunk.Initialize(hash_types); + hash_chunk.SetCardinality(sort_chunk); + auto &hash_vector = hash_chunk.data[0]; -/* Unfortunately, the above functions don't get found in preference to the - * built in ones, so we create some more specific overloads that will. - * Ugh. 
- */ + const auto count = sort_chunk.size(); + VectorOperations::Hash(sort_chunk.data[0], hash_vector, count); + for (idx_t prt_idx = 1; prt_idx < partition_cols; ++prt_idx) { + VectorOperations::CombineHash(hash_vector, sort_chunk.data[prt_idx], count); + } -inline std::ostream& operator<<(std::ostream& out, uint8_t value) -{ - return pcg_extras::operator<< (out, value); + const auto partition_mask = hash_t(counts.size() - 1); + auto hashes = FlatVector::GetData(hash_vector); + if (hash_vector.GetVectorType() == VectorType::CONSTANT_VECTOR) { + const auto bin = (hashes[0] & partition_mask); + counts[bin] += count; + } else { + for (idx_t i = 0; i < count; ++i) { + const auto bin = (hashes[i] & partition_mask); + ++counts[bin]; + } + } } -inline std::istream& operator>>(std::istream& in, uint8_t& value) -{ - return pcg_extras::operator>> (in, value); -} +static void MaterializeOverForWindow(BoundWindowExpression *wexpr, DataChunk &input_chunk, DataChunk &over_chunk) { + vector over_types; + ExpressionExecutor executor; + // we sort by both 1) partition by expression list and 2) order by expressions + for (idx_t prt_idx = 0; prt_idx < wexpr->partitions.size(); prt_idx++) { + auto &pexpr = wexpr->partitions[prt_idx]; + over_types.push_back(pexpr->return_type); + executor.AddExpression(*pexpr); + } + for (idx_t ord_idx = 0; ord_idx < wexpr->orders.size(); ord_idx++) { + auto &oexpr = wexpr->orders[ord_idx].expression; + over_types.push_back(oexpr->return_type); + executor.AddExpression(*oexpr); + } -/* - * Useful bitwise operations. - */ + D_ASSERT(over_types.size() > 0); -/* - * XorShifts are invertable, but they are someting of a pain to invert. - * This function backs them out. It's used by the whacky "inside out" - * generator defined later. 
- */ + over_chunk.Initialize(over_types); + executor.Execute(input_chunk, over_chunk); -template -inline itype unxorshift(itype x, bitcount_t bits, bitcount_t shift) -{ - if (2*shift >= bits) { - return x ^ (x >> shift); - } - itype lowmask1 = (itype(1U) << (bits - shift*2)) - 1; - itype highmask1 = ~lowmask1; - itype top1 = x; - itype bottom1 = x & lowmask1; - top1 ^= top1 >> shift; - top1 &= highmask1; - x = top1 | bottom1; - itype lowmask2 = (itype(1U) << (bits - shift)) - 1; - itype bottom2 = x & lowmask2; - bottom2 = unxorshift(bottom2, bits - shift, shift); - bottom2 &= lowmask1; - return top1 | bottom2; + over_chunk.Verify(); } -/* - * Rotate left and right. - * - * In ideal world, compilers would spot idiomatic rotate code and convert it - * to a rotate instruction. Of course, opinions vary on what the correct - * idiom is and how to spot it. For clang, sometimes it generates better - * (but still crappy) code if you define PCG_USE_ZEROCHECK_ROTATE_IDIOM. - */ +struct WindowBoundariesState { + idx_t partition_start = 0; + idx_t partition_end = 0; + idx_t peer_start = 0; + idx_t peer_end = 0; + idx_t valid_start = 0; + idx_t valid_end = 0; + int64_t window_start = -1; + int64_t window_end = -1; + bool is_same_partition = false; + bool is_peer = false; +}; -template -inline itype rotl(itype value, bitcount_t rot) -{ - constexpr bitcount_t bits = sizeof(itype) * 8; - constexpr bitcount_t mask = bits - 1; -#if PCG_USE_ZEROCHECK_ROTATE_IDIOM - return rot ? 
(value << rot) | (value >> (bits - rot)) : value; -#else - return (value << rot) | (value >> ((- rot) & mask)); -#endif +static bool WindowNeedsRank(BoundWindowExpression *wexpr) { + return wexpr->type == ExpressionType::WINDOW_PERCENT_RANK || wexpr->type == ExpressionType::WINDOW_RANK || + wexpr->type == ExpressionType::WINDOW_RANK_DENSE || wexpr->type == ExpressionType::WINDOW_CUME_DIST; } -template -inline itype rotr(itype value, bitcount_t rot) -{ - constexpr bitcount_t bits = sizeof(itype) * 8; - constexpr bitcount_t mask = bits - 1; -#if PCG_USE_ZEROCHECK_ROTATE_IDIOM - return rot ? (value >> rot) | (value << (bits - rot)) : value; -#else - return (value >> rot) | (value << ((- rot) & mask)); -#endif +template +static T GetCell(ChunkCollection &collection, idx_t column, idx_t index) { + D_ASSERT(collection.ColumnCount() > column); + auto &chunk = collection.GetChunkForRow(index); + auto &source = chunk.data[column]; + const auto source_offset = index % STANDARD_VECTOR_SIZE; + const auto data = FlatVector::GetData(source); + return data[source_offset]; } -/* Unfortunately, both Clang and GCC sometimes perform poorly when it comes - * to properly recognizing idiomatic rotate code, so for we also provide - * assembler directives (enabled with PCG_USE_INLINE_ASM). Boo, hiss. - * (I hope that these compilers get better so that this code can die.) - * - * These overloads will be preferred over the general template code above. 
- */ -#if PCG_USE_INLINE_ASM && __GNUC__ && (__x86_64__ || __i386__) - -inline uint8_t rotr(uint8_t value, bitcount_t rot) -{ - asm ("rorb %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); - return value; +static bool CellIsNull(ChunkCollection &collection, idx_t column, idx_t index) { + D_ASSERT(collection.ColumnCount() > column); + auto &chunk = collection.GetChunkForRow(index); + auto &source = chunk.data[column]; + const auto source_offset = index % STANDARD_VECTOR_SIZE; + return FlatVector::IsNull(source, source_offset); } -inline uint16_t rotr(uint16_t value, bitcount_t rot) -{ - asm ("rorw %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); - return value; -} +template +struct ChunkCollectionIterator { + using iterator = ChunkCollectionIterator; + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = T; + using reference = T; + using pointer = idx_t; -inline uint32_t rotr(uint32_t value, bitcount_t rot) -{ - asm ("rorl %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); - return value; -} + ChunkCollectionIterator(ChunkCollection &coll_p, idx_t col_no_p, pointer pos_p = 0) + : coll(&coll_p), col_no(col_no_p), pos(pos_p) { + } -#if __x86_64__ -inline uint64_t rotr(uint64_t value, bitcount_t rot) -{ - asm ("rorq %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); - return value; -} -#endif // __x86_64__ + inline reference operator*() const { + return GetCell(*coll, col_no, pos); + } + inline explicit operator pointer() const { + return pos; + } -#elif defined(_MSC_VER) - // Use MSVC++ bit rotation intrinsics + inline iterator &operator++() { + ++pos; + return *this; + } + inline iterator operator++(int) { + auto result = *this; + ++(*this); + return result; + } -#pragma intrinsic(_rotr, _rotr64, _rotr8, _rotr16) + friend inline bool operator==(const iterator &a, const iterator &b) { + return a.pos == b.pos; + } + friend inline bool operator!=(const iterator &a, const iterator &b) { + return a.pos 
!= b.pos; + } -inline uint8_t rotr(uint8_t value, bitcount_t rot) -{ - return _rotr8(value, rot); -} +private: + ChunkCollection *coll; + idx_t col_no; + pointer pos; +}; -inline uint16_t rotr(uint16_t value, bitcount_t rot) -{ - return _rotr16(value, rot); -} +template +struct OperationCompare : public std::binary_function { + inline bool operator()(const T &lhs, const T &val) const { + return OP::template Operation(lhs, val); + } +}; -inline uint32_t rotr(uint32_t value, bitcount_t rot) -{ - return _rotr(value, rot); -} +template +static idx_t FindTypedRangeBound(ChunkCollection &over, const idx_t order_col, const idx_t order_begin, + const idx_t order_end, ChunkCollection &boundary, const idx_t boundary_row) { + D_ASSERT(!CellIsNull(boundary, 0, boundary_row)); + const auto val = GetCell(boundary, 0, boundary_row); -inline uint64_t rotr(uint64_t value, bitcount_t rot) -{ - return _rotr64(value, rot); + OperationCompare comp; + ChunkCollectionIterator begin(over, order_col, order_begin); + ChunkCollectionIterator end(over, order_col, order_end); + if (FROM) { + return idx_t(std::lower_bound(begin, end, val, comp)); + } else { + return idx_t(std::upper_bound(begin, end, val, comp)); + } } -#endif // PCG_USE_INLINE_ASM - - -/* - * The C++ SeedSeq concept (modelled by seed_seq) can fill an array of - * 32-bit integers with seed data, but sometimes we want to produce - * larger or smaller integers. - * - * The following code handles this annoyance. - * - * uneven_copy will copy an array of 32-bit ints to an array of larger or - * smaller ints (actually, the code is general it only needing forward - * iterators). The copy is identical to the one that would be performed if - * we just did memcpy on a standard little-endian machine, but works - * regardless of the endian of the machine (or the weirdness of the ints - * involved). - * - * generate_to initializes an array of integers using a SeedSeq - * object. 
It is given the size as a static constant at compile time and - * tries to avoid memory allocation. If we're filling in 32-bit constants - * we just do it directly. If we need a separate buffer and it's small, - * we allocate it on the stack. Otherwise, we fall back to heap allocation. - * Ugh. - * - * generate_one produces a single value of some integral type using a - * SeedSeq object. - */ - - /* uneven_copy helper, case where destination ints are less than 32 bit. */ +template +static idx_t FindRangeBound(ChunkCollection &over, const idx_t order_col, const idx_t order_begin, + const idx_t order_end, ChunkCollection &boundary, const idx_t expr_idx) { + const auto &over_types = over.Types(); + D_ASSERT(over_types.size() > order_col); + D_ASSERT(boundary.Types().size() == 1); + D_ASSERT(boundary.Types()[0] == over_types[order_col]); -template -SrcIter uneven_copy_impl( - SrcIter src_first, DestIter dest_first, DestIter dest_last, - std::true_type) -{ - typedef typename std::iterator_traits::value_type src_t; - typedef typename std::iterator_traits::value_type dest_t; + switch (over_types[order_col].InternalType()) { + case PhysicalType::INT8: + return FindTypedRangeBound(over, order_col, order_begin, order_end, boundary, expr_idx); + case PhysicalType::INT16: + return FindTypedRangeBound(over, order_col, order_begin, order_end, boundary, expr_idx); + case PhysicalType::INT32: + return FindTypedRangeBound(over, order_col, order_begin, order_end, boundary, expr_idx); + case PhysicalType::INT64: + return FindTypedRangeBound(over, order_col, order_begin, order_end, boundary, expr_idx); + case PhysicalType::UINT8: + return FindTypedRangeBound(over, order_col, order_begin, order_end, boundary, expr_idx); + case PhysicalType::UINT16: + return FindTypedRangeBound(over, order_col, order_begin, order_end, boundary, expr_idx); + case PhysicalType::UINT32: + return FindTypedRangeBound(over, order_col, order_begin, order_end, boundary, expr_idx); + case PhysicalType::UINT64: + 
return FindTypedRangeBound(over, order_col, order_begin, order_end, boundary, expr_idx); + case PhysicalType::INT128: + return FindTypedRangeBound(over, order_col, order_begin, order_end, boundary, expr_idx); + case PhysicalType::FLOAT: + return FindTypedRangeBound(over, order_col, order_begin, order_end, boundary, expr_idx); + case PhysicalType::DOUBLE: + return FindTypedRangeBound(over, order_col, order_begin, order_end, boundary, expr_idx); + case PhysicalType::INTERVAL: + return FindTypedRangeBound(over, order_col, order_begin, order_end, boundary, expr_idx); + default: + throw InternalException("Unsupported column type for RANGE"); + } +} - constexpr bitcount_t SRC_SIZE = sizeof(src_t); - constexpr bitcount_t DEST_SIZE = sizeof(dest_t); - constexpr bitcount_t DEST_BITS = DEST_SIZE * 8; - constexpr bitcount_t SCALE = SRC_SIZE / DEST_SIZE; +template +static idx_t FindOrderedRangeBound(ChunkCollection &over, const idx_t order_col, const OrderType range_sense, + const idx_t order_begin, const idx_t order_end, ChunkCollection &boundary, + const idx_t expr_idx) { + switch (range_sense) { + case OrderType::ASCENDING: + return FindRangeBound(over, order_col, order_begin, order_end, boundary, expr_idx); + case OrderType::DESCENDING: + return FindRangeBound(over, order_col, order_begin, order_end, boundary, expr_idx); + default: + throw InternalException("Unsupported ORDER BY sense for RANGE"); + } +} - size_t count = 0; - src_t value = 0; +static inline bool BoundaryNeedsPeer(const WindowBoundary &boundary) { + switch (boundary) { + case WindowBoundary::CURRENT_ROW_RANGE: + case WindowBoundary::EXPR_PRECEDING_RANGE: + case WindowBoundary::EXPR_FOLLOWING_RANGE: + return true; + default: + return false; + } +} - while (dest_first != dest_last) { - if ((count++ % SCALE) == 0) - value = *src_first++; // Get more bits - else - value >>= DEST_BITS; // Move down bits +static void UpdateWindowBoundaries(BoundWindowExpression *wexpr, const idx_t input_size, const idx_t row_idx, 
+ ChunkCollection &over_collection, ChunkCollection &boundary_start_collection, + ChunkCollection &boundary_end_collection, const BitArray &partition_mask, + const BitArray &order_mask, WindowBoundariesState &bounds) { - *dest_first++ = dest_t(value); // Truncates, ignores high bits. - } - return src_first; -} + // RANGE sorting parameters + const auto order_col = wexpr->partitions.size(); + const auto range_sense = wexpr->orders.empty() ? OrderType::INVALID : wexpr->orders[0].type; - /* uneven_copy helper, case where destination ints are more than 32 bit. */ + if (wexpr->partitions.size() + wexpr->orders.size() > 0) { -template -SrcIter uneven_copy_impl( - SrcIter src_first, DestIter dest_first, DestIter dest_last, - std::false_type) -{ - typedef typename std::iterator_traits::value_type src_t; - typedef typename std::iterator_traits::value_type dest_t; + // determine partition and peer group boundaries to ultimately figure out window size + bounds.is_same_partition = !partition_mask[row_idx]; + bounds.is_peer = !order_mask[row_idx]; - constexpr auto SRC_SIZE = sizeof(src_t); - constexpr auto SRC_BITS = SRC_SIZE * 8; - constexpr auto DEST_SIZE = sizeof(dest_t); - constexpr auto SCALE = (DEST_SIZE+SRC_SIZE-1) / SRC_SIZE; + // when the partition changes, recompute the boundaries + if (!bounds.is_same_partition) { + bounds.partition_start = row_idx; + bounds.peer_start = row_idx; - while (dest_first != dest_last) { - dest_t value(0UL); - unsigned int shift = 0; + // find end of partition + bounds.partition_end = input_size; + if (!wexpr->partitions.empty()) { + bounds.partition_end = FindNextStart(partition_mask, bounds.partition_start + 1, input_size); + } - for (size_t i = 0; i < SCALE; ++i) { - value |= dest_t(*src_first++) << shift; - shift += SRC_BITS; - } + // Find valid ordering values for the new partition + // so we can exclude NULLs from RANGE expression computations + bounds.valid_start = bounds.partition_start; + bounds.valid_end = bounds.partition_end; - 
*dest_first++ = value; - } - return src_first; -} + if ((bounds.valid_start < bounds.valid_end) && (wexpr->start == WindowBoundary::EXPR_PRECEDING_RANGE || + wexpr->end == WindowBoundary::EXPR_PRECEDING_RANGE)) { + // Exclude any leading NULLs + if (CellIsNull(over_collection, order_col, bounds.valid_start)) { + bounds.valid_start = FindNextStart(order_mask, bounds.valid_start + 1, bounds.valid_end); + } + } -/* uneven_copy, call the right code for larger vs. smaller */ + if ((bounds.valid_start < bounds.valid_end) && (wexpr->start == WindowBoundary::EXPR_FOLLOWING_RANGE || + wexpr->end == WindowBoundary::EXPR_FOLLOWING_RANGE)) { + // Exclude any trailing NULLs + if (CellIsNull(over_collection, order_col, bounds.valid_end - 1)) { + bounds.valid_end = FindPrevStart(order_mask, bounds.valid_start, bounds.valid_end); + } + } -template -inline SrcIter uneven_copy(SrcIter src_first, - DestIter dest_first, DestIter dest_last) -{ - typedef typename std::iterator_traits::value_type src_t; - typedef typename std::iterator_traits::value_type dest_t; + } else if (!bounds.is_peer) { + bounds.peer_start = row_idx; + } - constexpr bool DEST_IS_SMALLER = sizeof(dest_t) < sizeof(src_t); + if (BoundaryNeedsPeer(wexpr->end) || wexpr->type == ExpressionType::WINDOW_CUME_DIST) { + bounds.peer_end = bounds.partition_end; + if (!wexpr->orders.empty()) { + bounds.peer_end = FindNextStart(order_mask, bounds.peer_start + 1, bounds.partition_end); + } + } - return uneven_copy_impl(src_first, dest_first, dest_last, - std::integral_constant{}); -} + } else { + bounds.is_same_partition = false; + bounds.is_peer = true; + bounds.partition_end = input_size; + bounds.peer_end = bounds.partition_end; + } -/* generate_to, fill in a fixed-size array of integral type using a SeedSeq - * (actually works for any random-access iterator) - */ + // determine window boundaries depending on the type of expression + bounds.window_start = -1; + bounds.window_end = -1; -template -inline void 
generate_to_impl(SeedSeq&& generator, DestIter dest, - std::true_type) -{ - generator.generate(dest, dest+size); -} + switch (wexpr->start) { + case WindowBoundary::UNBOUNDED_PRECEDING: + bounds.window_start = bounds.partition_start; + break; + case WindowBoundary::CURRENT_ROW_ROWS: + bounds.window_start = row_idx; + break; + case WindowBoundary::CURRENT_ROW_RANGE: + bounds.window_start = bounds.peer_start; + break; + case WindowBoundary::EXPR_PRECEDING_ROWS: { + bounds.window_start = (int64_t)row_idx - GetCell(boundary_start_collection, 0, + wexpr->start_expr->IsScalar() ? 0 : row_idx); + break; + } + case WindowBoundary::EXPR_FOLLOWING_ROWS: { + bounds.window_start = + row_idx + GetCell(boundary_start_collection, 0, wexpr->start_expr->IsScalar() ? 0 : row_idx); + break; + } + case WindowBoundary::EXPR_PRECEDING_RANGE: { + const auto expr_idx = wexpr->start_expr->IsScalar() ? 0 : row_idx; + if (CellIsNull(boundary_start_collection, 0, expr_idx)) { + bounds.window_start = bounds.peer_start; + } else { + bounds.window_start = + FindOrderedRangeBound(over_collection, order_col, range_sense, bounds.valid_start, row_idx, + boundary_start_collection, expr_idx); + } + break; + } + case WindowBoundary::EXPR_FOLLOWING_RANGE: { + const auto expr_idx = wexpr->start_expr->IsScalar() ? 
0 : row_idx; + if (CellIsNull(boundary_start_collection, 0, expr_idx)) { + bounds.window_start = bounds.peer_start; + } else { + bounds.window_start = FindOrderedRangeBound(over_collection, order_col, range_sense, row_idx, + bounds.valid_end, boundary_start_collection, expr_idx); + } + break; + } + default: + throw InternalException("Unsupported window start boundary"); + } -template -void generate_to_impl(SeedSeq&& generator, DestIter dest, - std::false_type) -{ - typedef typename std::iterator_traits::value_type dest_t; - constexpr auto DEST_SIZE = sizeof(dest_t); - constexpr auto GEN_SIZE = sizeof(uint32_t); - - constexpr bool GEN_IS_SMALLER = GEN_SIZE < DEST_SIZE; - constexpr size_t FROM_ELEMS = - GEN_IS_SMALLER - ? size * ((DEST_SIZE+GEN_SIZE-1) / GEN_SIZE) - : (size + (GEN_SIZE / DEST_SIZE) - 1) - / ((GEN_SIZE / DEST_SIZE) + GEN_IS_SMALLER); - // this odd code ^^^^^^^^^^^^^^^^^ is work-around for - // a bug: http://llvm.org/bugs/show_bug.cgi?id=21287 - - if (FROM_ELEMS <= 1024) { - uint32_t buffer[FROM_ELEMS]; - generator.generate(buffer, buffer+FROM_ELEMS); - uneven_copy(buffer, dest, dest+size); - } else { - uint32_t* buffer = static_cast(malloc(GEN_SIZE * FROM_ELEMS)); - generator.generate(buffer, buffer+FROM_ELEMS); - uneven_copy(buffer, dest, dest+size); - free(static_cast(buffer)); - } -} + switch (wexpr->end) { + case WindowBoundary::CURRENT_ROW_ROWS: + bounds.window_end = row_idx + 1; + break; + case WindowBoundary::CURRENT_ROW_RANGE: + bounds.window_end = bounds.peer_end; + break; + case WindowBoundary::UNBOUNDED_FOLLOWING: + bounds.window_end = bounds.partition_end; + break; + case WindowBoundary::EXPR_PRECEDING_ROWS: + bounds.window_end = (int64_t)row_idx - + GetCell(boundary_end_collection, 0, wexpr->end_expr->IsScalar() ? 0 : row_idx) + 1; + break; + case WindowBoundary::EXPR_FOLLOWING_ROWS: + bounds.window_end = + row_idx + GetCell(boundary_end_collection, 0, wexpr->end_expr->IsScalar() ? 
0 : row_idx) + 1; + break; + case WindowBoundary::EXPR_PRECEDING_RANGE: { + const auto expr_idx = wexpr->end_expr->IsScalar() ? 0 : row_idx; + if (CellIsNull(boundary_end_collection, 0, expr_idx)) { + bounds.window_end = bounds.peer_end; + } else { + bounds.window_end = + FindOrderedRangeBound(over_collection, order_col, range_sense, bounds.valid_start, row_idx, + boundary_end_collection, expr_idx); + } + break; + } + case WindowBoundary::EXPR_FOLLOWING_RANGE: { + const auto expr_idx = wexpr->end_expr->IsScalar() ? 0 : row_idx; + if (CellIsNull(boundary_end_collection, 0, expr_idx)) { + bounds.window_end = bounds.peer_end; + } else { + bounds.window_end = FindOrderedRangeBound(over_collection, order_col, range_sense, row_idx, + bounds.valid_end, boundary_end_collection, expr_idx); + } + break; + } + default: + throw InternalException("Unsupported window end boundary"); + } -template -inline void generate_to(SeedSeq&& generator, DestIter dest) -{ - typedef typename std::iterator_traits::value_type dest_t; - constexpr bool IS_32BIT = sizeof(dest_t) == sizeof(uint32_t); + // clamp windows to partitions if they should exceed + if (bounds.window_start < (int64_t)bounds.partition_start) { + bounds.window_start = bounds.partition_start; + } + if (bounds.window_start > (int64_t)bounds.partition_end) { + bounds.window_start = bounds.partition_end; + } + if (bounds.window_end < (int64_t)bounds.partition_start) { + bounds.window_end = bounds.partition_start; + } + if (bounds.window_end > (int64_t)bounds.partition_end) { + bounds.window_end = bounds.partition_end; + } - generate_to_impl(std::forward(generator), dest, - std::integral_constant{}); + if (bounds.window_start < 0 || bounds.window_end < 0) { + throw InternalException("Failed to compute window boundaries"); + } } -/* generate_one, produce a value of integral type using a SeedSeq - * (optionally, we can have it produce more than one and pick which one - * we want) - */ +static void 
ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollection &input, ChunkCollection &output, + ChunkCollection &over, const BitArray &partition_mask, + const BitArray &order_mask) { -template -inline UInt generate_one(SeedSeq&& generator) -{ - UInt result[N]; - generate_to(std::forward(generator), result); - return result[i]; -} + // TODO we could evaluate those expressions in parallel -template -auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound) - -> typename RngType::result_type -{ - typedef typename RngType::result_type rtype; - rtype threshold = (RngType::max() - RngType::min() + rtype(1) - upper_bound) - % upper_bound; - for (;;) { - rtype r = rng() - RngType::min(); - if (r >= threshold) - return r % upper_bound; - } -} + // evaluate inner expressions of window functions, could be more complex + ChunkCollection payload_collection; + vector exprs; + for (auto &child : wexpr->children) { + exprs.push_back(child.get()); + } + // TODO: child may be a scalar, don't need to materialize the whole collection then + MaterializeExpressions(exprs.data(), exprs.size(), input, payload_collection); -template -void shuffle(Iter from, Iter to, RandType&& rng) -{ - typedef typename std::iterator_traits::difference_type delta_t; - typedef typename std::remove_reference::type::result_type result_t; - auto count = to - from; - while (count > 1) { - delta_t chosen = delta_t(bounded_rand(rng, result_t(count))); - --count; - --to; - using std::swap; - swap(*(from + chosen), *to); - } -} + ChunkCollection leadlag_offset_collection; + ChunkCollection leadlag_default_collection; + if (wexpr->type == ExpressionType::WINDOW_LEAD || wexpr->type == ExpressionType::WINDOW_LAG) { + if (wexpr->offset_expr) { + MaterializeExpression(wexpr->offset_expr.get(), input, leadlag_offset_collection, + wexpr->offset_expr->IsScalar()); + } + if (wexpr->default_expr) { + MaterializeExpression(wexpr->default_expr.get(), input, leadlag_default_collection, + 
wexpr->default_expr->IsScalar()); + } + } -/* - * Although std::seed_seq is useful, it isn't everything. Often we want to - * initialize a random-number generator some other way, such as from a random - * device. - * - * Technically, it does not meet the requirements of a SeedSequence because - * it lacks some of the rarely-used member functions (some of which would - * be impossible to provide). However the C++ standard is quite specific - * that actual engines only called the generate method, so it ought not to be - * a problem in practice. - */ + // evaluate boundaries if present. Parser has checked boundary types. + ChunkCollection boundary_start_collection; + if (wexpr->start_expr) { + MaterializeExpression(wexpr->start_expr.get(), input, boundary_start_collection, wexpr->start_expr->IsScalar()); + } -template -class seed_seq_from { -private: - RngType rng_; + ChunkCollection boundary_end_collection; + if (wexpr->end_expr) { + MaterializeExpression(wexpr->end_expr.get(), input, boundary_end_collection, wexpr->end_expr->IsScalar()); + } - typedef uint_least32_t result_type; + // build a segment tree for frame-adhering aggregates + // see http://www.vldb.org/pvldb/vol8/p1058-leis.pdf + unique_ptr segment_tree = nullptr; -public: - template - seed_seq_from(Args&&... args) : - rng_(std::forward(args)...) - { - // Nothing (else) to do... - } + if (wexpr->aggregate) { + segment_tree = make_unique(*(wexpr->aggregate), wexpr->bind_info.get(), wexpr->return_type, + &payload_collection); + } - template - void generate(Iter start, Iter finish) - { - for (auto i = start; i != finish; ++i) - *i = result_type(rng_()); - } + WindowBoundariesState bounds; + uint64_t dense_rank = 1, rank_equal = 0, rank = 1; - constexpr size_t size() const - { - return (sizeof(typename RngType::result_type) > sizeof(result_type) - && RngType::max() > ~size_t(0UL)) - ? 
~size_t(0UL) - : size_t(RngType::max()); - } -}; + // this is the main loop, go through all sorted rows and compute window function result + const vector output_types(1, wexpr->return_type); + DataChunk output_chunk; + output_chunk.Initialize(output_types); + for (idx_t row_idx = 0; row_idx < input.Count(); row_idx++) { + // Grow the chunk if necessary. + const auto output_offset = row_idx % STANDARD_VECTOR_SIZE; + if (output_offset == 0) { + output.Append(output_chunk); + output_chunk.Reset(); + output_chunk.SetCardinality(MinValue(idx_t(STANDARD_VECTOR_SIZE), input.Count() - row_idx)); + } + auto &result = output_chunk.data[0]; -/* - * Sometimes you might want a distinct seed based on when the program - * was compiled. That way, a particular instance of the program will - * behave the same way, but when recompiled it'll produce a different - * value. - */ + // special case, OVER (), aggregate over everything + UpdateWindowBoundaries(wexpr, input.Count(), row_idx, over, boundary_start_collection, boundary_end_collection, + partition_mask, order_mask, bounds); + if (WindowNeedsRank(wexpr)) { + if (!bounds.is_same_partition || row_idx == 0) { // special case for first row, need to init + dense_rank = 1; + rank = 1; + rank_equal = 0; + } else if (!bounds.is_peer) { + dense_rank++; + rank += rank_equal; + rank_equal = 0; + } + rank_equal++; + } -template -struct static_arbitrary_seed { -private: - static constexpr IntType fnv(IntType hash, const char* pos) { - return *pos == '\0' - ? 
hash - : fnv((hash * IntType(16777619U)) ^ *pos, (pos+1)); - } + // if no values are read for window, result is NULL + if (bounds.window_start >= bounds.window_end) { + FlatVector::SetNull(result, output_offset, true); + continue; + } -public: - static constexpr IntType value = fnv(IntType(2166136261U ^ sizeof(IntType)), - __DATE__ __TIME__ __FILE__); -}; + switch (wexpr->type) { + case ExpressionType::WINDOW_AGGREGATE: { + segment_tree->Compute(result, output_offset, bounds.window_start, bounds.window_end); + break; + } + case ExpressionType::WINDOW_ROW_NUMBER: { + auto rdata = FlatVector::GetData(result); + rdata[output_offset] = row_idx - bounds.partition_start + 1; + break; + } + case ExpressionType::WINDOW_RANK_DENSE: { + auto rdata = FlatVector::GetData(result); + rdata[output_offset] = dense_rank; + break; + } + case ExpressionType::WINDOW_RANK: { + auto rdata = FlatVector::GetData(result); + rdata[output_offset] = rank; + break; + } + case ExpressionType::WINDOW_PERCENT_RANK: { + int64_t denom = (int64_t)bounds.partition_end - bounds.partition_start - 1; + double percent_rank = denom > 0 ? ((double)rank - 1) / denom : 0; + auto rdata = FlatVector::GetData(result); + rdata[output_offset] = percent_rank; + break; + } + case ExpressionType::WINDOW_CUME_DIST: { + int64_t denom = (int64_t)bounds.partition_end - bounds.partition_start; + double cume_dist = denom > 0 ? 
((double)(bounds.peer_end - bounds.partition_start)) / denom : 0; + auto rdata = FlatVector::GetData(result); + rdata[output_offset] = cume_dist; + break; + } + case ExpressionType::WINDOW_NTILE: { + if (payload_collection.ColumnCount() != 1) { + throw BinderException("NTILE needs a parameter"); + } + auto n_param = GetCell(payload_collection, 0, row_idx); + // With thanks from SQLite's ntileValueFunc() + int64_t n_total = bounds.partition_end - bounds.partition_start; + if (n_param > n_total) { + // more groups allowed than we have values + // map every entry to a unique group + n_param = n_total; + } + int64_t n_size = (n_total / n_param); + // find the row idx within the group + D_ASSERT(row_idx >= bounds.partition_start); + int64_t adjusted_row_idx = row_idx - bounds.partition_start; + // now compute the ntile + int64_t n_large = n_total - n_param * n_size; + int64_t i_small = n_large * (n_size + 1); + int64_t result_ntile; -// Sometimes, when debugging or testing, it's handy to be able print the name -// of a (in human-readable form). This code allows the idiom: -// -// cout << printable_typename() -// -// to print out my_foo_type_t (or its concrete type if it is a synonym) + D_ASSERT((n_large * (n_size + 1) + (n_param - n_large) * n_size) == n_total); -#if __cpp_rtti || __GXX_RTTI + if (adjusted_row_idx < i_small) { + result_ntile = 1 + adjusted_row_idx / (n_size + 1); + } else { + result_ntile = 1 + n_large + (adjusted_row_idx - i_small) / n_size; + } + // result has to be between [1, NTILE] + D_ASSERT(result_ntile >= 1 && result_ntile <= n_param); + auto rdata = FlatVector::GetData(result); + rdata[output_offset] = result_ntile; + break; + } + case ExpressionType::WINDOW_LEAD: + case ExpressionType::WINDOW_LAG: { + int64_t offset = 1; + if (wexpr->offset_expr) { + offset = GetCell(leadlag_offset_collection, 0, wexpr->offset_expr->IsScalar() ? 
0 : row_idx); + } + int64_t val_idx = (int64_t)row_idx; + if (wexpr->type == ExpressionType::WINDOW_LEAD) { + val_idx += offset; + } else { + val_idx -= offset; + } -template -struct printable_typename {}; + if (val_idx >= int64_t(bounds.partition_start) && val_idx < int64_t(bounds.partition_end)) { + payload_collection.CopyCell(0, val_idx, result, output_offset); + } else if (wexpr->default_expr) { + const auto source_row = wexpr->default_expr->IsScalar() ? 0 : row_idx; + leadlag_default_collection.CopyCell(0, source_row, result, output_offset); + } else { + FlatVector::SetNull(result, output_offset, true); + } + break; + } + case ExpressionType::WINDOW_FIRST_VALUE: + payload_collection.CopyCell(0, bounds.window_start, result, output_offset); + break; + case ExpressionType::WINDOW_LAST_VALUE: + payload_collection.CopyCell(0, bounds.window_end - 1, result, output_offset); + break; + case ExpressionType::WINDOW_NTH_VALUE: { + if (payload_collection.ColumnCount() != 2) { + throw BinderException("NTH_VALUE needs a parameter"); + } + // Returns value evaluated at the row that is the n'th row of the window frame (counting from 1); + // returns NULL if there is no such row. 
+ if (CellIsNull(payload_collection, 1, row_idx)) { + FlatVector::SetNull(result, output_offset, true); + } else { + auto n_param = GetCell(payload_collection, 1, row_idx); + int64_t n_total = bounds.window_end - bounds.window_start; + if (0 < n_param && n_param <= n_total) { + payload_collection.CopyCell(0, bounds.window_start + n_param - 1, result, output_offset); + } else { + FlatVector::SetNull(result, output_offset, true); + } + } + break; + } + default: + throw InternalException("Window aggregate type %s", ExpressionTypeToString(wexpr->type)); + } + } -template -std::ostream& operator<<(std::ostream& out, printable_typename) { - const char *implementation_typename = typeid(T).name(); -#ifdef __GNUC__ - int status; - char* pretty_name = - abi::__cxa_demangle(implementation_typename, nullptr, nullptr, &status); - if (status == 0) - out << pretty_name; - free(static_cast(pretty_name)); - if (status == 0) - return out; -#endif - out << implementation_typename; - return out; + // Push the last chunk + output.Append(output_chunk); } -#endif // __cpp_rtti || __GXX_RTTI - -} // namespace pcg_extras +using WindowExpressions = vector; -#endif // PCG_EXTRAS_HPP_INCLUDED +static void ComputeWindowExpressions(WindowExpressions &window_exprs, ChunkCollection &input, + ChunkCollection &window_results, ChunkCollection &over) { + // Idempotency + if (input.Count() == 0) { + return; + } + // Pick out a function for the OVER clause + auto over_expr = window_exprs[0]; -// LICENSE_CHANGE_END + // Sort the partition + const auto sort_col_count = over_expr->partitions.size() + over_expr->orders.size(); + if (sort_col_count > 0) { + SortCollectionForPartition(over_expr, input, over); + } + // Set bits for the start of each partition + BitArray partition_bits(input.Count()); + partition_bits[0] = true; -namespace pcg_detail { + for (idx_t c = 0; c < over_expr->partitions.size(); ++c) { + MaskColumn(partition_bits, over, c); + } -using namespace pcg_extras; + // Set bits for the start 
of each peer group. + // Partitions also break peer groups, so start with the partition bits. + auto order_bits = partition_bits; + for (idx_t c = over_expr->partitions.size(); c < sort_col_count; ++c) { + MaskColumn(order_bits, over, c); + } -/* - * The LCG generators need some constants to function. This code lets you - * look up the constant by *type*. For example - * - * default_multiplier::multiplier() - * - * gives you the default multipler for 32-bit integers. We use the name - * of the constant and not a generic word like value to allow these classes - * to be used as mixins. - */ + // Compute the functions columnwise + for (idx_t expr_idx = 0; expr_idx < window_exprs.size(); ++expr_idx) { + ChunkCollection output; + ComputeWindowExpression(window_exprs[expr_idx], input, output, over, partition_bits, order_bits); + window_results.Fuse(output); + } +} -template -struct default_multiplier { - // Not defined for an arbitrary type -}; +static void AppendCollection(const ChunkCollection &source, ChunkCollection &target, SelectionVector &sel, + const idx_t source_count, const idx_t chunk_idx) { -template -struct default_increment { - // Not defined for an arbitrary type -}; + DataChunk chunk; + chunk.Initialize(source.Types()); + source.GetChunk(chunk_idx).Copy(chunk, sel, source_count); + target.Append(chunk); +} -#define PCG_DEFINE_CONSTANT(type, what, kind, constant) \ - template <> \ - struct what ## _ ## kind { \ - static constexpr type kind() { \ - return constant; \ - } \ - }; +static void ExtractPartition(WindowGlobalState &gstate, ChunkCollection &chunks, ChunkCollection &over_collection, + const hash_t hash_bin, const hash_t hash_mask) { -PCG_DEFINE_CONSTANT(uint8_t, default, multiplier, 141U) -PCG_DEFINE_CONSTANT(uint8_t, default, increment, 77U) + // Copy the partition data so we can work with it on this thread + ChunkCollection &hashes = gstate.hash_collection; + SelectionVector sel; + for (idx_t chunk_idx = 0; chunk_idx < hashes.ChunkCount(); 
++chunk_idx) { + // Build a selection vector of matching hashes + auto &hash_chunk = hashes.GetChunk(chunk_idx); + auto hash_size = hash_chunk.size(); + auto hash_data = FlatVector::GetData(hash_chunk.data[0]); + sel.Initialize(hash_size); + idx_t bin_size = 0; + for (idx_t i = 0; i < hash_size; ++i) { + if ((hash_data[i] & hash_mask) == hash_bin) { + sel.set_index(bin_size++, i); + } + } -PCG_DEFINE_CONSTANT(uint16_t, default, multiplier, 12829U) -PCG_DEFINE_CONSTANT(uint16_t, default, increment, 47989U) + // Copy the data for each collection + if (bin_size == 0) { + continue; + } -PCG_DEFINE_CONSTANT(uint32_t, default, multiplier, 747796405U) -PCG_DEFINE_CONSTANT(uint32_t, default, increment, 2891336453U) + AppendCollection(gstate.chunks, chunks, sel, bin_size, chunk_idx); + AppendCollection(gstate.over_collection, over_collection, sel, bin_size, chunk_idx); + } +} -PCG_DEFINE_CONSTANT(uint64_t, default, multiplier, 6364136223846793005ULL) -PCG_DEFINE_CONSTANT(uint64_t, default, increment, 1442695040888963407ULL) +//===--------------------------------------------------------------------===// +// GetChunkInternal +//===--------------------------------------------------------------------===// +idx_t PhysicalWindow::MaxThreads(ClientContext &context) { + // Recursive CTE can cause us to be called befor Finalize, + // so we have to check and fall back to the cardinality estimate + // in that case + if (!this->sink_state.get()) { + return (estimated_cardinality + STANDARD_VECTOR_SIZE - 1) / STANDARD_VECTOR_SIZE + 1; + } + auto &state = (WindowGlobalState &)*this->sink_state; -PCG_DEFINE_CONSTANT(pcg128_t, default, multiplier, - PCG_128BIT_CONSTANT(2549297995355413924ULL,4865540595714422341ULL)) -PCG_DEFINE_CONSTANT(pcg128_t, default, increment, - PCG_128BIT_CONSTANT(6364136223846793005ULL,1442695040888963407ULL)) + // If there is only one partition, we have to process it on one thread. 
+ if (state.counts.empty()) { + return 1; + } -/* Alternative (cheaper) multipliers for 128-bit */ + idx_t max_threads = 0; + for (const auto count : state.counts) { + max_threads += int(count > 0); + } -template -struct cheap_multiplier : public default_multiplier { - // For most types just use the default. -}; + return max_threads; +} -template <> -struct cheap_multiplier { - static constexpr uint64_t multiplier() { - return 0xda942042e4dd58b5ULL; - } +// Global read state +class WindowParallelState : public ParallelState { +public: + WindowParallelState() : next_part(0) { + } + //! The output read position. + atomic next_part; }; +unique_ptr PhysicalWindow::GetParallelState() { + auto result = make_unique(); + return move(result); +} -/* - * Each PCG generator is available in four variants, based on how it applies - * the additive constant for its underlying LCG; the variations are: - * - * single stream - all instances use the same fixed constant, thus - * the RNG always somewhere in same sequence - * mcg - adds zero, resulting in a single stream and reduced - * period - * specific stream - the constant can be changed at any time, selecting - * a different random sequence - * unique stream - the constant is based on the memory address of the - * object, thus every RNG has its own unique sequence - * - * This variation is provided though mixin classes which define a function - * value called increment() that returns the nesessary additive constant. - */ +// Per-thread read state +class PhysicalWindowOperatorState : public PhysicalOperatorState { +public: + PhysicalWindowOperatorState(PhysicalOperator &op, PhysicalOperator *child) + : PhysicalOperatorState(op, child), parallel_state(nullptr), initialized(false) { + } + ParallelState *parallel_state; + bool initialized; + //! The number of partitions to process (0 if there is no partitioning) + size_t partitions; + //! The output read position. + size_t next_part; + //! 
The generated input chunks + ChunkCollection chunks; + //! The generated output chunks + ChunkCollection window_results; + //! The read cursor + idx_t position; +}; -/* - * unique stream - */ +unique_ptr PhysicalWindow::GetOperatorState() { + return make_unique(*this, children.empty() ? nullptr : children[0].get()); +} +static void GeneratePartition(PhysicalWindowOperatorState &state, WindowGlobalState &gstate, const idx_t hash_bin) { + auto &op = (PhysicalWindow &)gstate.op; + WindowExpressions window_exprs; + for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) { + D_ASSERT(op.select_list[expr_idx]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW); + auto wexpr = reinterpret_cast(op.select_list[expr_idx].get()); + window_exprs.emplace_back(wexpr); + } -template -class unique_stream { -protected: - static constexpr bool is_mcg = false; + // Get rid of any stale data + state.chunks.Reset(); + state.window_results.Reset(); + state.position = 0; - // Is never called, but is provided for symmetry with specific_stream - void set_stream(...) 
- { - abort(); - } + if (gstate.counts.empty() && hash_bin == 0) { + ChunkCollection &big_data = gstate.chunks; + ChunkCollection output; + ChunkCollection &over_collection = gstate.over_collection; + ComputeWindowExpressions(window_exprs, big_data, output, over_collection); + state.chunks.Merge(big_data); + state.window_results.Merge(output); + } else if (hash_bin < gstate.counts.size() && gstate.counts[hash_bin] > 0) { + ChunkCollection input; + ChunkCollection output; + ChunkCollection over; + const auto hash_mask = hash_t(gstate.counts.size() - 1); + ExtractPartition(gstate, input, over, hash_bin, hash_mask); + ComputeWindowExpressions(window_exprs, input, output, over); + state.chunks.Merge(input); + state.window_results.Merge(output); + } +} -public: - typedef itype state_type; +static void Scan(PhysicalWindowOperatorState &state, DataChunk &chunk) { + ChunkCollection &big_data = state.chunks; + ChunkCollection &window_results = state.window_results; - constexpr itype increment() const { - return itype(reinterpret_cast(this) | 1); - } + if (state.position >= big_data.Count()) { + return; + } - constexpr itype stream() const - { - return increment() >> 1; - } + // just return what was computed before, appending the result cols of the window expressions at the end + auto &proj_ch = big_data.GetChunkForRow(state.position); + auto &wind_ch = window_results.GetChunkForRow(state.position); - static constexpr bool can_specify_stream = false; + idx_t out_idx = 0; + D_ASSERT(proj_ch.size() == wind_ch.size()); + chunk.SetCardinality(proj_ch); + for (idx_t col_idx = 0; col_idx < proj_ch.ColumnCount(); col_idx++) { + chunk.data[out_idx++].Reference(proj_ch.data[col_idx]); + } + for (idx_t col_idx = 0; col_idx < wind_ch.ColumnCount(); col_idx++) { + chunk.data[out_idx++].Reference(wind_ch.data[col_idx]); + } + chunk.Verify(); - static constexpr size_t streams_pow2() - { - return (sizeof(itype) < sizeof(size_t) ? 
sizeof(itype) - : sizeof(size_t))*8 - 1u; - } + state.position += STANDARD_VECTOR_SIZE; +} -protected: - constexpr unique_stream() = default; -}; +void PhysicalWindow::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state_p) const { + auto &state = *reinterpret_cast(state_p); + auto &gstate = (WindowGlobalState &)*sink_state; + if (!state.initialized) { + // initialize thread-local operator state + state.partitions = gstate.counts.size(); + state.next_part = 0; + // record parallel state (if any) + state.parallel_state = nullptr; + auto &task = context.task; + // check if there is any parallel state to fetch + state.parallel_state = nullptr; + auto task_info = task.task_info.find(this); + if (task_info != task.task_info.end()) { + // parallel scan init + state.parallel_state = task_info->second; + } + state.initialized = true; + } -/* - * no stream (mcg) - */ + if (!state.parallel_state) { + // sequential scan + if (state.position >= state.chunks.Count()) { + auto hash_bin = state.next_part++; + for (; hash_bin < state.partitions; hash_bin = state.next_part++) { + if (gstate.counts[hash_bin] > 0) { + break; + } + } + GeneratePartition(state, gstate, hash_bin); + } + Scan(state, chunk); + if (chunk.size() != 0) { + return; + } + } else { + // parallel scan + auto ¶llel_state = *reinterpret_cast(state.parallel_state); + do { + if (state.position >= state.chunks.Count()) { + auto hash_bin = parallel_state.next_part++; + for (; hash_bin < state.partitions; hash_bin = parallel_state.next_part++) { + if (gstate.counts[hash_bin] > 0) { + break; + } + } + GeneratePartition(state, gstate, hash_bin); + } + Scan(state, chunk); + if (chunk.size() != 0) { + return; + } else { + break; + } + } while (true); + } + D_ASSERT(chunk.size() == 0); +} -template -class no_stream { -protected: - static constexpr bool is_mcg = true; +void PhysicalWindow::Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate_p, + DataChunk 
&input) const { + auto &lstate = (WindowLocalState &)lstate_p; + lstate.chunks.Append(input); - // Is never called, but is provided for symmetry with specific_stream - void set_stream(...) - { - abort(); - } + // Compute the over columns and the hash values for this block (if any) + const auto over_idx = 0; + auto over_expr = reinterpret_cast(select_list[over_idx].get()); -public: - typedef itype state_type; + const auto sort_col_count = over_expr->partitions.size() + over_expr->orders.size(); + if (sort_col_count > 0) { + DataChunk over_chunk; + MaterializeOverForWindow(over_expr, input, over_chunk); - static constexpr itype increment() { - return 0; - } + if (!over_expr->partitions.empty()) { + if (lstate.counts.empty()) { + lstate.counts.resize(lstate.partition_count, 0); + } - static constexpr bool can_specify_stream = false; + DataChunk hash_chunk; + HashChunk(lstate.counts, hash_chunk, over_chunk, over_expr->partitions.size()); + lstate.hash_collection.Append(hash_chunk); + D_ASSERT(lstate.chunks.Count() == lstate.hash_collection.Count()); + } - static constexpr size_t streams_pow2() - { - return 0u; - } + lstate.over_collection.Append(over_chunk); + D_ASSERT(lstate.chunks.Count() == lstate.over_collection.Count()); + } +} -protected: - constexpr no_stream() = default; -}; +void PhysicalWindow::Combine(ExecutionContext &context, GlobalOperatorState &gstate_p, LocalSinkState &lstate_p) { + auto &lstate = (WindowLocalState &)lstate_p; + if (lstate.chunks.Count() == 0) { + return; + } + auto &gstate = (WindowGlobalState &)gstate_p; + lock_guard glock(gstate.lock); + gstate.chunks.Merge(lstate.chunks); + gstate.over_collection.Merge(lstate.over_collection); + gstate.hash_collection.Merge(lstate.hash_collection); + if (gstate.counts.empty()) { + gstate.counts = lstate.counts; + } else { + D_ASSERT(gstate.counts.size() == lstate.counts.size()); + for (idx_t i = 0; i < gstate.counts.size(); ++i) { + gstate.counts[i] += lstate.counts[i]; + } + } +} +bool 
PhysicalWindow::FinalizeInternal(ClientContext &context, unique_ptr gstate_p) { + this->sink_state = move(gstate_p); + return true; +} -/* - * single stream/sequence (oneseq) - */ +bool PhysicalWindow::Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr gstate_p) { + return FinalizeInternal(context, move(gstate_p)); +} -template -class oneseq_stream : public default_increment { -protected: - static constexpr bool is_mcg = false; +unique_ptr PhysicalWindow::GetLocalSinkState(ExecutionContext &context) { + return make_unique(*this); +} - // Is never called, but is provided for symmetry with specific_stream - void set_stream(...) - { - abort(); - } +unique_ptr PhysicalWindow::GetGlobalState(ClientContext &context) { + return make_unique(*this, context); +} -public: - typedef itype state_type; +string PhysicalWindow::ParamsToString() const { + string result; + for (idx_t i = 0; i < select_list.size(); i++) { + if (i > 0) { + result += "\n"; + } + result += select_list[i]->GetName(); + } + return result; +} - static constexpr itype stream() - { - return default_increment::increment() >> 1; - } +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/operator/filter/physical_filter.hpp +// +// +//===----------------------------------------------------------------------===// - static constexpr bool can_specify_stream = false; - static constexpr size_t streams_pow2() - { - return 0u; - } -protected: - constexpr oneseq_stream() = default; -}; -/* - * specific stream - */ +namespace duckdb { -template -class specific_stream { -protected: - static constexpr bool is_mcg = false; +//! PhysicalFilter represents a filter operator. It removes non-matching tuples +//! from the result. Note that it does not physically change the data, it only +//! adds a selection vector to the chunk. 
+class PhysicalFilter : public PhysicalOperator { +public: + PhysicalFilter(vector types, vector> select_list, idx_t estimated_cardinality); - itype inc_ = default_increment::increment(); + //! The filter expression + unique_ptr expression; public: - typedef itype state_type; - typedef itype stream_state; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; - constexpr itype increment() const { - return inc_; - } + unique_ptr GetOperatorState() override; + string ParamsToString() const override; + void FinalizeOperatorState(PhysicalOperatorState &state_p, ExecutionContext &context) override; +}; +} // namespace duckdb - itype stream() - { - return inc_ >> 1; - } - void set_stream(itype specific_seq) - { - inc_ = (specific_seq << 1) | 1; - } - static constexpr bool can_specify_stream = true; - static constexpr size_t streams_pow2() - { - return (sizeof(itype)*8) - 1u; - } +namespace duckdb { -protected: - specific_stream() = default; +class PhysicalFilterState : public PhysicalOperatorState { +public: + PhysicalFilterState(PhysicalOperator &op, PhysicalOperator *child, Expression &expr) + : PhysicalOperatorState(op, child), executor(expr) { + } - specific_stream(itype specific_seq) - : inc_(itype(specific_seq << 1) | itype(1U)) - { - // Nothing (else) to do. - } + ExpressionExecutor executor; }; +PhysicalFilter::PhysicalFilter(vector types, vector> select_list, + idx_t estimated_cardinality) + : PhysicalOperator(PhysicalOperatorType::FILTER, move(types), estimated_cardinality) { + D_ASSERT(select_list.size() > 0); + if (select_list.size() > 1) { + // create a big AND out of the expressions + auto conjunction = make_unique(ExpressionType::CONJUNCTION_AND); + for (auto &expr : select_list) { + conjunction->children.push_back(move(expr)); + } + expression = move(conjunction); + } else { + expression = move(select_list[0]); + } +} -/* - * This is where it all comes together. 
This function joins together three - * mixin classes which define - * - the LCG additive constant (the stream) - * - the LCG multiplier - * - the output function - * in addition, we specify the type of the LCG state, and the result type, - * and whether to use the pre-advance version of the state for the output - * (increasing instruction-level parallelism) or the post-advance version - * (reducing register pressure). - * - * Given the high level of parameterization, the code has to use some - * template-metaprogramming tricks to handle some of the subtle variations - * involved. - */ - -template , - typename multiplier_mixin = default_multiplier > -class engine : protected output_mixin, - public stream_mixin, - protected multiplier_mixin { -protected: - itype state_; +void PhysicalFilter::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state_p) const { + auto state = reinterpret_cast(state_p); + SelectionVector sel(STANDARD_VECTOR_SIZE); + idx_t initial_count; + idx_t result_count; + do { + // fetch a chunk from the child and run the filter + // we repeat this process until either (1) passing tuples are found, or (2) the child is completely exhausted + children[0]->GetChunk(context, chunk, state->child_state.get()); + if (chunk.size() == 0) { + return; + } + initial_count = chunk.size(); + result_count = state->executor.SelectExpression(chunk, sel); + } while (result_count == 0); - struct can_specify_stream_tag {}; - struct no_specifiable_stream_tag {}; + if (result_count == initial_count) { + // nothing was filtered: skip adding any selection vectors + return; + } + chunk.Slice(sel, result_count); +} - using stream_mixin::increment; - using multiplier_mixin::multiplier; +unique_ptr PhysicalFilter::GetOperatorState() { + return make_unique(*this, children[0].get(), *expression); +} -public: - typedef xtype result_type; - typedef itype state_type; +string PhysicalFilter::ParamsToString() const { + return expression->GetName(); 
+} - static constexpr size_t period_pow2() - { - return sizeof(state_type)*8 - 2*stream_mixin::is_mcg; - } +void PhysicalFilter::FinalizeOperatorState(PhysicalOperatorState &state_p, ExecutionContext &context) { + auto &state = reinterpret_cast(state_p); + context.thread.profiler.Flush(this, &state.executor, "executor", 0); + if (!children.empty() && state.child_state) { + children[0]->FinalizeOperatorState(*state.child_state, context); + } +} - // It would be nice to use std::numeric_limits for these, but - // we can't be sure that it'd be defined for the 128-bit types. +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/operator/helper/physical_execute.hpp +// +// +//===----------------------------------------------------------------------===// - static constexpr result_type min() - { - return result_type(0UL); - } - static constexpr result_type max() - { - return result_type(~result_type(0UL)); - } -protected: - itype bump(itype state) - { - return state * multiplier() + increment(); - } - itype base_generate() - { - return state_ = bump(state_); - } - itype base_generate0() - { - itype old_state = state_; - state_ = bump(state_); - return old_state; - } +namespace duckdb { +class PhysicalExecute : public PhysicalOperator { public: - result_type operator()() - { - if (output_previous) - return this->output(base_generate0()); - else - return this->output(base_generate()); - } - - result_type operator()(result_type upper_bound) - { - return bounded_rand(*this, upper_bound); - } - -protected: - static itype advance(itype state, itype delta, - itype cur_mult, itype cur_plus); - - static itype distance(itype cur_state, itype newstate, itype cur_mult, - itype cur_plus, itype mask = ~itype(0U)); + explicit PhysicalExecute(PhysicalOperator *plan) + : PhysicalOperator(PhysicalOperatorType::EXECUTE, plan->types, -1), plan(plan) { + } - itype distance(itype newstate, itype mask = 
itype(~itype(0U))) const - { - return distance(state_, newstate, multiplier(), increment(), mask); - } + PhysicalOperator *plan; public: - void advance(itype delta) - { - state_ = advance(state_, delta, this->multiplier(), this->increment()); - } + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; - void backstep(itype delta) - { - advance(-delta); - } + unique_ptr GetOperatorState() override; + void FinalizeOperatorState(PhysicalOperatorState &state_p, ExecutionContext &context) override; +}; - void discard(itype delta) - { - advance(delta); - } +} // namespace duckdb - bool wrapped() - { - if (stream_mixin::is_mcg) { - // For MCGs, the low order two bits never change. In this - // implementation, we keep them fixed at 3 to make this test - // easier. - return state_ == 3; - } else { - return state_ == 0; - } - } - engine(itype state = itype(0xcafef00dd15ea5e5ULL)) - : state_(this->is_mcg ? state|state_type(3U) - : bump(state + this->increment())) - { - // Nothing else to do. - } - - // This function may or may not exist. It thus has to be a template - // to use SFINAE; users don't have to worry about its template-ness. - - template - engine(itype state, typename sm::stream_state stream_seed) - : stream_mixin(stream_seed), - state_(this->is_mcg ? state|state_type(3U) - : bump(state + this->increment())) - { - // Nothing else to do. - } - - template - engine(SeedSeq&& seedSeq, typename std::enable_if< - !stream_mixin::can_specify_stream - && !std::is_convertible::value - && !std::is_convertible::value, - no_specifiable_stream_tag>::type = {}) - : engine(generate_one(std::forward(seedSeq))) - { - // Nothing else to do. 
- } - - template - engine(SeedSeq&& seedSeq, typename std::enable_if< - stream_mixin::can_specify_stream - && !std::is_convertible::value - && !std::is_convertible::value, - can_specify_stream_tag>::type = {}) - : engine(generate_one(seedSeq), - generate_one(seedSeq)) - { - // Nothing else to do. - } - - - template - void seed(Args&&... args) - { - new (this) engine(std::forward(args)...); - } - - template - friend bool operator==(const engine&, - const engine&); - - template - friend itype1 operator-(const engine&, - const engine&); - - template - friend std::basic_ostream& - operator<<(std::basic_ostream& out, - const engine&); - - template - friend std::basic_istream& - operator>>(std::basic_istream& in, - engine& rng); -}; - -template -std::basic_ostream& -operator<<(std::basic_ostream& out, - const engine& rng) -{ - using pcg_extras::operator<<; +namespace duckdb { - auto orig_flags = out.flags(std::ios_base::dec | std::ios_base::left); - auto space = out.widen(' '); - auto orig_fill = out.fill(); +void PhysicalExecute::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state_p) const { + D_ASSERT(plan); + plan->GetChunk(context, chunk, state_p); +} - out << rng.multiplier() << space - << rng.increment() << space - << rng.state_; +unique_ptr PhysicalExecute::GetOperatorState() { + return plan->GetOperatorState(); +} - out.flags(orig_flags); - out.fill(orig_fill); - return out; +void PhysicalExecute::FinalizeOperatorState(PhysicalOperatorState &state_p, ExecutionContext &context) { + if (!children.empty() && state_p.child_state) { + plan->FinalizeOperatorState(state_p, context); + } } +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/operator/helper/physical_limit.hpp +// +// +//===----------------------------------------------------------------------===// -template -std::basic_istream& -operator>>(std::basic_istream& in, - engine& rng) 
-{ - using pcg_extras::operator>>; - auto orig_flags = in.flags(std::ios_base::dec | std::ios_base::skipws); - itype multiplier, increment, state; - in >> multiplier >> increment >> state; - if (!in.fail()) { - bool good = true; - if (multiplier != rng.multiplier()) { - good = false; - } else if (rng.can_specify_stream) { - rng.set_stream(increment >> 1); - } else if (increment != rng.increment()) { - good = false; - } - if (good) { - rng.state_ = state; - } else { - in.clear(std::ios::failbit); - } - } +namespace duckdb { - in.flags(orig_flags); - return in; -} +//! PhyisicalLimit represents the LIMIT operator +class PhysicalLimit : public PhysicalOperator { +public: + PhysicalLimit(vector types, idx_t limit, idx_t offset, unique_ptr limit_expression, + unique_ptr offset_expression, idx_t estimated_cardinality) + : PhysicalOperator(PhysicalOperatorType::LIMIT, move(types), estimated_cardinality), limit_value(limit), + offset_value(offset), limit_expression(move(limit_expression)), offset_expression(move(offset_expression)) { + } + idx_t limit_value; + idx_t offset_value; + unique_ptr limit_expression; + unique_ptr offset_expression; -template -itype engine::advance( - itype state, itype delta, itype cur_mult, itype cur_plus) -{ - // The method used here is based on Brown, "Random Number Generation - // with Arbitrary Stride,", Transactions of the American Nuclear - // Society (Nov. 1994). The algorithm is very similar to fast - // exponentiation. - // - // Even though delta is an unsigned integer, we can pass a - // signed integer to go backwards, it just goes "the long way round". - - constexpr itype ZERO = 0u; // itype may be a non-trivial types, so - constexpr itype ONE = 1u; // we define some ugly constants. 
- itype acc_mult = 1; - itype acc_plus = 0; - while (delta > ZERO) { - if (delta & ONE) { - acc_mult *= cur_mult; - acc_plus = acc_plus*cur_mult + cur_plus; - } - cur_plus = (cur_mult+ONE)*cur_plus; - cur_mult *= cur_mult; - delta >>= 1; - } - return acc_mult * state + acc_plus; -} - -template -itype engine::distance( - itype cur_state, itype newstate, itype cur_mult, itype cur_plus, itype mask) -{ - constexpr itype ONE = 1u; // itype could be weird, so use constant - bool is_mcg = cur_plus == itype(0); - itype the_bit = is_mcg ? itype(4u) : itype(1u); - itype distance = 0u; - while ((cur_state & mask) != (newstate & mask)) { - if ((cur_state & the_bit) != (newstate & the_bit)) { - cur_state = cur_state * cur_mult + cur_plus; - distance |= the_bit; - } - assert((cur_state & the_bit) == (newstate & the_bit)); - the_bit <<= 1; - cur_plus = (cur_mult+ONE)*cur_plus; - cur_mult *= cur_mult; - } - return is_mcg ? distance >> 2 : distance; -} - -template -itype operator-(const engine& lhs, - const engine& rhs) -{ - static_assert( - std::is_same::value && - std::is_same::value, - "Incomparable generators"); - if (lhs.increment() == rhs.increment()) { - return rhs.distance(lhs.state_); - } else { - constexpr itype ONE = 1u; - itype lhs_diff = lhs.increment() + (lhs.multiplier()-ONE) * lhs.state_; - itype rhs_diff = rhs.increment() + (rhs.multiplier()-ONE) * rhs.state_; - if ((lhs_diff & itype(3u)) != (rhs_diff & itype(3u))) { - rhs_diff = -rhs_diff; - } - return rhs.distance(rhs_diff, lhs_diff, rhs.multiplier(), itype(0u)); - } -} - - -template -bool operator==(const engine& lhs, - const engine& rhs) -{ - return (lhs.multiplier() == rhs.multiplier()) - && (lhs.increment() == rhs.increment()) - && (lhs.state_ == rhs.state_); -} - -template -inline bool operator!=(const engine& lhs, - const engine& rhs) -{ - return !operator==(lhs,rhs); -} - - -template class output_mixin, - bool output_previous = (sizeof(itype) <= 8), - template class multiplier_mixin = default_multiplier> 
-using oneseq_base = engine, output_previous, - oneseq_stream, - multiplier_mixin >; - -template class output_mixin, - bool output_previous = (sizeof(itype) <= 8), - template class multiplier_mixin = default_multiplier> -using unique_base = engine, output_previous, - unique_stream, - multiplier_mixin >; - -template class output_mixin, - bool output_previous = (sizeof(itype) <= 8), - template class multiplier_mixin = default_multiplier> -using setseq_base = engine, output_previous, - specific_stream, - multiplier_mixin >; - -template class output_mixin, - bool output_previous = (sizeof(itype) <= 8), - template class multiplier_mixin = default_multiplier> -using mcg_base = engine, output_previous, - no_stream, - multiplier_mixin >; +public: + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; -/* - * OUTPUT FUNCTIONS. - * - * These are the core of the PCG generation scheme. They specify how to - * turn the base LCG's internal state into the output value of the final - * generator. - * - * They're implemented as mixin classes. - * - * All of the classes have code that is written to allow it to be applied - * at *arbitrary* bit sizes, although in practice they'll only be used at - * standard sizes supported by C++. - */ + unique_ptr GetOperatorState() override; +}; -/* - * XSH RS -- high xorshift, followed by a random shift - * - * Fast. A good performer. - */ +} // namespace duckdb -template -struct xsh_rs_mixin { - static xtype output(itype internal) - { - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); - constexpr bitcount_t sparebits = bits - xtypebits; - constexpr bitcount_t opbits = - sparebits-5 >= 64 ? 5 - : sparebits-4 >= 32 ? 4 - : sparebits-3 >= 16 ? 3 - : sparebits-2 >= 4 ? 2 - : sparebits-1 >= 1 ? 
1 - : 0; - constexpr bitcount_t mask = (1 << opbits) - 1; - constexpr bitcount_t maxrandshift = mask; - constexpr bitcount_t topspare = opbits; - constexpr bitcount_t bottomspare = sparebits - topspare; - constexpr bitcount_t xshift = topspare + (xtypebits+maxrandshift)/2; - bitcount_t rshift = - opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; - internal ^= internal >> xshift; - xtype result = xtype(internal >> (bottomspare - maxrandshift + rshift)); - return result; - } -}; -/* - * XSH RR -- high xorshift, followed by a random rotate - * - * Fast. A good performer. Slightly better statistically than XSH RS. - */ -template -struct xsh_rr_mixin { - static xtype output(itype internal) - { - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype)*8); - constexpr bitcount_t sparebits = bits - xtypebits; - constexpr bitcount_t wantedopbits = - xtypebits >= 128 ? 7 - : xtypebits >= 64 ? 6 - : xtypebits >= 32 ? 5 - : xtypebits >= 16 ? 4 - : 3; - constexpr bitcount_t opbits = - sparebits >= wantedopbits ? wantedopbits - : sparebits; - constexpr bitcount_t amplifier = wantedopbits - opbits; - constexpr bitcount_t mask = (1 << opbits) - 1; - constexpr bitcount_t topspare = opbits; - constexpr bitcount_t bottomspare = sparebits - topspare; - constexpr bitcount_t xshift = (topspare + xtypebits)/2; - bitcount_t rot = opbits ? 
bitcount_t(internal >> (bits - opbits)) & mask - : 0; - bitcount_t amprot = (rot << amplifier) & mask; - internal ^= internal >> xshift; - xtype result = xtype(internal >> bottomspare); - result = rotr(result, amprot); - return result; - } -}; -/* - * RXS -- random xorshift - */ -template -struct rxs_mixin { -static xtype output_rxs(itype internal) - { - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype)*8); - constexpr bitcount_t shift = bits - xtypebits; - constexpr bitcount_t extrashift = (xtypebits - shift)/2; - bitcount_t rshift = shift > 64+8 ? (internal >> (bits - 6)) & 63 - : shift > 32+4 ? (internal >> (bits - 5)) & 31 - : shift > 16+2 ? (internal >> (bits - 4)) & 15 - : shift > 8+1 ? (internal >> (bits - 3)) & 7 - : shift > 4+1 ? (internal >> (bits - 2)) & 3 - : shift > 2+1 ? (internal >> (bits - 1)) & 1 - : 0; - internal ^= internal >> (shift + extrashift - rshift); - xtype result = internal >> rshift; - return result; - } -}; -/* - * RXS M XS -- random xorshift, mcg multiply, fixed xorshift - * - * The most statistically powerful generator, but all those steps - * make it slower than some of the others. We give it the rottenest jobs. - * - * Because it's usually used in contexts where the state type and the - * result type are the same, it is a permutation and is thus invertable. - * We thus provide a function to invert it. This function is used to - * for the "inside out" generator used by the extended generator. - */ +namespace duckdb { -/* Defined type-based concepts for the multiplication step. They're actually - * all derived by truncating the 128-bit, which was computed to be a good - * "universal" constant. 
- */ +class PhysicalLimitOperatorState : public PhysicalOperatorState { +public: + PhysicalLimitOperatorState(PhysicalLimit &op, PhysicalOperator *child, idx_t current_offset = 0) + : PhysicalOperatorState(op, child), current_offset(current_offset) { + this->limit = op.limit_expression ? INVALID_INDEX : op.limit_value; + this->offset = op.offset_expression ? INVALID_INDEX : op.offset_value; + } -template -struct mcg_multiplier { - // Not defined for an arbitrary type + idx_t current_offset; + idx_t limit; + idx_t offset; }; -template -struct mcg_unmultiplier { - // Not defined for an arbitrary type -}; +uint64_t GetDelimiter(DataChunk &input, Expression *expr, uint64_t original_value) { + DataChunk limit_chunk; + vector types {expr->return_type}; + limit_chunk.Initialize(types); + ExpressionExecutor limit_executor(expr); + auto input_size = input.size(); + input.SetCardinality(1); + limit_executor.Execute(input, limit_chunk); + input.SetCardinality(input_size); + auto limit_value = limit_chunk.GetValue(0, 0); + if (limit_value.is_null) { + return original_value; + } + return limit_value.value_.ubigint; +} -PCG_DEFINE_CONSTANT(uint8_t, mcg, multiplier, 217U) -PCG_DEFINE_CONSTANT(uint8_t, mcg, unmultiplier, 105U) +void PhysicalLimit::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state_p) const { + auto state = reinterpret_cast(state_p); + auto &limit = state->limit; + auto &offset = state->offset; -PCG_DEFINE_CONSTANT(uint16_t, mcg, multiplier, 62169U) -PCG_DEFINE_CONSTANT(uint16_t, mcg, unmultiplier, 28009U) + if (limit != INVALID_INDEX && offset != INVALID_INDEX) { + idx_t max_element = limit + offset; + if ((limit == 0 || state->current_offset >= max_element) && !(limit_expression || offset_expression)) { + return; + } + } -PCG_DEFINE_CONSTANT(uint32_t, mcg, multiplier, 277803737U) -PCG_DEFINE_CONSTANT(uint32_t, mcg, unmultiplier, 2897767785U) + // get the next chunk from the child + do { + children[0]->GetChunk(context, 
state->child_chunk, state->child_state.get()); + if (limit == INVALID_INDEX) { + limit = GetDelimiter(state->child_chunk, limit_expression.get(), 1ULL << 62ULL); + } + if (offset == INVALID_INDEX) { + offset = GetDelimiter(state->child_chunk, offset_expression.get(), 0); + } + idx_t max_element = limit + offset; + if (state->child_chunk.size() == 0) { + return; + } + if (limit == 0 || state->current_offset >= max_element) { + return; + } + if (state->current_offset < offset) { + // we are not yet at the offset point + if (state->current_offset + state->child_chunk.size() > offset) { + // however we will reach it in this chunk + // we have to copy part of the chunk with an offset + idx_t start_position = offset - state->current_offset; + auto chunk_count = MinValue(limit, state->child_chunk.size() - start_position); + SelectionVector sel(STANDARD_VECTOR_SIZE); + for (idx_t i = 0; i < chunk_count; i++) { + sel.set_index(i, start_position + i); + } + // set up a slice of the input chunks + chunk.Slice(state->child_chunk, sel, chunk_count); + } + } else { + // have to copy either the entire chunk or part of it + idx_t chunk_count; + if (state->current_offset + state->child_chunk.size() >= max_element) { + // have to limit the count of the chunk + chunk_count = max_element - state->current_offset; + } else { + // we copy the entire chunk + chunk_count = state->child_chunk.size(); + } + // instead of copying we just change the pointer in the current chunk + chunk.Reference(state->child_chunk); + chunk.SetCardinality(chunk_count); + } -PCG_DEFINE_CONSTANT(uint64_t, mcg, multiplier, 12605985483714917081ULL) -PCG_DEFINE_CONSTANT(uint64_t, mcg, unmultiplier, 15009553638781119849ULL) + state->current_offset += state->child_chunk.size(); + } while (chunk.size() == 0); +} -PCG_DEFINE_CONSTANT(pcg128_t, mcg, multiplier, - PCG_128BIT_CONSTANT(17766728186571221404ULL, 12605985483714917081ULL)) -PCG_DEFINE_CONSTANT(pcg128_t, mcg, unmultiplier, - 
PCG_128BIT_CONSTANT(14422606686972528997ULL, 15009553638781119849ULL)) +unique_ptr PhysicalLimit::GetOperatorState() { + return make_unique(*this, children[0].get(), 0); +} +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/operator/helper/physical_vacuum.hpp +// +// +//===----------------------------------------------------------------------===// -template -struct rxs_m_xs_mixin { - static xtype output(itype internal) - { - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t opbits = xtypebits >= 128 ? 6 - : xtypebits >= 64 ? 5 - : xtypebits >= 32 ? 4 - : xtypebits >= 16 ? 3 - : 2; - constexpr bitcount_t shift = bits - xtypebits; - constexpr bitcount_t mask = (1 << opbits) - 1; - bitcount_t rshift = - opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; - internal ^= internal >> (opbits + rshift); - internal *= mcg_multiplier::multiplier(); - xtype result = internal >> shift; - result ^= result >> ((2U*xtypebits+2U)/3U); - return result; - } - static itype unoutput(itype internal) - { - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t opbits = bits >= 128 ? 6 - : bits >= 64 ? 5 - : bits >= 32 ? 4 - : bits >= 16 ? 3 - : 2; - constexpr bitcount_t mask = (1 << opbits) - 1; - internal = unxorshift(internal, bits, (2U*bits+2U)/3U); - internal *= mcg_unmultiplier::unmultiplier(); - bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0; - internal = unxorshift(internal, bits, opbits + rshift); - return internal; - } -}; +namespace duckdb { +//! 
PhysicalVacuum represents an etension LOAD operation +class PhysicalLoad : public PhysicalOperator { +public: + explicit PhysicalLoad(unique_ptr info, idx_t estimated_cardinality) + : PhysicalOperator(PhysicalOperatorType::LOAD, {LogicalType::BOOLEAN}, estimated_cardinality), + info(move(info)) { + } -/* - * RXS M -- random xorshift, mcg multiply - */ + unique_ptr info; -template -struct rxs_m_mixin { - static xtype output(itype internal) - { - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t opbits = xtypebits >= 128 ? 6 - : xtypebits >= 64 ? 5 - : xtypebits >= 32 ? 4 - : xtypebits >= 16 ? 3 - : 2; - constexpr bitcount_t shift = bits - xtypebits; - constexpr bitcount_t mask = (1 << opbits) - 1; - bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0; - internal ^= internal >> (opbits + rshift); - internal *= mcg_multiplier::multiplier(); - xtype result = internal >> shift; - return result; - } +public: + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; }; +} // namespace duckdb -/* - * DXSM -- double xorshift multiply - * - * This is a new, more powerful output permutation (added in 2019). It's - * a more comprehensive scrambling than RXS M, but runs faster on 128-bit - * types. Although primarily intended for use at large sizes, also works - * at smaller sizes as well. - * - * This permutation is similar to xorshift multiply hash functions, except - * that one of the multipliers is the LCG multiplier (to avoid needing to - * have a second constant) and the other is based on the low-order bits. - * This latter aspect means that the scrambling applied to the high bits - * depends on the low bits, and makes it (to my eye) impractical to back - * out the permutation without having the low-order bits. 
- */ - -template -struct dxsm_mixin { - inline xtype output(itype internal) - { - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); - constexpr bitcount_t itypebits = bitcount_t(sizeof(itype) * 8); - static_assert(xtypebits <= itypebits/2, - "Output type must be half the size of the state type."); - xtype hi = xtype(internal >> (itypebits - xtypebits)); - xtype lo = xtype(internal); - lo |= 1; - hi ^= hi >> (xtypebits/2); - hi *= xtype(cheap_multiplier::multiplier()); - hi ^= hi >> (3*(xtypebits/4)); - hi *= lo; - return hi; - } -}; -/* - * XSL RR -- fixed xorshift (to low bits), random rotate - * - * Useful for 128-bit types that are split across two CPU registers. - */ -template -struct xsl_rr_mixin { - static xtype output(itype internal) - { - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t sparebits = bits - xtypebits; - constexpr bitcount_t wantedopbits = xtypebits >= 128 ? 7 - : xtypebits >= 64 ? 6 - : xtypebits >= 32 ? 5 - : xtypebits >= 16 ? 4 - : 3; - constexpr bitcount_t opbits = sparebits >= wantedopbits ? wantedopbits - : sparebits; - constexpr bitcount_t amplifier = wantedopbits - opbits; - constexpr bitcount_t mask = (1 << opbits) - 1; - constexpr bitcount_t topspare = sparebits; - constexpr bitcount_t bottomspare = sparebits - topspare; - constexpr bitcount_t xshift = (topspare + xtypebits) / 2; - - bitcount_t rot = - opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; - bitcount_t amprot = (rot << amplifier) & mask; - internal ^= internal >> xshift; - xtype result = xtype(internal >> bottomspare); - result = rotr(result, amprot); - return result; - } -}; +#ifndef _WIN32 +#include +#else +#define RTLD_LAZY 0 +#define RTLD_LOCAL 0 +#endif +namespace duckdb { -/* - * XSL RR RR -- fixed xorshift (to low bits), random rotate (both parts) - * - * Useful for 128-bit types that are split across two CPU registers. 
- * If you really want an invertable 128-bit RNG, I guess this is the one. - */ +#ifdef _WIN32 -template struct halfsize_trait {}; -template <> struct halfsize_trait { typedef uint64_t type; }; -template <> struct halfsize_trait { typedef uint32_t type; }; -template <> struct halfsize_trait { typedef uint16_t type; }; -template <> struct halfsize_trait { typedef uint8_t type; }; - -template -struct xsl_rr_rr_mixin { - typedef typename halfsize_trait::type htype; - - static itype output(itype internal) - { - constexpr bitcount_t htypebits = bitcount_t(sizeof(htype) * 8); - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t sparebits = bits - htypebits; - constexpr bitcount_t wantedopbits = htypebits >= 128 ? 7 - : htypebits >= 64 ? 6 - : htypebits >= 32 ? 5 - : htypebits >= 16 ? 4 - : 3; - constexpr bitcount_t opbits = sparebits >= wantedopbits ? wantedopbits - : sparebits; - constexpr bitcount_t amplifier = wantedopbits - opbits; - constexpr bitcount_t mask = (1 << opbits) - 1; - constexpr bitcount_t topspare = sparebits; - constexpr bitcount_t xshift = (topspare + htypebits) / 2; - - bitcount_t rot = - opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; - bitcount_t amprot = (rot << amplifier) & mask; - internal ^= internal >> xshift; - htype lowbits = htype(internal); - lowbits = rotr(lowbits, amprot); - htype highbits = htype(internal >> topspare); - bitcount_t rot2 = lowbits & mask; - bitcount_t amprot2 = (rot2 << amplifier) & mask; - highbits = rotr(highbits, amprot2); - return (itype(highbits) << topspare) ^ itype(lowbits); - } -}; +void *dlopen(const char *file, int mode) { + D_ASSERT(file); + return (void *)LoadLibrary(file); +} +void *dlsym(void *handle, const char *name) { + D_ASSERT(handle); + return (void *)GetProcAddress((HINSTANCE)handle, name); +} +#endif -/* - * XSH -- fixed xorshift (to high bits) - * - * You shouldn't use this at 64-bits or less. 
- */ +void PhysicalLoad::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const { + auto &fs = FileSystem::GetFileSystem(context.client); + auto filename = fs.ConvertSeparators(info->filename); + if (!fs.FileExists(filename)) { + throw InvalidInputException("File %s not found", filename); + } + auto lib_hdl = dlopen(filename.c_str(), RTLD_LAZY | RTLD_LOCAL); + if (!lib_hdl) { + throw InvalidInputException("File %s could not be loaded", filename); + } -template -struct xsh_mixin { - static xtype output(itype internal) - { - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t sparebits = bits - xtypebits; - constexpr bitcount_t topspare = 0; - constexpr bitcount_t bottomspare = sparebits - topspare; - constexpr bitcount_t xshift = (topspare + xtypebits) / 2; + auto basename = fs.ExtractBaseName(filename); + auto init_fun_name = basename + "_init"; + auto version_fun_name = basename + "_version"; - internal ^= internal >> xshift; - xtype result = internal >> bottomspare; - return result; - } -}; + void (*init_fun)(DatabaseInstance &); + const char *(*version_fun)(void); -/* - * XSL -- fixed xorshift (to low bits) - * - * You shouldn't use this at 64-bits or less. 
- */ + *(void **)(&init_fun) = dlsym(lib_hdl, init_fun_name.c_str()); + if (init_fun == nullptr) { + throw InvalidInputException("File %s did not contain initialization function %s", filename, init_fun_name); + } -template -struct xsl_mixin { - inline xtype output(itype internal) - { - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t sparebits = bits - xtypebits; - constexpr bitcount_t topspare = sparebits; - constexpr bitcount_t bottomspare = sparebits - topspare; - constexpr bitcount_t xshift = (topspare + xtypebits) / 2; + *(void **)(&version_fun) = dlsym(lib_hdl, version_fun_name.c_str()); + if (init_fun == nullptr) { + throw InvalidInputException("File %s did not contain version function %s", filename, version_fun_name); + } + auto extension_version = std::string((*version_fun)()); + auto engine_version = DuckDB::LibraryVersion(); + if (extension_version != engine_version) { + throw InvalidInputException("Extension %s version (%s) does not match DuckDB version (%s)", filename, + extension_version, engine_version); + } - internal ^= internal >> xshift; - xtype result = internal >> bottomspare; - return result; - } -}; + try { + (*init_fun)(*context.client.db); + } catch (Exception &e) { + throw InvalidInputException("Initialization function %s from file %s threw an exception: %s", init_fun_name, + filename, e.what()); + } + state->finished = true; +} +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/operator/helper/physical_pragma.hpp +// +// +//===----------------------------------------------------------------------===// -/* ---- End of Output Functions ---- */ -template -struct inside_out : private baseclass { - inside_out() = delete; - typedef typename baseclass::result_type result_type; - typedef typename baseclass::state_type state_type; - 
static_assert(sizeof(result_type) == sizeof(state_type), - "Require a RNG whose output function is a permutation"); - static bool external_step(result_type& randval, size_t i) - { - state_type state = baseclass::unoutput(randval); - state = state * baseclass::multiplier() + baseclass::increment() - + state_type(i*2); - result_type result = baseclass::output(state); - randval = result; - state_type zero = - baseclass::is_mcg ? state & state_type(3U) : state_type(0U); - return result == zero; - } - static bool external_advance(result_type& randval, size_t i, - result_type delta, bool forwards = true) - { - state_type state = baseclass::unoutput(randval); - state_type mult = baseclass::multiplier(); - state_type inc = baseclass::increment() + state_type(i*2); - state_type zero = - baseclass::is_mcg ? state & state_type(3U) : state_type(0U); - state_type dist_to_zero = baseclass::distance(state, zero, mult, inc); - bool crosses_zero = - forwards ? dist_to_zero <= delta - : (-dist_to_zero) <= delta; - if (!forwards) - delta = -delta; - state = baseclass::advance(state, delta, mult, inc); - randval = baseclass::output(state); - return crosses_zero; - } -}; +namespace duckdb { -template -class pcg_extended : public baseclass { +//! 
PhysicalPragma represents the PRAGMA operator +class PhysicalPragma : public PhysicalOperator { public: - typedef typename baseclass::state_type state_type; - typedef typename baseclass::result_type result_type; - typedef inside_out insideout; - -private: - static constexpr bitcount_t rtypebits = sizeof(result_type)*8; - static constexpr bitcount_t stypebits = sizeof(state_type)*8; - - static constexpr bitcount_t tick_limit_pow2 = 64U; - - static constexpr size_t table_size = 1UL << table_pow2; - static constexpr size_t table_shift = stypebits - table_pow2; - static constexpr state_type table_mask = - (state_type(1U) << table_pow2) - state_type(1U); + PhysicalPragma(PragmaFunction function_p, PragmaInfo info_p, idx_t estimated_cardinality) + : PhysicalOperator(PhysicalOperatorType::PRAGMA, {LogicalType::BOOLEAN}, estimated_cardinality), + function(move(function_p)), info(move(info_p)) { + } - static constexpr bool may_tick = - (advance_pow2 < stypebits) && (advance_pow2 < tick_limit_pow2); - static constexpr size_t tick_shift = stypebits - advance_pow2; - static constexpr state_type tick_mask = - may_tick ? state_type( - (uint64_t(1) << (advance_pow2*may_tick)) - 1) - // ^-- stupidity to appease GCC warnings - : ~state_type(0U); + //! The pragma function to call + PragmaFunction function; + //! The context of the call + PragmaInfo info; - static constexpr bool may_tock = stypebits < tick_limit_pow2; +public: + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; +}; - result_type data_[table_size]; +} // namespace duckdb - PCG_NOINLINE void advance_table(); - PCG_NOINLINE void advance_table(state_type delta, bool isForwards = true); +namespace duckdb { - result_type& get_extended_value() - { - state_type state = this->state_; - if (kdd && baseclass::is_mcg) { - // The low order bits of an MCG are constant, so drop them. - state >>= 2; - } - size_t index = kdd ? 
state & table_mask - : state >> table_shift; - - if (may_tick) { - bool tick = kdd ? (state & tick_mask) == state_type(0u) - : (state >> tick_shift) == state_type(0u); - if (tick) - advance_table(); - } - if (may_tock) { - bool tock = state == state_type(0u); - if (tock) - advance_table(); - } - return data_[index]; - } +void PhysicalPragma::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const { + auto &client = context.client; + FunctionParameters parameters {info.parameters, info.named_parameters}; + function.function(client, parameters); +} -public: - static constexpr size_t period_pow2() - { - return baseclass::period_pow2() + table_size*extvalclass::period_pow2(); - } +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/operator/helper/physical_prepare.hpp +// +// +//===----------------------------------------------------------------------===// - PCG_ALWAYS_INLINE result_type operator()() - { - result_type rhs = get_extended_value(); - result_type lhs = this->baseclass::operator()(); - return lhs ^ rhs; - } - result_type operator()(result_type upper_bound) - { - return bounded_rand(*this, upper_bound); - } - void set(result_type wanted) - { - result_type& rhs = get_extended_value(); - result_type lhs = this->baseclass::operator()(); - rhs = lhs ^ wanted; - } - void advance(state_type distance, bool forwards = true); - void backstep(state_type distance) - { - advance(distance, false); - } +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/main/prepared_statement_data.hpp +// +// +//===----------------------------------------------------------------------===// - pcg_extended(const result_type* data) - : baseclass() - { - datainit(data); - } - pcg_extended(const result_type* data, state_type seed) - : baseclass(seed) - { - datainit(data); - } - // This function may or may not 
exist. It thus has to be a template - // to use SFINAE; users don't have to worry about its template-ness. - template - pcg_extended(const result_type* data, state_type seed, - typename bc::stream_state stream_seed) - : baseclass(seed, stream_seed) - { - datainit(data); - } - pcg_extended() - : baseclass() - { - selfinit(); - } - pcg_extended(state_type seed) - : baseclass(seed) - { - selfinit(); - } - // This function may or may not exist. It thus has to be a template - // to use SFINAE; users don't have to worry about its template-ness. - template - pcg_extended(state_type seed, typename bc::stream_state stream_seed) - : baseclass(seed, stream_seed) - { - selfinit(); - } -private: - void selfinit(); - void datainit(const result_type* data); +namespace duckdb { +class CatalogEntry; +class PhysicalOperator; +class SQLStatement; +class PreparedStatementData { public: + DUCKDB_API explicit PreparedStatementData(StatementType type); + DUCKDB_API ~PreparedStatementData(); - template::value - && !std::is_convertible::value>::type> - pcg_extended(SeedSeq&& seedSeq) - : baseclass(seedSeq) - { - generate_to(seedSeq, data_); - } - - template - void seed(Args&&... args) - { - new (this) pcg_extended(std::forward(args)...); - } + StatementType statement_type; + //! The unbound SQL statement that was prepared + unique_ptr unbound_statement; + //! The fully prepared physical plan of the prepared statement + unique_ptr plan; + //! The map of parameter index to the actual value entry + unordered_map>> value_map; - template - friend bool operator==(const pcg_extended&, - const pcg_extended&); + //! The result names of the transaction + vector names; + //! The result types of the transaction + vector types; - template - friend std::basic_ostream& - operator<<(std::basic_ostream& out, - const pcg_extended&); + //! Whether or not the statement is a read-only statement, or whether it can result in changes to the database + bool read_only; + //! 
Whether or not the statement requires a valid transaction. Almost all statements require this, with the + //! exception of + bool requires_valid_transaction; + //! Whether or not the result can be streamed to the client + bool allow_stream_result; - template - friend std::basic_istream& - operator>>(std::basic_istream& in, - pcg_extended&); + //! The catalog version of when the prepared statement was bound + //! If this version is lower than the current catalog version, we have to rebind the prepared statement + idx_t catalog_version; +public: + //! Bind a set of values to the prepared statement data + DUCKDB_API void Bind(vector values); + //! Get the expected SQL Type of the bound parameter + DUCKDB_API LogicalType GetType(idx_t param_index); }; +} // namespace duckdb -template -void pcg_extended::datainit( - const result_type* data) -{ - for (size_t i = 0; i < table_size; ++i) - data_[i] = data[i]; -} - -template -void pcg_extended::selfinit() -{ - // We need to fill the extended table with something, and we have - // very little provided data, so we use the base generator to - // produce values. Although not ideal (use a seed sequence, folks!), - // unexpected correlations are mitigated by - // - using XOR differences rather than the number directly - // - the way the table is accessed, its values *won't* be accessed - // in the same order the were written. 
- // - any strange correlations would only be apparent if we - // were to backstep the generator so that the base generator - // was generating the same values again - result_type lhs = baseclass::operator()(); - result_type rhs = baseclass::operator()(); - result_type xdiff = lhs - rhs; - for (size_t i = 0; i < table_size; ++i) { - data_[i] = baseclass::operator()() ^ xdiff; - } -} - -template -bool operator==(const pcg_extended& lhs, - const pcg_extended& rhs) -{ - auto& base_lhs = static_cast(lhs); - auto& base_rhs = static_cast(rhs); - return base_lhs == base_rhs - && std::equal( - std::begin(lhs.data_), std::end(lhs.data_), - std::begin(rhs.data_) - ); -} - -template -inline bool operator!=(const pcg_extended& lhs, - const pcg_extended& rhs) -{ - return !operator==(lhs, rhs); -} - -template -std::basic_ostream& -operator<<(std::basic_ostream& out, - const pcg_extended& rng) -{ - auto orig_flags = out.flags(std::ios_base::dec | std::ios_base::left); - auto space = out.widen(' '); - auto orig_fill = out.fill(); - - out << rng.multiplier() << space - << rng.increment() << space - << rng.state_; - - for (const auto& datum : rng.data_) - out << space << datum; - - out.flags(orig_flags); - out.fill(orig_fill); - return out; -} - -template -std::basic_istream& -operator>>(std::basic_istream& in, - pcg_extended& rng) -{ - pcg_extended new_rng; - auto& base_rng = static_cast(new_rng); - in >> base_rng; +namespace duckdb { - if (in.fail()) - return in; +class PhysicalPrepare : public PhysicalOperator { +public: + PhysicalPrepare(string name, shared_ptr prepared, idx_t estimated_cardinality) + : PhysicalOperator(PhysicalOperatorType::PREPARE, {LogicalType::BOOLEAN}, estimated_cardinality), name(name), + prepared(move(prepared)) { + } - auto orig_flags = in.flags(std::ios_base::dec | std::ios_base::skipws); + string name; + shared_ptr prepared; - for (auto& datum : new_rng.data_) { - in >> datum; - if (in.fail()) - goto bail; - } +public: + void 
GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; +}; - rng = new_rng; +} // namespace duckdb -bail: - in.flags(orig_flags); - return in; -} +namespace duckdb { -template -void -pcg_extended::advance_table() -{ - bool carry = false; - for (size_t i = 0; i < table_size; ++i) { - if (carry) { - carry = insideout::external_step(data_[i],i+1); - } - bool carry2 = insideout::external_step(data_[i],i+1); - carry = carry || carry2; - } -} +void PhysicalPrepare::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state) const { + auto &client = context.client; -template -void -pcg_extended::advance_table( - state_type delta, bool isForwards) -{ - typedef typename baseclass::state_type base_state_t; - typedef typename extvalclass::state_type ext_state_t; - constexpr bitcount_t basebits = sizeof(base_state_t)*8; - constexpr bitcount_t extbits = sizeof(ext_state_t)*8; - static_assert(basebits <= extbits || advance_pow2 > 0, - "Current implementation might overflow its carry"); - - base_state_t carry = 0; - for (size_t i = 0; i < table_size; ++i) { - base_state_t total_delta = carry + delta; - ext_state_t trunc_delta = ext_state_t(total_delta); - if (basebits > extbits) { - carry = total_delta >> extbits; - } else { - carry = 0; - } - carry += - insideout::external_advance(data_[i],i+1, trunc_delta, isForwards); - } + // store the prepared statement in the context + client.prepared_statements[name] = prepared; + state->finished = true; } -template -void pcg_extended::advance( - state_type distance, bool forwards) -{ - static_assert(kdd, - "Efficient advance is too hard for non-kdd extension. " - "For a weak advance, cast to base class"); - state_type zero = - baseclass::is_mcg ? 
this->state_ & state_type(3U) : state_type(0U); - if (may_tick) { - state_type ticks = distance >> (advance_pow2*may_tick); - // ^-- stupidity to appease GCC - // warnings - state_type adv_mask = - baseclass::is_mcg ? tick_mask << 2 : tick_mask; - state_type next_advance_distance = this->distance(zero, adv_mask); - if (!forwards) - next_advance_distance = (-next_advance_distance) & tick_mask; - if (next_advance_distance < (distance & tick_mask)) { - ++ticks; - } - if (ticks) - advance_table(ticks, forwards); - } - if (forwards) { - if (may_tock && this->distance(zero) <= distance) - advance_table(); - baseclass::advance(distance); - } else { - if (may_tock && -(this->distance(zero)) <= distance) - advance_table(state_type(1U), false); - baseclass::advance(-distance); - } -} - -} // namespace pcg_detail - -namespace pcg_engines { - -using namespace pcg_detail; - -/* Predefined types for XSH RS */ - -typedef oneseq_base oneseq_xsh_rs_16_8; -typedef oneseq_base oneseq_xsh_rs_32_16; -typedef oneseq_base oneseq_xsh_rs_64_32; -typedef oneseq_base oneseq_xsh_rs_128_64; -typedef oneseq_base - cm_oneseq_xsh_rs_128_64; - -typedef unique_base unique_xsh_rs_16_8; -typedef unique_base unique_xsh_rs_32_16; -typedef unique_base unique_xsh_rs_64_32; -typedef unique_base unique_xsh_rs_128_64; -typedef unique_base - cm_unique_xsh_rs_128_64; - -typedef setseq_base setseq_xsh_rs_16_8; -typedef setseq_base setseq_xsh_rs_32_16; -typedef setseq_base setseq_xsh_rs_64_32; -typedef setseq_base setseq_xsh_rs_128_64; -typedef setseq_base - cm_setseq_xsh_rs_128_64; - -typedef mcg_base mcg_xsh_rs_16_8; -typedef mcg_base mcg_xsh_rs_32_16; -typedef mcg_base mcg_xsh_rs_64_32; -typedef mcg_base mcg_xsh_rs_128_64; -typedef mcg_base - cm_mcg_xsh_rs_128_64; - -/* Predefined types for XSH RR */ - -typedef oneseq_base oneseq_xsh_rr_16_8; -typedef oneseq_base oneseq_xsh_rr_32_16; -typedef oneseq_base oneseq_xsh_rr_64_32; -typedef oneseq_base oneseq_xsh_rr_128_64; -typedef oneseq_base - 
cm_oneseq_xsh_rr_128_64; - -typedef unique_base unique_xsh_rr_16_8; -typedef unique_base unique_xsh_rr_32_16; -typedef unique_base unique_xsh_rr_64_32; -typedef unique_base unique_xsh_rr_128_64; -typedef unique_base - cm_unique_xsh_rr_128_64; - -typedef setseq_base setseq_xsh_rr_16_8; -typedef setseq_base setseq_xsh_rr_32_16; -typedef setseq_base setseq_xsh_rr_64_32; -typedef setseq_base setseq_xsh_rr_128_64; -typedef setseq_base - cm_setseq_xsh_rr_128_64; - -typedef mcg_base mcg_xsh_rr_16_8; -typedef mcg_base mcg_xsh_rr_32_16; -typedef mcg_base mcg_xsh_rr_64_32; -typedef mcg_base mcg_xsh_rr_128_64; -typedef mcg_base - cm_mcg_xsh_rr_128_64; - - -/* Predefined types for RXS M XS */ - -typedef oneseq_base oneseq_rxs_m_xs_8_8; -typedef oneseq_base oneseq_rxs_m_xs_16_16; -typedef oneseq_base oneseq_rxs_m_xs_32_32; -typedef oneseq_base oneseq_rxs_m_xs_64_64; -typedef oneseq_base - oneseq_rxs_m_xs_128_128; -typedef oneseq_base - cm_oneseq_rxs_m_xs_128_128; - -typedef unique_base unique_rxs_m_xs_8_8; -typedef unique_base unique_rxs_m_xs_16_16; -typedef unique_base unique_rxs_m_xs_32_32; -typedef unique_base unique_rxs_m_xs_64_64; -typedef unique_base unique_rxs_m_xs_128_128; -typedef unique_base - cm_unique_rxs_m_xs_128_128; - -typedef setseq_base setseq_rxs_m_xs_8_8; -typedef setseq_base setseq_rxs_m_xs_16_16; -typedef setseq_base setseq_rxs_m_xs_32_32; -typedef setseq_base setseq_rxs_m_xs_64_64; -typedef setseq_base setseq_rxs_m_xs_128_128; -typedef setseq_base - cm_setseq_rxs_m_xs_128_128; - - // MCG versions don't make sense here, so aren't defined. 
- -/* Predefined types for RXS M */ - -typedef oneseq_base oneseq_rxs_m_16_8; -typedef oneseq_base oneseq_rxs_m_32_16; -typedef oneseq_base oneseq_rxs_m_64_32; -typedef oneseq_base oneseq_rxs_m_128_64; -typedef oneseq_base - cm_oneseq_rxs_m_128_64; - -typedef unique_base unique_rxs_m_16_8; -typedef unique_base unique_rxs_m_32_16; -typedef unique_base unique_rxs_m_64_32; -typedef unique_base unique_rxs_m_128_64; -typedef unique_base - cm_unique_rxs_m_128_64; - -typedef setseq_base setseq_rxs_m_16_8; -typedef setseq_base setseq_rxs_m_32_16; -typedef setseq_base setseq_rxs_m_64_32; -typedef setseq_base setseq_rxs_m_128_64; -typedef setseq_base - cm_setseq_rxs_m_128_64; - -typedef mcg_base mcg_rxs_m_16_8; -typedef mcg_base mcg_rxs_m_32_16; -typedef mcg_base mcg_rxs_m_64_32; -typedef mcg_base mcg_rxs_m_128_64; -typedef mcg_base - cm_mcg_rxs_m_128_64; - -/* Predefined types for DXSM */ - -typedef oneseq_base oneseq_dxsm_16_8; -typedef oneseq_base oneseq_dxsm_32_16; -typedef oneseq_base oneseq_dxsm_64_32; -typedef oneseq_base oneseq_dxsm_128_64; -typedef oneseq_base - cm_oneseq_dxsm_128_64; - -typedef unique_base unique_dxsm_16_8; -typedef unique_base unique_dxsm_32_16; -typedef unique_base unique_dxsm_64_32; -typedef unique_base unique_dxsm_128_64; -typedef unique_base - cm_unique_dxsm_128_64; - -typedef setseq_base setseq_dxsm_16_8; -typedef setseq_base setseq_dxsm_32_16; -typedef setseq_base setseq_dxsm_64_32; -typedef setseq_base setseq_dxsm_128_64; -typedef setseq_base - cm_setseq_dxsm_128_64; - -typedef mcg_base mcg_dxsm_16_8; -typedef mcg_base mcg_dxsm_32_16; -typedef mcg_base mcg_dxsm_64_32; -typedef mcg_base mcg_dxsm_128_64; -typedef mcg_base - cm_mcg_dxsm_128_64; - -/* Predefined types for XSL RR (only defined for "large" types) */ - -typedef oneseq_base oneseq_xsl_rr_64_32; -typedef oneseq_base oneseq_xsl_rr_128_64; -typedef oneseq_base - cm_oneseq_xsl_rr_128_64; - -typedef unique_base unique_xsl_rr_64_32; -typedef unique_base unique_xsl_rr_128_64; -typedef 
unique_base - cm_unique_xsl_rr_128_64; - -typedef setseq_base setseq_xsl_rr_64_32; -typedef setseq_base setseq_xsl_rr_128_64; -typedef setseq_base - cm_setseq_xsl_rr_128_64; - -typedef mcg_base mcg_xsl_rr_64_32; -typedef mcg_base mcg_xsl_rr_128_64; -typedef mcg_base - cm_mcg_xsl_rr_128_64; - - -/* Predefined types for XSL RR RR (only defined for "large" types) */ - -typedef oneseq_base - oneseq_xsl_rr_rr_64_64; -typedef oneseq_base - oneseq_xsl_rr_rr_128_128; -typedef oneseq_base - cm_oneseq_xsl_rr_rr_128_128; - -typedef unique_base - unique_xsl_rr_rr_64_64; -typedef unique_base - unique_xsl_rr_rr_128_128; -typedef unique_base - cm_unique_xsl_rr_rr_128_128; - -typedef setseq_base - setseq_xsl_rr_rr_64_64; -typedef setseq_base - setseq_xsl_rr_rr_128_128; -typedef setseq_base - cm_setseq_xsl_rr_rr_128_128; - - // MCG versions don't make sense here, so aren't defined. - -/* Extended generators */ - -template -using ext_std8 = pcg_extended; - -template -using ext_std16 = pcg_extended; - -template -using ext_std32 = pcg_extended; - -template -using ext_std64 = pcg_extended; - - -template -using ext_oneseq_rxs_m_xs_32_32 = - ext_std32; - -template -using ext_mcg_xsh_rs_64_32 = - ext_std32; - -template -using ext_oneseq_xsh_rs_64_32 = - ext_std32; +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/operator/helper/physical_reservoir_sample.hpp +// +// +//===----------------------------------------------------------------------===// -template -using ext_setseq_xsh_rr_64_32 = - ext_std32; -template -using ext_mcg_xsl_rr_128_64 = - ext_std64; -template -using ext_oneseq_xsl_rr_128_64 = - ext_std64; -template -using ext_setseq_xsl_rr_128_64 = - ext_std64; -} // namespace pcg_engines -typedef pcg_engines::setseq_xsh_rr_64_32 pcg32; -typedef pcg_engines::oneseq_xsh_rr_64_32 pcg32_oneseq; -typedef pcg_engines::unique_xsh_rr_64_32 pcg32_unique; -typedef pcg_engines::mcg_xsh_rs_64_32 
pcg32_fast; +namespace duckdb { -typedef pcg_engines::setseq_xsl_rr_128_64 pcg64; -typedef pcg_engines::oneseq_xsl_rr_128_64 pcg64_oneseq; -typedef pcg_engines::unique_xsl_rr_128_64 pcg64_unique; -typedef pcg_engines::mcg_xsl_rr_128_64 pcg64_fast; +//! PhysicalReservoirSample represents a sample taken using reservoir sampling, which is a blocking sampling method +class PhysicalReservoirSample : public PhysicalSink { +public: + PhysicalReservoirSample(vector types, unique_ptr options, idx_t estimated_cardinality) + : PhysicalSink(PhysicalOperatorType::RESERVOIR_SAMPLE, move(types), estimated_cardinality), + options(move(options)) { + } -typedef pcg_engines::setseq_rxs_m_xs_8_8 pcg8_once_insecure; -typedef pcg_engines::setseq_rxs_m_xs_16_16 pcg16_once_insecure; -typedef pcg_engines::setseq_rxs_m_xs_32_32 pcg32_once_insecure; -typedef pcg_engines::setseq_rxs_m_xs_64_64 pcg64_once_insecure; -typedef pcg_engines::setseq_xsl_rr_rr_128_128 pcg128_once_insecure; + unique_ptr options; -typedef pcg_engines::oneseq_rxs_m_xs_8_8 pcg8_oneseq_once_insecure; -typedef pcg_engines::oneseq_rxs_m_xs_16_16 pcg16_oneseq_once_insecure; -typedef pcg_engines::oneseq_rxs_m_xs_32_32 pcg32_oneseq_once_insecure; -typedef pcg_engines::oneseq_rxs_m_xs_64_64 pcg64_oneseq_once_insecure; -typedef pcg_engines::oneseq_xsl_rr_rr_128_128 pcg128_oneseq_once_insecure; +public: + void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, + DataChunk &input) const override; + unique_ptr GetGlobalState(ClientContext &context) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; + unique_ptr GetOperatorState() override; -// These two extended RNGs provide two-dimensionally equidistributed -// 32-bit generators. pcg32_k2_fast occupies the same space as pcg64, -// and can be called twice to generate 64 bits, but does not required -// 128-bit math; on 32-bit systems, it's faster than pcg64 as well. 
+ string ParamsToString() const override; +}; -typedef pcg_engines::ext_setseq_xsh_rr_64_32<1,16,true> pcg32_k2; -typedef pcg_engines::ext_oneseq_xsh_rs_64_32<1,32,true> pcg32_k2_fast; +} // namespace duckdb -// These eight extended RNGs have about as much state as arc4random -// -// - the k variants are k-dimensionally equidistributed -// - the c variants offer better crypographic security +//===----------------------------------------------------------------------===// +// DuckDB // -// (just how good the cryptographic security is is an open question) - -typedef pcg_engines::ext_setseq_xsh_rr_64_32<6,16,true> pcg32_k64; -typedef pcg_engines::ext_mcg_xsh_rs_64_32<6,32,true> pcg32_k64_oneseq; -typedef pcg_engines::ext_oneseq_xsh_rs_64_32<6,32,true> pcg32_k64_fast; - -typedef pcg_engines::ext_setseq_xsh_rr_64_32<6,16,false> pcg32_c64; -typedef pcg_engines::ext_oneseq_xsh_rs_64_32<6,32,false> pcg32_c64_oneseq; -typedef pcg_engines::ext_mcg_xsh_rs_64_32<6,32,false> pcg32_c64_fast; - -typedef pcg_engines::ext_setseq_xsl_rr_128_64<5,16,true> pcg64_k32; -typedef pcg_engines::ext_oneseq_xsl_rr_128_64<5,128,true> pcg64_k32_oneseq; -typedef pcg_engines::ext_mcg_xsl_rr_128_64<5,128,true> pcg64_k32_fast; - -typedef pcg_engines::ext_setseq_xsl_rr_128_64<5,16,false> pcg64_c32; -typedef pcg_engines::ext_oneseq_xsl_rr_128_64<5,128,false> pcg64_c32_oneseq; -typedef pcg_engines::ext_mcg_xsl_rr_128_64<5,128,false> pcg64_c32_fast; - -// These eight extended RNGs have more state than the Mersenne twister +// duckdb/execution/reservoir_sample.hpp // -// - the k variants are k-dimensionally equidistributed -// - the c variants offer better crypographic security // -// (just how good the cryptographic security is is an open question) - -typedef pcg_engines::ext_setseq_xsh_rr_64_32<10,16,true> pcg32_k1024; -typedef pcg_engines::ext_oneseq_xsh_rs_64_32<10,32,true> pcg32_k1024_fast; - -typedef pcg_engines::ext_setseq_xsh_rr_64_32<10,16,false> pcg32_c1024; -typedef 
pcg_engines::ext_oneseq_xsh_rs_64_32<10,32,false> pcg32_c1024_fast; - -typedef pcg_engines::ext_setseq_xsl_rr_128_64<10,16,true> pcg64_k1024; -typedef pcg_engines::ext_oneseq_xsl_rr_128_64<10,128,true> pcg64_k1024_fast; - -typedef pcg_engines::ext_setseq_xsl_rr_128_64<10,16,false> pcg64_c1024; -typedef pcg_engines::ext_oneseq_xsl_rr_128_64<10,128,false> pcg64_c1024_fast; - -// These generators have an insanely huge period (2^524352), and is suitable -// for silly party tricks, such as dumping out 64 KB ZIP files at an arbitrary -// point in the future. [Actually, over the full period of the generator, it -// will produce every 64 KB ZIP file 2^64 times!] +//===----------------------------------------------------------------------===// -typedef pcg_engines::ext_setseq_xsh_rr_64_32<14,16,true> pcg32_k16384; -typedef pcg_engines::ext_oneseq_xsh_rs_64_32<14,32,true> pcg32_k16384_fast; -#ifdef _MSC_VER - #pragma warning(default:4146) -#endif -#endif // PCG_RAND_HPP_INCLUDED -// LICENSE_CHANGE_END #include @@ -42918,10 +48670,8 @@ class BaseReservoirSampling { void ReplaceElement(); - //! These are only for pcg generator - unique_ptr rng; - unique_ptr> uniform_dist; - + //! The random generator + RandomEngine random; //! Priority queue of [random element, index] for each of the elements in the sample std::priority_queue> reservoir_weights; //! The next element to sample @@ -42936,7 +48686,7 @@ class BaseReservoirSampling { class BlockingSample { public: - explicit BlockingSample(int64_t seed) : random(seed), reservoirSampling(seed) { + explicit BlockingSample(int64_t seed) : base_reservoir_sample(seed), random(base_reservoir_sample.random) { } virtual ~BlockingSample() { } @@ -42949,10 +48699,9 @@ class BlockingSample { virtual unique_ptr GetChunk() = 0; protected: - //! The random generator - RandomEngine random; //! The reservoir sampling - BaseReservoirSampling reservoirSampling; + BaseReservoirSampling base_reservoir_sample; + RandomEngine &random; }; //! 
The reservoir sample class maintains a streaming sample of fixed size "sample_count" @@ -43051,7 +48800,7 @@ unique_ptr PhysicalReservoirSample::GetGlobalState(ClientCo } void PhysicalReservoirSample::Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, - DataChunk &input) { + DataChunk &input) const { auto &gstate = (SampleGlobalOperatorState &)state; if (!gstate.sample) { return; @@ -43067,7 +48816,7 @@ void PhysicalReservoirSample::Sink(ExecutionContext &context, GlobalOperatorStat // GetChunkInternal //===--------------------------------------------------------------------===// void PhysicalReservoirSample::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, - PhysicalOperatorState *state_p) { + PhysicalOperatorState *state_p) const { auto &sink = (SampleGlobalOperatorState &)*this->sink_state; if (!sink.sample) { return; @@ -43076,7 +48825,7 @@ void PhysicalReservoirSample::GetChunkInternal(ExecutionContext &context, DataCh if (!sample_chunk) { return; } - chunk.Reference(*sample_chunk); + chunk.Move(*sample_chunk); } unique_ptr PhysicalReservoirSample::GetOperatorState() { @@ -43112,7 +48861,7 @@ class PhysicalSet : public PhysicalOperator { } public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; public: std::string name; @@ -43126,7 +48875,7 @@ class PhysicalSet : public PhysicalOperator { namespace duckdb { -void PhysicalSet::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) { +void PhysicalSet::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const { auto &db = context.client.db; db->config.set_variables[name] = value; // woop state->finished = true; @@ -43159,14 +48908,14 @@ class PhysicalStreamingSample : public PhysicalOperator { int64_t seed; public: - 
void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; unique_ptr GetOperatorState() override; string ParamsToString() const override; private: - void SystemSample(DataChunk &input, DataChunk &result, PhysicalOperatorState *state); - void BernoulliSample(DataChunk &input, DataChunk &result, PhysicalOperatorState *state); + void SystemSample(DataChunk &input, DataChunk &result, PhysicalOperatorState *state) const; + void BernoulliSample(DataChunk &input, DataChunk &result, PhysicalOperatorState *state) const; }; } // namespace duckdb @@ -43194,7 +48943,7 @@ class StreamingSampleOperatorState : public PhysicalOperatorState { RandomEngine random; }; -void PhysicalStreamingSample::SystemSample(DataChunk &input, DataChunk &result, PhysicalOperatorState *state_p) { +void PhysicalStreamingSample::SystemSample(DataChunk &input, DataChunk &result, PhysicalOperatorState *state_p) const { // system sampling: we throw one dice per chunk auto &state = (StreamingSampleOperatorState &)*state_p; double rand = state.random.NextRandom(); @@ -43204,7 +48953,8 @@ void PhysicalStreamingSample::SystemSample(DataChunk &input, DataChunk &result, } } -void PhysicalStreamingSample::BernoulliSample(DataChunk &input, DataChunk &result, PhysicalOperatorState *state_p) { +void PhysicalStreamingSample::BernoulliSample(DataChunk &input, DataChunk &result, + PhysicalOperatorState *state_p) const { // bernoulli sampling: we throw one dice per tuple // then slice the result chunk auto &state = (StreamingSampleOperatorState &)*state_p; @@ -43222,7 +48972,7 @@ void PhysicalStreamingSample::BernoulliSample(DataChunk &input, DataChunk &resul } void PhysicalStreamingSample::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, - PhysicalOperatorState *state) { + PhysicalOperatorState *state) const { // get the next chunk from the 
child do { @@ -43279,7 +49029,7 @@ class PhysicalTransaction : public PhysicalOperator { unique_ptr info; public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; }; } // namespace duckdb @@ -43288,7 +49038,8 @@ class PhysicalTransaction : public PhysicalOperator { namespace duckdb { -void PhysicalTransaction::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) { +void PhysicalTransaction::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state) const { auto &client = context.client; switch (info->type) { @@ -43356,7 +49107,7 @@ class PhysicalVacuum : public PhysicalOperator { unique_ptr info; public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; }; } // namespace duckdb @@ -43364,7 +49115,7 @@ class PhysicalVacuum : public PhysicalOperator { namespace duckdb { -void PhysicalVacuum::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) { +void PhysicalVacuum::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const { // NOP state->finished = true; } @@ -43430,10 +49181,11 @@ class PhysicalBlockwiseNLJoin : public PhysicalJoin { unique_ptr GetGlobalState(ClientContext &context) override; unique_ptr GetLocalSinkState(ExecutionContext &context) override; - void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, DataChunk &input) override; - void Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr state) override; + void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, + DataChunk &input) 
const override; + bool Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr state) override; - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; unique_ptr GetOperatorState() override; string ParamsToString() const override; @@ -43525,7 +49277,7 @@ unique_ptr PhysicalBlockwiseNLJoin::GetLocalSinkState(ExecutionC } void PhysicalBlockwiseNLJoin::Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, - DataChunk &input) { + DataChunk &input) const { auto &gstate = (BlockwiseNLJoinGlobalState &)state; gstate.right_chunks.Append(input); } @@ -43533,7 +49285,7 @@ void PhysicalBlockwiseNLJoin::Sink(ExecutionContext &context, GlobalOperatorStat //===--------------------------------------------------------------------===// // Finalize //===--------------------------------------------------------------------===// -void PhysicalBlockwiseNLJoin::Finalize(Pipeline &pipeline, ClientContext &context, +bool PhysicalBlockwiseNLJoin::Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr state) { auto &gstate = (BlockwiseNLJoinGlobalState &)*state; if (IsRightOuterJoin(join_type)) { @@ -43541,6 +49293,7 @@ void PhysicalBlockwiseNLJoin::Finalize(Pipeline &pipeline, ClientContext &contex memset(gstate.rhs_found_match.get(), 0, sizeof(bool) * gstate.right_chunks.Count()); } PhysicalSink::Finalize(pipeline, context, move(state)); + return true; } //===--------------------------------------------------------------------===// @@ -43567,7 +49320,7 @@ class PhysicalBlockwiseNLJoinState : public PhysicalOperatorState { }; void PhysicalBlockwiseNLJoin::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, - PhysicalOperatorState *state_p) { + PhysicalOperatorState *state_p) const { auto state = reinterpret_cast(state_p); auto &gstate = (BlockwiseNLJoinGlobalState 
&)*sink_state; @@ -43647,8 +49400,7 @@ void PhysicalBlockwiseNLJoin::GetChunkInternal(ExecutionContext &context, DataCh // fill in the current element of the LHS into the chunk D_ASSERT(chunk.ColumnCount() == lchunk.ColumnCount() + rchunk.ColumnCount()); for (idx_t i = 0; i < lchunk.ColumnCount(); i++) { - auto lvalue = lchunk.GetValue(i, state->left_position); - chunk.data[i].Reference(lvalue); + ConstantVector::Reference(chunk.data[i], lchunk.data[i], state->left_position, lchunk.size()); } // for the RHS we just reference the entire vector for (idx_t i = 0; i < rchunk.ColumnCount(); i++) { @@ -43834,9 +49586,10 @@ class PhysicalCrossProduct : public PhysicalSink { public: unique_ptr GetGlobalState(ClientContext &context) override; - void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, DataChunk &input) override; + void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, + DataChunk &input) const override; - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; unique_ptr GetOperatorState() override; }; @@ -43862,7 +49615,7 @@ class CrossProductGlobalState : public GlobalOperatorState { CrossProductGlobalState() { } ChunkCollection rhs_materialized; - std::mutex rhs_lock; + mutex rhs_lock; }; unique_ptr PhysicalCrossProduct::GetGlobalState(ClientContext &context) { @@ -43870,7 +49623,7 @@ unique_ptr PhysicalCrossProduct::GetGlobalState(ClientConte } void PhysicalCrossProduct::Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate_p, - DataChunk &input) { + DataChunk &input) const { auto &sink = (CrossProductGlobalState &)state; lock_guard client_guard(sink.rhs_lock); sink.rhs_materialized.Append(input); @@ -43895,7 +49648,7 @@ unique_ptr PhysicalCrossProduct::GetOperatorState() { } void 
PhysicalCrossProduct::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, - PhysicalOperatorState *state_p) { + PhysicalOperatorState *state_p) const { auto state = reinterpret_cast(state_p); auto &sink = (CrossProductGlobalState &)*sink_state; auto &right_collection = sink.rhs_materialized; @@ -43919,14 +49672,16 @@ void PhysicalCrossProduct::GetChunkInternal(ExecutionContext &context, DataChunk // now match the current vector of the left relation with the current row // from the right relation chunk.SetCardinality(left_chunk.size()); + // create a reference to the vectors of the left column for (idx_t i = 0; i < left_chunk.ColumnCount(); i++) { - // first duplicate the values of the left side chunk.data[i].Reference(left_chunk.data[i]); } + // duplicate the values on the right side + auto &right_chunk = right_collection.GetChunkForRow(state->right_position); + auto row_in_chunk = state->right_position % STANDARD_VECTOR_SIZE; for (idx_t i = 0; i < right_collection.ColumnCount(); i++) { - // now create a reference to the vectors of the right chunk - auto rvalue = right_collection.GetValue(i, state->right_position); - chunk.data[left_chunk.ColumnCount() + i].Reference(rvalue); + ConstantVector::Reference(chunk.data[left_chunk.ColumnCount() + i], right_chunk.data[i], row_in_chunk, + right_chunk.size()); } // for the next iteration, move to the next position on the right side @@ -43964,11 +49719,12 @@ class PhysicalDelimJoin : public PhysicalSink { public: unique_ptr GetGlobalState(ClientContext &context) override; unique_ptr GetLocalSinkState(ExecutionContext &context) override; - void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, DataChunk &input) override; + void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, + DataChunk &input) const override; void Combine(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate) override; - void Finalize(Pipeline &pipeline, 
ClientContext &context, unique_ptr state) override; + bool Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr state) override; - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; unique_ptr GetOperatorState() override; string ParamsToString() const override; @@ -44000,7 +49756,7 @@ class PhysicalChunkScan : public PhysicalOperator { : PhysicalOperator(op_type, move(types), estimated_cardinality), collection(nullptr) { } - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; unique_ptr GetOperatorState() override; public: @@ -44074,15 +49830,16 @@ unique_ptr PhysicalDelimJoin::GetLocalSinkState(ExecutionContext } void PhysicalDelimJoin::Sink(ExecutionContext &context, GlobalOperatorState &state_p, LocalSinkState &lstate, - DataChunk &input) { + DataChunk &input) const { auto &state = (DelimJoinGlobalState &)state_p; state.lhs_data.Append(input); distinct->Sink(context, *state.distinct_state, lstate, input); } -void PhysicalDelimJoin::Finalize(Pipeline &pipeline, ClientContext &client, unique_ptr state) { +bool PhysicalDelimJoin::Finalize(Pipeline &pipeline, ClientContext &client, unique_ptr state) { auto &dstate = (DelimJoinGlobalState &)*state; // finalize the distinct HT + D_ASSERT(distinct); distinct->FinalizeImmediate(client, move(dstate.distinct_state)); // materialize the distinct collection DataChunk delim_chunk; @@ -44099,6 +49856,7 @@ void PhysicalDelimJoin::Finalize(Pipeline &pipeline, ClientContext &client, uniq dstate.delim_data.Append(delim_chunk); } PhysicalSink::Finalize(pipeline, client, move(state)); + return true; } void PhysicalDelimJoin::Combine(ExecutionContext &context, GlobalOperatorState &state, 
LocalSinkState &lstate) { @@ -44106,7 +49864,8 @@ void PhysicalDelimJoin::Combine(ExecutionContext &context, GlobalOperatorState & distinct->Combine(context, *dstate.distinct_state, lstate); } -void PhysicalDelimJoin::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state_p) { +void PhysicalDelimJoin::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state_p) const { auto state = reinterpret_cast(state_p); if (!state->join_state) { // create the state of the underlying join @@ -44152,6 +49911,7 @@ class PhysicalHashJoin : public PhysicalComparisonJoin { idx_t estimated_cardinality); PhysicalHashJoin(LogicalOperator &op, unique_ptr left, unique_ptr right, vector cond, JoinType join_type, idx_t estimated_cardinality); + void Combine(ExecutionContext &context, GlobalOperatorState &gstate, LocalSinkState &lstate) override; vector right_projection_map; //! The types of the keys @@ -44160,19 +49920,24 @@ class PhysicalHashJoin : public PhysicalComparisonJoin { vector build_types; //! Duplicate eliminated types; only used for delim_joins (i.e. correlated subqueries) vector delim_types; + //! 
Whether or not we can cache the chunk + bool can_cache; public: unique_ptr GetGlobalState(ClientContext &context) override; unique_ptr GetLocalSinkState(ExecutionContext &context) override; - void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, DataChunk &input) override; - void Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr gstate) override; + void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, + DataChunk &input) const override; + bool Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr gstate) override; - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; unique_ptr GetOperatorState() override; + void FinalizeOperatorState(PhysicalOperatorState &state, ExecutionContext &context) override; + private: - void ProbeHashTable(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state_p); + void ProbeHashTable(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state_p) const; }; } // namespace duckdb @@ -44244,10 +50009,6 @@ struct ApproxCountDistinctFun { static void RegisterFunction(BuiltinFunctions &set); }; -struct ModeFun { - static void RegisterFunction(BuiltinFunctions &set); -}; - struct ArgMinFun { static void RegisterFunction(BuiltinFunctions &set); }; @@ -44262,10 +50023,6 @@ struct FirstFun { static void RegisterFunction(BuiltinFunctions &set); }; -struct ArbitraryFun { - static void RegisterFunction(BuiltinFunctions &set); -}; - struct MaxFun { static void RegisterFunction(BuiltinFunctions &set); }; @@ -44283,6 +50040,7 @@ struct MinByFun { }; struct SumFun { + static AggregateFunction GetSumAggregate(PhysicalType type); static void RegisterFunction(BuiltinFunctions &set); }; @@ -44305,8 +50063,12 @@ struct StringAggFun { } // namespace duckdb + + 
namespace duckdb { +bool CanCacheType(const LogicalType &type); + PhysicalHashJoin::PhysicalHashJoin(LogicalOperator &op, unique_ptr left, unique_ptr right, vector cond, JoinType join_type, const vector &left_projection_map, @@ -44326,6 +50088,13 @@ PhysicalHashJoin::PhysicalHashJoin(LogicalOperator &op, unique_ptrGetTypes(), right_projection_map); } + // we avoid caching lists, since lists can be very large caching can have very negative effects + can_cache = true; + for (auto &type : types) { + if (!CanCacheType(type)) { + can_cache = false; + } + } } PhysicalHashJoin::PhysicalHashJoin(LogicalOperator &op, unique_ptr left, @@ -44395,9 +50164,7 @@ unique_ptr PhysicalHashJoin::GetGlobalState(ClientContext & info.correlated_counts = make_unique( BufferManager::GetBufferManager(context), delim_types, payload_types, correlated_aggregates); info.correlated_types = delim_types; - // FIXME: these can be initialized "empty" (without allocating empty vectors) info.group_chunk.Initialize(delim_types); - info.payload_chunk.Initialize(payload_types); info.result_chunk.Initialize(payload_types); } } @@ -44417,7 +50184,7 @@ unique_ptr PhysicalHashJoin::GetLocalSinkState(ExecutionContext } void PhysicalHashJoin::Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate_p, - DataChunk &input) { + DataChunk &input) const { auto &sink = (HashJoinGlobalState &)state; auto &lstate = (HashJoinLocalState &)lstate_p; // resolve the join keys for the right chunk @@ -44431,20 +50198,25 @@ void PhysicalHashJoin::Sink(ExecutionContext &context, GlobalOperatorState &stat lstate.build_chunk.data[i].Reference(input.data[right_projection_map[i]]); } sink.hash_table->Build(lstate.join_keys, lstate.build_chunk); - } else { + } else if (!build_types.empty()) { // there is not a projected map: place the entire right chunk in the HT sink.hash_table->Build(lstate.join_keys, input); + } else { + // there are only keys: place an empty chunk in the payload + 
lstate.build_chunk.SetCardinality(input.size()); + sink.hash_table->Build(lstate.join_keys, lstate.build_chunk); } } //===--------------------------------------------------------------------===// // Finalize //===--------------------------------------------------------------------===// -void PhysicalHashJoin::Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr state) { +bool PhysicalHashJoin::Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr state) { auto &sink = (HashJoinGlobalState &)*state; sink.hash_table->Finalize(); PhysicalSink::Finalize(pipeline, context, move(state)); + return true; } //===--------------------------------------------------------------------===// @@ -44463,6 +50235,25 @@ class PhysicalHashJoinState : public PhysicalOperatorState { unique_ptr scan_structure; }; +bool CanCacheType(const LogicalType &type) { + switch (type.id()) { + case LogicalTypeId::LIST: + case LogicalTypeId::MAP: + return false; + case LogicalTypeId::STRUCT: { + auto &entries = StructType::GetChildTypes(type); + for (auto &entry : entries) { + if (!CanCacheType(entry.second)) { + return false; + } + } + return true; + } + default: + return true; + } +} + unique_ptr PhysicalHashJoin::GetOperatorState() { auto state = make_unique(*this, children[0].get(), children[1].get(), conditions); state->cached_chunk.Initialize(types); @@ -44473,12 +50264,16 @@ unique_ptr PhysicalHashJoin::GetOperatorState() { return move(state); } -void PhysicalHashJoin::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state_p) { +void PhysicalHashJoin::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state_p) const { auto state = reinterpret_cast(state_p); auto &sink = (HashJoinGlobalState &)*sink_state; - if (sink.hash_table->size() == 0 && - (sink.hash_table->join_type == JoinType::INNER || sink.hash_table->join_type == JoinType::SEMI)) { - // empty hash table with INNER or SEMI join means empty 
result set + bool join_is_inner_right_semi = + (sink.hash_table->join_type == JoinType::INNER || sink.hash_table->join_type == JoinType::RIGHT || + sink.hash_table->join_type == JoinType::SEMI); + + if (sink.hash_table->Count() == 0 && join_is_inner_right_semi) { + // empty hash table with INNER, RIGHT or SEMI join means empty result set return; } do { @@ -44487,8 +50282,8 @@ void PhysicalHashJoin::GetChunkInternal(ExecutionContext &context, DataChunk &ch #if STANDARD_VECTOR_SIZE >= 128 if (state->cached_chunk.size() > 0) { // finished probing but cached data remains, return cached chunk - chunk.Reference(state->cached_chunk); - state->cached_chunk.Reset(); + chunk.Move(state->cached_chunk); + state->cached_chunk.Initialize(types); } else #endif if (IsRightOuterJoin(join_type)) { @@ -44498,13 +50293,13 @@ void PhysicalHashJoin::GetChunkInternal(ExecutionContext &context, DataChunk &ch return; } else { #if STANDARD_VECTOR_SIZE >= 128 - if (chunk.size() < 64) { + if (can_cache && chunk.size() < 64) { // small chunk: add it to chunk cache and continue state->cached_chunk.Append(chunk); if (state->cached_chunk.size() >= (STANDARD_VECTOR_SIZE - 64)) { // chunk cache full: return it - chunk.Reference(state->cached_chunk); - state->cached_chunk.Reset(); + chunk.Move(state->cached_chunk); + state->cached_chunk.Initialize(types); return; } else { // chunk cache not full: probe again @@ -44520,7 +50315,8 @@ void PhysicalHashJoin::GetChunkInternal(ExecutionContext &context, DataChunk &ch } while (true); } -void PhysicalHashJoin::ProbeHashTable(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state_p) { +void PhysicalHashJoin::ProbeHashTable(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state_p) const { auto state = reinterpret_cast(state_p); auto &sink = (HashJoinGlobalState &)*sink_state; @@ -44541,7 +50337,7 @@ void PhysicalHashJoin::ProbeHashTable(ExecutionContext &context, DataChunk &chun if (state->child_chunk.size() == 0) { 
return; } - if (sink.hash_table->size() == 0) { + if (sink.hash_table->Count() == 0) { ConstructEmptyJoinResult(sink.hash_table->join_type, sink.hash_table->has_null, state->child_chunk, chunk); return; } @@ -44553,8 +50349,21 @@ void PhysicalHashJoin::ProbeHashTable(ExecutionContext &context, DataChunk &chun state->scan_structure->Next(state->join_keys, state->child_chunk, chunk); } while (chunk.size() == 0); } +void PhysicalHashJoin::FinalizeOperatorState(PhysicalOperatorState &state, ExecutionContext &context) { + auto &state_p = reinterpret_cast(state); + context.thread.profiler.Flush(this, &state_p.probe_executor, "probe_executor", 0); + if (!children.empty() && state.child_state) { + children[0]->FinalizeOperatorState(*state.child_state, context); + } +} +void PhysicalHashJoin::Combine(ExecutionContext &context, GlobalOperatorState &gstate, LocalSinkState &lstate) { + auto &state = (HashJoinLocalState &)lstate; + context.thread.profiler.Flush(this, &state.build_executor, "build_executor", 1); + context.client.profiler->Flush(context.thread.profiler); +} -} // namespace duckdb//===----------------------------------------------------------------------===// +} // namespace duckdb +//===----------------------------------------------------------------------===// // DuckDB // // duckdb/execution/operator/join/physical_index_join.hpp @@ -44603,13 +50412,14 @@ class PhysicalIndexJoin : public PhysicalOperator { JoinType join_type; //! In case we swap rhs with lhs we need to output columns related to rhs first. 
bool lhs_first = true; - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; unique_ptr GetOperatorState() override; + void FinalizeOperatorState(PhysicalOperatorState &state, ExecutionContext &context) override; private: void GetRHSMatches(ExecutionContext &context, PhysicalOperatorState *state_p) const; //! Fills result chunk - void Output(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state_p); + void Output(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state_p) const; }; } // namespace duckdb @@ -44657,7 +50467,7 @@ class PhysicalTableScan : public PhysicalOperator { string GetName() const override; string ParamsToString() const override; - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; unique_ptr GetOperatorState() override; }; @@ -44674,7 +50484,7 @@ class PhysicalTableScan : public PhysicalOperator { -#include + namespace duckdb { class TableCatalogEntry; @@ -44692,14 +50502,7 @@ struct TableScanBindData : public FunctionData { vector result_ids; //! How many chunks we already scanned - std::atomic chunk_count; - - unique_ptr Copy() override { - auto result = make_unique(table); - result->is_index_scan = is_index_scan; - result->result_ids = result_ids; - return move(result); - } + atomic chunk_count; }; //! The table scan function represents a sequential scan over one of DuckDB's base tables. @@ -44713,6 +50516,7 @@ struct TableScanFunction { + #include #include @@ -44737,7 +50541,6 @@ class PhysicalIndexJoinOperatorState : public PhysicalOperatorState { //! 
Vector of rows that mush be fetched for every LHS key vector> rhs_rows; ExpressionExecutor probe_executor; - IndexLock lock; }; PhysicalIndexJoin::PhysicalIndexJoin(LogicalOperator &op, unique_ptr left, @@ -44767,7 +50570,7 @@ PhysicalIndexJoin::PhysicalIndexJoin(LogicalOperator &op, unique_ptrFetch(transaction, rhs_chunk, fetch_ids, row_ids, output_sel_idx, fetch_state); } @@ -44837,9 +50638,12 @@ void PhysicalIndexJoin::GetRHSMatches(ExecutionContext &context, PhysicalOperato state->rhs_rows[i].clear(); if (!equal_value.is_null) { if (fetch_types.empty()) { - //! Nothing to materialize + IndexLock lock; + index->InitializeLock(lock); art.SearchEqualJoinNoFetch(equal_value, state->result_sizes[i]); } else { + IndexLock lock; + index->InitializeLock(lock); art.SearchEqual((ARTIndexScanState *)index_state.get(), (idx_t)-1, state->rhs_rows[i]); state->result_sizes[i] = state->rhs_rows[i].size(); } @@ -44854,11 +50658,9 @@ void PhysicalIndexJoin::GetRHSMatches(ExecutionContext &context, PhysicalOperato } } -void PhysicalIndexJoin::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state_p) { +void PhysicalIndexJoin::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state_p) const { auto state = reinterpret_cast(state_p); - if (!state->lock.index_lock) { - index->InitializeLock(state->lock); - } state->result_size = 0; while (state->result_size == 0) { //! 
Check if we need to get a new LHS chunk @@ -44902,6 +50704,13 @@ unique_ptr PhysicalIndexJoin::GetOperatorState() { } return move(state); } +void PhysicalIndexJoin::FinalizeOperatorState(PhysicalOperatorState &state, ExecutionContext &context) { + auto &state_p = reinterpret_cast(state); + context.thread.profiler.Flush(this, &state_p.probe_executor, "probe_executor", 0); + if (!children.empty() && state.child_state) { + children[0]->FinalizeOperatorState(*state.child_state, context); + } +} } // namespace duckdb @@ -44942,17 +50751,19 @@ class PhysicalNestedLoopJoin : public PhysicalComparisonJoin { public: unique_ptr GetGlobalState(ClientContext &context) override; unique_ptr GetLocalSinkState(ExecutionContext &context) override; - void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, DataChunk &input) override; - void Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr gstate) override; - - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, + DataChunk &input) const override; + bool Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr gstate) override; + void FinalizeOperatorState(PhysicalOperatorState &state, ExecutionContext &context) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; unique_ptr GetOperatorState() override; + void Combine(ExecutionContext &context, GlobalOperatorState &gstate, LocalSinkState &lstate) override; private: // resolve joins that output max N elements (SEMI, ANTI, MARK) - void ResolveSimpleJoin(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state); + void ResolveSimpleJoin(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const; // resolve joins that can potentially output N*M elements (INNER, LEFT, FULL) - void 
ResolveComplexJoin(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state); + void ResolveComplexJoin(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const; }; } // namespace duckdb @@ -44963,6 +50774,7 @@ class PhysicalNestedLoopJoin : public PhysicalComparisonJoin { + namespace duckdb { PhysicalNestedLoopJoin::PhysicalNestedLoopJoin(LogicalOperator &op, unique_ptr left, @@ -45100,7 +50912,7 @@ class NestedLoopJoinGlobalState : public GlobalOperatorState { }; void PhysicalNestedLoopJoin::Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, - DataChunk &input) { + DataChunk &input) const { auto &gstate = (NestedLoopJoinGlobalState &)state; auto &nlj_state = (NestedLoopJoinLocalState &)lstate; @@ -45120,7 +50932,7 @@ void PhysicalNestedLoopJoin::Sink(ExecutionContext &context, GlobalOperatorState gstate.right_chunks.Append(nlj_state.right_condition); } -void PhysicalNestedLoopJoin::Finalize(Pipeline &pipeline, ClientContext &context, +bool PhysicalNestedLoopJoin::Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr state) { auto &gstate = (NestedLoopJoinGlobalState &)*state; if (join_type == JoinType::OUTER || join_type == JoinType::RIGHT) { @@ -45129,6 +50941,7 @@ void PhysicalNestedLoopJoin::Finalize(Pipeline &pipeline, ClientContext &context memset(gstate.right_found_match.get(), 0, sizeof(bool) * gstate.right_data.Count()); } PhysicalSink::Finalize(pipeline, context, move(state)); + return true; } unique_ptr PhysicalNestedLoopJoin::GetGlobalState(ClientContext &context) { @@ -45169,7 +50982,7 @@ class PhysicalNestedLoopJoinState : public PhysicalOperatorState { }; void PhysicalNestedLoopJoin::ResolveSimpleJoin(ExecutionContext &context, DataChunk &chunk, - PhysicalOperatorState *state_p) { + PhysicalOperatorState *state_p) const { auto state = reinterpret_cast(state_p); auto &gstate = (NestedLoopJoinGlobalState &)*sink_state; do { @@ -45221,7 +51034,7 @@ void 
PhysicalJoin::ConstructLeftJoinResult(DataChunk &left, DataChunk &result, b } void PhysicalNestedLoopJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &chunk, - PhysicalOperatorState *state_p) { + PhysicalOperatorState *state_p) const { auto state = reinterpret_cast(state_p); auto &gstate = (NestedLoopJoinGlobalState &)*sink_state; @@ -45309,7 +51122,7 @@ void PhysicalNestedLoopJoin::ResolveComplexJoin(ExecutionContext &context, DataC } void PhysicalNestedLoopJoin::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, - PhysicalOperatorState *state_p) { + PhysicalOperatorState *state_p) const { auto state = reinterpret_cast(state_p); auto &gstate = (NestedLoopJoinGlobalState &)*sink_state; @@ -45350,6 +51163,19 @@ unique_ptr PhysicalNestedLoopJoin::GetOperatorState() { return make_unique(*this, children[0].get(), conditions); } +void PhysicalNestedLoopJoin::FinalizeOperatorState(PhysicalOperatorState &state, ExecutionContext &context) { + auto &state_p = reinterpret_cast(state); + context.thread.profiler.Flush(this, &state_p.lhs_executor, "lhs_executor", 0); + if (!children.empty() && state.child_state) { + children[0]->FinalizeOperatorState(*state.child_state, context); + } +} +void PhysicalNestedLoopJoin::Combine(ExecutionContext &context, GlobalOperatorState &gstate, LocalSinkState &lstate) { + auto &state = (NestedLoopJoinLocalState &)lstate; + context.thread.profiler.Flush(this, &state.rhs_executor, "rhs_executor", 1); + context.client.profiler->Flush(context.thread.profiler); +} + } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB @@ -45380,17 +51206,20 @@ class PhysicalPiecewiseMergeJoin : public PhysicalComparisonJoin { unique_ptr GetGlobalState(ClientContext &context) override; unique_ptr GetLocalSinkState(ExecutionContext &context) override; - void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, DataChunk &input) override; - void 
Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr state) override; + void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, + DataChunk &input) const override; + bool Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr state) override; - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; unique_ptr GetOperatorState() override; + void FinalizeOperatorState(PhysicalOperatorState &state, ExecutionContext &context) override; + void Combine(ExecutionContext &context, GlobalOperatorState &gstate, LocalSinkState &lstate) override; private: // resolve joins that output max N elements (SEMI, ANTI, MARK) - void ResolveSimpleJoin(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state); + void ResolveSimpleJoin(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const; // resolve joins that can potentially output N*M elements (INNER, LEFT, FULL) - void ResolveComplexJoin(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state); + void ResolveComplexJoin(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const; }; } // namespace duckdb @@ -45401,6 +51230,7 @@ class PhysicalPiecewiseMergeJoin : public PhysicalComparisonJoin { + namespace duckdb { PhysicalPiecewiseMergeJoin::PhysicalPiecewiseMergeJoin(LogicalOperator &op, unique_ptr left, @@ -45468,7 +51298,7 @@ unique_ptr PhysicalPiecewiseMergeJoin::GetLocalSinkState(Executi } void PhysicalPiecewiseMergeJoin::Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, - DataChunk &input) { + DataChunk &input) const { auto &gstate = (MergeJoinGlobalState &)state; auto &mj_state = (MergeJoinLocalState &)lstate; @@ -45491,7 +51321,7 @@ void PhysicalPiecewiseMergeJoin::Sink(ExecutionContext &context, 
GlobalOperatorS //===--------------------------------------------------------------------===// static void OrderVector(Vector &vector, idx_t count, MergeOrder &order); -void PhysicalPiecewiseMergeJoin::Finalize(Pipeline &pipeline, ClientContext &context, +bool PhysicalPiecewiseMergeJoin::Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr state) { auto &gstate = (MergeJoinGlobalState &)*state; if (gstate.right_conditions.ChunkCount() > 0) { @@ -45517,6 +51347,7 @@ void PhysicalPiecewiseMergeJoin::Finalize(Pipeline &pipeline, ClientContext &con memset(gstate.right_found_match.get(), 0, sizeof(bool) * gstate.right_chunks.Count()); } PhysicalSink::Finalize(pipeline, context, move(state)); + return true; } //===--------------------------------------------------------------------===// @@ -45548,7 +51379,7 @@ class PhysicalPiecewiseMergeJoinState : public PhysicalOperatorState { }; void PhysicalPiecewiseMergeJoin::ResolveSimpleJoin(ExecutionContext &context, DataChunk &chunk, - PhysicalOperatorState *state_p) { + PhysicalOperatorState *state_p) const { auto state = reinterpret_cast(state_p); auto &gstate = (MergeJoinGlobalState &)*sink_state; do { @@ -45589,7 +51420,7 @@ void PhysicalPiecewiseMergeJoin::ResolveSimpleJoin(ExecutionContext &context, Da } void PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &chunk, - PhysicalOperatorState *state_p) { + PhysicalOperatorState *state_p) const { auto state = reinterpret_cast(state_p); auto &gstate = (MergeJoinGlobalState &)*sink_state; do { @@ -45675,7 +51506,7 @@ void PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionContext &context, D } void PhysicalPiecewiseMergeJoin::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, - PhysicalOperatorState *state_p) { + PhysicalOperatorState *state_p) const { auto state = reinterpret_cast(state_p); auto &gstate = (MergeJoinGlobalState &)*sink_state; @@ -45714,6 +51545,19 @@ void 
PhysicalPiecewiseMergeJoin::GetChunkInternal(ExecutionContext &context, Dat unique_ptr PhysicalPiecewiseMergeJoin::GetOperatorState() { return make_unique(*this, children[0].get(), conditions); } +void PhysicalPiecewiseMergeJoin::FinalizeOperatorState(PhysicalOperatorState &state, ExecutionContext &context) { + auto &state_p = reinterpret_cast(state); + context.thread.profiler.Flush(this, &state_p.lhs_executor, "lhs_executor", 0); + if (!children.empty() && state.child_state) { + children[0]->FinalizeOperatorState(*state.child_state, context); + } +} +void PhysicalPiecewiseMergeJoin::Combine(ExecutionContext &context, GlobalOperatorState &gstate, + LocalSinkState &lstate) { + auto &state = (MergeJoinLocalState &)lstate; + context.thread.profiler.Flush(this, &state.rhs_executor, "rhs_executor", 1); + context.client.profiler->Flush(context.thread.profiler); +} //===--------------------------------------------------------------------===// // OrderVector @@ -45879,34 +51723,37 @@ void OrderVector(Vector &vector, idx_t count, MergeOrder &order) { - - namespace duckdb { +class OrderGlobalState; + //! Physically re-orders the input data class PhysicalOrder : public PhysicalSink { public: - PhysicalOrder(vector types, vector orders, idx_t estimated_cardinality); + PhysicalOrder(vector types, vector orders, + vector> statistics, idx_t estimated_cardinality); //! Input data vector orders; + //! 
Statistics of the order expressions + vector> statistics; public: - void Sink(ExecutionContext &context, GlobalOperatorState &gstate_p, LocalSinkState &lstate_p, - DataChunk &input) override; - void Combine(ExecutionContext &context, GlobalOperatorState &gstate_p, LocalSinkState &lstate_p) override; - void Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr gstate_p) override; - unique_ptr GetLocalSinkState(ExecutionContext &context) override; unique_ptr GetGlobalState(ClientContext &context) override; - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; - unique_ptr GetOperatorState() override; + void Sink(ExecutionContext &context, GlobalOperatorState &gstate_p, LocalSinkState &lstate_p, + DataChunk &input) const override; + void Combine(ExecutionContext &context, GlobalOperatorState &gstate_p, LocalSinkState &lstate_p) override; + bool Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr gstate_p) override; - idx_t MaxThreads(ClientContext &context); - unique_ptr GetParallelState(); + unique_ptr GetOperatorState() override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; string ParamsToString() const override; + + //! 
Schedules tasks to merge the data during the Finalize phase + static void ScheduleMergeTasks(Pipeline &pipeline, ClientContext &context, OrderGlobalState &state); }; } // namespace duckdb @@ -45918,210 +51765,61 @@ class PhysicalOrder : public PhysicalSink { - namespace duckdb { -PhysicalOrder::PhysicalOrder(vector types, vector orders, idx_t estimated_cardinality) - : PhysicalSink(PhysicalOperatorType::ORDER_BY, move(types), estimated_cardinality), orders(move(orders)) { +PhysicalOrder::PhysicalOrder(vector types, vector orders, + vector> statistics, idx_t estimated_cardinality) + : PhysicalSink(PhysicalOperatorType::ORDER_BY, move(types), estimated_cardinality), orders(move(orders)), + statistics(move(statistics)) { } //===--------------------------------------------------------------------===// // Sink //===--------------------------------------------------------------------===// -struct SortingState { - const idx_t ENTRY_SIZE; - - const vector ORDER_TYPES; - const vector ORDER_BY_NULL_TYPES; - const vector TYPES; - const vector STATS; - - const vector HAS_NULL; - const vector CONSTANT_SIZE; - const vector COL_SIZE; -}; - -struct PayloadState { - const bool HAS_VARIABLE_SIZE; - const idx_t VALIDITYMASK_SIZE; - const idx_t ENTRY_SIZE; -}; - class OrderGlobalState : public GlobalOperatorState { public: - explicit OrderGlobalState(BufferManager &buffer_manager) : buffer_manager(buffer_manager) { + OrderGlobalState(BufferManager &buffer_manager, PhysicalOrder &order, RowLayout payload_layout) + : global_sort_state(buffer_manager, order.orders, order.statistics, payload_layout) { } - //! The lock for updating the order global state - std::mutex lock; - //! The buffer manager - BufferManager &buffer_manager; - //! Sorting columns, and variable size sorting data (if any) - unique_ptr sorting_block; - vector> var_sorting_blocks; - vector> var_sorting_sizes; - - //! 
Payload data (and payload entry sizes if there is variable size data) - unique_ptr payload_block; - unique_ptr sizes_block; - - //! Constants concerning sorting and/or payload data - unique_ptr sorting_state; - unique_ptr payload_state; + //! Global sort state + GlobalSortState global_sort_state; + //! Memory usage per thread + idx_t memory_per_thread; }; class OrderLocalState : public LocalSinkState { public: - OrderLocalState() : initialized(false) { + OrderLocalState() { } - //! Whether this local state has been initialized - bool initialized; - - //! Local copy of the executor +public: + //! The local sort state + LocalSortState local_sort_state; + //! Local copy of the sorting expression executor ExpressionExecutor executor; - //! Holds a vector of incoming sorting columns DataChunk sort; - - //! Sorting columns, and variable size sorting data (if any) - unique_ptr sorting_block = nullptr; - vector> var_sorting_blocks; - vector> var_sorting_sizes; - - //! Payload data (and payload entry sizes if there is variable size data) - unique_ptr payload_block = nullptr; - unique_ptr sizes_block = nullptr; - - //! 
Constant buffers allocated for vector serialization - const SelectionVector *sel_ptr = &FlatVector::INCREMENTAL_SELECTION_VECTOR; - data_ptr_t key_locations[STANDARD_VECTOR_SIZE]; - data_ptr_t validitymask_locations[STANDARD_VECTOR_SIZE]; - idx_t entry_sizes[STANDARD_VECTOR_SIZE]; }; -template -static idx_t TemplatedGetSize(Value min, Value max) { - T min_val = min.GetValue(); - T max_val = max.GetValue(); - idx_t size = sizeof(T); - T max_in_size = (1 << ((size - 1) * 8 - 1)) - 1; - while (max_val < max_in_size && min_val > -max_in_size) { - size--; - max_in_size = (1 << ((size - 1) * 8 - 1)) - 1; - } - return size; -} - unique_ptr PhysicalOrder::GetGlobalState(ClientContext &context) { - auto &buffer_manager = BufferManager::GetBufferManager(context); - auto state = make_unique(buffer_manager); - - // init sorting state and sorting block - size_t entry_size = 0; - vector order_types; - vector order_by_null_types; - vector types; - vector stats; - vector has_null; - vector constant_size; - vector col_sizes; - for (auto &order : orders) { - // global state ExpressionExecutor - auto &expr = *order.expression; - - // sorting state - order_types.push_back(order.type); - order_by_null_types.push_back(order.null_order); - types.push_back(expr.return_type); - if (expr.stats) { - stats.push_back(expr.stats.get()); - } else { - stats.push_back(nullptr); - } - - // compute column sizes - auto physical_type = expr.return_type.InternalType(); - constant_size.push_back(TypeIsConstantSize(physical_type)); - idx_t col_size = GetTypeIdSize(expr.return_type.InternalType()); - - // TODO: make use of statistics - if (!TypeIsConstantSize(physical_type)) { - switch (physical_type) { - case PhysicalType::VARCHAR: - col_size = StringStatistics::MAX_STRING_MINMAX_SIZE; - break; - default: - // do nothing - break; - } - } - has_null.push_back(true); - - // increment entry size with the column size - if (has_null.back()) { - col_size++; - } - entry_size += col_size; - 
col_sizes.push_back(col_size); - - // create RowChunks for variable size sorting columns in order to resolve - if (TypeIsConstantSize(physical_type)) { - state->var_sorting_blocks.push_back(nullptr); - state->var_sorting_sizes.push_back(nullptr); - } else { - // besides the prefix, variable size sorting columns are also fully serialized, along with offsets - // we have to assume a large variable size, otherwise a single large variable entry may not fit in a block - // 1 << 23 = 8MB - state->var_sorting_blocks.push_back(make_unique(buffer_manager, (1 << 23) / 8, 8)); - state->var_sorting_sizes.push_back(make_unique( - buffer_manager, (idx_t)Storage::BLOCK_ALLOC_SIZE / sizeof(idx_t) + 1, sizeof(idx_t))); - } - } - // make room for an 'index' column at the end - entry_size += sizeof(idx_t); - - state->sorting_state = unique_ptr(new SortingState { - entry_size, order_types, order_by_null_types, types, stats, has_null, constant_size, col_sizes}); - idx_t vectors_per_block = (Storage::BLOCK_ALLOC_SIZE / entry_size + STANDARD_VECTOR_SIZE) / STANDARD_VECTOR_SIZE; - state->sorting_block = make_unique(buffer_manager, vectors_per_block * STANDARD_VECTOR_SIZE, entry_size); - - // init payload state - entry_size = 0; - idx_t validitymask_size = (children[0]->types.size() + 7) / 8; - entry_size += validitymask_size; - bool variable_payload_size = false; - idx_t var_columns = 0; - for (auto &type : children[0]->types) { - auto physical_type = type.InternalType(); - if (TypeIsConstantSize(physical_type)) { - entry_size += GetTypeIdSize(physical_type); - } else { - variable_payload_size = true; - var_columns++; - } - } - state->payload_state = - unique_ptr(new PayloadState {variable_payload_size, validitymask_size, entry_size}); - entry_size = entry_size == 0 ? 
32 : entry_size; // avoid divide by 0 in case no nulls and all variable columns - - if (variable_payload_size) { - // if payload entry size is not constant, we keep track of entry sizes - state->sizes_block = - make_unique(buffer_manager, (idx_t)Storage::BLOCK_ALLOC_SIZE / sizeof(idx_t) + 1, sizeof(idx_t)); - // again, we have to assume a large variable size - state->payload_block = make_unique(buffer_manager, (entry_size + var_columns * (1 << 23)) / 32, 32); - } else { - vectors_per_block = (Storage::BLOCK_ALLOC_SIZE / entry_size + STANDARD_VECTOR_SIZE) / STANDARD_VECTOR_SIZE; - state->payload_block = - make_unique(buffer_manager, vectors_per_block * STANDARD_VECTOR_SIZE, entry_size); - } - + // Get the payload layout from the return types + RowLayout payload_layout; + payload_layout.Initialize(types, false); + auto state = make_unique(BufferManager::GetBufferManager(context), *this, payload_layout); + // Set external (can be force with the PRAGMA) + state->global_sort_state.external = context.force_external; + // Memory usage per thread should scale with max mem / num threads + // We take 1/6th of this, to be conservative + idx_t max_memory = BufferManager::GetBufferManager(context).GetMaxMemory(); + idx_t num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads(); + state->memory_per_thread = (max_memory / num_threads) / 6; return move(state); } unique_ptr PhysicalOrder::GetLocalSinkState(ExecutionContext &context) { auto result = make_unique(); + // Initialize order clause expression executor and DataChunk vector types; for (auto &order : orders) { types.push_back(order.expression->return_type); @@ -46132,730 +51830,138 @@ unique_ptr PhysicalOrder::GetLocalSinkState(ExecutionContext &co } void PhysicalOrder::Sink(ExecutionContext &context, GlobalOperatorState &gstate_p, LocalSinkState &lstate_p, - DataChunk &input) { + DataChunk &input) const { auto &gstate = (OrderGlobalState &)gstate_p; auto &lstate = (OrderLocalState &)lstate_p; - const auto 
&sorting_state = *gstate.sorting_state; - const auto &payload_state = *gstate.payload_state; - - if (!lstate.initialized) { - // init using gstate if not initialized yet - lstate.sorting_block = make_unique(*gstate.sorting_block); - lstate.payload_block = make_unique(*gstate.payload_block); - if (payload_state.HAS_VARIABLE_SIZE) { - lstate.sizes_block = make_unique(*gstate.sizes_block); - } - for (idx_t i = 0; i < gstate.var_sorting_blocks.size(); i++) { - if (gstate.var_sorting_blocks[i]) { - lstate.var_sorting_blocks.push_back(make_unique(*gstate.var_sorting_blocks[i])); - lstate.var_sorting_sizes.push_back(make_unique(*gstate.var_sorting_sizes[i])); - } else { - lstate.var_sorting_blocks.push_back(nullptr); - lstate.var_sorting_sizes.push_back(nullptr); - } - } - lstate.initialized = true; - } - // obtain sorting columns - auto &sort = lstate.sort; - lstate.executor.Execute(input, sort); + auto &global_sort_state = gstate.global_sort_state; + auto &local_sort_state = lstate.local_sort_state; - // build and serialize sorting data - lstate.sorting_block->Build(sort.size(), lstate.key_locations, nullptr); - for (idx_t sort_col = 0; sort_col < sort.ColumnCount(); sort_col++) { - bool has_null = sorting_state.HAS_NULL[sort_col]; - bool nulls_first = sorting_state.ORDER_BY_NULL_TYPES[sort_col] == OrderByNullType::NULLS_FIRST; - bool desc = sorting_state.ORDER_TYPES[sort_col] == OrderType::DESCENDING; - idx_t size_in_bytes = StringStatistics::MAX_STRING_MINMAX_SIZE; // TODO: use actual string statistics - lstate.sorting_block->SerializeVectorSortable(sort.data[sort_col], sort.size(), *lstate.sel_ptr, sort.size(), - lstate.key_locations, desc, has_null, nulls_first, size_in_bytes); + // Initialize local state (if necessary) + if (!local_sort_state.initialized) { + local_sort_state.Initialize(global_sort_state, BufferManager::GetBufferManager(context.client)); } - // also fully serialize variable size sorting columns - for (idx_t sort_col = 0; sort_col < 
sort.ColumnCount(); sort_col++) { - if (TypeIsConstantSize(sort.data[sort_col].GetType().InternalType())) { - continue; - } - auto &var_sizes = *lstate.var_sorting_sizes[sort_col]; - auto &var_block = *lstate.var_sorting_blocks[sort_col]; - // compute entry sizes - std::fill_n(lstate.entry_sizes, input.size(), 0); - RowChunk::ComputeEntrySizes(sort.data[sort_col], lstate.entry_sizes, sort.size()); - // build and serialize entry sizes - var_sizes.Build(sort.size(), lstate.key_locations, nullptr); - for (idx_t i = 0; i < input.size(); i++) { - Store(lstate.entry_sizes[i], lstate.key_locations[i]); - } - // build and serialize variable size entries - var_block.Build(sort.size(), lstate.key_locations, lstate.entry_sizes); - var_block.SerializeVector(sort.data[sort_col], sort.size(), *lstate.sel_ptr, input.size(), 0, - lstate.key_locations, nullptr); - } + // Obtain sorting columns + auto &sort = lstate.sort; + lstate.executor.Execute(input, sort); - // compute entry sizes of payload columns if there are variable size columns - if (payload_state.HAS_VARIABLE_SIZE) { - RowChunk::ComputeEntrySizes(input, lstate.entry_sizes, payload_state.ENTRY_SIZE); - lstate.sizes_block->Build(input.size(), lstate.key_locations, nullptr); - for (idx_t i = 0; i < input.size(); i++) { - Store(lstate.entry_sizes[i], lstate.key_locations[i]); - } - lstate.payload_block->Build(input.size(), lstate.key_locations, lstate.entry_sizes); - } else { - lstate.payload_block->Build(input.size(), lstate.key_locations, nullptr); - } + // Sink the data into the local sort state + local_sort_state.SinkChunk(sort, input); - // serialize payload data - for (idx_t i = 0; i < input.size(); i++) { - memset(lstate.key_locations[i], -1, payload_state.VALIDITYMASK_SIZE); - lstate.validitymask_locations[i] = lstate.key_locations[i]; - lstate.key_locations[i] += payload_state.VALIDITYMASK_SIZE; - } - for (idx_t payl_col = 0; payl_col < input.ColumnCount(); payl_col++) { - 
lstate.payload_block->SerializeVector(input.data[payl_col], input.size(), *lstate.sel_ptr, input.size(), - payl_col, lstate.key_locations, lstate.validitymask_locations); + // When sorting data reaches a certain size, we sort it + if (local_sort_state.SizeInBytes() >= gstate.memory_per_thread) { + local_sort_state.Sort(global_sort_state); } } void PhysicalOrder::Combine(ExecutionContext &context, GlobalOperatorState &gstate_p, LocalSinkState &lstate_p) { auto &gstate = (OrderGlobalState &)gstate_p; auto &lstate = (OrderLocalState &)lstate_p; - auto &sorting_state = *gstate.sorting_state; - - if (!lstate.sorting_block) { - return; - } - - lock_guard append_lock(gstate.lock); - for (auto &block : lstate.sorting_block->blocks) { - gstate.sorting_block->count += block.count; - gstate.sorting_block->blocks.push_back(move(block)); - } - for (idx_t i = 0; i < lstate.var_sorting_blocks.size(); i++) { - if (sorting_state.CONSTANT_SIZE[i]) { - continue; - } - for (auto &block : lstate.var_sorting_blocks[i]->blocks) { - gstate.var_sorting_blocks[i]->count += block.count; - gstate.var_sorting_blocks[i]->blocks.push_back(move(block)); - } - for (auto &block : lstate.var_sorting_sizes[i]->blocks) { - gstate.var_sorting_sizes[i]->count += block.count; - gstate.var_sorting_sizes[i]->blocks.push_back(move(block)); - } - } - for (auto &block : lstate.payload_block->blocks) { - gstate.payload_block->count += block.count; - gstate.payload_block->blocks.push_back(move(block)); - } - - const auto &payload_state = *gstate.payload_state; - if (payload_state.HAS_VARIABLE_SIZE) { - for (auto &block : lstate.sizes_block->blocks) { - gstate.sizes_block->count += block.count; - gstate.sizes_block->blocks.push_back(move(block)); - } - } + gstate.global_sort_state.AddLocalState(lstate.local_sort_state); } -static void RadixSort(BufferManager &buffer_manager, data_ptr_t dataptr, const idx_t &count, const idx_t &col_offset, - const idx_t &sorting_size, const SortingState &sorting_state) { - auto 
temp_block = - buffer_manager.Allocate(MaxValue(count * sorting_state.ENTRY_SIZE, (idx_t)Storage::BLOCK_ALLOC_SIZE)); - data_ptr_t temp = temp_block->node->buffer; - bool swap = false; - - idx_t counts[256]; - uint8_t byte; - for (idx_t offset = col_offset + sorting_size - 1; offset + 1 > col_offset; offset--) { - // init to 0 - memset(counts, 0, sizeof(counts)); - // collect counts - for (idx_t i = 0; i < count; i++) { - byte = *(dataptr + i * sorting_state.ENTRY_SIZE + offset); - counts[byte]++; - } - // compute offsets from counts - for (idx_t val = 1; val < 256; val++) { - counts[val] = counts[val] + counts[val - 1]; - } - // re-order the data in temporary array - for (idx_t i = count; i > 0; i--) { - byte = *(dataptr + (i - 1) * sorting_state.ENTRY_SIZE + offset); - memcpy(temp + (counts[byte] - 1) * sorting_state.ENTRY_SIZE, dataptr + (i - 1) * sorting_state.ENTRY_SIZE, - sorting_state.ENTRY_SIZE); - counts[byte]--; - } - std::swap(dataptr, temp); - swap = !swap; - } - // move data back to original buffer (if it was swapped) - if (swap) { - memcpy(temp, dataptr, count * sorting_state.ENTRY_SIZE); - } -} - -static void SubSortTiedTuples(BufferManager &buffer_manager, const data_ptr_t dataptr, const idx_t &count, - const idx_t &col_offset, const idx_t &sorting_size, bool ties[], - const SortingState &sorting_state) { - D_ASSERT(!ties[count - 1]); - for (idx_t i = 0; i < count; i++) { - if (!ties[i]) { - continue; - } - idx_t j; - for (j = i + 1; j < count; j++) { - if (!ties[j]) { - break; - } - } - RadixSort(buffer_manager, dataptr + i * sorting_state.ENTRY_SIZE, j - i + 1, col_offset, sorting_size, - sorting_state); - i = j; - } -} - -static void ComputeTies(data_ptr_t dataptr, const idx_t &count, const idx_t &col_offset, const idx_t &tie_size, - bool ties[], const SortingState &sorting_state) { - D_ASSERT(!ties[count - 1]); - D_ASSERT(col_offset + tie_size <= sorting_state.ENTRY_SIZE - sizeof(idx_t)); - // align dataptr - dataptr += col_offset; - idx_t i = 
0; - for (; i + 7 < count - 1; i += 8) { - // fixed size inner loop to allow unrolling - for (idx_t j = 0; j < 8; j++) { - ties[i + j] = ties[i + j] && memcmp(dataptr, dataptr + sorting_state.ENTRY_SIZE, tie_size) == 0; - dataptr += sorting_state.ENTRY_SIZE; - } - } - for (; i < count - 1; i++) { - ties[i] = ties[i] && memcmp(dataptr, dataptr + sorting_state.ENTRY_SIZE, tie_size) == 0; - dataptr += sorting_state.ENTRY_SIZE; - } - ties[count - 1] = false; -} - -static bool CompareStrings(const data_ptr_t &l, const data_ptr_t &r, const data_ptr_t &var_dataptr, const idx_t sizes[], - const int &order, const idx_t &sorting_size) { - // use indices to find strings in blob - idx_t left_idx = Load(l + sorting_size); - idx_t right_idx = Load(r + sorting_size); - data_ptr_t left_ptr = var_dataptr + sizes[left_idx]; - data_ptr_t right_ptr = var_dataptr + sizes[right_idx]; - // read string lengths - uint32_t left_size = Load(left_ptr); - uint32_t right_size = Load(right_ptr); - left_ptr += string_t::PREFIX_LENGTH; - right_ptr += string_t::PREFIX_LENGTH; - // construct strings - string_t left_val((const char *)left_ptr, left_size); - string_t right_val((const char *)right_ptr, right_size); - - int comp_res = 1; - if (Equals::Operation(left_val, right_val)) { - comp_res = 0; - } - if (LessThan::Operation(left_val, right_val)) { - comp_res = -1; - } - return order * comp_res < 0; -} - -static void BreakStringTies(BufferManager &buffer_manager, const data_ptr_t dataptr, const idx_t &start, - const idx_t &end, const idx_t &tie_col, bool ties[], const data_ptr_t var_dataptr, - const data_ptr_t sizes_ptr, const SortingState &sorting_state) { - idx_t tie_col_offset = 0; - for (idx_t i = 0; i < tie_col; i++) { - tie_col_offset += sorting_state.COL_SIZE[i]; - } - if (sorting_state.HAS_NULL[tie_col]) { - char *validity = (char *)dataptr + start * sorting_state.ENTRY_SIZE + tie_col_offset; - if (sorting_state.ORDER_BY_NULL_TYPES[tie_col] == OrderByNullType::NULLS_FIRST && *validity == 0) 
{ - // NULLS_FIRST, therefore null is encoded as 0 - we can't break null ties - return; - } else if (sorting_state.ORDER_BY_NULL_TYPES[tie_col] == OrderByNullType::NULLS_LAST && *validity == 1) { - // NULLS_LAST, therefore null is encoded as 1 - we can't break null ties - return; - } - tie_col_offset++; - } - // if the tied strings are smaller than the prefix size, or are NULL, we don't need to break the ties - char *prefix_chars = (char *)dataptr + start * sorting_state.ENTRY_SIZE + tie_col_offset; - const char null_char = sorting_state.ORDER_TYPES[tie_col] == OrderType::ASCENDING ? 0 : -1; - for (idx_t i = 0; i < StringStatistics::MAX_STRING_MINMAX_SIZE; i++) { - if (prefix_chars[i] == null_char) { - return; - } - } - - // fill pointer array for sorting - auto ptr_block = - buffer_manager.Allocate(MaxValue((end - start) * sizeof(data_ptr_t), (idx_t)Storage::BLOCK_ALLOC_SIZE)); - auto entry_ptrs = (data_ptr_t *)ptr_block->node->buffer; - for (idx_t i = start; i < end; i++) { - entry_ptrs[i - start] = dataptr + i * sorting_state.ENTRY_SIZE; - } - - // slow pointer-based sorting - const int order = sorting_state.ORDER_TYPES[tie_col] == OrderType::DESCENDING ? 
-1 : 1; - const idx_t sorting_size = sorting_state.ENTRY_SIZE - sizeof(idx_t); - const idx_t *sizes = (idx_t *)sizes_ptr; - std::sort(entry_ptrs, entry_ptrs + end - start, - [&var_dataptr, &sizes, &order, &sorting_size](const data_ptr_t l, const data_ptr_t r) { - return CompareStrings(l, r, var_dataptr, sizes, order, sorting_size); - }); - - // re-order - auto temp_block = - buffer_manager.Allocate(MaxValue((end - start) * sorting_state.ENTRY_SIZE, (idx_t)Storage::BLOCK_ALLOC_SIZE)); - data_ptr_t temp_ptr = temp_block->node->buffer; - for (idx_t i = 0; i < end - start; i++) { - memcpy(temp_ptr, entry_ptrs[i], sorting_state.ENTRY_SIZE); - temp_ptr += sorting_state.ENTRY_SIZE; +class PhysicalOrderMergeTask : public Task { +public: + PhysicalOrderMergeTask(Pipeline &parent, ClientContext &context, OrderGlobalState &state) + : parent(parent), context(context), state(state) { } - memcpy(dataptr + start * sorting_state.ENTRY_SIZE, temp_block->node->buffer, - (end - start) * sorting_state.ENTRY_SIZE); - - // determine if there are still ties (if this is not the last column) - if (tie_col < sorting_state.ORDER_TYPES.size() - 1) { - data_ptr_t idx_ptr = dataptr + start * sorting_state.ENTRY_SIZE + sorting_size; - idx_t current_idx = Load(idx_ptr); - data_ptr_t current_ptr = var_dataptr + sizes[current_idx]; - uint32_t current_size = Load(current_ptr); - current_ptr += string_t::PREFIX_LENGTH; - string_t current_val((const char *)current_ptr, current_size); - for (idx_t i = 0; i < end - start - 1; i++) { - idx_ptr += sorting_state.ENTRY_SIZE; - - // load next entry - idx_t next_idx = Load(idx_ptr); - data_ptr_t next_ptr = var_dataptr + sizes[next_idx]; - uint32_t next_size = Load(next_ptr); - next_ptr += string_t::PREFIX_LENGTH; - string_t next_val((const char *)next_ptr, next_size); - - if (current_size != next_size) { - // quick comparison: different length - ties[start + i] = false; + void Execute() override { + // Initialize merge sorted and iterate until done + auto 
&global_sort_state = state.global_sort_state; + MergeSorter merge_sorter(global_sort_state, BufferManager::GetBufferManager(context)); + merge_sorter.PerformInMergeRound(); + // Finish task and act if all tasks are finished + lock_guard state_guard(global_sort_state.lock); + parent.finished_tasks++; + if (parent.finished_tasks == parent.total_tasks) { + global_sort_state.CompleteMergeRound(); + if (global_sort_state.sorted_blocks.size() == 1) { + // Only one block left: Done! + parent.Finish(); } else { - // equal length: full comparison - ties[start + i] = Equals::Operation(current_val, next_val); - } - - current_size = next_size; - current_val = next_val; - } - } -} - -static void BreakTies(BufferManager &buffer_manager, OrderGlobalState &global_state, bool ties[], data_ptr_t dataptr, - const idx_t &count, const idx_t &tie_col, const SortingState &sorting_state) { - D_ASSERT(!ties[count - 1]); - auto var_block_handle = buffer_manager.Pin(global_state.var_sorting_blocks[tie_col]->blocks[0].block); - auto var_sizes_handle = buffer_manager.Pin(global_state.var_sorting_sizes[tie_col]->blocks[0].block); - const data_ptr_t var_dataptr = var_block_handle->node->buffer; - const data_ptr_t sizes_ptr = var_sizes_handle->node->buffer; - - for (idx_t i = 0; i < count; i++) { - if (!ties[i]) { - continue; - } - idx_t j; - for (j = i; j < count; j++) { - if (!ties[j]) { - break; + // Schedule the next round + PhysicalOrder::ScheduleMergeTasks(parent, context, state); } } - switch (sorting_state.TYPES[tie_col].InternalType()) { - case PhysicalType::VARCHAR: - BreakStringTies(buffer_manager, dataptr, i, j + 1, tie_col, ties, var_dataptr, sizes_ptr, sorting_state); - break; - default: - throw NotImplementedException("Cannot sort variable size column with type %s", - sorting_state.TYPES[tie_col].ToString()); - } - i = j; - } -} - -static bool AnyTies(bool ties[], const idx_t &count) { - D_ASSERT(!ties[count - 1]); - bool any_ties = false; - for (idx_t i = 0; i < count - 1; i++) { 
- any_ties = any_ties || ties[i]; - } - return any_ties; -} - -static void SortInMemory(Pipeline &pipeline, ClientContext &context, OrderGlobalState &state) { - const auto &sorting_state = *state.sorting_state; - auto &buffer_manager = BufferManager::GetBufferManager(context); - - auto &block = state.sorting_block->blocks.back(); - const auto &count = block.count; - auto handle = buffer_manager.Pin(block.block); - const auto dataptr = handle->node->buffer; - - // assign an index to each row - idx_t sorting_size = sorting_state.ENTRY_SIZE - sizeof(idx_t); - data_ptr_t idx_dataptr = dataptr + sorting_size; - for (idx_t i = 0; i < count; i++) { - Store(i, idx_dataptr); - idx_dataptr += sorting_state.ENTRY_SIZE; } - bool all_constant = true; - for (idx_t i = 0; i < sorting_state.CONSTANT_SIZE.size(); i++) { - all_constant = all_constant && sorting_state.CONSTANT_SIZE[i]; - } - - if (all_constant) { - RadixSort(buffer_manager, dataptr, count, 0, sorting_size, sorting_state); - return; - } - - sorting_size = 0; - idx_t col_offset = 0; - unique_ptr ties_handle = nullptr; - bool *ties = nullptr; - const idx_t num_cols = sorting_state.CONSTANT_SIZE.size(); - for (idx_t i = 0; i < num_cols; i++) { - sorting_size += sorting_state.COL_SIZE[i]; - if (sorting_state.CONSTANT_SIZE[i] && i < num_cols - 1) { - // add columns to the sort until we reach a variable size column, or the last column - continue; - } - - if (!ties) { - // this is the first sort - RadixSort(buffer_manager, dataptr, count, col_offset, sorting_size, sorting_state); - ties_handle = buffer_manager.Allocate(MaxValue(count, (idx_t)Storage::BLOCK_ALLOC_SIZE)); - ties = (bool *)ties_handle->node->buffer; - std::fill_n(ties, count - 1, true); - ties[count - 1] = false; - } else { - // for subsequent sorts, we subsort the tied tuples - SubSortTiedTuples(buffer_manager, dataptr, count, col_offset, sorting_size, ties, sorting_state); - } - - if (sorting_state.CONSTANT_SIZE[i] && i == num_cols - 1) { - // all columns are 
sorted, no ties to break because last column is constant size - break; - } - - ComputeTies(dataptr, count, col_offset, sorting_size, ties, sorting_state); - if (!AnyTies(ties, count)) { - // no ties, so we stop sorting - break; - } - - BreakTies(buffer_manager, state, ties, dataptr, count, i, sorting_state); - if (!AnyTies(ties, count)) { - // no more ties after tie-breaking - break; - } - - col_offset += sorting_size; - sorting_size = 0; - } -} +private: + Pipeline &parent; + ClientContext &context; + OrderGlobalState &state; +}; -void ConcatenateBlocks(BufferManager &buffer_manager, RowChunk &row_chunk, idx_t capacity, bool variable_entry_size) { - RowDataBlock new_block(buffer_manager, capacity, row_chunk.entry_size); - new_block.count = row_chunk.count; - auto new_block_handle = buffer_manager.Pin(new_block.block); - data_ptr_t new_block_ptr = new_block_handle->node->buffer; - for (auto &block : row_chunk.blocks) { - auto block_handle = buffer_manager.Pin(block.block); - if (variable_entry_size) { - memcpy(new_block_ptr, block_handle->node->buffer, block.byte_offset); - new_block_ptr += block.byte_offset; - } else { - memcpy(new_block_ptr, block_handle->node->buffer, block.count * row_chunk.entry_size); - new_block_ptr += block.count * row_chunk.entry_size; - } - buffer_manager.UnregisterBlock(block.block->BlockId(), true); - } - row_chunk.blocks.clear(); - row_chunk.block_capacity = capacity; - row_chunk.blocks.push_back(move(new_block)); -} - -void SizesToOffsets(BufferManager &buffer_manager, RowChunk &row_chunk, idx_t capacity) { - RowDataBlock new_block(buffer_manager, capacity, row_chunk.entry_size); - new_block.count = row_chunk.count; - auto new_block_handle = buffer_manager.Pin(new_block.block); - data_ptr_t new_block_ptr = new_block_handle->node->buffer; - for (auto &block : row_chunk.blocks) { - auto block_handle = buffer_manager.Pin(block.block); - memcpy(new_block_ptr, block_handle->node->buffer, block.count * row_chunk.entry_size); - new_block_ptr 
+= block.count * row_chunk.entry_size; - buffer_manager.UnregisterBlock(block.block->BlockId(), true); - } - row_chunk.blocks.clear(); - row_chunk.block_capacity = capacity; - // convert sizes to offsets - idx_t *offsets = (idx_t *)new_block_handle->node->buffer; - idx_t prev = offsets[0]; - offsets[0] = 0; - idx_t curr; - for (idx_t i = 1; i < row_chunk.count; i++) { - curr = offsets[i]; - offsets[i] = offsets[i - 1] + prev; - prev = curr; - } - offsets[row_chunk.count] = offsets[row_chunk.count - 1] + prev; - row_chunk.blocks.push_back(move(new_block)); -} - -void PhysicalOrder::Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr state_p) { +bool PhysicalOrder::Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr state_p) { this->sink_state = move(state_p); auto &state = (OrderGlobalState &)*this->sink_state; - const auto &sorting_state = *state.sorting_state; - const auto &payload_state = *state.payload_state; - D_ASSERT(state.sorting_block->count == state.payload_block->count); - - if (state.sorting_block->count == 0) { - return; - } + auto &global_sort_state = state.global_sort_state; - idx_t payload_size = 0; - if (payload_state.HAS_VARIABLE_SIZE) { - for (auto &block : state.payload_block->blocks) { - payload_size += block.byte_offset; - } - } else { - payload_size = state.payload_block->count * payload_state.ENTRY_SIZE; - } - if (payload_size > state.buffer_manager.GetMaxMemory()) { - throw NotImplementedException("External sort"); - } - - if (state.sorting_block->blocks.size() > 1) { - // copy all of the sorting data to one big block - idx_t capacity = MaxValue(Storage::BLOCK_ALLOC_SIZE / sorting_state.ENTRY_SIZE + 1, state.sorting_block->count); - ConcatenateBlocks(state.buffer_manager, *state.sorting_block, capacity, false); - } - - for (idx_t i = 0; i < state.var_sorting_blocks.size(); i++) { - // copy variable size columns to one big block - if (!state.var_sorting_blocks[i]) { - continue; - } - auto &row_chunk = 
*state.var_sorting_blocks[i]; - idx_t var_block_size = 0; - for (auto &block : row_chunk.blocks) { - var_block_size += block.byte_offset; - } - // variable size data - idx_t capacity = - MaxValue(Storage::BLOCK_ALLOC_SIZE / row_chunk.entry_size + 1, var_block_size / row_chunk.entry_size + 1); - if (row_chunk.blocks.size() > 1) { - ConcatenateBlocks(state.buffer_manager, row_chunk, capacity, true); - } - // offsets - auto &sizes_chunk = *state.var_sorting_sizes[i]; - capacity = MaxValue(Storage::BLOCK_ALLOC_SIZE / sizes_chunk.entry_size + 1, sizes_chunk.count + 1); - SizesToOffsets(state.buffer_manager, sizes_chunk, capacity); + if (global_sort_state.sorted_blocks.empty()) { + // Empty input! + return true; } - if (state.payload_block->blocks.size() > 1) { - // same for the payload data, beware of variable entry size - idx_t capacity = - payload_state.HAS_VARIABLE_SIZE - ? MaxValue(Storage::BLOCK_ALLOC_SIZE / state.payload_block->entry_size + 1, - payload_size / payload_state.ENTRY_SIZE + 1) - : MaxValue(Storage::BLOCK_ALLOC_SIZE / state.payload_block->entry_size + 1, state.payload_block->count); - ConcatenateBlocks(state.buffer_manager, *state.payload_block, capacity, payload_state.HAS_VARIABLE_SIZE); - } + // Prepare for merge sort phase + global_sort_state.PrepareMergePhase(); - if (payload_state.HAS_VARIABLE_SIZE) { - D_ASSERT(state.sizes_block->count == state.sorting_block->count); - idx_t capacity = - MaxValue(Storage::BLOCK_ALLOC_SIZE / state.sizes_block->entry_size + 1, state.sizes_block->count + 1); - SizesToOffsets(state.buffer_manager, *state.sizes_block, capacity); + // Start the merge phase or finish if a merge is not necessary + if (global_sort_state.sorted_blocks.size() > 1) { + PhysicalOrder::ScheduleMergeTasks(pipeline, context, state); + return false; + } else { + return true; } +} - // now perform the actual sort - SortInMemory(pipeline, context, state); - - // cleanup - auto &buffer_manager = BufferManager::GetBufferManager(context); - for (idx_t 
i = 0; i < state.var_sorting_blocks.size(); i++) { - if (sorting_state.CONSTANT_SIZE[i]) { - continue; - } - for (auto &block : state.var_sorting_blocks[i]->blocks) { - buffer_manager.UnregisterBlock(block.block->BlockId(), true); - } - } - for (idx_t i = 0; i < state.var_sorting_sizes.size(); i++) { - if (sorting_state.CONSTANT_SIZE[i]) { - continue; - } - for (auto &block : state.var_sorting_sizes[i]->blocks) { - buffer_manager.UnregisterBlock(block.block->BlockId(), true); - } +void PhysicalOrder::ScheduleMergeTasks(Pipeline &pipeline, ClientContext &context, OrderGlobalState &state) { + // Initialize global sort state for a round of merging + state.global_sort_state.InitializeMergeRound(); + // Schedule tasks equal to the number of threads, which will each merge multiple partitions + auto &ts = TaskScheduler::GetScheduler(context); + idx_t num_threads = ts.NumberOfThreads(); + pipeline.total_tasks += num_threads; + for (idx_t tnum = 0; tnum < num_threads; tnum++) { + auto new_task = make_unique(pipeline, context, state); + ts.ScheduleTask(pipeline.token, move(new_task)); } } //===--------------------------------------------------------------------===// // GetChunkInternal //===--------------------------------------------------------------------===// -idx_t PhysicalOrder::MaxThreads(ClientContext &context) { - if (this->sink_state) { - auto &state = (OrderGlobalState &)*this->sink_state; - return state.payload_block->count / STANDARD_VECTOR_SIZE + 1; - } else { - return estimated_cardinality / STANDARD_VECTOR_SIZE + 1; - } -} - -class OrderParallelState : public ParallelState { -public: - OrderParallelState() : entry_idx(0) { - } - idx_t entry_idx; - std::mutex lock; -}; - -unique_ptr PhysicalOrder::GetParallelState() { - auto result = make_unique(); - return move(result); -} - class PhysicalOrderOperatorState : public PhysicalOperatorState { public: - PhysicalOrderOperatorState(PhysicalOperator &op, PhysicalOperator *child) - : PhysicalOperatorState(op, child), 
initialized(false), entry_idx(0), count(-1) { + PhysicalOrderOperatorState(PhysicalOperator &op, PhysicalOperator *child) : PhysicalOperatorState(op, child) { } - ParallelState *parallel_state; - bool initialized; - - unique_ptr sorting_handle = nullptr; - unique_ptr payload_handle; - unique_ptr offsets_handle; - data_ptr_t key_locations[STANDARD_VECTOR_SIZE]; - data_ptr_t validitymask_locations[STANDARD_VECTOR_SIZE]; - - idx_t entry_idx; - idx_t count; +public: + //! Payload scanner + unique_ptr scanner = nullptr; }; unique_ptr PhysicalOrder::GetOperatorState() { return make_unique(*this, children[0].get()); } -static void Scan(ClientContext &context, DataChunk &chunk, PhysicalOrderOperatorState &state, - const SortingState &sorting_state, const PayloadState &payload_state, const idx_t offset, - const idx_t next) { - if (offset >= state.count) { - return; - } - data_ptr_t sort_dataptr = state.sorting_handle->node->buffer + (offset * sorting_state.ENTRY_SIZE) + - sorting_state.ENTRY_SIZE - sizeof(idx_t); - const data_ptr_t payl_dataptr = state.payload_handle->node->buffer; - if (payload_state.HAS_VARIABLE_SIZE) { - const idx_t *offsets = (idx_t *)state.offsets_handle->node->buffer; - for (idx_t i = 0; i < next; i++) { - state.validitymask_locations[i] = payl_dataptr + offsets[Load(sort_dataptr)]; - state.key_locations[i] = state.validitymask_locations[i] + payload_state.VALIDITYMASK_SIZE; - sort_dataptr += sorting_state.ENTRY_SIZE; - } - } else { - for (idx_t i = 0; i < next; i++) { - state.validitymask_locations[i] = payl_dataptr + Load(sort_dataptr) * payload_state.ENTRY_SIZE; - state.key_locations[i] = state.validitymask_locations[i] + payload_state.VALIDITYMASK_SIZE; - sort_dataptr += sorting_state.ENTRY_SIZE; - } - } - - // deserialize the payload data - for (idx_t payl_col = 0; payl_col < chunk.ColumnCount(); payl_col++) { - RowChunk::DeserializeIntoVector(chunk.data[payl_col], next, payl_col, state.key_locations, - state.validitymask_locations); - } - 
chunk.SetCardinality(next); - chunk.Verify(); -} - -static void CleanUp(ClientContext &context, OrderGlobalState &gstate) { - auto &buffer_manager = BufferManager::GetBufferManager(context); - for (auto &block : gstate.sorting_block->blocks) { - buffer_manager.UnregisterBlock(block.block->BlockId(), true); - } - for (auto &block : gstate.payload_block->blocks) { - buffer_manager.UnregisterBlock(block.block->BlockId(), true); - } - if (gstate.payload_state->HAS_VARIABLE_SIZE) { - for (auto &block : gstate.sizes_block->blocks) { - buffer_manager.UnregisterBlock(block.block->BlockId(), true); - } - } -} - -void PhysicalOrder::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state_p) { +void PhysicalOrder::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state_p) const { auto &state = *reinterpret_cast(state_p); - auto &gstate = (OrderGlobalState &)*this->sink_state; - const auto &sorting_state = *gstate.sorting_state; - const auto &payload_state = *gstate.payload_state; - - if (!state.initialized) { - // initialize operator state - state.count = gstate.payload_block->count; - if (state.count > 0) { - state.sorting_handle = gstate.buffer_manager.Pin(gstate.sorting_block->blocks[0].block); - state.payload_handle = gstate.buffer_manager.Pin(gstate.payload_block->blocks[0].block); - if (payload_state.HAS_VARIABLE_SIZE) { - state.offsets_handle = gstate.buffer_manager.Pin(gstate.sizes_block->blocks[0].block); - } - } - // initialize parallel state (if any) - state.parallel_state = nullptr; - auto &task = context.task; - // check if there is any parallel state to fetch - state.parallel_state = nullptr; - auto task_info = task.task_info.find(this); - if (task_info != task.task_info.end()) { - // parallel scan init - state.parallel_state = task_info->second; - } - state.initialized = true; - } - if (!state.parallel_state) { - // sequential scan - const idx_t next = MinValue((idx_t)STANDARD_VECTOR_SIZE, 
state.count - state.entry_idx); - Scan(context.client, chunk, state, sorting_state, payload_state, state.entry_idx, next); - state.entry_idx += STANDARD_VECTOR_SIZE; - if (chunk.size() != 0) { + if (!state.scanner) { + // Initialize scanner (if not yet initialized) + auto &gstate = (OrderGlobalState &)*this->sink_state; + auto &global_sort_state = gstate.global_sort_state; + if (global_sort_state.sorted_blocks.empty()) { return; } - } else { - // parallel scan - auto ¶llel_state = *reinterpret_cast(state.parallel_state); - do { - idx_t offset; - idx_t next; - { - lock_guard parallel_lock(parallel_state.lock); - offset = parallel_state.entry_idx; - next = MinValue((idx_t)STANDARD_VECTOR_SIZE, state.count - offset); - parallel_state.entry_idx += next; - } - Scan(context.client, chunk, state, sorting_state, payload_state, offset, next); - if (chunk.size() == 0) { - break; - } else { - return; - } - } while (true); + state.scanner = + make_unique(*global_sort_state.sorted_blocks[0]->payload_data, global_sort_state); } - D_ASSERT(chunk.size() == 0); - CleanUp(context.client, gstate); + + // Scan the next data chunk + state.scanner->Scan(chunk); } string PhysicalOrder::ParamsToString() const { @@ -46902,13 +52008,14 @@ class PhysicalTopN : public PhysicalSink { idx_t offset; public: - void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, DataChunk &input) override; + void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, + DataChunk &input) const override; void Combine(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate) override; - void Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr gstate) override; + bool Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr gstate) override; unique_ptr GetLocalSinkState(ExecutionContext &context) override; unique_ptr GetGlobalState(ClientContext &context) override; - void GetChunkInternal(ExecutionContext 
&context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; unique_ptr GetOperatorState() override; string ParamsToString() const override; @@ -46926,7 +52033,6 @@ class PhysicalTopN : public PhysicalSink { - namespace duckdb { //===--------------------------------------------------------------------===// @@ -47057,7 +52163,7 @@ unique_ptr PhysicalTopN::GetGlobalState(ClientContext &cont // Sink //===--------------------------------------------------------------------===// void PhysicalTopN::Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, - DataChunk &input) { + DataChunk &input) const { // append to the local sink state auto &sink = (TopNLocalState &)lstate; sink.heap.Sink(input); @@ -47079,12 +52185,13 @@ void PhysicalTopN::Combine(ExecutionContext &context, GlobalOperatorState &state //===--------------------------------------------------------------------===// // Finalize //===--------------------------------------------------------------------===// -void PhysicalTopN::Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr state) { +bool PhysicalTopN::Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr state) { auto &gstate = (TopNGlobalState &)*state; // global finalize: compute the final top N gstate.heap.Reduce(); PhysicalSink::Finalize(pipeline, context, move(state)); + return true; } //===--------------------------------------------------------------------===// @@ -47099,7 +52206,7 @@ class PhysicalTopNOperatorState : public PhysicalOperatorState { idx_t position; }; -void PhysicalTopN::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state_p) { +void PhysicalTopN::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state_p) const { auto &state = (PhysicalTopNOperatorState &)*state_p; auto &gstate = 
(TopNGlobalState &)*sink_state; @@ -47133,355 +52240,9 @@ string PhysicalTopN::ParamsToString() const { } } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/execution/operator/persistent/buffered_csv_reader.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/function/scalar/strftime.hpp -// -// -//===----------------------------------------------------------------------===// - - - -namespace duckdb { - -enum class StrTimeSpecifier : uint8_t { - ABBREVIATED_WEEKDAY_NAME = 0, // %a - Abbreviated weekday name. (Sun, Mon, ...) - FULL_WEEKDAY_NAME = 1, // %A Full weekday name. (Sunday, Monday, ...) - WEEKDAY_DECIMAL = 2, // %w - Weekday as a decimal number. (0, 1, ..., 6) - DAY_OF_MONTH_PADDED = 3, // %d - Day of the month as a zero-padded decimal. (01, 02, ..., 31) - DAY_OF_MONTH = 4, // %-d - Day of the month as a decimal number. (1, 2, ..., 30) - ABBREVIATED_MONTH_NAME = 5, // %b - Abbreviated month name. (Jan, Feb, ..., Dec) - FULL_MONTH_NAME = 6, // %B - Full month name. (January, February, ...) - MONTH_DECIMAL_PADDED = 7, // %m - Month as a zero-padded decimal number. (01, 02, ..., 12) - MONTH_DECIMAL = 8, // %-m - Month as a decimal number. (1, 2, ..., 12) - YEAR_WITHOUT_CENTURY_PADDED = 9, // %y - Year without century as a zero-padded decimal number. (00, 01, ..., 99) - YEAR_WITHOUT_CENTURY = 10, // %-y - Year without century as a decimal number. (0, 1, ..., 99) - YEAR_DECIMAL = 11, // %Y - Year with century as a decimal number. (2013, 2019 etc.) - HOUR_24_PADDED = 12, // %H - Hour (24-hour clock) as a zero-padded decimal number. (00, 01, ..., 23) - HOUR_24_DECIMAL = 13, // %-H - Hour (24-hour clock) as a decimal number. (0, 1, ..., 23) - HOUR_12_PADDED = 14, // %I - Hour (12-hour clock) as a zero-padded decimal number. 
(01, 02, ..., 12) - HOUR_12_DECIMAL = 15, // %-I - Hour (12-hour clock) as a decimal number. (1, 2, ... 12) - AM_PM = 16, // %p - Locale’s AM or PM. (AM, PM) - MINUTE_PADDED = 17, // %M - Minute as a zero-padded decimal number. (00, 01, ..., 59) - MINUTE_DECIMAL = 18, // %-M - Minute as a decimal number. (0, 1, ..., 59) - SECOND_PADDED = 19, // %S - Second as a zero-padded decimal number. (00, 01, ..., 59) - SECOND_DECIMAL = 20, // %-S - Second as a decimal number. (0, 1, ..., 59) - MICROSECOND_PADDED = 21, // %f - Microsecond as a decimal number, zero-padded on the left. (000000 - 999999) - MILLISECOND_PADDED = 22, // %g - Millisecond as a decimal number, zero-padded on the left. (000 - 999) - UTC_OFFSET = 23, // %z - UTC offset in the form +HHMM or -HHMM. ( ) - TZ_NAME = 24, // %Z - Time zone name. ( ) - DAY_OF_YEAR_PADDED = 25, // %j - Day of the year as a zero-padded decimal number. (001, 002, ..., 366) - DAY_OF_YEAR_DECIMAL = 26, // %-j - Day of the year as a decimal number. (1, 2, ..., 366) - WEEK_NUMBER_PADDED_SUN_FIRST = - 27, // %U - Week number of the year (Sunday as the first day of the week). All days in a new year preceding the - // first Sunday are considered to be in week 0. (00, 01, ..., 53) - WEEK_NUMBER_PADDED_MON_FIRST = - 28, // %W - Week number of the year (Monday as the first day of the week). All days in a new year preceding the - // first Monday are considered to be in week 0. (00, 01, ..., 53) - LOCALE_APPROPRIATE_DATE_AND_TIME = - 29, // %c - Locale’s appropriate date and time representation. (Mon Sep 30 07:06:05 2013) - LOCALE_APPROPRIATE_DATE = 30, // %x - Locale’s appropriate date representation. (09/30/13) - LOCALE_APPROPRIATE_TIME = 31 // %X - Locale’s appropriate time representation. (07:06:05) -}; - -struct StrTimeFormat { -public: - virtual ~StrTimeFormat() { - } - - static string ParseFormatSpecifier(string format_string, StrTimeFormat &format); - -protected: - //! The format specifiers - vector specifiers; - //! 
The literals that appear in between the format specifiers - //! The following must hold: literals.size() = specifiers.size() + 1 - //! Format is literals[0], specifiers[0], literals[1], ..., specifiers[n - 1], literals[n] - vector literals; - //! The constant size that appears in the format string - idx_t constant_size; - //! The max numeric width of the specifier (if it is parsed as a number), or -1 if it is not a number - vector numeric_width; - void AddLiteral(string literal); - virtual void AddFormatSpecifier(string preceding_literal, StrTimeSpecifier specifier); -}; - -struct StrfTimeFormat : public StrTimeFormat { - idx_t GetLength(date_t date, dtime_t time); - - void FormatString(date_t date, int32_t data[7], char *target); - void FormatString(date_t date, dtime_t time, char *target); - -protected: - //! The variable-length specifiers. To determine total string size, these need to be checked. - vector var_length_specifiers; - //! Whether or not the current specifier is a special "date" specifier (i.e. one that requires a date_t object to - //! generate) - vector is_date_specifier; - - void AddFormatSpecifier(string preceding_literal, StrTimeSpecifier specifier) override; - static idx_t GetSpecifierLength(StrTimeSpecifier specifier, date_t date, dtime_t time); - char *WriteString(char *target, const string_t &str); - char *Write2(char *target, uint8_t value); - char *WritePadded2(char *target, int32_t value); - char *WritePadded3(char *target, uint32_t value); - char *WritePadded(char *target, int32_t value, int32_t padding); - bool IsDateSpecifier(StrTimeSpecifier specifier); - char *WriteDateSpecifier(StrTimeSpecifier specifier, date_t date, char *target); - char *WriteStandardSpecifier(StrTimeSpecifier specifier, int32_t data[], char *target); -}; - -struct StrpTimeFormat : public StrTimeFormat { -public: - //! Type-safe parsing argument - struct ParseResult { - int32_t data[7]; - string error_message; - idx_t error_position = INVALID_INDEX; - }; - //! 
The full format specifier, for error messages - string format_specifier; - - bool Parse(string_t str, ParseResult &result); - date_t ParseDate(string_t str); - timestamp_t ParseTimestamp(string_t str); - -protected: - string FormatStrpTimeError(const string &input, idx_t position); - void AddFormatSpecifier(string preceding_literal, StrTimeSpecifier specifier) override; - int NumericSpecifierWidth(StrTimeSpecifier specifier); - int32_t TryParseCollection(const char *data, idx_t &pos, idx_t size, const string_t collection[], - idx_t collection_count); -}; - -} // namespace duckdb - - - -#include -#include -#include - -namespace duckdb { -struct CopyInfo; -struct StrpTimeFormat; - -//! The shifts array allows for linear searching of multi-byte values. For each position, it determines the next -//! position given that we encounter a byte with the given value. -/*! For example, if we have a string "ABAC", the shifts array will have the following values: - * [0] --> ['A'] = 1, all others = 0 - * [1] --> ['B'] = 2, ['A'] = 1, all others = 0 - * [2] --> ['A'] = 3, all others = 0 - * [3] --> ['C'] = 4 (match), 'B' = 2, 'A' = 1, all others = 0 - * Suppose we then search in the following string "ABABAC", our progression will be as follows: - * 'A' -> [1], 'B' -> [2], 'A' -> [3], 'B' -> [2], 'A' -> [3], 'C' -> [4] (match!) - */ -struct TextSearchShiftArray { - TextSearchShiftArray(); - explicit TextSearchShiftArray(string search_term); - - inline bool Match(uint8_t &position, uint8_t byte_value) { - if (position >= length) { - return false; - } - position = shifts[position * 255 + byte_value]; - return position == length; - } - - idx_t length; - unique_ptr shifts; -}; - -struct BufferedCSVReaderOptions { - //! The file path of the CSV file to read - string file_path; - //! Whether file is compressed or not, and if so which compression type - //! ("infer" (default; infer from file extension), "gzip", "none") - string compression = "infer"; - //! 
Whether or not to automatically detect dialect and datatypes - bool auto_detect = false; - //! Whether or not a delimiter was defined by the user - bool has_delimiter = false; - //! Delimiter to separate columns within each line - string delimiter = ","; - //! Whether or not a quote sign was defined by the user - bool has_quote = false; - //! Quote used for columns that contain reserved characters, e.g., delimiter - string quote = "\""; - //! Whether or not an escape character was defined by the user - bool has_escape = false; - //! Escape character to escape quote character - string escape; - //! Whether or not a header information was given by the user - bool has_header = false; - //! Whether or not the file has a header line - bool header = false; - //! How many leading rows to skip - idx_t skip_rows = 0; - //! Expected number of columns - idx_t num_cols = 0; - //! Specifies the string that represents a null value - string null_str; - //! True, if column with that index must skip null check - vector force_not_null; - //! Size of sample chunk used for dialect and type detection - idx_t sample_chunk_size = STANDARD_VECTOR_SIZE; - //! Number of sample chunks used for type detection - idx_t sample_chunks = 10; - //! Number of samples to buffer - idx_t buffer_size = STANDARD_VECTOR_SIZE * 100; - //! Consider all columns to be of type varchar - bool all_varchar = false; - //! The date format to use (if any is specified) - std::map date_format = {{LogicalTypeId::DATE, {}}, {LogicalTypeId::TIMESTAMP, {}}}; - //! Whether or not a type format is specified - std::map has_format = {{LogicalTypeId::DATE, false}, {LogicalTypeId::TIMESTAMP, false}}; - - std::string toString() const { - return "DELIMITER='" + delimiter + (has_delimiter ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) + - ", QUOTE='" + quote + (has_quote ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) + - ", ESCAPE='" + escape + (has_escape ? "'" : (auto_detect ? 
"' (auto detected)" : "' (default)")) + - ", HEADER=" + std::to_string(header) + - (has_header ? "" : (auto_detect ? " (auto detected)" : "' (default)")) + - ", SAMPLE_SIZE=" + std::to_string(sample_chunk_size * sample_chunks) + - ", ALL_VARCHAR=" + std::to_string(all_varchar); - } -}; - -enum class QuoteRule : uint8_t { QUOTES_RFC = 0, QUOTES_OTHER = 1, NO_QUOTES = 2 }; - -enum class ParserMode : uint8_t { PARSING = 0, SNIFFING_DIALECT = 1, SNIFFING_DATATYPES = 2, PARSING_HEADER = 3 }; - -static DataChunk DUMMY_CHUNK; - -//! Buffered CSV reader is a class that reads values from a stream and parses them as a CSV file -class BufferedCSVReader { - //! Initial buffer read size; can be extended for long lines - static constexpr idx_t INITIAL_BUFFER_SIZE = 16384; - //! Maximum CSV line size: specified because if we reach this amount, we likely have the wrong delimiters - static constexpr idx_t MAXIMUM_CSV_LINE_SIZE = 1048576; - ParserMode mode; - - //! Candidates for delimiter auto detection - vector delim_candidates = {",", "|", ";", "\t"}; - //! Candidates for quote rule auto detection - vector quoterule_candidates = {QuoteRule::QUOTES_RFC, QuoteRule::QUOTES_OTHER, QuoteRule::NO_QUOTES}; - //! Candidates for quote sign auto detection (per quote rule) - vector> quote_candidates_map = {{"\""}, {"\"", "'"}, {""}}; - //! 
Candidates for escape character auto detection (per quote rule) - vector> escape_candidates_map = {{""}, {"\\"}, {""}}; - -public: - BufferedCSVReader(ClientContext &context, BufferedCSVReaderOptions options, - const vector &requested_types = vector()); - BufferedCSVReader(BufferedCSVReaderOptions options, const vector &requested_types, - unique_ptr source); - - BufferedCSVReaderOptions options; - vector sql_types; - vector col_names; - unique_ptr source; - bool plain_file_source = false; - bool gzip_compressed = false; - idx_t file_size = 0; - - unique_ptr buffer; - idx_t buffer_size; - idx_t position; - idx_t start = 0; - - idx_t linenr = 0; - bool linenr_estimated = false; - - vector sniffed_column_counts; - bool row_empty = false; - idx_t sample_chunk_idx = 0; - bool jumping_samples = false; - bool end_of_file_reached = false; - - idx_t bytes_in_chunk = 0; - double bytes_per_line_avg = 0; - - vector> cached_buffers; - - TextSearchShiftArray delimiter_search, escape_search, quote_search; - - DataChunk parse_chunk; - - std::queue> cached_chunks; - -public: - //! Extract a single DataChunk from the CSV file and stores it in insert_chunk - void ParseCSV(DataChunk &insert_chunk); - -private: - //! Initialize Parser - void Initialize(const vector &requested_types); - //! Initializes the parse_chunk with varchar columns and aligns info with new number of cols - void InitParseChunk(idx_t num_cols); - //! Initializes the TextSearchShiftArrays for complex parser - void PrepareComplexParser(); - //! Extract a single DataChunk from the CSV file and stores it in insert_chunk - void ParseCSV(ParserMode mode, DataChunk &insert_chunk = DUMMY_CHUNK); - //! Sniffs CSV dialect and determines skip rows, header row, column types and column names - vector SniffCSV(const vector &requested_types); - //! Change the date format for the type to the string - void SetDateFormat(const string &format_specifier, const LogicalTypeId &sql_type); - //! 
Try to cast a string value to the specified sql type - bool TryCastValue(const Value &value, const LogicalType &sql_type); - //! Try to cast a vector of values to the specified sql type - bool TryCastVector(Vector &parse_chunk_col, idx_t size, const LogicalType &sql_type); - //! Skips skip_rows, reads header row from input stream - void SkipRowsAndReadHeader(idx_t skip_rows, bool skip_header); - //! Skip Byte Order Mark - void SkipBOM(); - //! Jumps back to the beginning of input stream and resets necessary internal states - void JumpToBeginning(idx_t skip_rows, bool skip_header); - //! Jumps back to the beginning of input stream and resets necessary internal states - bool JumpToNextSample(); - //! Resets the buffer - void ResetBuffer(); - //! Resets the steam - void ResetStream(); - //! Prepare candidate sets for auto detection based on user input - void PrepareCandidateSets(); - - //! Parses a CSV file with a one-byte delimiter, escape and quote character - void ParseSimpleCSV(DataChunk &insert_chunk); - //! Parses more complex CSV files with multi-byte delimiters, escapes or quotes - void ParseComplexCSV(DataChunk &insert_chunk); - - //! Adds a value to the current row - void AddValue(char *str_val, idx_t length, idx_t &column, vector &escape_positions); - //! Adds a row to the insert_chunk, returns true if the chunk is filled as a result of this row being added - bool AddRow(DataChunk &insert_chunk, idx_t &column); - //! Finalizes a chunk, parsing all values that have been added so far and adding them to the insert_chunk - void Flush(DataChunk &insert_chunk); - //! 
Reads a new buffer from the CSV file if the current one has been exhausted - bool ReadBuffer(idx_t &start); - - unique_ptr OpenCSV(ClientContext &context, const BufferedCSVReaderOptions &options); -}; - -} // namespace duckdb - - @@ -47604,17 +52365,16 @@ TextSearchShiftArray::TextSearchShiftArray(string search_term) : length(search_t } } -BufferedCSVReader::BufferedCSVReader(ClientContext &context, BufferedCSVReaderOptions options_p, +BufferedCSVReader::BufferedCSVReader(FileSystem &fs_p, BufferedCSVReaderOptions options_p, const vector &requested_types) - : options(move(options_p)), buffer_size(0), position(0), start(0) { - source = OpenCSV(context, options); + : fs(fs_p), options(move(options_p)), buffer_size(0), position(0), start(0) { + file_handle = OpenCSV(options); Initialize(requested_types); } -BufferedCSVReader::BufferedCSVReader(BufferedCSVReaderOptions options_p, const vector &requested_types, - unique_ptr ssource) - : options(move(options_p)), source(move(ssource)), buffer_size(0), position(0), start(0) { - Initialize(requested_types); +BufferedCSVReader::BufferedCSVReader(ClientContext &context, BufferedCSVReaderOptions options_p, + const vector &requested_types) + : BufferedCSVReader(FileSystem::GetFileSystem(context), move(options_p), requested_types) { } void BufferedCSVReader::Initialize(const vector &requested_types) { @@ -47626,7 +52386,8 @@ void BufferedCSVReader::Initialize(const vector &requested_types) { } } else { sql_types = requested_types; - JumpToBeginning(options.skip_rows, options.header); + ResetBuffer(); + SkipRowsAndReadHeader(options.skip_rows, options.header); } InitParseChunk(sql_types.size()); } @@ -47637,36 +52398,18 @@ void BufferedCSVReader::PrepareComplexParser() { quote_search = TextSearchShiftArray(options.quote); } -unique_ptr BufferedCSVReader::OpenCSV(ClientContext &context, const BufferedCSVReaderOptions &options) { - if (!FileSystem::GetFileSystem(context).FileExists(options.file_path)) { - throw IOException("File 
\"%s\" not found", options.file_path.c_str()); - } - unique_ptr result; - - gzip_compressed = false; - if (options.compression == "infer") { - if (StringUtil::EndsWith(StringUtil::Lower(options.file_path), ".gz")) { - gzip_compressed = true; - } +unique_ptr BufferedCSVReader::OpenCSV(const BufferedCSVReaderOptions &options) { + this->compression = FileCompressionType::UNCOMPRESSED; + if (options.compression == "infer" || options.compression == "auto") { + this->compression = FileCompressionType::AUTO_DETECT; } else if (options.compression == "gzip") { - gzip_compressed = true; + this->compression = FileCompressionType::GZIP; } - if (gzip_compressed) { - result = make_unique(options.file_path); - plain_file_source = false; - } else { - auto csv_local = make_unique(); - csv_local->open(options.file_path); - result = move(csv_local); - - // determine filesize - plain_file_source = true; - result->seekg(0, result->end); - file_size = (idx_t)result->tellg(); - result->clear(); - result->seekg(0, result->beg); - } + auto result = + fs.OpenFile(options.file_path.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK, this->compression); + plain_file_source = result->OnDiskFile() && result->CanSeek(); + file_size = result->GetFileSize(); return result; } @@ -47679,6 +52422,86 @@ static string GenerateColumnName(const idx_t total_cols, const idx_t col_number, return string(prefix + leading_zeros + value); } +// Helper function for UTF-8 aware space trimming +static string TrimWhitespace(const string &col_name) { + utf8proc_int32_t codepoint; + auto str = reinterpret_cast(col_name.c_str()); + idx_t size = col_name.size(); + // Find the first character that is not left trimmed + idx_t begin = 0; + while (begin < size) { + auto bytes = utf8proc_iterate(str + begin, size - begin, &codepoint); + D_ASSERT(bytes > 0); + if (utf8proc_category(codepoint) != UTF8PROC_CATEGORY_ZS) { + break; + } + begin += bytes; + } + + // Find the last character that is not right trimmed + idx_t 
end; + end = begin; + for (auto next = begin; next < col_name.size();) { + auto bytes = utf8proc_iterate(str + next, size - next, &codepoint); + D_ASSERT(bytes > 0); + next += bytes; + if (utf8proc_category(codepoint) != UTF8PROC_CATEGORY_ZS) { + end = next; + } + } + + // return the trimmed string + return col_name.substr(begin, end - begin); +} + +static string NormalizeColumnName(const string &col_name) { + // normalize UTF8 characters to NFKD + auto nfkd = utf8proc_NFKD((const utf8proc_uint8_t *)col_name.c_str(), col_name.size()); + const string col_name_nfkd = string((const char *)nfkd, strlen((const char *)nfkd)); + free(nfkd); + + // only keep ASCII characters 0-9 a-z A-Z and replace spaces with regular whitespace + string col_name_ascii = ""; + for (idx_t i = 0; i < col_name_nfkd.size(); i++) { + if (col_name_nfkd[i] == '_' || (col_name_nfkd[i] >= '0' && col_name_nfkd[i] <= '9') || + (col_name_nfkd[i] >= 'A' && col_name_nfkd[i] <= 'Z') || + (col_name_nfkd[i] >= 'a' && col_name_nfkd[i] <= 'z')) { + col_name_ascii += col_name_nfkd[i]; + } else if (StringUtil::CharacterIsSpace(col_name_nfkd[i])) { + col_name_ascii += " "; + } + } + + // trim whitespace and replace remaining whitespace by _ + string col_name_trimmed = TrimWhitespace(col_name_ascii); + string col_name_cleaned = ""; + bool in_whitespace = false; + for (idx_t i = 0; i < col_name_trimmed.size(); i++) { + if (col_name_trimmed[i] == ' ') { + if (!in_whitespace) { + col_name_cleaned += "_"; + in_whitespace = true; + } + } else { + col_name_cleaned += col_name_trimmed[i]; + in_whitespace = false; + } + } + + // don't leave string empty; if not empty, make lowercase + if (col_name_cleaned.empty()) { + col_name_cleaned = "_"; + } else { + col_name_cleaned = StringUtil::Lower(col_name_cleaned); + } + + // prepend _ if name starts with a digit or is a reserved keyword + if (KeywordHelper::IsKeyword(col_name_cleaned) || (col_name_cleaned[0] >= '0' && col_name_cleaned[0] <= '9')) { + col_name_cleaned = "_" + 
col_name_cleaned; + } + return col_name_cleaned; +} + void BufferedCSVReader::ResetBuffer() { buffer.reset(); buffer_size = 0; @@ -47688,13 +52511,12 @@ void BufferedCSVReader::ResetBuffer() { } void BufferedCSVReader::ResetStream() { - if (!plain_file_source && gzip_compressed) { + if (!file_handle->CanSeek()) { // seeking to the beginning appears to not be supported in all compiler/os-scenarios, // so we have to create a new stream source here for now - source = make_unique(options.file_path); + file_handle->Reset(); } else { - source->clear(); - source->seekg(0, source->beg); + file_handle->Seek(0); } linenr = 0; linenr_estimated = false; @@ -47704,41 +52526,35 @@ void BufferedCSVReader::ResetStream() { } void BufferedCSVReader::InitParseChunk(idx_t num_cols) { - bytes_in_chunk = 0; - // adapt not null info if (options.force_not_null.size() != num_cols) { options.force_not_null.resize(num_cols, false); } + if (num_cols == parse_chunk.ColumnCount()) { + parse_chunk.Reset(); + } else { + parse_chunk.Destroy(); - parse_chunk.Destroy(); - - // initialize the parse_chunk with a set of VARCHAR types - vector varchar_types(num_cols, LogicalType::VARCHAR); - parse_chunk.Initialize(varchar_types); + // initialize the parse_chunk with a set of VARCHAR types + vector varchar_types(num_cols, LogicalType::VARCHAR); + parse_chunk.Initialize(varchar_types); + } } void BufferedCSVReader::JumpToBeginning(idx_t skip_rows = 0, bool skip_header = false) { ResetBuffer(); ResetStream(); - SkipBOM(); SkipRowsAndReadHeader(skip_rows, skip_header); sample_chunk_idx = 0; -} - -void BufferedCSVReader::SkipBOM() { - char bom_buffer[3]; - source->read(bom_buffer, 3); - if (bom_buffer[0] != '\xEF' || bom_buffer[1] != '\xBB' || bom_buffer[2] != '\xBF') { - ResetStream(); - } + bytes_in_chunk = 0; + end_of_file_reached = false; + bom_checked = false; } void BufferedCSVReader::SkipRowsAndReadHeader(idx_t skip_rows, bool skip_header) { for (idx_t i = 0; i < skip_rows; i++) { // ignore skip rows 
- string read_line; - getline(*source, read_line); + string read_line = file_handle->ReadLine(); linenr++; } @@ -47789,12 +52605,11 @@ bool BufferedCSVReader::JumpToNextSample() { // calculate offset to end of the current partition int64_t offset = partition_size - bytes_in_chunk - remaining_bytes_in_buffer; - idx_t current_pos = (idx_t)source->tellg(); + auto current_pos = file_handle->SeekPosition(); if (current_pos + offset < file_size) { // set position in stream and clear failure bits - source->clear(); - source->seekg(offset, source->cur); + file_handle->Seek(current_pos + offset); // estimate linenr linenr += (idx_t)round((offset + remaining_bytes_in_buffer) / bytes_per_line_avg); @@ -47803,7 +52618,7 @@ bool BufferedCSVReader::JumpToNextSample() { // seek backwards from the end in last chunk and hope to catch the end of the file // TODO: actually it would be good to make sure that the end of file is being reached, because // messy end-lines are quite common. For this case, however, we first need a skip_end detection anyways. 
- source->seekg(-std::streamoff(bytes_in_chunk), source->end); + file_handle->Seek(file_size - bytes_in_chunk); // estimate linenr linenr = (idx_t)round((file_size - bytes_in_chunk) / bytes_per_line_avg); @@ -47815,8 +52630,7 @@ bool BufferedCSVReader::JumpToNextSample() { // seek beginning of next line // FIXME: if this jump ends up in a quoted linebreak, we will have a problem - string read_line; - getline(*source, read_line); + string read_line = file_handle->ReadLine(); linenr++; sample_chunk_idx++; @@ -47832,85 +52646,124 @@ void BufferedCSVReader::SetDateFormat(const string &format_specifier, const Logi } bool BufferedCSVReader::TryCastValue(const Value &value, const LogicalType &sql_type) { - try { - if (options.has_format[LogicalTypeId::DATE] && sql_type.id() == LogicalTypeId::DATE) { - options.date_format[LogicalTypeId::DATE].ParseDate(string_t(value.str_value)); - } else if (options.has_format[LogicalTypeId::TIMESTAMP] && sql_type.id() == LogicalTypeId::TIMESTAMP) { - options.date_format[LogicalTypeId::TIMESTAMP].ParseTimestamp(string_t(value.str_value)); - } else { - value.CastAs(sql_type, true); - } - return true; - } catch (...) 
{ - return false; + if (options.has_format[LogicalTypeId::DATE] && sql_type.id() == LogicalTypeId::DATE) { + date_t result; + string error_message; + return options.date_format[LogicalTypeId::DATE].TryParseDate(string_t(value.str_value), result, error_message); + } else if (options.has_format[LogicalTypeId::TIMESTAMP] && sql_type.id() == LogicalTypeId::TIMESTAMP) { + timestamp_t result; + string error_message; + return options.date_format[LogicalTypeId::TIMESTAMP].TryParseTimestamp(string_t(value.str_value), result, + error_message); + } else { + Value new_value; + string error_message; + return value.TryCastAs(sql_type, new_value, &error_message, true); } - return false; } -bool BufferedCSVReader::TryCastVector(Vector &parse_chunk_col, idx_t size, const LogicalType &sql_type) { - try { - // try vector-cast from string to sql_type - Vector dummy_result(sql_type); - if (options.has_format[LogicalTypeId::DATE] && sql_type == LogicalTypeId::DATE) { - // use the date format to cast the chunk - UnaryExecutor::Execute(parse_chunk_col, dummy_result, size, [&](string_t input) { - return options.date_format[LogicalTypeId::DATE].ParseDate(input); - }); - } else if (options.has_format[LogicalTypeId::TIMESTAMP] && sql_type == LogicalTypeId::TIMESTAMP) { - // use the date format to cast the chunk - UnaryExecutor::Execute(parse_chunk_col, dummy_result, size, [&](string_t input) { - return options.date_format[LogicalTypeId::TIMESTAMP].ParseTimestamp(input); - }); - } else { - // target type is not varchar: perform a cast - VectorOperations::Cast(parse_chunk_col, dummy_result, size, true); +struct TryCastDateOperator { + static bool Operation(BufferedCSVReaderOptions &options, string_t input, date_t &result, string &error_message) { + return options.date_format[LogicalTypeId::DATE].TryParseDate(input, result, error_message); + } +}; + +struct TryCastTimestampOperator { + static bool Operation(BufferedCSVReaderOptions &options, string_t input, timestamp_t &result, + string 
&error_message) { + return options.date_format[LogicalTypeId::TIMESTAMP].TryParseTimestamp(input, result, error_message); + } +}; + +template +static bool TemplatedTryCastDateVector(BufferedCSVReaderOptions &options, Vector &input_vector, Vector &result_vector, + idx_t count, string &error_message) { + D_ASSERT(input_vector.GetType().id() == LogicalTypeId::VARCHAR); + bool all_converted = true; + UnaryExecutor::Execute(input_vector, result_vector, count, [&](string_t input) { + T result; + if (!OP::Operation(options, input, result, error_message)) { + all_converted = false; } - } catch (const Exception &e) { - return false; + return result; + }); + return all_converted; +} + +bool TryCastDateVector(BufferedCSVReaderOptions &options, Vector &input_vector, Vector &result_vector, idx_t count, + string &error_message) { + return TemplatedTryCastDateVector(options, input_vector, result_vector, count, + error_message); +} + +bool TryCastTimestampVector(BufferedCSVReaderOptions &options, Vector &input_vector, Vector &result_vector, idx_t count, + string &error_message) { + return TemplatedTryCastDateVector(options, input_vector, result_vector, + count, error_message); +} + +bool BufferedCSVReader::TryCastVector(Vector &parse_chunk_col, idx_t size, const LogicalType &sql_type) { + // try vector-cast from string to sql_type + Vector dummy_result(sql_type); + if (options.has_format[LogicalTypeId::DATE] && sql_type == LogicalTypeId::DATE) { + // use the date format to cast the chunk + string error_message; + return TryCastDateVector(options, parse_chunk_col, dummy_result, size, error_message); + } else if (options.has_format[LogicalTypeId::TIMESTAMP] && sql_type == LogicalTypeId::TIMESTAMP) { + // use the timestamp format to cast the chunk + string error_message; + return TryCastTimestampVector(options, parse_chunk_col, dummy_result, size, error_message); + } else { + // target type is not varchar: perform a cast + string error_message; + return 
VectorOperations::TryCast(parse_chunk_col, dummy_result, size, &error_message, true); } - return true; } -void BufferedCSVReader::PrepareCandidateSets() { +enum class QuoteRule : uint8_t { QUOTES_RFC = 0, QUOTES_OTHER = 1, NO_QUOTES = 2 }; + +void BufferedCSVReader::DetectDialect(const vector &requested_types, + BufferedCSVReaderOptions &original_options, + vector &info_candidates, idx_t &best_num_cols) { + // set up the candidates we consider for delimiter and quote rules based on user input + vector delim_candidates; + vector quoterule_candidates; + vector> quote_candidates_map; + vector> escape_candidates_map = {{""}, {"\\"}, {""}}; + if (options.has_delimiter) { + // user provided a delimiter: use that delimiter delim_candidates = {options.delimiter}; + } else { + // no delimiter provided: try standard/common delimiters + delim_candidates = {",", "|", ";", "\t"}; } if (options.has_quote) { + // user provided quote: use that quote rule quote_candidates_map = {{options.quote}, {options.quote}, {options.quote}}; + } else { + // no quote rule provided: use standard/common quotes + quote_candidates_map = {{"\""}, {"\"", "'"}, {""}}; } if (options.has_escape) { + // user provided escape: use that escape rule if (options.escape.empty()) { quoterule_candidates = {QuoteRule::QUOTES_RFC}; } else { quoterule_candidates = {QuoteRule::QUOTES_OTHER}; } escape_candidates_map[static_cast(quoterule_candidates[0])] = {options.escape}; - } -} - -vector BufferedCSVReader::SniffCSV(const vector &requested_types) { - for (auto &type : requested_types) { - // auto detect for blobs not supported: there may be invalid UTF-8 in the file - if (type.id() == LogicalTypeId::BLOB) { - return requested_types; - } + } else { + // no escape provided: try standard/common escapes + quoterule_candidates = {QuoteRule::QUOTES_RFC, QuoteRule::QUOTES_OTHER, QuoteRule::NO_QUOTES}; } - // ####### - // ### dialect detection - // ####### - - PrepareCandidateSets(); - BufferedCSVReaderOptions 
original_options = options; - vector info_candidates; idx_t best_consistent_rows = 0; - idx_t best_num_cols = 0; - - for (QuoteRule quoterule : quoterule_candidates) { - vector quote_candidates = quote_candidates_map[static_cast(quoterule)]; + for (auto quoterule : quoterule_candidates) { + const auto "e_candidates = quote_candidates_map[static_cast(quoterule)]; for (const auto "e : quote_candidates) { for (const auto &delim : delim_candidates) { - vector escape_candidates = escape_candidates_map[static_cast(quoterule)]; + const auto &escape_candidates = escape_candidates_map[static_cast(quoterule)]; for (const auto &escape : escape_candidates) { BufferedCSVReaderOptions sniff_info = original_options; sniff_info.delimiter = delim; @@ -47922,9 +52775,7 @@ vector BufferedCSVReader::SniffCSV(const vector &reque JumpToBeginning(original_options.skip_rows); sniffed_column_counts.clear(); - try { - ParseCSV(ParserMode::SNIFFING_DIALECT); - } catch (const InvalidInputException &e) { + if (!TryParseCSV(ParserMode::SNIFFING_DIALECT)) { continue; } @@ -47978,40 +52829,19 @@ vector BufferedCSVReader::SniffCSV(const vector &reque } } } +} - // if not dialect candidate was found, then file was most likely empty and we throw an exception - if (info_candidates.empty()) { - throw InvalidInputException( - "Error in file \"%s\": CSV options could not be auto-detected. 
Consider setting parser options manually.", - options.file_path); - } - - // ####### - // ### type detection (initial) - // ####### - // type candidates, ordered by descending specificity (~ from high to low) - vector type_candidates = { - LogicalType::VARCHAR, LogicalType::TIMESTAMP, - LogicalType::DATE, LogicalType::TIME, - LogicalType::DOUBLE, /* LogicalType::FLOAT,*/ LogicalType::BIGINT, - LogicalType::INTEGER, /*LogicalType::SMALLINT, LogicalType::TINYINT,*/ LogicalType::BOOLEAN, - LogicalType::SQLNULL}; - - // format template candidates, ordered by descending specificity (~ from high to low) - std::map> format_template_candidates = { - {LogicalTypeId::DATE, {"%m-%d-%Y", "%m-%d-%y", "%d-%m-%Y", "%d-%m-%y", "%Y-%m-%d", "%y-%m-%d"}}, - {LogicalTypeId::TIMESTAMP, - {"%Y-%m-%d %H:%M:%S.%f", "%m-%d-%Y %I:%M:%S %p", "%m-%d-%y %I:%M:%S %p", "%d-%m-%Y %H:%M:%S", - "%d-%m-%y %H:%M:%S", "%Y-%m-%d %H:%M:%S", "%y-%m-%d %H:%M:%S"}}, - }; - - // check which info candidate leads to minimum amount of non-varchar columns... +void BufferedCSVReader::DetectCandidateTypes(const vector &type_candidates, + const map> &format_template_candidates, + const vector &info_candidates, + BufferedCSVReaderOptions &original_options, idx_t best_num_cols, + vector> &best_sql_types_candidates, + std::map> &best_format_candidates, + DataChunk &best_header_row) { BufferedCSVReaderOptions best_options; idx_t min_varchar_cols = best_num_cols + 1; - vector> best_sql_types_candidates; - std::map> best_format_candidates; - DataChunk best_header_row; + // check which info candidate leads to minimum amount of non-varchar columns... 
for (const auto &t : format_template_candidates) { best_format_candidates[t.first].clear(); } @@ -48062,14 +52892,17 @@ vector BufferedCSVReader::SniffCSV(const vector &reque if (!has_format_candidates[sql_type.id()]) { has_format_candidates[sql_type.id()] = true; // order by preference - for (const auto &t : format_template_candidates[sql_type.id()]) { - const auto format_string = GenerateDateFormat(separator, t); - // don't parse ISO 8601 - if (format_string.find("%Y-%m-%d") == string::npos) { - type_format_candidates.emplace_back(format_string); + auto entry = format_template_candidates.find(sql_type.id()); + if (entry != format_template_candidates.end()) { + const auto &format_template_list = entry->second; + for (const auto &t : format_template_list) { + const auto format_string = GenerateDateFormat(separator, t); + // don't parse ISO 8601 + if (format_string.find("%Y-%m-%d") == string::npos) { + type_format_candidates.emplace_back(format_string); + } } } - // initialise the first candidate options.has_format[sql_type.id()] = true; // all formats are constructed to be valid @@ -48156,11 +52989,10 @@ vector BufferedCSVReader::SniffCSV(const vector &reque SetDateFormat(best.second.back(), best.first); } } +} - // ####### - // ### header detection - // ####### - +void BufferedCSVReader::DetectHeader(const vector> &best_sql_types_candidates, + const DataChunk &best_header_row) { // information for header detection bool first_row_consistent = true; bool first_row_nulls = false; @@ -48169,12 +53001,10 @@ vector BufferedCSVReader::SniffCSV(const vector &reque first_row_nulls = true; for (idx_t col = 0; col < best_sql_types_candidates.size(); col++) { auto dummy_val = best_header_row.GetValue(col, 0); - // try cast as SQLNULL - try { - dummy_val.CastAs(LogicalType::SQLNULL, true); - } catch (const Exception &e) { + if (!dummy_val.is_null) { first_row_nulls = false; } + // try cast to sql_type of column const auto &sql_type = best_sql_types_candidates[col].back(); if 
(!TryCastValue(dummy_val, sql_type)) { @@ -48185,26 +53015,35 @@ vector BufferedCSVReader::SniffCSV(const vector &reque // update parser info, and read, generate & set col_names based on previous findings if (((!first_row_consistent || first_row_nulls) && !options.has_header) || (options.has_header && options.header)) { options.header = true; - vector t_col_names; + unordered_map name_collision_count; + // get header names from CSV for (idx_t col = 0; col < options.num_cols; col++) { const auto &val = best_header_row.GetValue(col, 0); string col_name = val.ToString(); + + // generate name if field is empty if (col_name.empty() || val.is_null) { col_name = GenerateColumnName(options.num_cols, col); } - // We'll keep column names as they appear in the file, no canonicalization - // col_name = StringUtil::Lower(col_name); - t_col_names.push_back(col_name); - } - for (idx_t col = 0; col < t_col_names.size(); col++) { - string col_name = t_col_names[col]; - idx_t exists_n_times = std::count(t_col_names.begin(), t_col_names.end(), col_name); - idx_t exists_n_times_before = std::count(t_col_names.begin(), t_col_names.begin() + col, col_name); - if (exists_n_times > 1) { - col_name = GenerateColumnName(exists_n_times, exists_n_times_before, col_name + "_"); + + // normalize names or at least trim whitespace + if (options.normalize_names) { + col_name = NormalizeColumnName(col_name); + } else { + col_name = TrimWhitespace(col_name); } + + // avoid duplicate header names + const string col_name_raw = col_name; + while (name_collision_count.find(col_name) != name_collision_count.end()) { + name_collision_count[col_name] += 1; + col_name = col_name + "_" + to_string(name_collision_count[col_name]); + } + col_names.push_back(col_name); + name_collision_count[col_name] = 0; } + } else { options.header = false; idx_t total_columns = parse_chunk.ColumnCount(); @@ -48213,12 +53052,13 @@ vector BufferedCSVReader::SniffCSV(const vector &reque col_names.push_back(column_name); } } +} 
- // ####### - // ### type detection (refining) - // ####### - - // sql_types and parse_chunk have to be in line with new info +vector BufferedCSVReader::RefineTypeDetection(const vector &type_candidates, + const vector &requested_types, + vector> &best_sql_types_candidates, + map> &best_format_candidates) { + // for the type refine we set the SQL types to VARCHAR for all columns sql_types.clear(); sql_types.assign(options.num_cols, LogicalType::VARCHAR); @@ -48241,9 +53081,7 @@ vector BufferedCSVReader::SniffCSV(const vector &reque while (JumpToNextSample()) { InitParseChunk(sql_types.size()); // if jump ends up a bad line, we just skip this chunk - try { - ParseCSV(ParserMode::SNIFFING_DATATYPES); - } catch (const InvalidInputException &e) { + if (!TryParseCSV(ParserMode::SNIFFING_DATATYPES)) { continue; } for (idx_t col = 0; col < parse_chunk.ColumnCount(); col++) { @@ -48289,8 +53127,7 @@ vector BufferedCSVReader::SniffCSV(const vector &reque // create a new chunk and fill it with the remainder auto chunk = make_unique(); auto parse_chunk_types = parse_chunk.GetTypes(); - chunk->Initialize(parse_chunk_types); - chunk->Reference(parse_chunk); + chunk->Move(parse_chunk); cached_chunks.push(move(chunk)); } else { while (!cached_chunks.empty()) { @@ -48313,7 +53150,65 @@ vector BufferedCSVReader::SniffCSV(const vector &reque return detected_types; } -void BufferedCSVReader::ParseComplexCSV(DataChunk &insert_chunk) { +vector BufferedCSVReader::SniffCSV(const vector &requested_types) { + for (auto &type : requested_types) { + // auto detect for blobs not supported: there may be invalid UTF-8 in the file + if (type.id() == LogicalTypeId::BLOB) { + return requested_types; + } + } + + // ####### + // ### dialect detection + // ####### + BufferedCSVReaderOptions original_options = options; + vector info_candidates; + idx_t best_num_cols = 0; + + DetectDialect(requested_types, original_options, info_candidates, best_num_cols); + + // if no dialect candidate was found, 
then file was most likely empty and we throw an exception + if (info_candidates.empty()) { + throw InvalidInputException( + "Error in file \"%s\": CSV options could not be auto-detected. Consider setting parser options manually.", + options.file_path); + } + + // ####### + // ### type detection (initial) + // ####### + // type candidates, ordered by descending specificity (~ from high to low) + vector type_candidates = { + LogicalType::VARCHAR, LogicalType::TIMESTAMP, + LogicalType::DATE, LogicalType::TIME, + LogicalType::DOUBLE, /* LogicalType::FLOAT,*/ LogicalType::BIGINT, + LogicalType::INTEGER, /*LogicalType::SMALLINT, LogicalType::TINYINT,*/ LogicalType::BOOLEAN, + LogicalType::SQLNULL}; + // format template candidates, ordered by descending specificity (~ from high to low) + std::map> format_template_candidates = { + {LogicalTypeId::DATE, {"%m-%d-%Y", "%m-%d-%y", "%d-%m-%Y", "%d-%m-%y", "%Y-%m-%d", "%y-%m-%d"}}, + {LogicalTypeId::TIMESTAMP, + {"%Y-%m-%d %H:%M:%S.%f", "%m-%d-%Y %I:%M:%S %p", "%m-%d-%y %I:%M:%S %p", "%d-%m-%Y %H:%M:%S", + "%d-%m-%y %H:%M:%S", "%Y-%m-%d %H:%M:%S", "%y-%m-%d %H:%M:%S"}}, + }; + vector> best_sql_types_candidates; + map> best_format_candidates; + DataChunk best_header_row; + DetectCandidateTypes(type_candidates, format_template_candidates, info_candidates, original_options, best_num_cols, + best_sql_types_candidates, best_format_candidates, best_header_row); + + // ####### + // ### header detection + // ####### + DetectHeader(best_sql_types_candidates, best_header_row); + + // ####### + // ### type detection (refining) + // ####### + return RefineTypeDetection(type_candidates, requested_types, best_sql_types_candidates, best_format_candidates); +} + +bool BufferedCSVReader::TryParseComplexCSV(DataChunk &insert_chunk, string &error_message) { // used for parsing algorithm bool finished_chunk = false; idx_t column = 0; @@ -48324,7 +53219,7 @@ void BufferedCSVReader::ParseComplexCSV(DataChunk &insert_chunk) { // read values into the 
buffer (if any) if (position >= buffer_size) { if (!ReadBuffer(start)) { - return; + return true; } } // start parsing the first value @@ -48407,7 +53302,7 @@ add_row : { } else { // \n newline, move to value start if (finished_chunk) { - return; + return true; } goto value_start; } @@ -48431,8 +53326,9 @@ add_row : { } } while (ReadBuffer(start)); // still in quoted state at the end of the file, error: - throw InvalidInputException("Error in file \"%s\" on line %s: unterminated quotes. (%s)", options.file_path, - GetLineNumberStr(linenr, linenr_estimated).c_str(), options.toString()); + error_message = StringUtil::Format("Error in file \"%s\" on line %s: unterminated quotes. (%s)", options.file_path, + GetLineNumberStr(linenr, linenr_estimated).c_str(), options.toString()); + return false; unquote: /* state: unquote */ // this state handles the state directly after we unquote @@ -48458,10 +53354,11 @@ add_row : { delimiter_search.Match(delimiter_pos, buffer[position]); count++; if (count > delimiter_pos && count > quote_pos) { - throw InvalidInputException( + error_message = StringUtil::Format( "Error in file \"%s\" on line %s: quote should be followed by end of value, end " "of row or another quote. (%s)", options.file_path, GetLineNumberStr(linenr, linenr_estimated).c_str(), options.toString()); + return false; } if (delimiter_pos == options.delimiter.size()) { // quote followed by delimiter, add value @@ -48475,9 +53372,10 @@ add_row : { } } } while (ReadBuffer(start)); - throw InvalidInputException( + error_message = StringUtil::Format( "Error in file \"%s\" on line %s: quote should be followed by end of value, end of row or another quote. 
(%s)", options.file_path, GetLineNumberStr(linenr, linenr_estimated).c_str(), options.toString()); + return false; handle_escape: escape_pos = 0; quote_pos = 0; @@ -48489,9 +53387,10 @@ add_row : { escape_search.Match(escape_pos, buffer[position]); count++; if (count > escape_pos && count > quote_pos) { - throw InvalidInputException( + error_message = StringUtil::Format( "Error in file \"%s\" on line %s: neither QUOTE nor ESCAPE is proceeded by ESCAPE. (%s)", options.file_path, GetLineNumberStr(linenr, linenr_estimated).c_str(), options.toString()); + return false; } if (quote_pos == options.quote.size() || escape_pos == options.escape.size()) { // found quote or escape: move back to quoted state @@ -48499,9 +53398,10 @@ add_row : { } } } while (ReadBuffer(start)); - throw InvalidInputException( - "Error in file \"%s\" on line %s: neither QUOTE nor ESCAPE is proceeded by ESCAPE. (%s)", options.file_path, - GetLineNumberStr(linenr, linenr_estimated).c_str(), options.toString()); + error_message = + StringUtil::Format("Error in file \"%s\" on line %s: neither QUOTE nor ESCAPE is proceeded by ESCAPE. 
(%s)", + options.file_path, GetLineNumberStr(linenr, linenr_estimated).c_str(), options.toString()); + return false; carriage_return: /* state: carriage_return */ // this stage optionally skips a newline (\n) character, which allows \r\n to be interpreted as a single line @@ -48514,12 +53414,12 @@ add_row : { } } if (finished_chunk) { - return; + return true; } goto value_start; final_state: if (finished_chunk) { - return; + return true; } if (column > 0 || position > start) { // remaining values to be added to the chunk @@ -48533,9 +53433,10 @@ add_row : { } end_of_file_reached = true; + return true; } -void BufferedCSVReader::ParseSimpleCSV(DataChunk &insert_chunk) { +bool BufferedCSVReader::TryParseSimpleCSV(DataChunk &insert_chunk, string &error_message) { // used for parsing algorithm bool finished_chunk = false; idx_t column = 0; @@ -48545,7 +53446,7 @@ void BufferedCSVReader::ParseSimpleCSV(DataChunk &insert_chunk) { // read values into the buffer (if any) if (position >= buffer_size) { if (!ReadBuffer(start)) { - return; + return true; } } // start parsing the first value @@ -48608,7 +53509,7 @@ add_row : { } else { // \n newline, move to value start if (finished_chunk) { - return; + return true; } goto value_start; } @@ -48655,24 +53556,27 @@ add_row : { offset = 1; goto add_row; } else { - throw InvalidInputException("Error in file \"%s\" on line %s: quote should be followed by end of value, end of " - "row or another quote. (%s)", - options.file_path, GetLineNumberStr(linenr, linenr_estimated).c_str(), - options.toString()); + error_message = StringUtil::Format( + "Error in file \"%s\" on line %s: quote should be followed by end of value, end of " + "row or another quote. 
(%s)", + options.file_path, GetLineNumberStr(linenr, linenr_estimated).c_str(), options.toString()); + return false; } handle_escape: /* state: handle_escape */ // escape should be followed by a quote or another escape character position++; if (position >= buffer_size && !ReadBuffer(start)) { - throw InvalidInputException( + error_message = StringUtil::Format( "Error in file \"%s\" on line %s: neither QUOTE nor ESCAPE is proceeded by ESCAPE. (%s)", options.file_path, GetLineNumberStr(linenr, linenr_estimated).c_str(), options.toString()); + return false; } if (buffer[position] != options.quote[0] && buffer[position] != options.escape[0]) { - throw InvalidInputException( + error_message = StringUtil::Format( "Error in file \"%s\" on line %s: neither QUOTE nor ESCAPE is proceeded by ESCAPE. (%s)", options.file_path, GetLineNumberStr(linenr, linenr_estimated).c_str(), options.toString()); + return false; } // escape was followed by quote or escape, go back to quoted state goto in_quotes; @@ -48689,12 +53593,12 @@ add_row : { } } if (finished_chunk) { - return; + return true; } goto value_start; final_state: if (finished_chunk) { - return; + return true; } if (column > 0 || position > start) { @@ -48710,6 +53614,7 @@ add_row : { } end_of_file_reached = true; + return true; } bool BufferedCSVReader::ReadBuffer(idx_t &start) { @@ -48730,9 +53635,8 @@ bool BufferedCSVReader::ReadBuffer(idx_t &start) { // remaining from last buffer: copy it here memcpy(buffer.get(), old_buffer.get() + start, remaining); } - source->read(buffer.get() + remaining, buffer_read_size); + idx_t read_count = file_handle->Read(buffer.get() + remaining, buffer_read_size); - idx_t read_count = source->eof() ? 
source->gcount() : buffer_read_size; bytes_in_chunk += read_count; buffer_size = remaining + read_count; buffer[buffer_size] = '\0'; @@ -48741,6 +53645,12 @@ bool BufferedCSVReader::ReadBuffer(idx_t &start) { } start = 0; position = remaining; + if (!bom_checked) { + bom_checked = true; + if (read_count >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF') { + position += 3; + } + } return read_count > 0; } @@ -48751,22 +53661,39 @@ void BufferedCSVReader::ParseCSV(DataChunk &insert_chunk) { cached_buffers.clear(); } else { auto &chunk = cached_chunks.front(); - parse_chunk.Reference(*chunk); + parse_chunk.Move(*chunk); cached_chunks.pop(); Flush(insert_chunk); return; } - ParseCSV(ParserMode::PARSING, insert_chunk); + string error_message; + if (!TryParseCSV(ParserMode::PARSING, insert_chunk, error_message)) { + throw InvalidInputException(error_message); + } } -void BufferedCSVReader::ParseCSV(ParserMode parser_mode, DataChunk &insert_chunk) { +bool BufferedCSVReader::TryParseCSV(ParserMode mode) { + DataChunk dummy_chunk; + string error_message; + return TryParseCSV(mode, dummy_chunk, error_message); +} + +void BufferedCSVReader::ParseCSV(ParserMode mode) { + DataChunk dummy_chunk; + string error_message; + if (!TryParseCSV(mode, dummy_chunk, error_message)) { + throw InvalidInputException(error_message); + } +} + +bool BufferedCSVReader::TryParseCSV(ParserMode parser_mode, DataChunk &insert_chunk, string &error_message) { mode = parser_mode; if (options.quote.size() <= 1 && options.escape.size() <= 1 && options.delimiter.size() == 1) { - ParseSimpleCSV(insert_chunk); + return TryParseSimpleCSV(insert_chunk, error_message); } else { - ParseComplexCSV(insert_chunk); + return TryParseComplexCSV(insert_chunk, error_message); } } @@ -48902,24 +53829,23 @@ void BufferedCSVReader::Flush(DataChunk &insert_chunk) { } insert_chunk.data[col_idx].Reference(parse_chunk.data[col_idx]); } else { - try { - if (options.has_format[LogicalTypeId::DATE] && 
sql_types[col_idx].id() == LogicalTypeId::DATE) { - // use the date format to cast the chunk - UnaryExecutor::Execute( - parse_chunk.data[col_idx], insert_chunk.data[col_idx], parse_chunk.size(), - [&](string_t input) { return options.date_format[LogicalTypeId::DATE].ParseDate(input); }); - } else if (options.has_format[LogicalTypeId::TIMESTAMP] && - sql_types[col_idx].id() == LogicalTypeId::TIMESTAMP) { - // use the date format to cast the chunk - UnaryExecutor::Execute( - parse_chunk.data[col_idx], insert_chunk.data[col_idx], parse_chunk.size(), [&](string_t input) { - return options.date_format[LogicalTypeId::TIMESTAMP].ParseTimestamp(input); - }); - } else { - // target type is not varchar: perform a cast - VectorOperations::Cast(parse_chunk.data[col_idx], insert_chunk.data[col_idx], parse_chunk.size()); - } - } catch (const Exception &e) { + string error_message; + bool success; + if (options.has_format[LogicalTypeId::DATE] && sql_types[col_idx].id() == LogicalTypeId::DATE) { + // use the date format to cast the chunk + success = TryCastDateVector(options, parse_chunk.data[col_idx], insert_chunk.data[col_idx], + parse_chunk.size(), error_message); + } else if (options.has_format[LogicalTypeId::TIMESTAMP] && + sql_types[col_idx].id() == LogicalTypeId::TIMESTAMP) { + // use the date format to cast the chunk + success = TryCastTimestampVector(options, parse_chunk.data[col_idx], insert_chunk.data[col_idx], + parse_chunk.size(), error_message); + } else { + // target type is not varchar: perform a cast + success = VectorOperations::TryCast(parse_chunk.data[col_idx], insert_chunk.data[col_idx], + parse_chunk.size(), &error_message); + } + if (!success) { string col_name = to_string(col_idx); if (col_idx < col_names.size()) { col_name = "\"" + col_names[col_idx] + "\""; @@ -48930,11 +53856,11 @@ void BufferedCSVReader::Flush(DataChunk &insert_chunk) { "options: %s. 
Consider either increasing the sample size " "(SAMPLE_SIZE=X [X rows] or SAMPLE_SIZE=-1 [all rows]), " "or skipping column conversion (ALL_VARCHAR=1)", - e.what(), col_name, linenr - parse_chunk.size() + 1, linenr, + error_message, col_name, linenr - parse_chunk.size() + 1, linenr, options.toString()); } else { throw InvalidInputException("%s between line %llu and %llu in column %s. Parser options: %s ", - e.what(), linenr - parse_chunk.size(), linenr, col_name, + error_message, linenr - parse_chunk.size(), linenr, col_name, options.toString()); } } @@ -48972,12 +53898,12 @@ class PhysicalCopyToFile : public PhysicalSink { unique_ptr bind_data; public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; void Sink(ExecutionContext &context, GlobalOperatorState &gstate, LocalSinkState &lstate, - DataChunk &input) override; + DataChunk &input) const override; void Combine(ExecutionContext &context, GlobalOperatorState &gstate, LocalSinkState &lstate) override; - void Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr gstate) override; + bool Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr gstate) override; unique_ptr GetLocalSinkState(ExecutionContext &context) override; unique_ptr GetGlobalState(ClientContext &context) override; }; @@ -49006,7 +53932,8 @@ class CopyToFunctionLocalState : public LocalSinkState { unique_ptr local_state; }; -void PhysicalCopyToFile::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) { +void PhysicalCopyToFile::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state) const { auto &g = (CopyToFunctionGlobalState &)*sink_state; chunk.SetCardinality(1); @@ -49016,7 +53943,7 @@ void PhysicalCopyToFile::GetChunkInternal(ExecutionContext &context, DataChunk & } void 
PhysicalCopyToFile::Sink(ExecutionContext &context, GlobalOperatorState &gstate, LocalSinkState &lstate, - DataChunk &input) { + DataChunk &input) const { auto &g = (CopyToFunctionGlobalState &)gstate; auto &l = (CopyToFunctionLocalState &)lstate; @@ -49032,12 +53959,13 @@ void PhysicalCopyToFile::Combine(ExecutionContext &context, GlobalOperatorState function.copy_to_combine(context.client, *bind_data, *g.global_state, *l.local_state); } } -void PhysicalCopyToFile::Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr gstate) { +bool PhysicalCopyToFile::Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr gstate) { auto g = (CopyToFunctionGlobalState *)gstate.get(); if (function.copy_to_finalize) { function.copy_to_finalize(context, *bind_data, *g->global_state); } PhysicalSink::Finalize(pipeline, context, move(gstate)); + return true; } unique_ptr PhysicalCopyToFile::GetLocalSinkState(ExecutionContext &context) { @@ -49078,9 +54006,10 @@ class PhysicalDelete : public PhysicalSink { public: unique_ptr GetGlobalState(ClientContext &context) override; - void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, DataChunk &input) override; + void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, + DataChunk &input) const override; - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; }; } // namespace duckdb @@ -49089,7 +54018,7 @@ class PhysicalDelete : public PhysicalSink { -#include + namespace duckdb { @@ -49101,17 +54030,16 @@ class DeleteGlobalState : public GlobalOperatorState { DeleteGlobalState() : deleted_count(0) { } - std::atomic deleted_count; + atomic deleted_count; }; void PhysicalDelete::Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, - DataChunk &input) { + 
DataChunk &input) const { auto &gstate = (DeleteGlobalState &)state; // delete data in the base table // the row ids are given to us as the last column of the child chunk - table.Delete(tableref, context.client, input.data[row_id_index], input.size()); - gstate.deleted_count += input.size(); + gstate.deleted_count += table.Delete(tableref, context.client, input.data[row_id_index], input.size()); } unique_ptr PhysicalDelete::GetGlobalState(ClientContext &context) { @@ -49121,7 +54049,7 @@ unique_ptr PhysicalDelete::GetGlobalState(ClientContext &co //===--------------------------------------------------------------------===// // GetChunkInternal //===--------------------------------------------------------------------===// -void PhysicalDelete::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) { +void PhysicalDelete::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const { auto &gstate = (DeleteGlobalState &)*sink_state; chunk.SetCardinality(1); @@ -49147,23 +54075,26 @@ void PhysicalDelete::GetChunkInternal(ExecutionContext &context, DataChunk &chun + namespace duckdb { //! Parse a file from disk using a specified copy function and return the set of chunks retrieved from the file class PhysicalExport : public PhysicalOperator { public: PhysicalExport(vector types, CopyFunction function, unique_ptr info, - idx_t estimated_cardinality) + idx_t estimated_cardinality, BoundExportData exported_tables) : PhysicalOperator(PhysicalOperatorType::EXPORT, move(types), estimated_cardinality), - function(std::move(function)), info(move(info)) { + function(std::move(function)), info(move(info)), exported_tables(move(exported_tables)) { } //! The copy function to use to read the file CopyFunction function; //! The binding info containing the set of options for reading the file unique_ptr info; + //! 
The table info for each table that will be exported + BoundExportData exported_tables; public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; }; } // namespace duckdb @@ -49174,6 +54105,7 @@ class PhysicalExport : public PhysicalOperator { + #include #include @@ -49188,7 +54120,7 @@ static void WriteCatalogEntries(stringstream &ss, vector &entrie ss << std::endl; } -static void WriteStringStreamToFile(FileSystem &fs, stringstream &ss, string path) { +static void WriteStringStreamToFile(FileSystem &fs, stringstream &ss, const string &path) { auto ss_string = ss.str(); auto handle = fs.OpenFile(path, FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW, FileLockType::WRITE_LOCK); @@ -49205,17 +54137,16 @@ static void WriteValueAsSQL(stringstream &ss, Value &val) { } static void WriteCopyStatement(FileSystem &fs, stringstream &ss, TableCatalogEntry *table, CopyInfo &info, - CopyFunction &function) { - string table_file_path; + ExportedTableData &exported_table, CopyFunction const &function) { ss << "COPY "; - if (table->schema->name != DEFAULT_SCHEMA) { - table_file_path = fs.JoinPath( - info.file_path, StringUtil::Format("%s.%s.%s", table->schema->name, table->name, function.extension)); - ss << table->schema->name << "."; - } else { - table_file_path = fs.JoinPath(info.file_path, StringUtil::Format("%s.%s", table->name, function.extension)); + + if (exported_table.schema_name != DEFAULT_SCHEMA) { + ss << KeywordHelper::WriteOptionallyQuoted(exported_table.schema_name) << "."; } - ss << table->name << " FROM '" << table_file_path << "' ("; + + ss << KeywordHelper::WriteOptionallyQuoted(exported_table.table_name) << " FROM '" << exported_table.file_path + << "' ("; + // write the copy options ss << "FORMAT '" << info.format << "'"; if (info.format == "csv") { @@ -49243,7 +54174,7 
@@ static void WriteCopyStatement(FileSystem &fs, stringstream &ss, TableCatalogEnt ss << ");" << std::endl; } -void PhysicalExport::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) { +void PhysicalExport::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const { auto &ccontext = context.client; auto &fs = FileSystem::GetFileSystem(ccontext); @@ -49256,11 +54187,14 @@ void PhysicalExport::GetChunkInternal(ExecutionContext &context, DataChunk &chun Catalog::GetCatalog(ccontext).schemas->Scan(context.client, [&](CatalogEntry *entry) { auto schema = (SchemaCatalogEntry *)entry; - if (schema->name != DEFAULT_SCHEMA) { + if (!schema->internal) { // export schema schemas.push_back(schema); } schema->Scan(context.client, CatalogType::TABLE_ENTRY, [&](CatalogEntry *entry) { + if (entry->internal) { + return; + } if (entry->type == CatalogType::TABLE_ENTRY) { tables.push_back(entry); } else { @@ -49287,8 +54221,10 @@ void PhysicalExport::GetChunkInternal(ExecutionContext &context, DataChunk &chun // write the load.sql file // for every table, we write COPY INTO statement with the specified options stringstream load_ss; - for (auto &table : tables) { - WriteCopyStatement(fs, load_ss, (TableCatalogEntry *)table, *info, function); + for (auto const &kv : exported_tables.data) { + auto table = kv.first; + auto exported_table_info = kv.second; + WriteCopyStatement(fs, load_ss, table, *info, exported_table_info, function); } WriteStringStreamToFile(fs, load_ss, fs.JoinPath(info->file_path, "load.sql")); state->finished = true; @@ -49324,10 +54260,12 @@ class PhysicalInsert : public PhysicalSink { public: unique_ptr GetGlobalState(ClientContext &context) override; + void Combine(ExecutionContext &context, GlobalOperatorState &gstate, LocalSinkState &lstate) override; unique_ptr GetLocalSinkState(ExecutionContext &context) override; - void Sink(ExecutionContext &context, GlobalOperatorState &state, 
LocalSinkState &lstate, DataChunk &input) override; + void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, + DataChunk &input) const override; - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; }; } // namespace duckdb @@ -49339,6 +54277,7 @@ class PhysicalInsert : public PhysicalSink { + namespace duckdb { //===--------------------------------------------------------------------===// @@ -49349,7 +54288,7 @@ class InsertGlobalState : public GlobalOperatorState { InsertGlobalState() : insert_count(0) { } - std::mutex lock; + mutex lock; idx_t insert_count; }; @@ -49365,7 +54304,7 @@ class InsertLocalState : public LocalSinkState { }; void PhysicalInsert::Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, - DataChunk &chunk) { + DataChunk &chunk) const { auto &gstate = (InsertGlobalState &)state; auto &istate = (InsertLocalState &)lstate; @@ -49411,7 +54350,7 @@ unique_ptr PhysicalInsert::GetLocalSinkState(ExecutionContext &c //===--------------------------------------------------------------------===// // GetChunkInternal //===--------------------------------------------------------------------===// -void PhysicalInsert::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) { +void PhysicalInsert::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const { auto &gstate = (InsertGlobalState &)*sink_state; chunk.SetCardinality(1); @@ -49419,6 +54358,11 @@ void PhysicalInsert::GetChunkInternal(ExecutionContext &context, DataChunk &chun state->finished = true; } +void PhysicalInsert::Combine(ExecutionContext &context, GlobalOperatorState &gstate, LocalSinkState &lstate) { + auto &state = (InsertLocalState &)lstate; + context.thread.profiler.Flush(this, 
&state.default_executor, "default_executor", 1); + context.client.profiler->Flush(context.thread.profiler); +} } // namespace duckdb //===----------------------------------------------------------------------===// @@ -49452,14 +54396,16 @@ class PhysicalUpdate : public PhysicalSink { vector columns; vector> expressions; vector> bound_defaults; - bool is_index_update; + bool update_is_del_and_insert; public: unique_ptr GetGlobalState(ClientContext &context) override; unique_ptr GetLocalSinkState(ExecutionContext &context) override; - void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, DataChunk &input) override; + void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, + DataChunk &input) const override; - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void Combine(ExecutionContext &context, GlobalOperatorState &gstate, LocalSinkState &lstate) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; }; } // namespace duckdb @@ -49471,6 +54417,7 @@ class PhysicalUpdate : public PhysicalSink { + namespace duckdb { //===--------------------------------------------------------------------===// @@ -49481,8 +54428,9 @@ class UpdateGlobalState : public GlobalOperatorState { UpdateGlobalState() : updated_count(0) { } - std::mutex lock; + mutex lock; idx_t updated_count; + unordered_set updated_columns; }; class UpdateLocalState : public LocalSinkState { @@ -49507,7 +54455,7 @@ class UpdateLocalState : public LocalSinkState { }; void PhysicalUpdate::Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, - DataChunk &chunk) { + DataChunk &chunk) const { auto &gstate = (UpdateGlobalState &)state; auto &ustate = (UpdateLocalState &)lstate; @@ -49534,9 +54482,28 @@ void PhysicalUpdate::Sink(ExecutionContext &context, GlobalOperatorState &state, } 
lock_guard glock(gstate.lock); - if (is_index_update) { - // index update, perform a delete and an append instead + if (update_is_del_and_insert) { + // index update or update on complex type, perform a delete and an append instead + + // figure out which rows have not yet been deleted in this update + // this is required since we might see the same row_id multiple times + // in the case of an UPDATE query that e.g. has joins + auto row_id_data = FlatVector::GetData(row_ids); + SelectionVector sel(STANDARD_VECTOR_SIZE); + idx_t update_count = 0; + for (idx_t i = 0; i < update_chunk.size(); i++) { + auto row_id = row_id_data[i]; + if (gstate.updated_columns.find(row_id) == gstate.updated_columns.end()) { + gstate.updated_columns.insert(row_id); + sel.set_index(update_count++, i); + } + } + if (update_count != update_chunk.size()) { + // we need to slice here + update_chunk.Slice(sel, update_count); + } table.Delete(tableref, context.client, row_ids, update_chunk.size()); + // for the append we need to arrange the columns in a specific manner (namely the "standard table order") mock_chunk.SetCardinality(update_chunk); for (idx_t i = 0; i < columns.size(); i++) { mock_chunk.data[columns[i]].Reference(update_chunk.data[i]); @@ -49559,7 +54526,7 @@ unique_ptr PhysicalUpdate::GetLocalSinkState(ExecutionContext &c //===--------------------------------------------------------------------===// // GetChunkInternal //===--------------------------------------------------------------------===// -void PhysicalUpdate::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) { +void PhysicalUpdate::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const { auto &gstate = (UpdateGlobalState &)*sink_state; chunk.SetCardinality(1); @@ -49567,6 +54534,11 @@ void PhysicalUpdate::GetChunkInternal(ExecutionContext &context, DataChunk &chun state->finished = true; } +void PhysicalUpdate::Combine(ExecutionContext 
&context, GlobalOperatorState &gstate, LocalSinkState &lstate) { + auto &state = (UpdateLocalState &)lstate; + context.thread.profiler.Flush(this, &state.default_executor, "default_executor", 1); + context.client.profiler->Flush(context.thread.profiler); +} } // namespace duckdb //===----------------------------------------------------------------------===// @@ -49594,7 +54566,7 @@ class PhysicalProjection : public PhysicalOperator { vector> select_list; public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; unique_ptr GetOperatorState() override; void FinalizeOperatorState(PhysicalOperatorState &state, ExecutionContext &context) override; @@ -49619,7 +54591,8 @@ class PhysicalProjectionState : public PhysicalOperatorState { ExpressionExecutor executor; }; -void PhysicalProjection::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state_p) { +void PhysicalProjection::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state_p) const { auto state = reinterpret_cast(state_p); // get the next chunk from the child @@ -49637,7 +54610,7 @@ unique_ptr PhysicalProjection::GetOperatorState() { void PhysicalProjection::FinalizeOperatorState(PhysicalOperatorState &state_p, ExecutionContext &context) { auto &state = reinterpret_cast(state_p); - context.thread.profiler.Flush(this, &state.executor); + context.thread.profiler.Flush(this, &state.executor, "projection", 0); if (!children.empty() && state.child_state) { children[0]->FinalizeOperatorState(*state.child_state, context); } @@ -49675,7 +54648,7 @@ class PhysicalTableInOutFunction : public PhysicalOperator { unique_ptr bind_data_p, vector column_ids_p, idx_t estimated_cardinality); - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) 
override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; public: unique_ptr GetOperatorState() override; @@ -49716,7 +54689,7 @@ PhysicalTableInOutFunction::PhysicalTableInOutFunction(vector types } void PhysicalTableInOutFunction::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, - PhysicalOperatorState *state_p) { + PhysicalOperatorState *state_p) const { auto &state = (PhysicalTableInOutFunctionState &)*state_p; if (!state.initialized) { @@ -49762,7 +54735,7 @@ class PhysicalUnnest : public PhysicalOperator { PhysicalUnnest(vector types, vector> select_list, idx_t estimated_cardinality, PhysicalOperatorType type = PhysicalOperatorType::UNNEST); - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; //! The projection list of the SELECT statement (that contains aggregates) vector> select_list; @@ -49794,7 +54767,8 @@ class PhysicalUnnestOperatorState : public PhysicalOperatorState { int64_t list_length = -1; DataChunk list_data; - VectorData list_vector_data; + vector list_vector_data; + vector list_child_data; }; // this implements a sorted window functions variant @@ -49805,7 +54779,122 @@ PhysicalUnnest::PhysicalUnnest(vector types, vectorselect_list.empty()); } -void PhysicalUnnest::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state_p) { +static void UnnestNull(idx_t start, idx_t end, Vector &result) { + if (result.GetType().InternalType() == PhysicalType::STRUCT) { + auto &children = StructVector::GetEntries(result); + for (auto &child : children) { + UnnestNull(start, end, *child); + } + } + auto &validity = FlatVector::Validity(result); + for (idx_t i = start; i < end; i++) { + validity.SetInvalid(i); + } + if (result.GetType().InternalType() == PhysicalType::STRUCT) { + 
auto &struct_children = StructVector::GetEntries(result); + for (auto &child : struct_children) { + UnnestNull(start, end, *child); + } + } +} + +template +static void TemplatedUnnest(VectorData &vdata, idx_t start, idx_t end, Vector &result) { + auto source_data = (T *)vdata.data; + auto &source_mask = vdata.validity; + auto result_data = FlatVector::GetData(result); + auto &result_mask = FlatVector::Validity(result); + + for (idx_t i = start; i < end; i++) { + auto source_idx = vdata.sel->get_index(i); + auto target_idx = i - start; + if (source_mask.RowIsValid(source_idx)) { + result_data[target_idx] = source_data[source_idx]; + result_mask.SetValid(target_idx); + } else { + result_mask.SetInvalid(target_idx); + } + } +} + +static void UnnestValidity(VectorData &vdata, idx_t start, idx_t end, Vector &result) { + auto &source_mask = vdata.validity; + auto &result_mask = FlatVector::Validity(result); + + for (idx_t i = start; i < end; i++) { + auto source_idx = vdata.sel->get_index(i); + auto target_idx = i - start; + result_mask.Set(target_idx, source_mask.RowIsValid(source_idx)); + } +} + +static void UnnestVector(VectorData &vdata, Vector &source, idx_t list_size, idx_t start, idx_t end, Vector &result) { + switch (result.GetType().InternalType()) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + TemplatedUnnest(vdata, start, end, result); + break; + case PhysicalType::INT16: + TemplatedUnnest(vdata, start, end, result); + break; + case PhysicalType::INT32: + TemplatedUnnest(vdata, start, end, result); + break; + case PhysicalType::INT64: + TemplatedUnnest(vdata, start, end, result); + break; + case PhysicalType::INT128: + TemplatedUnnest(vdata, start, end, result); + break; + case PhysicalType::UINT8: + TemplatedUnnest(vdata, start, end, result); + break; + case PhysicalType::UINT16: + TemplatedUnnest(vdata, start, end, result); + break; + case PhysicalType::UINT32: + TemplatedUnnest(vdata, start, end, result); + break; + case PhysicalType::UINT64: + 
TemplatedUnnest(vdata, start, end, result); + break; + case PhysicalType::FLOAT: + TemplatedUnnest(vdata, start, end, result); + break; + case PhysicalType::DOUBLE: + TemplatedUnnest(vdata, start, end, result); + break; + case PhysicalType::INTERVAL: + TemplatedUnnest(vdata, start, end, result); + break; + case PhysicalType::VARCHAR: + TemplatedUnnest(vdata, start, end, result); + break; + case PhysicalType::LIST: { + auto &target = ListVector::GetEntry(result); + target.Reference(ListVector::GetEntry(source)); + ListVector::SetListSize(result, ListVector::GetListSize(source)); + TemplatedUnnest(vdata, start, end, result); + break; + } + case PhysicalType::STRUCT: { + auto &source_entries = StructVector::GetEntries(source); + auto &target_entries = StructVector::GetEntries(result); + UnnestValidity(vdata, start, end, result); + for (idx_t i = 0; i < source_entries.size(); i++) { + VectorData sdata; + source_entries[i]->Orrify(list_size, sdata); + UnnestVector(sdata, *source_entries[i], list_size, start, end, *target_entries[i]); + } + break; + } + default: + throw InternalException("Unimplemented type for UNNEST"); + } +} + +void PhysicalUnnest::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state_p) const { auto state = reinterpret_cast(state_p); while (true) { // repeat until we actually have produced some rows if (state->child_chunk.size() == 0 || state->parent_position >= state->child_chunk.size()) { @@ -49838,30 +54927,36 @@ void PhysicalUnnest::GetChunkInternal(ExecutionContext &context, DataChunk &chun D_ASSERT(state->list_data.ColumnCount() == select_list.size()); // initialize VectorData object so the nullmask can accessed - state->list_data.data[0].Orrify(state->list_data.size(), state->list_vector_data); - } + state->list_vector_data.resize(state->list_data.ColumnCount()); + state->list_child_data.resize(state->list_data.ColumnCount()); + for (idx_t col_idx = 0; col_idx < state->list_data.ColumnCount(); col_idx++) 
{ + auto &list_vector = state->list_data.data[col_idx]; + list_vector.Orrify(state->list_data.size(), state->list_vector_data[col_idx]); - // whether we have UNNEST(*expression returning list that evaluated to NULL*) - bool unnest_null = !state->list_vector_data.validity.RowIsValid( - state->list_vector_data.sel->get_index(state->parent_position)); + auto &child_vector = ListVector::GetEntry(list_vector); + auto list_size = ListVector::GetListSize(list_vector); + child_vector.Orrify(list_size, state->list_child_data[col_idx]); + } + } // need to figure out how many times we need to repeat for current row if (state->list_length < 0) { for (idx_t col_idx = 0; col_idx < state->list_data.ColumnCount(); col_idx++) { - auto &v = state->list_data.data[col_idx]; - - D_ASSERT(v.GetType().id() == LogicalTypeId::LIST); + auto &vdata = state->list_vector_data[col_idx]; + auto current_idx = vdata.sel->get_index(state->parent_position); + int64_t list_length; // deal with NULL values - if (unnest_null) { - state->list_length = 1; - continue; + if (!vdata.validity.RowIsValid(current_idx)) { + list_length = 1; + } else { + auto list_data = (list_entry_t *)vdata.data; + auto list_entry = list_data[current_idx]; + list_length = (int64_t)list_entry.length; } - auto list_data = FlatVector::GetData(v); - auto list_entry = list_data[state->parent_position]; - if ((int64_t)list_entry.length > state->list_length) { - state->list_length = list_entry.length; + if (list_length > state->list_length) { + state->list_length = list_length; } } } @@ -49874,33 +54969,41 @@ void PhysicalUnnest::GetChunkInternal(ExecutionContext &context, DataChunk &chun chunk.SetCardinality(this_chunk_len); for (idx_t col_idx = 0; col_idx < state->child_chunk.ColumnCount(); col_idx++) { - auto val = state->child_chunk.data[col_idx].GetValue(state->parent_position); - chunk.data[col_idx].Reference(val); + ConstantVector::Reference(chunk.data[col_idx], state->child_chunk.data[col_idx], state->parent_position, + 
state->child_chunk.size()); } - // FIXME do not use GetValue/SetValue here for (idx_t col_idx = 0; col_idx < state->list_data.ColumnCount(); col_idx++) { - auto target_col = col_idx + state->child_chunk.ColumnCount(); - auto &v = state->list_data.data[col_idx]; - auto list_data = FlatVector::GetData(v); - auto list_entry = list_data[state->parent_position]; - idx_t i = 0; + auto &result_vector = chunk.data[col_idx + state->child_chunk.ColumnCount()]; + + auto &vdata = state->list_vector_data[col_idx]; + auto &child_data = state->list_child_data[col_idx]; + auto current_idx = vdata.sel->get_index(state->parent_position); + + auto list_data = (list_entry_t *)vdata.data; + auto list_entry = list_data[current_idx]; + + idx_t list_count; + if (state->list_position >= list_entry.length) { + list_count = 0; + } else { + list_count = MinValue(this_chunk_len, list_entry.length - state->list_position); + } + if (list_entry.length > state->list_position) { - if (unnest_null) { - for (i = 0; i < MinValue(this_chunk_len, list_entry.length - state->list_position); i++) { - FlatVector::SetNull(chunk.data[target_col], i, true); - } + if (!vdata.validity.RowIsValid(current_idx)) { + UnnestNull(0, list_count, result_vector); } else { - auto &child_vector = ListVector::GetEntry(v); - for (i = 0; i < MinValue(this_chunk_len, list_entry.length - state->list_position); i++) { - chunk.data[target_col].SetValue( - i, child_vector.GetValue(list_entry.offset + i + state->list_position)); - } + auto &list_vector = state->list_data.data[col_idx]; + auto &child_vector = ListVector::GetEntry(list_vector); + auto list_size = ListVector::GetListSize(list_vector); + + auto base_offset = list_entry.offset + state->list_position; + UnnestVector(child_data, child_vector, list_size, base_offset, base_offset + list_count, + result_vector); } } - for (; i < (idx_t)this_chunk_len; i++) { - chunk.data[target_col].SetValue(i, Value()); - } + UnnestNull(list_count, this_chunk_len, result_vector); } 
state->list_position += this_chunk_len; @@ -49935,7 +55038,8 @@ class PhysicalChunkScanState : public PhysicalOperatorState { idx_t chunk_index; }; -void PhysicalChunkScan::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state_p) { +void PhysicalChunkScan::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state_p) const { auto state = (PhysicalChunkScanState *)state_p; D_ASSERT(collection); if (collection->Count() == 0) { @@ -49976,14 +55080,15 @@ class PhysicalDummyScan : public PhysicalOperator { } public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; }; } // namespace duckdb namespace duckdb { -void PhysicalDummyScan::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) { +void PhysicalDummyScan::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state) const { state->finished = true; // return a single row on the first call to the dummy scan chunk.SetCardinality(1); @@ -50011,14 +55116,15 @@ class PhysicalEmptyResult : public PhysicalOperator { } public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; }; } // namespace duckdb namespace duckdb { -void PhysicalEmptyResult::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) { +void PhysicalEmptyResult::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state) const { state->finished = true; } @@ -50051,8 +55157,9 @@ class PhysicalExpressionScan : public PhysicalOperator { vector>> expressions; public: - void GetChunkInternal(ExecutionContext 
&context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; unique_ptr GetOperatorState() override; + void FinalizeOperatorState(PhysicalOperatorState &state, ExecutionContext &context) override; }; } // namespace duckdb @@ -50075,7 +55182,7 @@ class PhysicalExpressionScanState : public PhysicalOperatorState { }; void PhysicalExpressionScan::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, - PhysicalOperatorState *state_p) { + PhysicalOperatorState *state_p) const { auto state = (PhysicalExpressionScanState *)state_p; if (state->expression_index >= expressions.size()) { // finished executing all expression lists @@ -50084,10 +55191,12 @@ void PhysicalExpressionScan::GetChunkInternal(ExecutionContext &context, DataChu if (state->expression_index == 0) { // first run, fetch the chunk from the child + // the child chunk is either (1) a dummy scan, or (2) (uncorrelated) scalar subquery results + // as a result, the child operator should ALWAYS return exactly one row D_ASSERT(children.size() == 1); children[0]->GetChunk(context, state->child_chunk, state->child_state.get()); - if (state->child_chunk.size() == 0) { - return; + if (state->child_chunk.size() != 1) { + throw InternalException("Expected expression scan child to have exactly one element"); } } // now execute the expressions of the nth expression list for the child chunk list @@ -50100,6 +55209,13 @@ void PhysicalExpressionScan::GetChunkInternal(ExecutionContext &context, DataChu unique_ptr PhysicalExpressionScan::GetOperatorState() { return make_unique(*this, children[0].get()); } +void PhysicalExpressionScan::FinalizeOperatorState(PhysicalOperatorState &state, ExecutionContext &context) { + auto &state_p = reinterpret_cast(state); + context.thread.profiler.Flush(this, state_p.executor.get(), "executor", 0); + if (!children.empty() && state.child_state) { + 
children[0]->FinalizeOperatorState(*state.child_state, context); + } +} } // namespace duckdb @@ -50135,7 +55251,8 @@ PhysicalTableScan::PhysicalTableScan(vector types, TableFunction fu table_filters(move(table_filters_p)) { } -void PhysicalTableScan::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state_p) { +void PhysicalTableScan::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state_p) const { auto &state = (PhysicalTableScanOperatorState &)*state_p; if (column_ids.empty()) { return; @@ -50173,7 +55290,13 @@ void PhysicalTableScan::GetChunkInternal(ExecutionContext &context, DataChunk &c } else { // parallel scan do { - function.function(context.client, bind_data.get(), state.operator_data.get(), nullptr, chunk); + if (function.parallel_function) { + function.parallel_function(context.client, bind_data.get(), state.operator_data.get(), nullptr, chunk, + state.parallel_state); + } else { + function.function(context.client, bind_data.get(), state.operator_data.get(), nullptr, chunk); + } + if (chunk.size() == 0) { D_ASSERT(function.parallel_state_next); if (function.parallel_state_next(context.client, bind_data.get(), state.operator_data.get(), @@ -50217,12 +55340,11 @@ string PhysicalTableScan::ParamsToString() const { result += "\n[INFOSEPARATOR]\n"; result += "Filters: "; for (auto &f : table_filters->filters) { - for (auto &filter : f.second) { - if (filter.column_index < names.size()) { - result += "\n"; - result += names[column_ids[filter.column_index]] + - ExpressionTypeToOperator(filter.comparison_type) + filter.constant.ToString(); - } + auto &column_index = f.first; + auto &filter = f.second; + if (column_index < names.size()) { + result += filter->ToString(names[column_ids[column_index]]); + result += "\n"; } } } @@ -50260,7 +55382,7 @@ class PhysicalAlter : public PhysicalOperator { unique_ptr info; public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, 
PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; }; } // namespace duckdb @@ -50270,7 +55392,7 @@ class PhysicalAlter : public PhysicalOperator { namespace duckdb { -void PhysicalAlter::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) { +void PhysicalAlter::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const { auto &catalog = Catalog::GetCatalog(context.client); catalog.Alter(context.client, info.get()); state->finished = true; @@ -50303,7 +55425,7 @@ class PhysicalCreateFunction : public PhysicalOperator { unique_ptr info; public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; }; } // namespace duckdb @@ -50315,7 +55437,7 @@ class PhysicalCreateFunction : public PhysicalOperator { namespace duckdb { void PhysicalCreateFunction::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, - PhysicalOperatorState *state) { + PhysicalOperatorState *state) const { Catalog::GetCatalog(context.client).CreateFunction(context.client, info.get()); state->finished = true; } @@ -50364,7 +55486,7 @@ class PhysicalCreateIndex : public PhysicalOperator { vector> unbound_expressions; public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; }; } // namespace duckdb @@ -50376,9 +55498,10 @@ class PhysicalCreateIndex : public PhysicalOperator { namespace duckdb { -void PhysicalCreateIndex::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) { +void PhysicalCreateIndex::GetChunkInternal(ExecutionContext 
&context, DataChunk &chunk, + PhysicalOperatorState *state) const { if (column_ids.empty()) { - throw NotImplementedException("CREATE INDEX does not refer to any columns in the base table!"); + throw BinderException("CREATE INDEX does not refer to any columns in the base table!"); } auto &schema = *table.schema; @@ -50391,12 +55514,11 @@ void PhysicalCreateIndex::GetChunkInternal(ExecutionContext &context, DataChunk unique_ptr index; switch (info->index_type) { case IndexType::ART: { - index = make_unique(column_ids, move(unbound_expressions), info->unique); + index = make_unique(column_ids, unbound_expressions, info->unique); break; } default: - D_ASSERT(0); - throw NotImplementedException("Unimplemented index type"); + throw InternalException("Unimplemented index type"); } index_entry->index = index.get(); index_entry->info = table.storage->info; @@ -50433,7 +55555,7 @@ class PhysicalCreateSchema : public PhysicalOperator { unique_ptr info; public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; }; } // namespace duckdb @@ -50442,7 +55564,8 @@ class PhysicalCreateSchema : public PhysicalOperator { namespace duckdb { -void PhysicalCreateSchema::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) { +void PhysicalCreateSchema::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state) const { Catalog::GetCatalog(context.client).CreateSchema(context.client, info.get()); state->finished = true; } @@ -50474,7 +55597,7 @@ class PhysicalCreateSequence : public PhysicalOperator { unique_ptr info; public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; 
}; } // namespace duckdb @@ -50484,7 +55607,7 @@ class PhysicalCreateSequence : public PhysicalOperator { namespace duckdb { void PhysicalCreateSequence::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, - PhysicalOperatorState *state) { + PhysicalOperatorState *state) const { Catalog::GetCatalog(context.client).CreateSequence(context.client, info.get()); state->finished = true; } @@ -50517,7 +55640,7 @@ class PhysicalCreateTable : public PhysicalOperator { unique_ptr info; public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; }; } // namespace duckdb @@ -50535,7 +55658,8 @@ PhysicalCreateTable::PhysicalCreateTable(LogicalOperator &op, SchemaCatalogEntry info(move(info)) { } -void PhysicalCreateTable::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) { +void PhysicalCreateTable::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state) const { auto &catalog = Catalog::GetCatalog(context.client); catalog.CreateTable(context.client, schema, info.get()); state->finished = true; @@ -50571,9 +55695,10 @@ class PhysicalCreateTableAs : public PhysicalSink { public: unique_ptr GetGlobalState(ClientContext &context) override; - void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, DataChunk &input) override; + void Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate, + DataChunk &input) const override; - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; }; } // namespace duckdb @@ -50598,7 +55723,7 @@ class CreateTableAsGlobalState : public GlobalOperatorState { 
CreateTableAsGlobalState() { inserted_count = 0; } - std::mutex append_lock; + mutex append_lock; TableCatalogEntry *table; int64_t inserted_count; }; @@ -50611,7 +55736,7 @@ unique_ptr PhysicalCreateTableAs::GetGlobalState(ClientCont } void PhysicalCreateTableAs::Sink(ExecutionContext &context, GlobalOperatorState &state, LocalSinkState &lstate_p, - DataChunk &input) { + DataChunk &input) const { auto &sink = (CreateTableAsGlobalState &)state; if (sink.table) { lock_guard client_guard(sink.append_lock); @@ -50624,7 +55749,7 @@ void PhysicalCreateTableAs::Sink(ExecutionContext &context, GlobalOperatorState // GetChunkInternal //===--------------------------------------------------------------------===// void PhysicalCreateTableAs::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, - PhysicalOperatorState *state) { + PhysicalOperatorState *state) const { auto &sink = (CreateTableAsGlobalState &)*sink_state; if (sink.table) { chunk.SetCardinality(1); @@ -50660,7 +55785,7 @@ class PhysicalCreateView : public PhysicalOperator { unique_ptr info; public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; }; } // namespace duckdb @@ -50669,7 +55794,8 @@ class PhysicalCreateView : public PhysicalOperator { namespace duckdb { -void PhysicalCreateView::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) { +void PhysicalCreateView::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, + PhysicalOperatorState *state) const { Catalog::GetCatalog(context.client).CreateView(context.client, info.get()); state->finished = true; } @@ -50701,7 +55827,7 @@ class PhysicalDrop : public PhysicalOperator { unique_ptr info; public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void 
GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; }; } // namespace duckdb @@ -50710,7 +55836,7 @@ class PhysicalDrop : public PhysicalOperator { namespace duckdb { -void PhysicalDrop::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) { +void PhysicalDrop::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const { switch (info->type) { case CatalogType::PREPARED_STATEMENT: { // DEALLOCATE silently ignores errors @@ -50740,7 +55866,6 @@ void PhysicalDrop::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, - namespace duckdb { class Pipeline; @@ -50752,19 +55877,18 @@ class PhysicalRecursiveCTE : public PhysicalOperator { bool union_all; std::shared_ptr working_table; - ChunkCollection intermediate_table; - vector> pipelines; + vector> pipelines; public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; unique_ptr GetOperatorState() override; void FinalizeOperatorState(PhysicalOperatorState &state_p, ExecutionContext &context) override; private: //! 
Probe Hash Table and eliminate duplicate rows - idx_t ProbeHT(DataChunk &chunk, PhysicalOperatorState *state); + idx_t ProbeHT(DataChunk &chunk, PhysicalOperatorState *state) const; - void ExecuteRecursivePipelines(ExecutionContext &context); + void ExecuteRecursivePipelines(ExecutionContext &context) const; }; } // namespace duckdb @@ -50792,6 +55916,8 @@ class PhysicalRecursiveCTEState : public PhysicalOperatorState { bool recursing = false; bool intermediate_empty = true; + std::shared_ptr working_table; + ChunkCollection intermediate_table; }; PhysicalRecursiveCTE::PhysicalRecursiveCTE(vector types, bool union_all, unique_ptr top, @@ -50806,7 +55932,7 @@ PhysicalRecursiveCTE::~PhysicalRecursiveCTE() { // first exhaust non recursive term, then exhaust recursive term iteratively until no (new) rows are generated. void PhysicalRecursiveCTE::GetChunkInternal(ExecutionContext &context, DataChunk &chunk, - PhysicalOperatorState *state_p) { + PhysicalOperatorState *state_p) const { auto state = reinterpret_cast(state_p); if (!state->ht) { @@ -50820,10 +55946,10 @@ void PhysicalRecursiveCTE::GetChunkInternal(ExecutionContext &context, DataChunk if (!union_all) { idx_t match_count = ProbeHT(chunk, state); if (match_count > 0) { - working_table->Append(chunk); + state->working_table->Append(chunk); } } else { - working_table->Append(chunk); + state->working_table->Append(chunk); } if (chunk.size() != 0) { @@ -50844,9 +55970,9 @@ void PhysicalRecursiveCTE::GetChunkInternal(ExecutionContext &context, DataChunk break; } - working_table->Reset(); - working_table->Merge(intermediate_table); - intermediate_table.Reset(); + state->working_table->Reset(); + state->working_table->Merge(state->intermediate_table); + state->intermediate_table.Reset(); ExecuteRecursivePipelines(context); state->bottom_state = children[1]->GetOperatorState(); @@ -50860,13 +55986,13 @@ void PhysicalRecursiveCTE::GetChunkInternal(ExecutionContext &context, DataChunk // intermediate tables. 
idx_t match_count = ProbeHT(chunk, state); if (match_count > 0) { - intermediate_table.Append(chunk); + state->intermediate_table.Append(chunk); state->intermediate_empty = false; } else { continue; } } else { - intermediate_table.Append(chunk); + state->intermediate_table.Append(chunk); state->intermediate_empty = false; } @@ -50874,7 +56000,7 @@ void PhysicalRecursiveCTE::GetChunkInternal(ExecutionContext &context, DataChunk } } -void PhysicalRecursiveCTE::ExecuteRecursivePipelines(ExecutionContext &context) { +void PhysicalRecursiveCTE::ExecuteRecursivePipelines(ExecutionContext &context) const { if (pipelines.empty()) { return; } @@ -50907,7 +56033,7 @@ void PhysicalRecursiveCTE::ExecuteRecursivePipelines(ExecutionContext &context) } } -idx_t PhysicalRecursiveCTE::ProbeHT(DataChunk &chunk, PhysicalOperatorState *state_p) { +idx_t PhysicalRecursiveCTE::ProbeHT(DataChunk &chunk, PhysicalOperatorState *state_p) const { auto state = reinterpret_cast(state_p); Vector dummy_addresses(LogicalType::POINTER); @@ -50926,6 +56052,7 @@ unique_ptr PhysicalRecursiveCTE::GetOperatorState() { auto state = make_unique(*this); state->top_state = children[0]->GetOperatorState(); state->bottom_state = children[1]->GetOperatorState(); + state->working_table = working_table; return (move(state)); } @@ -50959,7 +56086,7 @@ class PhysicalUnion : public PhysicalOperator { idx_t estimated_cardinality); public: - void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) override; + void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const override; unique_ptr GetOperatorState() override; void FinalizeOperatorState(PhysicalOperatorState &state_p, ExecutionContext &context) override; }; @@ -50987,7 +56114,8 @@ PhysicalUnion::PhysicalUnion(vector types, unique_ptr(state_p); if (!state->top_done) { children[0]->GetChunk(context, chunk, state->top_state.get()); @@ -51025,38 +56153,54 @@ void 
PhysicalUnion::FinalizeOperatorState(PhysicalOperatorState &state_p, Execut namespace duckdb { -RadixPartitionInfo::RadixPartitionInfo(idx_t n_partitions_upper_bound) : n_partitions(1), radix_bits(0), radix_mask(0) { +static idx_t PartitionInfoNPartitions(const idx_t n_partitions_upper_bound) { + idx_t n_partitions = 1; while (n_partitions <= n_partitions_upper_bound / 2) { n_partitions *= 2; if (n_partitions >= 256) { break; } } - // finalize_threads needs to be a power of 2 - D_ASSERT(n_partitions > 0); - D_ASSERT(n_partitions <= 256); - D_ASSERT((n_partitions & (n_partitions - 1)) == 0); + return n_partitions; +} +static idx_t PartitionInfoRadixBits(const idx_t n_partitions) { + idx_t radix_bits = 0; auto radix_partitions_copy = n_partitions; while (radix_partitions_copy - 1) { radix_bits++; radix_partitions_copy >>= 1; } + return radix_bits; +} - D_ASSERT(radix_bits <= 8); - +static hash_t PartitionInfoRadixMask(const idx_t radix_bits, const idx_t radix_shift) { + hash_t radix_mask = 0; // we use the fifth byte of the 64 bit hash as radix source for (idx_t i = 0; i < radix_bits; i++) { radix_mask = (radix_mask << 1) | 1; } - radix_mask <<= RADIX_SHIFT; + radix_mask <<= radix_shift; + return radix_mask; +} + +RadixPartitionInfo::RadixPartitionInfo(const idx_t n_partitions_upper_bound) + : n_partitions(PartitionInfoNPartitions(n_partitions_upper_bound)), + radix_bits(PartitionInfoRadixBits(n_partitions)), radix_mask(PartitionInfoRadixMask(radix_bits, RADIX_SHIFT)) { + + // finalize_threads needs to be a power of 2 + D_ASSERT(n_partitions > 0); + D_ASSERT(n_partitions <= 256); + D_ASSERT((n_partitions & (n_partitions - 1)) == 0); + D_ASSERT(radix_bits <= 8); } PartitionableHashTable::PartitionableHashTable(BufferManager &buffer_manager_p, RadixPartitionInfo &partition_info_p, vector group_types_p, vector payload_types_p, vector bindings_p) : buffer_manager(buffer_manager_p), group_types(move(group_types_p)), payload_types(move(payload_types_p)), - 
bindings(move(bindings_p)), is_partitioned(false), partition_info(partition_info_p) { + bindings(move(bindings_p)), is_partitioned(false), partition_info(partition_info_p), hashes(LogicalType::HASH), + hashes_subset(LogicalType::HASH) { sel_vectors.resize(partition_info.n_partitions); sel_vector_sizes.resize(partition_info.n_partitions); @@ -51064,8 +56208,6 @@ PartitionableHashTable::PartitionableHashTable(BufferManager &buffer_manager_p, if (!payload_types.empty()) { payload_subset.Initialize(payload_types); } - hashes.Initialize(LogicalType::HASH); - hashes_subset.Initialize(LogicalType::HASH); for (hash_t r = 0; r < partition_info.n_partitions; r++) { sel_vectors[r].Initialize(); @@ -51187,37 +56329,34 @@ void PartitionableHashTable::Finalize() { + namespace duckdb { -PerfectAggregateHashTable::PerfectAggregateHashTable(BufferManager &buffer_manager, vector group_types_p, +PerfectAggregateHashTable::PerfectAggregateHashTable(BufferManager &buffer_manager, + const vector &group_types_p, vector payload_types_p, vector aggregate_objects_p, vector group_minima_p, vector required_bits_p) - : BaseAggregateHashTable(buffer_manager, move(group_types_p), move(payload_types_p), move(aggregate_objects_p)), - required_bits(move(required_bits_p)), total_required_bits(0), group_minima(move(group_minima_p)) { - addresses.Initialize(LogicalType::POINTER); - + : BaseAggregateHashTable(buffer_manager, move(payload_types_p)), addresses(LogicalType::POINTER), + required_bits(move(required_bits_p)), total_required_bits(0), group_minima(move(group_minima_p)), + sel(STANDARD_VECTOR_SIZE) { for (auto &group_bits : required_bits) { total_required_bits += group_bits; } // the total amount of groups we allocate space for is 2^required_bits total_groups = 1 << total_required_bits; // we don't need to store the groups in a perfect hash table, since the group keys can be deduced by their location - tuple_size = payload_width; + grouping_columns = group_types_p.size(); + 
layout.Initialize(move(aggregate_objects_p)); + tuple_size = layout.GetRowWidth(); // allocate and null initialize the data owned_data = unique_ptr(new data_t[tuple_size * total_groups]); data = owned_data.get(); + // set up the empty payloads for every tuple, and initialize the "occupied" flag to false group_is_set = unique_ptr(new bool[total_groups]); memset(group_is_set.get(), 0, total_groups * sizeof(bool)); - - // set up the empty payloads for every tuple, and initialize the "occupied" flag to false - data_ptr_t payload_ptr = data; - for (idx_t i = 0; i < total_groups; i++) { - memcpy(payload_ptr, empty_payload_data.get(), payload_width); - payload_ptr += tuple_size; - } } PerfectAggregateHashTable::~PerfectAggregateHashTable() { @@ -51289,21 +56428,31 @@ void PerfectAggregateHashTable::AddChunk(DataChunk &groups, DataChunk &payload) } // now we have the HT entry number for every tuple // compute the actual pointer to the data by adding it to the base HT pointer and multiplying by the tuple size + idx_t needs_init = 0; for (idx_t i = 0; i < groups.size(); i++) { D_ASSERT(address_data[i] < total_groups); - group_is_set[address_data[i]] = true; + const auto group = address_data[i]; address_data[i] = uintptr_t(data) + address_data[i] * tuple_size; + if (!group_is_set[group]) { + group_is_set[group] = true; + sel.set_index(needs_init++, i); + if (needs_init == STANDARD_VECTOR_SIZE) { + RowOperations::InitializeStates(layout, addresses, sel, needs_init); + needs_init = 0; + } + } } + RowOperations::InitializeStates(layout, addresses, sel, needs_init); // after finding the group location we update the aggregates idx_t payload_idx = 0; + auto &aggregates = layout.GetAggregates(); for (auto &aggregate : aggregates) { auto input_count = (idx_t)aggregate.child_count; if (aggregate.filter) { - GroupedAggregateHashTable::UpdateAggregate(aggregate, payload, addresses, input_count, payload_idx); + RowOperations::UpdateFilteredStates(aggregate, addresses, payload, 
payload_idx); } else { - aggregate.function.update(input_count == 0 ? nullptr : &payload.data[payload_idx], nullptr, input_count, - addresses, payload.size()); + RowOperations::UpdateStates(aggregate, addresses, payload, payload_idx, payload.size()); } // move to the next aggregate payload_idx += input_count; @@ -51311,19 +56460,6 @@ void PerfectAggregateHashTable::AddChunk(DataChunk &groups, DataChunk &payload) } } -void PerfectAggregateHashTable::Combine(Vector &source_addresses, Vector &target_addresses, idx_t combine_count) { - if (combine_count == 0) { - return; - } - - for (auto &aggr : aggregates) { - D_ASSERT(aggr.function.combine); - aggr.function.combine(source_addresses, target_addresses, combine_count); - VectorOperations::AddInPlace(source_addresses, aggr.payload_size, combine_count); - VectorOperations::AddInPlace(target_addresses, aggr.payload_size, combine_count); - } -} - void PerfectAggregateHashTable::Combine(PerfectAggregateHashTable &other) { D_ASSERT(total_groups == other.total_groups); D_ASSERT(tuple_size == other.tuple_size); @@ -51337,6 +56473,8 @@ void PerfectAggregateHashTable::Combine(PerfectAggregateHashTable &other) { data_ptr_t source_ptr = other.data; data_ptr_t target_ptr = data; idx_t combine_count = 0; + idx_t reinit_count = 0; + const auto &reinit_sel = FlatVector::INCREMENTAL_SELECTION_VECTOR; for (idx_t i = 0; i < total_groups; i++) { auto has_entry_source = other.group_is_set[i]; // we only have any work to do if the source has an entry for this group @@ -51348,7 +56486,7 @@ void PerfectAggregateHashTable::Combine(PerfectAggregateHashTable &other) { target_addresses_ptr[combine_count] = target_ptr; combine_count++; if (combine_count == STANDARD_VECTOR_SIZE) { - Combine(source_addresses, target_addresses, combine_count); + RowOperations::CombineStates(layout, source_addresses, target_addresses, combine_count); combine_count = 0; } } else { @@ -51362,7 +56500,8 @@ void PerfectAggregateHashTable::Combine(PerfectAggregateHashTable 
&other) { source_ptr += tuple_size; target_ptr += tuple_size; } - Combine(source_addresses, target_addresses, combine_count); + RowOperations::CombineStates(layout, source_addresses, target_addresses, combine_count); + RowOperations::InitializeStates(layout, addresses, reinit_sel, reinit_count); } template @@ -51430,25 +56569,20 @@ void PerfectAggregateHashTable::Scan(idx_t &scan_position, DataChunk &result) { } // first reconstruct the groups from the group index idx_t shift = total_required_bits; - for (idx_t i = 0; i < group_types.size(); i++) { + for (idx_t i = 0; i < grouping_columns; i++) { shift -= required_bits[i]; ReconstructGroupVector(group_values, group_minima[i], required_bits[i], shift, entry_count, result.data[i]); } // then construct the payloads - for (idx_t i = 0; i < aggregates.size(); i++) { - auto &target = result.data[group_types.size() + i]; - auto &aggr = aggregates[i]; - aggr.function.finalize(addresses, aggr.bind_data, target, entry_count); - VectorOperations::AddInPlace(addresses, aggr.payload_size, entry_count); - } result.SetCardinality(entry_count); + RowOperations::FinalizeStates(layout, addresses, result, grouping_columns); } void PerfectAggregateHashTable::Destroy() { // check if there is any destructor to call bool has_destructor = false; - for (idx_t i = 0; i < aggregates.size(); i++) { - if (aggregates[i].function.destructor) { + for (auto &aggr : layout.GetAggregates()) { + if (aggr.function.destructor) { has_destructor = true; } } @@ -51457,23 +56591,22 @@ void PerfectAggregateHashTable::Destroy() { } // there are aggregates with destructors: loop over the hash table // and call the destructor method for each of the aggregates - data_ptr_t data_pointers[STANDARD_VECTOR_SIZE]; - Vector state_vector(LogicalType::POINTER, (data_ptr_t)data_pointers); + auto data_pointers = FlatVector::GetData(addresses); idx_t count = 0; - // iterate over all occupied slots of the hash table + // iterate over all initialised slots of the hash table 
data_ptr_t payload_ptr = data; for (idx_t i = 0; i < total_groups; i++) { if (group_is_set[i]) { data_pointers[count++] = payload_ptr; if (count == STANDARD_VECTOR_SIZE) { - CallDestructors(state_vector, count); + RowOperations::DestroyStates(layout, addresses, count); count = 0; } } payload_ptr += tuple_size; } - CallDestructors(state_vector, count); + RowOperations::DestroyStates(layout, addresses, count); } } // namespace duckdb @@ -51498,19 +56631,18 @@ string PhysicalOperator::ToString() const { } PhysicalOperatorState::PhysicalOperatorState(PhysicalOperator &op, PhysicalOperator *child) : finished(false) { - op.InitializeChunk(initial_chunk); if (child) { - child->InitializeChunkEmpty(child_chunk); + child->InitializeChunk(child_chunk); child_state = child->GetOperatorState(); } } -void PhysicalOperator::GetChunk(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) { +void PhysicalOperator::GetChunk(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const { if (context.client.interrupted) { throw InterruptException(); } // reset the chunk back to its initial state - chunk.Reference(state->initial_chunk); + chunk.Reset(); if (state->finished) { return; @@ -51524,9 +56656,11 @@ void PhysicalOperator::GetChunk(ExecutionContext &context, DataChunk &chunk, Phy chunk.Verify(); } +// LCOV_EXCL_START void PhysicalOperator::Print() { Printer::Print(ToString()); } +// LCOV_EXCL_STOP } // namespace duckdb @@ -52021,24 +57155,26 @@ void TransformIndexJoin(ClientContext &context, LogicalComparisonJoin &op, Index auto &tbl_scan = (PhysicalTableScan &)*left; auto tbl = dynamic_cast(tbl_scan.bind_data.get()); if (CanPlanIndexJoin(transaction, tbl, tbl_scan)) { - for (auto &index : tbl->table->storage->info->indexes) { - if (index->unbound_expressions[0]->alias == op.conditions[0].left->alias) { - *left_index = index.get(); - break; + tbl->table->storage->info->indexes.Scan([&](Index &index) { + if 
(index.unbound_expressions[0]->alias == op.conditions[0].left->alias) { + *left_index = &index; + return true; } - } + return false; + }); } } if (right->type == PhysicalOperatorType::TABLE_SCAN) { auto &tbl_scan = (PhysicalTableScan &)*right; auto tbl = dynamic_cast(tbl_scan.bind_data.get()); if (CanPlanIndexJoin(transaction, tbl, tbl_scan)) { - for (auto &index : tbl->table->storage->info->indexes) { - if (index->unbound_expressions[0]->alias == op.conditions[0].right->alias) { - *right_index = index.get(); - break; + tbl->table->storage->info->indexes.Scan([&](Index &index) { + if (index.unbound_expressions[0]->alias == op.conditions[0].right->alias) { + *right_index = &index; + return true; } - } + return false; + }); } } } @@ -52311,6 +57447,7 @@ class LogicalCreateTable : public LogicalOperator { } // namespace duckdb + namespace duckdb { static void ExtractDependencies(Expression &expr, unordered_set &dependencies) { @@ -52330,7 +57467,10 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalCreateTabl ExtractDependencies(*default_value, op.info->dependencies); } } - if (!op.children.empty()) { + auto &create_info = (CreateTableInfo &)*op.info->base; + auto &catalog = Catalog::GetCatalog(context); + auto existing_entry = catalog.GetEntry(context, create_info.schema, create_info.table, true); + if (!existing_entry && !op.children.empty()) { D_ASSERT(op.children.size() == 1); auto create = make_unique(op, op.schema, move(op.info), op.estimated_cardinality); auto plan = CreatePlan(*op.children[0]); @@ -52928,15 +58068,18 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalExplain &o + namespace duckdb { class LogicalExport : public LogicalOperator { public: - LogicalExport(CopyFunction function, unique_ptr copy_info) - : LogicalOperator(LogicalOperatorType::LOGICAL_EXPORT), function(function), copy_info(move(copy_info)) { + LogicalExport(CopyFunction function, unique_ptr copy_info, BoundExportData exported_tables) + : 
LogicalOperator(LogicalOperatorType::LOGICAL_EXPORT), function(function), copy_info(move(copy_info)), + exported_tables(move(exported_tables)) { } CopyFunction function; unique_ptr copy_info; + BoundExportData exported_tables; protected: void ResolveTypes() override { @@ -52950,7 +58093,8 @@ class LogicalExport : public LogicalOperator { namespace duckdb { unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalExport &op) { - auto export_node = make_unique(op.types, op.function, move(op.copy_info), op.estimated_cardinality); + auto export_node = make_unique(op.types, op.function, move(op.copy_info), op.estimated_cardinality, + op.exported_tables); // plan the underlying copy statements, if any if (!op.children.empty()) { auto plan = CreatePlan(*op.children[0]); @@ -53386,16 +58530,6 @@ class CaseExpressionMatcher : public ExpressionMatcher { bool Match(Expression *expr_, vector &bindings) override; }; -class CastExpressionMatcher : public ExpressionMatcher { -public: - CastExpressionMatcher() : ExpressionMatcher(ExpressionClass::BOUND_CAST) { - } - //! The child expression to match (if any) - unique_ptr child; - - bool Match(Expression *expr_, vector &bindings) override; -}; - class ComparisonExpressionMatcher : public ExpressionMatcher { public: ComparisonExpressionMatcher() : ExpressionMatcher(ExpressionClass::BOUND_COMPARISON) { @@ -53432,18 +58566,6 @@ class ConjunctionExpressionMatcher : public ExpressionMatcher { bool Match(Expression *expr_, vector &bindings) override; }; -class OperatorExpressionMatcher : public ExpressionMatcher { -public: - OperatorExpressionMatcher() : ExpressionMatcher(ExpressionClass::BOUND_OPERATOR) { - } - //! The matchers for the child expressions - vector> matchers; - //! 
The set matcher matching policy to use - SetMatcher::Policy policy; - - bool Match(Expression *expr_, vector &bindings) override; -}; - class FunctionExpressionMatcher : public ExpressionMatcher { public: FunctionExpressionMatcher() : ExpressionMatcher(ExpressionClass::BOUND_FUNCTION) { @@ -53546,7 +58668,7 @@ class LogicalGet : public LogicalOperator { //! Bound column IDs vector column_ids; //! Filters pushed down for table scan - vector table_filters; + TableFilterSet table_filters; string GetName() const override; string ParamsToString() const override; @@ -53600,14 +58722,14 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalFilter &op namespace duckdb { -unique_ptr FindColumnIndex(vector &table_filters, vector &column_ids) { +unique_ptr CreateTableFilterSet(TableFilterSet &table_filters, vector &column_ids) { // create the table filter map auto table_filter_set = make_unique(); - for (auto &table_filter : table_filters) { + for (auto &table_filter : table_filters.filters) { // find the relative column index from the absolute column index into the table idx_t column_index = INVALID_INDEX; for (idx_t i = 0; i < column_ids.size(); i++) { - if (table_filter.column_index == column_ids[i]) { + if (table_filter.first == column_ids[i]) { column_index = i; break; } @@ -53615,16 +58737,11 @@ unique_ptr FindColumnIndex(vector &table_filters, v if (column_index == INVALID_INDEX) { throw InternalException("Could not find column index for table filter"); } - table_filter.column_index = column_index; - auto filter = table_filter_set->filters.find(column_index); - if (filter != table_filter_set->filters.end()) { - filter->second.push_back(table_filter); - } else { - table_filter_set->filters.insert(make_pair(column_index, vector {table_filter})); - } + table_filter_set->filters[column_index] = move(table_filter.second); } return table_filter_set; } + unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalGet &op) { if (!op.children.empty()) { // this is for table producing 
functions that consume subquery results @@ -53636,8 +58753,8 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalGet &op) { } unique_ptr table_filters; - if (!op.table_filters.empty()) { - table_filters = FindColumnIndex(op.table_filters, op.column_ids); + if (!op.table_filters.filters.empty()) { + table_filters = CreateTableFilterSet(op.table_filters, op.column_ids); } if (op.function.dependency) { @@ -53823,6 +58940,7 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalLimit &op) + namespace duckdb { //! LogicalOrder represents an ORDER BY clause, sorting the data @@ -53833,6 +58951,7 @@ class LogicalOrder : public LogicalOperator { } vector orders; + vector> statistics; string ParamsToString() const override { string result; @@ -53865,7 +58984,8 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalOrder &op) auto plan = CreatePlan(*op.children[0]); if (!op.orders.empty()) { - auto order = make_unique(op.types, move(op.orders), op.estimated_cardinality); + auto order = + make_unique(op.types, move(op.orders), move(op.statistics), op.estimated_cardinality); order->children.push_back(move(plan)); plan = move(order); } @@ -54013,7 +59133,8 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalProjection #ifdef DEBUG for (auto &expr : op.expressions) { - D_ASSERT(!expr->IsWindow() && !expr->IsAggregate()); + D_ASSERT(!expr->IsWindow()); + D_ASSERT(!expr->IsAggregate()); } #endif if (plan->types.size() == op.types.size()) { @@ -54647,7 +59768,7 @@ class LogicalUpdate : public LogicalOperator { TableCatalogEntry *table; vector columns; vector> bound_defaults; - bool is_index_update; + bool update_is_del_and_insert; protected: void ResolveTypes() override { @@ -54667,7 +59788,7 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalUpdate &op dependencies.insert(op.table); auto update = make_unique(op.types, *op.table, *op.table->storage, op.columns, move(op.expressions), move(op.bound_defaults), op.estimated_cardinality); - update->is_index_update = 
op.is_index_update; + update->update_is_del_and_insert = op.update_is_del_and_insert; update->children.push_back(move(plan)); return move(update); } @@ -54820,24 +59941,24 @@ class DependencyExtractor : public LogicalOperatorVisitor { unique_ptr PhysicalPlanGenerator::CreatePlan(unique_ptr op) { // first resolve column references - context.profiler.StartPhase("column_binding"); + context.profiler->StartPhase("column_binding"); ColumnBindingResolver resolver; resolver.VisitOperator(*op); - context.profiler.EndPhase(); + context.profiler->EndPhase(); // now resolve types of all the operators - context.profiler.StartPhase("resolve_types"); + context.profiler->StartPhase("resolve_types"); op->ResolveOperatorTypes(); - context.profiler.EndPhase(); + context.profiler->EndPhase(); // extract dependencies from the logical plan DependencyExtractor extractor(dependencies); extractor.VisitOperator(*op); // then create the main physical plan - context.profiler.StartPhase("create_plan"); + context.profiler->StartPhase("create_plan"); auto plan = CreatePlan(*op); - context.profiler.EndPhase(); + context.profiler->EndPhase(); return plan; } @@ -54958,10 +60079,10 @@ void ReservoirSample::AddToReservoir(DataChunk &input) { idx_t remaining = input.size(); idx_t base_offset = 0; while (true) { - idx_t offset = reservoirSampling.next_index - reservoirSampling.current_count; + idx_t offset = base_reservoir_sample.next_index - base_reservoir_sample.current_count; if (offset >= remaining) { // not in this chunk! increment current count and go to the next chunk - reservoirSampling.current_count += remaining; + base_reservoir_sample.current_count += remaining; return; } // in this chunk! replace the element @@ -54980,9 +60101,9 @@ void ReservoirSample::ReplaceElement(DataChunk &input, idx_t index_in_chunk) { // replace the entry in the reservoir // 8. 
The item in R with the minimum key is replaced by item vi for (idx_t col_idx = 0; col_idx < input.ColumnCount(); col_idx++) { - reservoir.SetValue(col_idx, reservoirSampling.min_entry, input.GetValue(col_idx, index_in_chunk)); + reservoir.SetValue(col_idx, base_reservoir_sample.min_entry, input.GetValue(col_idx, index_in_chunk)); } - reservoirSampling.ReplaceElement(); + base_reservoir_sample.ReplaceElement(); } idx_t ReservoirSample::FillReservoir(DataChunk &input) { @@ -55002,7 +60123,7 @@ idx_t ReservoirSample::FillReservoir(DataChunk &input) { input.SetCardinality(required_count); reservoir.Append(input); - reservoirSampling.InitializeReservoir(reservoir.Count(), sample_count); + base_reservoir_sample.InitializeReservoir(reservoir.Count(), sample_count); // check if there are still elements remaining // this happens if we are on a boundary @@ -55040,15 +60161,14 @@ void ReservoirSamplePercentage::AddToReservoir(DataChunk &input) { input.SetCardinality(append_to_current_sample_count); current_sample->AddToReservoir(input); - - if (append_to_next_sample > 0) { - // slice the input for the remainder - SelectionVector sel(STANDARD_VECTOR_SIZE); - for (idx_t i = 0; i < append_to_next_sample; i++) { - sel.set_index(i, append_to_current_sample_count + i); - } - input.Slice(sel, append_to_next_sample); + } + if (append_to_next_sample > 0) { + // slice the input for the remainder + SelectionVector sel(STANDARD_VECTOR_SIZE); + for (idx_t i = 0; i < append_to_next_sample; i++) { + sel.set_index(i, append_to_current_sample_count + i); } + input.Slice(sel, append_to_next_sample); } // now our first sample is filled: append it to the set of finished samples finished_samples.push_back(move(current_sample)); @@ -55061,8 +60181,8 @@ void ReservoirSamplePercentage::AddToReservoir(DataChunk &input) { current_count = append_to_next_sample; } else { // we can just append to the current sample - current_sample->AddToReservoir(input); current_count += input.size(); + 
current_sample->AddToReservoir(input); } } @@ -55100,26 +60220,14 @@ void ReservoirSamplePercentage::Finalize() { is_finalized = true; } -BaseReservoirSampling::BaseReservoirSampling(int64_t seed) { +BaseReservoirSampling::BaseReservoirSampling(int64_t seed) : random(seed) { next_index = 0; min_threshold = 0; min_entry = 0; current_count = 0; - //! Make a random number engine - rng = make_unique(seed); - uniform_dist = make_unique>(0, 1); } -BaseReservoirSampling::BaseReservoirSampling() { - next_index = 0; - min_threshold = 0; - min_entry = 0; - current_count = 0; - //! Seed with a real random value, if available - pcg_extras::seed_seq_from seed_source; - //! Make a random number engine - rng = make_unique(seed_source); - uniform_dist = make_unique>(0, 1); +BaseReservoirSampling::BaseReservoirSampling() : BaseReservoirSampling(-1) { } void BaseReservoirSampling::InitializeReservoir(idx_t cur_size, idx_t sample_size) { @@ -55130,7 +60238,7 @@ void BaseReservoirSampling::InitializeReservoir(idx_t cur_size, idx_t sample_siz //! we then define the threshold to enter the reservoir T_w as the minimum key of R //! we use a priority queue to extract the minimum key in O(1) time for (idx_t i = 0; i < sample_size; i++) { - double k_i = (*uniform_dist)(*rng); + double k_i = random.NextRandom(); reservoir_weights.push(std::make_pair(-k_i, i)); } SetNextEntry(); @@ -55141,7 +60249,7 @@ void BaseReservoirSampling::SetNextEntry() { //! 4. Let r = random(0, 1) and Xw = log(r) / log(T_w) auto &min_key = reservoir_weights.top(); double t_w = -min_key.first; - double r = (*uniform_dist)(*rng); + double r = random.NextRandom(); double x_w = log(r) / log(t_w); //! 5. From the current item vc skip items until item vi , such that: //! 6. wc +wc+1 +···+wi−1 < Xw <= wc +wc+1 +···+wi−1 +wi @@ -55160,8 +60268,7 @@ void BaseReservoirSampling::ReplaceElement() { //! 8. Let tw = Tw i , r2 = random(tw,1) and vi’s key: ki = (r2)1/wi //! 9. The new threshold Tw is the new minimum key of R //! 
we generate a random number between (min_threshold, 1) - std::uniform_real_distribution dist(min_threshold, 1); - double r2 = dist(*rng); + double r2 = random.NextRandom(min_threshold, 1); //! now we insert the new weight into the reservoir reservoir_weights.push(std::make_pair(-r2, min_entry)); //! we update the min entry with the new min entry in the reservoir @@ -55174,24 +60281,28 @@ void BaseReservoirSampling::ReplaceElement() { + #include namespace duckdb { -WindowSegmentTree::WindowSegmentTree(AggregateFunction &aggregate, FunctionData *bind_info, LogicalType result_type, - ChunkCollection *input) - : aggregate(aggregate), bind_info(bind_info), result_type(move(result_type)), state(aggregate.state_size()), +WindowSegmentTree::WindowSegmentTree(AggregateFunction &aggregate, FunctionData *bind_info, + const LogicalType &result_type_p, ChunkCollection *input) + : aggregate(aggregate), bind_info(bind_info), result_type(result_type_p), state(aggregate.state_size()), + statep(Value::POINTER((idx_t)state.data())), frame(0, 0), statev(Value::POINTER((idx_t)state.data())), internal_nodes(0), input_ref(input) { #if STANDARD_VECTOR_SIZE < 512 throw NotImplementedException("Window functions are not supported for vector sizes < 512"); #endif - Value ptr_val = Value::POINTER((idx_t)state.data()); - statep.Reference(ptr_val); statep.Normalify(STANDARD_VECTOR_SIZE); + statev.SetVectorType(VectorType::FLAT_VECTOR); // Prevent conversion of results to constants if (input_ref && input_ref->ColumnCount() > 0) { inputs.Initialize(input_ref->Types()); - if (aggregate.combine) { + // if we have a frame-by-frame method, share the single state + if (aggregate.window) { + AggregateInit(); + } else if (aggregate.combine) { ConstructTree(); } } @@ -55216,23 +60327,56 @@ WindowSegmentTree::~WindowSegmentTree() { if (count > 0) { aggregate.destructor(addresses, count); } + + if (aggregate.window) { + aggregate.destructor(statev, 1); + } } void WindowSegmentTree::AggregateInit() { 
aggregate.initialize(state.data()); } -Value WindowSegmentTree::AggegateFinal() { - Vector statev(Value::POINTER((idx_t)state.data())); - Vector result(result_type); - result.SetVectorType(VectorType::CONSTANT_VECTOR); - ConstantVector::SetNull(result, false); - aggregate.finalize(statev, bind_info, result, 1); +void WindowSegmentTree::AggegateFinal(Vector &result, idx_t rid) { + aggregate.finalize(statev, bind_info, result, 1, rid); if (aggregate.destructor) { aggregate.destructor(statev, 1); } - return result.GetValue(0); +} + +void WindowSegmentTree::ExtractFrame(idx_t begin, idx_t end) { + const auto size = end - begin; + if (size >= STANDARD_VECTOR_SIZE) { + throw InternalException("Cannot compute window aggregation: bounds are too large"); + } + + const idx_t start_in_vector = begin % STANDARD_VECTOR_SIZE; + const auto input_count = input_ref->ColumnCount(); + if (start_in_vector + size <= STANDARD_VECTOR_SIZE) { + inputs.SetCardinality(size); + auto &chunk = input_ref->GetChunkForRow(begin); + for (idx_t i = 0; i < input_count; ++i) { + auto &v = inputs.data[i]; + auto &vec = chunk.data[i]; + v.Slice(vec, start_in_vector); + v.Verify(size); + } + } else { + inputs.Reset(); + inputs.SetCardinality(size); + + // we cannot just slice the individual vector! 
+ auto &chunk_a = input_ref->GetChunkForRow(begin); + auto &chunk_b = input_ref->GetChunkForRow(end); + idx_t chunk_a_count = chunk_a.size() - start_in_vector; + idx_t chunk_b_count = inputs.size() - chunk_a_count; + for (idx_t i = 0; i < input_count; ++i) { + auto &v = inputs.data[i]; + VectorOperations::Copy(chunk_a.data[i], v, chunk_a.size(), start_in_vector, 0); + VectorOperations::Copy(chunk_b.data[i], v, chunk_b_count, 0, chunk_a_count); + } + } } void WindowSegmentTree::WindowSegmentValue(idx_t l_idx, idx_t begin, idx_t end) { @@ -55240,37 +60384,18 @@ void WindowSegmentTree::WindowSegmentValue(idx_t l_idx, idx_t begin, idx_t end) if (begin == end) { return; } - inputs.Reset(); - inputs.SetCardinality(end - begin); - Vector s; - s.Slice(statep, 0); - idx_t start_in_vector = begin % STANDARD_VECTOR_SIZE; + if (end - begin >= STANDARD_VECTOR_SIZE) { + throw InternalException("Cannot compute window aggregation: bounds are too large"); + } + + Vector s(statep, 0); if (l_idx == 0) { - const auto input_count = input_ref->ColumnCount(); - if (start_in_vector + inputs.size() <= STANDARD_VECTOR_SIZE) { - auto &chunk = input_ref->GetChunkForRow(begin); - for (idx_t i = 0; i < input_count; ++i) { - auto &v = inputs.data[i]; - auto &vec = chunk.data[i]; - v.Slice(vec, start_in_vector); - v.Verify(inputs.size()); - } - } else { - // we cannot just slice the individual vector! 
- auto &chunk_a = input_ref->GetChunkForRow(begin); - auto &chunk_b = input_ref->GetChunkForRow(end); - idx_t chunk_a_count = chunk_a.size() - start_in_vector; - idx_t chunk_b_count = inputs.size() - chunk_a_count; - for (idx_t i = 0; i < input_count; ++i) { - auto &v = inputs.data[i]; - VectorOperations::Copy(chunk_a.data[i], v, chunk_a.size(), start_in_vector, 0); - VectorOperations::Copy(chunk_b.data[i], v, chunk_b_count, 0, chunk_a_count); - } - } - aggregate.update(&inputs.data[0], bind_info, input_count, s, inputs.size()); + ExtractFrame(begin, end); + aggregate.update(&inputs.data[0], bind_info, input_ref->ColumnCount(), s, inputs.size()); } else { - D_ASSERT(end - begin <= STANDARD_VECTOR_SIZE); + inputs.Reset(); + inputs.SetCardinality(end - begin); // find out where the states begin data_ptr_t begin_ptr = levels_flat_native.get() + state.size() * (begin + levels_flat_start[l_idx - 1]); // set up a vector of pointers that point towards the set of states @@ -55308,7 +60433,7 @@ void WindowSegmentTree::ConstructTree() { for (idx_t pos = 0; pos < level_size; pos += TREE_FANOUT) { // compute the aggregate for this entry in the segment tree AggregateInit(); - WindowSegmentValue(level_current, pos, MinValue(level_size, pos + TREE_FANOUT)); + WindowSegmentValue(level_current, pos, MinValue(level_size, pos + TREE_FANOUT)); memcpy(levels_flat_native.get() + (levels_flat_offset * state.size()), state.data(), state.size()); @@ -55318,14 +60443,35 @@ void WindowSegmentTree::ConstructTree() { levels_flat_start.push_back(levels_flat_offset); level_current++; } + + // Corner case: single element in the window + if (levels_flat_offset == 0) { + aggregate.initialize(levels_flat_native.get()); + } } -Value WindowSegmentTree::Compute(idx_t begin, idx_t end) { +void WindowSegmentTree::Compute(Vector &result, idx_t rid, idx_t begin, idx_t end) { D_ASSERT(input_ref); // No arguments, so just count if (inputs.ColumnCount() == 0) { - return Value::Numeric(result_type, end - 
begin); + D_ASSERT(GetTypeIdSize(result_type.InternalType()) == sizeof(idx_t)); + auto data = FlatVector::GetData(result); + data[rid] = end - begin; + return; + } + + // If we have a window function, use that + if (aggregate.window) { + // Frame boundaries + auto prev = frame; + frame = FrameBounds(begin, end); + + // Extract the range + ExtractFrame(MinValue(frame.first, prev.first), MaxValue(frame.second, prev.second)); + + aggregate.window(inputs.data.data(), bind_info, inputs.ColumnCount(), state.data(), frame, prev, result, rid); + return; } AggregateInit(); @@ -55333,11 +60479,8 @@ Value WindowSegmentTree::Compute(idx_t begin, idx_t end) { // Aggregate everything at once if we can't combine states if (!aggregate.combine) { WindowSegmentValue(0, begin, end); - if (end - begin >= STANDARD_VECTOR_SIZE) { - throw InternalException( - "Cannot compute window aggregation: bounds are too large for non-combinable aggregate"); - } - return AggegateFinal(); + AggegateFinal(result, rid); + return; } for (idx_t l_idx = 0; l_idx < levels_flat_start.size() + 1; l_idx++) { @@ -55345,7 +60488,7 @@ Value WindowSegmentTree::Compute(idx_t begin, idx_t end) { idx_t parent_end = end / TREE_FANOUT; if (parent_begin == parent_end) { WindowSegmentValue(l_idx, begin, end); - return AggegateFinal(); + break; } idx_t group_begin = parent_begin * TREE_FANOUT; if (begin != group_begin) { @@ -55360,7 +60503,7 @@ Value WindowSegmentTree::Compute(idx_t begin, idx_t end) { end = parent_end; } - return AggegateFinal(); + AggegateFinal(result, rid); } } // namespace duckdb @@ -55506,7 +60649,7 @@ struct BaseSumOperation { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { STATEOP::template Combine(source, target); } @@ -55561,7 +60704,7 @@ struct AverageSetOperation { state->count = 0; } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { target->count 
+= source.count; target->value += source.value; } @@ -55654,7 +60797,7 @@ AggregateFunction GetAverageAggregate(PhysicalType type) { return AggregateFunction::UnaryAggregate, hugeint_t, double, HugeintAverageOperation>( LogicalType::HUGEINT, LogicalType::DOUBLE); default: - throw NotImplementedException("Unimplemented average aggregate"); + throw InternalException("Unimplemented average aggregate"); } } @@ -55665,13 +60808,14 @@ unique_ptr BindDecimalAvg(ClientContext &context, AggregateFunctio function.name = "avg"; function.arguments[0] = decimal_type; function.return_type = LogicalType::DOUBLE; - return make_unique(Hugeint::Cast(Hugeint::POWERS_OF_TEN[decimal_type.scale()])); + return make_unique( + Hugeint::Cast(Hugeint::POWERS_OF_TEN[DecimalType::GetScale(decimal_type)])); } void AvgFun::RegisterFunction(BuiltinFunctions &set) { AggregateFunctionSet avg("avg"); - avg.AddFunction(AggregateFunction({LogicalType::DECIMAL}, LogicalType::DECIMAL, nullptr, nullptr, nullptr, nullptr, - nullptr, nullptr, BindDecimalAvg)); + avg.AddFunction(AggregateFunction({LogicalTypeId::DECIMAL}, LogicalTypeId::DECIMAL, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, BindDecimalAvg)); avg.AddFunction(GetAverageAggregate(PhysicalType::INT16)); avg.AddFunction(GetAverageAggregate(PhysicalType::INT32)); avg.AddFunction(GetAverageAggregate(PhysicalType::INT64)); @@ -55735,7 +60879,7 @@ struct CovarOperation { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { if (target->count == 0) { *target = source; } else if (source.count > 0) { @@ -55837,7 +60981,7 @@ struct STDDevBaseOperation { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { if (target->count == 0) { *target = source; } else if (source.count > 0) { @@ -55956,7 +61100,7 @@ struct CorrOperation { } template - static void Combine(STATE source, STATE *target) { + static void 
Combine(const STATE &source, STATE *target) { CovarOperation::Combine(source.cov_pop, &target->cov_pop); STDDevBaseOperation::Combine(source.dev_pop_x, &target->dev_pop_x); STDDevBaseOperation::Combine(source.dev_pop_y, &target->dev_pop_y); @@ -56115,22 +61259,18 @@ struct ApproxCountDistinctFunctionBase { } template - static void Combine(STATE &source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { if (!source.log) { return; } if (!target->log) { - target->log = source.log; - source.log = nullptr; - return; + target->log = new HyperLogLog(); } - auto new_log = target->log->MergePointer(*source.log); D_ASSERT(target->log); D_ASSERT(source.log); + auto new_log = target->log->MergePointer(*source.log); delete target->log; - delete source.log; target->log = new_log; - source.log = nullptr; } template @@ -56231,7 +61371,7 @@ AggregateFunction GetApproxCountDistinctFunction(PhysicalType type) { LogicalType::BIGINT); default: - throw NotImplementedException("Unimplemented approximate_count aggregate"); + throw InternalException("Unimplemented approximate_count aggregate"); } } @@ -56307,7 +61447,7 @@ struct ArgMinOperation : ArgMinMaxOperation { } template - static void Combine(STATE &source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { if (!source.is_initialized) { return; } @@ -56341,7 +61481,7 @@ struct ArgMaxOperation : ArgMinMaxOperation { } template - static void Combine(STATE &source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { if (!source.is_initialized) { return; } @@ -56383,7 +61523,7 @@ AggregateFunction GetArgMinMaxFunctionArg2(LogicalTypeId arg_2, const LogicalTyp return AggregateFunction::BinaryAggregate, T, string_t, T, OP>( arg, LogicalType::BLOB, arg); default: - throw NotImplementedException("Unimplemented arg_min/arg_max aggregate"); + throw InternalException("Unimplemented arg_min/arg_max aggregate"); } } @@ -56454,11 +61594,11 @@ void 
GetArgMinMaxFunction(LogicalTypeId arg_1, AggregateFunctionSet &fun) { fun.AddFunction(GetArgMinMaxFunctionArg2(LogicalTypeId::BLOB, LogicalType::BLOB)); break; default: - throw NotImplementedException("Unimplemented arg_min/arg_max aggregate"); + throw InternalException("Unimplemented arg_min/arg_max aggregate"); } } void ArgMinFun::RegisterFunction(BuiltinFunctions &set) { - AggregateFunctionSet fun("arg_min"); + AggregateFunctionSet fun("argmin"); GetArgMinMaxFunction(LogicalTypeId::INTEGER, fun); GetArgMinMaxFunction(LogicalTypeId::BIGINT, fun); GetArgMinMaxFunction(LogicalTypeId::DOUBLE, fun); @@ -56467,10 +61607,18 @@ void ArgMinFun::RegisterFunction(BuiltinFunctions &set) { GetArgMinMaxFunction(LogicalTypeId::TIMESTAMP, fun); GetArgMinMaxFunction(LogicalTypeId::BLOB, fun); set.AddFunction(fun); + + //! Add min_by alias + fun.name = "min_by"; + set.AddFunction(fun); + + //! Add arg_min alias + fun.name = "arg_min"; + set.AddFunction(fun); } void ArgMaxFun::RegisterFunction(BuiltinFunctions &set) { - AggregateFunctionSet fun("arg_max"); + AggregateFunctionSet fun("argmax"); GetArgMinMaxFunction(LogicalTypeId::INTEGER, fun); GetArgMinMaxFunction(LogicalTypeId::BIGINT, fun); GetArgMinMaxFunction(LogicalTypeId::DOUBLE, fun); @@ -56479,28 +61627,13 @@ void ArgMaxFun::RegisterFunction(BuiltinFunctions &set) { GetArgMinMaxFunction(LogicalTypeId::TIMESTAMP, fun); GetArgMinMaxFunction(LogicalTypeId::BLOB, fun); set.AddFunction(fun); -} - -void MinByFun::RegisterFunction(BuiltinFunctions &set) { - AggregateFunctionSet fun("min_by"); - fun.AddFunction( - AggregateFunction::BinaryAggregate, double, double, double, ArgMinOperation>( - LogicalType::DOUBLE, LogicalType::DOUBLE, LogicalType::DOUBLE)); - fun.AddFunction(AggregateFunction::BinaryAggregate, string_t, double, string_t, - ArgMinOperation>(LogicalType::VARCHAR, LogicalType::DOUBLE, - LogicalType::VARCHAR)); + //! 
Add max_by alias + fun.name = "max_by"; set.AddFunction(fun); -} -void MaxByFun::RegisterFunction(BuiltinFunctions &set) { - AggregateFunctionSet fun("max_by"); - fun.AddFunction( - AggregateFunction::BinaryAggregate, double, double, double, ArgMaxOperation>( - LogicalType::DOUBLE, LogicalType::DOUBLE, LogicalType::DOUBLE)); - fun.AddFunction(AggregateFunction::BinaryAggregate, string_t, double, string_t, - ArgMaxOperation>(LogicalType::VARCHAR, LogicalType::DOUBLE, - LogicalType::VARCHAR)); + //! Add arg_max alias + fun.name = "arg_max"; set.AddFunction(fun); } @@ -56510,6 +61643,7 @@ void MaxByFun::RegisterFunction(BuiltinFunctions &set) { + namespace duckdb { template @@ -56540,7 +61674,7 @@ static AggregateFunction GetBitfieldUnaryAggregate(LogicalType type) { case LogicalTypeId::UBIGINT: return AggregateFunction::UnaryAggregate, uint64_t, uint64_t, OP>(type, type); default: - throw NotImplementedException("Unimplemented bitfield type for unary aggregate"); + throw InternalException("Unimplemented bitfield type for unary aggregate"); } } @@ -56578,7 +61712,7 @@ struct BitAndOperation { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { if (!source.is_set) { // source is NULL, nothing to do. return; @@ -56638,7 +61772,7 @@ struct BitOrOperation { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { if (!source.is_set) { // source is NULL, nothing to do. return; @@ -56698,7 +61832,7 @@ struct BitXorOperation { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { if (!source.is_set) { // source is NULL, nothing to do. 
return; @@ -56746,7 +61880,7 @@ struct BoolAndFunFunction { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { target->val = target->val && source.val; target->empty = target->empty && source.empty; } @@ -56786,7 +61920,7 @@ struct BoolOrFunFunction { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { target->val = target->val || source.val; target->empty = target->empty && source.empty; } @@ -56858,7 +61992,7 @@ struct BaseCountFunction { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { *target += source; } @@ -56948,8 +62082,19 @@ namespace duckdb { template struct EntropyState { + using DistinctMap = unordered_map; + idx_t count; - unordered_map *distinct; + DistinctMap *distinct; + + EntropyState &operator=(const EntropyState &other) = delete; + + EntropyState &Assign(const EntropyState &other) { + D_ASSERT(!distinct); + distinct = new DistinctMap(*other.distinct); + count = other.count; + return *this; + } }; struct EntropyFunctionBase { @@ -56960,14 +62105,12 @@ struct EntropyFunctionBase { } template - static void Combine(STATE &source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { if (!source.distinct) { return; } if (!target->distinct) { - target->distinct = source.distinct; - target->count = source.count; - source.distinct = nullptr; + target->Assign(source); return; } for (auto &val : *source.distinct) { @@ -57072,7 +62215,7 @@ AggregateFunction GetEntropyFunction(PhysicalType type) { LogicalType::DOUBLE); default: - throw NotImplementedException("Unimplemented approximate_count aggregate"); + throw InternalException("Unimplemented approximate_count aggregate"); } } @@ -57141,7 +62284,7 @@ struct FirstFunction : public FirstFunctionBase { } template - static void Combine(STATE source, STATE *target) { + 
static void Combine(const STATE &source, STATE *target) { if (!target->is_set) { *target = source; } @@ -57191,7 +62334,7 @@ struct FirstFunctionString : public FirstFunctionBase { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { if (source.is_set && !target->is_set) { SetValue(target, source.value, source.is_null); } @@ -57214,6 +62357,64 @@ struct FirstFunctionString : public FirstFunctionBase { } }; +struct FirstStateValue { + Value *value; +}; + +struct FirstValueFunction { + template + static void Initialize(STATE *state) { + state->value = nullptr; + } + + template + static void Destroy(STATE *state) { + if (state->value) { + delete state->value; + } + } + static bool IgnoreNull() { + return false; + } + + static void Update(Vector inputs[], FunctionData *, idx_t input_count, Vector &state_vector, idx_t count) { + auto &input = inputs[0]; + VectorData sdata; + state_vector.Orrify(count, sdata); + + auto states = (FirstStateValue **)sdata.data; + for (idx_t i = 0; i < count; i++) { + auto state = states[sdata.sel->get_index(i)]; + if (!state->value) { + state->value = new Value(input.GetValue(i)); + } + } + } + + template + static void Combine(const STATE &source, STATE *target) { + if (source.value && !target->value) { + target->value = new Value(*source.value); + } + } + + template + static void Finalize(Vector &result, FunctionData *, STATE *state, T *target, ValidityMask &mask, idx_t idx) { + if (!state->value) { + mask.SetInvalid(idx); + } else { + result.SetValue(idx, *state->value); + } + } + + static unique_ptr Bind(ClientContext &context, AggregateFunction &function, + vector> &arguments) { + function.arguments[0] = arguments[0]->return_type; + function.return_type = arguments[0]->return_type; + return nullptr; + } +}; + template static AggregateFunction GetFirstAggregateTemplated(LogicalType type) { return AggregateFunction::UnaryAggregate, T, T, FirstFunction>(type, type); @@ 
-57276,7 +62477,12 @@ AggregateFunction FirstFun::GetFunction(const LogicalType &type) { return function; } default: - throw NotImplementedException("Unimplemented type for FIRST aggregate"); + return AggregateFunction( + {type}, type, AggregateFunction::StateSize, + AggregateFunction::StateInitialize, FirstValueFunction::Update, + AggregateFunction::StateCombine, + AggregateFunction::StateFinalize, nullptr, + FirstValueFunction::Bind, AggregateFunction::StateDestroy); } } @@ -57298,20 +62504,10 @@ void FirstFun::RegisterFunction(BuiltinFunctions &set) { } } set.AddFunction(first); -} - -void ArbitraryFun::RegisterFunction(BuiltinFunctions &set) { - AggregateFunctionSet first("arbitrary"); - for (const auto &type : LogicalType::ALL_TYPES) { - if (type.id() == LogicalTypeId::DECIMAL) { - first.AddFunction(AggregateFunction({type}, type, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, - BindDecimalFirst)); - } else { - first.AddFunction(FirstFun::GetFunction(type)); - } - } + first.name = "arbitrary"; set.AddFunction(first); } + } // namespace duckdb @@ -57353,7 +62549,7 @@ struct KurtosisOperation { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { if (source.n == 0) { return; } @@ -57385,9 +62581,9 @@ struct KurtosisOperation { 6 * state->sum_sqr * state->sum * state->sum * temp * temp - 3 * pow(state->sum, 4) * pow(temp, 3)); double m2 = temp * (state->sum_sqr - state->sum * state->sum * temp); - if (((m2 * m2) - 3 * (n - 1)) == 0 || ((n - 2) * (n - 3)) == 0) { + if (((m2 * m2) - 3 * (n - 1)) == 0 || ((n - 2) * (n - 3)) == 0) { // LCOV_EXCL_START mask.SetInvalid(idx); - } + } // LCOV_EXCL_STOP target[idx] = (n - 1) * ((n + 1) * m4 / (m2 * m2) - 3 * (n - 1)) / ((n - 2) * (n - 3)); if (!Value::DoubleIsValid(target[idx])) { mask.SetInvalid(idx); @@ -57415,6 +62611,9 @@ void KurtosisFun::RegisterFunction(BuiltinFunctions &set) { + + + namespace duckdb { template @@ -57436,6 +62635,7 @@ 
static AggregateFunction GetUnaryAggregate(LogicalType type) { case LogicalTypeId::INTEGER: return AggregateFunction::UnaryAggregate, int32_t, int32_t, OP>(type, type); case LogicalTypeId::TIMESTAMP: + case LogicalTypeId::TIME: case LogicalTypeId::BIGINT: return AggregateFunction::UnaryAggregate, int64_t, int64_t, OP>(type, type); case LogicalTypeId::UTINYINT: @@ -57455,7 +62655,7 @@ static AggregateFunction GetUnaryAggregate(LogicalType type) { case LogicalTypeId::INTERVAL: return AggregateFunction::UnaryAggregate, interval_t, interval_t, OP>(type, type); default: - throw NotImplementedException("Unimplemented type for min/max aggregate"); + throw InternalException("Unimplemented type for min/max aggregate"); } } @@ -57514,7 +62714,7 @@ struct MinOperation : public NumericMinMaxBase { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { if (!source.isset) { // source is NULL, nothing to do return; @@ -57537,7 +62737,7 @@ struct MaxOperation : public NumericMinMaxBase { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { if (!source.isset) { // source is NULL, nothing to do return; @@ -57584,7 +62784,7 @@ struct StringMinMaxBase : public MinMaxBase { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { if (!source.isset) { // source is NULL, nothing to do return; @@ -57617,265 +62817,352 @@ struct MaxOperationString : public StringMinMaxBase { } }; +template +static bool TemplatedOptimumType(Vector &left, idx_t lidx, idx_t lcount, Vector &right, idx_t ridx, idx_t rcount) { + VectorData lvdata, rvdata; + left.Orrify(lcount, lvdata); + right.Orrify(rcount, rvdata); + + lidx = lvdata.sel->get_index(lidx); + ridx = rvdata.sel->get_index(ridx); + + auto ldata = (const T *)lvdata.data; + auto rdata = (const T *)rvdata.data; + + auto &lval = ldata[lidx]; + auto 
&rval = rdata[ridx]; + + auto lnull = !lvdata.validity.RowIsValid(lidx); + auto rnull = !rvdata.validity.RowIsValid(ridx); + + return OP::Operation(lval, rval, lnull, rnull); +} + template -unique_ptr BindDecimalMinMax(ClientContext &context, AggregateFunction &function, - vector> &arguments) { - auto decimal_type = arguments[0]->return_type; - switch (decimal_type.InternalType()) { +static bool TemplatedOptimumList(Vector &left, idx_t lidx, idx_t lcount, Vector &right, idx_t ridx, idx_t rcount); + +template +static bool TemplatedOptimumStruct(Vector &left, idx_t lidx, idx_t lcount, Vector &right, idx_t ridx, idx_t rcount); + +template +static bool TemplatedOptimumValue(Vector &left, idx_t lidx, idx_t lcount, Vector &right, idx_t ridx, idx_t rcount) { + D_ASSERT(left.GetType() == right.GetType()); + switch (left.GetType().InternalType()) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + return TemplatedOptimumType(left, lidx, lcount, right, ridx, rcount); case PhysicalType::INT16: - function = GetUnaryAggregate(LogicalType::SMALLINT); - break; + return TemplatedOptimumType(left, lidx, lcount, right, ridx, rcount); case PhysicalType::INT32: - function = GetUnaryAggregate(LogicalType::INTEGER); - break; + return TemplatedOptimumType(left, lidx, lcount, right, ridx, rcount); case PhysicalType::INT64: - function = GetUnaryAggregate(LogicalType::BIGINT); - break; + return TemplatedOptimumType(left, lidx, lcount, right, ridx, rcount); + case PhysicalType::UINT8: + return TemplatedOptimumType(left, lidx, lcount, right, ridx, rcount); + case PhysicalType::UINT16: + return TemplatedOptimumType(left, lidx, lcount, right, ridx, rcount); + case PhysicalType::UINT32: + return TemplatedOptimumType(left, lidx, lcount, right, ridx, rcount); + case PhysicalType::UINT64: + return TemplatedOptimumType(left, lidx, lcount, right, ridx, rcount); + case PhysicalType::INT128: + return TemplatedOptimumType(left, lidx, lcount, right, ridx, rcount); + case PhysicalType::FLOAT: + 
return TemplatedOptimumType(left, lidx, lcount, right, ridx, rcount); + case PhysicalType::DOUBLE: + return TemplatedOptimumType(left, lidx, lcount, right, ridx, rcount); + case PhysicalType::INTERVAL: + return TemplatedOptimumType(left, lidx, lcount, right, ridx, rcount); + case PhysicalType::VARCHAR: + return TemplatedOptimumType(left, lidx, lcount, right, ridx, rcount); + case PhysicalType::LIST: + return TemplatedOptimumList(left, lidx, lcount, right, ridx, rcount); + case PhysicalType::MAP: + case PhysicalType::STRUCT: + return TemplatedOptimumStruct(left, lidx, lcount, right, ridx, rcount); default: - function = GetUnaryAggregate(LogicalType::HUGEINT); - break; + throw InternalException("Invalid type for distinct comparison"); } - function.arguments[0] = decimal_type; - function.return_type = decimal_type; - return nullptr; } -template -static void AddMinMaxOperator(AggregateFunctionSet &set) { - for (auto &type : LogicalType::ALL_TYPES) { - if (type.id() == LogicalTypeId::VARCHAR || type.id() == LogicalTypeId::BLOB) { - set.AddFunction( - AggregateFunction::UnaryAggregateDestructor, string_t, string_t, OP_STRING>( - type.id(), type.id())); - } else if (type.id() == LogicalTypeId::DECIMAL) { - set.AddFunction(AggregateFunction({type}, type, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, - BindDecimalMinMax)); - } else { - set.AddFunction(GetUnaryAggregate(type)); - } +template +static bool TemplatedOptimumStruct(Vector &left, idx_t lidx, idx_t lcount, Vector &right, idx_t ridx, idx_t rcount) { + // STRUCT dictionaries apply to all the children + // so map the indexes first + VectorData lvdata, rvdata; + left.Orrify(lcount, lvdata); + right.Orrify(rcount, rvdata); + + lidx = lvdata.sel->get_index(lidx); + ridx = rvdata.sel->get_index(ridx); + + // DISTINCT semantics are in effect for nested types + auto lnull = !lvdata.validity.RowIsValid(lidx); + auto rnull = !rvdata.validity.RowIsValid(ridx); + if (lnull || rnull) { + return OP::Operation(0, 0, lnull, 
rnull); } -} -void MinFun::RegisterFunction(BuiltinFunctions &set) { - AggregateFunctionSet min("min"); - AddMinMaxOperator(min); - set.AddFunction(min); -} + auto &lchildren = StructVector::GetEntries(left); + auto &rchildren = StructVector::GetEntries(right); -void MaxFun::RegisterFunction(BuiltinFunctions &set) { - AggregateFunctionSet max("max"); - AddMinMaxOperator(max); - set.AddFunction(max); -} + D_ASSERT(lchildren.size() == rchildren.size()); + for (idx_t col_no = 0; col_no < lchildren.size(); ++col_no) { + auto &lchild = *lchildren[col_no]; + auto &rchild = *rchildren[col_no]; -} // namespace duckdb -// MODE( ) -// Returns the most frequent value for the values within expr1. -// NULL values are ignored. If all the values are NULL, or there are 0 rows, then the function returns NULL. + // Strict comparisons use the OP for definite + if (TemplatedOptimumValue(lchild, lidx, lcount, rchild, ridx, rcount)) { + return true; + } + if (col_no == lchildren.size() - 1) { + break; + } + // Strict comparisons use IS NOT DISTINCT for possible + if (!TemplatedOptimumValue(lchild, lidx, lcount, rchild, ridx, rcount)) { + return false; + } + } + return false; +} +template +static bool TemplatedOptimumList(Vector &left, idx_t lidx, idx_t lcount, Vector &right, idx_t ridx, idx_t rcount) { + VectorData lvdata, rvdata; + left.Orrify(lcount, lvdata); + right.Orrify(rcount, rvdata); + // Update the indexes and vector sizes for recursion. 
+ lidx = lvdata.sel->get_index(lidx); + ridx = rvdata.sel->get_index(ridx); + lcount = ListVector::GetListSize(left); + rcount = ListVector::GetListSize(left); + // DISTINCT semantics are in effect for nested types + auto lnull = !lvdata.validity.RowIsValid(lidx); + auto rnull = !rvdata.validity.RowIsValid(ridx); + if (lnull || rnull) { + return OP::Operation(0, 0, lnull, rnull); + } -namespace duckdb { + auto &lchild = ListVector::GetEntry(left); + auto &rchild = ListVector::GetEntry(right); -template -struct ModeState { - unordered_map *frequency_map; -}; + auto ldata = (const list_entry_t *)lvdata.data; + auto rdata = (const list_entry_t *)rvdata.data; -struct ModeFunction { - template - static void Initialize(STATE *state) { - state->frequency_map = nullptr; - } + auto &lval = ldata[lidx]; + auto &rval = rdata[ridx]; - template - static void Operation(STATE *state, FunctionData *bind_data, INPUT_TYPE *input, ValidityMask &mask, idx_t idx) { - if (!state->frequency_map) { - state->frequency_map = new unordered_map(); + for (idx_t pos = 0;; ++pos) { + // Tie-breaking uses the OP + if (pos == lval.length || pos == rval.length) { + return OP::Operation(lval.length, rval.length, false, false); } - (*state->frequency_map)[input[idx]]++; - } - template - static void Combine(STATE &source, STATE *target) { - if (!source.frequency_map) { - return; - } - if (!target->frequency_map) { - target->frequency_map = source.frequency_map; - source.frequency_map = nullptr; - return; - } - for (auto &val : *source.frequency_map) { - (*target->frequency_map)[val.first] += val.second; + // Strict comparisons use the OP for definite + lidx = lval.offset + pos; + ridx = rval.offset + pos; + if (TemplatedOptimumValue(lchild, lidx, lcount, rchild, ridx, rcount)) { + return true; } - } - template - static void Finalize(Vector &result, FunctionData *, STATE *state, T *target, ValidityMask &mask, idx_t idx) { - if (!state->frequency_map) { - mask.SetInvalid(idx); - return; - } - //! 
Initialize control variables to first variable of the frequency map - auto highest_frequency = state->frequency_map->begin(); - auto iterator = state->frequency_map->begin(); - iterator++; - while (iterator != state->frequency_map->end()) { - if (iterator->second > highest_frequency->second) { - highest_frequency = iterator; - } - iterator++; - } - target[idx] = highest_frequency->first; - } - template - static void ConstantOperation(STATE *state, FunctionData *bind_data, INPUT_TYPE *input, ValidityMask &mask, - idx_t count) { - for (idx_t i = 0; i < count; i++) { - Operation(state, bind_data, input, mask, 0); + // Strict comparisons use IS NOT DISTINCT for possible + if (!TemplatedOptimumValue(lchild, lidx, lcount, rchild, ridx, rcount)) { + return false; } } + return false; +} + +struct VectorMinMaxState { + Vector *value; +}; + +struct VectorMinMaxBase { static bool IgnoreNull() { return true; } + template + static void Initialize(STATE *state) { + state->value = nullptr; + } + template static void Destroy(STATE *state) { - if (state->frequency_map) { - delete state->frequency_map; + if (state->value) { + delete state->value; } + state->value = nullptr; } -}; -struct ModeFunctionString { template - static void Initialize(STATE *state) { - state->frequency_map = nullptr; + static void Assign(STATE *state, Vector &input, const idx_t idx) { + if (!state->value) { + state->value = new Vector(input.GetType()); + state->value->SetVectorType(VectorType::CONSTANT_VECTOR); + } + sel_t selv = idx; + SelectionVector sel(&selv); + VectorOperations::Copy(input, *state->value, sel, 1, 0, 0); } - template - static void Operation(STATE *state, FunctionData *bind_data, INPUT_TYPE *input, ValidityMask &mask, idx_t idx) { - if (!state->frequency_map) { - state->frequency_map = new unordered_map(); - } - auto value = input[idx].GetString(); - (*state->frequency_map)[value]++; + template + static void Execute(STATE *state, Vector &input, const idx_t idx, const idx_t count) { + 
Assign(state, input, idx); } template - static void Combine(STATE &source, STATE *target) { - if (!source.frequency_map) { - return; + static void Update(Vector inputs[], FunctionData *, idx_t input_count, Vector &state_vector, idx_t count) { + auto &input = inputs[0]; + VectorData idata; + input.Orrify(count, idata); + + VectorData sdata; + state_vector.Orrify(count, sdata); + + auto states = (STATE **)sdata.data; + for (idx_t i = 0; i < count; i++) { + const auto idx = idata.sel->get_index(i); + if (!idata.validity.RowIsValid(idx)) { + continue; + } + const auto sidx = sdata.sel->get_index(i); + auto state = states[sidx]; + if (!state->value) { + Assign(state, input, idx); + } else { + OP::template Execute(state, input, idx, count); + } } - if (!target->frequency_map) { - target->frequency_map = source.frequency_map; - source.frequency_map = nullptr; + } + + template + static void Combine(const STATE &source, STATE *target) { + if (!source.value) { return; - } - for (auto &val : *source.frequency_map) { - auto value = val.first; - (*target->frequency_map)[value] += val.second; + } else if (!target->value) { + Assign(target, *source.value, 0); + } else { + OP::template Execute(target, *source.value, 0, 1); } } template static void Finalize(Vector &result, FunctionData *, STATE *state, T *target, ValidityMask &mask, idx_t idx) { - if (!state->frequency_map) { - mask.SetInvalid(idx); - return; - } - T h_freq; - size_t freq = 0; - - for (auto &val : *state->frequency_map) { - if (val.second > freq) { - h_freq = val.first; - freq = val.second; - } + if (!state->value) { + // we need to use FlatVector::SetNull here + // since for STRUCT columns only setting the validity mask of the struct is incorrect + // as for a struct column, we need to also set ALL child columns to NULL + FlatVector::SetNull(result, idx, true); + } else { + VectorOperations::Copy(*state->value, result, 1, 0, idx); } - target[idx] = string_t(h_freq); } - template - static void 
ConstantOperation(STATE *state, FunctionData *bind_data, INPUT_TYPE *input, ValidityMask &mask, - idx_t count) { - for (idx_t i = 0; i < count; i++) { - Operation(state, bind_data, input, mask, 0); - } + + static unique_ptr Bind(ClientContext &context, AggregateFunction &function, + vector> &arguments) { + function.arguments[0] = arguments[0]->return_type; + function.return_type = arguments[0]->return_type; + return nullptr; } +}; - static bool IgnoreNull() { - return true; +struct MinOperationVector : public VectorMinMaxBase { + template + static void Execute(STATE *state, Vector &input, const idx_t idx, const idx_t count) { + if (TemplatedOptimumValue(input, idx, count, *state->value, 0, 1)) { + Assign(state, input, idx); + } } +}; +struct MaxOperationVector : public VectorMinMaxBase { template - static void Destroy(STATE *state) { - if (state->frequency_map) { - delete state->frequency_map; + static void Execute(STATE *state, Vector &input, const idx_t idx, const idx_t count) { + if (TemplatedOptimumValue(input, idx, count, *state->value, 0, 1)) { + Assign(state, input, idx); } } }; -AggregateFunction GetModeFunction(PhysicalType type) { - switch (type) { - case PhysicalType::UINT16: - return AggregateFunction::UnaryAggregateDestructor, uint16_t, uint16_t, ModeFunction>( - LogicalType::UTINYINT, LogicalType::UTINYINT); - case PhysicalType::UINT32: - return AggregateFunction::UnaryAggregateDestructor, uint32_t, uint32_t, ModeFunction>( - LogicalType::UINTEGER, LogicalType::UINTEGER); - case PhysicalType::UINT64: - return AggregateFunction::UnaryAggregateDestructor, uint64_t, uint64_t, ModeFunction>( - LogicalType::UBIGINT, LogicalType::UBIGINT); +template +unique_ptr BindDecimalMinMax(ClientContext &context, AggregateFunction &function, + vector> &arguments) { + auto decimal_type = arguments[0]->return_type; + switch (decimal_type.InternalType()) { case PhysicalType::INT16: - return AggregateFunction::UnaryAggregateDestructor, int16_t, int16_t, ModeFunction>( - 
LogicalType::TINYINT, LogicalType::TINYINT); + function = GetUnaryAggregate(LogicalType::SMALLINT); + break; case PhysicalType::INT32: - return AggregateFunction::UnaryAggregateDestructor, int32_t, int32_t, ModeFunction>( - LogicalType::INTEGER, LogicalType::INTEGER); + function = GetUnaryAggregate(LogicalType::INTEGER); + break; case PhysicalType::INT64: - return AggregateFunction::UnaryAggregateDestructor, int64_t, int64_t, ModeFunction>( - LogicalType::BIGINT, LogicalType::BIGINT); - case PhysicalType::FLOAT: - return AggregateFunction::UnaryAggregateDestructor, float, float, ModeFunction>( - LogicalType::FLOAT, LogicalType::FLOAT); - case PhysicalType::DOUBLE: - return AggregateFunction::UnaryAggregateDestructor, double, double, ModeFunction>( - LogicalType::DOUBLE, LogicalType::DOUBLE); - case PhysicalType::VARCHAR: - return AggregateFunction::UnaryAggregateDestructor, string_t, string_t, ModeFunctionString>( - LogicalType::VARCHAR, LogicalType::VARCHAR); - + function = GetUnaryAggregate(LogicalType::BIGINT); + break; default: - throw NotImplementedException("Unimplemented mode aggregate"); + function = GetUnaryAggregate(LogicalType::HUGEINT); + break; } + function.arguments[0] = decimal_type; + function.return_type = decimal_type; + return nullptr; } -void ModeFun::RegisterFunction(BuiltinFunctions &set) { - AggregateFunctionSet fun("mode"); - fun.AddFunction(GetModeFunction(PhysicalType::UINT16)); - fun.AddFunction(GetModeFunction(PhysicalType::UINT32)); - fun.AddFunction(GetModeFunction(PhysicalType::UINT64)); - fun.AddFunction(GetModeFunction(PhysicalType::FLOAT)); - fun.AddFunction(GetModeFunction(PhysicalType::INT16)); - fun.AddFunction(GetModeFunction(PhysicalType::INT32)); - fun.AddFunction(GetModeFunction(PhysicalType::INT64)); - fun.AddFunction(GetModeFunction(PhysicalType::DOUBLE)); - fun.AddFunction(GetModeFunction(PhysicalType::VARCHAR)); - set.AddFunction(fun); +template +static AggregateFunction GetMinMaxFunction(const LogicalType &type) { + 
return AggregateFunction({type}, type, AggregateFunction::StateSize, + AggregateFunction::StateInitialize, OP::template Update, + AggregateFunction::StateCombine, + AggregateFunction::StateFinalize, nullptr, OP::Bind, + AggregateFunction::StateDestroy); } + +template +static void AddMinMaxOperator(AggregateFunctionSet &set) { + for (auto &type : LogicalType::ALL_TYPES) { + if (type.id() == LogicalTypeId::VARCHAR || type.id() == LogicalTypeId::BLOB) { + set.AddFunction( + AggregateFunction::UnaryAggregateDestructor, string_t, string_t, OP_STRING>( + type.id(), type.id())); + } else if (type.id() == LogicalTypeId::DECIMAL) { + set.AddFunction(AggregateFunction({type}, type, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + BindDecimalMinMax)); + } else if (type.id() == LogicalTypeId::LIST || type.id() == LogicalTypeId::MAP || + type.id() == LogicalTypeId::STRUCT) { + set.AddFunction(GetMinMaxFunction(type)); + + } else { + set.AddFunction(GetUnaryAggregate(type)); + } + } +} + +void MinFun::RegisterFunction(BuiltinFunctions &set) { + AggregateFunctionSet min("min"); + AddMinMaxOperator(min); + set.AddFunction(min); +} + +void MaxFun::RegisterFunction(BuiltinFunctions &set) { + AggregateFunctionSet max("max"); + AddMinMaxOperator(max); + set.AddFunction(max); +} + } // namespace duckdb + namespace duckdb { struct ProductState { @@ -57891,7 +63178,7 @@ struct ProductFunction { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { target->val *= source.val; target->empty = target->empty && source.empty; } @@ -57976,7 +63263,7 @@ struct SkewnessOperation { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { if (source.n == 0) { return; } @@ -58028,6 +63315,7 @@ void SkewFun::RegisterFunction(BuiltinFunctions &set) { + namespace duckdb { struct StringAggState { @@ -58066,7 +63354,7 @@ struct StringAggBaseFunction { static 
inline void PerformOperation(StringAggState *state, const char *str, const char *sep, idx_t str_size, idx_t sep_size) { - if (state->dataptr == nullptr) { + if (!state->dataptr) { // first iteration: allocate space for the string and copy it into the state state->alloc_size = MaxValue(8, NextPowerOfTwo(str_size)); state->dataptr = new char[state->alloc_size]; @@ -58127,8 +63415,8 @@ struct StringAggSingleFunction : public StringAggBaseFunction { } template - static void Combine(STATE source, STATE *target) { - if (source.dataptr == nullptr) { + static void Combine(const STATE &source, STATE *target) { + if (!source.dataptr) { // source is not set: skip combining return; } @@ -58182,7 +63470,7 @@ struct SumSetOperation { state->isset = false; } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { target->isset = source.isset || target->isset; target->value += source.value; } @@ -58260,7 +63548,6 @@ unique_ptr SumPropagateStats(ClientContext &context, BoundAggreg max_positive = numeric_stats.max.GetValueUnsafe(); break; default: - // unhandled type here throw InternalException("Unsupported type for propagate sum stats"); } auto max_sum_negative = max_negative * hugeint_t(node_stats->max_cardinality); @@ -58291,7 +63578,7 @@ unique_ptr SumPropagateStats(ClientContext &context, BoundAggreg return nullptr; } -AggregateFunction GetSumAggregate(PhysicalType type) { +AggregateFunction SumFun::GetSumAggregate(PhysicalType type) { switch (type) { case PhysicalType::INT16: return AggregateFunction::UnaryAggregate, int16_t, hugeint_t, IntegerSumOperation>( @@ -58314,25 +63601,25 @@ AggregateFunction GetSumAggregate(PhysicalType type) { return AggregateFunction::UnaryAggregate, hugeint_t, hugeint_t, HugeintSumOperation>( LogicalType::HUGEINT, LogicalType::HUGEINT); default: - throw NotImplementedException("Unimplemented sum aggregate"); + throw InternalException("Unimplemented sum aggregate"); } } unique_ptr 
BindDecimalSum(ClientContext &context, AggregateFunction &function, vector> &arguments) { auto decimal_type = arguments[0]->return_type; - function = GetSumAggregate(decimal_type.InternalType()); + function = SumFun::GetSumAggregate(decimal_type.InternalType()); function.name = "sum"; function.arguments[0] = decimal_type; - function.return_type = LogicalType(LogicalTypeId::DECIMAL, Decimal::MAX_WIDTH_DECIMAL, decimal_type.scale()); + function.return_type = LogicalType::DECIMAL(Decimal::MAX_WIDTH_DECIMAL, DecimalType::GetScale(decimal_type)); return nullptr; } void SumFun::RegisterFunction(BuiltinFunctions &set) { AggregateFunctionSet sum("sum"); // decimal - sum.AddFunction(AggregateFunction({LogicalType::DECIMAL}, LogicalType::DECIMAL, nullptr, nullptr, nullptr, nullptr, - nullptr, nullptr, BindDecimalSum)); + sum.AddFunction(AggregateFunction({LogicalTypeId::DECIMAL}, LogicalTypeId::DECIMAL, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, BindDecimalSum)); sum.AddFunction(GetSumAggregate(PhysicalType::INT16)); sum.AddFunction(GetSumAggregate(PhysicalType::INT32)); sum.AddFunction(GetSumAggregate(PhysicalType::INT64)); @@ -58363,18 +63650,14 @@ void BuiltinFunctions::RegisterDistributiveAggregates() { Register(); Register(); Register(); - Register(); - Register(); Register(); Register(); Register(); Register(); Register(); Register(); - Register(); Register(); Register(); - Register(); Register(); Register(); Register(); @@ -58401,6 +63684,10 @@ struct QuantileFun { static void RegisterFunction(BuiltinFunctions &set); }; +struct ModeFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + struct ApproximateQuantileFun { static void RegisterFunction(BuiltinFunctions &set); }; @@ -58414,7 +63701,7 @@ struct ReservoirQuantileFun { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #5 // See the end of this file 
for a list /* @@ -59143,7 +64430,7 @@ struct ApproxQuantileOperation { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { if (source.pos == 0) { return; } @@ -59198,18 +64485,13 @@ AggregateFunction GetApproximateQuantileAggregateFunction(PhysicalType type) { return AggregateFunction::UnaryAggregateDestructor>(LogicalType::BIGINT, LogicalType::BIGINT); - case PhysicalType::FLOAT: - return AggregateFunction::UnaryAggregateDestructor>(LogicalType::FLOAT, - LogicalType::FLOAT); - case PhysicalType::DOUBLE: return AggregateFunction::UnaryAggregateDestructor>(LogicalType::DOUBLE, LogicalType::DOUBLE); default: - throw NotImplementedException("Unimplemented quantile aggregate"); + throw InternalException("Unimplemented quantile aggregate"); } } @@ -59247,7 +64529,7 @@ AggregateFunction GetApproximateQuantileAggregate(PhysicalType type) { void ApproximateQuantileFun::RegisterFunction(BuiltinFunctions &set) { AggregateFunctionSet approx_quantile("approx_quantile"); - approx_quantile.AddFunction(AggregateFunction({LogicalType::DECIMAL, LogicalType::FLOAT}, LogicalType::DECIMAL, + approx_quantile.AddFunction(AggregateFunction({LogicalTypeId::DECIMAL, LogicalType::FLOAT}, LogicalTypeId::DECIMAL, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, BindApproxQuantileDecimal)); @@ -59260,6 +64542,322 @@ void ApproximateQuantileFun::RegisterFunction(BuiltinFunctions &set) { } } // namespace duckdb +// MODE( ) +// Returns the most frequent value for the values within expr1. +// NULL values are ignored. If all the values are NULL, or there are 0 rows, then the function returns NULL. 
+ + + + + + + + +#include + +namespace std { + +template <> +struct hash { + inline size_t operator()(const duckdb::interval_t &val) const { + return hash {}(val.days) ^ hash {}(val.months) ^ hash {}(val.micros); + } +}; + +template <> +struct hash { + inline size_t operator()(const duckdb::hugeint_t &val) const { + return hash {}(val.upper) ^ hash {}(val.lower); + } +}; + +} // namespace std + +namespace duckdb { + +using FrameBounds = std::pair; + +template +struct ModeState { + using Counts = unordered_map; + + Counts *frequency_map; + KEY_TYPE *mode; + size_t nonzero; + bool valid; + size_t count; + + void Initialize() { + frequency_map = nullptr; + mode = nullptr; + nonzero = 0; + valid = false; + count = 0; + } + + void Destroy() { + if (frequency_map) { + delete frequency_map; + } + if (mode) { + delete mode; + } + } + + void Reset() { + Counts empty; + frequency_map->swap(empty); + nonzero = 0; + count = 0; + valid = false; + } + + void ModeAdd(const KEY_TYPE &key) { + auto new_count = ((*frequency_map)[key] += 1); + if (new_count == 1) { + ++nonzero; + } + if (new_count > count) { + valid = true; + count = new_count; + if (mode) { + *mode = key; + } else { + mode = new KEY_TYPE(key); + } + } + } + + void ModeRm(const KEY_TYPE &key) { + auto i = frequency_map->find(key); + auto old_count = i->second; + nonzero -= int(old_count == 1); + + i->second -= 1; + if (count == old_count && key == *mode) { + valid = false; + } + } + + typename Counts::const_iterator Scan() const { + //! 
Initialize control variables to first variable of the frequency map + auto highest_frequency = frequency_map->begin(); + for (auto i = highest_frequency; i != frequency_map->end(); ++i) { + // Tie break with the lowest + if (i->second > highest_frequency->second || + (i->second == highest_frequency->second && i->first < highest_frequency->first)) { + highest_frequency = i; + } + } + return highest_frequency; + } +}; + +template +struct ModeFunction { + template + static void Initialize(STATE *state) { + state->Initialize(); + } + + template + static void Operation(STATE *state, FunctionData *bind_data, INPUT_TYPE *input, ValidityMask &mask, idx_t idx) { + if (!state->frequency_map) { + state->frequency_map = new unordered_map(); + } + auto key = KEY_TYPE(input[idx]); + (*state->frequency_map)[key]++; + } + + template + static void Combine(const STATE &source, STATE *target) { + if (!source.frequency_map) { + return; + } + if (!target->frequency_map) { + // Copy - don't destroy! Otherwise windowing will break. 
+ target->frequency_map = new unordered_map(*source.frequency_map); + return; + } + for (auto &val : *source.frequency_map) { + (*target->frequency_map)[val.first] += val.second; + } + } + + template + static void Finalize(Vector &result, FunctionData *, STATE *state, INPUT_TYPE *target, ValidityMask &mask, + idx_t idx) { + if (!state->frequency_map) { + mask.SetInvalid(idx); + return; + } + auto highest_frequency = state->Scan(); + if (highest_frequency != state->frequency_map->end()) { + target[idx] = INPUT_TYPE(highest_frequency->first); + } else { + mask.SetInvalid(idx); + } + } + template + static void ConstantOperation(STATE *state, FunctionData *bind_data, INPUT_TYPE *input, ValidityMask &mask, + idx_t count) { + if (!state->frequency_map) { + state->frequency_map = new unordered_map(); + } + auto key = KEY_TYPE(input[0]); + (*state->frequency_map)[key] += count; + } + + template + static void Window(const INPUT_TYPE *data, const ValidityMask &dmask, FunctionData *bind_data_p, STATE *state, + const FrameBounds &frame, const FrameBounds &prev, Vector &result, idx_t rid) { + auto rdata = FlatVector::GetData(result); + auto &rmask = FlatVector::Validity(result); + + const auto bias = MinValue(frame.first, prev.first); + if (!state->frequency_map) { + state->frequency_map = new unordered_map(); + } + const double tau = .25; + if (state->nonzero <= tau * state->frequency_map->size()) { + state->Reset(); + // for f ∈ F do + for (auto f = frame.first; f < frame.second; ++f) { + if (dmask.RowIsValid(f - bias)) { + state->ModeAdd(KEY_TYPE(data[f])); + } + } + } else { + // for f ∈ P \ F do + for (auto p = prev.first; p < frame.first; ++p) { + if (dmask.RowIsValid(p - bias)) { + state->ModeRm(KEY_TYPE(data[p])); + } + } + for (auto p = frame.second; p < prev.second; ++p) { + if (dmask.RowIsValid(p - bias)) { + state->ModeRm(KEY_TYPE(data[p])); + } + } + + // for f ∈ F \ P do + for (auto f = frame.first; f < prev.first; ++f) { + if (dmask.RowIsValid(f - bias)) { + 
state->ModeAdd(KEY_TYPE(data[f])); + } + } + for (auto f = prev.second; f < frame.second; ++f) { + if (dmask.RowIsValid(f - bias)) { + state->ModeAdd(KEY_TYPE(data[f])); + } + } + } + + if (!state->valid) { + // Rescan + auto highest_frequency = state->Scan(); + if (highest_frequency != state->frequency_map->end()) { + *(state->mode) = highest_frequency->first; + state->count = highest_frequency->second; + state->valid = (state->count > 0); + } + } + + if (state->valid) { + rdata[rid] = RESULT_TYPE(*state->mode); + } else { + rmask.Set(rid, false); + } + } + + static bool IgnoreNull() { + return true; + } + + template + static void Destroy(STATE *state) { + state->Destroy(); + } +}; + +template +AggregateFunction GetTypedModeFunction(const LogicalType &type) { + using STATE = ModeState; + using OP = ModeFunction; + auto func = AggregateFunction::UnaryAggregateDestructor(type, type); + func.window = AggregateFunction::UnaryWindow; + return func; +} + +AggregateFunction GetModeAggregate(const LogicalType &type) { + switch (type.InternalType()) { + case PhysicalType::INT8: + return GetTypedModeFunction(type); + case PhysicalType::UINT8: + return GetTypedModeFunction(type); + case PhysicalType::INT16: + return GetTypedModeFunction(type); + case PhysicalType::UINT16: + return GetTypedModeFunction(type); + case PhysicalType::INT32: + return GetTypedModeFunction(type); + case PhysicalType::UINT32: + return GetTypedModeFunction(type); + case PhysicalType::INT64: + return GetTypedModeFunction(type); + case PhysicalType::UINT64: + return GetTypedModeFunction(type); + case PhysicalType::INT128: + return GetTypedModeFunction(type); + + case PhysicalType::FLOAT: + return GetTypedModeFunction(type); + case PhysicalType::DOUBLE: + return GetTypedModeFunction(type); + + case PhysicalType::INTERVAL: + return GetTypedModeFunction(type); + + case PhysicalType::VARCHAR: + return GetTypedModeFunction(type); + + default: + throw NotImplementedException("Unimplemented mode aggregate"); + 
} +} + +unique_ptr BindModeDecimal(ClientContext &context, AggregateFunction &function, + vector> &arguments) { + function = GetModeAggregate(arguments[0]->return_type); + function.name = "mode"; + return nullptr; +} + +void ModeFun::RegisterFunction(BuiltinFunctions &set) { + const vector TEMPORAL = {LogicalType::DATE, LogicalType::TIMESTAMP, LogicalType::TIME, + LogicalType::INTERVAL}; + + AggregateFunctionSet mode("mode"); + mode.AddFunction(AggregateFunction({LogicalTypeId::DECIMAL}, LogicalTypeId::DECIMAL, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, BindModeDecimal)); + + for (const auto &type : LogicalType::NUMERIC) { + if (type.id() != LogicalTypeId::DECIMAL) { + mode.AddFunction(GetModeAggregate(type)); + } + } + + for (const auto &type : TEMPORAL) { + mode.AddFunction(GetModeAggregate(type)); + } + + mode.AddFunction(GetModeAggregate(LogicalType::VARCHAR)); + + set.AddFunction(mode); +} +} // namespace duckdb + + @@ -59272,17 +64870,206 @@ void ApproximateQuantileFun::RegisterFunction(BuiltinFunctions &set) { namespace duckdb { +using FrameBounds = std::pair; + struct QuantileState { data_ptr_t v; idx_t len; idx_t pos; + + // List temporaries + std::vector disturbed; + std::vector lower; + std::vector upper; + + QuantileState() : v(nullptr), len(0), pos(0) { + } + + ~QuantileState() { + if (v) { + free(v); + v = nullptr; + } + } + + template + void Resize(idx_t new_len) { + if (new_len <= len) { + return; + } + v = (data_ptr_t)realloc(v, new_len * sizeof(T)); + if (!v) { + throw InternalException("Memory allocation failure"); + } + len = new_len; + } +}; + +void ReuseIndexes(idx_t *index, const FrameBounds &frame, const FrameBounds &prev) { + idx_t j = 0; + + // Copy overlapping indices + for (idx_t p = 0; p < (prev.second - prev.first); ++p) { + auto idx = index[p]; + + // Shift down into any hole + if (j != p) { + index[j] = idx; + } + + // Skip overlapping values + if (frame.first <= idx && idx < frame.second) { + ++j; + } + } + + // Insert 
new indices + if (j > 0) { + // Overlap: append the new ends + for (auto f = frame.first; f < prev.first; ++f, ++j) { + index[j] = f; + } + for (auto f = prev.second; f < frame.second; ++f, ++j) { + index[j] = f; + } + } else { + // No overlap: overwrite with new values + for (auto f = frame.first; f < frame.second; ++f, ++j) { + index[j] = f; + } + } +} + +template +static idx_t ReplaceIndex(STATE *state, const FrameBounds &frame, const FrameBounds &prev) { + D_ASSERT(state->v); + auto index = (idx_t *)state->v; + + idx_t j = 0; + for (idx_t p = 0; p < (prev.second - prev.first); ++p) { + auto idx = index[p]; + if (j != p) { + break; + } + + if (frame.first <= idx && idx < frame.second) { + ++j; + } + } + index[j] = frame.second - 1; + + return j; +} + +template +static inline bool CanReplace(STATE *state, const INPUT_TYPE *fdata, const idx_t j, const idx_t k0, const idx_t k1) { + auto same = false; + + D_ASSERT(state->v); + auto index = (idx_t *)state->v; + + auto curr = fdata[index[j]]; + if (k1 < j) { + auto hi = fdata[index[k1]]; + same = hi < curr; + } else if (j < k0) { + auto lo = fdata[index[k0]]; + same = curr < lo; + } + + return same; +} + +struct IndirectNotNull { + inline explicit IndirectNotNull(const ValidityMask &mask_p, idx_t bias_p) : mask(mask_p), bias(bias_p) { + } + + inline bool operator()(const idx_t &idx) const { + return mask.RowIsValid(idx - bias); + } + const ValidityMask &mask; + const idx_t bias; +}; + +template +struct IndirectLess { + inline explicit IndirectLess(const INPUT_TYPE *inputs_p) : inputs(inputs_p) { + } + + inline bool operator()(const idx_t &lhi, const idx_t &rhi) const { + return inputs[lhi] < inputs[rhi]; + } + + const INPUT_TYPE *inputs; +}; + +template +struct Interpolator { + Interpolator(const float q, const idx_t n_p) : n(n_p), RN((double)(n_p - 1) * q), FRN(floor(RN)), CRN(ceil(RN)) { + } + + TARGET_TYPE operator()(INPUT_TYPE *v_t) const { + if (CRN == FRN) { + std::nth_element(v_t, v_t + FRN, v_t + n); + return 
Cast::Operation(v_t[FRN]); + } else { + std::nth_element(v_t, v_t + FRN, v_t + n); + std::nth_element(v_t + FRN, v_t + CRN, v_t + n); + auto lo = Cast::Operation(v_t[FRN]); + auto hi = Cast::Operation(v_t[CRN]); + auto delta = hi - lo; + return lo + delta * (RN - FRN); + } + } + + TARGET_TYPE operator()(const INPUT_TYPE *v_t, const idx_t *index) const { + if (CRN == FRN) { + return Cast::Operation(v_t[index[FRN]]); + } else { + auto lo = Cast::Operation(v_t[index[FRN]]); + auto hi = Cast::Operation(v_t[index[CRN]]); + auto delta = hi - lo; + return lo + delta * (RN - FRN); + } + } + + const idx_t n; + const double RN; + const idx_t FRN; + const idx_t CRN; +}; + +template +struct Interpolator { + Interpolator(const float q, const idx_t n_p) : n(n_p), RN((double)(n_p - 1) * q), FRN(floor(RN)), CRN(FRN) { + } + + TARGET_TYPE operator()(INPUT_TYPE *v_t) const { + std::nth_element(v_t, v_t + FRN, v_t + n); + return Cast::Operation(v_t[FRN]); + } + + TARGET_TYPE operator()(const INPUT_TYPE *v_t, const idx_t *index) { + return Cast::Operation(v_t[index[FRN]]); + } + + const idx_t n; + const double RN; + const idx_t FRN; + const idx_t CRN; }; struct QuantileBindData : public FunctionData { - explicit QuantileBindData(float quantile_p) : quantiles(1, quantile_p) { + explicit QuantileBindData(float quantile_p) : quantiles(1, quantile_p), order(1, 0) { } explicit QuantileBindData(const vector &quantiles_p) : quantiles(quantiles_p) { + for (idx_t i = 0; i < quantiles.size(); ++i) { + order.push_back(i); + } + + IndirectLess lt(quantiles.data()); + std::sort(order.begin(), order.end(), lt); } unique_ptr Copy() override { @@ -59295,26 +65082,14 @@ struct QuantileBindData : public FunctionData { } vector quantiles; + vector order; }; -template +template struct QuantileOperation { template static void Initialize(STATE *state) { - state->v = nullptr; - state->len = 0; - state->pos = 0; - } - - static void ResizeState(QuantileState *state, idx_t new_len) { - if (new_len <= 
state->len) { - return; - } - state->v = (data_ptr_t)realloc(state->v, new_len * sizeof(T)); - if (!state->v) { - throw InternalException("Memory allocation failure"); - } - state->len = new_len; + new (state) STATE; } template @@ -59329,28 +65104,25 @@ struct QuantileOperation { static void Operation(STATE *state, FunctionData *bind_data_p, INPUT_TYPE *data, ValidityMask &mask, idx_t idx) { if (state->pos == state->len) { // growing conservatively here since we could be running this on many small groups - ResizeState(state, state->len == 0 ? 1 : state->len * 2); + state->template Resize(state->len == 0 ? 1 : state->len * 2); } D_ASSERT(state->v); - ((T *)state->v)[state->pos++] = data[idx]; + ((SAVE_TYPE *)state->v)[state->pos++] = data[idx]; } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { if (source.pos == 0) { return; } - ResizeState(target, target->pos + source.pos); - memcpy(target->v + target->pos * sizeof(T), source.v, source.pos * sizeof(T)); + target->template Resize(target->pos + source.pos); + memcpy(target->v + target->pos * sizeof(SAVE_TYPE), source.v, source.pos * sizeof(SAVE_TYPE)); target->pos += source.pos; } template static void Destroy(STATE *state) { - if (state->v) { - free(state->v); - state->v = nullptr; - } + state->~STATE(); } static bool IgnoreNull() { @@ -59359,15 +65131,16 @@ struct QuantileOperation { }; template -static void ExecuteListFinalize(Vector &states, FunctionData *bind_data, Vector &result, idx_t count) { +static void ExecuteListFinalize(Vector &states, FunctionData *bind_data_p, Vector &result, idx_t count, idx_t offset) { D_ASSERT(result.GetType().id() == LogicalTypeId::LIST); - D_ASSERT(result.GetType().child_types().size() == 1); - auto list_child = make_unique(result.GetType().child_types()[0].second); - ListVector::SetEntry(result, move(list_child)); + D_ASSERT(bind_data_p); + auto bind_data = (QuantileBindData *)bind_data_p; + 
ListVector::SetListSize(result, 0); if (states.GetVectorType() == VectorType::CONSTANT_VECTOR) { result.SetVectorType(VectorType::CONSTANT_VECTOR); + ListVector::Reserve(result, bind_data->quantiles.size()); auto sdata = ConstantVector::GetData(states); auto rdata = ConstantVector::GetData(result); @@ -59376,12 +65149,13 @@ static void ExecuteListFinalize(Vector &states, FunctionData *bind_data, Vector } else { D_ASSERT(states.GetVectorType() == VectorType::FLAT_VECTOR); result.SetVectorType(VectorType::FLAT_VECTOR); + ListVector::Reserve(result, count * bind_data->quantiles.size()); auto sdata = FlatVector::GetData(states); auto rdata = FlatVector::GetData(result); auto &mask = FlatVector::Validity(result); for (idx_t i = 0; i < count; i++) { - OP::template Finalize(result, bind_data, sdata[i], rdata, mask, i); + OP::template Finalize(result, bind_data, sdata[i], rdata, mask, i + offset); } } @@ -59390,7 +65164,7 @@ static void ExecuteListFinalize(Vector &states, FunctionData *bind_data, Vector template static AggregateFunction QuantileListAggregate(const LogicalType &input_type, const LogicalType &child_type) { - LogicalType result_type(LogicalTypeId::LIST, {{"", child_type}}); + LogicalType result_type = LogicalType::LIST(child_type); return AggregateFunction( {input_type}, result_type, AggregateFunction::StateSize, AggregateFunction::StateInitialize, AggregateFunction::UnaryScatterUpdate, AggregateFunction::StateCombine, @@ -59398,11 +65172,11 @@ static AggregateFunction QuantileListAggregate(const LogicalType &input_type, co AggregateFunction::StateDestroy); } -template -struct DiscreteQuantileOperation : public QuantileOperation { +template +struct QuantileScalarOperation : public QuantileOperation { - template - static void Finalize(Vector &result, FunctionData *bind_data_p, STATE *state, TARGET_TYPE *target, + template + static void Finalize(Vector &result, FunctionData *bind_data_p, STATE *state, RESULT_TYPE *target, ValidityMask &mask, idx_t idx) { if 
(state->pos == 0) { mask.SetInvalid(idx); @@ -59412,242 +65186,391 @@ struct DiscreteQuantileOperation : public QuantileOperation { D_ASSERT(bind_data_p); auto bind_data = (QuantileBindData *)bind_data_p; D_ASSERT(bind_data->quantiles.size() == 1); - auto v_t = (INPUT_TYPE *)state->v; - auto offset = (idx_t)((double)(state->pos - 1) * bind_data->quantiles[0]); - std::nth_element(v_t, v_t + offset, v_t + state->pos); - target[idx] = v_t[offset]; + Interpolator interp(bind_data->quantiles[0], state->pos); + auto v_t = (SAVE_TYPE *)state->v; + target[idx] = interp(v_t); } -}; -AggregateFunction GetDiscreteQuantileAggregateFunction(PhysicalType type) { - switch (type) { - case PhysicalType::INT16: - return AggregateFunction::UnaryAggregateDestructor>(LogicalType::SMALLINT, - LogicalType::SMALLINT); + template + static void Window(const INPUT_TYPE *data, const ValidityMask &dmask, FunctionData *bind_data_p, STATE *state, + const FrameBounds &frame, const FrameBounds &prev, Vector &result, idx_t ridx) { + auto rdata = FlatVector::GetData(result); + auto &rmask = FlatVector::Validity(result); - case PhysicalType::INT32: - return AggregateFunction::UnaryAggregateDestructor>(LogicalType::INTEGER, - LogicalType::INTEGER); + // Lazily initialise frame state + const auto prev_valid = state->pos == (prev.second - prev.first); + state->pos = frame.second - frame.first; + state->template Resize(state->pos); - case PhysicalType::INT64: - return AggregateFunction::UnaryAggregateDestructor>(LogicalType::BIGINT, - LogicalType::BIGINT); + D_ASSERT(state->v); + auto index = (idx_t *)state->v; - case PhysicalType::INT128: - return AggregateFunction::UnaryAggregateDestructor>(LogicalType::HUGEINT, - LogicalType::HUGEINT); - case PhysicalType::FLOAT: - return AggregateFunction::UnaryAggregateDestructor>(LogicalType::FLOAT, - LogicalType::FLOAT); + D_ASSERT(bind_data_p); + auto bind_data = (QuantileBindData *)bind_data_p; - case PhysicalType::DOUBLE: - return 
AggregateFunction::UnaryAggregateDestructor>(LogicalType::DOUBLE, - LogicalType::DOUBLE); + // Find the two positions needed + const auto q = bind_data->quantiles[0]; + + bool same = false; + if (prev_valid && dmask.AllValid() && frame.first == prev.first + 1 && frame.second == prev.second + 1) { + // Fixed frame size + const auto j = ReplaceIndex(state, frame, prev); + Interpolator interp(q, state->pos); + same = CanReplace(state, data, j, interp.FRN, interp.CRN); + } else { + ReuseIndexes(index, frame, prev); + } + + if (!same) { + if (!dmask.AllValid()) { + IndirectNotNull not_null(dmask, MinValue(frame.first, prev.first)); + state->pos = std::partition(index, index + state->pos, not_null) - index; + } + if (state->pos) { + Interpolator interp(q, state->pos); + IndirectLess lt(data); + std::nth_element(index, index + interp.FRN, index + state->pos, lt); + if (interp.CRN != interp.FRN) { + std::nth_element(index + interp.CRN, index + interp.CRN, index + interp.n, lt); + } + rdata[ridx] = interp(data, index); + } else { + rmask.Set(ridx, false); + } + } else { + Interpolator interp(q, state->pos); + rdata[ridx] = interp(data, index); + } + } +}; + +template +AggregateFunction GetTypedDiscreteQuantileAggregateFunction(const LogicalType &type) { + using STATE = QuantileState; + using OP = QuantileScalarOperation; + auto fun = AggregateFunction::UnaryAggregateDestructor(type, type); + fun.window = AggregateFunction::UnaryWindow; + return fun; +} + +AggregateFunction GetDiscreteQuantileAggregateFunction(const LogicalType &type) { + switch (type.id()) { + case LogicalTypeId::TINYINT: + return GetTypedDiscreteQuantileAggregateFunction(type); + case LogicalTypeId::SMALLINT: + return GetTypedDiscreteQuantileAggregateFunction(type); + case LogicalTypeId::INTEGER: + return GetTypedDiscreteQuantileAggregateFunction(type); + case LogicalTypeId::BIGINT: + return GetTypedDiscreteQuantileAggregateFunction(type); + case LogicalTypeId::HUGEINT: + return 
GetTypedDiscreteQuantileAggregateFunction(type); + + case LogicalTypeId::FLOAT: + return GetTypedDiscreteQuantileAggregateFunction(type); + case LogicalTypeId::DOUBLE: + return GetTypedDiscreteQuantileAggregateFunction(type); + case LogicalTypeId::DECIMAL: + switch (type.InternalType()) { + case PhysicalType::INT16: + return GetTypedDiscreteQuantileAggregateFunction(type); + case PhysicalType::INT32: + return GetTypedDiscreteQuantileAggregateFunction(type); + case PhysicalType::INT64: + return GetTypedDiscreteQuantileAggregateFunction(type); + case PhysicalType::INT128: + return GetTypedDiscreteQuantileAggregateFunction(type); + default: + throw NotImplementedException("Unimplemented discrete quantile aggregate"); + } + break; + + case LogicalTypeId::DATE: + return GetTypedDiscreteQuantileAggregateFunction(type); + case LogicalTypeId::TIMESTAMP: + return GetTypedDiscreteQuantileAggregateFunction(type); + case LogicalTypeId::TIME: + return GetTypedDiscreteQuantileAggregateFunction(type); + case LogicalTypeId::INTERVAL: + return GetTypedDiscreteQuantileAggregateFunction(type); default: throw NotImplementedException("Unimplemented discrete quantile aggregate"); } } -template -struct DiscreteQuantileListOperation : public QuantileOperation { +template +struct QuantileListOperation : public QuantileOperation { - template - static void Finalize(Vector &result_list, FunctionData *bind_data_p, STATE *state, TARGET_TYPE *target, + template + static void Finalize(Vector &result_list, FunctionData *bind_data_p, STATE *state, RESULT_TYPE *target, ValidityMask &mask, idx_t idx) { if (state->pos == 0) { mask.SetInvalid(idx); return; } - D_ASSERT(state->v); + D_ASSERT(bind_data_p); auto bind_data = (QuantileBindData *)bind_data_p; - target[idx].offset = ListVector::GetListSize(result_list); - auto v_t = (INPUT_TYPE *)state->v; + + auto &result = ListVector::GetEntry(result_list); + auto ridx = ListVector::GetListSize(result_list); + ListVector::Reserve(result_list, ridx + 
bind_data->quantiles.size()); + auto rdata = FlatVector::GetData(result); + + D_ASSERT(state->v); + auto v_t = (SAVE_TYPE *)state->v; + + target[idx].offset = ridx; for (const auto &quantile : bind_data->quantiles) { - auto offset = (idx_t)((double)(state->pos - 1) * quantile); - std::nth_element(v_t, v_t + offset, v_t + state->pos); - auto val = Value::CreateValue(v_t[offset]); - ListVector::PushBack(result_list, val); + Interpolator interp(quantile, state->pos); + rdata[ridx] = interp(v_t); + ++ridx; } target[idx].length = bind_data->quantiles.size(); + + ListVector::SetListSize(result_list, ridx); } -}; -AggregateFunction GetDiscreteQuantileListAggregateFunction(PhysicalType type) { - switch (type) { - case PhysicalType::INT16: - return QuantileListAggregate>( - LogicalType::SMALLINT, LogicalType::SMALLINT); + template + static void Window(const INPUT_TYPE *data, const ValidityMask &dmask, FunctionData *bind_data_p, STATE *state, + const FrameBounds &frame, const FrameBounds &prev, Vector &list, idx_t lidx) { + D_ASSERT(bind_data_p); + auto bind_data = (QuantileBindData *)bind_data_p; - case PhysicalType::INT32: - return QuantileListAggregate>( - LogicalType::INTEGER, LogicalType::INTEGER); + // Result is a constant LIST with a fixed length + auto ldata = FlatVector::GetData(list); + auto &lmask = FlatVector::Validity(list); + auto &lentry = ldata[lidx]; + lentry.offset = ListVector::GetListSize(list); + lentry.length = bind_data->quantiles.size(); - case PhysicalType::INT64: - return QuantileListAggregate>( - LogicalType::BIGINT, LogicalType::BIGINT); + ListVector::Reserve(list, lentry.offset + lentry.length); + ListVector::SetListSize(list, lentry.offset + lentry.length); + auto &result = ListVector::GetEntry(list); + auto rdata = FlatVector::GetData(result); - case PhysicalType::INT128: - return QuantileListAggregate>( - LogicalType::HUGEINT, LogicalType::HUGEINT); - case PhysicalType::FLOAT: - return QuantileListAggregate>( - LogicalType::FLOAT, 
LogicalType::FLOAT); + // Lazily initialise frame state + const auto prev_valid = state->pos == (prev.second - prev.first); + state->pos = frame.second - frame.first; + state->template Resize(state->pos); - case PhysicalType::DOUBLE: - return QuantileListAggregate>( - LogicalType::DOUBLE, LogicalType::DOUBLE); - - default: - throw NotImplementedException("Unimplemented discrete quantile list aggregate"); - } -} - -template -static TARGET_TYPE InterpolateCast(const INPUT_TYPE &v) { - return TARGET_TYPE(v); -} + D_ASSERT(state->v); + auto index = (idx_t *)state->v; + + bool fixed = false; + auto j = state->pos; + if (prev_valid && dmask.AllValid() && frame.first == prev.first + 1 && frame.second == prev.second + 1) { + // Fixed frame size + j = ReplaceIndex(state, frame, prev); + fixed = true; + } else { + ReuseIndexes(index, frame, prev); + if (!dmask.AllValid()) { + IndirectNotNull not_null(dmask, MinValue(frame.first, prev.first)); + state->pos = std::partition(index, index + state->pos, not_null) - index; + } + } -template <> -double InterpolateCast(const hugeint_t &v) { - return Hugeint::Cast(v); -} + if (!state->pos) { + lmask.Set(lidx, false); + return; + } -template -static TARGET_TYPE Interpolate(INPUT_TYPE *v_t, const float q, const idx_t n) { - const auto RN = ((double)(n - 1) * q); - const auto FRN = idx_t(floor(RN)); - const auto CRN = idx_t(ceil(RN)); + // First pass: Fill in the undisturbed values and find the islands of stability. 
+ state->disturbed.clear(); + state->lower.clear(); + state->upper.clear(); + idx_t lb = 0; + for (idx_t i = 0; i < bind_data->order.size(); ++i) { + const auto q = bind_data->order[i]; + const auto &quantile = bind_data->quantiles[q]; + Interpolator interp(quantile, state->pos); - if (CRN == FRN) { - std::nth_element(v_t, v_t + FRN, v_t + n); - return InterpolateCast(v_t[FRN]); - } else { - std::nth_element(v_t, v_t + FRN, v_t + n); - std::nth_element(v_t + FRN, v_t + CRN, v_t + n); - auto lo = InterpolateCast(v_t[FRN]); - auto hi = InterpolateCast(v_t[CRN]); - auto delta = hi - lo; - return lo + delta * (RN - FRN); - } -} + if (fixed && CanReplace(state, data, j, interp.FRN, interp.CRN)) { + rdata[lentry.offset + q] = interp(data, index); + state->upper.resize(state->lower.size(), interp.FRN); + } else { + state->disturbed.push_back(q); + state->lower.push_back(MinValue(lb, interp.FRN)); + } + lb = interp.CRN + 1; + } + state->upper.resize(state->lower.size(), state->pos); -template -struct ContinuousQuantileOperation : public QuantileOperation { + // Second pass: select the disturbed values + for (idx_t i = 0; i < state->disturbed.size(); ++i) { + const auto &q = state->disturbed[i]; + const auto &quantile = bind_data->quantiles[q]; + Interpolator interp(quantile, state->pos); - template - static void Finalize(Vector &result, FunctionData *bind_data_p, STATE *state, TARGET_TYPE *target, - ValidityMask &mask, idx_t idx) { - if (state->pos == 0) { - mask.SetInvalid(idx); - return; + IndirectLess lt(data); + std::nth_element(index + state->lower[i], index + interp.FRN, index + state->upper[i], lt); + if (interp.CRN != interp.FRN) { + std::nth_element(index + interp.CRN, index + interp.CRN, index + state->upper[i], lt); + } + rdata[lentry.offset + q] = interp(data, index); } - D_ASSERT(state->v); - D_ASSERT(bind_data_p); - auto bind_data = (QuantileBindData *)bind_data_p; - D_ASSERT(bind_data->quantiles.size() == 1); - auto v_t = (INPUT_TYPE *)state->v; - 
target[idx] = Interpolate(v_t, bind_data->quantiles[0], state->pos); } }; -AggregateFunction GetContinuousQuantileAggregateFunction(PhysicalType type) { - switch (type) { - case PhysicalType::INT16: - return AggregateFunction::UnaryAggregateDestructor>(LogicalType::SMALLINT, - LogicalType::DOUBLE); - - case PhysicalType::INT32: - return AggregateFunction::UnaryAggregateDestructor>(LogicalType::INTEGER, - LogicalType::DOUBLE); +template +AggregateFunction GetTypedDiscreteQuantileListAggregateFunction(const LogicalType &type) { + using STATE = QuantileState; + using OP = QuantileListOperation; + auto fun = QuantileListAggregate(type, type); + fun.window = AggregateFunction::UnaryWindow; + return fun; +} - case PhysicalType::INT64: - return AggregateFunction::UnaryAggregateDestructor>(LogicalType::BIGINT, - LogicalType::DOUBLE); +AggregateFunction GetDiscreteQuantileListAggregateFunction(const LogicalType &type) { + switch (type.id()) { + case LogicalTypeId::TINYINT: + return GetTypedDiscreteQuantileListAggregateFunction(type); + case LogicalTypeId::SMALLINT: + return GetTypedDiscreteQuantileListAggregateFunction(type); + case LogicalTypeId::INTEGER: + return GetTypedDiscreteQuantileListAggregateFunction(type); + case LogicalTypeId::BIGINT: + return GetTypedDiscreteQuantileListAggregateFunction(type); + case LogicalTypeId::HUGEINT: + return GetTypedDiscreteQuantileListAggregateFunction(type); - case PhysicalType::INT128: - return AggregateFunction::UnaryAggregateDestructor>(LogicalType::HUGEINT, - LogicalType::DOUBLE); - case PhysicalType::FLOAT: - return AggregateFunction::UnaryAggregateDestructor>(LogicalType::FLOAT, - LogicalType::FLOAT); + case LogicalTypeId::FLOAT: + return GetTypedDiscreteQuantileListAggregateFunction(type); + case LogicalTypeId::DOUBLE: + return GetTypedDiscreteQuantileListAggregateFunction(type); + case LogicalTypeId::DECIMAL: + switch (type.InternalType()) { + case PhysicalType::INT16: + return 
GetTypedDiscreteQuantileListAggregateFunction(type); + case PhysicalType::INT32: + return GetTypedDiscreteQuantileListAggregateFunction(type); + case PhysicalType::INT64: + return GetTypedDiscreteQuantileListAggregateFunction(type); + case PhysicalType::INT128: + return GetTypedDiscreteQuantileListAggregateFunction(type); + default: + throw NotImplementedException("Unimplemented discrete quantile list aggregate"); + } + break; - case PhysicalType::DOUBLE: - return AggregateFunction::UnaryAggregateDestructor>(LogicalType::DOUBLE, - LogicalType::DOUBLE); + case LogicalTypeId::DATE: + return GetTypedDiscreteQuantileListAggregateFunction(type); + case LogicalTypeId::TIMESTAMP: + return GetTypedDiscreteQuantileListAggregateFunction(type); + case LogicalTypeId::TIME: + return GetTypedDiscreteQuantileListAggregateFunction(type); + case LogicalTypeId::INTERVAL: + return GetTypedDiscreteQuantileListAggregateFunction(type); default: - throw NotImplementedException("Unimplemented continuous quantile aggregate"); + throw NotImplementedException("Unimplemented discrete quantile list aggregate"); } } -template -struct ContinuousQuantileListOperation : public QuantileOperation { +template +AggregateFunction GetTypedContinuousQuantileAggregateFunction(const LogicalType &input_type, + const LogicalType &target_type) { + using STATE = QuantileState; + using OP = QuantileScalarOperation; + auto fun = AggregateFunction::UnaryAggregateDestructor(input_type, target_type); + fun.window = AggregateFunction::UnaryWindow; + return fun; +} - template - static void Finalize(Vector &result_list, FunctionData *bind_data_p, STATE *state, TARGET_TYPE *target, - ValidityMask &mask, idx_t idx) { - if (state->pos == 0) { - mask.SetInvalid(idx); - return; - } - D_ASSERT(state->v); - D_ASSERT(bind_data_p); - auto bind_data = (QuantileBindData *)bind_data_p; - target[idx].offset = ListVector::GetListSize(result_list); - auto v_t = (INPUT_TYPE *)state->v; - for (const auto &quantile : 
bind_data->quantiles) { - auto child = Interpolate(v_t, quantile, state->pos); - auto val = Value::CreateValue(child); - ListVector::PushBack(result_list, val); +AggregateFunction GetContinuousQuantileAggregateFunction(const LogicalType &type) { + switch (type.id()) { + case LogicalTypeId::TINYINT: + return GetTypedContinuousQuantileAggregateFunction(type, LogicalType::DOUBLE); + case LogicalTypeId::SMALLINT: + return GetTypedContinuousQuantileAggregateFunction(type, LogicalType::DOUBLE); + case LogicalTypeId::INTEGER: + return GetTypedContinuousQuantileAggregateFunction(type, LogicalType::DOUBLE); + case LogicalTypeId::BIGINT: + return GetTypedContinuousQuantileAggregateFunction(type, LogicalType::DOUBLE); + case LogicalTypeId::HUGEINT: + return GetTypedContinuousQuantileAggregateFunction(type, LogicalType::DOUBLE); + + case LogicalTypeId::FLOAT: + return GetTypedContinuousQuantileAggregateFunction(type, type); + case LogicalTypeId::DOUBLE: + return GetTypedContinuousQuantileAggregateFunction(type, type); + case LogicalTypeId::DECIMAL: + switch (type.InternalType()) { + case PhysicalType::INT16: + return GetTypedContinuousQuantileAggregateFunction(type, type); + case PhysicalType::INT32: + return GetTypedContinuousQuantileAggregateFunction(type, type); + case PhysicalType::INT64: + return GetTypedContinuousQuantileAggregateFunction(type, type); + case PhysicalType::INT128: + return GetTypedContinuousQuantileAggregateFunction(type, type); + default: + throw NotImplementedException("Unimplemented discrete quantile list aggregate"); } - target[idx].length = bind_data->quantiles.size(); - } -}; + break; -AggregateFunction GetContinuousQuantileListAggregateFunction(PhysicalType type) { - switch (type) { - case PhysicalType::INT16: - return QuantileListAggregate>(LogicalType::SMALLINT, - LogicalType::DOUBLE); + case LogicalTypeId::DATE: + return GetTypedContinuousQuantileAggregateFunction(type, LogicalType::TIMESTAMP); + case LogicalTypeId::TIMESTAMP: + return 
GetTypedContinuousQuantileAggregateFunction(type, type); + case LogicalTypeId::TIME: + return GetTypedContinuousQuantileAggregateFunction(type, type); - case PhysicalType::INT32: - return QuantileListAggregate>(LogicalType::INTEGER, - LogicalType::DOUBLE); + default: + throw NotImplementedException("Unimplemented discrete quantile list aggregate"); + } +} - case PhysicalType::INT64: - return QuantileListAggregate>(LogicalType::BIGINT, - LogicalType::DOUBLE); +template +AggregateFunction GetTypedContinuousQuantileListAggregateFunction(const LogicalType &input_type, + const LogicalType &result_type) { + using STATE = QuantileState; + using OP = QuantileListOperation; + auto fun = QuantileListAggregate(input_type, result_type); + fun.window = AggregateFunction::UnaryWindow; + return fun; +} - case PhysicalType::INT128: - return QuantileListAggregate>(LogicalType::HUGEINT, - LogicalType::DOUBLE); - case PhysicalType::FLOAT: - return QuantileListAggregate>(LogicalType::FLOAT, - LogicalType::DOUBLE); +AggregateFunction GetContinuousQuantileListAggregateFunction(const LogicalType &type) { + switch (type.id()) { + case LogicalTypeId::TINYINT: + return GetTypedContinuousQuantileListAggregateFunction(type, LogicalType::DOUBLE); + case LogicalTypeId::SMALLINT: + return GetTypedContinuousQuantileListAggregateFunction(type, LogicalType::DOUBLE); + case LogicalTypeId::INTEGER: + return GetTypedContinuousQuantileListAggregateFunction(type, LogicalType::DOUBLE); + case LogicalTypeId::BIGINT: + return GetTypedContinuousQuantileListAggregateFunction(type, LogicalType::DOUBLE); + case LogicalTypeId::HUGEINT: + return GetTypedContinuousQuantileListAggregateFunction(type, LogicalType::DOUBLE); - case PhysicalType::DOUBLE: - return QuantileListAggregate>(LogicalType::DOUBLE, - LogicalType::DOUBLE); + case LogicalTypeId::FLOAT: + return GetTypedContinuousQuantileListAggregateFunction(type, type); + case LogicalTypeId::DOUBLE: + return GetTypedContinuousQuantileListAggregateFunction(type, 
type); + case LogicalTypeId::DECIMAL: + switch (type.InternalType()) { + case PhysicalType::INT16: + return GetTypedContinuousQuantileListAggregateFunction(type, LogicalType::DOUBLE); + case PhysicalType::INT32: + return GetTypedContinuousQuantileListAggregateFunction(type, LogicalType::DOUBLE); + case PhysicalType::INT64: + return GetTypedContinuousQuantileListAggregateFunction(type, LogicalType::DOUBLE); + case PhysicalType::INT128: + return GetTypedContinuousQuantileListAggregateFunction(type, LogicalType::DOUBLE); + default: + throw NotImplementedException("Unimplemented discrete quantile list aggregate"); + } + break; + + case LogicalTypeId::DATE: + return GetTypedContinuousQuantileListAggregateFunction(type, LogicalType::TIMESTAMP); + case LogicalTypeId::TIMESTAMP: + return GetTypedContinuousQuantileListAggregateFunction(type, type); + case LogicalTypeId::TIME: + return GetTypedContinuousQuantileListAggregateFunction(type, type); default: throw NotImplementedException("Unimplemented discrete quantile list aggregate"); @@ -59663,7 +65586,7 @@ unique_ptr BindMedianDecimal(ClientContext &context, AggregateFunc vector> &arguments) { auto bind_data = BindMedian(context, function, arguments); - function = GetDiscreteQuantileAggregateFunction(arguments[0]->return_type.InternalType()); + function = GetDiscreteQuantileAggregateFunction(arguments[0]->return_type); function.name = "median"; return bind_data; } @@ -59680,12 +65603,12 @@ static float CheckQuantile(const Value &quantile_val) { unique_ptr BindQuantile(ClientContext &context, AggregateFunction &function, vector> &arguments) { - if (!arguments[1]->IsScalar()) { + if (!arguments[1]->IsFoldable()) { throw BinderException("QUANTILE can only take constant parameters"); } Value quantile_val = ExpressionExecutor::EvaluateScalar(*arguments[1]); vector quantiles; - if (quantile_val.type().child_types().empty()) { + if (quantile_val.type().id() != LogicalTypeId::LIST) { 
quantiles.push_back(CheckQuantile(quantile_val)); } else { for (const auto &element_val : quantile_val.list_value) { @@ -59700,7 +65623,7 @@ unique_ptr BindQuantile(ClientContext &context, AggregateFunction unique_ptr BindDiscreteQuantileDecimal(ClientContext &context, AggregateFunction &function, vector> &arguments) { auto bind_data = BindQuantile(context, function, arguments); - function = GetDiscreteQuantileAggregateFunction(arguments[0]->return_type.InternalType()); + function = GetDiscreteQuantileAggregateFunction(arguments[0]->return_type); function.name = "quantile_disc"; return bind_data; } @@ -59708,18 +65631,19 @@ unique_ptr BindDiscreteQuantileDecimal(ClientContext &context, Agg unique_ptr BindContinuousQuantileDecimal(ClientContext &context, AggregateFunction &function, vector> &arguments) { auto bind_data = BindQuantile(context, function, arguments); - function = GetContinuousQuantileAggregateFunction(arguments[0]->return_type.InternalType()); + function = GetContinuousQuantileAggregateFunction(arguments[0]->return_type); function.name = "quantile_cont"; return bind_data; } -AggregateFunction GetMedianAggregate(PhysicalType type) { - auto fun = GetDiscreteQuantileAggregateFunction(type); +AggregateFunction GetMedianAggregate(const LogicalType &type) { + auto fun = (type.id() != LogicalTypeId::INTERVAL) ? 
GetContinuousQuantileAggregateFunction(type) + : GetDiscreteQuantileAggregateFunction(type); fun.bind = BindMedian; return fun; } -AggregateFunction GetDiscreteQuantileAggregate(PhysicalType type) { +AggregateFunction GetDiscreteQuantileAggregate(const LogicalType &type) { auto fun = GetDiscreteQuantileAggregateFunction(type); fun.bind = BindQuantile; // temporarily push an argument so we can bind the actual quantile @@ -59727,16 +65651,16 @@ AggregateFunction GetDiscreteQuantileAggregate(PhysicalType type) { return fun; } -AggregateFunction GetDiscreteQuantileListAggregate(PhysicalType type) { +AggregateFunction GetDiscreteQuantileListAggregate(const LogicalType &type) { auto fun = GetDiscreteQuantileListAggregateFunction(type); fun.bind = BindQuantile; // temporarily push an argument so we can bind the actual quantile - LogicalType list_of_float(LogicalTypeId::LIST, {std::make_pair("", LogicalType::FLOAT)}); + auto list_of_float = LogicalType::LIST(LogicalType::FLOAT); fun.arguments.push_back(list_of_float); return fun; } -AggregateFunction GetContinuousQuantileAggregate(PhysicalType type) { +AggregateFunction GetContinuousQuantileAggregate(const LogicalType &type) { auto fun = GetContinuousQuantileAggregateFunction(type); fun.bind = BindQuantile; // temporarily push an argument so we can bind the actual quantile @@ -59744,69 +65668,51 @@ AggregateFunction GetContinuousQuantileAggregate(PhysicalType type) { return fun; } -AggregateFunction GetContinuousQuantileListAggregate(PhysicalType type) { +AggregateFunction GetContinuousQuantileListAggregate(const LogicalType &type) { auto fun = GetContinuousQuantileListAggregateFunction(type); fun.bind = BindQuantile; // temporarily push an argument so we can bind the actual quantile - LogicalType list_of_double(LogicalTypeId::LIST, {std::make_pair("", LogicalType::DOUBLE)}); + auto list_of_double = LogicalType::LIST(LogicalType::DOUBLE); fun.arguments.push_back(list_of_double); return fun; } void 
QuantileFun::RegisterFunction(BuiltinFunctions &set) { + const vector QUANTILES = {LogicalType::TINYINT, LogicalType::SMALLINT, LogicalType::INTEGER, + LogicalType::BIGINT, LogicalType::HUGEINT, LogicalType::FLOAT, + LogicalType::DOUBLE, LogicalType::DATE, LogicalType::TIMESTAMP, + LogicalType::TIME, LogicalType::INTERVAL}; + AggregateFunctionSet median("median"); - median.AddFunction(AggregateFunction({LogicalType::DECIMAL}, LogicalType::DECIMAL, nullptr, nullptr, nullptr, + median.AddFunction(AggregateFunction({LogicalTypeId::DECIMAL}, LogicalTypeId::DECIMAL, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, BindMedianDecimal)); - median.AddFunction(GetMedianAggregate(PhysicalType::INT16)); - median.AddFunction(GetMedianAggregate(PhysicalType::INT32)); - median.AddFunction(GetMedianAggregate(PhysicalType::INT64)); - median.AddFunction(GetMedianAggregate(PhysicalType::INT128)); - median.AddFunction(GetMedianAggregate(PhysicalType::DOUBLE)); - - set.AddFunction(median); AggregateFunctionSet quantile_disc("quantile_disc"); - quantile_disc.AddFunction(AggregateFunction({LogicalType::DECIMAL, LogicalType::FLOAT}, LogicalType::DECIMAL, + quantile_disc.AddFunction(AggregateFunction({LogicalTypeId::DECIMAL, LogicalType::FLOAT}, LogicalTypeId::DECIMAL, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, BindDiscreteQuantileDecimal)); - quantile_disc.AddFunction(GetDiscreteQuantileAggregate(PhysicalType::INT16)); - quantile_disc.AddFunction(GetDiscreteQuantileAggregate(PhysicalType::INT32)); - quantile_disc.AddFunction(GetDiscreteQuantileAggregate(PhysicalType::INT64)); - quantile_disc.AddFunction(GetDiscreteQuantileAggregate(PhysicalType::INT128)); - quantile_disc.AddFunction(GetDiscreteQuantileAggregate(PhysicalType::DOUBLE)); - - // LIST variants - quantile_disc.AddFunction(GetDiscreteQuantileListAggregate(PhysicalType::INT16)); - quantile_disc.AddFunction(GetDiscreteQuantileListAggregate(PhysicalType::INT32)); - 
quantile_disc.AddFunction(GetDiscreteQuantileListAggregate(PhysicalType::INT64)); - quantile_disc.AddFunction(GetDiscreteQuantileListAggregate(PhysicalType::INT128)); - quantile_disc.AddFunction(GetDiscreteQuantileListAggregate(PhysicalType::DOUBLE)); - - set.AddFunction(quantile_disc); - - quantile_disc.name = "quantile"; - set.AddFunction(quantile_disc); - AggregateFunctionSet quantile_cont("quantile_cont"); - quantile_cont.AddFunction(AggregateFunction({LogicalType::DECIMAL, LogicalType::FLOAT}, LogicalType::DECIMAL, + quantile_cont.AddFunction(AggregateFunction({LogicalTypeId::DECIMAL, LogicalType::FLOAT}, LogicalTypeId::DECIMAL, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, BindContinuousQuantileDecimal)); - quantile_cont.AddFunction(GetContinuousQuantileAggregate(PhysicalType::INT16)); - quantile_cont.AddFunction(GetContinuousQuantileAggregate(PhysicalType::INT32)); - quantile_cont.AddFunction(GetContinuousQuantileAggregate(PhysicalType::INT64)); - quantile_cont.AddFunction(GetContinuousQuantileAggregate(PhysicalType::INT128)); - quantile_cont.AddFunction(GetContinuousQuantileAggregate(PhysicalType::DOUBLE)); - - // LIST variants - quantile_cont.AddFunction(GetContinuousQuantileListAggregate(PhysicalType::INT16)); - quantile_cont.AddFunction(GetContinuousQuantileListAggregate(PhysicalType::INT32)); - quantile_cont.AddFunction(GetContinuousQuantileListAggregate(PhysicalType::INT64)); - quantile_cont.AddFunction(GetContinuousQuantileListAggregate(PhysicalType::INT128)); - quantile_cont.AddFunction(GetContinuousQuantileListAggregate(PhysicalType::DOUBLE)); + for (const auto &type : QUANTILES) { + median.AddFunction(GetMedianAggregate(type)); + quantile_disc.AddFunction(GetDiscreteQuantileAggregate(type)); + quantile_disc.AddFunction(GetDiscreteQuantileListAggregate(type)); + if (type.id() != LogicalTypeId::INTERVAL) { + quantile_cont.AddFunction(GetContinuousQuantileAggregate(type)); + quantile_cont.AddFunction(GetContinuousQuantileListAggregate(type)); 
+ } + } + set.AddFunction(median); + set.AddFunction(quantile_disc); set.AddFunction(quantile_cont); + + quantile_disc.name = "quantile"; + set.AddFunction(quantile_disc); } } // namespace duckdb @@ -59848,7 +65754,7 @@ void FillReservoir(STATE *state, idx_t sample_size, T element) { } struct ReservoirQuantileBindData : public FunctionData { - ReservoirQuantileBindData(float quantile_p, int32_t sample_size_p) + ReservoirQuantileBindData(double quantile_p, int32_t sample_size_p) : quantile(quantile_p), sample_size(sample_size_p) { } @@ -59861,7 +65767,7 @@ struct ReservoirQuantileBindData : public FunctionData { return quantile == other.quantile; } - float quantile; + double quantile; int32_t sample_size; }; @@ -59909,7 +65815,7 @@ struct ReservoirQuantileOperation { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { if (source.pos == 0) { return; } @@ -59978,28 +65884,22 @@ AggregateFunction GetReservoirQuantileAggregateFunction(PhysicalType type) { return AggregateFunction::UnaryAggregateDestructor>(LogicalType::HUGEINT, LogicalType::HUGEINT); - case PhysicalType::FLOAT: - return AggregateFunction::UnaryAggregateDestructor>(LogicalType::FLOAT, - LogicalType::FLOAT); - case PhysicalType::DOUBLE: return AggregateFunction::UnaryAggregateDestructor>(LogicalType::DOUBLE, LogicalType::DOUBLE); - default: - throw NotImplementedException("Unimplemented quantile aggregate"); + throw InternalException("Unimplemented quantile aggregate"); } } unique_ptr BindReservoirQuantile(ClientContext &context, AggregateFunction &function, vector> &arguments) { - if (!arguments[1]->IsScalar()) { + if (!arguments[1]->IsFoldable()) { throw BinderException("QUANTILE can only take constant quantile parameters"); } Value quantile_val = ExpressionExecutor::EvaluateScalar(*arguments[1]); - auto quantile = quantile_val.GetValue(); + auto quantile = quantile_val.GetValue(); if (quantile_val.is_null || quantile < 0 || 
quantile > 1) { throw BinderException("QUANTILE can only take parameters in range [0, 1]"); @@ -60008,7 +65908,7 @@ unique_ptr BindReservoirQuantile(ClientContext &context, Aggregate arguments.pop_back(); return make_unique(quantile, 8192); } - if (!arguments[2]->IsScalar()) { + if (!arguments[2]->IsFoldable()) { throw BinderException("QUANTILE can only take constant quantile parameters"); } Value sample_size_val = ExpressionExecutor::EvaluateScalar(*arguments[2]); @@ -60036,18 +65936,18 @@ AggregateFunction GetReservoirQuantileAggregate(PhysicalType type) { auto fun = GetReservoirQuantileAggregateFunction(type); fun.bind = BindReservoirQuantile; // temporarily push an argument so we can bind the actual quantile - fun.arguments.push_back(LogicalType::FLOAT); + fun.arguments.push_back(LogicalType::DOUBLE); return fun; } void ReservoirQuantileFun::RegisterFunction(BuiltinFunctions &set) { AggregateFunctionSet reservoir_quantile("reservoir_quantile"); - reservoir_quantile.AddFunction(AggregateFunction({LogicalType::DECIMAL, LogicalType::FLOAT, LogicalType::INTEGER}, - LogicalType::DECIMAL, nullptr, nullptr, nullptr, nullptr, nullptr, - nullptr, BindReservoirQuantileDecimal)); - reservoir_quantile.AddFunction(AggregateFunction({LogicalType::DECIMAL, LogicalType::FLOAT}, LogicalType::DECIMAL, - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, - BindReservoirQuantileDecimal)); + reservoir_quantile.AddFunction( + AggregateFunction({LogicalTypeId::DECIMAL, LogicalType::DOUBLE, LogicalType::INTEGER}, LogicalTypeId::DECIMAL, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, BindReservoirQuantileDecimal)); + reservoir_quantile.AddFunction(AggregateFunction({LogicalTypeId::DECIMAL, LogicalType::DOUBLE}, + LogicalTypeId::DECIMAL, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, BindReservoirQuantileDecimal)); reservoir_quantile.AddFunction(GetReservoirQuantileAggregate(PhysicalType::INT16)); 
reservoir_quantile.AddFunction(GetReservoirQuantileAggregate(PhysicalType::INT32)); reservoir_quantile.AddFunction(GetReservoirQuantileAggregate(PhysicalType::INT64)); @@ -60064,6 +65964,7 @@ namespace duckdb { void BuiltinFunctions::RegisterHolisticAggregates() { Register(); + Register(); Register(); Register(); } @@ -60072,7 +65973,7 @@ void BuiltinFunctions::RegisterHolisticAggregates() { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/function/aggregate/nested_functions.hpp +// duckdb/function/scalar/nested_functions.hpp // // //===----------------------------------------------------------------------===// @@ -60084,41 +65985,84 @@ void BuiltinFunctions::RegisterHolisticAggregates() { namespace duckdb { -struct ListBindData : public FunctionData { - ListBindData() { +struct VariableReturnBindData : public FunctionData { + LogicalType stype; + + explicit VariableReturnBindData(LogicalType stype) : stype(stype) { } unique_ptr Copy() override { - return make_unique(); + return make_unique(stype); } }; -struct ListFun { +struct ArraySliceFun { static void RegisterFunction(BuiltinFunctions &set); }; -struct HistogramFun { + +struct StructPackFun { static void RegisterFunction(BuiltinFunctions &set); }; -} // namespace duckdb +struct ListValueFun { + static void RegisterFunction(BuiltinFunctions &set); +}; +struct MapFun { + static void RegisterFunction(BuiltinFunctions &set); +}; +struct MapExtractFun { + static void RegisterFunction(BuiltinFunctions &set); +}; +struct ListExtractFun { + static void RegisterFunction(BuiltinFunctions &set); +}; +struct CardinalityFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct StructExtractFun { + static ScalarFunction GetFunction(); + static void RegisterFunction(BuiltinFunctions &set); +}; + +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/map.hpp +// 
duckdb/function/aggregate/nested_functions.hpp // // //===----------------------------------------------------------------------===// -#include + + namespace duckdb { -using std::map; -} + +struct ListBindData : public FunctionData { + ListBindData() { + } + + unique_ptr Copy() override { + return make_unique(); + } +}; + +struct ListFun { + static void RegisterFunction(BuiltinFunctions &set); +}; +struct HistogramFun { + static void RegisterFunction(BuiltinFunctions &set); +}; +} // namespace duckdb + + + @@ -60126,7 +66070,7 @@ using std::map; namespace duckdb { template struct HistogramAggState { - map *hist; + map *hist; }; struct HistogramFunction { @@ -60163,7 +66107,7 @@ static void HistogramUpdateFunction(Vector inputs[], FunctionData *, idx_t input if (input_data.validity.RowIsValid(input_data.sel->get_index(i))) { auto state = states[sdata.sel->get_index(i)]; if (!state->hist) { - state->hist = new map(); + state->hist = new map(); } auto value = (T *)input_data.data; (*state->hist)[value[input_data.sel->get_index(i)]]++; @@ -60186,7 +66130,7 @@ static void HistogramUpdateFunctionString(Vector inputs[], FunctionData *, idx_t if (input_data.validity.RowIsValid(input_data.sel->get_index(i))) { auto state = states[sdata.sel->get_index(i)]; if (!state->hist) { - state->hist = new map(); + state->hist = new map(); } auto value = (string_t *)input_data.data; (*state->hist)[value[input_data.sel->get_index(i)].GetString()]++; @@ -60204,9 +66148,14 @@ static void HistogramCombineFunction(Vector &state, Vector &combined, idx_t coun for (idx_t i = 0; i < count; i++) { auto state = states_ptr[sdata.sel->get_index(i)]; + if (!state->hist) { + continue; + } if (!combined_ptr[i]->hist) { - combined_ptr[i]->hist = new map(); + combined_ptr[i]->hist = new map(); } + D_ASSERT(combined_ptr[i]->hist); + D_ASSERT(state->hist); for (auto &entry : *state->hist) { (*combined_ptr[i]->hist)[entry.first] += entry.second; } @@ -60214,113 +66163,119 @@ static void 
HistogramCombineFunction(Vector &state, Vector &combined, idx_t coun } template -static void HistogramFinalize(Vector &state_vector, FunctionData *, Vector &result, idx_t count) { +static void HistogramFinalize(Vector &state_vector, FunctionData *, Vector &result, idx_t count, idx_t offset) { VectorData sdata; state_vector.Orrify(count, sdata); auto states = (HistogramAggState **)sdata.data; - result.Initialize(result.GetType()); - auto list_struct_data = FlatVector::GetData(result); - auto list_child = make_unique(result.GetType().child_types()[0].second); - size_t old_len = 0; - ListVector::SetEntry(result, move(list_child)); + auto &mask = FlatVector::Validity(result); + + auto &child_entries = StructVector::GetEntries(result); + auto &bucket_list = child_entries[0]; + auto &count_list = child_entries[1]; + + auto old_len = ListVector::GetListSize(*bucket_list); + + auto &bucket_validity = FlatVector::Validity(*bucket_list); + auto &count_validity = FlatVector::Validity(*count_list); for (idx_t i = 0; i < count; i++) { + const auto rid = i + offset; auto state = states[sdata.sel->get_index(i)]; if (!state->hist) { - mask.SetInvalid(i); + mask.SetInvalid(rid); + bucket_validity.SetInvalid(rid); + count_validity.SetInvalid(rid); continue; } for (auto &entry : *state->hist) { - child_list_t struct_values; - struct_values.push_back({"bucket", Value::CreateValue(entry.first)}); - struct_values.push_back({"count", Value::UBIGINT(entry.second)}); - auto val = Value::STRUCT(struct_values); - ListVector::PushBack(result, val); + auto bucket_value = Value::CreateValue(entry.first); + ListVector::PushBack(*bucket_list, bucket_value); + auto count_value = Value::CreateValue(entry.second); + ListVector::PushBack(*count_list, count_value); } - list_struct_data[i].length = ListVector::GetListSize(result) - old_len; - list_struct_data[i].offset = old_len; - old_len = list_struct_data[i].length; + auto list_struct_data = FlatVector::GetData(*bucket_list); + 
list_struct_data[rid].length = ListVector::GetListSize(*bucket_list) - old_len; + list_struct_data[rid].offset = old_len; + + list_struct_data = FlatVector::GetData(*count_list); + list_struct_data[rid].length = ListVector::GetListSize(*count_list) - old_len; + list_struct_data[rid].offset = old_len; + old_len = list_struct_data[rid].length; } } unique_ptr HistogramBindFunction(ClientContext &context, AggregateFunction &function, vector> &arguments) { - if (arguments.size() != 1) { - throw Exception("We need exactly one argument for the histogram"); - } D_ASSERT(arguments.size() == 1); child_list_t struct_children; - struct_children.push_back({"bucket", arguments[0]->return_type}); - struct_children.push_back({"count", LogicalType::UBIGINT}); - auto struct_type = LogicalType(LogicalTypeId::STRUCT, move(struct_children)); - child_list_t children; - children.push_back(make_pair("", struct_type)); + struct_children.push_back({"bucket", LogicalType::LIST(arguments[0]->return_type)}); + struct_children.push_back({"count", LogicalType::LIST(LogicalType::UBIGINT)}); + auto struct_type = LogicalType::MAP(move(struct_children)); - function.return_type = LogicalType(LogicalTypeId::LIST, move(children)); - return make_unique(); // TODO atm this is not used anywhere but it might not be required after all - // except for sanity checking + function.return_type = struct_type; + return make_unique(function.return_type); } AggregateFunction GetHistogramFunction(PhysicalType type) { switch (type) { case PhysicalType::UINT16: - return AggregateFunction("histogram", {LogicalType::USMALLINT}, LogicalType::STRUCT, + return AggregateFunction("histogram", {LogicalType::USMALLINT}, LogicalTypeId::MAP, AggregateFunction::StateSize>, AggregateFunction::StateInitialize, HistogramFunction>, HistogramUpdateFunction, HistogramCombineFunction, HistogramFinalize, nullptr, HistogramBindFunction, AggregateFunction::StateDestroy, HistogramFunction>); case PhysicalType::UINT32: - return 
AggregateFunction("histogram", {LogicalType::UINTEGER}, LogicalType::STRUCT, + return AggregateFunction("histogram", {LogicalType::UINTEGER}, LogicalTypeId::MAP, AggregateFunction::StateSize>, AggregateFunction::StateInitialize, HistogramFunction>, HistogramUpdateFunction, HistogramCombineFunction, HistogramFinalize, nullptr, HistogramBindFunction, AggregateFunction::StateDestroy, HistogramFunction>); case PhysicalType::UINT64: - return AggregateFunction("histogram", {LogicalType::UBIGINT}, LogicalType::STRUCT, + return AggregateFunction("histogram", {LogicalType::UBIGINT}, LogicalTypeId::MAP, AggregateFunction::StateSize>, AggregateFunction::StateInitialize, HistogramFunction>, HistogramUpdateFunction, HistogramCombineFunction, HistogramFinalize, nullptr, HistogramBindFunction, AggregateFunction::StateDestroy, HistogramFunction>); case PhysicalType::INT16: - return AggregateFunction("histogram", {LogicalType::SMALLINT}, LogicalType::STRUCT, + return AggregateFunction("histogram", {LogicalType::SMALLINT}, LogicalTypeId::MAP, AggregateFunction::StateSize>, AggregateFunction::StateInitialize, HistogramFunction>, HistogramUpdateFunction, HistogramCombineFunction, HistogramFinalize, nullptr, HistogramBindFunction, AggregateFunction::StateDestroy, HistogramFunction>); case PhysicalType::INT32: - return AggregateFunction("histogram", {LogicalType::INTEGER}, LogicalType::STRUCT, + return AggregateFunction("histogram", {LogicalType::INTEGER}, LogicalTypeId::MAP, AggregateFunction::StateSize>, AggregateFunction::StateInitialize, HistogramFunction>, HistogramUpdateFunction, HistogramCombineFunction, HistogramFinalize, nullptr, HistogramBindFunction, AggregateFunction::StateDestroy, HistogramFunction>); case PhysicalType::INT64: - return AggregateFunction("histogram", {LogicalType::BIGINT}, LogicalType::STRUCT, + return AggregateFunction("histogram", {LogicalType::BIGINT}, LogicalTypeId::MAP, AggregateFunction::StateSize>, AggregateFunction::StateInitialize, 
HistogramFunction>, HistogramUpdateFunction, HistogramCombineFunction, HistogramFinalize, nullptr, HistogramBindFunction, AggregateFunction::StateDestroy, HistogramFunction>); case PhysicalType::FLOAT: - return AggregateFunction("histogram", {LogicalType::FLOAT}, LogicalType::STRUCT, + return AggregateFunction("histogram", {LogicalType::FLOAT}, LogicalTypeId::MAP, AggregateFunction::StateSize>, AggregateFunction::StateInitialize, HistogramFunction>, HistogramUpdateFunction, HistogramCombineFunction, HistogramFinalize, nullptr, HistogramBindFunction, AggregateFunction::StateDestroy, HistogramFunction>); case PhysicalType::DOUBLE: - return AggregateFunction("histogram", {LogicalType::DOUBLE}, LogicalType::STRUCT, + return AggregateFunction("histogram", {LogicalType::DOUBLE}, LogicalTypeId::MAP, AggregateFunction::StateSize>, AggregateFunction::StateInitialize, HistogramFunction>, HistogramUpdateFunction, HistogramCombineFunction, HistogramFinalize, nullptr, HistogramBindFunction, AggregateFunction::StateDestroy, HistogramFunction>); case PhysicalType::VARCHAR: - return AggregateFunction("histogram", {LogicalType::VARCHAR}, LogicalType::STRUCT, + return AggregateFunction("histogram", {LogicalType::VARCHAR}, LogicalTypeId::MAP, AggregateFunction::StateSize>, AggregateFunction::StateInitialize, HistogramFunction>, HistogramUpdateFunctionString, HistogramCombineFunction, @@ -60328,7 +66283,7 @@ AggregateFunction GetHistogramFunction(PhysicalType type) { AggregateFunction::StateDestroy, HistogramFunction>); default: - throw NotImplementedException("Unimplemented histogram aggregate"); + throw InternalException("Unimplemented histogram aggregate"); } } @@ -60343,7 +66298,7 @@ void HistogramFun::RegisterFunction(BuiltinFunctions &set) { fun.AddFunction(GetHistogramFunction(PhysicalType::FLOAT)); fun.AddFunction(GetHistogramFunction(PhysicalType::DOUBLE)); fun.AddFunction(GetHistogramFunction(PhysicalType::VARCHAR)); - fun.AddFunction(AggregateFunction("histogram", 
{LogicalType::TIMESTAMP}, LogicalType::STRUCT, + fun.AddFunction(AggregateFunction("histogram", {LogicalType::TIMESTAMP}, LogicalTypeId::MAP, AggregateFunction::StateSize>, AggregateFunction::StateInitialize, HistogramFunction>, HistogramUpdateFunction, HistogramCombineFunction, @@ -60387,12 +66342,10 @@ static void ListUpdateFunction(Vector inputs[], FunctionData *, idx_t input_coun auto &input = inputs[0]; VectorData sdata; state_vector.Orrify(count, sdata); - child_list_t child_types; - child_types.push_back({"", input.GetType()}); - LogicalType list_vector_type(LogicalType::LIST.id(), child_types); + + auto list_vector_type = LogicalType::LIST(input.GetType()); auto states = (ListAggState **)sdata.data; - SelectionVector sel(STANDARD_VECTOR_SIZE); if (input.GetVectorType() == VectorType::SEQUENCE_VECTOR) { input.Normalify(count); } @@ -60400,8 +66353,6 @@ static void ListUpdateFunction(Vector inputs[], FunctionData *, idx_t input_coun auto state = states[sdata.sel->get_index(i)]; if (!state->list_vector) { state->list_vector = new Vector(list_vector_type); - auto list_child = make_unique(input.GetType()); - ListVector::SetEntry(*state->list_vector, move(list_child)); } ListVector::Append(*state->list_vector, input, i + 1, i); } @@ -60425,15 +66376,15 @@ static void ListCombineFunction(Vector &state, Vector &combined, idx_t count) { } } -static void ListFinalize(Vector &state_vector, FunctionData *, Vector &result, idx_t count) { +static void ListFinalize(Vector &state_vector, FunctionData *, Vector &result, idx_t count, idx_t offset) { VectorData sdata; state_vector.Orrify(count, sdata); auto states = (ListAggState **)sdata.data; D_ASSERT(result.GetType().id() == LogicalTypeId::LIST); - result.Initialize(result.GetType()); // deals with constants + auto &mask = FlatVector::Validity(result); - size_t total_len = 0; + size_t total_len = ListVector::GetListSize(result); for (idx_t i = 0; i < count; i++) { auto state = states[sdata.sel->get_index(i)]; if 
(!state->list_vector) { @@ -60444,13 +66395,12 @@ static void ListFinalize(Vector &state_vector, FunctionData *, Vector &result, i auto list_struct_data = FlatVector::GetData(result); auto &state_lv = *state->list_vector; auto state_lv_count = ListVector::GetListSize(state_lv); - list_struct_data[i].length = state_lv_count; - list_struct_data[i].offset = total_len; + const auto rid = i + offset; + list_struct_data[rid].length = state_lv_count; + list_struct_data[rid].offset = total_len; total_len += state_lv_count; } - auto list_buffer = make_unique(result.GetType().child_types()[0].second); - ListVector::SetEntry(result, move(list_buffer)); for (idx_t i = 0; i < count; i++) { auto state = states[sdata.sel->get_index(i)]; if (!state->list_vector) { @@ -60465,17 +66415,14 @@ static void ListFinalize(Vector &state_vector, FunctionData *, Vector &result, i unique_ptr ListBindFunction(ClientContext &context, AggregateFunction &function, vector> &arguments) { D_ASSERT(arguments.size() == 1); - child_list_t children; - children.push_back(make_pair("", arguments[0]->return_type)); - - function.return_type = LogicalType(LogicalTypeId::LIST, move(children)); + function.return_type = LogicalType::LIST(arguments[0]->return_type); return make_unique(); // TODO atm this is not used anywhere but it might not be required after all // except for sanity checking } void ListFun::RegisterFunction(BuiltinFunctions &set) { auto agg = AggregateFunction( - "list", {LogicalType::ANY}, LogicalType::LIST, AggregateFunction::StateSize, + "list", {LogicalType::ANY}, LogicalTypeId::LIST, AggregateFunction::StateSize, AggregateFunction::StateInitialize, ListUpdateFunction, ListCombineFunction, ListFinalize, nullptr, ListBindFunction, AggregateFunction::StateDestroy); set.AddFunction(agg); @@ -60566,7 +66513,7 @@ struct RegrAvgFunction { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { target->sum += source.sum; 
target->count += source.count; } @@ -60644,7 +66591,7 @@ struct RegrCountFunction { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { *target += source; } @@ -60714,7 +66661,7 @@ struct RegrSlopeOperation { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { CovarOperation::Combine(source.cov_pop, &target->cov_pop); STDDevBaseOperation::Combine(source.var_pop, &target->var_pop); } @@ -60774,7 +66721,7 @@ struct RegrInterceptOperation { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { target->count += source.count; target->sum_x += source.sum_x; target->sum_y += source.sum_y; @@ -60842,7 +66789,7 @@ struct RegrR2Operation { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { CorrOperation::Combine(source.corr, &target->corr); STDDevBaseOperation::Combine(source.var_pop_x, &target->var_pop_x); STDDevBaseOperation::Combine(source.var_pop_y, &target->var_pop_y); @@ -60928,7 +66875,7 @@ struct RegrBaseOperation { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { RegrCountFunction::Combine(source.count, &target->count); STDDevBaseOperation::Combine(source.var_pop, &target->var_pop); } @@ -61018,7 +66965,7 @@ struct RegrSXYOperation { } template - static void Combine(STATE source, STATE *target) { + static void Combine(const STATE &source, STATE *target) { CovarOperation::Combine(source.cov_pop, &target->cov_pop); RegrCountFunction::Combine(source.count, &target->count); } @@ -61292,9 +67239,19 @@ int64_t CastRules::ImplicitCast(const LogicalType &from, const LogicalType &to) } if (from.id() == LogicalTypeId::LIST && to.id() == LogicalTypeId::LIST) { // Lists can be cast if their child types can be cast - 
D_ASSERT(!from.child_types().empty()); - D_ASSERT(!to.child_types().empty()); - return ImplicitCast(from.child_types()[0].second, to.child_types()[0].second); + return ImplicitCast(ListType::GetChildType(from), ListType::GetChildType(to)); + } + if ((from.id() == LogicalTypeId::TIMESTAMP_SEC || from.id() == LogicalTypeId::TIMESTAMP_MS || + from.id() == LogicalTypeId::TIMESTAMP_NS) && + to.id() == LogicalTypeId::TIMESTAMP) { + //! Any timestamp type can be converted to the default (us) type at low cost + return 101; + } + if ((to.id() == LogicalTypeId::TIMESTAMP_SEC || to.id() == LogicalTypeId::TIMESTAMP_MS || + to.id() == LogicalTypeId::TIMESTAMP_NS) && + from.id() == LogicalTypeId::TIMESTAMP) { + //! Any timestamp type can be converted to the default (us) type at low cost + return 100; } switch (from.id()) { case LogicalTypeId::TINYINT: @@ -61331,17 +67288,10 @@ int64_t CastRules::ImplicitCast(const LogicalType &from, const LogicalType &to) } // namespace duckdb - - - - - - - //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/function/scalar/string_functions.hpp +// duckdb/function/compression/compression.hpp // // //===----------------------------------------------------------------------===// @@ -61351,947 +67301,113 @@ int64_t CastRules::ImplicitCast(const LogicalType &from, const LogicalType &to) -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 -// See the end of this file for a list - -/* - * Copyright (c) 2014-2019 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors. - * Copyright (c) 2009 Public Software Group e. 
V., Berlin, Germany - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - - -/** - * @mainpage - * - * utf8proc is a free/open-source (MIT/expat licensed) C library - * providing Unicode normalization, case-folding, and other operations - * for strings in the UTF-8 encoding, supporting up-to-date Unicode versions. - * See the utf8proc home page (http://julialang.org/utf8proc/) - * for downloads and other information, or the source code on github - * (https://github.com/JuliaLang/utf8proc). 
- * - * For the utf8proc API documentation, see: @ref utf8proc.h - * - * The features of utf8proc include: - * - * - Transformation of strings (@ref utf8proc_map) to: - * - decompose (@ref UTF8PROC_DECOMPOSE) or compose (@ref UTF8PROC_COMPOSE) Unicode combining characters (http://en.wikipedia.org/wiki/Combining_character) - * - canonicalize Unicode compatibility characters (@ref UTF8PROC_COMPAT) - * - strip "ignorable" (@ref UTF8PROC_IGNORE) characters, control characters (@ref UTF8PROC_STRIPCC), or combining characters such as accents (@ref UTF8PROC_STRIPMARK) - * - case-folding (@ref UTF8PROC_CASEFOLD) - * - Unicode normalization: @ref utf8proc_NFD, @ref utf8proc_NFC, @ref utf8proc_NFKD, @ref utf8proc_NFKC - * - Detecting grapheme boundaries (@ref utf8proc_grapheme_break and @ref UTF8PROC_CHARBOUND) - * - Character-width computation: @ref utf8proc_charwidth - * - Classification of characters by Unicode category: @ref utf8proc_category and @ref utf8proc_category_string - * - Encode (@ref utf8proc_encode_char) and decode (@ref utf8proc_iterate) Unicode codepoints to/from UTF-8. - */ - -/** @file */ - -#ifndef UTF8PROC_H -#define UTF8PROC_H - -// DuckDB change: -#define UTF8PROC_STATIC - -/** @name API version - * - * The utf8proc API version MAJOR.MINOR.PATCH, following - * semantic-versioning rules (http://semver.org) based on API - * compatibility. - * - * This is also returned at runtime by @ref utf8proc_version; however, the - * runtime version may append a string like "-dev" to the version number - * for prerelease versions. - * - * @note The shared-library version number in the Makefile - * (and CMakeLists.txt, and MANIFEST) may be different, - * being based on ABI compatibility rather than API compatibility. - */ -/** @{ */ -/** The MAJOR version number (increased when backwards API compatibility is broken). */ -#define UTF8PROC_VERSION_MAJOR 2 -/** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). 
*/ -#define UTF8PROC_VERSION_MINOR 4 -/** The PATCH version (increased for fixes that do not change the API). */ -#define UTF8PROC_VERSION_PATCH 0 -/** @} */ - -#include - -#if defined(_MSC_VER) && _MSC_VER < 1800 -// MSVC prior to 2013 lacked stdbool.h and inttypes.h -typedef signed char utf8proc_int8_t; -typedef unsigned char utf8proc_uint8_t; -typedef short utf8proc_int16_t; -typedef unsigned short utf8proc_uint16_t; -typedef int utf8proc_int32_t; -typedef unsigned int utf8proc_uint32_t; -# ifdef _WIN64 -typedef __int64 utf8proc_ssize_t; -typedef unsigned __int64 utf8proc_size_t; -# else -typedef int utf8proc_ssize_t; -typedef unsigned int utf8proc_size_t; -# endif -# ifndef __cplusplus -// emulate C99 bool -typedef unsigned char utf8proc_bool; -# ifndef __bool_true_false_are_defined -# define false 0 -# define true 1 -# define __bool_true_false_are_defined 1 -# endif -# else -typedef bool utf8proc_bool; -# endif -#else -# include -# include -# include -#endif -#include - -#define UTF8PROC_DLLEXPORT -// #ifdef UTF8PROC_STATIC -// # define UTF8PROC_DLLEXPORT -// #else -// # ifdef _WIN32 -// # ifdef UTF8PROC_EXPORTS -// # define UTF8PROC_DLLEXPORT __declspec(dllexport) -// # else -// # define UTF8PROC_DLLEXPORT __declspec(dllimport) -// # endif -// # elif __GNUC__ >= 4 -// # define UTF8PROC_DLLEXPORT __attribute__ ((visibility("default"))) -// # else -// # define UTF8PROC_DLLEXPORT -// # endif -// #endif - namespace duckdb { -typedef int8_t utf8proc_int8_t; -typedef uint8_t utf8proc_uint8_t; -typedef int16_t utf8proc_int16_t; -typedef uint16_t utf8proc_uint16_t; -typedef int32_t utf8proc_int32_t; -typedef uint32_t utf8proc_uint32_t; -typedef size_t utf8proc_size_t; -typedef ptrdiff_t utf8proc_ssize_t; -typedef bool utf8proc_bool; - -//#ifdef __cplusplus -//extern "C" { -//#endif - -/** - * Option flags used by several functions in the library. - */ -typedef enum { - /** The given UTF-8 input is NULL terminated. 
*/ - UTF8PROC_NULLTERM = (1<<0), - /** Unicode Versioning Stability has to be respected. */ - UTF8PROC_STABLE = (1<<1), - /** Compatibility decomposition (i.e. formatting information is lost). */ - UTF8PROC_COMPAT = (1<<2), - /** Return a result with decomposed characters. */ - UTF8PROC_COMPOSE = (1<<3), - /** Return a result with decomposed characters. */ - UTF8PROC_DECOMPOSE = (1<<4), - /** Strip "default ignorable characters" such as SOFT-HYPHEN or ZERO-WIDTH-SPACE. */ - UTF8PROC_IGNORE = (1<<5), - /** Return an error, if the input contains unassigned codepoints. */ - UTF8PROC_REJECTNA = (1<<6), - /** - * Indicating that NLF-sequences (LF, CRLF, CR, NEL) are representing a - * line break, and should be converted to the codepoint for line - * separation (LS). - */ - UTF8PROC_NLF2LS = (1<<7), - /** - * Indicating that NLF-sequences are representing a paragraph break, and - * should be converted to the codepoint for paragraph separation - * (PS). - */ - UTF8PROC_NLF2PS = (1<<8), - /** Indicating that the meaning of NLF-sequences is unknown. */ - UTF8PROC_NLF2LF = (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS), - /** Strips and/or convers control characters. - * - * NLF-sequences are transformed into space, except if one of the - * NLF2LS/PS/LF options is given. HorizontalTab (HT) and FormFeed (FF) - * are treated as a NLF-sequence in this case. All other control - * characters are simply removed. - */ - UTF8PROC_STRIPCC = (1<<9), - /** - * Performs unicode case folding, to be able to do a case-insensitive - * string comparison. - */ - UTF8PROC_CASEFOLD = (1<<10), - /** - * Inserts 0xFF bytes at the beginning of each sequence which is - * representing a single grapheme cluster (see UAX#29). - */ - UTF8PROC_CHARBOUND = (1<<11), - /** Lumps certain characters together. - * - * E.g. HYPHEN U+2010 and MINUS U+2212 to ASCII "-". See lump.md for details. - * - * If NLF2LF is set, this includes a transformation of paragraph and - * line separators to ASCII line-feed (LF). 
- */ - UTF8PROC_LUMP = (1<<12), - /** Strips all character markings. - * - * This includes non-spacing, spacing and enclosing (i.e. accents). - * @note This option works only with @ref UTF8PROC_COMPOSE or - * @ref UTF8PROC_DECOMPOSE - */ - UTF8PROC_STRIPMARK = (1<<13), - /** - * Strip unassigned codepoints. - */ - UTF8PROC_STRIPNA = (1<<14), -} utf8proc_option_t; - -/** @name Error codes - * Error codes being returned by almost all functions. - */ -/** @{ */ -/** Memory could not be allocated. */ -#define UTF8PROC_ERROR_NOMEM -1 -/** The given string is too long to be processed. */ -#define UTF8PROC_ERROR_OVERFLOW -2 -/** The given string is not a legal UTF-8 string. */ -#define UTF8PROC_ERROR_INVALIDUTF8 -3 -/** The @ref UTF8PROC_REJECTNA flag was set and an unassigned codepoint was found. */ -#define UTF8PROC_ERROR_NOTASSIGNED -4 -/** Invalid options have been used. */ -#define UTF8PROC_ERROR_INVALIDOPTS -5 -/** @} */ - -/* @name Types */ - -/** Holds the value of a property. */ -typedef utf8proc_int16_t utf8proc_propval_t; - -/** Struct containing information about a codepoint. */ -typedef struct utf8proc_property_struct { - /** - * Unicode category. - * @see utf8proc_category_t. - */ - utf8proc_propval_t category; - utf8proc_propval_t combining_class; - /** - * Bidirectional class. - * @see utf8proc_bidi_class_t. - */ - utf8proc_propval_t bidi_class; - /** - * @anchor Decomposition type. - * @see utf8proc_decomp_type_t. - */ - utf8proc_propval_t decomp_type; - utf8proc_uint16_t decomp_seqindex; - utf8proc_uint16_t casefold_seqindex; - utf8proc_uint16_t uppercase_seqindex; - utf8proc_uint16_t lowercase_seqindex; - utf8proc_uint16_t titlecase_seqindex; - utf8proc_uint16_t comb_index; - unsigned bidi_mirrored:1; - unsigned comp_exclusion:1; - /** - * Can this codepoint be ignored? - * - * Used by @ref utf8proc_decompose_char when @ref UTF8PROC_IGNORE is - * passed as an option. 
- */ - unsigned ignorable:1; - unsigned control_boundary:1; - /** The width of the codepoint. */ - unsigned charwidth:2; - unsigned pad:2; - /** - * Boundclass. - * @see utf8proc_boundclass_t. - */ - unsigned boundclass:8; -} utf8proc_property_t; - -/** Unicode categories. */ -typedef enum { - UTF8PROC_CATEGORY_CN = 0, /**< Other, not assigned */ - UTF8PROC_CATEGORY_LU = 1, /**< Letter, uppercase */ - UTF8PROC_CATEGORY_LL = 2, /**< Letter, lowercase */ - UTF8PROC_CATEGORY_LT = 3, /**< Letter, titlecase */ - UTF8PROC_CATEGORY_LM = 4, /**< Letter, modifier */ - UTF8PROC_CATEGORY_LO = 5, /**< Letter, other */ - UTF8PROC_CATEGORY_MN = 6, /**< Mark, nonspacing */ - UTF8PROC_CATEGORY_MC = 7, /**< Mark, spacing combining */ - UTF8PROC_CATEGORY_ME = 8, /**< Mark, enclosing */ - UTF8PROC_CATEGORY_ND = 9, /**< Number, decimal digit */ - UTF8PROC_CATEGORY_NL = 10, /**< Number, letter */ - UTF8PROC_CATEGORY_NO = 11, /**< Number, other */ - UTF8PROC_CATEGORY_PC = 12, /**< Punctuation, connector */ - UTF8PROC_CATEGORY_PD = 13, /**< Punctuation, dash */ - UTF8PROC_CATEGORY_PS = 14, /**< Punctuation, open */ - UTF8PROC_CATEGORY_PE = 15, /**< Punctuation, close */ - UTF8PROC_CATEGORY_PI = 16, /**< Punctuation, initial quote */ - UTF8PROC_CATEGORY_PF = 17, /**< Punctuation, final quote */ - UTF8PROC_CATEGORY_PO = 18, /**< Punctuation, other */ - UTF8PROC_CATEGORY_SM = 19, /**< Symbol, math */ - UTF8PROC_CATEGORY_SC = 20, /**< Symbol, currency */ - UTF8PROC_CATEGORY_SK = 21, /**< Symbol, modifier */ - UTF8PROC_CATEGORY_SO = 22, /**< Symbol, other */ - UTF8PROC_CATEGORY_ZS = 23, /**< Separator, space */ - UTF8PROC_CATEGORY_ZL = 24, /**< Separator, line */ - UTF8PROC_CATEGORY_ZP = 25, /**< Separator, paragraph */ - UTF8PROC_CATEGORY_CC = 26, /**< Other, control */ - UTF8PROC_CATEGORY_CF = 27, /**< Other, format */ - UTF8PROC_CATEGORY_CS = 28, /**< Other, surrogate */ - UTF8PROC_CATEGORY_CO = 29, /**< Other, private use */ -} utf8proc_category_t; - -/** Bidirectional character classes. 
*/ -typedef enum { - UTF8PROC_BIDI_CLASS_L = 1, /**< Left-to-Right */ - UTF8PROC_BIDI_CLASS_LRE = 2, /**< Left-to-Right Embedding */ - UTF8PROC_BIDI_CLASS_LRO = 3, /**< Left-to-Right Override */ - UTF8PROC_BIDI_CLASS_R = 4, /**< Right-to-Left */ - UTF8PROC_BIDI_CLASS_AL = 5, /**< Right-to-Left Arabic */ - UTF8PROC_BIDI_CLASS_RLE = 6, /**< Right-to-Left Embedding */ - UTF8PROC_BIDI_CLASS_RLO = 7, /**< Right-to-Left Override */ - UTF8PROC_BIDI_CLASS_PDF = 8, /**< Pop Directional Format */ - UTF8PROC_BIDI_CLASS_EN = 9, /**< European Number */ - UTF8PROC_BIDI_CLASS_ES = 10, /**< European Separator */ - UTF8PROC_BIDI_CLASS_ET = 11, /**< European Number Terminator */ - UTF8PROC_BIDI_CLASS_AN = 12, /**< Arabic Number */ - UTF8PROC_BIDI_CLASS_CS = 13, /**< Common Number Separator */ - UTF8PROC_BIDI_CLASS_NSM = 14, /**< Nonspacing Mark */ - UTF8PROC_BIDI_CLASS_BN = 15, /**< Boundary Neutral */ - UTF8PROC_BIDI_CLASS_B = 16, /**< Paragraph Separator */ - UTF8PROC_BIDI_CLASS_S = 17, /**< Segment Separator */ - UTF8PROC_BIDI_CLASS_WS = 18, /**< Whitespace */ - UTF8PROC_BIDI_CLASS_ON = 19, /**< Other Neutrals */ - UTF8PROC_BIDI_CLASS_LRI = 20, /**< Left-to-Right Isolate */ - UTF8PROC_BIDI_CLASS_RLI = 21, /**< Right-to-Left Isolate */ - UTF8PROC_BIDI_CLASS_FSI = 22, /**< First Strong Isolate */ - UTF8PROC_BIDI_CLASS_PDI = 23, /**< Pop Directional Isolate */ -} utf8proc_bidi_class_t; - -/** Decomposition type. 
*/ -typedef enum { - UTF8PROC_DECOMP_TYPE_FONT = 1, /**< Font */ - UTF8PROC_DECOMP_TYPE_NOBREAK = 2, /**< Nobreak */ - UTF8PROC_DECOMP_TYPE_INITIAL = 3, /**< Initial */ - UTF8PROC_DECOMP_TYPE_MEDIAL = 4, /**< Medial */ - UTF8PROC_DECOMP_TYPE_FINAL = 5, /**< Final */ - UTF8PROC_DECOMP_TYPE_ISOLATED = 6, /**< Isolated */ - UTF8PROC_DECOMP_TYPE_CIRCLE = 7, /**< Circle */ - UTF8PROC_DECOMP_TYPE_SUPER = 8, /**< Super */ - UTF8PROC_DECOMP_TYPE_SUB = 9, /**< Sub */ - UTF8PROC_DECOMP_TYPE_VERTICAL = 10, /**< Vertical */ - UTF8PROC_DECOMP_TYPE_WIDE = 11, /**< Wide */ - UTF8PROC_DECOMP_TYPE_NARROW = 12, /**< Narrow */ - UTF8PROC_DECOMP_TYPE_SMALL = 13, /**< Small */ - UTF8PROC_DECOMP_TYPE_SQUARE = 14, /**< Square */ - UTF8PROC_DECOMP_TYPE_FRACTION = 15, /**< Fraction */ - UTF8PROC_DECOMP_TYPE_COMPAT = 16, /**< Compat */ -} utf8proc_decomp_type_t; - -/** Boundclass property. (TR29) */ -typedef enum { - UTF8PROC_BOUNDCLASS_START = 0, /**< Start */ - UTF8PROC_BOUNDCLASS_OTHER = 1, /**< Other */ - UTF8PROC_BOUNDCLASS_CR = 2, /**< Cr */ - UTF8PROC_BOUNDCLASS_LF = 3, /**< Lf */ - UTF8PROC_BOUNDCLASS_CONTROL = 4, /**< Control */ - UTF8PROC_BOUNDCLASS_EXTEND = 5, /**< Extend */ - UTF8PROC_BOUNDCLASS_L = 6, /**< L */ - UTF8PROC_BOUNDCLASS_V = 7, /**< V */ - UTF8PROC_BOUNDCLASS_T = 8, /**< T */ - UTF8PROC_BOUNDCLASS_LV = 9, /**< Lv */ - UTF8PROC_BOUNDCLASS_LVT = 10, /**< Lvt */ - UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR = 11, /**< Regional indicator */ - UTF8PROC_BOUNDCLASS_SPACINGMARK = 12, /**< Spacingmark */ - UTF8PROC_BOUNDCLASS_PREPEND = 13, /**< Prepend */ - UTF8PROC_BOUNDCLASS_ZWJ = 14, /**< Zero Width Joiner */ - - /* the following are no longer used in Unicode 11, but we keep - the constants here for backward compatibility */ - UTF8PROC_BOUNDCLASS_E_BASE = 15, /**< Emoji Base */ - UTF8PROC_BOUNDCLASS_E_MODIFIER = 16, /**< Emoji Modifier */ - UTF8PROC_BOUNDCLASS_GLUE_AFTER_ZWJ = 17, /**< Glue_After_ZWJ */ - UTF8PROC_BOUNDCLASS_E_BASE_GAZ = 18, /**< E_BASE + GLUE_AFTER_ZJW */ - - 
/* the Extended_Pictographic property is used in the Unicode 11 - grapheme-boundary rules, so we store it in the boundclass field */ - UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC = 19, - UTF8PROC_BOUNDCLASS_E_ZWG = 20, /* UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC + ZWJ */ -} utf8proc_boundclass_t; - -/** - * Function pointer type passed to @ref utf8proc_map_custom and - * @ref utf8proc_decompose_custom, which is used to specify a user-defined - * mapping of codepoints to be applied in conjunction with other mappings. - */ -typedef utf8proc_int32_t (*utf8proc_custom_func)(utf8proc_int32_t codepoint, void *data); - -/** - * Array containing the byte lengths of a UTF-8 encoded codepoint based - * on the first byte. - */ -// UTF8PROC_DLLEXPORT extern const utf8proc_int8_t utf8proc_utf8class[256]; - -/** - * Returns the utf8proc API version as a string MAJOR.MINOR.PATCH - * (http://semver.org format), possibly with a "-dev" suffix for - * development versions. - */ -UTF8PROC_DLLEXPORT const char *utf8proc_version(void); - -/** - * Returns the utf8proc supported Unicode version as a string MAJOR.MINOR.PATCH. - */ -UTF8PROC_DLLEXPORT const char *utf8proc_unicode_version(void); - -/** - * Returns an informative error string for the given utf8proc error code - * (e.g. the error codes returned by @ref utf8proc_map). - */ -UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode); - -/** - * Reads a single codepoint from the UTF-8 sequence being pointed to by `str`. - * The maximum number of bytes read is `strlen`, unless `strlen` is - * negative (in which case up to 4 bytes are read). - * - * If a valid codepoint could be read, it is stored in the variable - * pointed to by `codepoint_ref`, otherwise that variable will be set to -1. - * In case of success, the number of bytes read is returned; otherwise, a - * negative error code is returned. 
- */ -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *codepoint_ref); - -/** - * Check if a codepoint is valid (regardless of whether it has been - * assigned a value by the current Unicode standard). - * - * @return 1 if the given `codepoint` is valid and otherwise return 0. - */ -UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t codepoint); +struct ConstantFun { + static CompressionFunction GetFunction(PhysicalType type); + static bool TypeIsSupported(PhysicalType type); +}; -/** - * Encodes the codepoint as an UTF-8 string in the byte array pointed - * to by `dst`. This array must be at least 4 bytes long. - * - * In case of success the number of bytes written is returned, and - * otherwise 0 is returned. - * - * This function does not check whether `codepoint` is valid Unicode. - */ -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t codepoint, utf8proc_uint8_t *dst); +struct UncompressedFun { + static CompressionFunction GetFunction(PhysicalType type); + static bool TypeIsSupported(PhysicalType type); +}; -/** - * Look up the properties for a given codepoint. - * - * @param codepoint The Unicode codepoint. - * - * @returns - * A pointer to a (constant) struct containing information about - * the codepoint. - * @par - * If the codepoint is unassigned or invalid, a pointer to a special struct is - * returned in which `category` is 0 (@ref UTF8PROC_CATEGORY_CN). - */ -UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int32_t codepoint); +struct RLEFun { + static CompressionFunction GetFunction(PhysicalType type); + static bool TypeIsSupported(PhysicalType type); +}; -/** Decompose a codepoint into an array of codepoints. - * - * @param codepoint the codepoint. - * @param dst the destination buffer. - * @param bufsize the size of the destination buffer. 
- * @param options one or more of the following flags: - * - @ref UTF8PROC_REJECTNA - return an error `codepoint` is unassigned - * - @ref UTF8PROC_IGNORE - strip "default ignorable" codepoints - * - @ref UTF8PROC_CASEFOLD - apply Unicode casefolding - * - @ref UTF8PROC_COMPAT - replace certain codepoints with their - * compatibility decomposition - * - @ref UTF8PROC_CHARBOUND - insert 0xFF bytes before each grapheme cluster - * - @ref UTF8PROC_LUMP - lump certain different codepoints together - * - @ref UTF8PROC_STRIPMARK - remove all character marks - * - @ref UTF8PROC_STRIPNA - remove unassigned codepoints - * @param last_boundclass - * Pointer to an integer variable containing - * the previous codepoint's boundary class if the @ref UTF8PROC_CHARBOUND - * option is used. Otherwise, this parameter is ignored. - * - * @return - * In case of success, the number of codepoints written is returned; in case - * of an error, a negative error code is returned (@ref utf8proc_errmsg). - * @par - * If the number of written codepoints would be bigger than `bufsize`, the - * required buffer size is returned, while the buffer will be overwritten with - * undefined data. - */ -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char( - utf8proc_int32_t codepoint, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, - utf8proc_option_t options, int *last_boundclass -); +} // namespace duckdb -/** - * The same as @ref utf8proc_decompose_char, but acts on a whole UTF-8 - * string and orders the decomposed sequences correctly. - * - * If the @ref UTF8PROC_NULLTERM flag in `options` is set, processing - * will be stopped, when a NULL byte is encounted, otherwise `strlen` - * bytes are processed. The result (in the form of 32-bit unicode - * codepoints) is written into the buffer being pointed to by - * `buffer` (which must contain at least `bufsize` entries). 
In case of - * success, the number of codepoints written is returned; in case of an - * error, a negative error code is returned (@ref utf8proc_errmsg). - * See @ref utf8proc_decompose_custom to supply additional transformations. - * - * If the number of written codepoints would be bigger than `bufsize`, the - * required buffer size is returned, while the buffer will be overwritten with - * undefined data. - */ -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose( - const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, - utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options -); -/** - * The same as @ref utf8proc_decompose, but also takes a `custom_func` mapping function - * that is called on each codepoint in `str` before any other transformations - * (along with a `custom_data` pointer that is passed through to `custom_func`). - * The `custom_func` argument is ignored if it is `NULL`. See also @ref utf8proc_map_custom. - */ -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_custom( - const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, - utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options, - utf8proc_custom_func custom_func, void *custom_data -); -/** - * Normalizes the sequence of `length` codepoints pointed to by `buffer` - * in-place (i.e., the result is also stored in `buffer`). - * - * @param buffer the (native-endian UTF-32) unicode codepoints to re-encode. - * @param length the length (in codepoints) of the buffer. 
- * @param options a bitwise or (`|`) of one or more of the following flags: - * - @ref UTF8PROC_NLF2LS - convert LF, CRLF, CR and NEL into LS - * - @ref UTF8PROC_NLF2PS - convert LF, CRLF, CR and NEL into PS - * - @ref UTF8PROC_NLF2LF - convert LF, CRLF, CR and NEL into LF - * - @ref UTF8PROC_STRIPCC - strip or convert all non-affected control characters - * - @ref UTF8PROC_COMPOSE - try to combine decomposed codepoints into composite - * codepoints - * - @ref UTF8PROC_STABLE - prohibit combining characters that would violate - * the unicode versioning stability - * - * @return - * In case of success, the length (in codepoints) of the normalized UTF-32 string is - * returned; otherwise, a negative error code is returned (@ref utf8proc_errmsg). - * - * @warning The entries of the array pointed to by `str` have to be in the - * range `0x0000` to `0x10FFFF`. Otherwise, the program might crash! - */ -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options); +namespace duckdb { -/** - * Reencodes the sequence of `length` codepoints pointed to by `buffer` - * UTF-8 data in-place (i.e., the result is also stored in `buffer`). - * Can optionally normalize the UTF-32 sequence prior to UTF-8 conversion. - * - * @param buffer the (native-endian UTF-32) unicode codepoints to re-encode. - * @param length the length (in codepoints) of the buffer. 
- * @param options a bitwise or (`|`) of one or more of the following flags: - * - @ref UTF8PROC_NLF2LS - convert LF, CRLF, CR and NEL into LS - * - @ref UTF8PROC_NLF2PS - convert LF, CRLF, CR and NEL into PS - * - @ref UTF8PROC_NLF2LF - convert LF, CRLF, CR and NEL into LF - * - @ref UTF8PROC_STRIPCC - strip or convert all non-affected control characters - * - @ref UTF8PROC_COMPOSE - try to combine decomposed codepoints into composite - * codepoints - * - @ref UTF8PROC_STABLE - prohibit combining characters that would violate - * the unicode versioning stability - * - @ref UTF8PROC_CHARBOUND - insert 0xFF bytes before each grapheme cluster - * - * @return - * In case of success, the length (in bytes) of the resulting nul-terminated - * UTF-8 string is returned; otherwise, a negative error code is returned - * (@ref utf8proc_errmsg). - * - * @warning The amount of free space pointed to by `buffer` must - * exceed the amount of the input data by one byte, and the - * entries of the array pointed to by `str` have to be in the - * range `0x0000` to `0x10FFFF`. Otherwise, the program might crash! - */ -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options); +typedef CompressionFunction (*get_compression_function_t)(PhysicalType type); +typedef bool (*compression_supports_type_t)(PhysicalType type); -/** - * Given a pair of consecutive codepoints, return whether a grapheme break is - * permitted between them (as defined by the extended grapheme clusters in UAX#29). - * - * @param codepoint1 The first codepoint. - * @param codepoint2 The second codepoint, occurring consecutively after `codepoint1`. - * @param state Beginning with Version 29 (Unicode 9.0.0), this algorithm requires - * state to break graphemes. This state can be passed in as a pointer - * in the `state` argument and should initially be set to 0. If the - * state is not passed in (i.e. 
a null pointer is passed), UAX#29 rules - * GB10/12/13 which require this state will not be applied, essentially - * matching the rules in Unicode 8.0.0. - * - * @warning If the state parameter is used, `utf8proc_grapheme_break_stateful` must - * be called IN ORDER on ALL potential breaks in a string. However, it - * is safe to reset the state to zero after a grapheme break. - */ -UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break_stateful( - utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2, utf8proc_int32_t *state); +struct DefaultCompressionMethod { + CompressionType type; + get_compression_function_t get_function; + compression_supports_type_t supports_type; +}; -/** - * Same as @ref utf8proc_grapheme_break_stateful, except without support for the - * Unicode 9 additions to the algorithm. Supported for legacy reasons. - */ -UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break( - utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2); +static DefaultCompressionMethod internal_compression_methods[] = { + {CompressionType::COMPRESSION_CONSTANT, ConstantFun::GetFunction, ConstantFun::TypeIsSupported}, + {CompressionType::COMPRESSION_UNCOMPRESSED, UncompressedFun::GetFunction, UncompressedFun::TypeIsSupported}, + {CompressionType::COMPRESSION_RLE, RLEFun::GetFunction, RLEFun::TypeIsSupported}, + {CompressionType::COMPRESSION_INVALID, nullptr, nullptr}}; -//! Returns the current UTF8 codepoint in a UTF8 string. Assumes the string is valid UTF8. 
-UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_codepoint(const char *u_input, int &sz); -UTF8PROC_DLLEXPORT utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t *state); -UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_codepoint(const char *u_input, int &sz); -UTF8PROC_DLLEXPORT bool utf8proc_codepoint_to_utf8(int cp, int &sz, char *c); -UTF8PROC_DLLEXPORT int utf8proc_codepoint_length(int cp); -UTF8PROC_DLLEXPORT size_t utf8proc_next_grapheme(const char *s, size_t len, size_t cpos); -UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_remove_accents(const utf8proc_uint8_t *str, utf8proc_ssize_t len); -template -void utf8proc_grapheme_callback(const char *s, size_t len, T &&fun) { - int sz; - int boundclass = UTF8PROC_BOUNDCLASS_START; - int initial = utf8proc_get_property(utf8proc_codepoint(s, sz))->boundclass; - grapheme_break_extended(boundclass, initial, &boundclass); - size_t start = 0; - size_t cpos = 0; - while(true) { - cpos += sz; - if (cpos >= len) { - fun(start, cpos); - return; +static CompressionFunction *FindCompressionFunction(CompressionFunctionSet &set, CompressionType type, + PhysicalType data_type) { + auto &functions = set.functions; + auto comp_entry = functions.find(type); + if (comp_entry != functions.end()) { + auto &type_functions = comp_entry->second; + auto type_entry = type_functions.find(data_type); + if (type_entry != type_functions.end()) { + return &type_entry->second; } - int next = utf8proc_get_property(utf8proc_codepoint(s + cpos, sz))->boundclass; - if (grapheme_break_extended(boundclass, next, &boundclass)) { - if (!fun(start, cpos)) { - return; + } + return nullptr; +} + +static CompressionFunction *LoadCompressionFunction(CompressionFunctionSet &set, CompressionType type, + PhysicalType data_type) { + for (idx_t index = 0; internal_compression_methods[index].get_function; index++) { + const auto &method = internal_compression_methods[index]; + if (method.type == type) { + // found the correct compression type + if 
(!method.supports_type(data_type)) { + // but it does not support this data type: bail out + return nullptr; } - start = cpos; + // the type is supported: create the function and insert it into the set + auto function = method.get_function(data_type); + set.functions[type].insert(make_pair(data_type, function)); + return FindCompressionFunction(set, type, data_type); } } + throw InternalException("Unsupported compression function type"); } -/** - * Given a codepoint `c`, return the codepoint of the corresponding - * lower-case character, if any; otherwise (if there is no lower-case - * variant, or if `c` is not a valid codepoint) return `c`. - */ -UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c); - -/** - * Given a codepoint `c`, return the codepoint of the corresponding - * upper-case character, if any; otherwise (if there is no upper-case - * variant, or if `c` is not a valid codepoint) return `c`. - */ -UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c); - -/** - * Given a codepoint `c`, return the codepoint of the corresponding - * title-case character, if any; otherwise (if there is no title-case - * variant, or if `c` is not a valid codepoint) return `c`. - */ -UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c); - -/** - * Given a codepoint, return a character width analogous to `wcwidth(codepoint)`, - * except that a width of 0 is returned for non-printable codepoints - * instead of -1 as in `wcwidth`. - * - * @note - * If you want to check for particular types of non-printable characters, - * (analogous to `isprint` or `iscntrl`), use @ref utf8proc_category. */ - UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t codepoint); - -/** - * Return the Unicode category for the codepoint (one of the - * @ref utf8proc_category_t constants.) 
- */ -UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(utf8proc_int32_t codepoint); - -/** - * Return the two-letter (nul-terminated) Unicode category string for - * the codepoint (e.g. `"Lu"` or `"Co"`). - */ -UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoint); - -/** - * Maps the given UTF-8 string pointed to by `str` to a new UTF-8 - * string, allocated dynamically by `malloc` and returned via `dstptr`. - * - * If the @ref UTF8PROC_NULLTERM flag in the `options` field is set, - * the length is determined by a NULL terminator, otherwise the - * parameter `strlen` is evaluated to determine the string length, but - * in any case the result will be NULL terminated (though it might - * contain NULL characters with the string if `str` contained NULL - * characters). Other flags in the `options` field are passed to the - * functions defined above, and regarded as described. See also - * @ref utf8proc_map_custom to supply a custom codepoint transformation. - * - * In case of success the length of the new string is returned, - * otherwise a negative error code is returned. - * - * @note The memory of the new UTF-8 string will have been allocated - * with `malloc`, and should therefore be deallocated with `free`. - */ -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map( - const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options -); - -/** - * Like @ref utf8proc_map, but also takes a `custom_func` mapping function - * that is called on each codepoint in `str` before any other transformations - * (along with a `custom_data` pointer that is passed through to `custom_func`). - * The `custom_func` argument is ignored if it is `NULL`. 
- */ -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom( - const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options, - utf8proc_custom_func custom_func, void *custom_data -); - -/** @name Unicode normalization - * - * Returns a pointer to newly allocated memory of a NFD, NFC, NFKD, NFKC or - * NFKC_Casefold normalized version of the null-terminated string `str`. These - * are shortcuts to calling @ref utf8proc_map with @ref UTF8PROC_NULLTERM - * combined with @ref UTF8PROC_STABLE and flags indicating the normalization. - */ -/** @{ */ -/** NFD normalization (@ref UTF8PROC_DECOMPOSE). */ -UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str, utf8proc_ssize_t len); -/** NFC normalization (@ref UTF8PROC_COMPOSE). */ -UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str, utf8proc_ssize_t len); -/** NFKD normalization (@ref UTF8PROC_DECOMPOSE and @ref UTF8PROC_COMPAT). */ -UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str, utf8proc_ssize_t len); -/** NFKC normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */ -UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str, utf8proc_ssize_t len); -/** - * NFKC_Casefold normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT - * and @ref UTF8PROC_CASEFOLD and @ref UTF8PROC_IGNORE). 
- **/ -UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8_t *str, utf8proc_ssize_t len); -/** @} */ - -//#ifdef __cplusplus -//} -//#endif +static void TryLoadCompression(DBConfig &config, vector &result, CompressionType type, + PhysicalType data_type) { + auto function = config.GetCompressionFunction(type, data_type); + if (!function) { + return; + } + result.push_back(function); } -#endif - - -// LICENSE_CHANGE_END - -namespace re2 { -class RE2; +vector DBConfig::GetCompressionFunctions(PhysicalType data_type) { + vector result; + TryLoadCompression(*this, result, CompressionType::COMPRESSION_UNCOMPRESSED, data_type); + TryLoadCompression(*this, result, CompressionType::COMPRESSION_RLE, data_type); + return result; } -namespace duckdb { - -struct ReverseFun { - static void RegisterFunction(BuiltinFunctions &set); -}; - -struct LowerFun { - static uint8_t ascii_to_lower_map[]; - - //! Returns the length of the result string obtained from lowercasing the given input (in bytes) - static idx_t LowerLength(const char *input_data, idx_t input_length); - //! 
Lowercases the string to the target output location, result_data must have space for at least LowerLength bytes - static void LowerCase(const char *input_data, idx_t input_length, char *result_data); - - static ScalarFunction GetFunction(); - static void RegisterFunction(BuiltinFunctions &set); -}; - -struct UpperFun { - static uint8_t ascii_to_upper_map[]; - - static void RegisterFunction(BuiltinFunctions &set); -}; - -struct StripAccentsFun { - static bool IsAscii(const char *input, idx_t n); - static ScalarFunction GetFunction(); - static void RegisterFunction(BuiltinFunctions &set); -}; - -struct ConcatFun { - static void RegisterFunction(BuiltinFunctions &set); -}; - -struct ConcatWSFun { - static void RegisterFunction(BuiltinFunctions &set); -}; - -struct LengthFun { - static void RegisterFunction(BuiltinFunctions &set); - template - static inline TR Length(TA input) { - auto input_data = input.GetDataUnsafe(); - auto input_length = input.GetSize(); - for (idx_t i = 0; i < input_length; i++) { - if (input_data[i] & 0x80) { - int64_t length = 0; - // non-ascii character: use grapheme iterator on remainder of string - utf8proc_grapheme_callback(input_data, input_length, [&](size_t start, size_t end) { - length++; - return true; - }); - return length; - } - } - return input_length; +CompressionFunction *DBConfig::GetCompressionFunction(CompressionType type, PhysicalType data_type) { + // check if the function is already loaded + auto function = FindCompressionFunction(*compression_functions, type, data_type); + if (function) { + return function; } -}; - -struct LikeFun { - static void RegisterFunction(BuiltinFunctions &set); - static bool Glob(const char *s, idx_t slen, const char *pattern, idx_t plen); -}; - -struct LikeEscapeFun { - static void RegisterFunction(BuiltinFunctions &set); -}; - -struct LpadFun { - static void RegisterFunction(BuiltinFunctions &set); -}; - -struct LeftFun { - static void RegisterFunction(BuiltinFunctions &set); -}; - -struct MD5Fun 
{ - static void RegisterFunction(BuiltinFunctions &set); -}; - -struct NFCNormalizeFun { - static ScalarFunction GetFunction(); - static void RegisterFunction(BuiltinFunctions &set); -}; - -struct RightFun { - static void RegisterFunction(BuiltinFunctions &set); -}; - -struct RegexpFun { - static void RegisterFunction(BuiltinFunctions &set); -}; - -struct SubstringFun { - static void RegisterFunction(BuiltinFunctions &set); - static string_t SubstringScalarFunction(Vector &result, string_t input, int32_t offset, int32_t length); -}; - -struct PrintfFun { - static void RegisterFunction(BuiltinFunctions &set); -}; - -struct InstrFun { - static void RegisterFunction(BuiltinFunctions &set); -}; - -struct PrefixFun { - static ScalarFunction GetFunction(); - static void RegisterFunction(BuiltinFunctions &set); -}; - -struct RepeatFun { - static void RegisterFunction(BuiltinFunctions &set); -}; - -struct ReplaceFun { - static void RegisterFunction(BuiltinFunctions &set); -}; - -struct RpadFun { - static void RegisterFunction(BuiltinFunctions &set); -}; - -struct SuffixFun { - static ScalarFunction GetFunction(); - static void RegisterFunction(BuiltinFunctions &set); -}; + // else load the function + return LoadCompressionFunction(*compression_functions, type, data_type); +} -struct TrimFun { - static void RegisterFunction(BuiltinFunctions &set); -}; +} // namespace duckdb -struct ContainsFun { - static ScalarFunction GetFunction(); - static void RegisterFunction(BuiltinFunctions &set); - static idx_t Find(const string_t &haystack, const string_t &needle); - static idx_t Find(const unsigned char *haystack, idx_t haystack_size, const unsigned char *needle, - idx_t needle_size); -}; -struct UnicodeFun { - static void RegisterFunction(BuiltinFunctions &set); -}; -struct StringSplitFun { - static void RegisterFunction(BuiltinFunctions &set); -}; -struct ASCII { - static void RegisterFunction(BuiltinFunctions &set); -}; -struct CHR { - static void 
RegisterFunction(BuiltinFunctions &set); -}; -struct MismatchesFun { - static void RegisterFunction(BuiltinFunctions &set); -}; -struct LevenshteinFun { - static void RegisterFunction(BuiltinFunctions &set); -}; -struct JaccardFun { - static void RegisterFunction(BuiltinFunctions &set); -}; -} // namespace duckdb @@ -62313,7 +67429,6 @@ void BuiltinFunctions::Initialize() { RegisterReadFunctions(); RegisterTableFunctions(); RegisterArrowFunctions(); - RegisterInformationSchemaFunctions(); RegisterAlgebraicAggregates(); RegisterDistributiveAggregates(); @@ -62783,22 +67898,24 @@ struct PragmaFunctions { + + #include namespace duckdb { static void PragmaEnableProfilingStatement(ClientContext &context, const FunctionParameters ¶meters) { - context.profiler.automatic_print_format = ProfilerPrintFormat::QUERY_TREE; - context.profiler.Enable(); + context.profiler->automatic_print_format = ProfilerPrintFormat::QUERY_TREE; + context.profiler->Enable(); } static void PragmaSetProfilingModeStatement(ClientContext &context, const FunctionParameters ¶meters) { // this is either profiling_mode = standard, or profiling_mode = detailed string mode = StringUtil::Lower(parameters.values[0].ToString()); if (mode == "standard") { - context.profiler.Enable(); + context.profiler->Enable(); } else if (mode == "detailed") { - context.profiler.DetailedEnable(); + context.profiler->DetailedEnable(); } else { throw ParserException("Unrecognized print format %s, supported formats: [standard, detailed]", mode); } @@ -62809,23 +67926,23 @@ static void PragmaSetProfilerHistorySize(ClientContext &context, const FunctionP if (size <= 0) { throw ParserException("Size should be larger than 0"); } - context.query_profiler_history.SetProfilerHistorySize(size); + context.query_profiler_history->SetProfilerHistorySize(size); } static void PragmaEnableProfilingAssignment(ClientContext &context, const FunctionParameters ¶meters) { // this is either enable_profiling = json, or enable_profiling = 
query_tree string assignment = parameters.values[0].ToString(); if (assignment == "json") { - context.profiler.automatic_print_format = ProfilerPrintFormat::JSON; + context.profiler->automatic_print_format = ProfilerPrintFormat::JSON; } else if (assignment == "query_tree") { - context.profiler.automatic_print_format = ProfilerPrintFormat::QUERY_TREE; + context.profiler->automatic_print_format = ProfilerPrintFormat::QUERY_TREE; } else if (assignment == "query_tree_optimizer") { - context.profiler.automatic_print_format = ProfilerPrintFormat::QUERY_TREE_OPTIMIZER; + context.profiler->automatic_print_format = ProfilerPrintFormat::QUERY_TREE_OPTIMIZER; } else { throw ParserException( "Unrecognized print format %s, supported formats: [json, query_tree, query_tree_optimizer]", assignment); } - context.profiler.Enable(); + context.profiler->Enable(); } void RegisterEnableProfiling(BuiltinFunctions &set) { @@ -62839,18 +67956,16 @@ void RegisterEnableProfiling(BuiltinFunctions &set) { } static void PragmaDisableProfiling(ClientContext &context, const FunctionParameters ¶meters) { - context.profiler.Disable(); - context.profiler.automatic_print_format = ProfilerPrintFormat::NONE; + context.profiler->Disable(); + context.profiler->automatic_print_format = ProfilerPrintFormat::NONE; } static void PragmaProfileOutput(ClientContext &context, const FunctionParameters ¶meters) { - context.profiler.save_location = parameters.values[0].ToString(); + context.profiler->save_location = parameters.values[0].ToString(); } -static idx_t ParseMemoryLimit(string arg); - static void PragmaMemoryLimit(ClientContext &context, const FunctionParameters ¶meters) { - idx_t new_limit = ParseMemoryLimit(parameters.values[0].ToString()); + idx_t new_limit = DBConfig::ParseMemoryLimit(parameters.values[0].ToString()); // set the new limit in the buffer manager BufferManager::GetBufferManager(context).SetLimit(new_limit); } @@ -62896,6 +68011,7 @@ static void PragmaSetThreads(ClientContext &context, 
const FunctionParameters &p static void PragmaEnableProgressBar(ClientContext &context, const FunctionParameters ¶meters) { context.enable_progress_bar = true; } + static void PragmaSetProgressBarWaitTime(ClientContext &context, const FunctionParameters ¶meters) { context.wait_time = parameters.values[0].GetValue(); context.enable_progress_bar = true; @@ -62937,6 +68053,14 @@ static void PragmaDisableForceParallelism(ClientContext &context, const Function context.force_parallelism = false; } +static void PragmaEnableForceExternal(ClientContext &context, const FunctionParameters ¶meters) { + context.force_external = true; +} + +static void PragmaDisableForceExternal(ClientContext &context, const FunctionParameters ¶meters) { + context.force_external = false; +} + static void PragmaEnableObjectCache(ClientContext &context, const FunctionParameters ¶meters) { DBConfig::GetConfig(context).object_cache_enable = true; } @@ -62995,7 +68119,7 @@ static void PragmaPerfectHashThreshold(ClientContext &context, const FunctionPar } static void PragmaAutoCheckpointThreshold(ClientContext &context, const FunctionParameters ¶meters) { - idx_t new_limit = ParseMemoryLimit(parameters.values[0].ToString()); + idx_t new_limit = DBConfig::ParseMemoryLimit(parameters.values[0].ToString()); DBConfig::GetConfig(context).checkpoint_wal_size = new_limit; } @@ -63014,6 +68138,26 @@ static void PragmaDebugCheckpointAbort(ClientContext &context, const FunctionPar } } +static void PragmaSetTempDirectory(ClientContext &context, const FunctionParameters ¶meters) { + auto &buffer_manager = BufferManager::GetBufferManager(context); + buffer_manager.SetTemporaryDirectory(parameters.values[0].ToString()); +} + +static void PragmaForceCompression(ClientContext &context, const FunctionParameters ¶meters) { + auto compression = StringUtil::Lower(parameters.values[0].ToString()); + auto &config = DBConfig::GetConfig(context); + if (compression == "none") { + config.force_compression = 
CompressionType::COMPRESSION_INVALID; + } else { + auto compression_type = CompressionTypeFromString(compression); + if (compression_type == CompressionType::COMPRESSION_INVALID) { + throw ParserException("Unrecognized option for PRAGMA force_compression, expected none, uncompressed, rle, " + "dictionary, pfor, bitpacking or fsst"); + } + config.force_compression = compression_type; + } +} + void PragmaFunctions::RegisterFunction(BuiltinFunctions &set) { RegisterEnableProfiling(set); @@ -63048,6 +68192,9 @@ void PragmaFunctions::RegisterFunction(BuiltinFunctions &set) { set.AddFunction(PragmaFunction::PragmaStatement("force_parallelism", PragmaEnableForceParallelism)); set.AddFunction(PragmaFunction::PragmaStatement("disable_force_parallelism", PragmaDisableForceParallelism)); + set.AddFunction(PragmaFunction::PragmaStatement("force_external", PragmaEnableForceExternal)); + set.AddFunction(PragmaFunction::PragmaStatement("disable_force_external", PragmaDisableForceExternal)); + set.AddFunction(PragmaFunction::PragmaStatement("enable_object_cache", PragmaEnableObjectCache)); set.AddFunction(PragmaFunction::PragmaStatement("disable_object_cache", PragmaDisableObjectCache)); @@ -63083,58 +68230,11 @@ void PragmaFunctions::RegisterFunction(BuiltinFunctions &set) { set.AddFunction( PragmaFunction::PragmaAssignment("debug_checkpoint_abort", PragmaDebugCheckpointAbort, LogicalType::VARCHAR)); -} - -idx_t ParseMemoryLimit(string arg) { - if (arg[0] == '-' || arg == "null" || arg == "none") { - return INVALID_INDEX; - } - // split based on the number/non-number - idx_t idx = 0; - while (StringUtil::CharacterIsSpace(arg[idx])) { - idx++; - } - idx_t num_start = idx; - while ((arg[idx] >= '0' && arg[idx] <= '9') || arg[idx] == '.' || arg[idx] == 'e' || arg[idx] == 'E' || - arg[idx] == '-') { - idx++; - } - if (idx == num_start) { - throw ParserException("Memory limit must have a number (e.g. 
PRAGMA memory_limit=1GB"); - } - string number = arg.substr(num_start, idx - num_start); - // try to parse the number - double limit = Cast::Operation(string_t(number)); + set.AddFunction(PragmaFunction::PragmaAssignment("temp_directory", PragmaSetTempDirectory, LogicalType::VARCHAR)); - // now parse the memory limit unit (e.g. bytes, gb, etc) - while (StringUtil::CharacterIsSpace(arg[idx])) { - idx++; - } - idx_t start = idx; - while (idx < arg.size() && !StringUtil::CharacterIsSpace(arg[idx])) { - idx++; - } - if (limit < 0) { - // limit < 0, set limit to infinite - return (idx_t)-1; - } - string unit = StringUtil::Lower(arg.substr(start, idx - start)); - idx_t multiplier; - if (unit == "byte" || unit == "bytes" || unit == "b") { - multiplier = 1; - } else if (unit == "kilobyte" || unit == "kilobytes" || unit == "kb" || unit == "k") { - multiplier = 1000LL; - } else if (unit == "megabyte" || unit == "megabytes" || unit == "mb" || unit == "m") { - multiplier = 1000LL * 1000LL; - } else if (unit == "gigabyte" || unit == "gigabytes" || unit == "gb" || unit == "g") { - multiplier = 1000LL * 1000LL * 1000LL; - } else if (unit == "terabyte" || unit == "terabytes" || unit == "tb" || unit == "t") { - multiplier = 1000LL * 1000LL * 1000LL * 1000LL; - } else { - throw ParserException("Unknown unit for memory_limit: %s (expected: b, mb, gb or tb)", unit); - } - return (idx_t)multiplier * limit; + set.AddFunction( + PragmaFunction::PragmaAssignment("force_compression", PragmaForceCompression, LogicalType::VARCHAR)); } } // namespace duckdb @@ -63148,7 +68248,7 @@ string PragmaTableInfo(ClientContext &context, const FunctionParameters ¶met } string PragmaShowTables(ClientContext &context, const FunctionParameters ¶meters) { - return "SELECT name FROM sqlite_master() ORDER BY name"; + return "SELECT name FROM sqlite_master ORDER BY name"; } string PragmaAllProfiling(ClientContext &context, const FunctionParameters ¶meters) { @@ -63201,8 +68301,13 @@ string 
PragmaDatabaseSize(ClientContext &context, const FunctionParameters ¶ return "SELECT * FROM pragma_database_size()"; } +string PragmaStorageInfo(ClientContext &context, const FunctionParameters ¶meters) { + return StringUtil::Format("SELECT * FROM pragma_storage_info('%s')", parameters.values[0].ToString()); +} + void PragmaQueries::RegisterFunction(BuiltinFunctions &set) { set.AddFunction(PragmaFunction::PragmaCall("table_info", PragmaTableInfo, {LogicalType::VARCHAR})); + set.AddFunction(PragmaFunction::PragmaCall("storage_info", PragmaStorageInfo, {LogicalType::VARCHAR})); set.AddFunction(PragmaFunction::PragmaStatement("show_tables", PragmaShowTables)); set.AddFunction(PragmaFunction::PragmaStatement("database_list", PragmaDatabaseList)); set.AddFunction(PragmaFunction::PragmaStatement("collations", PragmaCollations)); @@ -63238,22 +68343,22 @@ PragmaFunction PragmaFunction::PragmaCall(const string &name, pragma_function_t PragmaFunction PragmaFunction::PragmaStatement(const string &name, pragma_query_t query) { vector types; - return PragmaFunction(name, PragmaType::PRAGMA_STATEMENT, query, nullptr, types, LogicalType::INVALID); + return PragmaFunction(name, PragmaType::PRAGMA_STATEMENT, query, nullptr, move(types), LogicalType::INVALID); } PragmaFunction PragmaFunction::PragmaStatement(const string &name, pragma_function_t function) { vector types; - return PragmaFunction(name, PragmaType::PRAGMA_STATEMENT, nullptr, function, types, LogicalType::INVALID); + return PragmaFunction(name, PragmaType::PRAGMA_STATEMENT, nullptr, function, move(types), LogicalType::INVALID); } PragmaFunction PragmaFunction::PragmaAssignment(const string &name, pragma_query_t query, LogicalType type) { vector types {move(type)}; - return PragmaFunction(name, PragmaType::PRAGMA_ASSIGNMENT, query, nullptr, types, LogicalType::INVALID); + return PragmaFunction(name, PragmaType::PRAGMA_ASSIGNMENT, query, nullptr, move(types), LogicalType::INVALID); } PragmaFunction 
PragmaFunction::PragmaAssignment(const string &name, pragma_function_t function, LogicalType type) { vector types {move(type)}; - return PragmaFunction(name, PragmaType::PRAGMA_ASSIGNMENT, nullptr, function, types, LogicalType::INVALID); + return PragmaFunction(name, PragmaType::PRAGMA_ASSIGNMENT, nullptr, function, move(types), LogicalType::INVALID); } string PragmaFunction::ToString() { @@ -63299,25 +68404,35 @@ struct EncodeFun { namespace duckdb { -static void Base64EncodeFunction(DataChunk &args, ExpressionState &state, Vector &result) { - // decode is also a nop cast, but requires verification if the provided string is actually - UnaryExecutor::Execute(args.data[0], result, args.size(), [&](string_t input) { +struct Base64EncodeOperator { + template + static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) { auto result_str = StringVector::EmptyString(result, Blob::ToBase64Size(input)); Blob::ToBase64(input, result_str.GetDataWriteable()); result_str.Finalize(); return result_str; - }); -} + } +}; -static void Base64DecodeFunction(DataChunk &args, ExpressionState &state, Vector &result) { - // decode is also a nop cast, but requires verification if the provided string is actually - UnaryExecutor::Execute(args.data[0], result, args.size(), [&](string_t input) { +struct Base64DecodeOperator { + template + static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) { auto result_size = Blob::FromBase64Size(input); auto result_blob = StringVector::EmptyString(result, result_size); Blob::FromBase64(input, (data_ptr_t)result_blob.GetDataWriteable(), result_size); result_blob.Finalize(); return result_blob; - }); + } +}; + +static void Base64EncodeFunction(DataChunk &args, ExpressionState &state, Vector &result) { + // decode is also a nop cast, but requires verification if the provided string is actually + UnaryExecutor::ExecuteString(args.data[0], result, args.size()); +} + +static void Base64DecodeFunction(DataChunk &args, ExpressionState &state, Vector 
&result) { + // decode is also a nop cast, but requires verification if the provided string is actually + UnaryExecutor::ExecuteString(args.data[0], result, args.size()); } void Base64Fun::RegisterFunction(BuiltinFunctions &set) { @@ -63338,17 +68453,13 @@ namespace duckdb { static void EncodeFunction(DataChunk &args, ExpressionState &state, Vector &result) { // encode is essentially a nop cast from varchar to blob - // we just reference the input vector and set the type to blob - result.SetType(args.data[0].GetType()); - result.SetVectorType(VectorType::FLAT_VECTOR); - VectorOperations::Copy(args.data[0], result, args.size(), 0, 0); - result.SetVectorType(args.data[0].GetVectorType()); - result.SetType(LogicalType::BLOB); + // we only need to reinterpret the data using the blob type + result.Reinterpret(args.data[0]); } -static void DecodeFunction(DataChunk &args, ExpressionState &state, Vector &result) { - // decode is also a nop cast, but requires verification if the provided string is actually - UnaryExecutor::Execute(args.data[0], result, args.size(), [&](string_t input) { +struct BlobDecodeOperator { + template + static RESULT_TYPE Operation(INPUT_TYPE input) { auto input_data = input.GetDataUnsafe(); auto input_length = input.GetSize(); if (Utf8Proc::Analyze(input_data, input_length) == UnicodeType::INVALID) { @@ -63356,7 +68467,12 @@ static void DecodeFunction(DataChunk &args, ExpressionState &state, Vector &resu "Failure in decode: could not convert blob to UTF8 string, the blob contained invalid UTF8 characters"); } return input; - }); + } +}; + +static void DecodeFunction(DataChunk &args, ExpressionState &state, Vector &result) { + // decode is also a nop cast, but requires verification if the provided string is actually + UnaryExecutor::Execute(args.data[0], result, args.size()); StringVector::AddHeapReference(result, args.data[0]); } @@ -63387,6 +68503,10 @@ struct AgeFun { static void RegisterFunction(BuiltinFunctions &set); }; +struct DateDiffFun { + 
static void RegisterFunction(BuiltinFunctions &set); +}; + struct DatePartFun { static void RegisterFunction(BuiltinFunctions &set); }; @@ -63537,58 +68657,467 @@ void CurrentTimestampFun::RegisterFunction(BuiltinFunctions &set) { + + + +namespace duckdb { + +// This function is an implementation of the "period-crossing" date difference function from T-SQL +// https://docs.microsoft.com/en-us/sql/t-sql/functions/datediff-transact-sql?view=sql-server-ver15 +struct DateDiff { + struct YearOperator { + template + static inline TR Operation(TA startdate, TB enddate) { + return Date::ExtractYear(enddate) - Date::ExtractYear(startdate); + } + }; + + struct MonthOperator { + template + static inline TR Operation(TA startdate, TB enddate) { + int32_t start_year, start_month, start_day; + Date::Convert(startdate, start_year, start_month, start_day); + int32_t end_year, end_month, end_day; + Date::Convert(enddate, end_year, end_month, end_day); + + return (end_year * 12 + end_month - 1) - (start_year * 12 + start_month - 1); + } + }; + + struct DayOperator { + template + static inline TR Operation(TA startdate, TB enddate) { + return Date::EpochDays(enddate) - Date::EpochDays(startdate); + } + }; + + struct DecadeOperator { + template + static inline TR Operation(TA startdate, TB enddate) { + return Date::ExtractYear(enddate) / 10 - Date::ExtractYear(startdate) / 10; + } + }; + + struct CenturyOperator { + template + static inline TR Operation(TA startdate, TB enddate) { + return Date::ExtractYear(enddate) / 100 - Date::ExtractYear(startdate) / 100; + } + }; + + struct MilleniumOperator { + template + static inline TR Operation(TA startdate, TB enddate) { + return Date::ExtractYear(enddate) / 1000 - Date::ExtractYear(startdate) / 1000; + } + }; + + struct QuarterOperator { + template + static inline TR Operation(TA startdate, TB enddate) { + int32_t start_year, start_month, start_day; + Date::Convert(startdate, start_year, start_month, start_day); + int32_t end_year, 
end_month, end_day; + Date::Convert(enddate, end_year, end_month, end_day); + + return (end_year * 12 + end_month - 1) / Interval::MONTHS_PER_QUARTER - + (start_year * 12 + start_month - 1) / Interval::MONTHS_PER_QUARTER; + } + }; + + struct WeekOperator { + template + static inline TR Operation(TA startdate, TB enddate) { + return Date::Epoch(enddate) / Interval::SECS_PER_WEEK - Date::Epoch(startdate) / Interval::SECS_PER_WEEK; + } + }; + + struct MicrosecondsOperator { + template + static inline TR Operation(TA startdate, TB enddate) { + return Date::EpochNanoseconds(enddate) / Interval::NANOS_PER_MICRO - + Date::EpochNanoseconds(startdate) / Interval::NANOS_PER_MICRO; + } + }; + + struct MillisecondsOperator { + template + static inline TR Operation(TA startdate, TB enddate) { + return Date::EpochNanoseconds(enddate) / Interval::NANOS_PER_MSEC - + Date::EpochNanoseconds(startdate) / Interval::NANOS_PER_MSEC; + } + }; + + struct SecondsOperator { + template + static inline TR Operation(TA startdate, TB enddate) { + return Date::Epoch(enddate) - Date::Epoch(startdate); + } + }; + + struct MinutesOperator { + template + static inline TR Operation(TA startdate, TB enddate) { + return Date::Epoch(enddate) / Interval::SECS_PER_MINUTE - + Date::Epoch(startdate) / Interval::SECS_PER_MINUTE; + } + }; + + struct HoursOperator { + template + static inline TR Operation(TA startdate, TB enddate) { + return Date::Epoch(enddate) / Interval::SECS_PER_HOUR - Date::Epoch(startdate) / Interval::SECS_PER_HOUR; + } + }; +}; + +// TIMESTAMP specialisations +template <> +int64_t DateDiff::YearOperator::Operation(timestamp_t startdate, timestamp_t enddate) { + return YearOperator::Operation(Timestamp::GetDate(startdate), Timestamp::GetDate(enddate)); +} + +template <> +int64_t DateDiff::MonthOperator::Operation(timestamp_t startdate, timestamp_t enddate) { + return MonthOperator::Operation(Timestamp::GetDate(startdate), + Timestamp::GetDate(enddate)); +} + +template <> +int64_t 
DateDiff::DayOperator::Operation(timestamp_t startdate, timestamp_t enddate) { + return DayOperator::Operation(Timestamp::GetDate(startdate), Timestamp::GetDate(enddate)); +} + +template <> +int64_t DateDiff::DecadeOperator::Operation(timestamp_t startdate, timestamp_t enddate) { + return DecadeOperator::Operation(Timestamp::GetDate(startdate), + Timestamp::GetDate(enddate)); +} + +template <> +int64_t DateDiff::CenturyOperator::Operation(timestamp_t startdate, timestamp_t enddate) { + return CenturyOperator::Operation(Timestamp::GetDate(startdate), + Timestamp::GetDate(enddate)); +} + +template <> +int64_t DateDiff::MilleniumOperator::Operation(timestamp_t startdate, timestamp_t enddate) { + return MilleniumOperator::Operation(Timestamp::GetDate(startdate), + Timestamp::GetDate(enddate)); +} + +template <> +int64_t DateDiff::QuarterOperator::Operation(timestamp_t startdate, timestamp_t enddate) { + return QuarterOperator::Operation(Timestamp::GetDate(startdate), + Timestamp::GetDate(enddate)); +} + +template <> +int64_t DateDiff::WeekOperator::Operation(timestamp_t startdate, timestamp_t enddate) { + return WeekOperator::Operation(Timestamp::GetDate(startdate), Timestamp::GetDate(enddate)); +} + +template <> +int64_t DateDiff::MicrosecondsOperator::Operation(timestamp_t startdate, timestamp_t enddate) { + return Timestamp::GetEpochMicroSeconds(enddate) - Timestamp::GetEpochMicroSeconds(startdate); +} + +template <> +int64_t DateDiff::MillisecondsOperator::Operation(timestamp_t startdate, timestamp_t enddate) { + return Timestamp::GetEpochMs(enddate) - Timestamp::GetEpochMs(startdate); +} + +template <> +int64_t DateDiff::SecondsOperator::Operation(timestamp_t startdate, timestamp_t enddate) { + return Timestamp::GetEpochSeconds(enddate) - Timestamp::GetEpochSeconds(startdate); +} + +template <> +int64_t DateDiff::MinutesOperator::Operation(timestamp_t startdate, timestamp_t enddate) { + return Timestamp::GetEpochSeconds(enddate) / Interval::SECS_PER_MINUTE - + 
Timestamp::GetEpochSeconds(startdate) / Interval::SECS_PER_MINUTE; +} + +template <> +int64_t DateDiff::HoursOperator::Operation(timestamp_t startdate, timestamp_t enddate) { + return Timestamp::GetEpochSeconds(enddate) / Interval::SECS_PER_HOUR - + Timestamp::GetEpochSeconds(startdate) / Interval::SECS_PER_HOUR; +} + +// TIME specialisations +template <> +int64_t DateDiff::YearOperator::Operation(dtime_t startdate, dtime_t enddate) { + throw NotImplementedException("\"time\" units \"year\" not recognized"); +} + +template <> +int64_t DateDiff::MonthOperator::Operation(dtime_t startdate, dtime_t enddate) { + throw NotImplementedException("\"time\" units \"month\" not recognized"); +} + +template <> +int64_t DateDiff::DayOperator::Operation(dtime_t startdate, dtime_t enddate) { + throw NotImplementedException("\"time\" units \"day\" not recognized"); +} + +template <> +int64_t DateDiff::DecadeOperator::Operation(dtime_t startdate, dtime_t enddate) { + throw NotImplementedException("\"time\" units \"decade\" not recognized"); +} + +template <> +int64_t DateDiff::CenturyOperator::Operation(dtime_t startdate, dtime_t enddate) { + throw NotImplementedException("\"time\" units \"century\" not recognized"); +} + +template <> +int64_t DateDiff::MilleniumOperator::Operation(dtime_t startdate, dtime_t enddate) { + throw NotImplementedException("\"time\" units \"millennium\" not recognized"); +} + +template <> +int64_t DateDiff::QuarterOperator::Operation(dtime_t startdate, dtime_t enddate) { + throw NotImplementedException("\"time\" units \"quarter\" not recognized"); +} + +template <> +int64_t DateDiff::WeekOperator::Operation(dtime_t startdate, dtime_t enddate) { + throw NotImplementedException("\"time\" units \"week\" not recognized"); +} + +template <> +int64_t DateDiff::MicrosecondsOperator::Operation(dtime_t startdate, dtime_t enddate) { + return enddate.micros - startdate.micros; +} + +template <> +int64_t DateDiff::MillisecondsOperator::Operation(dtime_t startdate, 
dtime_t enddate) { + return enddate.micros / Interval::MICROS_PER_MSEC - startdate.micros / Interval::MICROS_PER_MSEC; +} + +template <> +int64_t DateDiff::SecondsOperator::Operation(dtime_t startdate, dtime_t enddate) { + return enddate.micros / Interval::MICROS_PER_SEC - startdate.micros / Interval::MICROS_PER_SEC; +} + +template <> +int64_t DateDiff::MinutesOperator::Operation(dtime_t startdate, dtime_t enddate) { + return enddate.micros / Interval::MICROS_PER_MINUTE - startdate.micros / Interval::MICROS_PER_MINUTE; +} + +template <> +int64_t DateDiff::HoursOperator::Operation(dtime_t startdate, dtime_t enddate) { + return enddate.micros / Interval::MICROS_PER_HOUR - startdate.micros / Interval::MICROS_PER_HOUR; +} + +template +static int64_t DifferenceDates(DatePartSpecifier type, TA startdate, TB enddate) { + switch (type) { + case DatePartSpecifier::YEAR: + return DateDiff::YearOperator::template Operation(startdate, enddate); + case DatePartSpecifier::MONTH: + return DateDiff::MonthOperator::template Operation(startdate, enddate); + case DatePartSpecifier::DAY: + case DatePartSpecifier::DOW: + case DatePartSpecifier::ISODOW: + case DatePartSpecifier::DOY: + return DateDiff::DayOperator::template Operation(startdate, enddate); + case DatePartSpecifier::DECADE: + return DateDiff::DecadeOperator::template Operation(startdate, enddate); + case DatePartSpecifier::CENTURY: + return DateDiff::CenturyOperator::template Operation(startdate, enddate); + case DatePartSpecifier::MILLENNIUM: + return DateDiff::MilleniumOperator::template Operation(startdate, enddate); + case DatePartSpecifier::QUARTER: + return DateDiff::QuarterOperator::template Operation(startdate, enddate); + case DatePartSpecifier::WEEK: + case DatePartSpecifier::YEARWEEK: + return DateDiff::WeekOperator::template Operation(startdate, enddate); + case DatePartSpecifier::MICROSECONDS: + return DateDiff::MicrosecondsOperator::template Operation(startdate, enddate); + case 
DatePartSpecifier::MILLISECONDS: + return DateDiff::MillisecondsOperator::template Operation(startdate, enddate); + case DatePartSpecifier::SECOND: + case DatePartSpecifier::EPOCH: + return DateDiff::SecondsOperator::template Operation(startdate, enddate); + case DatePartSpecifier::MINUTE: + return DateDiff::MinutesOperator::template Operation(startdate, enddate); + case DatePartSpecifier::HOUR: + return DateDiff::HoursOperator::template Operation(startdate, enddate); + default: + throw NotImplementedException("Specifier type not implemented for DATEDIFF"); + } +} + +struct DateDiffTernaryOperator { + template + static inline TR Operation(TS part, TA startdate, TB enddate) { + return DifferenceDates(GetDatePartSpecifier(part.GetString()), startdate, enddate); + } +}; + +template +static void DateDiffBinaryExecutor(DatePartSpecifier type, Vector &left, Vector &right, Vector &result, idx_t count) { + switch (type) { + case DatePartSpecifier::YEAR: + BinaryExecutor::ExecuteStandard(left, right, result, count); + break; + case DatePartSpecifier::MONTH: + BinaryExecutor::ExecuteStandard(left, right, result, count); + break; + case DatePartSpecifier::DAY: + case DatePartSpecifier::DOW: + case DatePartSpecifier::ISODOW: + case DatePartSpecifier::DOY: + BinaryExecutor::ExecuteStandard(left, right, result, count); + break; + case DatePartSpecifier::DECADE: + BinaryExecutor::ExecuteStandard(left, right, result, count); + break; + case DatePartSpecifier::CENTURY: + BinaryExecutor::ExecuteStandard(left, right, result, count); + break; + case DatePartSpecifier::MILLENNIUM: + BinaryExecutor::ExecuteStandard(left, right, result, count); + break; + case DatePartSpecifier::QUARTER: + BinaryExecutor::ExecuteStandard(left, right, result, count); + break; + case DatePartSpecifier::WEEK: + case DatePartSpecifier::YEARWEEK: + BinaryExecutor::ExecuteStandard(left, right, result, count); + break; + case DatePartSpecifier::MICROSECONDS: + BinaryExecutor::ExecuteStandard(left, right, 
result, count); + break; + case DatePartSpecifier::MILLISECONDS: + BinaryExecutor::ExecuteStandard(left, right, result, count); + break; + case DatePartSpecifier::SECOND: + case DatePartSpecifier::EPOCH: + BinaryExecutor::ExecuteStandard(left, right, result, count); + break; + case DatePartSpecifier::MINUTE: + BinaryExecutor::ExecuteStandard(left, right, result, count); + break; + case DatePartSpecifier::HOUR: + BinaryExecutor::ExecuteStandard(left, right, result, count); + break; + default: + throw NotImplementedException("Specifier type not implemented for DATEDIFF"); + } +} + +template +static void DateDiffFunction(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.ColumnCount() == 3); + auto &part_arg = args.data[0]; + auto &startdate_arg = args.data[1]; + auto &enddate_arg = args.data[2]; + + if (part_arg.GetVectorType() == VectorType::CONSTANT_VECTOR) { + // Common case of constant part. + if (ConstantVector::IsNull(part_arg)) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + ConstantVector::SetNull(result, true); + } else { + const auto type = GetDatePartSpecifier(ConstantVector::GetData(part_arg)->GetString()); + DateDiffBinaryExecutor(type, startdate_arg, enddate_arg, result, args.size()); + } + } else { + TernaryExecutor::Execute(part_arg, startdate_arg, enddate_arg, result, args.size(), + DateDiffTernaryOperator::Operation); + } +} + +void DateDiffFun::RegisterFunction(BuiltinFunctions &set) { + ScalarFunctionSet date_diff("date_diff"); + date_diff.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::DATE, LogicalType::DATE}, + LogicalType::BIGINT, DateDiffFunction)); + date_diff.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::TIMESTAMP, LogicalType::TIMESTAMP}, + LogicalType::BIGINT, DateDiffFunction)); + date_diff.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::TIME, LogicalType::TIME}, + LogicalType::BIGINT, DateDiffFunction)); + set.AddFunction(date_diff); + + date_diff.name = 
"datediff"; + set.AddFunction(date_diff); +} + +} // namespace duckdb + + + + + + + + + namespace duckdb { -DatePartSpecifier GetDatePartSpecifier(string specifier) { - specifier = StringUtil::Lower(specifier); +bool TryGetDatePartSpecifier(const string &specifier_p, DatePartSpecifier &result) { + auto specifier = StringUtil::Lower(specifier_p); if (specifier == "year" || specifier == "y" || specifier == "years") { - return DatePartSpecifier::YEAR; + result = DatePartSpecifier::YEAR; } else if (specifier == "month" || specifier == "mon" || specifier == "months" || specifier == "mons") { - return DatePartSpecifier::MONTH; + result = DatePartSpecifier::MONTH; } else if (specifier == "day" || specifier == "days" || specifier == "d") { - return DatePartSpecifier::DAY; + result = DatePartSpecifier::DAY; } else if (specifier == "decade" || specifier == "decades") { - return DatePartSpecifier::DECADE; + result = DatePartSpecifier::DECADE; } else if (specifier == "century" || specifier == "centuries") { - return DatePartSpecifier::CENTURY; + result = DatePartSpecifier::CENTURY; } else if (specifier == "millennium" || specifier == "millenia") { - return DatePartSpecifier::MILLENNIUM; + result = DatePartSpecifier::MILLENNIUM; } else if (specifier == "microseconds" || specifier == "microsecond") { - return DatePartSpecifier::MICROSECONDS; + result = DatePartSpecifier::MICROSECONDS; } else if (specifier == "milliseconds" || specifier == "millisecond" || specifier == "ms" || specifier == "msec" || specifier == "msecs") { - return DatePartSpecifier::MILLISECONDS; + result = DatePartSpecifier::MILLISECONDS; } else if (specifier == "second" || specifier == "seconds" || specifier == "s") { - return DatePartSpecifier::SECOND; + result = DatePartSpecifier::SECOND; } else if (specifier == "minute" || specifier == "minutes" || specifier == "m") { - return DatePartSpecifier::MINUTE; + result = DatePartSpecifier::MINUTE; } else if (specifier == "hour" || specifier == "hours" || specifier 
== "h") { - return DatePartSpecifier::HOUR; + result = DatePartSpecifier::HOUR; } else if (specifier == "epoch") { // seconds since 1970-01-01 - return DatePartSpecifier::EPOCH; + result = DatePartSpecifier::EPOCH; } else if (specifier == "dow") { // day of the week (Sunday = 0, Saturday = 6) - return DatePartSpecifier::DOW; + result = DatePartSpecifier::DOW; } else if (specifier == "isodow") { // isodow (Monday = 1, Sunday = 7) - return DatePartSpecifier::ISODOW; + result = DatePartSpecifier::ISODOW; } else if (specifier == "week" || specifier == "weeks" || specifier == "w") { // week number - return DatePartSpecifier::WEEK; - } else if (specifier == "doy") { + result = DatePartSpecifier::WEEK; + } else if (specifier == "doy" || specifier == "dayofyear") { // day of the year (1-365/366) - return DatePartSpecifier::DOY; - } else if (specifier == "quarter") { + result = DatePartSpecifier::DOY; + } else if (specifier == "quarter" || specifier == "quarters") { // quarter of the year (1-4) - return DatePartSpecifier::QUARTER; + result = DatePartSpecifier::QUARTER; + } else if (specifier == "yearweek") { + // Combined year and week YYYYWW + result = DatePartSpecifier::YEARWEEK; } else { + return false; + } + return true; +} + +DatePartSpecifier GetDatePartSpecifier(const string &specifier) { + DatePartSpecifier result; + if (!TryGetDatePartSpecifier(specifier, result)) { throw ConversionException("extract specifier \"%s\" not recognized", specifier); } + return result; } template -static void LastYearOperator(DataChunk &args, ExpressionState &state, Vector &result) { +static void LastYearFunction(DataChunk &args, ExpressionState &state, Vector &result) { int32_t last_year = 0; UnaryExecutor::Execute(args.data[0], result, args.size(), [&](T input) { return Date::ExtractYear(input, &last_year); }); @@ -63633,7 +69162,7 @@ static unique_ptr PropagateSimpleDatePartStatistics(vector static inline TR Operation(TA input) { @@ -63893,503 +69422,329 @@ struct DateDatePart { }; 
}; -struct TimeDatePart { - struct YearOperator { - template - static inline TR Operation(TA input) { - throw NotImplementedException("\"time\" units \"year\" not recognized"); - } - - template - static unique_ptr PropagateStatistics(ClientContext &context, BoundFunctionExpression &expr, - FunctionData *bind_data, - vector> &child_stats) { - return PropagateDatePartStatistics(child_stats); - } - }; - - struct MonthOperator { - template - static inline TR Operation(TA input) { - throw NotImplementedException("\"time\" units \"month\" not recognized"); - } - - template - static unique_ptr PropagateStatistics(ClientContext &context, BoundFunctionExpression &expr, - FunctionData *bind_data, - vector> &child_stats) { - // min/max of month operator is [1, 12] - return PropagateSimpleDatePartStatistics<1, 12>(child_stats); - } - }; - - struct DayOperator { - template - static inline TR Operation(TA input) { - throw NotImplementedException("\"time\" units \"month\" not recognized"); - } - - template - static unique_ptr PropagateStatistics(ClientContext &context, BoundFunctionExpression &expr, - FunctionData *bind_data, - vector> &child_stats) { - // min/max of day operator is [1, 31] - return PropagateSimpleDatePartStatistics<1, 31>(child_stats); - } - }; - - struct DecadeOperator { - template - static inline TR Operation(TA input) { - throw NotImplementedException("\"time\" units \"decade\" not recognized"); - } - - template - static unique_ptr PropagateStatistics(ClientContext &context, BoundFunctionExpression &expr, - FunctionData *bind_data, - vector> &child_stats) { - return PropagateDatePartStatistics(child_stats); - } - }; - - struct CenturyOperator { - template - static inline TR Operation(TA input) { - throw NotImplementedException("\"time\" units \"century\" not recognized"); - } - - template - static unique_ptr PropagateStatistics(ClientContext &context, BoundFunctionExpression &expr, - FunctionData *bind_data, - vector> &child_stats) { - return 
PropagateDatePartStatistics(child_stats); - } - }; - - struct MilleniumOperator { - template - static inline TR Operation(TA input) { - throw NotImplementedException("\"time\" units \"millennium\" not recognized"); - } - - template - static unique_ptr PropagateStatistics(ClientContext &context, BoundFunctionExpression &expr, - FunctionData *bind_data, - vector> &child_stats) { - return PropagateDatePartStatistics(child_stats); - } - }; - - struct QuarterOperator { - template - static inline TR Operation(TA input) { - throw NotImplementedException("\"time\" units \"quarter\" not recognized"); - } - - template - static unique_ptr PropagateStatistics(ClientContext &context, BoundFunctionExpression &expr, - FunctionData *bind_data, - vector> &child_stats) { - // min/max of quarter operator is [1, 4] - return PropagateSimpleDatePartStatistics<1, 4>(child_stats); - } - }; - - struct DayOfWeekOperator { - template - static inline TR Operation(TA input) { - // day of the week (Sunday = 0, Saturday = 6) - // turn sunday into 0 by doing mod 7 - throw NotImplementedException("\"time\" units \"dow\" not recognized"); - } - - template - static unique_ptr PropagateStatistics(ClientContext &context, BoundFunctionExpression &expr, - FunctionData *bind_data, - vector> &child_stats) { - return PropagateSimpleDatePartStatistics<0, 6>(child_stats); - } - }; - - struct ISODayOfWeekOperator { - template - static inline TR Operation(TA input) { - // isodow (Monday = 1, Sunday = 7) - throw NotImplementedException("\"time\" units \"isodow\" not recognized"); - } - - template - static unique_ptr PropagateStatistics(ClientContext &context, BoundFunctionExpression &expr, - FunctionData *bind_data, - vector> &child_stats) { - return PropagateSimpleDatePartStatistics<1, 7>(child_stats); - } - }; - - struct DayOfYearOperator { - template - static inline TR Operation(TA input) { - throw NotImplementedException("\"time\" units \"doy\" not recognized"); - } - - template - static unique_ptr 
PropagateStatistics(ClientContext &context, BoundFunctionExpression &expr, - FunctionData *bind_data, - vector> &child_stats) { - return PropagateSimpleDatePartStatistics<1, 366>(child_stats); - } - }; - - struct WeekOperator { - template - static inline TR Operation(TA input) { - throw NotImplementedException("\"time\" units \"week\" not recognized"); - } - - template - static unique_ptr PropagateStatistics(ClientContext &context, BoundFunctionExpression &expr, - FunctionData *bind_data, - vector> &child_stats) { - return PropagateSimpleDatePartStatistics<1, 54>(child_stats); - } - }; - - struct YearWeekOperator { - template - static inline TR Operation(TA input) { - throw NotImplementedException("\"time\" units \"yearweek\" not recognized"); - } - - template - static unique_ptr PropagateStatistics(ClientContext &context, BoundFunctionExpression &expr, - FunctionData *bind_data, - vector> &child_stats) { - return PropagateDatePartStatistics(child_stats); - } - }; - - struct MicrosecondsOperator { - template - static inline TR Operation(TA input) { - // remove everything but the second & microsecond part - return input % Interval::MICROS_PER_MINUTE; - } - - template - static unique_ptr PropagateStatistics(ClientContext &context, BoundFunctionExpression &expr, - FunctionData *bind_data, - vector> &child_stats) { - return PropagateSimpleDatePartStatistics<0, 60000000>(child_stats); - } - }; - - struct MillisecondsOperator { - template - static inline TR Operation(TA input) { - return MicrosecondsOperator::Operation(input) / Interval::MICROS_PER_MSEC; - } - - template - static unique_ptr PropagateStatistics(ClientContext &context, BoundFunctionExpression &expr, - FunctionData *bind_data, - vector> &child_stats) { - return PropagateSimpleDatePartStatistics<0, 60000>(child_stats); - } - }; - - struct SecondsOperator { - template - static inline TR Operation(TA input) { - return MicrosecondsOperator::Operation(input) / Interval::MICROS_PER_SEC; - } - - template - static 
unique_ptr PropagateStatistics(ClientContext &context, BoundFunctionExpression &expr, - FunctionData *bind_data, - vector> &child_stats) { - return PropagateSimpleDatePartStatistics<0, 60>(child_stats); - } - }; - - struct MinutesOperator { - template - static inline TR Operation(TA input) { - return (input % Interval::MICROS_PER_HOUR) / Interval::MICROS_PER_MINUTE; - } - - template - static unique_ptr PropagateStatistics(ClientContext &context, BoundFunctionExpression &expr, - FunctionData *bind_data, - vector> &child_stats) { - return PropagateSimpleDatePartStatistics<0, 60>(child_stats); - } - }; - - struct HoursOperator { - template - static inline TR Operation(TA input) { - return input / Interval::MICROS_PER_HOUR; - } - - template - static unique_ptr PropagateStatistics(ClientContext &context, BoundFunctionExpression &expr, - FunctionData *bind_data, - vector> &child_stats) { - return PropagateSimpleDatePartStatistics<0, 24>(child_stats); - } - }; - - struct EpochOperator { - template - static inline TR Operation(TA input) { - return SecondsOperator::Operation(input); - } - - template - static unique_ptr PropagateStatistics(ClientContext &context, BoundFunctionExpression &expr, - FunctionData *bind_data, - vector> &child_stats) { - // time seconds range over a single day - return PropagateSimpleDatePartStatistics<0, 86400>(child_stats); - } - }; -}; - template <> -int64_t DateDatePart::YearOperator::Operation(timestamp_t input) { +int64_t DatePart::YearOperator::Operation(timestamp_t input) { return YearOperator::Operation(Timestamp::GetDate(input)); } template <> -int64_t DateDatePart::YearOperator::Operation(interval_t input) { +int64_t DatePart::YearOperator::Operation(interval_t input) { return input.months / Interval::MONTHS_PER_YEAR; } template <> -int64_t DateDatePart::MonthOperator::Operation(timestamp_t input) { +int64_t DatePart::YearOperator::Operation(dtime_t input) { + throw NotImplementedException("\"time\" units \"year\" not recognized"); +} + 
+template <> +int64_t DatePart::MonthOperator::Operation(timestamp_t input) { return MonthOperator::Operation(Timestamp::GetDate(input)); } template <> -int64_t DateDatePart::MonthOperator::Operation(interval_t input) { +int64_t DatePart::MonthOperator::Operation(interval_t input) { return input.months % Interval::MONTHS_PER_YEAR; } template <> -unique_ptr -DateDatePart::MonthOperator::PropagateStatistics(ClientContext &context, BoundFunctionExpression &expr, - FunctionData *bind_data, - vector> &child_stats) { - // interval months range from 0-11 - return PropagateSimpleDatePartStatistics<0, 11>(child_stats); +int64_t DatePart::MonthOperator::Operation(dtime_t input) { + throw NotImplementedException("\"time\" units \"month\" not recognized"); } template <> -int64_t DateDatePart::DayOperator::Operation(timestamp_t input) { +int64_t DatePart::DayOperator::Operation(timestamp_t input) { return DayOperator::Operation(Timestamp::GetDate(input)); } template <> -int64_t DateDatePart::DayOperator::Operation(interval_t input) { +int64_t DatePart::DayOperator::Operation(interval_t input) { return input.days; } template <> -int64_t DateDatePart::DecadeOperator::Operation(interval_t input) { +int64_t DatePart::DayOperator::Operation(dtime_t input) { + throw NotImplementedException("\"time\" units \"day\" not recognized"); +} + +template <> +int64_t DatePart::DecadeOperator::Operation(interval_t input) { return input.months / Interval::MONTHS_PER_DECADE; } template <> -int64_t DateDatePart::CenturyOperator::Operation(interval_t input) { +int64_t DatePart::DecadeOperator::Operation(dtime_t input) { + throw NotImplementedException("\"time\" units \"decade\" not recognized"); +} + +template <> +int64_t DatePart::CenturyOperator::Operation(interval_t input) { return input.months / Interval::MONTHS_PER_CENTURY; } template <> -int64_t DateDatePart::MilleniumOperator::Operation(interval_t input) { +int64_t DatePart::CenturyOperator::Operation(dtime_t input) { + throw 
NotImplementedException("\"time\" units \"century\" not recognized"); +} + +template <> +int64_t DatePart::MilleniumOperator::Operation(interval_t input) { return input.months / Interval::MONTHS_PER_MILLENIUM; } template <> -int64_t DateDatePart::QuarterOperator::Operation(timestamp_t input) { +int64_t DatePart::MilleniumOperator::Operation(dtime_t input) { + throw NotImplementedException("\"time\" units \"millennium\" not recognized"); +} + +template <> +int64_t DatePart::QuarterOperator::Operation(timestamp_t input) { return QuarterOperator::Operation(Timestamp::GetDate(input)); } template <> -int64_t DateDatePart::QuarterOperator::Operation(interval_t input) { +int64_t DatePart::QuarterOperator::Operation(interval_t input) { return MonthOperator::Operation(input) / Interval::MONTHS_PER_QUARTER + 1; } template <> -unique_ptr -DateDatePart::QuarterOperator::PropagateStatistics(ClientContext &context, BoundFunctionExpression &expr, - FunctionData *bind_data, - vector> &child_stats) { - // negative interval quarters range from -2 to 4 - return PropagateSimpleDatePartStatistics<-2, 4>(child_stats); +int64_t DatePart::QuarterOperator::Operation(dtime_t input) { + throw NotImplementedException("\"time\" units \"quarter\" not recognized"); } template <> -int64_t DateDatePart::DayOfWeekOperator::Operation(timestamp_t input) { +int64_t DatePart::DayOfWeekOperator::Operation(timestamp_t input) { return DayOfWeekOperator::Operation(Timestamp::GetDate(input)); } template <> -int64_t DateDatePart::DayOfWeekOperator::Operation(interval_t input) { +int64_t DatePart::DayOfWeekOperator::Operation(interval_t input) { throw NotImplementedException("interval units \"dow\" not recognized"); } template <> -int64_t DateDatePart::ISODayOfWeekOperator::Operation(timestamp_t input) { +int64_t DatePart::DayOfWeekOperator::Operation(dtime_t input) { + throw NotImplementedException("\"time\" units \"dow\" not recognized"); +} + +template <> +int64_t 
DatePart::ISODayOfWeekOperator::Operation(timestamp_t input) { return ISODayOfWeekOperator::Operation(Timestamp::GetDate(input)); } template <> -int64_t DateDatePart::ISODayOfWeekOperator::Operation(interval_t input) { +int64_t DatePart::ISODayOfWeekOperator::Operation(interval_t input) { throw NotImplementedException("interval units \"isodow\" not recognized"); } template <> -int64_t DateDatePart::DayOfYearOperator::Operation(timestamp_t input) { +int64_t DatePart::ISODayOfWeekOperator::Operation(dtime_t input) { + throw NotImplementedException("\"time\" units \"isodow\" not recognized"); +} + +template <> +int64_t DatePart::DayOfYearOperator::Operation(timestamp_t input) { return DayOfYearOperator::Operation(Timestamp::GetDate(input)); } template <> -int64_t DateDatePart::DayOfYearOperator::Operation(interval_t input) { +int64_t DatePart::DayOfYearOperator::Operation(interval_t input) { throw NotImplementedException("interval units \"doy\" not recognized"); } template <> -int64_t DateDatePart::WeekOperator::Operation(timestamp_t input) { +int64_t DatePart::DayOfYearOperator::Operation(dtime_t input) { + throw NotImplementedException("\"time\" units \"doy\" not recognized"); +} + +template <> +int64_t DatePart::WeekOperator::Operation(timestamp_t input) { return WeekOperator::Operation(Timestamp::GetDate(input)); } template <> -int64_t DateDatePart::WeekOperator::Operation(interval_t input) { +int64_t DatePart::WeekOperator::Operation(interval_t input) { throw NotImplementedException("interval units \"week\" not recognized"); } template <> -int64_t DateDatePart::MicrosecondsOperator::Operation(timestamp_t input) { +int64_t DatePart::WeekOperator::Operation(dtime_t input) { + throw NotImplementedException("\"time\" units \"week\" not recognized"); +} + +template <> +int64_t DatePart::YearWeekOperator::Operation(dtime_t input) { + throw NotImplementedException("\"time\" units \"yearweek\" not recognized"); +} + +template <> +int64_t 
DatePart::MicrosecondsOperator::Operation(timestamp_t input) { auto time = Timestamp::GetTime(input); // remove everything but the second & microsecond part - return time % Interval::MICROS_PER_MINUTE; + return time.micros % Interval::MICROS_PER_MINUTE; } template <> -int64_t DateDatePart::MicrosecondsOperator::Operation(interval_t input) { +int64_t DatePart::MicrosecondsOperator::Operation(interval_t input) { // remove everything but the second & microsecond part return input.micros; } template <> -int64_t DateDatePart::MillisecondsOperator::Operation(timestamp_t input) { +int64_t DatePart::MicrosecondsOperator::Operation(dtime_t input) { + // remove everything but the second & microsecond part + return input.micros % Interval::MICROS_PER_MINUTE; +} + +template <> +int64_t DatePart::MillisecondsOperator::Operation(timestamp_t input) { return MicrosecondsOperator::Operation(input) / Interval::MICROS_PER_MSEC; } template <> -int64_t DateDatePart::MillisecondsOperator::Operation(interval_t input) { +int64_t DatePart::MillisecondsOperator::Operation(interval_t input) { return MicrosecondsOperator::Operation(input) / Interval::MICROS_PER_MSEC; } template <> -int64_t DateDatePart::SecondsOperator::Operation(timestamp_t input) { +int64_t DatePart::MillisecondsOperator::Operation(dtime_t input) { + return MicrosecondsOperator::Operation(input) / Interval::MICROS_PER_MSEC; +} + +template <> +int64_t DatePart::SecondsOperator::Operation(timestamp_t input) { return MicrosecondsOperator::Operation(input) / Interval::MICROS_PER_SEC; } template <> -int64_t DateDatePart::SecondsOperator::Operation(interval_t input) { +int64_t DatePart::SecondsOperator::Operation(interval_t input) { return MicrosecondsOperator::Operation(input) / Interval::MICROS_PER_SEC; } template <> -int64_t DateDatePart::MinutesOperator::Operation(timestamp_t input) { +int64_t DatePart::SecondsOperator::Operation(dtime_t input) { + return MicrosecondsOperator::Operation(input) / Interval::MICROS_PER_SEC; +} + 
+template <> +int64_t DatePart::MinutesOperator::Operation(timestamp_t input) { auto time = Timestamp::GetTime(input); // remove the hour part, and truncate to minutes - return (time % Interval::MICROS_PER_HOUR) / Interval::MICROS_PER_MINUTE; + return (time.micros % Interval::MICROS_PER_HOUR) / Interval::MICROS_PER_MINUTE; +} + +template <> +int64_t DatePart::MinutesOperator::Operation(interval_t input) { + // remove the hour part, and truncate to minutes + return (input.micros % Interval::MICROS_PER_HOUR) / Interval::MICROS_PER_MINUTE; } template <> -int64_t DateDatePart::MinutesOperator::Operation(interval_t input) { +int64_t DatePart::MinutesOperator::Operation(dtime_t input) { // remove the hour part, and truncate to minutes return (input.micros % Interval::MICROS_PER_HOUR) / Interval::MICROS_PER_MINUTE; } template <> -int64_t DateDatePart::HoursOperator::Operation(timestamp_t input) { - return Timestamp::GetTime(input) / Interval::MICROS_PER_HOUR; +int64_t DatePart::HoursOperator::Operation(timestamp_t input) { + return Timestamp::GetTime(input).micros / Interval::MICROS_PER_HOUR; +} + +template <> +int64_t DatePart::HoursOperator::Operation(interval_t input) { + return input.micros / Interval::MICROS_PER_HOUR; } template <> -int64_t DateDatePart::HoursOperator::Operation(interval_t input) { +int64_t DatePart::HoursOperator::Operation(dtime_t input) { return input.micros / Interval::MICROS_PER_HOUR; } template <> -int64_t DateDatePart::EpochOperator::Operation(timestamp_t input) { +int64_t DatePart::EpochOperator::Operation(timestamp_t input) { return Timestamp::GetEpochSeconds(input); } template <> -int64_t DateDatePart::EpochOperator::Operation(interval_t input) { +int64_t DatePart::EpochOperator::Operation(interval_t input) { auto secs = SecondsOperator::Operation(input); return (input.months * Interval::DAYS_PER_MONTH + input.days) * Interval::SECS_PER_DAY + secs; } -template +template <> +int64_t DatePart::EpochOperator::Operation(dtime_t input) { + 
return SecondsOperator::Operation(input); +} + +template <> +unique_ptr +DatePart::EpochOperator::PropagateStatistics(ClientContext &context, BoundFunctionExpression &expr, + FunctionData *bind_data, + vector> &child_stats) { + // time seconds range over a single day + return PropagateSimpleDatePartStatistics<0, 86400>(child_stats); +} + +template static int64_t ExtractElement(DatePartSpecifier type, T element) { switch (type) { case DatePartSpecifier::YEAR: - return OP::YearOperator::template Operation(element); + return DatePart::YearOperator::template Operation(element); case DatePartSpecifier::MONTH: - return OP::MonthOperator::template Operation(element); + return DatePart::MonthOperator::template Operation(element); case DatePartSpecifier::DAY: - return OP::DayOperator::template Operation(element); + return DatePart::DayOperator::template Operation(element); case DatePartSpecifier::DECADE: - return OP::DecadeOperator::template Operation(element); + return DatePart::DecadeOperator::template Operation(element); case DatePartSpecifier::CENTURY: - return OP::CenturyOperator::template Operation(element); + return DatePart::CenturyOperator::template Operation(element); case DatePartSpecifier::MILLENNIUM: - return OP::MilleniumOperator::template Operation(element); + return DatePart::MilleniumOperator::template Operation(element); case DatePartSpecifier::QUARTER: - return OP::QuarterOperator::template Operation(element); + return DatePart::QuarterOperator::template Operation(element); case DatePartSpecifier::DOW: - return OP::DayOfWeekOperator::template Operation(element); + return DatePart::DayOfWeekOperator::template Operation(element); case DatePartSpecifier::ISODOW: - return OP::ISODayOfWeekOperator::template Operation(element); + return DatePart::ISODayOfWeekOperator::template Operation(element); case DatePartSpecifier::DOY: - return OP::DayOfYearOperator::template Operation(element); + return DatePart::DayOfYearOperator::template Operation(element); case 
DatePartSpecifier::WEEK: - return OP::WeekOperator::template Operation(element); + return DatePart::WeekOperator::template Operation(element); + case DatePartSpecifier::YEARWEEK: + return DatePart::YearWeekOperator::template Operation(element); case DatePartSpecifier::EPOCH: - return OP::EpochOperator::template Operation(element); + return DatePart::EpochOperator::template Operation(element); case DatePartSpecifier::MICROSECONDS: - return OP::MicrosecondsOperator::template Operation(element); + return DatePart::MicrosecondsOperator::template Operation(element); case DatePartSpecifier::MILLISECONDS: - return OP::MillisecondsOperator::template Operation(element); + return DatePart::MillisecondsOperator::template Operation(element); case DatePartSpecifier::SECOND: - return OP::SecondsOperator::template Operation(element); + return DatePart::SecondsOperator::template Operation(element); case DatePartSpecifier::MINUTE: - return OP::MinutesOperator::template Operation(element); + return DatePart::MinutesOperator::template Operation(element); case DatePartSpecifier::HOUR: - return OP::HoursOperator::template Operation(element); + return DatePart::HoursOperator::template Operation(element); default: - throw NotImplementedException("Specifier type not implemented"); + throw NotImplementedException("Specifier type not implemented for DATEPART"); } } -struct DateDatePartOperator { +struct DatePartBinaryOperator { template static inline TR Operation(TA specifier, TB date) { - return ExtractElement(GetDatePartSpecifier(specifier.GetString()), date); + return ExtractElement(GetDatePartSpecifier(specifier.GetString()), date); } }; -struct TimeDatePartOperator { - template - static inline TR Operation(TA specifier, TB date) { - return ExtractElement(GetDatePartSpecifier(specifier.GetString()), date); - } -}; +template +static void DatePartFunction(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.ColumnCount() == 2); + auto &part_arg = args.data[0]; + auto 
&date_arg = args.data[1]; + + BinaryExecutor::ExecuteStandard(part_arg, date_arg, result, + args.size()); +} void AddGenericDatePartOperator(BuiltinFunctions &set, const string &name, scalar_function_t date_func, scalar_function_t ts_func, scalar_function_t interval_func, @@ -64426,14 +69781,13 @@ void AddGenericTimePartOperator(BuiltinFunctions &set, const string &name, scala set.AddFunction(operator_set); } -template +template static void AddTimePartOperator(BuiltinFunctions &set, string name) { AddGenericTimePartOperator( - set, name, ScalarFunction::UnaryFunction, - ScalarFunction::UnaryFunction, - ScalarFunction::UnaryFunction, ScalarFunction::UnaryFunction, - DOP::template PropagateStatistics, DOP::template PropagateStatistics, - TOP::template PropagateStatistics); + set, name, ScalarFunction::UnaryFunction, + ScalarFunction::UnaryFunction, ScalarFunction::UnaryFunction, + ScalarFunction::UnaryFunction, OP::template PropagateStatistics, + OP::template PropagateStatistics, OP::template PropagateStatistics); } struct LastDayOperator { @@ -64456,47 +69810,47 @@ date_t LastDayOperator::Operation(timestamp_t input) { struct MonthNameOperator { template static inline TR Operation(TA input) { - return Date::MONTH_NAMES[DateDatePart::MonthOperator::Operation(input) - 1]; + return Date::MONTH_NAMES[DatePart::MonthOperator::Operation(input) - 1]; } }; struct DayNameOperator { template static inline TR Operation(TA input) { - return Date::DAY_NAMES[DateDatePart::DayOfWeekOperator::Operation(input)]; + return Date::DAY_NAMES[DatePart::DayOfWeekOperator::Operation(input)]; } }; void DatePartFun::RegisterFunction(BuiltinFunctions &set) { // register the individual operators - AddGenericDatePartOperator(set, "year", LastYearOperator, LastYearOperator, - ScalarFunction::UnaryFunction, - DateDatePart::YearOperator::PropagateStatistics, - DateDatePart::YearOperator::PropagateStatistics); - AddDatePartOperator(set, "month"); - AddDatePartOperator(set, "day"); - 
AddDatePartOperator(set, "decade"); - AddDatePartOperator(set, "century"); - AddDatePartOperator(set, "millenium"); - AddDatePartOperator(set, "quarter"); - AddDatePartOperator(set, "dayofweek"); - AddDatePartOperator(set, "isodow"); - AddDatePartOperator(set, "dayofyear"); - AddDatePartOperator(set, "week"); - AddTimePartOperator(set, "epoch"); - AddTimePartOperator(set, "microsecond"); - AddTimePartOperator(set, "millisecond"); - AddTimePartOperator(set, "second"); - AddTimePartOperator(set, "minute"); - AddTimePartOperator(set, "hour"); + AddGenericDatePartOperator(set, "year", LastYearFunction, LastYearFunction, + ScalarFunction::UnaryFunction, + DatePart::YearOperator::PropagateStatistics, + DatePart::YearOperator::PropagateStatistics); + AddDatePartOperator(set, "month"); + AddDatePartOperator(set, "day"); + AddDatePartOperator(set, "decade"); + AddDatePartOperator(set, "century"); + AddDatePartOperator(set, "millenium"); + AddDatePartOperator(set, "quarter"); + AddDatePartOperator(set, "dayofweek"); + AddDatePartOperator(set, "isodow"); + AddDatePartOperator(set, "dayofyear"); + AddDatePartOperator(set, "week"); + AddTimePartOperator(set, "epoch"); + AddTimePartOperator(set, "microsecond"); + AddTimePartOperator(set, "millisecond"); + AddTimePartOperator(set, "second"); + AddTimePartOperator(set, "minute"); + AddTimePartOperator(set, "hour"); // register combinations - AddDatePartOperator(set, "yearweek"); + AddDatePartOperator(set, "yearweek"); // register various aliases - AddDatePartOperator(set, "dayofmonth"); - AddDatePartOperator(set, "weekday"); - AddDatePartOperator(set, "weekofyear"); // Note that WeekOperator is ISO-8601, not US + AddDatePartOperator(set, "dayofmonth"); + AddDatePartOperator(set, "weekday"); + AddDatePartOperator(set, "weekofyear"); // Note that WeekOperator is ISO-8601, not US // register the last_day function ScalarFunctionSet last_day("last_day"); @@ -64525,17 +69879,13 @@ void DatePartFun::RegisterFunction(BuiltinFunctions 
&set) { // finally the actual date_part function ScalarFunctionSet date_part("date_part"); date_part.AddFunction( - ScalarFunction({LogicalType::VARCHAR, LogicalType::DATE}, LogicalType::BIGINT, - ScalarFunction::BinaryFunction)); + ScalarFunction({LogicalType::VARCHAR, LogicalType::DATE}, LogicalType::BIGINT, DatePartFunction)); + date_part.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::TIMESTAMP}, LogicalType::BIGINT, + DatePartFunction)); date_part.AddFunction( - ScalarFunction({LogicalType::VARCHAR, LogicalType::TIMESTAMP}, LogicalType::BIGINT, - ScalarFunction::BinaryFunction)); - date_part.AddFunction( - ScalarFunction({LogicalType::VARCHAR, LogicalType::TIME}, LogicalType::BIGINT, - ScalarFunction::BinaryFunction)); - date_part.AddFunction( - ScalarFunction({LogicalType::VARCHAR, LogicalType::INTERVAL}, LogicalType::BIGINT, - ScalarFunction::BinaryFunction)); + ScalarFunction({LogicalType::VARCHAR, LogicalType::TIME}, LogicalType::BIGINT, DatePartFunction)); + date_part.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::INTERVAL}, LogicalType::BIGINT, + DatePartFunction)); set.AddFunction(date_part); date_part.name = "datepart"; set.AddFunction(date_part); @@ -64555,215 +69905,324 @@ void DatePartFun::RegisterFunction(BuiltinFunctions &set) { namespace duckdb { -struct MillenniumTruncOperator { - template - static inline TR Operation(TA input) { - date_t date = Timestamp::GetDate(input); - return Timestamp::FromDatetime(Date::FromDate((Date::ExtractYear(date) / 1000) * 1000, 1, 1), 0); - } +struct DateTrunc { + + struct MillenniumOperator { + template + static inline TR Operation(TA input) { + date_t date = Timestamp::GetDate(input); + return Timestamp::FromDatetime(Date::FromDate((Date::ExtractYear(date) / 1000) * 1000, 1, 1), dtime_t(0)); + } + }; + + struct CenturyOperator { + template + static inline TR Operation(TA input) { + date_t date = Timestamp::GetDate(input); + return 
Timestamp::FromDatetime(Date::FromDate((Date::ExtractYear(date) / 100) * 100, 1, 1), dtime_t(0)); + } + }; + + struct DecadeOperator { + template + static inline TR Operation(TA input) { + date_t date = Timestamp::GetDate(input); + return Timestamp::FromDatetime(Date::FromDate((Date::ExtractYear(date) / 10) * 10, 1, 1), dtime_t(0)); + } + }; + + struct YearOperator { + template + static inline TR Operation(TA input) { + date_t date = Timestamp::GetDate(input); + return Timestamp::FromDatetime(Date::FromDate(Date::ExtractYear(date), 1, 1), dtime_t(0)); + } + }; + + struct QuarterOperator { + template + static inline TR Operation(TA input) { + int32_t yyyy, mm, dd; + Date::Convert(Timestamp::GetDate(input), yyyy, mm, dd); + mm = 1 + (((mm - 1) / 3) * 3); + return Timestamp::FromDatetime(Date::FromDate(yyyy, mm, 1), dtime_t(0)); + } + }; + + struct MonthOperator { + template + static inline TR Operation(TA input) { + date_t date = Timestamp::GetDate(input); + return Timestamp::FromDatetime(Date::FromDate(Date::ExtractYear(date), Date::ExtractMonth(date), 1), + dtime_t(0)); + } + }; + + struct WeekOperator { + template + static inline TR Operation(TA input) { + date_t date = Timestamp::GetDate(input); + + return Timestamp::FromDatetime(Date::GetMondayOfCurrentWeek(date), dtime_t(0)); + } + }; + + struct DayOperator { + template + static inline TR Operation(TA input) { + date_t date = Timestamp::GetDate(input); + return Timestamp::FromDatetime(date, dtime_t(0)); + } + }; + + struct HourOperator { + template + static inline TR Operation(TA input) { + int32_t hour, min, sec, micros; + date_t date; + dtime_t time; + Timestamp::Convert(input, date, time); + Time::Convert(time, hour, min, sec, micros); + return Timestamp::FromDatetime(date, Time::FromTime(hour, 0, 0, 0)); + } + }; + + struct MinuteOperator { + template + static inline TR Operation(TA input) { + int32_t hour, min, sec, micros; + date_t date; + dtime_t time; + Timestamp::Convert(input, date, time); + 
Time::Convert(time, hour, min, sec, micros); + return Timestamp::FromDatetime(date, Time::FromTime(hour, min, 0, 0)); + } + }; + + struct SecondOperator { + template + static inline TR Operation(TA input) { + int32_t hour, min, sec, micros; + date_t date; + dtime_t time; + Timestamp::Convert(input, date, time); + Time::Convert(time, hour, min, sec, micros); + return Timestamp::FromDatetime(date, Time::FromTime(hour, min, sec, 0)); + } + }; + + struct MillisecondOperator { + template + static inline TR Operation(TA input) { + int32_t hour, min, sec, micros; + date_t date; + dtime_t time; + Timestamp::Convert(input, date, time); + Time::Convert(time, hour, min, sec, micros); + micros -= micros % Interval::MICROS_PER_MSEC; + return Timestamp::FromDatetime(date, Time::FromTime(hour, min, sec, micros)); + } + }; + + struct MicrosecondOperator { + template + static inline TR Operation(TA input) { + return input; + } + }; }; + +// DATE specialisations template <> -timestamp_t MillenniumTruncOperator::Operation(date_t input) { - return MillenniumTruncOperator::Operation(Timestamp::FromDatetime(input, 0)); +timestamp_t DateTrunc::MillenniumOperator::Operation(date_t input) { + return MillenniumOperator::Operation(Timestamp::FromDatetime(input, dtime_t(0))); } -struct CenturyTruncOperator { - template - static inline TR Operation(TA input) { - date_t date = Timestamp::GetDate(input); - return Timestamp::FromDatetime(Date::FromDate((Date::ExtractYear(date) / 100) * 100, 1, 1), 0); - } -}; template <> -timestamp_t CenturyTruncOperator::Operation(date_t input) { - return CenturyTruncOperator::Operation(Timestamp::FromDatetime(input, 0)); +timestamp_t DateTrunc::CenturyOperator::Operation(date_t input) { + return CenturyOperator::Operation(Timestamp::FromDatetime(input, dtime_t(0))); } -struct DecadeTruncOperator { - template - static inline TR Operation(TA input) { - date_t date = Timestamp::GetDate(input); - return 
Timestamp::FromDatetime(Date::FromDate((Date::ExtractYear(date) / 10) * 10, 1, 1), 0); - } -}; template <> -timestamp_t DecadeTruncOperator::Operation(date_t input) { - return DecadeTruncOperator::Operation(Timestamp::FromDatetime(input, 0)); +timestamp_t DateTrunc::DecadeOperator::Operation(date_t input) { + return DecadeOperator::Operation(Timestamp::FromDatetime(input, dtime_t(0))); } -struct YearTruncOperator { - template - static inline TR Operation(TA input) { - date_t date = Timestamp::GetDate(input); - return Timestamp::FromDatetime(Date::FromDate(Date::ExtractYear(date), 1, 1), 0); - } -}; template <> -timestamp_t YearTruncOperator::Operation(date_t input) { - return YearTruncOperator::Operation(Timestamp::FromDatetime(input, 0)); +timestamp_t DateTrunc::YearOperator::Operation(date_t input) { + return YearOperator::Operation(Timestamp::FromDatetime(input, dtime_t(0))); } -struct QuarterTruncOperator { - template - static inline TR Operation(TA input) { - date_t date = Timestamp::GetDate(input); - - int32_t month = Date::ExtractMonth(date); - month = 1 + (((month - 1) / 3) * 3); - return Timestamp::FromDatetime(Date::FromDate(Date::ExtractYear(date), month, 1), 0); - } -}; template <> -timestamp_t QuarterTruncOperator::Operation(date_t input) { - return QuarterTruncOperator::Operation(Timestamp::FromDatetime(input, 0)); +timestamp_t DateTrunc::QuarterOperator::Operation(date_t input) { + return QuarterOperator::Operation(Timestamp::FromDatetime(input, dtime_t(0))); } -struct MonthTruncOperator { - template - static inline TR Operation(TA input) { - date_t date = Timestamp::GetDate(input); - return Timestamp::FromDatetime(Date::FromDate(Date::ExtractYear(date), Date::ExtractMonth(date), 1), 0); - } -}; template <> -timestamp_t MonthTruncOperator::Operation(date_t input) { - return MonthTruncOperator::Operation(Timestamp::FromDatetime(input, 0)); +timestamp_t DateTrunc::MonthOperator::Operation(date_t input) { + return 
MonthOperator::Operation(Timestamp::FromDatetime(input, dtime_t(0))); } -struct WeekTruncOperator { - template - static inline TR Operation(TA input) { - date_t date = Timestamp::GetDate(input); +template <> +timestamp_t DateTrunc::WeekOperator::Operation(date_t input) { + return WeekOperator::Operation(Timestamp::FromDatetime(input, dtime_t(0))); +} - return Timestamp::FromDatetime(Date::GetMondayOfCurrentWeek(date), 0); - } -}; template <> -timestamp_t WeekTruncOperator::Operation(date_t input) { - return WeekTruncOperator::Operation(Timestamp::FromDatetime(input, 0)); +timestamp_t DateTrunc::DayOperator::Operation(date_t input) { + return Timestamp::FromDatetime(input, dtime_t(0)); } -struct DayTruncOperator { - template - static inline TR Operation(TA input) { - date_t date = Timestamp::GetDate(input); - return Timestamp::FromDatetime(date, 0); - } -}; template <> -timestamp_t DayTruncOperator::Operation(date_t input) { - return Timestamp::FromDatetime(input, 0); +timestamp_t DateTrunc::HourOperator::Operation(date_t input) { + return DayOperator::Operation(input); } -struct HourTruncOperator { - template - static inline TR Operation(TA input) { - int32_t hour, min, sec, micros; - date_t date; - dtime_t time; - Timestamp::Convert(input, date, time); - Time::Convert(time, hour, min, sec, micros); - return Timestamp::FromDatetime(date, Time::FromTime(hour, 0, 0, 0)); - } -}; template <> -timestamp_t HourTruncOperator::Operation(date_t input) { - return Timestamp::FromDatetime(input, 0); +timestamp_t DateTrunc::MinuteOperator::Operation(date_t input) { + return DayOperator::Operation(input); } -struct MinuteTruncOperator { - template - static inline TR Operation(TA input) { - int32_t hour, min, sec, micros; - date_t date; - dtime_t time; - Timestamp::Convert(input, date, time); - Time::Convert(time, hour, min, sec, micros); - return Timestamp::FromDatetime(date, Time::FromTime(hour, min, 0, 0)); - } -}; template <> -timestamp_t 
MinuteTruncOperator::Operation(date_t input) { - return Timestamp::FromDatetime(input, 0); +timestamp_t DateTrunc::SecondOperator::Operation(date_t input) { + return DayOperator::Operation(input); } -struct SecondsTruncOperator { - template - static inline TR Operation(TA input) { - int32_t hour, min, sec, micros; - date_t date; - dtime_t time; - Timestamp::Convert(input, date, time); - Time::Convert(time, hour, min, sec, micros); - return Timestamp::FromDatetime(date, Time::FromTime(hour, min, sec, 0)); - } -}; template <> -timestamp_t SecondsTruncOperator::Operation(date_t input) { - return Timestamp::FromDatetime(input, 0); +timestamp_t DateTrunc::MillisecondOperator::Operation(date_t input) { + return DayOperator::Operation(input); } -struct MilliSecondsTruncOperator { - template - static inline TR Operation(TA input) { - return input; - } -}; template <> -timestamp_t MilliSecondsTruncOperator::Operation(date_t input) { - return Timestamp::FromDatetime(input, 0); +timestamp_t DateTrunc::MicrosecondOperator::Operation(date_t input) { + return DayOperator::Operation(input); } template static TR TruncateElement(DatePartSpecifier type, TA element) { switch (type) { case DatePartSpecifier::MILLENNIUM: - return MillenniumTruncOperator::Operation(element); + return DateTrunc::MillenniumOperator::Operation(element); case DatePartSpecifier::CENTURY: - return CenturyTruncOperator::Operation(element); + return DateTrunc::CenturyOperator::Operation(element); case DatePartSpecifier::DECADE: - return DecadeTruncOperator::Operation(element); + return DateTrunc::DecadeOperator::Operation(element); case DatePartSpecifier::YEAR: - return YearTruncOperator::Operation(element); + return DateTrunc::YearOperator::Operation(element); case DatePartSpecifier::QUARTER: - return QuarterTruncOperator::Operation(element); + return DateTrunc::QuarterOperator::Operation(element); case DatePartSpecifier::MONTH: - return MonthTruncOperator::Operation(element); + return 
DateTrunc::MonthOperator::Operation(element); case DatePartSpecifier::WEEK: - return WeekTruncOperator::Operation(element); + case DatePartSpecifier::YEARWEEK: + return DateTrunc::WeekOperator::Operation(element); case DatePartSpecifier::DAY: - return DayTruncOperator::Operation(element); + case DatePartSpecifier::DOW: + case DatePartSpecifier::ISODOW: + case DatePartSpecifier::DOY: + return DateTrunc::DayOperator::Operation(element); case DatePartSpecifier::HOUR: - return HourTruncOperator::Operation(element); + return DateTrunc::HourOperator::Operation(element); case DatePartSpecifier::MINUTE: - return MinuteTruncOperator::Operation(element); + return DateTrunc::MinuteOperator::Operation(element); case DatePartSpecifier::SECOND: - return SecondsTruncOperator::Operation(element); + case DatePartSpecifier::EPOCH: + return DateTrunc::SecondOperator::Operation(element); case DatePartSpecifier::MILLISECONDS: - return MilliSecondsTruncOperator::Operation(element); + return DateTrunc::MillisecondOperator::Operation(element); case DatePartSpecifier::MICROSECONDS: - // Since microseconds are not stored truncating to microseconds does the same as to milliseconds. 
- return MilliSecondsTruncOperator::Operation(element); + return DateTrunc::MicrosecondOperator::Operation(element); default: - throw NotImplementedException("Specifier type not implemented"); + throw NotImplementedException("Specifier type not implemented for DATETRUNC"); } } -struct DateTruncOperator { +struct DateTruncBinaryOperator { template static inline TR Operation(TA specifier, TB date) { return TruncateElement(GetDatePartSpecifier(specifier.GetString()), date); } }; +template +static void DateTruncUnaryExecutor(DatePartSpecifier type, Vector &left, Vector &result, idx_t count) { + switch (type) { + case DatePartSpecifier::MILLENNIUM: + UnaryExecutor::Execute(left, result, count); + break; + case DatePartSpecifier::CENTURY: + UnaryExecutor::Execute(left, result, count); + break; + case DatePartSpecifier::DECADE: + UnaryExecutor::Execute(left, result, count); + break; + case DatePartSpecifier::YEAR: + UnaryExecutor::Execute(left, result, count); + break; + case DatePartSpecifier::QUARTER: + UnaryExecutor::Execute(left, result, count); + break; + case DatePartSpecifier::MONTH: + UnaryExecutor::Execute(left, result, count); + break; + case DatePartSpecifier::WEEK: + case DatePartSpecifier::YEARWEEK: + UnaryExecutor::Execute(left, result, count); + break; + case DatePartSpecifier::DAY: + case DatePartSpecifier::DOW: + case DatePartSpecifier::ISODOW: + case DatePartSpecifier::DOY: + UnaryExecutor::Execute(left, result, count); + break; + case DatePartSpecifier::HOUR: + UnaryExecutor::Execute(left, result, count); + break; + case DatePartSpecifier::MINUTE: + UnaryExecutor::Execute(left, result, count); + break; + case DatePartSpecifier::SECOND: + case DatePartSpecifier::EPOCH: + UnaryExecutor::Execute(left, result, count); + break; + case DatePartSpecifier::MILLISECONDS: + UnaryExecutor::Execute(left, result, count); + break; + case DatePartSpecifier::MICROSECONDS: + UnaryExecutor::Execute(left, result, count); + break; + default: + throw 
NotImplementedException("Specifier type not implemented for DATETRUNC"); + } +} + +template +static void DateTruncFunction(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.ColumnCount() == 2); + auto &part_arg = args.data[0]; + auto &date_arg = args.data[1]; + + if (part_arg.GetVectorType() == VectorType::CONSTANT_VECTOR) { + // Common case of constant part. + if (ConstantVector::IsNull(part_arg)) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + ConstantVector::SetNull(result, true); + } else { + const auto type = GetDatePartSpecifier(ConstantVector::GetData(part_arg)->GetString()); + DateTruncUnaryExecutor(type, date_arg, result, args.size()); + } + } else { + BinaryExecutor::ExecuteStandard(part_arg, date_arg, result, + args.size()); + } +} + void DateTruncFun::RegisterFunction(BuiltinFunctions &set) { ScalarFunctionSet date_trunc("date_trunc"); + date_trunc.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::TIMESTAMP}, LogicalType::TIMESTAMP, + DateTruncFunction)); date_trunc.AddFunction( - ScalarFunction({LogicalType::VARCHAR, LogicalType::TIMESTAMP}, LogicalType::TIMESTAMP, - ScalarFunction::BinaryFunction)); - date_trunc.AddFunction( - ScalarFunction({LogicalType::VARCHAR, LogicalType::DATE}, LogicalType::TIMESTAMP, - ScalarFunction::BinaryFunction)); + ScalarFunction({LogicalType::VARCHAR, LogicalType::DATE}, LogicalType::TIMESTAMP, DateTruncFunction)); set.AddFunction(date_trunc); date_trunc.name = "datetrunc"; set.AddFunction(date_trunc); @@ -64779,20 +70238,30 @@ void DateTruncFun::RegisterFunction(BuiltinFunctions &set) { namespace duckdb { +struct EpochSecOperator { + template + static RESULT_TYPE Operation(INPUT_TYPE input) { + return Timestamp::FromEpochSeconds(input); + } +}; + static void EpochSecFunction(DataChunk &input, ExpressionState &state, Vector &result) { D_ASSERT(input.ColumnCount() == 1); - string output_buffer; - UnaryExecutor::Execute(input.data[0], result, input.size(), - [&](int64_t input) 
{ return Timestamp::FromEpochSeconds(input); }); + UnaryExecutor::Execute(input.data[0], result, input.size()); } +struct EpochMillisOperator { + template + static RESULT_TYPE Operation(INPUT_TYPE input) { + return Timestamp::FromEpochMs(input); + } +}; + static void EpochMillisFunction(DataChunk &input, ExpressionState &state, Vector &result) { D_ASSERT(input.ColumnCount() == 1); - string output_buffer; - UnaryExecutor::Execute(input.data[0], result, input.size(), - [&](int64_t input) { return Timestamp::FromEpochMs(input); }); + UnaryExecutor::Execute(input.data[0], result, input.size()); } void EpochFun::RegisterFunction(BuiltinFunctions &set) { @@ -64827,7 +70296,7 @@ void EpochFun::RegisterFunction(BuiltinFunctions &set) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2003-2009 The RE2 Authors. All Rights Reserved. @@ -64919,7 +70388,7 @@ void EpochFun::RegisterFunction(BuiltinFunctions &set) { // Example: integer overflow causes failure // CHECK(!RE2::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i)); // -// NOTE: Asking for substrings slows successful matches quite a bit. +// NOTE(rsc): Asking for substrings slows successful matches quite a bit. // This may get a little faster in the future, but right now is slower // than PCRE. On the other hand, failed matches run *very* fast (faster // than PCRE), as do matches without substring extraction. @@ -65031,7 +70500,7 @@ void EpochFun::RegisterFunction(BuiltinFunctions &set) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2001-2010 The RE2 Authors. All Rights Reserved. 
@@ -65682,7 +71151,7 @@ class RE2 { void set_encoding(Encoding encoding) { encoding_ = encoding; } // Legacy interface to encoding. - // TODO: Remove once clients have been converted. + // TODO(rsc): Remove once clients have been converted. bool utf8() const { return encoding_ == EncodingUTF8; } void set_utf8(bool b) { if (b) { @@ -66115,7 +71584,7 @@ idx_t StrfTimeFormat::GetSpecifierLength(StrTimeSpecifier specifier, date_t date len += sec >= 10; break; default: - break; + throw InternalException("Time specifier mismatch"); } return len; } @@ -66126,7 +71595,7 @@ idx_t StrfTimeFormat::GetSpecifierLength(StrTimeSpecifier specifier, date_t date case StrTimeSpecifier::YEAR_WITHOUT_CENTURY: return NumericHelper::UnsignedLength(Date::ExtractYear(date) % 100); default: - throw NotImplementedException("Unimplemented specifier for GetSpecifierLength"); + throw InternalException("Unimplemented specifier for GetSpecifierLength"); } } @@ -66196,11 +71665,11 @@ bool StrfTimeFormat::IsDateSpecifier(StrTimeSpecifier specifier) { switch (specifier) { case StrTimeSpecifier::ABBREVIATED_WEEKDAY_NAME: case StrTimeSpecifier::FULL_WEEKDAY_NAME: - case StrTimeSpecifier::WEEKDAY_DECIMAL: case StrTimeSpecifier::DAY_OF_YEAR_PADDED: + case StrTimeSpecifier::DAY_OF_YEAR_DECIMAL: case StrTimeSpecifier::WEEK_NUMBER_PADDED_MON_FIRST: case StrTimeSpecifier::WEEK_NUMBER_PADDED_SUN_FIRST: - case StrTimeSpecifier::DAY_OF_YEAR_DECIMAL: + case StrTimeSpecifier::WEEKDAY_DECIMAL: return true; default: return false; @@ -66210,17 +71679,17 @@ bool StrfTimeFormat::IsDateSpecifier(StrTimeSpecifier specifier) { char *StrfTimeFormat::WriteDateSpecifier(StrTimeSpecifier specifier, date_t date, char *target) { switch (specifier) { case StrTimeSpecifier::ABBREVIATED_WEEKDAY_NAME: { - date_t dow = Date::ExtractISODayOfTheWeek(date); + auto dow = Date::ExtractISODayOfTheWeek(date); target = WriteString(target, Date::DAY_NAMES_ABBREVIATED[dow % 7]); break; } case StrTimeSpecifier::FULL_WEEKDAY_NAME: { - date_t 
dow = Date::ExtractISODayOfTheWeek(date); + auto dow = Date::ExtractISODayOfTheWeek(date); target = WriteString(target, Date::DAY_NAMES[dow % 7]); break; } case StrTimeSpecifier::WEEKDAY_DECIMAL: { - date_t dow = Date::ExtractISODayOfTheWeek(date); + auto dow = Date::ExtractISODayOfTheWeek(date); *target = char('0' + uint8_t(dow % 7)); target++; break; @@ -66243,7 +71712,7 @@ char *StrfTimeFormat::WriteDateSpecifier(StrTimeSpecifier specifier, date_t date break; } default: - throw NotImplementedException("Unimplemented date specifier for strftime"); + throw InternalException("Unimplemented date specifier for strftime"); } return target; } @@ -66266,7 +71735,7 @@ char *StrfTimeFormat::WriteStandardSpecifier(StrTimeSpecifier specifier, int32_t target = WritePadded2(target, data[1]); break; case StrTimeSpecifier::YEAR_WITHOUT_CENTURY_PADDED: - target = WritePadded2(target, data[0] % 100); + target = WritePadded2(target, AbsValue(data[0]) % 100); break; case StrTimeSpecifier::YEAR_DECIMAL: if (data[0] >= 0 && data[0] <= 9999) { @@ -66353,7 +71822,7 @@ char *StrfTimeFormat::WriteStandardSpecifier(StrTimeSpecifier specifier, int32_t break; } default: - throw NotImplementedException("Unimplemented specifier for WriteStandardSpecifier in strftime"); + throw InternalException("Unimplemented specifier for WriteStandardSpecifier in strftime"); } return target; } @@ -66435,7 +71904,7 @@ string StrTimeFormat::ParseFormatSpecifier(string format_string, StrTimeFormat & specifier = StrTimeSpecifier::DAY_OF_YEAR_DECIMAL; break; default: - return "Unrecognized format for strftime/strptime: %-" + string(format_char, 1); + return "Unrecognized format for strftime/strptime: %-" + string(1, format_char); } } else { switch (format_char) { @@ -66535,7 +72004,7 @@ string StrTimeFormat::ParseFormatSpecifier(string format_string, StrTimeFormat & continue; } default: - return "Unrecognized format for strftime/strptime: %" + string(format_char, 1); + return "Unrecognized format for 
strftime/strptime: %" + string(1, format_char); } } format.AddFormatSpecifier(move(current_literal), specifier); @@ -66564,7 +72033,7 @@ struct StrfTimeBindData : public FunctionData { static unique_ptr StrfTimeBindFunction(ClientContext &context, ScalarFunction &bound_function, vector> &arguments) { - if (!arguments[1]->IsScalar()) { + if (!arguments[1]->IsFoldable()) { throw InvalidInputException("strftime format must be a constant"); } Value options_str = ExpressionExecutor::EvaluateScalar(*arguments[1]); @@ -66588,12 +72057,11 @@ static void StrfTimeFunctionDate(DataChunk &args, ExpressionState &state, Vector ConstantVector::SetNull(result, true); return; } - - dtime_t time = 0; - UnaryExecutor::Execute(args.data[0], result, args.size(), [&](date_t date) { - idx_t len = info.format.GetLength(date, time); + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](date_t input) { + dtime_t time(0); + idx_t len = info.format.GetLength(input, time); string_t target = StringVector::EmptyString(result, len); - info.format.FormatString(date, time, target.GetDataWriteable()); + info.format.FormatString(input, time, target.GetDataWriteable()); target.Finalize(); return target; }); @@ -66609,10 +72077,10 @@ static void StrfTimeFunctionTimestamp(DataChunk &args, ExpressionState &state, V return; } - UnaryExecutor::Execute(args.data[0], result, args.size(), [&](timestamp_t timestamp) { + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](timestamp_t input) { date_t date; dtime_t time; - Timestamp::Convert(timestamp, date, time); + Timestamp::Convert(input, date, time); idx_t len = info.format.GetLength(date, time); string_t target = StringVector::EmptyString(result, len); info.format.FormatString(date, time, target.GetDataWriteable()); @@ -66634,16 +72102,6 @@ void StrfTimeFun::RegisterFunction(BuiltinFunctions &set) { } void StrpTimeFormat::AddFormatSpecifier(string preceding_literal, StrTimeSpecifier specifier) { - switch (specifier) { - case 
StrTimeSpecifier::DAY_OF_YEAR_PADDED: - case StrTimeSpecifier::DAY_OF_YEAR_DECIMAL: - case StrTimeSpecifier::WEEKDAY_DECIMAL: - case StrTimeSpecifier::WEEK_NUMBER_PADDED_SUN_FIRST: - case StrTimeSpecifier::WEEK_NUMBER_PADDED_MON_FIRST: - throw NotImplementedException("Unimplemented specifier for strptime"); - default: - break; - } numeric_width.push_back(NumericSpecifierWidth(specifier)); StrTimeFormat::AddFormatSpecifier(move(preceding_literal), specifier); } @@ -66736,6 +72194,12 @@ bool StrpTimeFormat::Parse(string_t str, ParseResult &result) { idx_t pos = 0; TimeSpecifierAMOrPM ampm = TimeSpecifierAMOrPM::TIME_SPECIFIER_NONE; + // Year offset state (Year+W/j) + auto offset_specifier = StrTimeSpecifier::WEEKDAY_DECIMAL; + uint64_t weekno = 0; + uint64_t weekday = 0; + uint64_t yearday = 0; + for (idx_t i = 0;; i++) { // first compare the literal if (literals[i].size() > (size - pos) || memcmp(data + pos, literals[i].c_str(), literals[i].size()) != 0) { @@ -66774,6 +72238,7 @@ bool StrpTimeFormat::Parse(string_t str, ParseResult &result) { } // day of the month result_data[2] = number; + offset_specifier = specifiers[i]; break; case StrTimeSpecifier::MONTH_DECIMAL_PADDED: case StrTimeSpecifier::MONTH_DECIMAL: @@ -66784,6 +72249,7 @@ bool StrpTimeFormat::Parse(string_t str, ParseResult &result) { } // month number result_data[1] = number; + offset_specifier = specifiers[i]; break; case StrTimeSpecifier::YEAR_WITHOUT_CENTURY_PADDED: case StrTimeSpecifier::YEAR_WITHOUT_CENTURY: @@ -66846,22 +72312,74 @@ bool StrpTimeFormat::Parse(string_t str, ParseResult &result) { result_data[5] = number; break; case StrTimeSpecifier::MICROSECOND_PADDED: - if (number >= 1000000ULL) { - error_message = "Microseconds out of range, expected a value between 0 and 999999"; - error_position = start_pos; - return false; - } + D_ASSERT(number < 1000000ULL); // enforced by the length of the number // milliseconds result_data[6] = number; break; case StrTimeSpecifier::MILLISECOND_PADDED: - 
if (number >= 1000ULL) { - error_message = "Milliseconds out of range, expected a value between 0 and 999"; + D_ASSERT(number < 1000ULL); // enforced by the length of the number + // milliseconds + result_data[6] = number * 1000; + break; + case StrTimeSpecifier::WEEK_NUMBER_PADDED_SUN_FIRST: + case StrTimeSpecifier::WEEK_NUMBER_PADDED_MON_FIRST: + // m/d overrides WU/w but does not conflict + switch (offset_specifier) { + case StrTimeSpecifier::DAY_OF_MONTH_PADDED: + case StrTimeSpecifier::DAY_OF_MONTH: + case StrTimeSpecifier::MONTH_DECIMAL_PADDED: + case StrTimeSpecifier::MONTH_DECIMAL: + // Just validate, don't use + break; + case StrTimeSpecifier::WEEKDAY_DECIMAL: + // First offset specifier + offset_specifier = specifiers[i]; + break; + default: + error_message = "Multiple year offsets specified"; error_position = start_pos; return false; } - // milliseconds - result_data[6] = number * 1000; + if (number > 53) { + error_message = "Week out of range, expected a value between 0 and 53"; + error_position = start_pos; + return false; + } + weekno = number; + break; + case StrTimeSpecifier::WEEKDAY_DECIMAL: + if (number > 6) { + error_message = "Weekday out of range, expected a value between 0 and 6"; + error_position = start_pos; + return false; + } + weekday = number; + break; + case StrTimeSpecifier::DAY_OF_YEAR_PADDED: + case StrTimeSpecifier::DAY_OF_YEAR_DECIMAL: + // m/d overrides j but does not conflict + switch (offset_specifier) { + case StrTimeSpecifier::DAY_OF_MONTH_PADDED: + case StrTimeSpecifier::DAY_OF_MONTH: + case StrTimeSpecifier::MONTH_DECIMAL_PADDED: + case StrTimeSpecifier::MONTH_DECIMAL: + // Just validate, don't use + break; + case StrTimeSpecifier::WEEKDAY_DECIMAL: + // First offset specifier + offset_specifier = specifiers[i]; + break; + default: + error_message = "Multiple year offsets specified"; + error_position = start_pos; + return false; + } + if (number < 1 || number > 366) { + error_message = "Year day out of range, expected a value 
between 1 and 366"; + error_position = start_pos; + return false; + } + yearday = number; break; default: throw NotImplementedException("Unsupported specifier for strptime"); @@ -66975,6 +72493,39 @@ bool StrpTimeFormat::Parse(string_t str, ParseResult &result) { } } } + switch (offset_specifier) { + case StrTimeSpecifier::WEEK_NUMBER_PADDED_SUN_FIRST: + case StrTimeSpecifier::WEEK_NUMBER_PADDED_MON_FIRST: { + // Adjust weekday to be 0-based for the week type + weekday = (weekday + 7 - int(offset_specifier == StrTimeSpecifier::WEEK_NUMBER_PADDED_MON_FIRST)) % 7; + // Get the start of week 1, move back 7 days and then weekno * 7 + weekday gives the date + const auto jan1 = Date::FromDate(result_data[0], 1, 1); + auto yeardate = Date::GetMondayOfCurrentWeek(jan1); + yeardate -= int(offset_specifier == StrTimeSpecifier::WEEK_NUMBER_PADDED_SUN_FIRST); + // Is there a week 0? + yeardate -= 7 * int(yeardate >= jan1); + yeardate += weekno * 7 + weekday; + Date::Convert(yeardate, result_data[0], result_data[1], result_data[2]); + break; + } + case StrTimeSpecifier::DAY_OF_YEAR_PADDED: + case StrTimeSpecifier::DAY_OF_YEAR_DECIMAL: { + auto yeardate = Date::FromDate(result_data[0], 1, 1); + yeardate += yearday - 1; + Date::Convert(yeardate, result_data[0], result_data[1], result_data[2]); + break; + } + case StrTimeSpecifier::DAY_OF_MONTH_PADDED: + case StrTimeSpecifier::DAY_OF_MONTH: + case StrTimeSpecifier::MONTH_DECIMAL_PADDED: + case StrTimeSpecifier::MONTH_DECIMAL: + // m/d overrides UWw/j + break; + default: + D_ASSERT(offset_specifier == StrTimeSpecifier::WEEKDAY_DECIMAL); + break; + } + return true; } @@ -66991,8 +72542,8 @@ struct StrpTimeBindData : public FunctionData { static unique_ptr StrpTimeBindFunction(ClientContext &context, ScalarFunction &bound_function, vector> &arguments) { - if (!arguments[1]->IsScalar()) { - throw InvalidInputException("strftime format must be a constant"); + if (!arguments[1]->IsFoldable()) { + throw InvalidInputException("strptime 
format must be a constant"); } Value options_str = ExpressionExecutor::EvaluateScalar(*arguments[1]); StrpTimeFormat format; @@ -67014,26 +72565,56 @@ string StrpTimeFormat::FormatStrpTimeError(const string &input, idx_t position) return input + "\n" + string(position, ' ') + "^"; } +date_t StrpTimeFormat::ParseResult::ToDate() { + return Date::FromDate(data[0], data[1], data[2]); +} + +timestamp_t StrpTimeFormat::ParseResult::ToTimestamp() { + date_t date = Date::FromDate(data[0], data[1], data[2]); + dtime_t time = Time::FromTime(data[3], data[4], data[5], data[6]); + return Timestamp::FromDatetime(date, time); +} + +string StrpTimeFormat::ParseResult::FormatError(string_t input, const string &format_specifier) { + return StringUtil::Format("Could not parse string \"%s\" according to format specifier \"%s\"\n%s\nError: %s", + input.GetString(), format_specifier, + FormatStrpTimeError(input.GetString(), error_position), error_message); +} + +bool StrpTimeFormat::TryParseDate(string_t input, date_t &result, string &error_message) { + ParseResult parse_result; + if (!Parse(input, parse_result)) { + error_message = parse_result.FormatError(input, format_specifier); + return false; + } + result = parse_result.ToDate(); + return true; +} + +bool StrpTimeFormat::TryParseTimestamp(string_t input, timestamp_t &result, string &error_message) { + ParseResult parse_result; + if (!Parse(input, parse_result)) { + error_message = parse_result.FormatError(input, format_specifier); + return false; + } + result = parse_result.ToTimestamp(); + return true; +} + date_t StrpTimeFormat::ParseDate(string_t input) { ParseResult result; if (!Parse(input, result)) { - throw InvalidInputException( - "Could not parse string \"%s\" according to format specifier \"%s\"\n%s\nError: %s", input.GetString(), - format_specifier, FormatStrpTimeError(input.GetString(), result.error_position), result.error_message); + throw InvalidInputException(result.FormatError(input, format_specifier)); } - 
return Date::FromDate(result.data[0], result.data[1], result.data[2]); + return result.ToDate(); } timestamp_t StrpTimeFormat::ParseTimestamp(string_t input) { ParseResult result; if (!Parse(input, result)) { - throw InvalidInputException( - "Could not parse string \"%s\" according to format specifier \"%s\"\n%s\nError: %s", input.GetString(), - format_specifier, FormatStrpTimeError(input.GetString(), result.error_position), result.error_message); + throw InvalidInputException(result.FormatError(input, format_specifier)); } - date_t date = Date::FromDate(result.data[0], result.data[1], result.data[2]); - dtime_t time = Time::FromTime(result.data[3], result.data[4], result.data[5], result.data[6]); - return Timestamp::FromDatetime(date, time); + return result.ToTimestamp(); } static void StrpTimeFunction(DataChunk &args, ExpressionState &state, Vector &result) { @@ -67151,7 +72732,7 @@ struct ToMilliSecondsOperator { result.days = 0; if (!TryMultiplyOperator::Operation(input, Interval::MICROS_PER_MSEC, result.micros)) { - throw OutOfRangeException("Interval value %d seconds out of range", input); + throw OutOfRangeException("Interval value %d milliseconds out of range", input); } return result; } @@ -67195,6 +72776,7 @@ namespace duckdb { void BuiltinFunctions::RegisterDateFunctions() { Register(); + Register(); Register(); Register(); Register(); @@ -67253,6 +72835,10 @@ struct CurrentSettingFun { static void RegisterFunction(BuiltinFunctions &set); }; +struct SystemFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + } // namespace duckdb @@ -67384,12 +72970,12 @@ unique_ptr CurrentSettingBind(ClientContext &context, ScalarFuncti if (key_child->return_type.id() != LogicalTypeId::VARCHAR || key_child->return_type.id() != LogicalTypeId::VARCHAR || !key_child->IsFoldable()) { - throw Exception("Key name for struct_extract needs to be a constant string"); + throw ParserException("Key name for struct_extract needs to be a constant string"); } Value 
key_val = ExpressionExecutor::EvaluateScalar(*key_child.get()); D_ASSERT(key_val.type().id() == LogicalTypeId::VARCHAR); if (key_val.is_null || key_val.str_value.length() < 1) { - throw Exception("Key name for struct_extract needs to be neither NULL nor empty"); + throw ParserException("Key name for struct_extract needs to be neither NULL nor empty"); } auto &config_map = context.db->config.set_variables; @@ -67606,6 +73192,7 @@ void BuiltinFunctions::RegisterGenericFunctions() { Register(); Register(); Register(); + Register(); } } // namespace duckdb @@ -67708,6 +73295,10 @@ struct FactorialFun { static void RegisterFunction(BuiltinFunctions &set); }; +struct NextAfterFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + } // namespace duckdb @@ -67795,9 +73386,10 @@ static scalar_function_t GetScalarIntegerUnaryFunctionFixedReturn(const LogicalT return function; } +template struct UnaryDoubleWrapper { - template - static inline RESULT_TYPE Operation(FUNC fun, INPUT_TYPE input, ValidityMask &mask, idx_t idx) { + template + static RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) { RESULT_TYPE result = OP::template Operation(input); if (std::isnan(result) || std::isinf(result) || errno != 0) { errno = 0; @@ -67806,17 +73398,13 @@ struct UnaryDoubleWrapper { } return result; } - - static bool AddsNulls() { - return true; - } }; template static void UnaryDoubleFunctionWrapper(DataChunk &input, ExpressionState &state, Vector &result) { D_ASSERT(input.ColumnCount() >= 1); errno = 0; - UnaryExecutor::Execute(input.data[0], result, input.size()); + UnaryExecutor::GenericExecute>(input.data[0], result, input.size(), nullptr, true); } struct BinaryDoubleWrapper { @@ -67843,6 +73431,46 @@ static void BinaryDoubleFunctionWrapper(DataChunk &input, ExpressionState &state BinaryExecutor::Execute(input.data[0], input.data[1], result, input.size()); } +//===--------------------------------------------------------------------===// 
+// nextafter +//===--------------------------------------------------------------------===// + +struct NextAfterOperator { + template + static inline TR Operation(TA base, TB exponent) { + throw NotImplementedException("Unimplemented type for NextAfter Function"); + } + + template + static inline double Operation(double input, double approximate_to) { + return nextafter(input, approximate_to); + } + template + static inline float Operation(float input, float approximate_to) { + return nextafterf(input, approximate_to); + } +}; + +unique_ptr BindNextAfter(ClientContext &context, ScalarFunction &function, + vector> &arguments) { + if ((arguments[0]->return_type != arguments[1]->return_type) || + (arguments[0]->return_type != LogicalType::FLOAT && arguments[0]->return_type != LogicalType::DOUBLE)) { + throw NotImplementedException("Unimplemented type for NextAfter Function"); + } + return nullptr; +} + +void NextAfterFun::RegisterFunction(BuiltinFunctions &set) { + ScalarFunctionSet next_after_fun("nextafter"); + next_after_fun.AddFunction( + ScalarFunction("nextafter", {LogicalType::DOUBLE, LogicalType::DOUBLE}, LogicalType::DOUBLE, + BinaryDoubleFunctionWrapper, false, BindNextAfter)); + next_after_fun.AddFunction(ScalarFunction("nextafter", {LogicalType::FLOAT, LogicalType::FLOAT}, LogicalType::FLOAT, + BinaryDoubleFunctionWrapper, false, + BindNextAfter)); + set.AddFunction(next_after_fun); +} + //===--------------------------------------------------------------------===// // abs //===--------------------------------------------------------------------===// @@ -67961,15 +73589,17 @@ struct CeilOperator { template static void GenericRoundFunctionDecimal(DataChunk &input, ExpressionState &state, Vector &result) { auto &func_expr = (BoundFunctionExpression &)state.expr; - OP::template Operation(input, func_expr.children[0]->return_type.scale(), result); + OP::template Operation(input, DecimalType::GetScale(func_expr.children[0]->return_type), result); } template 
unique_ptr BindGenericRoundFunctionDecimal(ClientContext &context, ScalarFunction &bound_function, vector> &arguments) { // ceil essentially removes the scale - auto decimal_type = arguments[0]->return_type; - if (decimal_type.scale() == 0) { + auto &decimal_type = arguments[0]->return_type; + auto scale = DecimalType::GetScale(decimal_type); + auto width = DecimalType::GetWidth(decimal_type); + if (scale == 0) { bound_function.function = ScalarFunction::NopFunction; } else { switch (decimal_type.InternalType()) { @@ -67988,7 +73618,7 @@ unique_ptr BindGenericRoundFunctionDecimal(ClientContext &context, } } bound_function.arguments[0] = decimal_type; - bound_function.return_type = LogicalType(LogicalTypeId::DECIMAL, decimal_type.width(), 0); + bound_function.return_type = LogicalType::DECIMAL(width, 0); return nullptr; } @@ -68028,7 +73658,7 @@ void CeilFun::RegisterFunction(BuiltinFunctions &set) { bind_func = BindGenericRoundFunctionDecimal; break; default: - throw NotImplementedException("Unimplemented numeric type for function \"ceil\""); + throw InternalException("Unimplemented numeric type for function \"ceil\""); } ceil.AddFunction(ScalarFunction({type}, type, func, false, bind_func)); } @@ -68084,7 +73714,7 @@ void FloorFun::RegisterFunction(BuiltinFunctions &set) { bind_func = BindGenericRoundFunctionDecimal; break; default: - throw NotImplementedException("Unimplemented numeric type for function \"floor\""); + throw InternalException("Unimplemented numeric type for function \"floor\""); } floor.AddFunction(ScalarFunction({type}, type, func, false, bind_func)); } @@ -68164,8 +73794,9 @@ template static void DecimalRoundNegativePrecisionFunction(DataChunk &input, ExpressionState &state, Vector &result) { auto &func_expr = (BoundFunctionExpression &)state.expr; auto &info = (RoundPrecisionFunctionData &)*func_expr.bind_info; - auto source_scale = func_expr.children[0]->return_type.scale(); - if (-info.target_scale >= 
func_expr.children[0]->return_type.width()) { + auto source_scale = DecimalType::GetScale(func_expr.children[0]->return_type); + auto width = DecimalType::GetWidth(func_expr.children[0]->return_type); + if (-info.target_scale >= width) { // scale too big for width result.SetVectorType(VectorType::CONSTANT_VECTOR); result.SetValue(0, Value::INTEGER(0)); @@ -68174,6 +73805,7 @@ static void DecimalRoundNegativePrecisionFunction(DataChunk &input, ExpressionSt T divide_power_of_ten = POWERS_OF_TEN_CLASS::POWERS_OF_TEN[-info.target_scale + source_scale]; T multiply_power_of_ten = POWERS_OF_TEN_CLASS::POWERS_OF_TEN[-info.target_scale]; T addition = divide_power_of_ten / 2; + UnaryExecutor::Execute(input.data[0], result, input.size(), [&](T input) { if (input < 0) { input -= addition; @@ -68188,7 +73820,7 @@ template static void DecimalRoundPositivePrecisionFunction(DataChunk &input, ExpressionState &state, Vector &result) { auto &func_expr = (BoundFunctionExpression &)state.expr; auto &info = (RoundPrecisionFunctionData &)*func_expr.bind_info; - auto source_scale = func_expr.children[0]->return_type.scale(); + auto source_scale = DecimalType::GetScale(func_expr.children[0]->return_type); T power_of_ten = POWERS_OF_TEN_CLASS::POWERS_OF_TEN[source_scale - info.target_scale]; T addition = power_of_ten / 2; UnaryExecutor::Execute(input.data[0], result, input.size(), [&](T input) { @@ -68203,7 +73835,7 @@ static void DecimalRoundPositivePrecisionFunction(DataChunk &input, ExpressionSt unique_ptr BindDecimalRoundPrecision(ClientContext &context, ScalarFunction &bound_function, vector> &arguments) { - auto decimal_type = arguments[0]->return_type; + auto &decimal_type = arguments[0]->return_type; if (!arguments[1]->IsFoldable()) { throw NotImplementedException("ROUND(DECIMAL, INTEGER) with non-constant precision is not supported"); } @@ -68218,6 +73850,8 @@ unique_ptr BindDecimalRoundPrecision(ClientContext &context, Scala // i.e. 
ROUND(DECIMAL(18,3), -1) -> DECIMAL(18,0) int32_t round_value = val.value_.integer; uint8_t target_scale; + auto width = DecimalType::GetWidth(decimal_type); + auto scale = DecimalType::GetScale(decimal_type); if (round_value < 0) { target_scale = 0; switch (decimal_type.InternalType()) { @@ -68235,10 +73869,10 @@ unique_ptr BindDecimalRoundPrecision(ClientContext &context, Scala break; } } else { - if (round_value >= (int32_t)decimal_type.scale()) { + if (round_value >= (int32_t)scale) { // if round_value is bigger than or equal to scale we do nothing bound_function.function = ScalarFunction::NopFunction; - target_scale = decimal_type.scale(); + target_scale = scale; } else { target_scale = round_value; switch (decimal_type.InternalType()) { @@ -68258,7 +73892,7 @@ unique_ptr BindDecimalRoundPrecision(ClientContext &context, Scala } } bound_function.arguments[0] = decimal_type; - bound_function.return_type = LogicalType(LogicalTypeId::DECIMAL, decimal_type.width(), target_scale); + bound_function.return_type = LogicalType::DECIMAL(width, target_scale); return make_unique(round_value); } @@ -68287,7 +73921,7 @@ void RoundFun::RegisterFunction(BuiltinFunctions &set) { bind_prec_func = BindDecimalRoundPrecision; break; default: - throw NotImplementedException("Unimplemented numeric type for function \"floor\""); + throw InternalException("Unimplemented numeric type for function \"floor\""); } round.AddFunction(ScalarFunction({type}, type, round_func, false, bind_func)); round.AddFunction(ScalarFunction({type, LogicalType::INTEGER}, type, round_prec_func, false, bind_prec_func)); @@ -68759,60 +74393,14 @@ void BuiltinFunctions::RegisterMathFunctions() { Register(); Register(); + + Register(); } } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/function/scalar/nested_functions.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - 
-namespace duckdb { - -struct VariableReturnBindData : public FunctionData { - LogicalType stype; - - explicit VariableReturnBindData(LogicalType stype) : stype(stype) { - } - - unique_ptr Copy() override { - return make_unique(stype); - } -}; - -struct ArraySliceFun { - static void RegisterFunction(BuiltinFunctions &set); -}; - -struct StructPackFun { - static void RegisterFunction(BuiltinFunctions &set); -}; - -struct ListValueFun { - static void RegisterFunction(BuiltinFunctions &set); -}; - -struct ListExtractFun { - static void RegisterFunction(BuiltinFunctions &set); -}; - -struct StructExtractFun { - static ScalarFunction GetFunction(); - static void RegisterFunction(BuiltinFunctions &set); -}; - -} // namespace duckdb @@ -68967,7 +74555,6 @@ static void ArraySliceFunction(DataChunk &args, ExpressionState &state, Vector & switch (result.GetType().id()) { case LogicalTypeId::LIST: // Share the value dictionary as we are just going to slice it - D_ASSERT(result.GetType().child_types().size() == 1); ListVector::ReferenceEntry(result, s); ExecuteSlice(result, s, b, e, count); break; @@ -69018,6 +74605,44 @@ void ArraySliceFun::RegisterFunction(BuiltinFunctions &set) { +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/statistics/list_statistics.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + +namespace duckdb { +class Value; + +class ListStatistics : public BaseStatistics { +public: + explicit ListStatistics(LogicalType type); + + unique_ptr child_stats; + +public: + void Merge(const BaseStatistics &other) override; + FilterPropagateResult CheckZonemap(ExpressionType comparison_type, const Value &constant); + + unique_ptr Copy() override; + void Serialize(Serializer &serializer) override; + static unique_ptr Deserialize(Deserializer &source, LogicalType type); + void Verify(Vector &vector, const SelectionVector &sel, idx_t count) override; 
+ + string ToString() override; +}; + +} // namespace duckdb + + namespace duckdb { @@ -69077,7 +74702,6 @@ void ListExtractTemplate(idx_t count, Vector &list, Vector &offsets, Vector &res static void ExecuteListExtract(Vector &result, Vector &list, Vector &offsets, const idx_t count) { D_ASSERT(list.GetType().id() == LogicalTypeId::LIST); - D_ASSERT(list.GetType().child_types().size() == 1); switch (result.GetType().id()) { case LogicalTypeId::UTINYINT: @@ -69129,6 +74753,16 @@ static void ExecuteListExtract(Vector &result, Vector &list, Vector &offsets, co case LogicalTypeId::SQLNULL: result.Reference(Value()); break; + case LogicalTypeId::LIST: { + // nested list: we have to reference the child + auto &child_list = ListVector::GetEntry(list); + auto &child_child_list = ListVector::GetEntry(child_list); + + ListVector::GetEntry(result).Reference(child_child_list); + ListVector::SetListSize(result, ListVector::GetListSize(child_list)); + ListExtractTemplate(count, list, offsets, result); + break; + } default: throw NotImplementedException("Unimplemented type for LIST_EXTRACT"); } @@ -69174,16 +74808,31 @@ static unique_ptr ListExtractBind(ClientContext &context, ScalarFu D_ASSERT(bound_function.arguments.size() == 2); D_ASSERT(LogicalTypeId::LIST == arguments[0]->return_type.id()); // list extract returns the child type of the list as return type - bound_function.return_type = arguments[0]->return_type.child_types()[0].second; + bound_function.return_type = ListType::GetChildType(arguments[0]->return_type); return make_unique(bound_function.return_type); } +static unique_ptr ListExtractStats(ClientContext &context, BoundFunctionExpression &expr, + FunctionData *bind_data, + vector> &child_stats) { + if (!child_stats[0]) { + return nullptr; + } + auto &list_stats = (ListStatistics &)*child_stats[0]; + if (!list_stats.child_stats) { + return nullptr; + } + auto child_copy = list_stats.child_stats->Copy(); + // list_extract always pushes a NULL, since if the offset 
is out of range for a list it inserts a null + child_copy->validity_stats = make_unique(true); + return child_copy; +} + void ListExtractFun::RegisterFunction(BuiltinFunctions &set) { // the arguments and return types are actually set in the binder function - LogicalType list_of_any(LogicalTypeId::LIST, {make_pair("", LogicalTypeId::ANY)}); - ScalarFunction lfun({list_of_any, LogicalType::BIGINT}, LogicalType::ANY, ListExtractFunction, false, - ListExtractBind); + ScalarFunction lfun({LogicalType::LIST(LogicalType::ANY), LogicalType::BIGINT}, LogicalType::ANY, + ListExtractFunction, false, ListExtractBind, nullptr, ListExtractStats); ScalarFunction sfun({LogicalType::VARCHAR, LogicalType::INTEGER}, LogicalType::VARCHAR, ListExtractFunction, false, nullptr); @@ -69218,10 +74867,7 @@ namespace duckdb { static void ListValueFunction(DataChunk &args, ExpressionState &state, Vector &result) { D_ASSERT(result.GetType().id() == LogicalTypeId::LIST); - D_ASSERT(result.GetType().child_types().size() == 1); - auto child_type = result.GetType().child_types()[0].second; - auto list_child = make_unique(child_type); - ListVector::SetEntry(result, move(list_child)); + auto &child_type = ListType::GetChildType(result.GetType()); result.SetVectorType(VectorType::CONSTANT_VECTOR); for (idx_t i = 0; i < args.ColumnCount(); i++) { @@ -69245,21 +74891,35 @@ static void ListValueFunction(DataChunk &args, ExpressionState &state, Vector &r static unique_ptr ListValueBind(ClientContext &context, ScalarFunction &bound_function, vector> &arguments) { // collect names and deconflict, construct return type - child_list_t child_types; - if (!arguments.empty()) { - child_types.push_back(make_pair("", arguments[0]->return_type)); - } else { - child_types.push_back(make_pair("", LogicalType::SQLNULL)); + auto child_type = LogicalType::SQLNULL; + for (idx_t i = 0; i < arguments.size(); i++) { + child_type = LogicalType::MaxLogicalType(child_type, arguments[i]->return_type); } // this is more for 
completeness reasons - bound_function.return_type = LogicalType(LogicalTypeId::LIST, move(child_types)); + bound_function.varargs = child_type; + bound_function.return_type = LogicalType::LIST(move(child_type)); return make_unique(bound_function.return_type); } +unique_ptr ListValueStats(ClientContext &context, BoundFunctionExpression &expr, + FunctionData *bind_data, vector> &child_stats) { + auto list_stats = make_unique(expr.return_type); + for (idx_t i = 0; i < child_stats.size(); i++) { + if (child_stats[i]) { + list_stats->child_stats->Merge(*child_stats[i]); + } else { + list_stats->child_stats.reset(); + return move(list_stats); + } + } + return move(list_stats); +} + void ListValueFun::RegisterFunction(BuiltinFunctions &set) { // the arguments and return types are actually set in the binder function - ScalarFunction fun("list_value", {}, LogicalType::LIST, ListValueFunction, false, ListValueBind); + ScalarFunction fun("list_value", {}, LogicalTypeId::LIST, ListValueFunction, false, ListValueBind, nullptr, + ListValueStats); fun.varargs = LogicalType::ANY; set.AddFunction(fun); fun.name = "list_pack"; @@ -69272,6 +74932,282 @@ void ListValueFun::RegisterFunction(BuiltinFunctions &set) { + +namespace duckdb { + +static void CardinalityFunction(DataChunk &args, ExpressionState &state, Vector &result) { + auto &map = args.data[0]; + VectorData list_data; + result.SetVectorType(VectorType::FLAT_VECTOR); + auto result_data = FlatVector::GetData(result); + + if (map.GetVectorType() == VectorType::DICTIONARY_VECTOR) { + auto &child = DictionaryVector::Child(map); + auto &dict_sel = DictionaryVector::SelVector(map); + + auto &children = StructVector::GetEntries(child); + children[0]->Orrify(args.size(), list_data); + for (idx_t row = 0; row < args.size(); row++) { + auto list_entry = ((list_entry_t *)list_data.data)[list_data.sel->get_index(dict_sel.get_index(row))]; + result_data[row] = list_entry.length; + } + } else { + auto &children = 
StructVector::GetEntries(map); + children[0]->Orrify(args.size(), list_data); + for (idx_t row = 0; row < args.size(); row++) { + auto list_entry = ((list_entry_t *)list_data.data)[list_data.sel->get_index(row)]; + result_data[row] = list_entry.length; + } + } + + if (args.size() == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } +} + +static unique_ptr CardinalityBind(ClientContext &context, ScalarFunction &bound_function, + vector> &arguments) { + + if (arguments[0]->return_type.id() != LogicalTypeId::MAP) { + throw BinderException("Cardinality can only operate on MAPs"); + } + + bound_function.return_type = LogicalType::UBIGINT; + return make_unique(bound_function.return_type); +} + +void CardinalityFun::RegisterFunction(BuiltinFunctions &set) { + ScalarFunction fun("cardinality", {LogicalType::ANY}, LogicalType::UBIGINT, CardinalityFunction, false, + CardinalityBind); + fun.varargs = LogicalType::ANY; + set.AddFunction(fun); +} + +} // namespace duckdb + + + + + + + +namespace duckdb { + +static void MapFunction(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(result.GetType().id() == LogicalTypeId::MAP); + + //! 
Otherwise if its not a constant vector, this breaks the optimizer + result.SetVectorType(VectorType::CONSTANT_VECTOR); + for (idx_t i = 0; i < args.ColumnCount(); i++) { + if (args.data[i].GetVectorType() != VectorType::CONSTANT_VECTOR) { + result.SetVectorType(VectorType::FLAT_VECTOR); + } + } + + auto &child_entries = StructVector::GetEntries(result); + D_ASSERT(child_entries.size() == 2); + auto &key_vector = child_entries[0]; + auto &value_vector = child_entries[1]; + if (args.data.empty()) { + // no arguments: construct an empty map + ListVector::SetListSize(*key_vector, 0); + key_vector->SetVectorType(VectorType::CONSTANT_VECTOR); + auto list_data = ConstantVector::GetData(*key_vector); + list_data->offset = 0; + list_data->length = 0; + + ListVector::SetListSize(*value_vector, 0); + value_vector->SetVectorType(VectorType::CONSTANT_VECTOR); + list_data = ConstantVector::GetData(*value_vector); + list_data->offset = 0; + list_data->length = 0; + + result.Verify(args.size()); + return; + } + + if (ListVector::GetListSize(args.data[0]) != ListVector::GetListSize(args.data[1])) { + throw Exception("Key list has a different size from Value list"); + } + key_vector->Reference(args.data[0]); + value_vector->Reference(args.data[1]); + + result.Verify(args.size()); +} + +static unique_ptr MapBind(ClientContext &context, ScalarFunction &bound_function, + vector> &arguments) { + child_list_t child_types; + + if (arguments.size() != 2 && !arguments.empty()) { + throw Exception("We need exactly two lists for a map"); + } + if (arguments.size() == 2) { + if (arguments[0]->return_type.id() != LogicalTypeId::LIST) { + throw Exception("First argument is not a list"); + } + if (arguments[1]->return_type.id() != LogicalTypeId::LIST) { + throw Exception("Second argument is not a list"); + } + child_types.push_back(make_pair("key", arguments[0]->return_type)); + child_types.push_back(make_pair("value", arguments[1]->return_type)); + } + + if (arguments.empty()) { + auto empty = 
LogicalType::LIST(LogicalTypeId::SQLNULL); + child_types.push_back(make_pair("key", empty)); + child_types.push_back(make_pair("value", empty)); + } + + //! this is more for completeness reasons + bound_function.return_type = LogicalType::MAP(move(child_types)); + return make_unique(bound_function.return_type); +} + +void MapFun::RegisterFunction(BuiltinFunctions &set) { + //! the arguments and return types are actually set in the binder function + ScalarFunction fun("map", {}, LogicalTypeId::MAP, MapFunction, false, MapBind); + fun.varargs = LogicalType::ANY; + set.AddFunction(fun); +} + +} // namespace duckdb + + + + + + +namespace duckdb { +void FillResult(Value &values, Vector &result, idx_t row) { + //! First Initialize List Vector + idx_t current_offset = ListVector::GetListSize(result); + //! Push Values to List Vector + for (idx_t i = 0; i < values.list_value.size(); i++) { + ListVector::PushBack(result, values.list_value[i]); + } + + //! now set the pointer + auto &entry = ((list_entry_t *)result.GetData())[row]; + entry.length = values.list_value.size(); + entry.offset = current_offset; +} + +static void MapExtractFunction(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 2); + D_ASSERT(args.data[0].GetType().id() == LogicalTypeId::MAP); + result.SetVectorType(VectorType::FLAT_VECTOR); + + auto &map = args.data[0]; + auto &key = args.data[1]; + + auto key_value = key.GetValue(0); + VectorData offset_data; + + if (map.GetVectorType() == VectorType::DICTIONARY_VECTOR) { + auto &child = DictionaryVector::Child(map); + auto &children = StructVector::GetEntries(child); + auto &dict_sel = DictionaryVector::SelVector(map); + children[0]->Orrify(args.size(), offset_data); + auto &key_type = ListType::GetChildType(children[0]->GetType()); + if (key_type != LogicalTypeId::SQLNULL) { + key_value = key_value.CastAs(key_type); + } + for (idx_t row = 0; row < args.size(); row++) { + auto offsets = + 
ListVector::Search(*children[0], key_value, offset_data.sel->get_index(dict_sel.get_index(row))); + auto values = ListVector::GetValuesFromOffsets(*children[1], offsets); + FillResult(values, result, row); + } + } else { + auto &children = StructVector::GetEntries(map); + children[0]->Orrify(args.size(), offset_data); + auto &key_type = ListType::GetChildType(children[0]->GetType()); + if (key_type != LogicalTypeId::SQLNULL) { + key_value = key_value.CastAs(key_type); + } + for (idx_t row = 0; row < args.size(); row++) { + auto offsets = ListVector::Search(*children[0], key_value, offset_data.sel->get_index(row)); + auto values = ListVector::GetValuesFromOffsets(*children[1], offsets); + FillResult(values, result, row); + } + } + + if (args.size() == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + + result.Verify(args.size()); +} + +static unique_ptr MapExtractBind(ClientContext &context, ScalarFunction &bound_function, + vector> &arguments) { + if (arguments.size() != 2) { + throw BinderException("MAP_EXTRACT must have exactly two arguments"); + } + if (arguments[0]->return_type.id() != LogicalTypeId::MAP) { + throw BinderException("MAP_EXTRACT can only operate on MAPs"); + } + auto &child_types = StructType::GetChildTypes(arguments[0]->return_type); + auto &value_type = ListType::GetChildType(child_types[1].second); + + //! 
Here we have to construct the List Type that will be returned + bound_function.return_type = LogicalType::LIST(value_type); + return make_unique(value_type); +} + +void MapExtractFun::RegisterFunction(BuiltinFunctions &set) { + ScalarFunction fun("map_extract", {LogicalType::ANY, LogicalType::ANY}, LogicalType::ANY, MapExtractFunction, false, + MapExtractBind); + fun.varargs = LogicalType::ANY; + set.AddFunction(fun); + fun.name = "element_at"; + set.AddFunction(fun); +} + +} // namespace duckdb + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/statistics/struct_statistics.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + +namespace duckdb { +class Value; + +class StructStatistics : public BaseStatistics { +public: + explicit StructStatistics(LogicalType type); + + vector> child_stats; + +public: + void Merge(const BaseStatistics &other) override; + FilterPropagateResult CheckZonemap(ExpressionType comparison_type, const Value &constant); + + unique_ptr Copy() override; + void Serialize(Serializer &serializer) override; + static unique_ptr Deserialize(Deserializer &source, LogicalType type); + void Verify(Vector &vector, const SelectionVector &sel, idx_t count) override; + + string ToString() override; +}; + +} // namespace duckdb + + namespace duckdb { struct StructExtractBindData : public FunctionData { @@ -69304,31 +75240,21 @@ static void StructExtractFunction(DataChunk &args, ExpressionState &state, Vecto auto &child = DictionaryVector::Child(vec); auto &dict_sel = DictionaryVector::SelVector(vec); auto &children = StructVector::GetEntries(child); - if (info.index >= children.size()) { - throw Exception("Not enough struct entries for struct_extract"); - } + D_ASSERT(info.index < children.size()); auto &struct_child = children[info.index]; - if (struct_child.first != info.key || struct_child.second->GetType() != info.type) { - 
throw Exception("Struct key or type mismatch"); - } - result.Slice(*struct_child.second, dict_sel, args.size()); + result.Slice(*struct_child, dict_sel, args.size()); } else { auto &children = StructVector::GetEntries(vec); - if (info.index >= children.size()) { - throw Exception("Not enough struct entries for struct_extract"); - } + D_ASSERT(info.index < children.size()); auto &struct_child = children[info.index]; - if (struct_child.first != info.key || struct_child.second->GetType() != info.type) { - throw Exception("Struct key or type mismatch"); - } - result.Reference(*struct_child.second); + result.Reference(*struct_child); } result.Verify(args.size()); } static unique_ptr StructExtractBind(ClientContext &context, ScalarFunction &bound_function, vector> &arguments) { - auto &struct_children = arguments[0]->return_type.child_types(); + auto &struct_children = StructType::GetChildTypes(arguments[0]->return_type); if (struct_children.empty()) { throw Exception("Can't extract something from an empty struct"); } @@ -69368,9 +75294,23 @@ static unique_ptr StructExtractBind(ClientContext &context, Scalar return make_unique(key, key_index, return_type); } +static unique_ptr PropagateStructExtractStats(ClientContext &context, BoundFunctionExpression &expr, + FunctionData *bind_data, + vector> &child_stats) { + if (!child_stats[0]) { + return nullptr; + } + auto &struct_stats = (StructStatistics &)*child_stats[0]; + auto &info = (StructExtractBindData &)*bind_data; + if (info.index >= struct_stats.child_stats.size() || !struct_stats.child_stats[info.index]) { + return nullptr; + } + return struct_stats.child_stats[info.index]->Copy(); +} + ScalarFunction StructExtractFun::GetFunction() { - return ScalarFunction("struct_extract", {LogicalType::STRUCT, LogicalType::VARCHAR}, LogicalType::ANY, - StructExtractFunction, false, StructExtractBind); + return ScalarFunction("struct_extract", {LogicalTypeId::STRUCT, LogicalType::VARCHAR}, LogicalType::ANY, + 
StructExtractFunction, false, StructExtractBind, nullptr, PropagateStructExtractStats); } void StructExtractFun::RegisterFunction(BuiltinFunctions &set) { @@ -69386,25 +75326,24 @@ void StructExtractFun::RegisterFunction(BuiltinFunctions &set) { + namespace duckdb { static void StructPackFunction(DataChunk &args, ExpressionState &state, Vector &result) { +#ifdef DEBUG auto &func_expr = (BoundFunctionExpression &)state.expr; auto &info = (VariableReturnBindData &)*func_expr.bind_info; - // this should never happen if the binder below is sane - D_ASSERT(args.ColumnCount() == info.stype.child_types().size()); - + D_ASSERT(args.ColumnCount() == StructType::GetChildTypes(info.stype).size()); +#endif bool all_const = true; + auto &child_entries = StructVector::GetEntries(result); for (size_t i = 0; i < args.ColumnCount(); i++) { if (args.data[i].GetVectorType() != VectorType::CONSTANT_VECTOR) { all_const = false; } // same holds for this - D_ASSERT(args.data[i].GetType() == info.stype.child_types()[i].second); - auto new_child = make_unique(info.stype.child_types()[i].second); - new_child->Reference(args.data[i]); - StructVector::AddEntry(result, info.stype.child_types()[i].first, move(new_child)); + child_entries[i]->Reference(args.data[i]); } result.SetVectorType(all_const ? VectorType::CONSTANT_VECTOR : VectorType::FLAT_VECTOR); @@ -69423,26 +75362,37 @@ static unique_ptr StructPackBind(ClientContext &context, ScalarFun for (idx_t i = 0; i < arguments.size(); i++) { auto &child = arguments[i]; if (child->alias.empty() && bound_function.name == "struct_pack") { - throw Exception("Need named argument for struct pack, e.g. STRUCT_PACK(a := b)"); + throw BinderException("Need named argument for struct pack, e.g. 
STRUCT_PACK(a := b)"); } if (child->alias.empty() && bound_function.name == "row") { child->alias = "v" + std::to_string(i + 1); } if (name_collision_set.find(child->alias) != name_collision_set.end()) { - throw Exception("Duplicate struct entry name"); + throw BinderException("Duplicate struct entry name \"%s\"", child->alias); } name_collision_set.insert(child->alias); struct_children.push_back(make_pair(child->alias, arguments[i]->return_type)); } // this is more for completeness reasons - bound_function.return_type = LogicalType(LogicalTypeId::STRUCT, move(struct_children)); + bound_function.return_type = LogicalType::STRUCT(move(struct_children)); return make_unique(bound_function.return_type); } +unique_ptr StructPackStats(ClientContext &context, BoundFunctionExpression &expr, + FunctionData *bind_data, vector> &child_stats) { + auto struct_stats = make_unique(expr.return_type); + D_ASSERT(child_stats.size() == struct_stats->child_stats.size()); + for (idx_t i = 0; i < struct_stats->child_stats.size(); i++) { + struct_stats->child_stats[i] = child_stats[i] ? 
child_stats[i]->Copy() : nullptr; + } + return move(struct_stats); +} + void StructPackFun::RegisterFunction(BuiltinFunctions &set) { // the arguments and return types are actually set in the binder function - ScalarFunction fun("struct_pack", {}, LogicalType::STRUCT, StructPackFunction, false, StructPackBind); + ScalarFunction fun("struct_pack", {}, LogicalTypeId::STRUCT, StructPackFunction, false, StructPackBind, nullptr, + StructPackStats); fun.varargs = LogicalType::ANY; set.AddFunction(fun); fun.name = "row"; @@ -69460,6 +75410,9 @@ void BuiltinFunctions::RegisterNestedFunctions() { Register(); Register(); Register(); + Register(); + Register(); + Register(); } } // namespace duckdb @@ -69507,6 +75460,20 @@ interval_t AddOperator::Operation(interval_t left, interval_t right) { return left; } +template <> +date_t AddOperator::Operation(date_t left, int32_t right) { + int32_t result; + if (!TryAddOperator::Operation(left.days, right, result)) { + throw OutOfRangeException("Date out of range"); + } + return date_t(result); +} + +template <> +date_t AddOperator::Operation(int32_t left, date_t right) { + return AddOperator::Operation(right, left); +} + template <> date_t AddOperator::Operation(date_t left, interval_t right) { date_t result; @@ -69523,15 +75490,20 @@ date_t AddOperator::Operation(date_t left, interval_t right) { year--; month += Interval::MONTHS_PER_YEAR; } + day = MinValue(day, Date::MonthDays(year, month)); result = Date::FromDate(year, month, day); } else { result = left; } if (right.days != 0) { - result += right.days; + if (!TryAddOperator::Operation(result.days, right.days, result.days)) { + throw OutOfRangeException("Date out of range"); + } } if (right.micros != 0) { - result += right.micros / Interval::MICROS_PER_DAY; + if (!TryAddOperator::Operation(result.days, int32_t(right.micros / Interval::MICROS_PER_DAY), result.days)) { + throw OutOfRangeException("Date out of range"); + } } return result; } @@ -69541,13 +75513,26 @@ date_t 
AddOperator::Operation(interval_t left, date_t right) { return AddOperator::Operation(right, left); } +dtime_t AddIntervalToTimeOperation(dtime_t left, interval_t right, date_t &date) { + int64_t diff = right.micros - ((right.micros / Interval::MICROS_PER_DAY) * Interval::MICROS_PER_DAY); + left += diff; + if (left.micros >= Interval::MICROS_PER_DAY) { + left.micros -= Interval::MICROS_PER_DAY; + date.days++; + } else if (left.micros < 0) { + left.micros += Interval::MICROS_PER_DAY; + date.days--; + } + return left; +} + template <> timestamp_t AddOperator::Operation(timestamp_t left, interval_t right) { date_t date; dtime_t time; Timestamp::Convert(left, date, time); auto new_date = AddOperator::Operation(date, right); - auto new_time = AddTimeOperator::Operation(time, right); + auto new_time = AddIntervalToTimeOperation(time, right, new_date); return Timestamp::FromDatetime(new_date, new_time); } @@ -69620,10 +75605,6 @@ bool TryAddOperator::Operation(int64_t left, int64_t right, int64_t &result) { return false; } #endif - // FIXME: this check can be removed if we get rid of NullValue - if (result == std::numeric_limits::min()) { - return false; - } return true; } @@ -69683,14 +75664,8 @@ hugeint_t DecimalAddOverflowCheck::Operation(hugeint_t left, hugeint_t right) { //===--------------------------------------------------------------------===// template <> dtime_t AddTimeOperator::Operation(dtime_t left, interval_t right) { - int64_t diff = right.micros - ((right.micros / Interval::MICROS_PER_DAY) * Interval::MICROS_PER_DAY); - left += diff; - if (left >= Interval::MICROS_PER_DAY) { - left -= Interval::MICROS_PER_DAY; - } else if (left < 0) { - left += Interval::MICROS_PER_DAY; - } - return left; + date_t date(0); + return AddIntervalToTimeOperation(left, right, date); } template <> @@ -69775,6 +75750,8 @@ struct BitwiseNotFun { +#include + namespace duckdb { template @@ -69923,6 +75900,9 @@ unique_ptr BindDecimalAddSubtract(ClientContext &context, ScalarFu // 
get the max width and scale of the input arguments uint8_t max_width = 0, max_scale = 0, max_width_over_scale = 0; for (idx_t i = 0; i < arguments.size(); i++) { + if (arguments[i]->return_type.id() == LogicalTypeId::UNKNOWN) { + continue; + } uint8_t width, scale; auto can_convert = arguments[i]->return_type.GetDecimalProperties(width, scale); if (!can_convert) { @@ -69932,6 +75912,7 @@ unique_ptr BindDecimalAddSubtract(ClientContext &context, ScalarFu max_scale = MaxValue(scale, max_scale); max_width_over_scale = MaxValue(width - scale, max_width_over_scale); } + D_ASSERT(max_width > 0); // for addition/subtraction, we add 1 to the width to ensure we don't overflow bool check_overflow = false; auto required_width = MaxValue(max_scale + max_width_over_scale, max_width) + 1; @@ -69946,14 +75927,15 @@ unique_ptr BindDecimalAddSubtract(ClientContext &context, ScalarFu required_width = Decimal::MAX_WIDTH_DECIMAL; } // arithmetic between two decimal arguments: check the types of the input arguments - LogicalType result_type = LogicalType(LogicalTypeId::DECIMAL, required_width, max_scale); + LogicalType result_type = LogicalType::DECIMAL(required_width, max_scale); // we cast all input types to the specified type for (idx_t i = 0; i < arguments.size(); i++) { // first check if the cast is necessary // if the argument has a matching scale and internal type as the output type, no casting is necessary auto &argument_type = arguments[i]->return_type; - if (argument_type.scale() == result_type.scale() && - argument_type.InternalType() == result_type.InternalType()) { + uint8_t width, scale; + argument_type.GetDecimalProperties(width, scale); + if (scale == DecimalType::GetScale(result_type) && argument_type.InternalType() == result_type.InternalType()) { bound_function.arguments[i] = argument_type; } else { bound_function.arguments[i] = result_type; @@ -70002,9 +75984,9 @@ void AddFun::RegisterFunction(BuiltinFunctions &set) { } // we can add integers to dates 
functions.AddFunction(ScalarFunction({LogicalType::DATE, LogicalType::INTEGER}, LogicalType::DATE, - GetScalarBinaryFunction(PhysicalType::INT32))); + ScalarFunction::BinaryFunction)); functions.AddFunction(ScalarFunction({LogicalType::INTEGER, LogicalType::DATE}, LogicalType::DATE, - GetScalarBinaryFunction(PhysicalType::INT32))); + ScalarFunction::BinaryFunction)); // we can add intervals together functions.AddFunction( ScalarFunction({LogicalType::INTERVAL, LogicalType::INTERVAL}, LogicalType::INTERVAL, @@ -70042,17 +76024,39 @@ void AddFun::RegisterFunction(BuiltinFunctions &set) { //===--------------------------------------------------------------------===// // - [subtract] //===--------------------------------------------------------------------===// +struct NegateOperator { + template + static inline TR Operation(TA input) { + using Limits = std::numeric_limits; + auto cast = (TR)input; + if (Limits::is_integer && Limits::is_signed && Limits::lowest() == cast) { + throw OutOfRangeException("Overflow in negation of integer!"); + } + return -cast; + } +}; + +template <> +interval_t NegateOperator::Operation(interval_t input) { + interval_t result; + result.months = NegateOperator::Operation(input.months); + result.days = NegateOperator::Operation(input.days); + result.micros = NegateOperator::Operation(input.micros); + return result; +} + unique_ptr DecimalNegateBind(ClientContext &context, ScalarFunction &bound_function, vector> &arguments) { - auto decimal_type = arguments[0]->return_type; - if (decimal_type.width() <= Decimal::MAX_WIDTH_INT16) { + auto &decimal_type = arguments[0]->return_type; + auto width = DecimalType::GetWidth(decimal_type); + if (width <= Decimal::MAX_WIDTH_INT16) { bound_function.function = ScalarFunction::GetScalarUnaryFunction(LogicalTypeId::SMALLINT); - } else if (decimal_type.width() <= Decimal::MAX_WIDTH_INT32) { + } else if (width <= Decimal::MAX_WIDTH_INT32) { bound_function.function = 
ScalarFunction::GetScalarUnaryFunction(LogicalTypeId::INTEGER); - } else if (decimal_type.width() <= Decimal::MAX_WIDTH_INT64) { + } else if (width <= Decimal::MAX_WIDTH_INT64) { bound_function.function = ScalarFunction::GetScalarUnaryFunction(LogicalTypeId::BIGINT); } else { - D_ASSERT(decimal_type.width() <= Decimal::MAX_WIDTH_INT128); + D_ASSERT(width <= Decimal::MAX_WIDTH_INT128); bound_function.function = ScalarFunction::GetScalarUnaryFunction(LogicalTypeId::HUGEINT); } decimal_type.Verify(); @@ -70125,10 +76129,10 @@ void SubtractFun::RegisterFunction(BuiltinFunctions &set) { } } // we can subtract dates from each other - functions.AddFunction(ScalarFunction({LogicalType::DATE, LogicalType::DATE}, LogicalType::INTEGER, - GetScalarBinaryFunction(PhysicalType::INT32))); + functions.AddFunction(ScalarFunction({LogicalType::DATE, LogicalType::DATE}, LogicalType::BIGINT, + ScalarFunction::BinaryFunction)); functions.AddFunction(ScalarFunction({LogicalType::DATE, LogicalType::INTEGER}, LogicalType::DATE, - GetScalarBinaryFunction(PhysicalType::INT32))); + ScalarFunction::BinaryFunction)); // we can subtract timestamps from each other functions.AddFunction( ScalarFunction({LogicalType::TIMESTAMP, LogicalType::TIMESTAMP}, LogicalType::INTERVAL, @@ -70158,6 +76162,8 @@ void SubtractFun::RegisterFunction(BuiltinFunctions &set) { nullptr, nullptr, NegateBindStatistics)); } } + functions.AddFunction(ScalarFunction({LogicalType::INTERVAL}, LogicalType::INTERVAL, + ScalarFunction::UnaryFunction)); set.AddFunction(functions); } @@ -70206,6 +76212,9 @@ unique_ptr BindDecimalMultiply(ClientContext &context, ScalarFunct uint8_t result_width = 0, result_scale = 0; uint8_t max_width = 0; for (idx_t i = 0; i < arguments.size(); i++) { + if (arguments[i]->return_type.id() == LogicalTypeId::UNKNOWN) { + continue; + } uint8_t width, scale; auto can_convert = arguments[i]->return_type.GetDecimalProperties(width, scale); if (!can_convert) { @@ -70217,6 +76226,7 @@ unique_ptr 
BindDecimalMultiply(ClientContext &context, ScalarFunct result_width += width; result_scale += scale; } + D_ASSERT(max_width > 0); if (result_scale > Decimal::MAX_WIDTH_DECIMAL) { throw OutOfRangeException( "Needed scale %d to accurately represent the multiplication result, but this is out of range of the " @@ -70234,7 +76244,7 @@ unique_ptr BindDecimalMultiply(ClientContext &context, ScalarFunct check_overflow = true; result_width = Decimal::MAX_WIDTH_DECIMAL; } - LogicalType result_type = LogicalType(LogicalTypeId::DECIMAL, result_width, result_scale); + LogicalType result_type = LogicalType::DECIMAL(result_width, result_scale); // since our scale is the summation of our input scales, we do not need to cast to the result scale // however, we might need to cast to the correct internal type for (idx_t i = 0; i < arguments.size(); i++) { @@ -70242,7 +76252,12 @@ unique_ptr BindDecimalMultiply(ClientContext &context, ScalarFunct if (argument_type.InternalType() == result_type.InternalType()) { bound_function.arguments[i] = argument_type; } else { - bound_function.arguments[i] = LogicalType(LogicalTypeId::DECIMAL, result_width, argument_type.scale()); + uint8_t width, scale; + if (!argument_type.GetDecimalProperties(width, scale)) { + scale = 0; + } + + bound_function.arguments[i] = LogicalType::DECIMAL(result_width, scale); } } result_type.Verify(); @@ -70802,10 +76817,6 @@ bool TryMultiplyOperator::Operation(int64_t left, int64_t right, int64_t &result // now we know that there is no overflow, we can just perform the multiplication result = left * right; #endif - // FIXME: this check can be removed if we get rid of NullValue - if (result == std::numeric_limits::min()) { - return false; - } return true; } @@ -70889,6 +76900,20 @@ double SubtractOperator::Operation(double left, double right) { return result; } +template <> +int64_t SubtractOperator::Operation(date_t left, date_t right) { + return int64_t(left.days) - int64_t(right.days); +} + +template <> +date_t 
SubtractOperator::Operation(date_t left, int32_t right) { + int32_t result; + if (!TrySubtractOperator::Operation(left.days, right, result)) { + throw OutOfRangeException("Date out of range"); + } + return date_t(result); +} + template <> interval_t SubtractOperator::Operation(interval_t left, interval_t right) { interval_t result; @@ -70987,10 +77012,6 @@ bool TrySubtractOperator::Operation(int64_t left, int64_t right, int64_t &result if (__builtin_sub_overflow(left, right, &result)) { return false; } - // FIXME: this check can be removed if we get rid of NullValue - if (result == std::numeric_limits::min()) { - return false; - } #else if (right < 0) { if (NumericLimits::Maximum() + right < left) { @@ -71116,6 +77137,9 @@ struct NextvalFun { static void RegisterFunction(BuiltinFunctions &set); }; +struct CurrvalFun { + static void RegisterFunction(BuiltinFunctions &set); +}; } // namespace duckdb @@ -71145,37 +77169,61 @@ struct NextvalBindData : public FunctionData { } }; -static int64_t NextSequenceValue(Transaction &transaction, SequenceCatalogEntry *seq) { - lock_guard seqlock(seq->lock); - int64_t result; - if (seq->cycle) { - result = seq->counter; - seq->counter += seq->increment; - ; - if (result < seq->min_value) { - result = seq->max_value; - seq->counter = seq->max_value + seq->increment; - } else if (result > seq->max_value) { - result = seq->min_value; - seq->counter = seq->min_value + seq->increment; - } - } else { - result = seq->counter; - seq->counter += seq->increment; - if (result < seq->min_value) { - throw SequenceException("nextval: reached minimum value of sequence \"%s\" (%lld)", seq->name, - seq->min_value); +struct CurrentSequenceValueOperator { + static int64_t Operation(Transaction &transaction, SequenceCatalogEntry *seq) { + lock_guard seqlock(seq->lock); + int64_t result; + if (seq->usage_count == 0u) { + throw SequenceException("currval: sequence is not yet defined in this session"); } - if (result > seq->max_value) { - throw 
SequenceException("nextval: reached maximum value of sequence \"%s\" (%lld)", seq->name, - seq->max_value); + result = seq->last_value; + return result; + } +}; + +struct NextSequenceValueOperator { + static int64_t Operation(Transaction &transaction, SequenceCatalogEntry *seq) { + lock_guard seqlock(seq->lock); + int64_t result; + if (seq->cycle) { + result = seq->counter; + seq->counter += seq->increment; + if (result < seq->min_value) { + result = seq->max_value; + seq->counter = seq->max_value + seq->increment; + } else if (result > seq->max_value) { + result = seq->min_value; + seq->counter = seq->min_value + seq->increment; + } + } else { + result = seq->counter; + seq->counter += seq->increment; + if (result < seq->min_value) { + throw SequenceException("nextval: reached minimum value of sequence \"%s\" (%lld)", seq->name, + seq->min_value); + } + if (result > seq->max_value) { + throw SequenceException("nextval: reached maximum value of sequence \"%s\" (%lld)", seq->name, + seq->max_value); + } } + seq->last_value = result; + seq->usage_count++; + transaction.sequence_usage[seq] = SequenceValue(seq->usage_count, seq->counter); + return result; + } +}; + +struct NextValData { + NextValData(NextvalBindData &bind_data_p, Transaction &transaction_p) + : bind_data(bind_data_p), transaction(transaction_p) { } - seq->usage_count++; - transaction.sequence_usage[seq] = SequenceValue(seq->usage_count, seq->counter); - return result; -} + NextvalBindData &bind_data; + Transaction &transaction; +}; + +template static void NextValFunction(DataChunk &args, ExpressionState &state, Vector &result) { auto &func_expr = (BoundFunctionExpression &)state.expr; auto &info = (NextvalBindData &)*func_expr.bind_info; @@ -71189,9 +77237,10 @@ static void NextValFunction(DataChunk &args, ExpressionState &state, Vector &res auto result_data = FlatVector::GetData(result); for (idx_t i = 0; i < args.size(); i++) { // get the next value from the sequence - result_data[i] = 
NextSequenceValue(transaction, info.sequence); + result_data[i] = OP::Operation(transaction, info.sequence); } } else { + NextValData next_val_input(info, transaction); // sequence to use comes from the input UnaryExecutor::Execute(input, result, args.size(), [&](string_t value) { auto qname = QualifiedName::Parse(value.GetString()); @@ -71199,7 +77248,7 @@ static void NextValFunction(DataChunk &args, ExpressionState &state, Vector &res auto sequence = Catalog::GetCatalog(info.context) .GetEntry(info.context, qname.schema, qname.name); // finally get the next value from the sequence - return NextSequenceValue(transaction, sequence); + return OP::Operation(transaction, sequence); }); } } @@ -71228,8 +77277,14 @@ static void NextValDependency(BoundFunctionExpression &expr, unordered_set, true, NextValBind, NextValDependency)); +} + +void CurrvalFun::RegisterFunction(BuiltinFunctions &set) { + set.AddFunction(ScalarFunction("currval", {LogicalType::VARCHAR}, LogicalType::BIGINT, + NextValFunction, true, NextValBind, + NextValDependency)); } } // namespace duckdb @@ -71239,6 +77294,7 @@ namespace duckdb { void BuiltinFunctions::RegisterSequenceFunctions() { Register(); + Register(); } } // namespace duckdb @@ -71386,21 +77442,34 @@ static string_t UnicodeCaseConvert(Vector &result, const char *input_data, idx_t } template -static void CaseConvertFunction(DataChunk &args, ExpressionState &state, Vector &result) { - UnaryExecutor::Execute(args.data[0], result, args.size(), [&](string_t input) { +struct CaseConvertOperator { + template + static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) { auto input_data = input.GetDataUnsafe(); auto input_length = input.GetSize(); return UnicodeCaseConvert(result, input_data, input_length); - }); + } +}; + +template +static void CaseConvertFunction(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::ExecuteString>(args.data[0], result, args.size()); } template -static void 
CaseConvertFunctionASCII(DataChunk &args, ExpressionState &state, Vector &result) { - UnaryExecutor::Execute(args.data[0], result, args.size(), [&](string_t input) { +struct CaseConvertOperatorASCII { + template + static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) { auto input_data = input.GetDataUnsafe(); auto input_length = input.GetSize(); return ASCIICaseConvert(result, input_data, input_length); - }); + } +}; + +template +static void CaseConvertFunctionASCII(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::ExecuteString>(args.data[0], result, + args.size()); } template @@ -72709,8 +78778,8 @@ static void RegularLikeFunction(DataChunk &input, ExpressionState &state, Vector if (func_expr.bind_info) { auto &matcher = (LikeMatcher &)*func_expr.bind_info; // use fast like matcher - UnaryExecutor::Execute(input.data[0], result, input.size(), [&](string_t str) { - return INVERT ? !matcher.Match(str) : matcher.Match(str); + UnaryExecutor::Execute(input.data[0], result, input.size(), [&](string_t input) { + return INVERT ? 
!matcher.Match(input) : matcher.Match(input); }); } else { // use generic like matcher @@ -72754,17 +78823,22 @@ void LikeEscapeFun::RegisterFunction(BuiltinFunctions &set) { namespace duckdb { -static void MD5Function(DataChunk &args, ExpressionState &state, Vector &result) { - auto &input = args.data[0]; - - UnaryExecutor::Execute(input, result, args.size(), [&](string_t input) { +struct MD5Operator { + template + static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) { auto hash = StringVector::EmptyString(result, MD5Context::MD5_HASH_LENGTH_TEXT); MD5Context context; context.Add(input); context.FinishHex(hash.GetDataWriteable()); hash.Finalize(); return hash; - }); + } +}; + +static void MD5Function(DataChunk &args, ExpressionState &state, Vector &result) { + auto &input = args.data[0]; + + UnaryExecutor::ExecuteString(input, result, args.size()); } void MD5Fun::RegisterFunction(BuiltinFunctions &set) { @@ -72835,10 +78909,9 @@ void MismatchesFun::RegisterFunction(BuiltinFunctions &set) { namespace duckdb { -static void NFCNormalizeFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.ColumnCount() == 1); - - UnaryExecutor::Execute(args.data[0], result, args.size(), [&](string_t input) { +struct NFCNormalizeOperator { + template + static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) { auto input_data = input.GetDataUnsafe(); auto input_length = input.GetSize(); if (StripAccentsFun::IsAscii(input_data, input_length)) { @@ -72849,7 +78922,13 @@ static void NFCNormalizeFunction(DataChunk &args, ExpressionState &state, Vector auto result_str = StringVector::AddString(result, normalized_str); free(normalized_str); return result_str; - }); + } +}; + +static void NFCNormalizeFunction(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.ColumnCount() == 1); + + UnaryExecutor::ExecuteString(args.data[0], result, args.size()); StringVector::AddHeapReference(result, args.data[0]); } @@ -73222,7 +79301,7 
@@ static void PrintfFunction(DataChunk &args, ExpressionState &state, Vector &resu break; } default: - throw InvalidInputException("Unsupported type for format: \"%s\"!", col.GetType().ToString()); + throw InternalException("Unexpected type for printf format"); } } // finally actually perform the format @@ -73274,13 +79353,14 @@ void PrintfFun::RegisterFunction(BuiltinFunctions &set) { namespace duckdb { struct RegexpMatchesBindData : public FunctionData { - RegexpMatchesBindData(duckdb_re2::RE2::Options options, std::unique_ptr constant_pattern, - string range_min, string range_max, bool range_success); + RegexpMatchesBindData(duckdb_re2::RE2::Options options, string constant_string); ~RegexpMatchesBindData() override; duckdb_re2::RE2::Options options; + string constant_string; std::unique_ptr constant_pattern; - string range_min, range_max; + string range_min; + string range_max; bool range_success; unique_ptr Copy() override; @@ -73298,18 +79378,25 @@ struct RegexpReplaceBindData : public FunctionData { namespace duckdb { -RegexpMatchesBindData::RegexpMatchesBindData(duckdb_re2::RE2::Options options, - unique_ptr constant_pattern, string range_min, - string range_max, bool range_success) - : options(options), constant_pattern(move(constant_pattern)), range_min(move(range_min)), - range_max(move(range_max)), range_success(range_success) { +RegexpMatchesBindData::RegexpMatchesBindData(duckdb_re2::RE2::Options options, string constant_string_p) + : options(options), constant_string(move(constant_string_p)) { + if (!constant_string.empty()) { + constant_pattern = make_unique(constant_string, options); + if (!constant_pattern->ok()) { + throw Exception(constant_pattern->error()); + } + + range_success = constant_pattern->PossibleMatchRange(&range_min, &range_max, 1000); + } else { + range_success = false; + } } RegexpMatchesBindData::~RegexpMatchesBindData() { } unique_ptr RegexpMatchesBindData::Copy() { - return make_unique(options, move(constant_pattern), 
range_min, range_max, range_success); + return make_unique(options, constant_string); } static inline duckdb_re2::StringPiece CreateStringPiece(string_t &input) { @@ -73411,17 +79498,10 @@ static unique_ptr RegexpMatchesBind(ClientContext &context, Scalar if (arguments[1]->IsFoldable()) { Value pattern_str = ExpressionExecutor::EvaluateScalar(*arguments[1]); if (!pattern_str.is_null && pattern_str.type().id() == LogicalTypeId::VARCHAR) { - auto re = make_unique(pattern_str.str_value, options); - if (!re->ok()) { - throw Exception(re->error()); - } - - string range_min, range_max; - auto range_success = re->PossibleMatchRange(&range_min, &range_max, 1000); - return make_unique(options, move(re), range_min, range_max, range_success); + return make_unique(options, pattern_str.str_value); } } - return make_unique(options, nullptr, "", "", false); + return make_unique(options, ""); } static void RegexReplaceFunction(DataChunk &args, ExpressionState &state, Vector &result) { @@ -73564,8 +79644,6 @@ static idx_t NextNeedle(const char *input_haystack, idx_t size_haystack, const c return string_position; } } - - return size_haystack; } // Did not find the needle return size_haystack; @@ -73661,8 +79739,9 @@ static void StrReverseUnicode(const char *input, idx_t n, char *output) { }); } -static void ReverseFunction(DataChunk &args, ExpressionState &state, Vector &result) { - UnaryExecutor::Execute(args.data[0], result, args.size(), [&](string_t input) { +struct ReverseOperator { + template + static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) { auto input_data = input.GetDataUnsafe(); auto input_length = input.GetSize(); @@ -73673,7 +79752,11 @@ static void ReverseFunction(DataChunk &args, ExpressionState &state, Vector &res } target.Finalize(); return target; - }); + } +}; + +static void ReverseFunction(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::ExecuteString(args.data[0], result, args.size()); } void 
ReverseFun::RegisterFunction(BuiltinFunctions &set) { @@ -73706,9 +79789,7 @@ struct StringSplitIterator { idx_t size; public: - virtual idx_t Next(const char *input) { - return 0; - } + virtual idx_t Next(const char *input) = 0; bool HasNext() { return offset < size; } @@ -73832,10 +79913,6 @@ struct RegexStringSplitIterator : virtual public StringSplitIterator { }; void BaseStringSplitFunction(const char *input, StringSplitIterator &iter, Vector &result) { - if (!ListVector::HasEntry(result)) { - auto result_child = make_unique(LogicalType::VARCHAR); - ListVector::SetEntry(result, move(result_child)); - } // special case: empty string if (iter.size == 0) { Value val = StringVector::AddString(ListVector::GetEntry(result), &input[0], 0); @@ -73859,9 +79936,8 @@ unique_ptr BaseStringSplitFunction(string_t input, string_t delim, const bool ascii_only = Utf8Proc::Analyze(input_data, input_size) == UnicodeType::ASCII; - child_list_t child_type {{"", LogicalType::VARCHAR}}; - LogicalType list = {LogicalTypeId::LIST, child_type}; - auto output = make_unique(list); + auto list_type = LogicalType::LIST(LogicalType::VARCHAR); + auto output = make_unique(list_type); unique_ptr iter; if (regex) { auto re = make_unique(duckdb_re2::StringPiece(delim_data, delim_size)); @@ -73889,34 +79965,32 @@ static void StringSplitExecutor(DataChunk &args, ExpressionState &state, Vector auto delims = (string_t *)delim_data.data; D_ASSERT(result.GetType().id() == LogicalTypeId::LIST); - auto list_struct_data = FlatVector::GetData(result); - LogicalType varchar = LogicalType::VARCHAR; - auto list_child = make_unique(varchar); - ListVector::SetEntry(result, move(list_child)); + result.SetVectorType(VectorType::FLAT_VECTOR); + ListVector::SetListSize(result, 0); - child_list_t child_types; - child_types.push_back({"", varchar}); - LogicalType list_vector_type(LogicalType::LIST.id(), child_types); + auto list_struct_data = FlatVector::GetData(result); + auto list_vector_type = 
LogicalType::LIST(LogicalType::VARCHAR); - size_t total_len = 0; + idx_t total_len = 0; + auto &result_mask = FlatVector::Validity(result); for (idx_t i = 0; i < args.size(); i++) { - if (!input_data.validity.RowIsValid(input_data.sel->get_index(i))) { - FlatVector::SetNull(result, i, true); + auto input_idx = input_data.sel->get_index(i); + auto delim_idx = delim_data.sel->get_index(i); + if (!input_data.validity.RowIsValid(input_idx)) { + result_mask.SetInvalid(i); continue; } - string_t input = inputs[input_data.sel->get_index(i)]; + string_t input = inputs[input_idx]; unique_ptr split_input; - if (!delim_data.validity.RowIsValid(delim_data.sel->get_index(i))) { + if (!delim_data.validity.RowIsValid(delim_idx)) { // special case: delimiter is NULL split_input = make_unique(list_vector_type); - auto child = make_unique(varchar); - ListVector::SetEntry(*split_input, move(child)); Value val(input); ListVector::PushBack(*split_input, val); } else { - string_t delim = delims[delim_data.sel->get_index(i)]; + string_t delim = delims[delim_idx]; split_input = BaseStringSplitFunction(input, delim, regex); } list_struct_data[i].length = ListVector::GetListSize(*split_input); @@ -73941,12 +80015,10 @@ static void StringSplitRegexFunction(DataChunk &args, ExpressionState &state, Ve } void StringSplitFun::RegisterFunction(BuiltinFunctions &set) { - child_list_t child_types; - child_types.push_back(make_pair("string", LogicalType::VARCHAR)); - auto varchar_list_type = LogicalType(LogicalTypeId::LIST, child_types); + auto varchar_list_type = LogicalType::LIST(LogicalType::VARCHAR); set.AddFunction( - {"string_split", "str_split", "string_to_array"}, + {"string_split", "str_split", "string_to_array", "split"}, ScalarFunction({LogicalType::VARCHAR, LogicalType::VARCHAR}, varchar_list_type, StringSplitFunction)); set.AddFunction( {"string_split_regex", "str_split_regex", "regexp_split_to_array"}, @@ -73970,10 +80042,9 @@ bool StripAccentsFun::IsAscii(const char *input, idx_t n) { 
return true; } -static void StripAccentsFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.ColumnCount() == 1); - - UnaryExecutor::Execute(args.data[0], result, args.size(), [&](string_t input) { +struct StripAccentsOperator { + template + static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) { if (StripAccentsFun::IsAscii(input.GetDataUnsafe(), input.GetSize())) { return input; } @@ -73983,7 +80054,13 @@ static void StripAccentsFunction(DataChunk &args, ExpressionState &state, Vector auto result_str = StringVector::AddString(result, (const char *)stripped); free(stripped); return result_str; - }); + } +}; + +static void StripAccentsFunction(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.ColumnCount() == 1); + + UnaryExecutor::ExecuteString(args.data[0], result, args.size()); StringVector::AddHeapReference(result, args.data[0]); } @@ -74254,8 +80331,9 @@ void SuffixFun::RegisterFunction(BuiltinFunctions &set) { namespace duckdb { template -static void UnaryTrimFunction(DataChunk &args, ExpressionState &state, Vector &result) { - UnaryExecutor::Execute(args.data[0], result, args.size(), [&](string_t input) { +struct TrimOperator { + template + static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) { auto data = input.GetDataUnsafe(); auto size = input.GetSize(); @@ -74298,7 +80376,12 @@ static void UnaryTrimFunction(DataChunk &args, ExpressionState &state, Vector &r target.Finalize(); return target; - }); + } +}; + +template +static void UnaryTrimFunction(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::ExecuteString>(args.data[0], result, args.size()); } static void GetIgnoredCodepoints(string_t ignored, unordered_set &ignored_codepoints) { @@ -74433,6 +80516,82 @@ void BuiltinFunctions::RegisterStringFunctions() { + + +namespace duckdb { + +// current_query +struct SystemBindData : public FunctionData { + ClientContext &context; + + explicit 
SystemBindData(ClientContext &context) : context(context) { + } + + unique_ptr Copy() override { + return make_unique(context); + } +}; + +unique_ptr BindSystemFunction(ClientContext &context, ScalarFunction &bound_function, + vector> &arguments) { + return make_unique(context); +} + +static void CurrentQueryFunction(DataChunk &input, ExpressionState &state, Vector &result) { + auto &func_expr = (BoundFunctionExpression &)state.expr; + auto &info = (SystemBindData &)*func_expr.bind_info; + + Value val(info.context.query); + result.Reference(val); +} + +// current_schema +static void CurrentSchemaFunction(DataChunk &input, ExpressionState &state, Vector &result) { + Value val(DEFAULT_SCHEMA); + result.Reference(val); +} + +// current_schemas +static void CurrentSchemasFunction(DataChunk &input, ExpressionState &state, Vector &result) { + vector schema_list = {Value(DEFAULT_SCHEMA)}; + auto val = Value::LIST(schema_list); + result.Reference(val); +} + +// txid_current +static void TransactionIdCurrent(DataChunk &input, ExpressionState &state, Vector &result) { + auto &func_expr = (BoundFunctionExpression &)state.expr; + auto &info = (SystemBindData &)*func_expr.bind_info; + + auto &transaction = Transaction::GetTransaction(info.context); + auto val = Value::BIGINT(transaction.start_time); + result.Reference(val); +} + +// version +static void VersionFunction(DataChunk &input, ExpressionState &state, Vector &result) { + auto val = Value(DuckDB::LibraryVersion()); + result.Reference(val); +} + +void SystemFun::RegisterFunction(BuiltinFunctions &set) { + auto varchar_list_type = LogicalType::LIST(LogicalType::VARCHAR); + + set.AddFunction( + ScalarFunction("current_query", {}, LogicalType::VARCHAR, CurrentQueryFunction, false, BindSystemFunction)); + set.AddFunction(ScalarFunction("current_schema", {}, LogicalType::VARCHAR, CurrentSchemaFunction)); + set.AddFunction( + ScalarFunction("current_schemas", {LogicalType::BOOLEAN}, varchar_list_type, CurrentSchemasFunction)); 
+ set.AddFunction( + ScalarFunction("txid_current", {}, LogicalType::BIGINT, TransactionIdCurrent, false, BindSystemFunction)); + set.AddFunction(ScalarFunction("version", {}, LogicalType::VARCHAR, VersionFunction)); +} + +} // namespace duckdb + + + + namespace duckdb { void BuiltinFunctions::RegisterTrigonometricsFunctions() { @@ -74450,6 +80609,13 @@ void BuiltinFunctions::RegisterTrigonometricsFunctions() { + + + + + + + //===----------------------------------------------------------------------===// // DuckDB // @@ -74462,10 +80628,134 @@ void BuiltinFunctions::RegisterTrigonometricsFunctions() { + + + +#include + namespace duckdb { +//===--------------------------------------------------------------------===// +// Arrow Variable Size Types +//===--------------------------------------------------------------------===// +enum class ArrowVariableSizeType : uint8_t { FIXED_SIZE = 0, NORMAL = 1, SUPER_SIZE = 2 }; + +//===--------------------------------------------------------------------===// +// Arrow Time/Date Types +//===--------------------------------------------------------------------===// +enum class ArrowDateTimeType : uint8_t { + MILLISECONDS = 0, + MICROSECONDS = 1, + NANOSECONDS = 2, + SECONDS = 3, + DAYS = 4, + MONTHS = 5 +}; +struct ArrowConvertData { + ArrowConvertData(LogicalType type) : dictionary_type(type) {}; + ArrowConvertData() {}; + //! Hold type of dictionary + LogicalType dictionary_type; + //! If its a variable size type (e.g., strings, blobs, lists) holds which type it is + vector> variable_sz_type; + //! 
If this is a date/time holds its precision + vector date_time_precision; +}; + +struct ArrowScanFunctionData : public TableFunctionData { + ArrowScanFunctionData(idx_t rows_per_thread_p, + unique_ptr (*scanner_producer_p)( + uintptr_t stream_factory_ptr, + std::pair, std::vector> &project_columns, + TableFilterCollection *filters), + uintptr_t stream_factory_ptr_p) + : lines_read(0), rows_per_thread(rows_per_thread_p), stream_factory_ptr(stream_factory_ptr_p), + scanner_producer(scanner_producer_p), number_of_rows(0) { + } + //! This holds the original list type (col_idx, [ArrowListType,size]) + std::unordered_map> arrow_convert_data; + std::atomic lines_read; + ArrowSchemaWrapper schema_root; + idx_t rows_per_thread; + //! Pointer to the scanner factory + uintptr_t stream_factory_ptr; + //! Pointer to the scanner factory produce + unique_ptr (*scanner_producer)( + uintptr_t stream_factory_ptr, + std::pair, std::vector> &project_columns, + TableFilterCollection *filters); + //! Number of rows (Used in cardinality and progress bar) + int64_t number_of_rows; +}; + +struct ArrowScanState : public FunctionOperatorData { + explicit ArrowScanState(unique_ptr current_chunk) : chunk(move(current_chunk)) { + } + unique_ptr stream; + unique_ptr chunk; + idx_t chunk_offset = 0; + vector column_ids; + //! Store child vectors for Arrow Dictionary Vectors (col-idx,vector) + unordered_map> arrow_dictionary_vectors; + TableFilterCollection *filters = nullptr; +}; + +struct ParallelArrowScanState : public ParallelState { + ParallelArrowScanState() { + } + unique_ptr stream; + std::mutex lock; +}; struct ArrowTableFunction { +public: static void RegisterFunction(BuiltinFunctions &set); + +private: + //! Binds an arrow table + static unique_ptr ArrowScanBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, vector &return_types, + vector &names); + //! 
Actual conversion from Arrow to DuckDB + static void ArrowToDuckDB(ArrowScanState &scan_state, + std::unordered_map> &arrow_convert_data, + DataChunk &output, idx_t start); + + //! -----Single Thread Functions:----- + //! Initialize Single Thread Scan + static unique_ptr ArrowScanInit(ClientContext &context, const FunctionData *bind_data, + const vector &column_ids, + TableFilterCollection *filters); + + //! Scan Function for Single Thread Execution + static void ArrowScanFunction(ClientContext &context, const FunctionData *bind_data, + FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output); + + //! -----Multi Thread Functions:----- + //! Initialize Parallel State + static unique_ptr ArrowScanInitParallelState(ClientContext &context, + const FunctionData *bind_data_p); + //! Initialize Parallel Scans + static unique_ptr ArrowScanParallelInit(ClientContext &context, + const FunctionData *bind_data_p, ParallelState *state, + const vector &column_ids, + TableFilterCollection *filters); + //! Defines Maximum Number of Threads + static idx_t ArrowScanMaxThreads(ClientContext &context, const FunctionData *bind_data_p); + //! Scan Function for Parallel Execution + static void ArrowScanFunctionParallel(ClientContext &context, const FunctionData *bind_data, + FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output, + ParallelState *parallel_state_p); + //! Get next chunk for the running thread + static bool ArrowScanParallelStateNext(ClientContext &context, const FunctionData *bind_data_p, + FunctionOperatorData *operator_state, ParallelState *parallel_state_p); + + //! -----Utility Functions:----- + //! Gets Arrow Table's Cardinality + static unique_ptr ArrowScanCardinality(ClientContext &context, const FunctionData *bind_data); + //! 
Gets the progress on the table scan, used for Progress Bars + static int ArrowProgress(ClientContext &context, const FunctionData *bind_data_p); }; } // namespace duckdb @@ -74478,281 +80768,1107 @@ struct ArrowTableFunction { +#include +namespace duckdb { + +LogicalType GetArrowLogicalType(ArrowSchema &schema, + std::unordered_map> &arrow_convert_data, + idx_t col_idx) { + auto format = string(schema.format); + if (arrow_convert_data.find(col_idx) == arrow_convert_data.end()) { + arrow_convert_data[col_idx] = make_unique(); + } + if (format == "n") { + return LogicalType::SQLNULL; + } else if (format == "b") { + return LogicalType::BOOLEAN; + } else if (format == "c") { + return LogicalType::TINYINT; + } else if (format == "s") { + return LogicalType::SMALLINT; + } else if (format == "i") { + return LogicalType::INTEGER; + } else if (format == "l") { + return LogicalType::BIGINT; + } else if (format == "C") { + return LogicalType::UTINYINT; + } else if (format == "S") { + return LogicalType::USMALLINT; + } else if (format == "I") { + return LogicalType::UINTEGER; + } else if (format == "L") { + return LogicalType::UBIGINT; + } else if (format == "f") { + return LogicalType::FLOAT; + } else if (format == "g") { + return LogicalType::DOUBLE; + } else if (format[0] == 'd') { //! 
this can be either decimal128 or decimal 256 (e.g., d:38,0) + std::string parameters = format.substr(format.find(':')); + uint8_t width = std::stoi(parameters.substr(1, parameters.find(','))); + uint8_t scale = std::stoi(parameters.substr(parameters.find(',') + 1)); + if (width > 38) { + throw NotImplementedException("Unsupported Internal Arrow Type for Decimal %s", format); + } + return LogicalType::DECIMAL(width, scale); + } else if (format == "u") { + arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0); + return LogicalType::VARCHAR; + } else if (format == "U") { + arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::SUPER_SIZE, 0); + return LogicalType::VARCHAR; + } else if (format == "tsn:") { + return LogicalTypeId::TIMESTAMP_NS; + } else if (format == "tsu:") { + return LogicalTypeId::TIMESTAMP; + } else if (format == "tsm:") { + return LogicalTypeId::TIMESTAMP_MS; + } else if (format == "tss:") { + return LogicalTypeId::TIMESTAMP_SEC; + } else if (format == "tdD") { + arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::DAYS); + return LogicalType::DATE; + } else if (format == "tdm") { + arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::MILLISECONDS); + return LogicalType::DATE; + } else if (format == "tts") { + arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::SECONDS); + return LogicalType::TIME; + } else if (format == "ttm") { + arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::MILLISECONDS); + return LogicalType::TIME; + } else if (format == "ttu") { + arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::MICROSECONDS); + return LogicalType::TIME; + } else if (format == "ttn") { + arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::NANOSECONDS); + return LogicalType::TIME; + } else if (format == "tDs") { + 
arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::SECONDS); + return LogicalType::INTERVAL; + } else if (format == "tDm") { + arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::MILLISECONDS); + return LogicalType::INTERVAL; + } else if (format == "tDu") { + arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::MICROSECONDS); + return LogicalType::INTERVAL; + } else if (format == "tDn") { + arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::NANOSECONDS); + return LogicalType::INTERVAL; + } else if (format == "tiD") { + arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::DAYS); + return LogicalType::INTERVAL; + } else if (format == "tiM") { + arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::MONTHS); + return LogicalType::INTERVAL; + } else if (format == "+l") { + arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0); + auto child_type = GetArrowLogicalType(*schema.children[0], arrow_convert_data, col_idx); + return LogicalType::LIST(child_type); + } else if (format == "+L") { + arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::SUPER_SIZE, 0); + auto child_type = GetArrowLogicalType(*schema.children[0], arrow_convert_data, col_idx); + return LogicalType::LIST(child_type); + } else if (format[0] == '+' && format[1] == 'w') { + std::string parameters = format.substr(format.find(':') + 1); + idx_t fixed_size = std::stoi(parameters); + arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::FIXED_SIZE, fixed_size); + auto child_type = GetArrowLogicalType(*schema.children[0], arrow_convert_data, col_idx); + return LogicalType::LIST(move(child_type)); + } else if (format == "+s") { + child_list_t child_types; + for (idx_t type_idx = 0; type_idx < (idx_t)schema.n_children; type_idx++) { + auto child_type 
= GetArrowLogicalType(*schema.children[type_idx], arrow_convert_data, col_idx); + child_types.push_back({schema.children[type_idx]->name, child_type}); + } + return LogicalType::STRUCT(move(child_types)); + + } else if (format == "+m") { + child_list_t child_types; + //! First type will be struct, so we skip it + auto &struct_schema = *schema.children[0]; + for (idx_t type_idx = 0; type_idx < (idx_t)struct_schema.n_children; type_idx++) { + //! The other types must be added on lists + auto child_type = GetArrowLogicalType(*struct_schema.children[type_idx], arrow_convert_data, col_idx); + + auto list_type = LogicalType::LIST(child_type); + child_types.push_back({struct_schema.children[type_idx]->name, list_type}); + } + return LogicalType::MAP(move(child_types)); + } else if (format == "z") { + arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0); + return LogicalType::BLOB; + } else if (format == "Z") { + arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::SUPER_SIZE, 0); + return LogicalType::BLOB; + } else if (format[0] == 'w') { + std::string parameters = format.substr(format.find(':') + 1); + idx_t fixed_size = std::stoi(parameters); + arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::FIXED_SIZE, fixed_size); + return LogicalType::BLOB; + } else { + throw NotImplementedException("Unsupported Internal Arrow Type %s", format); + } +} + +unique_ptr ArrowTableFunction::ArrowScanBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, + vector &return_types, vector &names) { + auto stream_factory_ptr = inputs[0].GetPointer(); + unique_ptr (*stream_factory_produce)( + uintptr_t stream_factory_ptr, + std::pair, std::vector> & project_columns, + TableFilterCollection * filters) = + (unique_ptr(*)(uintptr_t stream_factory_ptr, + std::pair, std::vector> & + project_columns, + 
TableFilterCollection * filters)) inputs[1] + .GetPointer(); + auto rows_per_thread = inputs[2].GetValue(); + std::pair, std::vector> project_columns; + auto res = make_unique(rows_per_thread, stream_factory_produce, stream_factory_ptr); + auto &data = *res; + auto stream = stream_factory_produce(stream_factory_ptr, project_columns, nullptr); + data.number_of_rows = stream->number_of_rows; + if (!stream) { + throw InvalidInputException("arrow_scan: NULL pointer passed"); + } -namespace duckdb { + stream->GetSchema(data.schema_root); -struct ArrowScanFunctionData : public TableFunctionData { - ArrowArrayStream *stream; - ArrowSchema schema_root; - ArrowArray current_chunk_root; - idx_t chunk_offset = 0; - bool is_consumed = false; - - void ReleaseArray() { - if (current_chunk_root.release) { - for (idx_t child_idx = 0; child_idx < (idx_t)current_chunk_root.n_children; child_idx++) { - auto &child = *current_chunk_root.children[child_idx]; - if (child.release) { - child.release(&child); + for (idx_t col_idx = 0; col_idx < (idx_t)data.schema_root.arrow_schema.n_children; col_idx++) { + auto &schema = *data.schema_root.arrow_schema.children[col_idx]; + if (!schema.release) { + throw InvalidInputException("arrow_scan: released schema passed"); + } + if (schema.dictionary) { + res->arrow_convert_data[col_idx] = + make_unique(GetArrowLogicalType(schema, res->arrow_convert_data, col_idx)); + return_types.emplace_back(GetArrowLogicalType(*schema.dictionary, res->arrow_convert_data, col_idx)); + } else { + return_types.emplace_back(GetArrowLogicalType(schema, res->arrow_convert_data, col_idx)); + } + auto format = string(schema.format); + auto name = string(schema.name); + if (name.empty()) { + name = string("v") + to_string(col_idx); + } + names.push_back(name); + } + return move(res); +} + +unique_ptr ProduceArrowScan(const ArrowScanFunctionData &function, ArrowScanState &scan_state, + TableFilterCollection *filters) { + //! 
Generate Projection Pushdown Vector + pair, vector> project_columns; + if (scan_state.column_ids.empty()) { + //! We have to push all columns, to generate proper scanners. + auto &schema = function.schema_root.arrow_schema; + for (idx_t col_idx = 0; col_idx < (idx_t)schema.n_children; col_idx++) { + auto &column_schema = *schema.children[col_idx]; + project_columns.first[col_idx] = column_schema.name; + project_columns.second.emplace_back(column_schema.name); + } + } + for (idx_t idx = 0; idx < scan_state.column_ids.size(); idx++) { + auto col_idx = scan_state.column_ids[idx]; + if (col_idx != COLUMN_IDENTIFIER_ROW_ID) { + auto &schema = *function.schema_root.arrow_schema.children[col_idx]; + project_columns.first[idx] = schema.name; + project_columns.second.emplace_back(schema.name); + } + } + return function.scanner_producer(function.stream_factory_ptr, project_columns, filters); +} + +unique_ptr ArrowTableFunction::ArrowScanInit(ClientContext &context, + const FunctionData *bind_data, + const vector &column_ids, + TableFilterCollection *filters) { + auto current_chunk = make_unique(); + auto result = make_unique(move(current_chunk)); + result->column_ids = column_ids; + auto &data = (const ArrowScanFunctionData &)*bind_data; + result->stream = ProduceArrowScan(data, *result, filters); + return move(result); +} + +void ShiftRight(unsigned char *ar, int size, int shift) { + int carry = 0; + while (shift--) { + for (int i = size - 1; i >= 0; --i) { + int next = (ar[i] & 1) ? 
0x80 : 0; + ar[i] = carry | (ar[i] >> 1); + carry = next; + } + } +} + +void SetValidityMask(Vector &vector, ArrowArray &array, ArrowScanState &scan_state, idx_t size, int64_t nested_offset, + bool add_null = false) { + auto &mask = FlatVector::Validity(vector); + if (array.null_count != 0 && array.buffers[0]) { + D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR); + auto bit_offset = scan_state.chunk_offset + array.offset; + if (nested_offset != -1) { + bit_offset = nested_offset; + } + auto n_bitmask_bytes = (size + 8 - 1) / 8; + mask.EnsureWritable(); + if (bit_offset % 8 == 0) { + //! just memcpy nullmask + memcpy((void *)mask.GetData(), (uint8_t *)array.buffers[0] + bit_offset / 8, n_bitmask_bytes); + } else { + //! need to re-align nullmask + std::vector temp_nullmask(n_bitmask_bytes + 1); + memcpy(temp_nullmask.data(), (uint8_t *)array.buffers[0] + bit_offset / 8, n_bitmask_bytes + 1); + ShiftRight(temp_nullmask.data(), n_bitmask_bytes + 1, + bit_offset % 8); //! why this has to be a right shift is a mystery to me + memcpy((void *)mask.GetData(), (data_ptr_t)temp_nullmask.data(), n_bitmask_bytes); + } + } + if (add_null) { + //! We are setting a validity mask of the data part of dictionary vector + //! For some reason, Nulls are allowed to be indexes, hence we need to set the last element here to be null + //! We might have to resize the mask + mask.Resize(size, size + 1); + mask.SetInvalid(size); + } +} + +void GetValidityMask(ValidityMask &mask, ArrowArray &array, ArrowScanState &scan_state, idx_t size) { + if (array.null_count != 0 && array.buffers[0]) { + auto bit_offset = scan_state.chunk_offset + array.offset; + auto n_bitmask_bytes = (size + 8 - 1) / 8; + mask.EnsureWritable(); + if (bit_offset % 8 == 0) { + //! just memcpy nullmask + memcpy((void *)mask.GetData(), (uint8_t *)array.buffers[0] + bit_offset / 8, n_bitmask_bytes); + } else { + //! 
need to re-align nullmask + std::vector temp_nullmask(n_bitmask_bytes + 1); + memcpy(temp_nullmask.data(), (uint8_t *)array.buffers[0] + bit_offset / 8, n_bitmask_bytes + 1); + ShiftRight(temp_nullmask.data(), n_bitmask_bytes + 1, + bit_offset % 8); //! why this has to be a right shift is a mystery to me + memcpy((void *)mask.GetData(), (data_ptr_t)temp_nullmask.data(), n_bitmask_bytes); + } + } +} + +void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanState &scan_state, idx_t size, + std::unordered_map> &arrow_convert_data, idx_t col_idx, + std::pair &arrow_convert_idx, int64_t nested_offset = -1, + ValidityMask *parent_mask = nullptr); + +void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowScanState &scan_state, idx_t size, + std::unordered_map> &arrow_convert_data, idx_t col_idx, + std::pair &arrow_convert_idx, int64_t nested_offset, ValidityMask *parent_mask) { + auto original_type = arrow_convert_data[col_idx]->variable_sz_type[arrow_convert_idx.first++]; + idx_t list_size = 0; + SetValidityMask(vector, array, scan_state, size, nested_offset); + idx_t start_offset = 0; + idx_t cur_offset = 0; + if (original_type.first == ArrowVariableSizeType::FIXED_SIZE) { + //! 
Have to check validity mask before setting this up + idx_t offset = (scan_state.chunk_offset + array.offset) * original_type.second; + if (nested_offset != -1) { + offset = original_type.second * nested_offset; + } + start_offset = offset; + auto list_data = FlatVector::GetData(vector); + for (idx_t i = 0; i < size; i++) { + auto &le = list_data[i]; + le.offset = cur_offset; + le.length = original_type.second; + cur_offset += original_type.second; + } + list_size = cur_offset; + } else if (original_type.first == ArrowVariableSizeType::NORMAL) { + auto offsets = (uint32_t *)array.buffers[1] + array.offset + scan_state.chunk_offset; + if (nested_offset != -1) { + offsets = (uint32_t *)array.buffers[1] + nested_offset; + } + start_offset = offsets[0]; + auto list_data = FlatVector::GetData(vector); + for (idx_t i = 0; i < size; i++) { + auto &le = list_data[i]; + le.offset = cur_offset; + le.length = offsets[i + 1] - offsets[i]; + cur_offset += le.length; + } + list_size = offsets[size]; + } else { + auto offsets = (uint64_t *)array.buffers[1] + array.offset + scan_state.chunk_offset; + if (nested_offset != -1) { + offsets = (uint64_t *)array.buffers[1] + nested_offset; + } + start_offset = offsets[0]; + auto list_data = FlatVector::GetData(vector); + for (idx_t i = 0; i < size; i++) { + auto &le = list_data[i]; + le.offset = cur_offset; + le.length = offsets[i + 1] - offsets[i]; + cur_offset += le.length; + } + list_size = offsets[size]; + } + list_size -= start_offset; + ListVector::Reserve(vector, list_size); + ListVector::SetListSize(vector, list_size); + auto &child_vector = ListVector::GetEntry(vector); + SetValidityMask(child_vector, *array.children[0], scan_state, list_size, start_offset); + auto &list_mask = FlatVector::Validity(vector); + if (parent_mask) { + //! 
Since this List is owned by a struct we must guarantee their validity map matches on Null + if (!parent_mask->AllValid()) { + for (idx_t i = 0; i < size; i++) { + if (!parent_mask->RowIsValid(i)) { + list_mask.SetInvalid(i); + } + } + } + } + if (list_size == 0 && start_offset == 0) { + ColumnArrowToDuckDB(child_vector, *array.children[0], scan_state, list_size, arrow_convert_data, col_idx, + arrow_convert_idx, -1); + } else { + ColumnArrowToDuckDB(child_vector, *array.children[0], scan_state, list_size, arrow_convert_data, col_idx, + arrow_convert_idx, start_offset); + } +} + +void ArrowToDuckDBBlob(Vector &vector, ArrowArray &array, ArrowScanState &scan_state, idx_t size, + std::unordered_map> &arrow_convert_data, idx_t col_idx, + std::pair &arrow_convert_idx, int64_t nested_offset) { + auto original_type = arrow_convert_data[col_idx]->variable_sz_type[arrow_convert_idx.first++]; + SetValidityMask(vector, array, scan_state, size, nested_offset); + if (original_type.first == ArrowVariableSizeType::FIXED_SIZE) { + //! 
Have to check validity mask before setting this up + idx_t offset = (scan_state.chunk_offset + array.offset) * original_type.second; + if (nested_offset != -1) { + offset = original_type.second * nested_offset; + } + auto cdata = (char *)array.buffers[1]; + for (idx_t row_idx = 0; row_idx < size; row_idx++) { + if (FlatVector::IsNull(vector, row_idx)) { + continue; + } + auto bptr = cdata + offset; + auto blob_len = original_type.second; + FlatVector::GetData(vector)[row_idx] = StringVector::AddStringOrBlob(vector, bptr, blob_len); + offset += blob_len; + } + } else if (original_type.first == ArrowVariableSizeType::NORMAL) { + auto offsets = (uint32_t *)array.buffers[1] + array.offset + scan_state.chunk_offset; + if (nested_offset != -1) { + offsets = (uint32_t *)array.buffers[1] + array.offset + nested_offset; + } + auto cdata = (char *)array.buffers[2]; + for (idx_t row_idx = 0; row_idx < size; row_idx++) { + if (FlatVector::IsNull(vector, row_idx)) { + continue; + } + auto bptr = cdata + offsets[row_idx]; + auto blob_len = offsets[row_idx + 1] - offsets[row_idx]; + FlatVector::GetData(vector)[row_idx] = StringVector::AddStringOrBlob(vector, bptr, blob_len); + } + } else { + //! 
Check if last offset is higher than max uint32 + if (((uint64_t *)array.buffers[1])[array.length] > NumericLimits::Maximum()) { + throw std::runtime_error("We do not support Blobs over 4GB"); + } + auto offsets = (uint64_t *)array.buffers[1] + array.offset + scan_state.chunk_offset; + if (nested_offset != -1) { + offsets = (uint64_t *)array.buffers[1] + array.offset + nested_offset; + } + auto cdata = (char *)array.buffers[2]; + for (idx_t row_idx = 0; row_idx < size; row_idx++) { + if (FlatVector::IsNull(vector, row_idx)) { + continue; + } + auto bptr = cdata + offsets[row_idx]; + auto blob_len = offsets[row_idx + 1] - offsets[row_idx]; + FlatVector::GetData(vector)[row_idx] = StringVector::AddStringOrBlob(vector, bptr, blob_len); + } + } +} + +void ArrowToDuckDBMapList(Vector &vector, ArrowArray &array, ArrowScanState &scan_state, idx_t size, + std::unordered_map> &arrow_convert_data, idx_t col_idx, + std::pair &arrow_convert_idx, uint32_t *offsets, ValidityMask *parent_mask) { + idx_t list_size = offsets[size] - offsets[0]; + ListVector::Reserve(vector, list_size); + + auto &child_vector = ListVector::GetEntry(vector); + auto list_data = FlatVector::GetData(vector); + auto cur_offset = 0; + for (idx_t i = 0; i < size; i++) { + auto &le = list_data[i]; + le.offset = cur_offset; + le.length = offsets[i + 1] - offsets[i]; + cur_offset += le.length; + } + ListVector::SetListSize(vector, list_size); + if (list_size == 0 && offsets[0] == 0) { + SetValidityMask(child_vector, array, scan_state, list_size, -1); + } else { + SetValidityMask(child_vector, array, scan_state, list_size, offsets[0]); + } + + auto &list_mask = FlatVector::Validity(vector); + if (parent_mask) { + //! 
Since this List is owned by a struct we must guarantee their validity map matches on Null + if (!parent_mask->AllValid()) { + for (idx_t i = 0; i < size; i++) { + if (!parent_mask->RowIsValid(i)) { + list_mask.SetInvalid(i); } } - current_chunk_root.release(¤t_chunk_root); } } - - void ReleaseSchema() { - if (schema_root.release) { - for (idx_t child_idx = 0; child_idx < (idx_t)schema_root.n_children; child_idx++) { - auto &child = *schema_root.children[child_idx]; - if (child.release) { - child.release(&child); + if (list_size == 0 && offsets[0] == 0) { + ColumnArrowToDuckDB(child_vector, array, scan_state, list_size, arrow_convert_data, col_idx, arrow_convert_idx, + -1); + } else { + ColumnArrowToDuckDB(child_vector, array, scan_state, list_size, arrow_convert_data, col_idx, arrow_convert_idx, + offsets[0]); + } +} +template +static void SetVectorString(Vector &vector, idx_t size, char *cdata, T *offsets) { + auto strings = FlatVector::GetData(vector); + for (idx_t row_idx = 0; row_idx < size; row_idx++) { + if (FlatVector::IsNull(vector, row_idx)) { + continue; + } + auto cptr = cdata + offsets[row_idx]; + auto str_len = offsets[row_idx + 1] - offsets[row_idx]; + strings[row_idx] = string_t(cptr, str_len); + } +} + +void DirectConversion(Vector &vector, ArrowArray &array, ArrowScanState &scan_state, int64_t nested_offset) { + auto internal_type = GetTypeIdSize(vector.GetType().InternalType()); + auto data_ptr = (data_ptr_t)array.buffers[1] + internal_type * (scan_state.chunk_offset + array.offset); + if (nested_offset != -1) { + data_ptr = (data_ptr_t)array.buffers[1] + internal_type * (array.offset + nested_offset); + } + FlatVector::SetData(vector, data_ptr); +} + +template +void TimeConversion(Vector &vector, ArrowArray &array, ArrowScanState &scan_state, int64_t nested_offset, idx_t size, + int64_t conversion) { + auto tgt_ptr = (dtime_t *)FlatVector::GetData(vector); + auto src_ptr = (T *)array.buffers[1] + scan_state.chunk_offset + array.offset; + if 
(nested_offset != -1) { + src_ptr = (T *)array.buffers[1] + nested_offset + array.offset; + } + for (idx_t row = 0; row < size; row++) { + if (!TryMultiplyOperator::Operation((int64_t)src_ptr[row], conversion, tgt_ptr[row].micros)) { + throw ConversionException("Could not convert Interval to Microsecond"); + } + } +} + +void IntervalConversionUs(Vector &vector, ArrowArray &array, ArrowScanState &scan_state, int64_t nested_offset, + idx_t size, int64_t conversion) { + auto tgt_ptr = (interval_t *)FlatVector::GetData(vector); + auto src_ptr = (int64_t *)array.buffers[1] + scan_state.chunk_offset + array.offset; + if (nested_offset != -1) { + src_ptr = (int64_t *)array.buffers[1] + nested_offset + array.offset; + } + for (idx_t row = 0; row < size; row++) { + tgt_ptr[row].days = 0; + tgt_ptr[row].months = 0; + if (!TryMultiplyOperator::Operation(src_ptr[row], conversion, tgt_ptr[row].micros)) { + throw ConversionException("Could not convert Interval to Microsecond"); + } + } +} + +void IntervalConversionMonths(Vector &vector, ArrowArray &array, ArrowScanState &scan_state, int64_t nested_offset, + idx_t size) { + auto tgt_ptr = (interval_t *)FlatVector::GetData(vector); + auto src_ptr = (int32_t *)array.buffers[1] + scan_state.chunk_offset + array.offset; + if (nested_offset != -1) { + src_ptr = (int32_t *)array.buffers[1] + nested_offset + array.offset; + } + for (idx_t row = 0; row < size; row++) { + tgt_ptr[row].days = 0; + tgt_ptr[row].micros = 0; + tgt_ptr[row].months = src_ptr[row]; + } +} + +void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanState &scan_state, idx_t size, + std::unordered_map> &arrow_convert_data, idx_t col_idx, + std::pair &arrow_convert_idx, int64_t nested_offset, ValidityMask *parent_mask) { + switch (vector.GetType().id()) { + case LogicalTypeId::SQLNULL: + vector.Reference(Value()); + break; + case LogicalTypeId::BOOLEAN: { + //! Arrow bit-packs boolean values + //! 
Lets first figure out where we are in the source array + auto src_ptr = (uint8_t *)array.buffers[1] + (scan_state.chunk_offset + array.offset) / 8; + + if (nested_offset != -1) { + src_ptr = (uint8_t *)array.buffers[1] + (nested_offset + array.offset) / 8; + } + auto tgt_ptr = (uint8_t *)FlatVector::GetData(vector); + int src_pos = 0; + idx_t cur_bit = scan_state.chunk_offset % 8; + if (nested_offset != -1) { + cur_bit = nested_offset % 8; + } + for (idx_t row = 0; row < size; row++) { + if ((src_ptr[src_pos] & (1 << cur_bit)) == 0) { + tgt_ptr[row] = 0; + } else { + tgt_ptr[row] = 1; + } + cur_bit++; + if (cur_bit == 8) { + src_pos++; + cur_bit = 0; + } + } + break; + } + case LogicalTypeId::TINYINT: + case LogicalTypeId::SMALLINT: + case LogicalTypeId::INTEGER: + case LogicalTypeId::FLOAT: + case LogicalTypeId::UTINYINT: + case LogicalTypeId::USMALLINT: + case LogicalTypeId::UINTEGER: + case LogicalTypeId::UBIGINT: + case LogicalTypeId::BIGINT: + case LogicalTypeId::HUGEINT: + case LogicalTypeId::TIMESTAMP: + case LogicalTypeId::TIMESTAMP_SEC: + case LogicalTypeId::TIMESTAMP_MS: + case LogicalTypeId::TIMESTAMP_NS: { + DirectConversion(vector, array, scan_state, nested_offset); + break; + } + case LogicalTypeId::DOUBLE: { + DirectConversion(vector, array, scan_state, nested_offset); + //! 
Need to check if there are NaNs, if yes, must turn that to null + auto data = (double *)vector.GetData(); + auto &mask = FlatVector::Validity(vector); + for (idx_t row_idx = 0; row_idx < size; row_idx++) { + if (!Value::DoubleIsValid(data[row_idx])) { + mask.SetInvalid(row_idx); + } + } + break; + } + case LogicalTypeId::VARCHAR: { + auto original_type = arrow_convert_data[col_idx]->variable_sz_type[arrow_convert_idx.first++]; + auto cdata = (char *)array.buffers[2]; + if (original_type.first == ArrowVariableSizeType::SUPER_SIZE) { + if (((uint64_t *)array.buffers[1])[array.length] > NumericLimits::Maximum()) { + throw std::runtime_error("We do not support Strings over 4GB"); + } + auto offsets = (uint64_t *)array.buffers[1] + array.offset + scan_state.chunk_offset; + if (nested_offset != -1) { + offsets = (uint64_t *)array.buffers[1] + array.offset + nested_offset; + } + SetVectorString(vector, size, cdata, offsets); + + } else { + auto offsets = (uint32_t *)array.buffers[1] + array.offset + scan_state.chunk_offset; + if (nested_offset != -1) { + offsets = (uint32_t *)array.buffers[1] + array.offset + nested_offset; + } + SetVectorString(vector, size, cdata, offsets); + } + + break; + } + case LogicalTypeId::DATE: { + auto precision = arrow_convert_data[col_idx]->date_time_precision[arrow_convert_idx.second++]; + switch (precision) { + case ArrowDateTimeType::DAYS: { + DirectConversion(vector, array, scan_state, nested_offset); + break; + } + case ArrowDateTimeType::MILLISECONDS: { + //! 
convert date from nanoseconds to days + auto src_ptr = (uint64_t *)array.buffers[1] + scan_state.chunk_offset + array.offset; + if (nested_offset != -1) { + src_ptr = (uint64_t *)array.buffers[1] + nested_offset + array.offset; + } + auto tgt_ptr = (date_t *)FlatVector::GetData(vector); + for (idx_t row = 0; row < size; row++) { + tgt_ptr[row] = date_t(int64_t(src_ptr[row]) / (1000 * 60 * 60 * 24)); + } + break; + } + default: + throw std::runtime_error("Unsupported precision for Date Type "); + } + break; + } + case LogicalTypeId::TIME: { + auto precision = arrow_convert_data[col_idx]->date_time_precision[arrow_convert_idx.second++]; + switch (precision) { + case ArrowDateTimeType::SECONDS: { + TimeConversion(vector, array, scan_state, nested_offset, size, 1000000); + break; + } + case ArrowDateTimeType::MILLISECONDS: { + TimeConversion(vector, array, scan_state, nested_offset, size, 1000); + break; + } + case ArrowDateTimeType::MICROSECONDS: { + TimeConversion(vector, array, scan_state, nested_offset, size, 1); + break; + } + case ArrowDateTimeType::NANOSECONDS: { + auto tgt_ptr = (dtime_t *)FlatVector::GetData(vector); + auto src_ptr = (int64_t *)array.buffers[1] + scan_state.chunk_offset + array.offset; + if (nested_offset != -1) { + src_ptr = (int64_t *)array.buffers[1] + nested_offset + array.offset; + } + for (idx_t row = 0; row < size; row++) { + tgt_ptr[row].micros = src_ptr[row] / 1000; + } + break; + } + default: + throw std::runtime_error("Unsupported precision for Time Type "); + } + break; + } + case LogicalTypeId::INTERVAL: { + auto precision = arrow_convert_data[col_idx]->date_time_precision[arrow_convert_idx.second++]; + switch (precision) { + case ArrowDateTimeType::SECONDS: { + IntervalConversionUs(vector, array, scan_state, nested_offset, size, 1000000); + break; + } + case ArrowDateTimeType::DAYS: + case ArrowDateTimeType::MILLISECONDS: { + IntervalConversionUs(vector, array, scan_state, nested_offset, size, 1000); + break; + } + case 
ArrowDateTimeType::MICROSECONDS: { + IntervalConversionUs(vector, array, scan_state, nested_offset, size, 1); + break; + } + case ArrowDateTimeType::NANOSECONDS: { + auto tgt_ptr = (interval_t *)FlatVector::GetData(vector); + auto src_ptr = (int64_t *)array.buffers[1] + scan_state.chunk_offset + array.offset; + if (nested_offset != -1) { + src_ptr = (int64_t *)array.buffers[1] + nested_offset + array.offset; + } + for (idx_t row = 0; row < size; row++) { + tgt_ptr[row].micros = src_ptr[row] / 1000; + tgt_ptr[row].days = 0; + tgt_ptr[row].months = 0; + } + break; + } + case ArrowDateTimeType::MONTHS: { + IntervalConversionMonths(vector, array, scan_state, nested_offset, size); + break; + } + default: + throw std::runtime_error("Unsupported precision for Interval/Duration Type "); + } + break; + } + case LogicalTypeId::DECIMAL: { + auto val_mask = FlatVector::Validity(vector); + //! We have to convert from INT128 + auto src_ptr = (hugeint_t *)array.buffers[1] + scan_state.chunk_offset + array.offset; + if (nested_offset != -1) { + src_ptr = (hugeint_t *)array.buffers[1] + nested_offset + array.offset; + } + switch (vector.GetType().InternalType()) { + case PhysicalType::INT16: { + auto tgt_ptr = (int16_t *)FlatVector::GetData(vector); + for (idx_t row = 0; row < size; row++) { + if (val_mask.RowIsValid(row)) { + auto result = Hugeint::TryCast(src_ptr[row], tgt_ptr[row]); + D_ASSERT(result); + (void)result; } } - schema_root.release(&schema_root); + break; } + case PhysicalType::INT32: { + auto tgt_ptr = (int32_t *)FlatVector::GetData(vector); + for (idx_t row = 0; row < size; row++) { + if (val_mask.RowIsValid(row)) { + auto result = Hugeint::TryCast(src_ptr[row], tgt_ptr[row]); + D_ASSERT(result); + (void)result; + } + } + break; + } + case PhysicalType::INT64: { + auto tgt_ptr = (int64_t *)FlatVector::GetData(vector); + for (idx_t row = 0; row < size; row++) { + if (val_mask.RowIsValid(row)) { + auto result = Hugeint::TryCast(src_ptr[row], tgt_ptr[row]); + 
D_ASSERT(result); + (void)result; + } + } + break; + } + case PhysicalType::INT128: { + FlatVector::SetData(vector, (data_ptr_t)array.buffers[1] + GetTypeIdSize(vector.GetType().InternalType()) * + (scan_state.chunk_offset + array.offset)); + break; + } + default: + throw std::runtime_error("Unsupported physical type for Decimal: " + + TypeIdToString(vector.GetType().InternalType())); + } + break; } - - ~ArrowScanFunctionData() override { - ReleaseSchema(); - ReleaseArray(); - } -}; - -static unique_ptr ArrowScanBind(ClientContext &context, vector &inputs, - unordered_map &named_parameters, - vector &input_table_types, vector &input_table_names, - vector &return_types, vector &names) { - - auto res = make_unique(); - auto &data = *res; - auto stream_factory_ptr = inputs[0].GetValue(); - ArrowArrayStream *(*stream_factory_produce)(uintptr_t stream_factory_ptr); - stream_factory_produce = (ArrowArrayStream * (*)(uintptr_t stream_factory_ptr)) inputs[1].GetValue(); - data.stream = stream_factory_produce(stream_factory_ptr); - if (!data.stream) { - throw InvalidInputException("arrow_scan: NULL pointer passed"); - } - - D_ASSERT(data.stream->get_schema); - if (data.stream->get_schema(data.stream, &data.schema_root)) { - throw InvalidInputException("arrow_scan: get_schema failed(): %s", - string(data.stream->get_last_error(data.stream))); - } - - if (!data.schema_root.release) { - throw InvalidInputException("arrow_scan: released schema passed"); + case LogicalTypeId::BLOB: { + ArrowToDuckDBBlob(vector, array, scan_state, size, arrow_convert_data, col_idx, arrow_convert_idx, + nested_offset); + break; } - - if (data.schema_root.n_children < 1) { - throw InvalidInputException("arrow_scan: empty schema passed"); + case LogicalTypeId::LIST: { + ArrowToDuckDBList(vector, array, scan_state, size, arrow_convert_data, col_idx, arrow_convert_idx, + nested_offset, parent_mask); + break; } - - for (idx_t col_idx = 0; col_idx < (idx_t)data.schema_root.n_children; col_idx++) { - 
auto &schema = *data.schema_root.children[col_idx]; - if (!schema.release) { - throw InvalidInputException("arrow_scan: released schema passed"); - } - if (schema.dictionary) { - throw NotImplementedException("arrow_scan: dictionary vectors not supported yet"); + case LogicalTypeId::MAP: { + //! Since this is a map we skip first child, because its a struct + auto &struct_arrow = *array.children[0]; + auto &child_entries = StructVector::GetEntries(vector); + D_ASSERT(child_entries.size() == 2); + auto offsets = (uint32_t *)array.buffers[1] + array.offset + scan_state.chunk_offset; + if (nested_offset != -1) { + offsets = (uint32_t *)array.buffers[1] + nested_offset; } - auto format = string(schema.format); - if (format == "n") { - return_types.push_back(LogicalType::SQLNULL); - } else if (format == "b") { - return_types.push_back(LogicalType::BOOLEAN); - } else if (format == "c") { - return_types.push_back(LogicalType::TINYINT); - } else if (format == "s") { - return_types.push_back(LogicalType::SMALLINT); - } else if (format == "i") { - return_types.push_back(LogicalType::INTEGER); - } else if (format == "l") { - return_types.push_back(LogicalType::BIGINT); - } else if (format == "C") { - return_types.push_back(LogicalType::UTINYINT); - } else if (format == "S") { - return_types.push_back(LogicalType::USMALLINT); - } else if (format == "I") { - return_types.push_back(LogicalType::UINTEGER); - } else if (format == "L") { - return_types.push_back(LogicalType::UBIGINT); - } else if (format == "f") { - return_types.push_back(LogicalType::FLOAT); - } else if (format == "g") { - return_types.push_back(LogicalType::DOUBLE); - } else if (format == "d:38,0") { // decimal128 - return_types.push_back(LogicalType::HUGEINT); - } else if (format == "u") { - return_types.push_back(LogicalType::VARCHAR); - } else if (format == "tsn:") { - return_types.push_back(LogicalType::TIMESTAMP); - } else if (format == "tdD") { - return_types.push_back(LogicalType::DATE); - } else if (format 
== "ttm") { - return_types.push_back(LogicalType::TIME); - } else { - throw NotImplementedException("1 Unsupported Arrow type %s", format); + auto &struct_validity_mask = FlatVector::Validity(vector); + //! Fill the children + for (idx_t type_idx = 0; type_idx < (idx_t)struct_arrow.n_children; type_idx++) { + ArrowToDuckDBMapList(*child_entries[type_idx], *struct_arrow.children[type_idx], scan_state, size, + arrow_convert_data, col_idx, arrow_convert_idx, offsets, &struct_validity_mask); } - auto name = string(schema.name); - if (name.empty()) { - name = string("v") + to_string(col_idx); + break; + } + case LogicalTypeId::STRUCT: { + //! Fill the children + auto &child_entries = StructVector::GetEntries(vector); + auto &struct_validity_mask = FlatVector::Validity(vector); + for (idx_t type_idx = 0; type_idx < (idx_t)array.n_children; type_idx++) { + SetValidityMask(*child_entries[type_idx], *array.children[type_idx], scan_state, size, nested_offset); + ColumnArrowToDuckDB(*child_entries[type_idx], *array.children[type_idx], scan_state, size, + arrow_convert_data, col_idx, arrow_convert_idx, nested_offset, &struct_validity_mask); } - names.push_back(name); + break; + } + default: + throw std::runtime_error("Unsupported type " + vector.GetType().ToString()); } - data.ReleaseSchema(); - return move(res); } -static unique_ptr ArrowScanInit(ClientContext &context, const FunctionData *bind_data, - vector &column_ids, TableFilterCollection *filters) { - auto &data = (ArrowScanFunctionData &)*bind_data; - if (data.is_consumed) { - throw NotImplementedException("FIXME: Arrow streams can only be read once"); +template +static void SetSelectionVectorLoop(SelectionVector &sel, data_ptr_t indices_p, idx_t size) { + auto indices = (T *)indices_p; + for (idx_t row = 0; row < size; row++) { + sel.set_index(row, indices[row]); } - data.is_consumed = true; - return make_unique(); } -static void ArrowScanFunction(ClientContext &context, const FunctionData *bind_data, - 
FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { - auto &data = (ArrowScanFunctionData &)*bind_data; - if (!data.stream->release) { - // no more chunks - return; - } +template +static void SetSelectionVectorLoopWithChecks(SelectionVector &sel, data_ptr_t indices_p, idx_t size) { - // have we run out of data on the current chunk? move to next one - if (data.chunk_offset >= (idx_t)data.current_chunk_root.length) { - data.chunk_offset = 0; - data.ReleaseArray(); - if (data.stream->get_next(data.stream, &data.current_chunk_root)) { - throw InvalidInputException("arrow_scan: get_next failed(): %s", - string(data.stream->get_last_error(data.stream))); + auto indices = (T *)indices_p; + for (idx_t row = 0; row < size; row++) { + if (indices[row] > NumericLimits::Maximum()) { + throw std::runtime_error("DuckDB only supports indices that fit on an uint32"); } + sel.set_index(row, indices[row]); } +} - // have we run out of chunks? we done - if (!data.current_chunk_root.release) { - data.stream->release(data.stream); - return; - } - - if ((idx_t)data.current_chunk_root.n_children != output.ColumnCount()) { - throw InvalidInputException("arrow_scan: array column count mismatch"); - } - - output.SetCardinality(MinValue(STANDARD_VECTOR_SIZE, data.current_chunk_root.length - data.chunk_offset)); - - for (idx_t col_idx = 0; col_idx < output.ColumnCount(); col_idx++) { - auto &array = *data.current_chunk_root.children[col_idx]; - if (!array.release) { - throw InvalidInputException("arrow_scan: released array passed"); - } - if (array.length != data.current_chunk_root.length) { - throw InvalidInputException("arrow_scan: array length mismatch"); - } - if (array.dictionary) { - throw NotImplementedException("arrow_scan: dictionary vectors not supported yet"); +template +static void SetMaskedSelectionVectorLoop(SelectionVector &sel, data_ptr_t indices_p, idx_t size, ValidityMask &mask, + idx_t last_element_pos) { + auto indices = (T *)indices_p; + for 
(idx_t row = 0; row < size; row++) { + if (mask.RowIsValid(row)) { + sel.set_index(row, indices[row]); + } else { + //! Need to point out to last element + sel.set_index(row, last_element_pos); } - if (array.null_count != 0 && array.buffers[0]) { - auto &mask = FlatVector::Validity(output.data[col_idx]); - - auto bit_offset = data.chunk_offset + array.offset; - auto n_bitmask_bytes = (output.size() + 8 - 1) / 8; - - mask.EnsureWritable(); - if (bit_offset % 8 == 0) { - // just memcpy nullmask - memcpy((void *)mask.GetData(), (uint8_t *)array.buffers[0] + bit_offset / 8, n_bitmask_bytes); - } else { - // need to re-align nullmask :/ - bitset temp_nullmask; - memcpy(&temp_nullmask, (uint8_t *)array.buffers[0] + bit_offset / 8, n_bitmask_bytes + 1); + } +} - temp_nullmask >>= (bit_offset % 8); // why this has to be a right shift is a mystery to me - memcpy((void *)mask.GetData(), (data_ptr_t)&temp_nullmask, n_bitmask_bytes); - } - } +void SetSelectionVector(SelectionVector &sel, data_ptr_t indices_p, LogicalType &logical_type, idx_t size, + ValidityMask *mask = nullptr, idx_t last_element_pos = 0) { + sel.Initialize(size); - switch (output.data[col_idx].GetType().id()) { - case LogicalTypeId::SQLNULL: - output.data[col_idx].Reference(Value()); + if (mask) { + switch (logical_type.id()) { + case LogicalTypeId::UTINYINT: + SetMaskedSelectionVectorLoop(sel, indices_p, size, *mask, last_element_pos); break; - case LogicalTypeId::BOOLEAN: case LogicalTypeId::TINYINT: - case LogicalTypeId::SMALLINT: - case LogicalTypeId::INTEGER: - case LogicalTypeId::FLOAT: - case LogicalTypeId::UTINYINT: + SetMaskedSelectionVectorLoop(sel, indices_p, size, *mask, last_element_pos); + break; case LogicalTypeId::USMALLINT: + SetMaskedSelectionVectorLoop(sel, indices_p, size, *mask, last_element_pos); + break; + case LogicalTypeId::SMALLINT: + SetMaskedSelectionVectorLoop(sel, indices_p, size, *mask, last_element_pos); + break; case LogicalTypeId::UINTEGER: + if (last_element_pos > 
NumericLimits::Maximum()) { + //! Its guaranteed that our indices will point to the last element, so just throw an error + throw std::runtime_error("DuckDB only supports indices that fit on an uint32"); + } + SetMaskedSelectionVectorLoop(sel, indices_p, size, *mask, last_element_pos); + break; + case LogicalTypeId::INTEGER: + SetMaskedSelectionVectorLoop(sel, indices_p, size, *mask, last_element_pos); + break; case LogicalTypeId::UBIGINT: - case LogicalTypeId::DOUBLE: + if (last_element_pos > NumericLimits::Maximum()) { + //! Its guaranteed that our indices will point to the last element, so just throw an error + throw std::runtime_error("DuckDB only supports indices that fit on an uint32"); + } + SetMaskedSelectionVectorLoop(sel, indices_p, size, *mask, last_element_pos); + break; case LogicalTypeId::BIGINT: - case LogicalTypeId::HUGEINT: - case LogicalTypeId::DATE: - FlatVector::SetData(output.data[col_idx], (data_ptr_t)array.buffers[1] + - GetTypeIdSize(output.data[col_idx].GetType().InternalType()) * - (data.chunk_offset + array.offset)); + if (last_element_pos > NumericLimits::Maximum()) { + //! 
Its guaranteed that our indices will point to the last element, so just throw an error + throw std::runtime_error("DuckDB only supports indices that fit on an uint32"); + } + SetMaskedSelectionVectorLoop(sel, indices_p, size, *mask, last_element_pos); break; - case LogicalTypeId::VARCHAR: { - auto offsets = (uint32_t *)array.buffers[1] + array.offset + data.chunk_offset; - auto cdata = (char *)array.buffers[2]; - - for (idx_t row_idx = 0; row_idx < output.size(); row_idx++) { - if (FlatVector::IsNull(output.data[col_idx], row_idx)) { - continue; - } - auto cptr = cdata + offsets[row_idx]; - auto str_len = offsets[row_idx + 1] - offsets[row_idx]; + default: + throw std::runtime_error("(Arrow) Unsupported type for selection vectors " + logical_type.ToString()); + } - auto utf_type = Utf8Proc::Analyze(cptr, str_len); - if (utf_type == UnicodeType::INVALID) { - throw std::runtime_error("Invalid UTF8 string encoding"); - } - FlatVector::GetData(output.data[col_idx])[row_idx] = - StringVector::AddString(output.data[col_idx], cptr, str_len); + } else { + switch (logical_type.id()) { + case LogicalTypeId::UTINYINT: + SetSelectionVectorLoop(sel, indices_p, size); + break; + case LogicalTypeId::TINYINT: + SetSelectionVectorLoop(sel, indices_p, size); + break; + case LogicalTypeId::USMALLINT: + SetSelectionVectorLoop(sel, indices_p, size); + break; + case LogicalTypeId::SMALLINT: + SetSelectionVectorLoop(sel, indices_p, size); + break; + case LogicalTypeId::UINTEGER: + SetSelectionVectorLoop(sel, indices_p, size); + break; + case LogicalTypeId::INTEGER: + SetSelectionVectorLoop(sel, indices_p, size); + break; + case LogicalTypeId::UBIGINT: + if (last_element_pos > NumericLimits::Maximum()) { + //! 
We need to check if our indexes fit in a uint32_t + SetSelectionVectorLoopWithChecks(sel, indices_p, size); + } else { + SetSelectionVectorLoop(sel, indices_p, size); } - break; - } - case LogicalTypeId::TIME: { - // convert time from milliseconds to microseconds - auto src_ptr = (uint32_t *)array.buffers[1] + data.chunk_offset; - auto tgt_ptr = (dtime_t *)FlatVector::GetData(output.data[col_idx]); - for (idx_t row = 0; row < output.size(); row++) { - auto source_idx = data.chunk_offset + row; - tgt_ptr[row] = dtime_t(src_ptr[source_idx]) * 1000; + case LogicalTypeId::BIGINT: + if (last_element_pos > NumericLimits::Maximum()) { + //! We need to check if our indexes fit in a uint32_t + SetSelectionVectorLoopWithChecks(sel, indices_p, size); + } else { + SetSelectionVectorLoop(sel, indices_p, size); } break; + default: + throw std::runtime_error("(Arrow) Unsupported type for selection vectors " + logical_type.ToString()); } - case LogicalTypeId::TIMESTAMP: { - // convert timestamps from nanoseconds to microseconds - auto src_ptr = (uint64_t *)array.buffers[1] + data.chunk_offset; - auto tgt_ptr = (timestamp_t *)FlatVector::GetData(output.data[col_idx]); + } +} - for (idx_t row = 0; row < output.size(); row++) { - auto source_idx = data.chunk_offset + row; - tgt_ptr[row] = Timestamp::FromEpochNanoSeconds(src_ptr[source_idx]); - } - break; +void ColumnArrowToDuckDBDictionary(Vector &vector, ArrowArray &array, ArrowScanState &scan_state, idx_t size, + std::unordered_map> &arrow_convert_data, + idx_t col_idx, std::pair &arrow_convert_idx) { + SelectionVector sel; + auto &dict_vectors = scan_state.arrow_dictionary_vectors; + if (dict_vectors.find(col_idx) == dict_vectors.end()) { + //! 
We need to set the dictionary data for this column + auto base_vector = make_unique(vector.GetType(), array.dictionary->length); + SetValidityMask(*base_vector, *array.dictionary, scan_state, array.dictionary->length, 0, array.null_count > 0); + ColumnArrowToDuckDB(*base_vector, *array.dictionary, scan_state, array.dictionary->length, arrow_convert_data, + col_idx, arrow_convert_idx); + dict_vectors[col_idx] = move(base_vector); + } + auto dictionary_type = arrow_convert_data[col_idx]->dictionary_type; + //! Get Pointer to Indices of Dictionary + auto indices = (data_ptr_t)array.buffers[1] + + GetTypeIdSize(dictionary_type.InternalType()) * (scan_state.chunk_offset + array.offset); + if (array.null_count > 0) { + ValidityMask indices_validity; + GetValidityMask(indices_validity, array, scan_state, size); + SetSelectionVector(sel, indices, dictionary_type, size, &indices_validity, array.dictionary->length); + } else { + SetSelectionVector(sel, indices, dictionary_type, size); + } + vector.Slice(*dict_vectors[col_idx], sel, size); +} +void ArrowTableFunction::ArrowToDuckDB(ArrowScanState &scan_state, + std::unordered_map> &arrow_convert_data, + DataChunk &output, idx_t start) { + for (idx_t idx = 0; idx < output.ColumnCount(); idx++) { + auto col_idx = scan_state.column_ids[idx]; + std::pair arrow_convert_idx {0, 0}; + auto &array = *scan_state.chunk->arrow_array.children[idx]; + if (!array.release) { + throw InvalidInputException("arrow_scan: released array passed"); } - default: - throw std::runtime_error("Unsupported type " + output.data[col_idx].GetType().ToString()); + if (array.length != scan_state.chunk->arrow_array.length) { + throw InvalidInputException("arrow_scan: array length mismatch"); } + if (array.dictionary) { + ColumnArrowToDuckDBDictionary(output.data[idx], array, scan_state, output.size(), arrow_convert_data, + col_idx, arrow_convert_idx); + } else { + SetValidityMask(output.data[idx], array, scan_state, output.size(), -1); + 
ColumnArrowToDuckDB(output.data[idx], array, scan_state, output.size(), arrow_convert_data, col_idx, + arrow_convert_idx); + } + } +} + +void ArrowTableFunction::ArrowScanFunction(ClientContext &context, const FunctionData *bind_data, + FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { + + auto &data = (ArrowScanFunctionData &)*bind_data; + auto &state = (ArrowScanState &)*operator_state; + + //! have we run out of data on the current chunk? move to next one + if (state.chunk_offset >= (idx_t)state.chunk->arrow_array.length) { + state.chunk_offset = 0; + state.arrow_dictionary_vectors.clear(); + state.chunk = state.stream->GetNextChunk(); + } + + //! have we run out of chunks? we are done + if (!state.chunk->arrow_array.release) { + return; + } + + int64_t output_size = MinValue(STANDARD_VECTOR_SIZE, state.chunk->arrow_array.length - state.chunk_offset); + data.lines_read += output_size; + output.SetCardinality(output_size); + ArrowToDuckDB(state, data.arrow_convert_data, output, data.lines_read - output_size); + output.Verify(); + state.chunk_offset += output.size(); +} + +void ArrowTableFunction::ArrowScanFunctionParallel(ClientContext &context, const FunctionData *bind_data, + FunctionOperatorData *operator_state, DataChunk *input, + DataChunk &output, ParallelState *parallel_state_p) { + auto &data = (ArrowScanFunctionData &)*bind_data; + auto &state = (ArrowScanState &)*operator_state; + //! 
Out of tuples in this chunk + if (state.chunk_offset >= (idx_t)state.chunk->arrow_array.length) { + return; } + int64_t output_size = MinValue(STANDARD_VECTOR_SIZE, state.chunk->arrow_array.length - state.chunk_offset); + data.lines_read += output_size; + output.SetCardinality(output_size); + ArrowToDuckDB(state, data.arrow_convert_data, output, data.lines_read - output_size); output.Verify(); - data.chunk_offset += output.size(); + state.chunk_offset += output.size(); +} + +idx_t ArrowTableFunction::ArrowScanMaxThreads(ClientContext &context, const FunctionData *bind_data_p) { + auto &bind_data = (const ArrowScanFunctionData &)*bind_data_p; + if (bind_data.number_of_rows <= 0 || context.force_parallelism) { + return context.db->NumberOfThreads(); + } + return ((bind_data.number_of_rows + bind_data.rows_per_thread - 1) / bind_data.rows_per_thread) + 1; +} + +unique_ptr ArrowTableFunction::ArrowScanInitParallelState(ClientContext &context, + const FunctionData *bind_data_p) { + return make_unique(); +} + +bool ArrowTableFunction::ArrowScanParallelStateNext(ClientContext &context, const FunctionData *bind_data_p, + FunctionOperatorData *operator_state, + ParallelState *parallel_state_p) { + auto &bind_data = (const ArrowScanFunctionData &)*bind_data_p; + auto &state = (ArrowScanState &)*operator_state; + auto ¶llel_state = (ParallelArrowScanState &)*parallel_state_p; + + lock_guard parallel_lock(parallel_state.lock); + if (!parallel_state.stream) { + //! Generate a Stream + parallel_state.stream = ProduceArrowScan(bind_data, state, state.filters); + } + state.chunk_offset = 0; + + auto current_chunk = parallel_state.stream->GetNextChunk(); + while (current_chunk->arrow_array.length == 0 && current_chunk->arrow_array.release) { + current_chunk = parallel_state.stream->GetNextChunk(); + } + state.chunk = move(current_chunk); + //! have we run out of chunks? 
we are done + if (!state.chunk->arrow_array.release) { + return false; + } + return true; +} + +unique_ptr +ArrowTableFunction::ArrowScanParallelInit(ClientContext &context, const FunctionData *bind_data_p, ParallelState *state, + const vector &column_ids, TableFilterCollection *filters) { + auto current_chunk = make_unique(); + auto result = make_unique(move(current_chunk)); + result->column_ids = column_ids; + result->filters = filters; + if (!ArrowScanParallelStateNext(context, bind_data_p, result.get(), state)) { + return nullptr; + } + return move(result); +} + +unique_ptr ArrowTableFunction::ArrowScanCardinality(ClientContext &context, const FunctionData *data) { + auto &bind_data = (ArrowScanFunctionData &)*data; + return make_unique(bind_data.number_of_rows, bind_data.number_of_rows); +} + +int ArrowTableFunction::ArrowProgress(ClientContext &context, const FunctionData *bind_data_p) { + auto &bind_data = (const ArrowScanFunctionData &)*bind_data_p; + if (bind_data.number_of_rows == 0) { + return 100; + } + auto percentage = bind_data.lines_read * 100 / bind_data.number_of_rows; + return percentage; } void ArrowTableFunction::RegisterFunction(BuiltinFunctions &set) { TableFunctionSet arrow("arrow_scan"); - - arrow.AddFunction( - TableFunction({LogicalType::POINTER, LogicalType::POINTER}, ArrowScanFunction, ArrowScanBind, ArrowScanInit)); + arrow.AddFunction(TableFunction({LogicalType::POINTER, LogicalType::POINTER, LogicalType::UBIGINT}, + ArrowScanFunction, ArrowScanBind, ArrowScanInit, nullptr, nullptr, nullptr, + ArrowScanCardinality, nullptr, nullptr, ArrowScanMaxThreads, + ArrowScanInitParallelState, ArrowScanFunctionParallel, ArrowScanParallelInit, + ArrowScanParallelStateNext, true, true, ArrowProgress)); set.AddFunction(arrow); } @@ -74790,6 +81906,10 @@ struct RepeatTableFunction { static void RegisterFunction(BuiltinFunctions &set); }; +struct UnnestTableFunction { + static void RegisterFunction(BuiltinFunctions &set); +}; + } // namespace duckdb 
@@ -74877,9 +81997,9 @@ struct ReadCSVData : public BaseCSVData { //! In this case, the CSV reader is already created and might as well be re-used. unique_ptr initial_reader; //! Total File Size - idx_t file_size; + atomic file_size; //! How many bytes were read up to this point - idx_t bytes_read; + atomic bytes_read; }; struct CSVCopyFunction { @@ -75297,7 +82417,7 @@ struct LocalReadCSVData : public LocalFunctionData { }; struct GlobalWriteCSVData : public GlobalFunctionData { - GlobalWriteCSVData(FileSystem &fs, string file_path) : fs(fs) { + GlobalWriteCSVData(FileSystem &fs, const string &file_path) : fs(fs) { handle = fs.OpenFile(file_path, FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW, FileLockType::WRITE_LOCK); } @@ -75466,7 +82586,8 @@ struct GlobFunctionState : public FunctionOperatorData { }; static unique_ptr GlobFunctionInit(ClientContext &context, const FunctionData *bind_data, - vector &column_ids, TableFilterCollection *filters) { + const vector &column_ids, + TableFilterCollection *filters) { return make_unique(); } @@ -75494,7 +82615,375 @@ void GlobTableFunction::RegisterFunction(BuiltinFunctions &set) { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/function/table/information_schema_functions.hpp +// duckdb/function/table/system_functions.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { + +struct PragmaCollations { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct PragmaFunctionPragma { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct PragmaTableInfo { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct PragmaStorageInfo { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct PragmaLastProfilingOutput { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct PragmaDetailedProfilingOutput { + static void 
RegisterFunction(BuiltinFunctions &set); +}; + +struct PragmaVersion { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct PragmaDatabaseList { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct PragmaDatabaseSize { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct DuckDBSchemasFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct DuckDBColumnsFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct DuckDBConstraintsFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct DuckDBDependenciesFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct DuckDBIndexesFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct DuckDBSequencesFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct DuckDBTablesFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct DuckDBTypesFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +struct DuckDBViewsFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + +} // namespace duckdb + + + + + + + +namespace duckdb { + +struct PragmaDetailedProfilingOutputOperatorData : public FunctionOperatorData { + explicit PragmaDetailedProfilingOutputOperatorData() : chunk_index(0), initialized(false) { + } + idx_t chunk_index; + bool initialized; +}; + +struct PragmaDetailedProfilingOutputData : public TableFunctionData { + explicit PragmaDetailedProfilingOutputData(vector &types) : types(types) { + } + unique_ptr collection; + vector types; +}; + +static unique_ptr PragmaDetailedProfilingOutputBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, + vector &return_types, + vector &names) { + names.emplace_back("OPERATOR_ID"); + return_types.push_back(LogicalType::INTEGER); + + names.emplace_back("ANNOTATION"); + return_types.push_back(LogicalType::VARCHAR); + + 
names.emplace_back("ID"); + return_types.push_back(LogicalType::INTEGER); + + names.emplace_back("NAME"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("TIME"); + return_types.push_back(LogicalType::DOUBLE); + + names.emplace_back("CYCLES_PER_TUPLE"); + return_types.push_back(LogicalType::DOUBLE); + + names.emplace_back("SAMPLE_SIZE"); + return_types.push_back(LogicalType::INTEGER); + + names.emplace_back("INPUT_SIZE"); + return_types.push_back(LogicalType::INTEGER); + + names.emplace_back("EXTRA_INFO"); + return_types.push_back(LogicalType::VARCHAR); + + return make_unique(return_types); +} + +unique_ptr PragmaDetailedProfilingOutputInit(ClientContext &context, + const FunctionData *bind_data, + const vector &column_ids, + TableFilterCollection *filters) { + return make_unique(); +} + +// Insert a row into the given datachunk +static void SetValue(DataChunk &output, int index, int op_id, string annotation, int id, string name, double time, + int sample_counter, int tuple_counter, string extra_info) { + output.SetValue(0, index, op_id); + output.SetValue(1, index, move(annotation)); + output.SetValue(2, index, id); + output.SetValue(3, index, move(name)); +#if defined(RDTSC) + output.SetValue(4, index, Value(nullptr)); + output.SetValue(5, index, time); +#else + output.SetValue(4, index, time); + output.SetValue(5, index, Value(nullptr)); + +#endif + output.SetValue(6, index, sample_counter); + output.SetValue(7, index, tuple_counter); + output.SetValue(8, index, move(extra_info)); +} + +static void ExtractFunctions(ChunkCollection &collection, ExpressionInfo &info, DataChunk &chunk, int op_id, + int &fun_id) { + if (info.hasfunction) { + D_ASSERT(info.sample_tuples_count != 0); + SetValue(chunk, chunk.size(), op_id, "Function", fun_id++, info.function_name, + int(info.function_time) / double(info.sample_tuples_count), info.sample_tuples_count, + info.tuples_count, ""); + + chunk.SetCardinality(chunk.size() + 1); + if (chunk.size() == 
STANDARD_VECTOR_SIZE) { + collection.Append(chunk); + chunk.Reset(); + } + } + if (info.children.empty()) { + return; + } + // extract the children of this node + for (auto &child : info.children) { + ExtractFunctions(collection, *child, chunk, op_id, fun_id); + } +} + +static void PragmaDetailedProfilingOutputFunction(ClientContext &context, const FunctionData *bind_data_p, + FunctionOperatorData *operator_state, DataChunk *input, + DataChunk &output) { + auto &state = (PragmaDetailedProfilingOutputOperatorData &)*operator_state; + auto &data = (PragmaDetailedProfilingOutputData &)*bind_data_p; + + if (!state.initialized) { + // create a ChunkCollection + auto collection = make_unique(); + + // create a chunk + DataChunk chunk; + chunk.Initialize(data.types); + + // Initialize ids + int operator_counter = 1; + int function_counter = 1; + int expression_counter = 1; + if (context.query_profiler_history->GetPrevProfilers().empty()) { + return; + } + // For each Operator + for (auto op : context.query_profiler_history->GetPrevProfilers().back().second->GetTreeMap()) { + // For each Expression Executor + for (auto &expr_executor : op.second->info.executors_info) { + // For each Expression tree + if (!expr_executor) { + continue; + } + for (auto &expr_timer : expr_executor->roots) { + D_ASSERT(expr_timer->sample_tuples_count != 0); + SetValue(chunk, chunk.size(), operator_counter, "ExpressionRoot", expression_counter++, + // Sometimes, cycle counter is not accurate, too big or too small. 
return 0 for + // those cases + expr_timer->name, int(expr_timer->time) / double(expr_timer->sample_tuples_count), + expr_timer->sample_tuples_count, expr_timer->tuples_count, expr_timer->extra_info); + // Increment cardinality + chunk.SetCardinality(chunk.size() + 1); + // Check whether data chunk is full or not + if (chunk.size() == STANDARD_VECTOR_SIZE) { + collection->Append(chunk); + chunk.Reset(); + } + // Extract all functions inside the tree + ExtractFunctions(*collection, *expr_timer->root, chunk, operator_counter, function_counter); + } + } + operator_counter++; + } + collection->Append(chunk); + data.collection = move(collection); + state.initialized = true; + } + + if (state.chunk_index >= data.collection->ChunkCount()) { + output.SetCardinality(0); + return; + } + output.Reference(data.collection->GetChunk(state.chunk_index++)); +} + +void PragmaDetailedProfilingOutput::RegisterFunction(BuiltinFunctions &set) { + set.AddFunction(TableFunction("pragma_detailed_profiling_output", {}, PragmaDetailedProfilingOutputFunction, + PragmaDetailedProfilingOutputBind, PragmaDetailedProfilingOutputInit)); +} + +} // namespace duckdb + + + + + + + + +namespace duckdb { + +struct PragmaLastProfilingOutputOperatorData : public FunctionOperatorData { + PragmaLastProfilingOutputOperatorData() : chunk_index(0), initialized(false) { + } + idx_t chunk_index; + bool initialized; +}; + +struct PragmaLastProfilingOutputData : public TableFunctionData { + explicit PragmaLastProfilingOutputData(vector &types) : types(types) { + } + unique_ptr collection; + vector types; +}; + +static unique_ptr PragmaLastProfilingOutputBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, + vector &return_types, + vector &names) { + names.emplace_back("OPERATOR_ID"); + return_types.push_back(LogicalType::INTEGER); + + names.emplace_back("NAME"); + return_types.push_back(LogicalType::VARCHAR); + + 
names.emplace_back("TIME"); + return_types.push_back(LogicalType::DOUBLE); + + names.emplace_back("CARDINALITY"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("DESCRIPTION"); + return_types.push_back(LogicalType::VARCHAR); + + return make_unique(return_types); +} + +static void SetValue(DataChunk &output, int index, int op_id, string name, double time, int64_t car, + string description) { + output.SetValue(0, index, op_id); + output.SetValue(1, index, move(name)); + output.SetValue(2, index, time); + output.SetValue(3, index, car); + output.SetValue(4, index, move(description)); +} + +unique_ptr PragmaLastProfilingOutputInit(ClientContext &context, const FunctionData *bind_data, + const vector &column_ids, + TableFilterCollection *filters) { + return make_unique(); +} + +static void PragmaLastProfilingOutputFunction(ClientContext &context, const FunctionData *bind_data_p, + FunctionOperatorData *operator_state, DataChunk *input, + DataChunk &output) { + auto &state = (PragmaLastProfilingOutputOperatorData &)*operator_state; + auto &data = (PragmaLastProfilingOutputData &)*bind_data_p; + if (!state.initialized) { + // create a ChunkCollection + auto collection = make_unique(); + + DataChunk chunk; + chunk.Initialize(data.types); + int operator_counter = 1; + if (!context.query_profiler_history->GetPrevProfilers().empty()) { + for (auto op : context.query_profiler_history->GetPrevProfilers().back().second->GetTreeMap()) { + SetValue(chunk, chunk.size(), operator_counter++, op.second->name, op.second->info.time, + op.second->info.elements, " "); + chunk.SetCardinality(chunk.size() + 1); + if (chunk.size() == STANDARD_VECTOR_SIZE) { + collection->Append(chunk); + chunk.Reset(); + } + } + } + collection->Append(chunk); + data.collection = move(collection); + state.initialized = true; + } + + if (state.chunk_index >= data.collection->ChunkCount()) { + output.SetCardinality(0); + return; + } + 
output.Reference(data.collection->GetChunk(state.chunk_index++)); +} + +void PragmaLastProfilingOutput::RegisterFunction(BuiltinFunctions &set) { + set.AddFunction(TableFunction("pragma_last_profiling_output", {}, PragmaLastProfilingOutputFunction, + PragmaLastProfilingOutputBind, PragmaLastProfilingOutputInit)); +} + +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/function/table/summary.hpp // // //===----------------------------------------------------------------------===// @@ -75503,19 +82992,641 @@ void GlobTableFunction::RegisterFunction(BuiltinFunctions &set) { -namespace duckdb { +namespace duckdb { + +struct SummaryTableFunction { + static void RegisterFunction(BuiltinFunctions &set); +}; + +} // namespace duckdb + + + + + + +namespace duckdb { + +//===--------------------------------------------------------------------===// +// Range (integers) +//===--------------------------------------------------------------------===// +struct RangeFunctionBindData : public TableFunctionData { + int64_t start; + int64_t end; + int64_t increment; +}; + +template +static unique_ptr +RangeFunctionBind(ClientContext &context, vector &inputs, unordered_map &named_parameters, + vector &input_table_types, vector &input_table_names, + vector &return_types, vector &names) { + auto result = make_unique(); + if (inputs.size() < 2) { + // single argument: only the end is specified + result->start = 0; + result->end = inputs[0].GetValue(); + } else { + // two arguments: first two arguments are start and end + result->start = inputs[0].GetValue(); + result->end = inputs[1].GetValue(); + } + if (inputs.size() < 3) { + result->increment = 1; + } else { + result->increment = inputs[2].GetValue(); + } + if (result->increment == 0) { + throw BinderException("interval cannot be 0!"); + } + if (result->start > result->end && result->increment > 0) { + throw BinderException("start is bigger than end, but 
increment is positive: cannot generate infinite series"); + } else if (result->start < result->end && result->increment < 0) { + throw BinderException("start is smaller than end, but increment is negative: cannot generate infinite series"); + } + return_types.push_back(LogicalType::BIGINT); + if (GENERATE_SERIES) { + // generate_series has inclusive bounds on the RHS + if (result->increment < 0) { + result->end = result->end - 1; + } else { + result->end = result->end + 1; + } + names.emplace_back("generate_series"); + } else { + names.emplace_back("range"); + } + return move(result); +} + +struct RangeFunctionState : public FunctionOperatorData { + RangeFunctionState() : current_idx(0) { + } + + int64_t current_idx; +}; + +static unique_ptr RangeFunctionInit(ClientContext &context, const FunctionData *bind_data, + const vector &column_ids, + TableFilterCollection *filters) { + return make_unique(); +} + +static void RangeFunction(ClientContext &context, const FunctionData *bind_data_p, FunctionOperatorData *state_p, + DataChunk *input, DataChunk &output) { + auto &bind_data = (RangeFunctionBindData &)*bind_data_p; + auto &state = (RangeFunctionState &)*state_p; + + auto increment = bind_data.increment; + auto end = bind_data.end; + int64_t current_value = bind_data.start + (int64_t)increment * state.current_idx; + // set the result vector as a sequence vector + output.data[0].Sequence(current_value, increment); + idx_t remaining = MinValue((end - current_value) / increment, STANDARD_VECTOR_SIZE); + // increment the index pointer by the remaining count + state.current_idx += remaining; + output.SetCardinality(remaining); +} + +unique_ptr RangeCardinality(ClientContext &context, const FunctionData *bind_data_p) { + auto &bind_data = (RangeFunctionBindData &)*bind_data_p; + idx_t cardinality = (bind_data.end - bind_data.start) / bind_data.increment; + return make_unique(cardinality, cardinality); +} + 
+//===--------------------------------------------------------------------===// +// Range (timestamp) +//===--------------------------------------------------------------------===// +struct RangeDateTimeBindData : public TableFunctionData { + timestamp_t start; + timestamp_t end; + interval_t increment; + bool inclusive_bound; + bool greater_than_check; + + bool Finished(timestamp_t current_value) { + if (greater_than_check) { + if (inclusive_bound) { + return current_value > end; + } else { + return current_value >= end; + } + } else { + if (inclusive_bound) { + return current_value < end; + } else { + return current_value <= end; + } + } + } +}; + +template +static unique_ptr +RangeDateTimeBind(ClientContext &context, vector &inputs, unordered_map &named_parameters, + vector &input_table_types, vector &input_table_names, + vector &return_types, vector &names) { + auto result = make_unique(); + D_ASSERT(inputs.size() == 3); + result->start = inputs[0].GetValue(); + result->end = inputs[1].GetValue(); + result->increment = inputs[2].GetValue(); + + if (result->increment.months == 0 && result->increment.days == 0 && result->increment.micros == 0) { + throw BinderException("interval cannot be 0!"); + } + // all elements should point in the same direction + if (result->increment.months > 0 || result->increment.days > 0 || result->increment.micros > 0) { + if (result->increment.months < 0 || result->increment.days < 0 || result->increment.micros < 0) { + throw BinderException("RANGE with composite interval that has mixed signs is not supported"); + } + result->greater_than_check = true; + if (result->start > result->end) { + throw BinderException( + "start is bigger than end, but increment is positive: cannot generate infinite series"); + } + } else { + result->greater_than_check = false; + if (result->start < result->end) { + throw BinderException( + "start is smaller than end, but increment is negative: cannot generate infinite series"); + } + } + 
return_types.push_back(inputs[0].type()); + if (GENERATE_SERIES) { + // generate_series has inclusive bounds on the RHS + result->inclusive_bound = true; + names.emplace_back("generate_series"); + } else { + result->inclusive_bound = false; + names.emplace_back("range"); + } + return move(result); +} + +struct RangeDateTimeState : public FunctionOperatorData { + explicit RangeDateTimeState(timestamp_t start_p) : current_state(start_p) { + } + + timestamp_t current_state; + bool finished = false; +}; + +static unique_ptr RangeDateTimeInit(ClientContext &context, const FunctionData *bind_data_p, + const vector &column_ids, + TableFilterCollection *filters) { + auto &bind_data = (RangeDateTimeBindData &)*bind_data_p; + return make_unique(bind_data.start); +} + +static void RangeDateTimeFunction(ClientContext &context, const FunctionData *bind_data_p, + FunctionOperatorData *state_p, DataChunk *input, DataChunk &output) { + auto &bind_data = (RangeDateTimeBindData &)*bind_data_p; + auto &state = (RangeDateTimeState &)*state_p; + if (state.finished) { + return; + } + + idx_t size = 0; + auto data = FlatVector::GetData(output.data[0]); + while (true) { + data[size++] = state.current_state; + state.current_state = + AddOperator::Operation(state.current_state, bind_data.increment); + if (bind_data.Finished(state.current_state)) { + state.finished = true; + break; + } + if (size >= STANDARD_VECTOR_SIZE) { + break; + } + } + output.SetCardinality(size); +} + +void RangeTableFunction::RegisterFunction(BuiltinFunctions &set) { + TableFunctionSet range("range"); + + // single argument range: (end) - implicit start = 0 and increment = 1 + range.AddFunction(TableFunction({LogicalType::BIGINT}, RangeFunction, RangeFunctionBind, RangeFunctionInit, + nullptr, nullptr, nullptr, RangeCardinality)); + // two arguments range: (start, end) - implicit increment = 1 + range.AddFunction(TableFunction({LogicalType::BIGINT, LogicalType::BIGINT}, RangeFunction, RangeFunctionBind, + 
RangeFunctionInit, nullptr, nullptr, nullptr, RangeCardinality)); + // three arguments range: (start, end, increment) + range.AddFunction(TableFunction({LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, RangeFunction, + RangeFunctionBind, RangeFunctionInit, nullptr, nullptr, nullptr, + RangeCardinality)); + range.AddFunction(TableFunction({LogicalType::TIMESTAMP, LogicalType::TIMESTAMP, LogicalType::INTERVAL}, + RangeDateTimeFunction, RangeDateTimeBind, RangeDateTimeInit)); + set.AddFunction(range); + // generate_series: similar to range, but inclusive instead of exclusive bounds on the RHS + TableFunctionSet generate_series("generate_series"); + generate_series.AddFunction(TableFunction({LogicalType::BIGINT}, RangeFunction, RangeFunctionBind, + RangeFunctionInit, nullptr, nullptr, nullptr, RangeCardinality)); + generate_series.AddFunction(TableFunction({LogicalType::BIGINT, LogicalType::BIGINT}, RangeFunction, + RangeFunctionBind, RangeFunctionInit, nullptr, nullptr, nullptr, + RangeCardinality)); + generate_series.AddFunction(TableFunction({LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, + RangeFunction, RangeFunctionBind, RangeFunctionInit, nullptr, + nullptr, nullptr, RangeCardinality)); + generate_series.AddFunction(TableFunction({LogicalType::TIMESTAMP, LogicalType::TIMESTAMP, LogicalType::INTERVAL}, + RangeDateTimeFunction, RangeDateTimeBind, RangeDateTimeInit)); + set.AddFunction(generate_series); +} + +void BuiltinFunctions::RegisterTableFunctions() { + CheckpointFunction::RegisterFunction(*this); + GlobTableFunction::RegisterFunction(*this); + RangeTableFunction::RegisterFunction(*this); + RepeatTableFunction::RegisterFunction(*this); + SummaryTableFunction::RegisterFunction(*this); + UnnestTableFunction::RegisterFunction(*this); +} + +} // namespace duckdb + + + + + + + + + + + +#include + +namespace duckdb { + +static unique_ptr ReadCSVBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector 
&input_table_types, vector &input_table_names, + vector &return_types, vector &names) { + auto result = make_unique(); + auto &options = result->options; + + string file_pattern = inputs[0].str_value; + + auto &fs = FileSystem::GetFileSystem(context); + result->files = fs.Glob(file_pattern); + if (result->files.empty()) { + throw IOException("No files found that match the pattern \"%s\"", file_pattern); + } + + for (auto &kv : named_parameters) { + if (kv.first == "auto_detect") { + options.auto_detect = kv.second.value_.boolean; + } else if (kv.first == "sep" || kv.first == "delim") { + options.delimiter = kv.second.str_value; + options.has_delimiter = true; + } else if (kv.first == "header") { + options.header = kv.second.value_.boolean; + options.has_header = true; + } else if (kv.first == "quote") { + options.quote = kv.second.str_value; + options.has_quote = true; + } else if (kv.first == "escape") { + options.escape = kv.second.str_value; + options.has_escape = true; + } else if (kv.first == "nullstr") { + options.null_str = kv.second.str_value; + } else if (kv.first == "sample_size") { + int64_t sample_size = kv.second.GetValue(); + if (sample_size < 1 && sample_size != -1) { + throw BinderException("Unsupported parameter for SAMPLE_SIZE: cannot be smaller than 1"); + } + if (sample_size == -1) { + options.sample_chunks = std::numeric_limits::max(); + options.sample_chunk_size = STANDARD_VECTOR_SIZE; + } else if (sample_size <= STANDARD_VECTOR_SIZE) { + options.sample_chunk_size = sample_size; + options.sample_chunks = 1; + } else { + options.sample_chunk_size = STANDARD_VECTOR_SIZE; + options.sample_chunks = sample_size / STANDARD_VECTOR_SIZE; + } + } else if (kv.first == "sample_chunk_size") { + options.sample_chunk_size = kv.second.GetValue(); + if (options.sample_chunk_size > STANDARD_VECTOR_SIZE) { + throw BinderException( + "Unsupported parameter for SAMPLE_CHUNK_SIZE: cannot be bigger than STANDARD_VECTOR_SIZE %d", + STANDARD_VECTOR_SIZE); + } else if 
(options.sample_chunk_size < 1) { + throw BinderException("Unsupported parameter for SAMPLE_CHUNK_SIZE: cannot be smaller than 1"); + } + } else if (kv.first == "sample_chunks") { + options.sample_chunks = kv.second.GetValue(); + if (options.sample_chunks < 1) { + throw BinderException("Unsupported parameter for SAMPLE_CHUNKS: cannot be smaller than 1"); + } + } else if (kv.first == "all_varchar") { + options.all_varchar = kv.second.value_.boolean; + } else if (kv.first == "dateformat") { + options.has_format[LogicalTypeId::DATE] = true; + auto &date_format = options.date_format[LogicalTypeId::DATE]; + date_format.format_specifier = kv.second.str_value; + string error = StrTimeFormat::ParseFormatSpecifier(date_format.format_specifier, date_format); + if (!error.empty()) { + throw InvalidInputException("Could not parse DATEFORMAT: %s", error.c_str()); + } + } else if (kv.first == "timestampformat") { + options.has_format[LogicalTypeId::TIMESTAMP] = true; + auto ×tamp_format = options.date_format[LogicalTypeId::TIMESTAMP]; + timestamp_format.format_specifier = kv.second.str_value; + string error = StrTimeFormat::ParseFormatSpecifier(timestamp_format.format_specifier, timestamp_format); + if (!error.empty()) { + throw InvalidInputException("Could not parse TIMESTAMPFORMAT: %s", error.c_str()); + } + } else if (kv.first == "normalize_names") { + options.normalize_names = kv.second.value_.boolean; + } else if (kv.first == "columns") { + auto &child_type = kv.second.type(); + if (child_type.id() != LogicalTypeId::STRUCT) { + throw BinderException("read_csv columns requires a a struct as input"); + } + D_ASSERT(StructType::GetChildCount(child_type) == kv.second.struct_value.size()); + for (idx_t i = 0; i < kv.second.struct_value.size(); i++) { + auto &name = StructType::GetChildName(child_type, i); + auto &val = kv.second.struct_value[i]; + names.push_back(name); + if (val.type().id() != LogicalTypeId::VARCHAR) { + throw BinderException("read_csv requires a type 
specification as string"); + } + return_types.emplace_back(TransformStringToLogicalType(val.str_value.c_str())); + } + if (names.empty()) { + throw BinderException("read_csv requires at least a single column as input!"); + } + } else if (kv.first == "compression") { + options.compression = kv.second.str_value; + } else if (kv.first == "filename") { + result->include_file_name = kv.second.value_.boolean; + } else if (kv.first == "skip") { + options.skip_rows = kv.second.GetValue(); + } + } + if (!options.auto_detect && return_types.empty()) { + throw BinderException("read_csv requires columns to be specified. Use read_csv_auto or set read_csv(..., " + "AUTO_DETECT=TRUE) to automatically guess columns."); + } + if (!(options.compression == "infer" || options.compression == "gzip" || options.compression == "none" || + options.compression.empty())) { + throw BinderException("read_csv currently only supports 'gzip' compression."); + } + if (options.auto_detect) { + options.file_path = result->files[0]; + auto initial_reader = make_unique(context, options); + + return_types.assign(initial_reader->sql_types.begin(), initial_reader->sql_types.end()); + names.assign(initial_reader->col_names.begin(), initial_reader->col_names.end()); + result->initial_reader = move(initial_reader); + } else { + result->sql_types = return_types; + D_ASSERT(return_types.size() == names.size()); + } + if (result->include_file_name) { + return_types.push_back(LogicalType::VARCHAR); + names.emplace_back("filename"); + } + return move(result); +} + +struct ReadCSVOperatorData : public FunctionOperatorData { + //! The CSV reader + unique_ptr csv_reader; + //! The index of the next file to read (i.e. 
current file + 1) + idx_t file_index; +}; + +static unique_ptr ReadCSVInit(ClientContext &context, const FunctionData *bind_data_p, + const vector &column_ids, + TableFilterCollection *filters) { + auto &bind_data = (ReadCSVData &)*bind_data_p; + auto result = make_unique(); + if (bind_data.initial_reader) { + result->csv_reader = move(bind_data.initial_reader); + } else { + bind_data.options.file_path = bind_data.files[0]; + result->csv_reader = make_unique(context, bind_data.options, bind_data.sql_types); + } + bind_data.bytes_read = 0; + bind_data.file_size = result->csv_reader->file_size; + result->file_index = 1; + return move(result); +} + +static unique_ptr ReadCSVAutoBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, vector &return_types, + vector &names) { + named_parameters["auto_detect"] = Value::BOOLEAN(true); + return ReadCSVBind(context, inputs, named_parameters, input_table_types, input_table_names, return_types, names); +} + +static void ReadCSVFunction(ClientContext &context, const FunctionData *bind_data_p, + FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { + auto &bind_data = (ReadCSVData &)*bind_data_p; + auto &data = (ReadCSVOperatorData &)*operator_state; + do { + data.csv_reader->ParseCSV(output); + bind_data.bytes_read = data.csv_reader->bytes_in_chunk; + if (output.size() == 0 && data.file_index < bind_data.files.size()) { + // exhausted this file, but we have more files we can read + // open the next file and increment the counter + bind_data.options.file_path = bind_data.files[data.file_index]; + data.csv_reader = make_unique(context, bind_data.options, data.csv_reader->sql_types); + data.file_index++; + } else { + break; + } + } while (true); + if (bind_data.include_file_name) { + auto &col = output.data.back(); + col.SetValue(0, Value(data.csv_reader->options.file_path)); + col.SetVectorType(VectorType::CONSTANT_VECTOR); + 
} +} + +static void ReadCSVAddNamedParameters(TableFunction &table_function) { + table_function.named_parameters["sep"] = LogicalType::VARCHAR; + table_function.named_parameters["delim"] = LogicalType::VARCHAR; + table_function.named_parameters["quote"] = LogicalType::VARCHAR; + table_function.named_parameters["escape"] = LogicalType::VARCHAR; + table_function.named_parameters["nullstr"] = LogicalType::VARCHAR; + table_function.named_parameters["columns"] = LogicalType::ANY; + table_function.named_parameters["header"] = LogicalType::BOOLEAN; + table_function.named_parameters["auto_detect"] = LogicalType::BOOLEAN; + table_function.named_parameters["sample_size"] = LogicalType::BIGINT; + table_function.named_parameters["sample_chunk_size"] = LogicalType::BIGINT; + table_function.named_parameters["sample_chunks"] = LogicalType::BIGINT; + table_function.named_parameters["all_varchar"] = LogicalType::BOOLEAN; + table_function.named_parameters["dateformat"] = LogicalType::VARCHAR; + table_function.named_parameters["timestampformat"] = LogicalType::VARCHAR; + table_function.named_parameters["normalize_names"] = LogicalType::BOOLEAN; + table_function.named_parameters["compression"] = LogicalType::VARCHAR; + table_function.named_parameters["filename"] = LogicalType::BOOLEAN; + table_function.named_parameters["skip"] = LogicalType::BIGINT; +} + +int CSVReaderProgress(ClientContext &context, const FunctionData *bind_data_p) { + auto &bind_data = (ReadCSVData &)*bind_data_p; + if (bind_data.file_size == 0) { + return 100; + } + auto percentage = bind_data.bytes_read * 100 / bind_data.file_size; + return percentage; +} + +TableFunction ReadCSVTableFunction::GetFunction() { + TableFunction read_csv("read_csv", {LogicalType::VARCHAR}, ReadCSVFunction, ReadCSVBind, ReadCSVInit); + read_csv.table_scan_progress = CSVReaderProgress; + ReadCSVAddNamedParameters(read_csv); + return read_csv; +} + +void ReadCSVTableFunction::RegisterFunction(BuiltinFunctions &set) { + 
set.AddFunction(ReadCSVTableFunction::GetFunction()); + + TableFunction read_csv_auto("read_csv_auto", {LogicalType::VARCHAR}, ReadCSVFunction, ReadCSVAutoBind, ReadCSVInit); + read_csv_auto.table_scan_progress = CSVReaderProgress; + ReadCSVAddNamedParameters(read_csv_auto); + set.AddFunction(read_csv_auto); +} + +unique_ptr ReadCSVReplacement(const string &table_name, void *data) { + if (!StringUtil::EndsWith(table_name, ".csv") && !StringUtil::EndsWith(table_name, ".tsv") && + !StringUtil::EndsWith(table_name, ".csv.gz")) { + return nullptr; + } + auto table_function = make_unique(); + vector> children; + children.push_back(make_unique(Value(table_name))); + table_function->function = make_unique("read_csv_auto", move(children)); + return table_function; +} + +void BuiltinFunctions::RegisterReadFunctions() { + CSVCopyFunction::RegisterFunction(*this); + ReadCSVTableFunction::RegisterFunction(*this); + + auto &config = DBConfig::GetConfig(context); + config.replacement_scans.emplace_back(ReadCSVReplacement); +} + +} // namespace duckdb + + + +namespace duckdb { + +struct RepeatFunctionData : public TableFunctionData { + RepeatFunctionData(Value value, idx_t target_count) : value(move(value)), target_count(target_count) { + } + + Value value; + idx_t target_count; +}; + +struct RepeatOperatorData : public FunctionOperatorData { + RepeatOperatorData() : current_count(0) { + } + idx_t current_count; +}; + +static unique_ptr RepeatBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, vector &input_table_names, + vector &return_types, vector &names) { + // the repeat function returns the type of the first argument + return_types.push_back(inputs[0].type()); + names.push_back(inputs[0].ToString()); + return make_unique(inputs[0], inputs[1].GetValue()); +} + +static unique_ptr RepeatInit(ClientContext &context, const FunctionData *bind_data, + const vector &column_ids, TableFilterCollection *filters) { + return 
make_unique(); +} + +static void RepeatFunction(ClientContext &context, const FunctionData *bind_data_p, + FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { + auto &bind_data = (RepeatFunctionData &)*bind_data_p; + auto &state = (RepeatOperatorData &)*operator_state; + + idx_t remaining = MinValue(bind_data.target_count - state.current_count, STANDARD_VECTOR_SIZE); + output.data[0].Reference(bind_data.value); + output.SetCardinality(remaining); + state.current_count += remaining; +} + +static unique_ptr RepeatCardinality(ClientContext &context, const FunctionData *bind_data_p) { + auto &bind_data = (RepeatFunctionData &)*bind_data_p; + return make_unique(bind_data.target_count, bind_data.target_count); +} + +void RepeatTableFunction::RegisterFunction(BuiltinFunctions &set) { + TableFunction repeat("repeat", {LogicalType::ANY, LogicalType::BIGINT}, RepeatFunction, RepeatBind, RepeatInit, + nullptr, nullptr, nullptr, RepeatCardinality); + set.AddFunction(repeat); +} + +} // namespace duckdb + + + + + +// this function makes not that much sense on its own but is a demo for table-parameter table-producing functions + +namespace duckdb { + +static unique_ptr SummaryFunctionBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, + vector &return_types, vector &names) { + + return_types.push_back(LogicalType::VARCHAR); + names.emplace_back("summary"); + + for (idx_t i = 0; i < input_table_types.size(); i++) { + return_types.push_back(input_table_types[i]); + names.emplace_back(input_table_names[i]); + } + + return make_unique(); +} + +static void SummaryFunction(ClientContext &context, const FunctionData *bind_data_p, FunctionOperatorData *state_p, + DataChunk *input, DataChunk &output) { + D_ASSERT(input); + output.SetCardinality(input->size()); -struct InformationSchemaSchemata { - static void RegisterFunction(BuiltinFunctions &set); -}; + for (idx_t row_idx = 0; 
row_idx < input->size(); row_idx++) { + string summary_val = "["; -struct InformationSchemaTables { - static void RegisterFunction(BuiltinFunctions &set); -}; + for (idx_t col_idx = 0; col_idx < input->ColumnCount(); col_idx++) { + summary_val += input->GetValue(col_idx, row_idx).ToString(); + if (col_idx < input->ColumnCount() - 1) { + summary_val += ", "; + } + } + summary_val += "]"; + output.SetValue(0, row_idx, Value(summary_val)); + } + for (idx_t col_idx = 0; col_idx < input->ColumnCount(); col_idx++) { + output.data[col_idx + 1].Reference(input->data[col_idx]); + } +} -struct InformationSchemaColumns { - static void RegisterFunction(BuiltinFunctions &set); -}; +void SummaryTableFunction::RegisterFunction(BuiltinFunctions &set) { + TableFunctionSet summary("summary"); + summary.AddFunction(TableFunction({LogicalType::TABLE}, SummaryFunction, SummaryFunctionBind)); + set.AddFunction(summary); +} } // namespace duckdb @@ -75531,8 +83642,8 @@ struct InformationSchemaColumns { namespace duckdb { -struct InformationSchemaColumnsData : public FunctionOperatorData { - InformationSchemaColumnsData() : offset(0), column_offset(0) { +struct DuckDBColumnsData : public FunctionOperatorData { + DuckDBColumnsData() : offset(0), column_offset(0) { } vector entries; @@ -75540,63 +83651,68 @@ struct InformationSchemaColumnsData : public FunctionOperatorData { idx_t column_offset; }; -static unique_ptr InformationSchemaColumnsBind(ClientContext &context, vector &inputs, - unordered_map &named_parameters, - vector &input_table_types, - vector &input_table_names, - vector &return_types, vector &names) { - names.emplace_back("table_catalog"); - return_types.push_back(LogicalType::VARCHAR); +static unique_ptr DuckDBColumnsBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, vector &return_types, + vector &names) { + names.emplace_back("schema_oid"); + return_types.push_back(LogicalType::BIGINT); 
- names.emplace_back("table_schema"); + names.emplace_back("schema_name"); return_types.push_back(LogicalType::VARCHAR); + names.emplace_back("table_oid"); + return_types.push_back(LogicalType::BIGINT); + names.emplace_back("table_name"); return_types.push_back(LogicalType::VARCHAR); names.emplace_back("column_name"); return_types.push_back(LogicalType::VARCHAR); - names.emplace_back("ordinal_position"); + names.emplace_back("column_index"); return_types.push_back(LogicalType::INTEGER); + names.emplace_back("internal"); + return_types.push_back(LogicalType::BOOLEAN); + names.emplace_back("column_default"); return_types.push_back(LogicalType::VARCHAR); - names.emplace_back("is_nullable"); // YES/NO - return_types.push_back(LogicalType::VARCHAR); + names.emplace_back("is_nullable"); + return_types.push_back(LogicalType::BOOLEAN); names.emplace_back("data_type"); return_types.push_back(LogicalType::VARCHAR); - names.emplace_back("character_maximum_length"); - return_types.push_back(LogicalType::INTEGER); + names.emplace_back("data_type_id"); + return_types.push_back(LogicalType::BIGINT); - names.emplace_back("character_octet_length"); + names.emplace_back("character_maximum_length"); return_types.push_back(LogicalType::INTEGER); names.emplace_back("numeric_precision"); return_types.push_back(LogicalType::INTEGER); - names.emplace_back("numeric_scale"); + names.emplace_back("numeric_precision_radix"); return_types.push_back(LogicalType::INTEGER); - names.emplace_back("datetime_precision"); + names.emplace_back("numeric_scale"); return_types.push_back(LogicalType::INTEGER); return nullptr; } -unique_ptr InformationSchemaColumnsInit(ClientContext &context, const FunctionData *bind_data, - vector &column_ids, - TableFilterCollection *filters) { - auto result = make_unique(); +unique_ptr DuckDBColumnsInit(ClientContext &context, const FunctionData *bind_data, + const vector &column_ids, TableFilterCollection *filters) { + auto result = make_unique(); // scan all the 
schemas for tables and views and collect them - Catalog::GetCatalog(context).schemas->Scan(context, [&](CatalogEntry *entry) { - auto schema = (SchemaCatalogEntry *)entry; + auto schemas = Catalog::GetCatalog(context).schemas->GetEntries(context); + for (auto &schema : schemas) { schema->Scan(context, CatalogType::TABLE_ENTRY, [&](CatalogEntry *entry) { result->entries.push_back(entry); }); - }); + } // check the temp schema as well context.temporary_objects->Scan(context, CatalogType::TABLE_ENTRY, @@ -75696,109 +83812,110 @@ unique_ptr ColumnHelper::Create(CatalogEntry *entry) { case CatalogType::VIEW_ENTRY: return make_unique((ViewCatalogEntry *)entry); default: - throw NotImplementedException("Unsupported catalog type for information_schema_columns"); + throw NotImplementedException("Unsupported catalog type for duckdb_columns"); } } void ColumnHelper::WriteColumns(idx_t start_index, idx_t start_col, idx_t end_col, DataChunk &output) { for (idx_t i = start_col; i < end_col; i++) { auto index = start_index + (i - start_col); - // "table_catalog", PhysicalType::VARCHAR - output.SetValue(0, index, Value()); - // "table_schema", PhysicalType::VARCHAR - output.SetValue(1, index, Value(Entry()->schema->name)); - // "table_name", PhysicalType::VARCHAR - output.SetValue(2, index, Value(Entry()->name)); - // "column_name", PhysicalType::VARCHAR - output.SetValue(3, index, Value(ColumnName(i))); - // "ordinal_position", PhysicalType::INTEGER - output.SetValue(4, index, Value::INTEGER(i + 1)); - // "column_default", PhysicalType::VARCHAR - output.SetValue(5, index, Value(ColumnDefault(i))); - // "is_nullable", PhysicalType::VARCHAR YES/NO - output.SetValue(6, index, Value(IsNullable(i) ? 
"YES" : "NO")); - - // "data_type", PhysicalType::VARCHAR + auto &entry = *Entry(); + + // schema_oid, BIGINT + output.SetValue(0, index, Value::BIGINT(entry.schema->oid)); + // schema_name, VARCHAR + output.SetValue(1, index, entry.schema->name); + // table_oid, BIGINT + output.SetValue(2, index, Value::BIGINT(entry.oid)); + // table_name, VARCHAR + output.SetValue(3, index, entry.name); + // column_name, VARCHAR + output.SetValue(4, index, Value(ColumnName(i))); + // column_index, INTEGER + output.SetValue(5, index, Value::INTEGER(i + 1)); + // internal, BOOLEAN + output.SetValue(6, index, Value::BOOLEAN(entry.internal)); + // column_default, VARCHAR + output.SetValue(7, index, Value(ColumnDefault(i))); + // is_nullable, BOOLEAN + output.SetValue(8, index, Value::BOOLEAN(IsNullable(i))); + // data_type, VARCHAR const LogicalType &type = ColumnType(i); - output.SetValue(7, index, Value(type.ToString())); - + output.SetValue(9, index, Value(type.ToString())); + // data_type_id, BIGINT + output.SetValue(10, index, Value::BIGINT(int(type.id()))); if (type == LogicalType::VARCHAR) { // FIXME: need check constraints in place to set this correctly - // "character_maximum_length", PhysicalType::INTEGER - output.SetValue(8, index, Value()); - // "character_octet_length", PhysicalType::INTEGER - // FIXME: where did this number come from? 
- output.SetValue(9, index, Value::INTEGER(1073741824)); + // character_maximum_length, INTEGER + output.SetValue(11, index, Value()); } else { // "character_maximum_length", PhysicalType::INTEGER - output.SetValue(8, index, Value()); - // "character_octet_length", PhysicalType::INTEGER - output.SetValue(9, index, Value()); + output.SetValue(11, index, Value()); } - Value numeric_precision, numeric_scale; + Value numeric_precision, numeric_scale, numeric_precision_radix; switch (type.id()) { case LogicalTypeId::DECIMAL: - numeric_precision = Value::INTEGER(type.width()); - numeric_scale = Value::INTEGER(type.scale()); + numeric_precision = Value::INTEGER(DecimalType::GetWidth(type)); + numeric_scale = Value::INTEGER(DecimalType::GetScale(type)); + numeric_precision_radix = Value::INTEGER(10); break; case LogicalTypeId::HUGEINT: numeric_precision = Value::INTEGER(128); numeric_scale = Value::INTEGER(0); + numeric_precision_radix = Value::INTEGER(2); break; case LogicalTypeId::BIGINT: numeric_precision = Value::INTEGER(64); numeric_scale = Value::INTEGER(0); + numeric_precision_radix = Value::INTEGER(2); break; case LogicalTypeId::INTEGER: numeric_precision = Value::INTEGER(32); numeric_scale = Value::INTEGER(0); + numeric_precision_radix = Value::INTEGER(2); break; case LogicalTypeId::SMALLINT: numeric_precision = Value::INTEGER(16); numeric_scale = Value::INTEGER(0); + numeric_precision_radix = Value::INTEGER(2); break; case LogicalTypeId::TINYINT: numeric_precision = Value::INTEGER(8); numeric_scale = Value::INTEGER(0); + numeric_precision_radix = Value::INTEGER(2); break; case LogicalTypeId::FLOAT: numeric_precision = Value::INTEGER(24); numeric_scale = Value::INTEGER(0); + numeric_precision_radix = Value::INTEGER(2); break; case LogicalTypeId::DOUBLE: numeric_precision = Value::INTEGER(53); numeric_scale = Value::INTEGER(0); + numeric_precision_radix = Value::INTEGER(2); break; default: numeric_precision = Value(); numeric_scale = Value(); + 
numeric_precision_radix = Value(); break; } - output.SetValue(10, index, numeric_precision); - output.SetValue(11, index, numeric_scale); - Value datetime_precision; - switch (type.id()) { - case LogicalTypeId::DATE: - case LogicalTypeId::INTERVAL: - case LogicalTypeId::TIME: - case LogicalTypeId::TIMESTAMP: - // No fractional seconds are currently supported in DuckDB - datetime_precision = Value::INTEGER(0); - break; - default: - datetime_precision = Value(); - } - output.SetValue(12, index, datetime_precision); + // numeric_precision, INTEGER + output.SetValue(12, index, numeric_precision); + // numeric_precision_radix, INTEGER + output.SetValue(13, index, numeric_precision_radix); + // numeric_scale, INTEGER + output.SetValue(14, index, numeric_scale); } } } // anonymous namespace -void InformationSchemaColumnsFunction(ClientContext &context, const FunctionData *bind_data, - FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { - auto &data = (InformationSchemaColumnsData &)*operator_state; +void DuckDBColumnsFunction(ClientContext &context, const FunctionData *bind_data, FunctionOperatorData *operator_state, + DataChunk *input, DataChunk &output) { + auto &data = (DuckDBColumnsData &)*operator_state; if (data.offset >= data.entries.size()) { // finished returning values return; @@ -75838,9 +83955,8 @@ void InformationSchemaColumnsFunction(ClientContext &context, const FunctionData data.column_offset = column_offset; } -void InformationSchemaColumns::RegisterFunction(BuiltinFunctions &set) { - set.AddFunction(TableFunction("information_schema_columns", {}, InformationSchemaColumnsFunction, - InformationSchemaColumnsBind, InformationSchemaColumnsInit)); +void DuckDBColumnsFun::RegisterFunction(BuiltinFunctions &set) { + set.AddFunction(TableFunction("duckdb_columns", {}, DuckDBColumnsFunction, DuckDBColumnsBind, DuckDBColumnsInit)); } } // namespace duckdb @@ -75851,50 +83967,493 @@ void 
InformationSchemaColumns::RegisterFunction(BuiltinFunctions &set) { + + + + + + + + namespace duckdb { -struct InformationSchemaSchemataData : public FunctionOperatorData { - InformationSchemaSchemataData() : offset(0) { +struct DuckDBConstraintsData : public FunctionOperatorData { + DuckDBConstraintsData() : offset(0), constraint_offset(0) { } - vector entries; + vector entries; idx_t offset; + idx_t constraint_offset; }; -static unique_ptr InformationSchemaSchemataBind(ClientContext &context, vector &inputs, - unordered_map &named_parameters, - vector &input_table_types, - vector &input_table_names, - vector &return_types, - vector &names) { - names.emplace_back("catalog_name"); +static unique_ptr DuckDBConstraintsBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, + vector &return_types, vector &names) { + names.emplace_back("schema_name"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("schema_oid"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("table_name"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("table_oid"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("constraint_index"); + return_types.push_back(LogicalType::BIGINT); + + // CHECK, PRIMARY KEY or UNIQUE + names.emplace_back("constraint_type"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("constraint_text"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("expression"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("constraint_column_indexes"); + ; + return_types.push_back(LogicalType::LIST(LogicalType::BIGINT)); + + names.emplace_back("constraint_column_names"); + return_types.push_back(LogicalType::LIST(LogicalType::VARCHAR)); + + return nullptr; +} + +unique_ptr DuckDBConstraintsInit(ClientContext &context, const FunctionData *bind_data, + 
const vector &column_ids, + TableFilterCollection *filters) { + auto result = make_unique(); + + // scan all the schemas for tables and collect themand collect them + auto schemas = Catalog::GetCatalog(context).schemas->GetEntries(context); + for (auto &schema : schemas) { + schema->Scan(context, CatalogType::TABLE_ENTRY, [&](CatalogEntry *entry) { result->entries.push_back(entry); }); + }; + + // check the temp schema as well + context.temporary_objects->Scan(context, CatalogType::TABLE_ENTRY, + [&](CatalogEntry *entry) { result->entries.push_back(entry); }); + return move(result); +} + +void DuckDBConstraintsFunction(ClientContext &context, const FunctionData *bind_data, + FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { + auto &data = (DuckDBConstraintsData &)*operator_state; + if (data.offset >= data.entries.size()) { + // finished returning values + return; + } + // start returning values + // either fill up the chunk or return all the remaining columns + idx_t count = 0; + while (data.offset < data.entries.size() && count < STANDARD_VECTOR_SIZE) { + auto &entry = data.entries[data.offset]; + + if (entry->type != CatalogType::TABLE_ENTRY) { + data.offset++; + continue; + } + + auto &table = (TableCatalogEntry &)*entry; + for (; data.constraint_offset < table.constraints.size() && count < STANDARD_VECTOR_SIZE; + data.constraint_offset++) { + auto &constraint = table.constraints[data.constraint_offset]; + // return values: + // schema_name, LogicalType::VARCHAR + output.SetValue(0, count, Value(table.schema->name)); + // schema_oid, LogicalType::BIGINT + output.SetValue(1, count, Value::BIGINT(table.schema->oid)); + // table_name, LogicalType::VARCHAR + output.SetValue(2, count, Value(table.name)); + // table_oid, LogicalType::BIGINT + output.SetValue(3, count, Value::BIGINT(table.oid)); + + // constraint_index, BIGINT + output.SetValue(4, count, Value::BIGINT(data.constraint_offset)); + + // constraint_type, VARCHAR + string 
constraint_type; + switch (constraint->type) { + case ConstraintType::CHECK: + constraint_type = "CHECK"; + break; + case ConstraintType::UNIQUE: { + auto &unique = (UniqueConstraint &)*constraint; + constraint_type = unique.is_primary_key ? "PRIMARY KEY" : "UNIQUE"; + break; + } + case ConstraintType::NOT_NULL: + constraint_type = "NOT NULL"; + break; + case ConstraintType::FOREIGN_KEY: + constraint_type = "FOREIGN KEY"; + break; + default: + throw NotImplementedException("Unimplemented constraint for duckdb_constraints"); + } + output.SetValue(5, count, Value(constraint_type)); + + // constraint_text, VARCHAR + output.SetValue(6, count, Value(constraint->ToString())); + + // expression, VARCHAR + Value expression_text; + if (constraint->type == ConstraintType::CHECK) { + auto &check = (CheckConstraint &)*constraint; + expression_text = Value(check.expression->ToString()); + } + output.SetValue(7, count, expression_text); + + auto &bound_constraint = (BoundConstraint &)*table.bound_constraints[data.constraint_offset]; + vector column_index_list; + switch (bound_constraint.type) { + case ConstraintType::CHECK: { + auto &bound_check = (BoundCheckConstraint &)bound_constraint; + for (auto &col_idx : bound_check.bound_columns) { + column_index_list.push_back(col_idx); + } + break; + } + case ConstraintType::UNIQUE: { + auto &bound_unique = (BoundUniqueConstraint &)bound_constraint; + for (auto &col_idx : bound_unique.keys) { + column_index_list.push_back(column_t(col_idx)); + } + break; + } + case ConstraintType::NOT_NULL: { + auto &bound_not_null = (BoundNotNullConstraint &)bound_constraint; + column_index_list.push_back(bound_not_null.index); + break; + } + case ConstraintType::FOREIGN_KEY: + default: + throw NotImplementedException("Unimplemented constraint for duckdb_constraints"); + } + + vector index_list; + vector column_name_list; + for (auto column_index : column_index_list) { + index_list.push_back(Value::BIGINT(column_index)); + 
column_name_list.emplace_back(table.columns[column_index].name); + } + + // constraint_column_indexes, LIST + output.SetValue(8, count, Value::LIST(move(index_list))); + + // constraint_column_names, LIST + output.SetValue(9, count, Value::LIST(move(column_name_list))); + + count++; + } + if (data.constraint_offset >= table.constraints.size()) { + data.constraint_offset = 0; + data.offset++; + } + } + output.SetCardinality(count); +} + +void DuckDBConstraintsFun::RegisterFunction(BuiltinFunctions &set) { + set.AddFunction(TableFunction("duckdb_constraints", {}, DuckDBConstraintsFunction, DuckDBConstraintsBind, + DuckDBConstraintsInit)); +} + +} // namespace duckdb + + + + + + + +namespace duckdb { + +struct DependencyInformation { + CatalogEntry *object; + CatalogEntry *dependent; + DependencyType type; +}; + +struct DuckDBDependenciesData : public FunctionOperatorData { + DuckDBDependenciesData() : offset(0) { + } + + vector entries; + idx_t offset; +}; + +static unique_ptr DuckDBDependenciesBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, + vector &return_types, vector &names) { + names.emplace_back("classid"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("objid"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("objsubid"); + return_types.push_back(LogicalType::INTEGER); + + names.emplace_back("refclassid"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("refobjid"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("refobjsubid"); + return_types.push_back(LogicalType::INTEGER); + + names.emplace_back("deptype"); return_types.push_back(LogicalType::VARCHAR); + return nullptr; +} + +unique_ptr DuckDBDependenciesInit(ClientContext &context, const FunctionData *bind_data, + const vector &column_ids, + TableFilterCollection *filters) { + auto result = make_unique(); + + // scan all the 
schemas and collect them + auto &catalog = Catalog::GetCatalog(context); + auto &dependency_manager = catalog.GetDependencyManager(); + dependency_manager.Scan([&](CatalogEntry *obj, CatalogEntry *dependent, DependencyType type) { + DependencyInformation info; + info.object = obj; + info.dependent = dependent; + info.type = type; + result->entries.push_back(info); + }); + + return move(result); +} + +void DuckDBDependenciesFunction(ClientContext &context, const FunctionData *bind_data, + FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { + auto &data = (DuckDBDependenciesData &)*operator_state; + if (data.offset >= data.entries.size()) { + // finished returning values + return; + } + // start returning values + // either fill up the chunk or return all the remaining columns + idx_t count = 0; + while (data.offset < data.entries.size() && count < STANDARD_VECTOR_SIZE) { + auto &entry = data.entries[data.offset]; + + // return values: + // classid, LogicalType::BIGINT + output.SetValue(0, count, Value::BIGINT(0)); + // objid, LogicalType::BIGINT + output.SetValue(1, count, Value::BIGINT(entry.object->oid)); + // objsubid, LogicalType::INTEGER + output.SetValue(2, count, Value::INTEGER(0)); + // refclassid, LogicalType::BIGINT + output.SetValue(3, count, Value::BIGINT(0)); + // refobjid, LogicalType::BIGINT + output.SetValue(4, count, Value::BIGINT(entry.dependent->oid)); + // refobjsubid, LogicalType::INTEGER + output.SetValue(5, count, Value::INTEGER(0)); + // deptype, LogicalType::VARCHAR + string dependency_type_str; + switch (entry.type) { + case DependencyType::DEPENDENCY_REGULAR: + dependency_type_str = "n"; + break; + case DependencyType::DEPENDENCY_AUTOMATIC: + dependency_type_str = "a"; + break; + default: + throw NotImplementedException("Unimplemented dependency type"); + } + output.SetValue(6, count, Value(dependency_type_str)); + + data.offset++; + count++; + } + output.SetCardinality(count); +} + +void 
DuckDBDependenciesFun::RegisterFunction(BuiltinFunctions &set) { + set.AddFunction(TableFunction("duckdb_dependencies", {}, DuckDBDependenciesFunction, DuckDBDependenciesBind, + DuckDBDependenciesInit)); +} + +} // namespace duckdb + + + + + + + + + + +namespace duckdb { + +struct DuckDBIndexesData : public FunctionOperatorData { + DuckDBIndexesData() : offset(0) { + } + + vector entries; + idx_t offset; +}; + +static unique_ptr DuckDBIndexesBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, vector &return_types, + vector &names) { names.emplace_back("schema_name"); return_types.push_back(LogicalType::VARCHAR); - names.emplace_back("schema_owner"); + names.emplace_back("schema_oid"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("index_name"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("index_oid"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("table_name"); return_types.push_back(LogicalType::VARCHAR); - names.emplace_back("default_character_set_catalog"); + names.emplace_back("table_oid"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("is_unique"); + return_types.push_back(LogicalType::BOOLEAN); + + names.emplace_back("is_primary"); + return_types.push_back(LogicalType::BOOLEAN); + + names.emplace_back("expressions"); return_types.push_back(LogicalType::VARCHAR); - names.emplace_back("default_character_set_schema"); + names.emplace_back("sql"); return_types.push_back(LogicalType::VARCHAR); - names.emplace_back("default_character_set_name"); + return nullptr; +} + +unique_ptr DuckDBIndexesInit(ClientContext &context, const FunctionData *bind_data, + const vector &column_ids, TableFilterCollection *filters) { + auto result = make_unique(); + + // scan all the schemas for tables and collect themand collect them + auto schemas = 
Catalog::GetCatalog(context).schemas->GetEntries(context); + for (auto &schema : schemas) { + schema->Scan(context, CatalogType::INDEX_ENTRY, [&](CatalogEntry *entry) { result->entries.push_back(entry); }); + }; + + // check the temp schema as well + context.temporary_objects->Scan(context, CatalogType::INDEX_ENTRY, + [&](CatalogEntry *entry) { result->entries.push_back(entry); }); + return move(result); +} + +void DuckDBIndexesFunction(ClientContext &context, const FunctionData *bind_data, FunctionOperatorData *operator_state, + DataChunk *input, DataChunk &output) { + auto &data = (DuckDBIndexesData &)*operator_state; + if (data.offset >= data.entries.size()) { + // finished returning values + return; + } + // start returning values + // either fill up the chunk or return all the remaining columns + idx_t count = 0; + while (data.offset < data.entries.size() && count < STANDARD_VECTOR_SIZE) { + auto &entry = data.entries[data.offset++]; + + auto &index = (IndexCatalogEntry &)*entry; + // return values: + + // schema_name, VARCHAR + output.SetValue(0, count, Value(index.schema->name)); + // schema_oid, BIGINT + output.SetValue(1, count, Value::BIGINT(index.schema->oid)); + // index_name, VARCHAR + output.SetValue(2, count, Value(index.name)); + // index_oid, BIGINT + output.SetValue(3, count, Value::BIGINT(index.oid)); + // table_name, VARCHAR + output.SetValue(4, count, Value(index.info->table)); + // table_oid, BIGINT + // find the table in the catalog + auto &catalog = Catalog::GetCatalog(context); + auto table_entry = catalog.GetEntry(context, CatalogType::TABLE_ENTRY, index.info->schema, index.info->table); + output.SetValue(5, count, Value::BIGINT(table_entry->oid)); + // is_unique, BOOLEAN + output.SetValue(6, count, Value::BOOLEAN(index.index->is_unique)); + // is_primary, BOOLEAN + output.SetValue(7, count, Value::BOOLEAN(index.index->is_primary)); + // expressions, VARCHAR + output.SetValue(8, count, Value()); + // sql, VARCHAR + output.SetValue(9, 
count, Value(index.ToSQL())); + + count++; + } + output.SetCardinality(count); +} + +void DuckDBIndexesFun::RegisterFunction(BuiltinFunctions &set) { + set.AddFunction(TableFunction("duckdb_indexes", {}, DuckDBIndexesFunction, DuckDBIndexesBind, DuckDBIndexesInit)); +} + +} // namespace duckdb + + + + + + + +namespace duckdb { + +struct DuckDBSchemasData : public FunctionOperatorData { + DuckDBSchemasData() : offset(0) { + } + + vector entries; + idx_t offset; +}; + +static unique_ptr DuckDBSchemasBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, vector &return_types, + vector &names) { + names.emplace_back("oid"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("schema_name"); return_types.push_back(LogicalType::VARCHAR); - names.emplace_back("sql_path"); + names.emplace_back("internal"); + return_types.push_back(LogicalType::BOOLEAN); + + names.emplace_back("sql"); return_types.push_back(LogicalType::VARCHAR); return nullptr; } -unique_ptr InformationSchemaSchemataInit(ClientContext &context, const FunctionData *bind_data, - vector &column_ids, - TableFilterCollection *filters) { - auto result = make_unique(); +unique_ptr DuckDBSchemasInit(ClientContext &context, const FunctionData *bind_data, + const vector &column_ids, TableFilterCollection *filters) { + auto result = make_unique(); // scan all the schemas and collect them Catalog::GetCatalog(context).ScanSchemas( @@ -75905,9 +84464,9 @@ unique_ptr InformationSchemaSchemataInit(ClientContext &co return move(result); } -void InformationSchemaSchemataFunction(ClientContext &context, const FunctionData *bind_data, - FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { - auto &data = (InformationSchemaSchemataData &)*operator_state; +void DuckDBSchemasFunction(ClientContext &context, const FunctionData *bind_data, FunctionOperatorData *operator_state, + DataChunk *input, DataChunk 
&output) { + auto &data = (DuckDBSchemasData &)*operator_state; if (data.offset >= data.entries.size()) { // finished returning values return; @@ -75916,31 +84475,26 @@ void InformationSchemaSchemataFunction(ClientContext &context, const FunctionDat // either fill up the chunk or return all the remaining columns idx_t count = 0; while (data.offset < data.entries.size() && count < STANDARD_VECTOR_SIZE) { - auto &entry = data.entries[data.offset++]; + auto &entry = data.entries[data.offset]; // return values: - // "catalog_name", PhysicalType::VARCHAR - output.SetValue(0, count, Value()); + // "oid", PhysicalType::BIGINT + output.SetValue(0, count, Value::BIGINT(entry->oid)); // "schema_name", PhysicalType::VARCHAR output.SetValue(1, count, Value(entry->name)); - // "schema_owner", PhysicalType::VARCHAR - output.SetValue(2, count, Value()); - // "default_character_set_catalog", PhysicalType::VARCHAR + // "internal", PhysicalType::BOOLEAN + output.SetValue(2, count, Value::BOOLEAN(entry->internal)); + // "sql", PhysicalType::VARCHAR output.SetValue(3, count, Value()); - // "default_character_set_schema", PhysicalType::VARCHAR - output.SetValue(4, count, Value()); - // "default_character_set_name", PhysicalType::VARCHAR - output.SetValue(5, count, Value()); - // "sql_path", PhysicalType::VARCHAR - output.SetValue(6, count, Value()); + + data.offset++; count++; } output.SetCardinality(count); } -void InformationSchemaSchemata::RegisterFunction(BuiltinFunctions &set) { - set.AddFunction(TableFunction("information_schema_schemata", {}, InformationSchemaSchemataFunction, - InformationSchemaSchemataBind, InformationSchemaSchemataInit)); +void DuckDBSchemasFun::RegisterFunction(BuiltinFunctions &set) { + set.AddFunction(TableFunction("duckdb_schemas", {}, DuckDBSchemasFunction, DuckDBSchemasBind, DuckDBSchemasInit)); } } // namespace duckdb @@ -75954,68 +84508,201 @@ void InformationSchemaSchemata::RegisterFunction(BuiltinFunctions &set) { namespace duckdb { -struct 
InformationSchemaTablesData : public FunctionOperatorData { - InformationSchemaTablesData() : offset(0) { +struct DuckDBSequencesData : public FunctionOperatorData { + DuckDBSequencesData() : offset(0) { } vector entries; idx_t offset; }; -static unique_ptr InformationSchemaTablesBind(ClientContext &context, vector &inputs, - unordered_map &named_parameters, - vector &input_table_types, - vector &input_table_names, - vector &return_types, vector &names) { - names.emplace_back("table_catalog"); +static unique_ptr DuckDBSequencesBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, + vector &return_types, vector &names) { + names.emplace_back("schema_name"); return_types.push_back(LogicalType::VARCHAR); - names.emplace_back("table_schema"); - return_types.push_back(LogicalType::VARCHAR); + names.emplace_back("schema_oid"); + return_types.push_back(LogicalType::BIGINT); - names.emplace_back("table_name"); + names.emplace_back("sequence_name"); return_types.push_back(LogicalType::VARCHAR); - names.emplace_back("table_type"); - return_types.push_back(LogicalType::VARCHAR); + names.emplace_back("sequence_oid"); + return_types.push_back(LogicalType::BIGINT); - names.emplace_back("self_referencing_column_name"); - return_types.push_back(LogicalType::VARCHAR); + names.emplace_back("temporary"); + return_types.push_back(LogicalType::BOOLEAN); - names.emplace_back("reference_generation"); - return_types.push_back(LogicalType::VARCHAR); + names.emplace_back("start_value"); + return_types.push_back(LogicalType::BIGINT); - names.emplace_back("user_defined_type_catalog"); - return_types.push_back(LogicalType::VARCHAR); + names.emplace_back("min_value"); + return_types.push_back(LogicalType::BIGINT); - names.emplace_back("user_defined_type_schema"); - return_types.push_back(LogicalType::VARCHAR); + names.emplace_back("max_value"); + return_types.push_back(LogicalType::BIGINT); + + 
names.emplace_back("increment_by"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("cycle"); + return_types.push_back(LogicalType::BOOLEAN); + + names.emplace_back("last_value"); + return_types.push_back(LogicalType::BIGINT); - names.emplace_back("user_defined_type_name"); + names.emplace_back("sql"); return_types.push_back(LogicalType::VARCHAR); - names.emplace_back("is_insertable_into"); + return nullptr; +} + +unique_ptr DuckDBSequencesInit(ClientContext &context, const FunctionData *bind_data, + const vector &column_ids, + TableFilterCollection *filters) { + auto result = make_unique(); + + // scan all the schemas for tables and collect themand collect them + auto schemas = Catalog::GetCatalog(context).schemas->GetEntries(context); + for (auto &schema : schemas) { + schema->Scan(context, CatalogType::SEQUENCE_ENTRY, + [&](CatalogEntry *entry) { result->entries.push_back(entry); }); + }; + + // check the temp schema as well + context.temporary_objects->Scan(context, CatalogType::SEQUENCE_ENTRY, + [&](CatalogEntry *entry) { result->entries.push_back(entry); }); + return move(result); +} + +void DuckDBSequencesFunction(ClientContext &context, const FunctionData *bind_data, + FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { + auto &data = (DuckDBSequencesData &)*operator_state; + if (data.offset >= data.entries.size()) { + // finished returning values + return; + } + // start returning values + // either fill up the chunk or return all the remaining columns + idx_t count = 0; + while (data.offset < data.entries.size() && count < STANDARD_VECTOR_SIZE) { + auto &entry = data.entries[data.offset++]; + + auto &seq = (SequenceCatalogEntry &)*entry; + // return values: + // schema_name, VARCHAR + output.SetValue(0, count, Value(seq.schema->name)); + // schema_oid, BIGINT + output.SetValue(1, count, Value::BIGINT(seq.schema->oid)); + // sequence_name, VARCHAR + output.SetValue(2, count, Value(seq.name)); + // 
sequence_oid, BIGINT + output.SetValue(3, count, Value::BIGINT(seq.oid)); + // temporary, BOOLEAN + output.SetValue(4, count, Value::BOOLEAN(seq.temporary)); + // start_value, BIGINT + output.SetValue(5, count, Value::BIGINT(seq.start_value)); + // min_value, BIGINT + output.SetValue(6, count, Value::BIGINT(seq.min_value)); + // max_value, BIGINT + output.SetValue(7, count, Value::BIGINT(seq.max_value)); + // increment_by, BIGINT + output.SetValue(8, count, Value::BIGINT(seq.increment)); + // cycle, BOOLEAN + output.SetValue(9, count, Value::BOOLEAN(seq.cycle)); + // last_value, BIGINT + output.SetValue(10, count, seq.usage_count == 0 ? Value() : Value::BOOLEAN(seq.last_value)); + // sql, LogicalType::VARCHAR + output.SetValue(11, count, Value(seq.ToSQL())); + + count++; + } + output.SetCardinality(count); +} + +void DuckDBSequencesFun::RegisterFunction(BuiltinFunctions &set) { + set.AddFunction( + TableFunction("duckdb_sequences", {}, DuckDBSequencesFunction, DuckDBSequencesBind, DuckDBSequencesInit)); +} + +} // namespace duckdb + + + + + + + + + + + +namespace duckdb { + +struct DuckDBTablesData : public FunctionOperatorData { + DuckDBTablesData() : offset(0) { + } + + vector entries; + idx_t offset; +}; + +static unique_ptr DuckDBTablesBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, vector &return_types, + vector &names) { + names.emplace_back("schema_name"); return_types.push_back(LogicalType::VARCHAR); - names.emplace_back("is_typed"); + names.emplace_back("schema_oid"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("table_name"); return_types.push_back(LogicalType::VARCHAR); - names.emplace_back("commit_action"); + names.emplace_back("table_oid"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("internal"); + return_types.push_back(LogicalType::BOOLEAN); + + names.emplace_back("temporary"); + 
return_types.push_back(LogicalType::BOOLEAN); + + names.emplace_back("has_primary_key"); + return_types.push_back(LogicalType::BOOLEAN); + + names.emplace_back("estimated_size"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("column_count"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("index_count"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("check_constraint_count"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("sql"); return_types.push_back(LogicalType::VARCHAR); return nullptr; } -unique_ptr InformationSchemaTablesInit(ClientContext &context, const FunctionData *bind_data, - vector &column_ids, - TableFilterCollection *filters) { - auto result = make_unique(); +unique_ptr DuckDBTablesInit(ClientContext &context, const FunctionData *bind_data, + const vector &column_ids, TableFilterCollection *filters) { + auto result = make_unique(); - // scan all the schemas for tables and views and collect them - Catalog::GetCatalog(context).schemas->Scan(context, [&](CatalogEntry *entry) { - auto schema = (SchemaCatalogEntry *)entry; + // scan all the schemas for tables and collect themand collect them + auto schemas = Catalog::GetCatalog(context).schemas->GetEntries(context); + for (auto &schema : schemas) { schema->Scan(context, CatalogType::TABLE_ENTRY, [&](CatalogEntry *entry) { result->entries.push_back(entry); }); - }); + }; // check the temp schema as well context.temporary_objects->Scan(context, CatalogType::TABLE_ENTRY, @@ -76023,94 +84710,332 @@ unique_ptr InformationSchemaTablesInit(ClientContext &cont return move(result); } -void InformationSchemaTablesFunction(ClientContext &context, const FunctionData *bind_data, - FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { - auto &data = (InformationSchemaTablesData &)*operator_state; +static bool TableHasPrimaryKey(TableCatalogEntry &table) { + for (auto &constraint : table.constraints) { + 
if (constraint->type == ConstraintType::UNIQUE) { + auto &unique = (UniqueConstraint &)*constraint; + if (unique.is_primary_key) { + return true; + } + } + } + return false; +} + +static idx_t CheckConstraintCount(TableCatalogEntry &table) { + idx_t check_count = 0; + for (auto &constraint : table.constraints) { + if (constraint->type == ConstraintType::CHECK) { + check_count++; + } + } + return check_count; +} + +void DuckDBTablesFunction(ClientContext &context, const FunctionData *bind_data, FunctionOperatorData *operator_state, + DataChunk *input, DataChunk &output) { + auto &data = (DuckDBTablesData &)*operator_state; if (data.offset >= data.entries.size()) { // finished returning values return; } - idx_t next = MinValue(data.offset + STANDARD_VECTOR_SIZE, data.entries.size()); - output.SetCardinality(next - data.offset); + // start returning values + // either fill up the chunk or return all the remaining columns + idx_t count = 0; + while (data.offset < data.entries.size() && count < STANDARD_VECTOR_SIZE) { + auto &entry = data.entries[data.offset++]; + + if (entry->type != CatalogType::TABLE_ENTRY) { + continue; + } + auto &table = (TableCatalogEntry &)*entry; + // return values: + // schema_name, LogicalType::VARCHAR + output.SetValue(0, count, Value(table.schema->name)); + // schema_oid, LogicalType::BIGINT + output.SetValue(1, count, Value::BIGINT(table.schema->oid)); + // table_name, LogicalType::VARCHAR + output.SetValue(2, count, Value(table.name)); + // table_oid, LogicalType::BIGINT + output.SetValue(3, count, Value::BIGINT(table.oid)); + // internal, LogicalType::BOOLEAN + output.SetValue(4, count, Value::BOOLEAN(table.internal)); + // temporary, LogicalType::BOOLEAN + output.SetValue(5, count, Value::BOOLEAN(table.temporary)); + // has_primary_key, LogicalType::BOOLEAN + output.SetValue(6, count, Value::BOOLEAN(TableHasPrimaryKey(table))); + // estimated_size, LogicalType::BIGINT + output.SetValue(7, count, 
Value::BIGINT(table.storage->info->cardinality.load())); + // column_count, LogicalType::BIGINT + output.SetValue(8, count, Value::BIGINT(table.columns.size())); + // index_count, LogicalType::BIGINT + output.SetValue(9, count, Value::BIGINT(table.storage->info->indexes.Count())); + // check_constraint_count, LogicalType::BIGINT + output.SetValue(10, count, Value::BIGINT(CheckConstraintCount(table))); + // sql, LogicalType::VARCHAR + output.SetValue(11, count, Value(table.ToSQL())); + + count++; + } + output.SetCardinality(count); +} + +void DuckDBTablesFun::RegisterFunction(BuiltinFunctions &set) { + set.AddFunction(TableFunction("duckdb_tables", {}, DuckDBTablesFunction, DuckDBTablesBind, DuckDBTablesInit)); +} + +} // namespace duckdb + + + + + + + + +namespace duckdb { + +struct DuckDBTypesData : public FunctionOperatorData { + DuckDBTypesData() : offset(0) { + } + + vector types; + idx_t offset; +}; + +static unique_ptr DuckDBTypesBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, vector &return_types, + vector &names) { + names.emplace_back("schema_name"); + return_types.push_back(LogicalType::VARCHAR); + names.emplace_back("schema_oid"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("type_oid"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("type_name"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("type_size"); + return_types.push_back(LogicalType::BIGINT); + + // NUMERIC, STRING, DATETIME, BOOLEAN, COMPOSITE, USER + names.emplace_back("type_category"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("internal"); + return_types.push_back(LogicalType::BOOLEAN); + + return nullptr; +} + +unique_ptr DuckDBTypesInit(ClientContext &context, const FunctionData *bind_data, + const vector &column_ids, TableFilterCollection *filters) { + auto result = make_unique(); + result->types = 
LogicalType::ALL_TYPES; + // FIXME: add user-defined types here (when we have them) + return move(result); +} + +void DuckDBTypesFunction(ClientContext &context, const FunctionData *bind_data, FunctionOperatorData *operator_state, + DataChunk *input, DataChunk &output) { + auto &data = (DuckDBTypesData &)*operator_state; + if (data.offset >= data.types.size()) { + // finished returning values + return; + } // start returning values // either fill up the chunk or return all the remaining columns - for (idx_t i = data.offset; i < next; i++) { - auto index = i - data.offset; - auto entry = (StandardEntry *)data.entries[i]; + idx_t count = 0; + while (data.offset < data.types.size() && count < STANDARD_VECTOR_SIZE) { + auto &type = data.types[data.offset++]; - const char *table_type; - const char *is_insertable_into = "NO"; - switch (entry->type) { - case CatalogType::TABLE_ENTRY: - if (entry->temporary) { - table_type = "LOCAL TEMPORARY"; - } else { - table_type = "BASE TABLE"; - } - is_insertable_into = "YES"; + // return values: + // schema_name, VARCHAR + output.SetValue(0, count, Value()); + // schema_oid, BIGINT + output.SetValue(1, count, Value()); + // type_oid, BIGINT + output.SetValue(2, count, Value::BIGINT(int(type.id()))); + // type_name, VARCHAR + output.SetValue(3, count, Value(type.ToString())); + // type_size, BIGINT + auto internal_type = type.InternalType(); + output.SetValue(4, count, + internal_type == PhysicalType::INVALID ? 
Value() : Value::BIGINT(GetTypeIdSize(internal_type))); + // type_category, VARCHAR + string category; + switch (type.id()) { + case LogicalTypeId::TINYINT: + case LogicalTypeId::SMALLINT: + case LogicalTypeId::INTEGER: + case LogicalTypeId::BIGINT: + case LogicalTypeId::DECIMAL: + case LogicalTypeId::FLOAT: + case LogicalTypeId::DOUBLE: + case LogicalTypeId::UTINYINT: + case LogicalTypeId::USMALLINT: + case LogicalTypeId::UINTEGER: + case LogicalTypeId::UBIGINT: + case LogicalTypeId::HUGEINT: + category = "NUMERIC"; break; - case CatalogType::VIEW_ENTRY: - table_type = "VIEW"; + case LogicalTypeId::DATE: + case LogicalTypeId::TIME: + case LogicalTypeId::TIMESTAMP_SEC: + case LogicalTypeId::TIMESTAMP_MS: + case LogicalTypeId::TIMESTAMP: + case LogicalTypeId::TIMESTAMP_NS: + case LogicalTypeId::INTERVAL: + category = "DATETIME"; + break; + case LogicalTypeId::CHAR: + case LogicalTypeId::VARCHAR: + category = "STRING"; + break; + case LogicalTypeId::BOOLEAN: + category = "BOOLEAN"; + break; + case LogicalTypeId::STRUCT: + case LogicalTypeId::LIST: + case LogicalTypeId::MAP: + category = "COMPOSITE"; break; default: - table_type = "UNKNOWN"; break; } + output.SetValue(5, count, category.empty() ? 
Value() : Value(category)); + // internal, BOOLEAN + output.SetValue(6, count, Value::BOOLEAN(true)); - // return values: - // "table_catalog", PhysicalType::VARCHAR - output.SetValue(0, index, Value()); - // "table_schema", PhysicalType::VARCHAR - output.SetValue(1, index, Value(entry->schema->name)); - // "table_name", PhysicalType::VARCHAR - output.SetValue(2, index, Value(entry->name)); - // "table_type", PhysicalType::VARCHAR - output.SetValue(3, index, Value(table_type)); - // "self_referencing_column_name", PhysicalType::VARCHAR - output.SetValue(4, index, Value()); - // "reference_generation", PhysicalType::VARCHAR - output.SetValue(5, index, Value()); - // "user_defined_type_catalog", PhysicalType::VARCHAR - output.SetValue(6, index, Value()); - // "user_defined_type_schema", PhysicalType::VARCHAR - output.SetValue(7, index, Value()); - // "user_defined_type_name", PhysicalType::VARCHAR - output.SetValue(8, index, Value()); - // "is_insertable_into", PhysicalType::VARCHAR (YES/NO) - output.SetValue(9, index, Value(is_insertable_into)); - // "is_typed", PhysicalType::VARCHAR (YES/NO) - output.SetValue(10, index, Value("NO")); - // "commit_action", PhysicalType::VARCHAR - output.SetValue(11, index, Value()); + count++; } - data.offset = next; + output.SetCardinality(count); } -void InformationSchemaTables::RegisterFunction(BuiltinFunctions &set) { - set.AddFunction(TableFunction("information_schema_tables", {}, InformationSchemaTablesFunction, - InformationSchemaTablesBind, InformationSchemaTablesInit)); +void DuckDBTypesFun::RegisterFunction(BuiltinFunctions &set) { + set.AddFunction(TableFunction("duckdb_types", {}, DuckDBTypesFunction, DuckDBTypesBind, DuckDBTypesInit)); } } // namespace duckdb + + + + + + namespace duckdb { -void BuiltinFunctions::RegisterInformationSchemaFunctions() { - InformationSchemaSchemata::RegisterFunction(*this); - InformationSchemaTables::RegisterFunction(*this); - InformationSchemaColumns::RegisterFunction(*this); +struct 
DuckDBViewsData : public FunctionOperatorData { + DuckDBViewsData() : offset(0) { + } + + vector entries; + idx_t offset; +}; + +static unique_ptr DuckDBViewsBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, vector &return_types, + vector &names) { + names.emplace_back("schema_name"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("schema_oid"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("view_name"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("view_oid"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("internal"); + return_types.push_back(LogicalType::BOOLEAN); + + names.emplace_back("temporary"); + return_types.push_back(LogicalType::BOOLEAN); + + names.emplace_back("column_count"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("sql"); + return_types.push_back(LogicalType::VARCHAR); + + return nullptr; +} + +unique_ptr DuckDBViewsInit(ClientContext &context, const FunctionData *bind_data, + const vector &column_ids, TableFilterCollection *filters) { + auto result = make_unique(); + + // scan all the schemas for tables and collect themand collect them + auto schemas = Catalog::GetCatalog(context).schemas->GetEntries(context); + for (auto &schema : schemas) { + schema->Scan(context, CatalogType::VIEW_ENTRY, [&](CatalogEntry *entry) { result->entries.push_back(entry); }); + }; + + // check the temp schema as well + context.temporary_objects->Scan(context, CatalogType::VIEW_ENTRY, + [&](CatalogEntry *entry) { result->entries.push_back(entry); }); + return move(result); +} + +void DuckDBViewsFunction(ClientContext &context, const FunctionData *bind_data, FunctionOperatorData *operator_state, + DataChunk *input, DataChunk &output) { + auto &data = (DuckDBViewsData &)*operator_state; + if (data.offset >= data.entries.size()) { + // finished returning values + 
return; + } + // start returning values + // either fill up the chunk or return all the remaining columns + idx_t count = 0; + while (data.offset < data.entries.size() && count < STANDARD_VECTOR_SIZE) { + auto &entry = data.entries[data.offset++]; + + if (entry->type != CatalogType::VIEW_ENTRY) { + continue; + } + auto &view = (ViewCatalogEntry &)*entry; + + // return values: + // schema_name, LogicalType::VARCHAR + output.SetValue(0, count, Value(view.schema->name)); + // schema_oid, LogicalType::BIGINT + output.SetValue(1, count, Value::BIGINT(view.schema->oid)); + // view_name, LogicalType::VARCHAR + output.SetValue(2, count, Value(view.name)); + // view_oid, LogicalType::BIGINT + output.SetValue(3, count, Value::BIGINT(view.oid)); + // internal, LogicalType::BOOLEAN + output.SetValue(4, count, Value::BOOLEAN(view.internal)); + // temporary, LogicalType::BOOLEAN + output.SetValue(5, count, Value::BOOLEAN(view.temporary)); + // column_count, LogicalType::BIGINT + output.SetValue(6, count, Value::BIGINT(view.types.size())); + // sql, LogicalType::VARCHAR + output.SetValue(7, count, Value(view.ToSQL())); + + count++; + } + output.SetCardinality(count); +} + +void DuckDBViewsFun::RegisterFunction(BuiltinFunctions &set) { + set.AddFunction(TableFunction("duckdb_views", {}, DuckDBViewsFunction, DuckDBViewsBind, DuckDBViewsInit)); } } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/function/table/sqlite_functions.hpp -// -// -//===----------------------------------------------------------------------===// + + @@ -76118,42 +85043,342 @@ void BuiltinFunctions::RegisterInformationSchemaFunctions() { namespace duckdb { -struct PragmaCollations { - static void RegisterFunction(BuiltinFunctions &set); -}; +struct PragmaCollateData : public FunctionOperatorData { + PragmaCollateData() : offset(0) { + } -struct PragmaFunctionPragma { - static void RegisterFunction(BuiltinFunctions &set); + vector 
entries; + idx_t offset; }; -struct PragmaTableInfo { - static void RegisterFunction(BuiltinFunctions &set); -}; +static unique_ptr PragmaCollateBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, vector &return_types, + vector &names) { + names.emplace_back("collname"); + return_types.push_back(LogicalType::VARCHAR); -struct PragmaLastProfilingOutput { - static void RegisterFunction(BuiltinFunctions &set); -}; + return nullptr; +} -struct PragmaDetailedProfilingOutput { - static void RegisterFunction(BuiltinFunctions &set); -}; +unique_ptr PragmaCollateInit(ClientContext &context, const FunctionData *bind_data, + const vector &column_ids, TableFilterCollection *filters) { + auto result = make_unique(); -struct SQLiteMaster { - static void RegisterFunction(BuiltinFunctions &set); -}; + Catalog::GetCatalog(context).schemas->Scan(context, [&](CatalogEntry *entry) { + auto schema = (SchemaCatalogEntry *)entry; + schema->Scan(context, CatalogType::COLLATION_ENTRY, + [&](CatalogEntry *entry) { result->entries.push_back(entry->name); }); + }); -struct PragmaVersion { - static void RegisterFunction(BuiltinFunctions &set); + return move(result); +} + +static void PragmaCollateFunction(ClientContext &context, const FunctionData *bind_data, + FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { + auto &data = (PragmaCollateData &)*operator_state; + if (data.offset >= data.entries.size()) { + // finished returning values + return; + } + idx_t next = MinValue(data.offset + STANDARD_VECTOR_SIZE, data.entries.size()); + output.SetCardinality(next - data.offset); + for (idx_t i = data.offset; i < next; i++) { + auto index = i - data.offset; + output.SetValue(0, index, Value(data.entries[i])); + } + + data.offset = next; +} + +void PragmaCollations::RegisterFunction(BuiltinFunctions &set) { + set.AddFunction( + TableFunction("pragma_collations", {}, PragmaCollateFunction, 
PragmaCollateBind, PragmaCollateInit)); +} + +} // namespace duckdb + + + + +namespace duckdb { + +struct PragmaDatabaseListData : public FunctionOperatorData { + PragmaDatabaseListData() : finished(false) { + } + + bool finished; }; -struct PragmaDatabaseList { - static void RegisterFunction(BuiltinFunctions &set); +static unique_ptr PragmaDatabaseListBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, + vector &return_types, vector &names) { + names.emplace_back("seq"); + return_types.push_back(LogicalType::INTEGER); + + names.emplace_back("name"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("file"); + return_types.push_back(LogicalType::VARCHAR); + + return nullptr; +} + +unique_ptr PragmaDatabaseListInit(ClientContext &context, const FunctionData *bind_data, + const vector &column_ids, + TableFilterCollection *filters) { + return make_unique(); +} + +void PragmaDatabaseListFunction(ClientContext &context, const FunctionData *bind_data, + FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { + auto &data = (PragmaDatabaseListData &)*operator_state; + if (data.finished) { + return; + } + + output.SetCardinality(1); + output.data[0].SetValue(0, Value::INTEGER(0)); + output.data[1].SetValue(0, Value("main")); + output.data[2].SetValue(0, Value(StorageManager::GetStorageManager(context).GetDBPath())); + + data.finished = true; +} + +void PragmaDatabaseList::RegisterFunction(BuiltinFunctions &set) { + set.AddFunction(TableFunction("pragma_database_list", {}, PragmaDatabaseListFunction, PragmaDatabaseListBind, + PragmaDatabaseListInit)); +} + +} // namespace duckdb + + + + + + + + +namespace duckdb { + +struct PragmaDatabaseSizeData : public FunctionOperatorData { + PragmaDatabaseSizeData() : finished(false) { + } + + bool finished; }; -struct PragmaDatabaseSize { - static void RegisterFunction(BuiltinFunctions &set); +static 
unique_ptr PragmaDatabaseSizeBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, + vector &return_types, vector &names) { + names.emplace_back("database_size"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("block_size"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("total_blocks"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("used_blocks"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("free_blocks"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("wal_size"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("memory_usage"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("memory_limit"); + return_types.push_back(LogicalType::VARCHAR); + + return nullptr; +} + +unique_ptr PragmaDatabaseSizeInit(ClientContext &context, const FunctionData *bind_data, + const vector &column_ids, + TableFilterCollection *filters) { + return make_unique(); +} + +void PragmaDatabaseSizeFunction(ClientContext &context, const FunctionData *bind_data, + FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { + auto &data = (PragmaDatabaseSizeData &)*operator_state; + if (data.finished) { + return; + } + auto &storage = StorageManager::GetStorageManager(context); + auto &block_manager = BlockManager::GetBlockManager(context); + auto &buffer_manager = BufferManager::GetBufferManager(context); + + output.SetCardinality(1); + if (!storage.InMemory()) { + auto total_blocks = block_manager.TotalBlocks(); + auto block_size = Storage::BLOCK_ALLOC_SIZE; + auto free_blocks = block_manager.FreeBlocks(); + auto used_blocks = total_blocks - free_blocks; + auto bytes = (total_blocks * block_size); + auto wal_size = storage.GetWriteAheadLog()->GetWALSize(); + output.data[0].SetValue(0, 
Value(StringUtil::BytesToHumanReadableString(bytes))); + output.data[1].SetValue(0, Value::BIGINT(block_size)); + output.data[2].SetValue(0, Value::BIGINT(total_blocks)); + output.data[3].SetValue(0, Value::BIGINT(used_blocks)); + output.data[4].SetValue(0, Value::BIGINT(free_blocks)); + output.data[5].SetValue(0, Value(StringUtil::BytesToHumanReadableString(wal_size))); + } else { + output.data[0].SetValue(0, Value()); + output.data[1].SetValue(0, Value()); + output.data[2].SetValue(0, Value()); + output.data[3].SetValue(0, Value()); + output.data[4].SetValue(0, Value()); + output.data[5].SetValue(0, Value()); + } + output.data[6].SetValue(0, Value(StringUtil::BytesToHumanReadableString(buffer_manager.GetUsedMemory()))); + auto max_memory = buffer_manager.GetMaxMemory(); + output.data[7].SetValue(0, max_memory == (idx_t)-1 ? Value("Unlimited") + : Value(StringUtil::BytesToHumanReadableString(max_memory))); + + data.finished = true; +} + +void PragmaDatabaseSize::RegisterFunction(BuiltinFunctions &set) { + set.AddFunction(TableFunction("pragma_database_size", {}, PragmaDatabaseSizeFunction, PragmaDatabaseSizeBind, + PragmaDatabaseSizeInit)); +} + +} // namespace duckdb + + + + + + + + + +namespace duckdb { + +struct PragmaFunctionsData : public FunctionOperatorData { + PragmaFunctionsData() : offset(0), offset_in_entry(0) { + } + + vector entries; + idx_t offset; + idx_t offset_in_entry; }; +static unique_ptr PragmaFunctionsBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, + vector &return_types, vector &names) { + names.emplace_back("name"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("type"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("parameters"); + return_types.push_back(LogicalType::LIST(LogicalType::VARCHAR)); + + names.emplace_back("varargs"); + return_types.push_back(LogicalType::VARCHAR); + + 
names.emplace_back("return_type"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("side_effects"); + return_types.push_back(LogicalType::BOOLEAN); + + return nullptr; +} + +unique_ptr PragmaFunctionsInit(ClientContext &context, const FunctionData *bind_data, + const vector &column_ids, + TableFilterCollection *filters) { + auto result = make_unique(); + + Catalog::GetCatalog(context).schemas->Scan(context, [&](CatalogEntry *entry) { + auto schema = (SchemaCatalogEntry *)entry; + schema->Scan(context, CatalogType::SCALAR_FUNCTION_ENTRY, + [&](CatalogEntry *entry) { result->entries.push_back(entry); }); + }); + + return move(result); +} + +void AddFunction(BaseScalarFunction &f, idx_t &count, DataChunk &output, bool is_aggregate) { + output.SetValue(0, count, Value(f.name)); + output.SetValue(1, count, Value(is_aggregate ? "AGGREGATE" : "SCALAR")); + auto result_data = FlatVector::GetData(output.data[2]); + result_data[count].offset = ListVector::GetListSize(output.data[2]); + result_data[count].length = f.arguments.size(); + string parameters; + for (idx_t i = 0; i < f.arguments.size(); i++) { + auto val = Value(f.arguments[i].ToString()); + ListVector::PushBack(output.data[2], val); + } + + output.SetValue(3, count, f.varargs.id() != LogicalTypeId::INVALID ? 
Value(f.varargs.ToString()) : Value()); + output.SetValue(4, count, f.return_type.ToString()); + output.SetValue(5, count, Value::BOOLEAN(f.has_side_effects)); + + count++; +} + +static void PragmaFunctionsFunction(ClientContext &context, const FunctionData *bind_data, + FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { + auto &data = (PragmaFunctionsData &)*operator_state; + if (data.offset >= data.entries.size()) { + // finished returning values + return; + } + idx_t count = 0; + while (count < STANDARD_VECTOR_SIZE && data.offset < data.entries.size()) { + auto &entry = data.entries[data.offset]; + switch (entry->type) { + case CatalogType::SCALAR_FUNCTION_ENTRY: { + auto &func = (ScalarFunctionCatalogEntry &)*entry; + if (data.offset_in_entry >= func.functions.size()) { + data.offset++; + data.offset_in_entry = 0; + break; + } + AddFunction(func.functions[data.offset_in_entry++], count, output, false); + break; + } + case CatalogType::AGGREGATE_FUNCTION_ENTRY: { + auto &aggr = (AggregateFunctionCatalogEntry &)*entry; + if (data.offset_in_entry >= aggr.functions.size()) { + data.offset++; + data.offset_in_entry = 0; + break; + } + AddFunction(aggr.functions[data.offset_in_entry++], count, output, true); + break; + } + default: + data.offset++; + break; + } + } + output.SetCardinality(count); +} + +void PragmaFunctionPragma::RegisterFunction(BuiltinFunctions &set) { + set.AddFunction( + TableFunction("pragma_functions", {}, PragmaFunctionsFunction, PragmaFunctionsBind, PragmaFunctionsInit)); +} + } // namespace duckdb @@ -76164,116 +85389,124 @@ struct PragmaDatabaseSize { + + + + +#include + namespace duckdb { -struct PragmaDetailedProfilingOutputOperatorData : public FunctionOperatorData { - explicit PragmaDetailedProfilingOutputOperatorData() : chunk_index(0), initialized(false) { +struct PragmaStorageFunctionData : public TableFunctionData { + explicit PragmaStorageFunctionData(TableCatalogEntry *table_entry) : 
table_entry(table_entry) { } - idx_t chunk_index; - bool initialized; + + TableCatalogEntry *table_entry; + vector> storage_info; }; -struct PragmaDetailedProfilingOutputData : public TableFunctionData { - explicit PragmaDetailedProfilingOutputData(vector &types) : types(types) { +struct PragmaStorageOperatorData : public FunctionOperatorData { + PragmaStorageOperatorData() : offset(0) { } - unique_ptr collection; - vector types; + + idx_t offset; }; -static unique_ptr PragmaDetailedProfilingOutputBind(ClientContext &context, vector &inputs, - unordered_map &named_parameters, - vector &input_table_types, - vector &input_table_names, - vector &return_types, - vector &names) { - names.emplace_back("OPERATOR_ID"); - return_types.push_back(LogicalType::INTEGER); +static unique_ptr PragmaStorageInfoBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, + vector &return_types, vector &names) { + names.emplace_back("row_group_id"); + return_types.push_back(LogicalType::BIGINT); - names.emplace_back("FUNCTION_ID"); - return_types.push_back(LogicalType::INTEGER); + names.emplace_back("column_name"); + return_types.push_back(LogicalType::VARCHAR); - names.emplace_back("NAME"); + names.emplace_back("column_id"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("column_path"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("segment_id"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("segment_type"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("start"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("count"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("compression"); return_types.push_back(LogicalType::VARCHAR); - names.emplace_back("TIME"); - return_types.push_back(LogicalType::DOUBLE); + names.emplace_back("stats"); + 
return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("has_updates"); + return_types.push_back(LogicalType::BOOLEAN); + + names.emplace_back("persistent"); + return_types.push_back(LogicalType::BOOLEAN); - return make_unique(return_types); -} + names.emplace_back("block_id"); + return_types.push_back(LogicalType::BIGINT); -unique_ptr PragmaDetailedProfilingOutputInit(ClientContext &context, - const FunctionData *bind_data, - vector &column_ids, - TableFilterCollection *filters) { - return make_unique(); -} + names.emplace_back("block_offset"); + return_types.push_back(LogicalType::BIGINT); -static void SetValue(DataChunk &output, int index, int op_id, int fun_id, string name, double time) { - output.SetValue(0, index, op_id); - output.SetValue(1, index, fun_id); - output.SetValue(2, index, move(name)); - output.SetValue(3, index, time); -} + auto qname = QualifiedName::Parse(inputs[0].GetValue()); -static void ExtractExpressions(ChunkCollection &collection, ExpressionInformation &info, DataChunk &chunk, int op_id, - int &fun_id, int sample_tuples_count) { - if (info.hasfunction) { - SetValue(chunk, chunk.size(), op_id, fun_id++, info.function_name, double(info.time) / sample_tuples_count); - chunk.SetCardinality(chunk.size() + 1); - if (chunk.size() == STANDARD_VECTOR_SIZE) { - collection.Append(chunk); - chunk.Reset(); - } - } - if (info.children.empty()) { - return; - } - // extract the children of this node - for (auto &child : info.children) { - ExtractExpressions(collection, *child, chunk, op_id, fun_id, sample_tuples_count); + // look up the table name in the catalog + auto &catalog = Catalog::GetCatalog(context); + auto entry = catalog.GetEntry(context, CatalogType::TABLE_ENTRY, qname.schema, qname.name); + if (entry->type != CatalogType::TABLE_ENTRY) { + throw Exception("storage_info requires a table as parameter"); } -} + auto table_entry = (TableCatalogEntry *)entry; -static void PragmaDetailedProfilingOutputFunction(ClientContext &context, 
const FunctionData *bind_data_p, - FunctionOperatorData *operator_state, DataChunk *input, - DataChunk &output) { - auto &state = (PragmaDetailedProfilingOutputOperatorData &)*operator_state; - auto &data = (PragmaDetailedProfilingOutputData &)*bind_data_p; - if (!state.initialized) { - // create a ChunkCollection - auto collection = make_unique(); + auto result = make_unique(table_entry); + result->storage_info = table_entry->storage->GetStorageInfo(); + return move(result); +} - DataChunk chunk; - chunk.Initialize(data.types); +unique_ptr PragmaStorageInfoInit(ClientContext &context, const FunctionData *bind_data, + const vector &column_ids, + TableFilterCollection *filters) { + return make_unique(); +} - int operator_counter = 1; - if (!context.query_profiler_history.GetPrevProfilers().empty()) { - for (auto op : context.query_profiler_history.GetPrevProfilers().back().second.GetTreeMap()) { - int function_counter = 1; - if (op.second->info.has_executor) { - for (auto &info : op.second->info.executors_info->roots) { - ExtractExpressions(*collection, *info, chunk, operator_counter, function_counter, - op.second->info.executors_info->sample_tuples_count); - } - } - operator_counter++; +static void PragmaStorageInfoFunction(ClientContext &context, const FunctionData *bind_data_p, + FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { + auto &bind_data = (PragmaStorageFunctionData &)*bind_data_p; + auto &data = (PragmaStorageOperatorData &)*operator_state; + idx_t count = 0; + while (data.offset < bind_data.storage_info.size() && count < STANDARD_VECTOR_SIZE) { + auto &entry = bind_data.storage_info[data.offset++]; + D_ASSERT(entry.size() + 1 == output.ColumnCount()); + idx_t result_idx = 0; + for (idx_t col_idx = 0; col_idx < entry.size(); col_idx++, result_idx++) { + if (col_idx == 1) { + // write the column name + auto column_index = entry[col_idx].GetValue(); + output.SetValue(result_idx, count, 
Value(bind_data.table_entry->columns[column_index].name)); + result_idx++; } + output.SetValue(result_idx, count, entry[col_idx]); } - collection->Append(chunk); - data.collection = move(collection); - state.initialized = true; - } - if (state.chunk_index >= data.collection->ChunkCount()) { - output.SetCardinality(0); - return; + count++; } - output.Reference(data.collection->GetChunk(state.chunk_index++)); + output.SetCardinality(count); } -void PragmaDetailedProfilingOutput::RegisterFunction(BuiltinFunctions &set) { - set.AddFunction(TableFunction("pragma_detailed_profiling_output", {}, PragmaDetailedProfilingOutputFunction, - PragmaDetailedProfilingOutputBind, PragmaDetailedProfilingOutputInit)); +void PragmaStorageInfo::RegisterFunction(BuiltinFunctions &set) { + set.AddFunction(TableFunction("pragma_storage_info", {LogicalType::VARCHAR}, PragmaStorageInfoFunction, + PragmaStorageInfoBind, PragmaStorageInfoInit)); } } // namespace duckdb @@ -76284,106 +85517,187 @@ void PragmaDetailedProfilingOutput::RegisterFunction(BuiltinFunctions &set) { + + + + + +#include + namespace duckdb { -struct PragmaLastProfilingOutputOperatorData : public FunctionOperatorData { - PragmaLastProfilingOutputOperatorData() : chunk_index(0), initialized(false) { +struct PragmaTableFunctionData : public TableFunctionData { + explicit PragmaTableFunctionData(CatalogEntry *entry_p) : entry(entry_p) { } - idx_t chunk_index; - bool initialized; + + CatalogEntry *entry; }; -struct PragmaLastProfilingOutputData : public TableFunctionData { - explicit PragmaLastProfilingOutputData(vector &types) : types(types) { +struct PragmaTableOperatorData : public FunctionOperatorData { + PragmaTableOperatorData() : offset(0) { } - unique_ptr collection; - vector types; + idx_t offset; }; -static unique_ptr PragmaLastProfilingOutputBind(ClientContext &context, vector &inputs, - unordered_map &named_parameters, - vector &input_table_types, - vector &input_table_names, - vector &return_types, - vector 
&names) { - names.emplace_back("OPERATOR_ID"); +static unique_ptr PragmaTableInfoBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, + vector &return_types, vector &names) { + names.emplace_back("cid"); return_types.push_back(LogicalType::INTEGER); - names.emplace_back("NAME"); + names.emplace_back("name"); return_types.push_back(LogicalType::VARCHAR); - names.emplace_back("TIME"); - return_types.push_back(LogicalType::DOUBLE); + names.emplace_back("type"); + return_types.push_back(LogicalType::VARCHAR); - names.emplace_back("CARDINALITY"); - return_types.push_back(LogicalType::BIGINT); + names.emplace_back("notnull"); + return_types.push_back(LogicalType::BOOLEAN); - names.emplace_back("DESCRIPTION"); + names.emplace_back("dflt_value"); return_types.push_back(LogicalType::VARCHAR); - return make_unique(return_types); -} + names.emplace_back("pk"); + return_types.push_back(LogicalType::BOOLEAN); -static void SetValue(DataChunk &output, int index, int op_id, string name, double time, int64_t car, - string description) { - output.SetValue(0, index, op_id); - output.SetValue(1, index, move(name)); - output.SetValue(2, index, time); - output.SetValue(3, index, car); - output.SetValue(4, index, move(description)); -} + auto qname = QualifiedName::Parse(inputs[0].GetValue()); -unique_ptr PragmaLastProfilingOutputInit(ClientContext &context, const FunctionData *bind_data, - vector &column_ids, - TableFilterCollection *filters) { - return make_unique(); + // look up the table name in the catalog + auto &catalog = Catalog::GetCatalog(context); + auto entry = catalog.GetEntry(context, CatalogType::TABLE_ENTRY, qname.schema, qname.name); + return make_unique(entry); } -static void PragmaLastProfilingOutputFunction(ClientContext &context, const FunctionData *bind_data_p, - FunctionOperatorData *operator_state, DataChunk *input, - DataChunk &output) { - auto &state = 
(PragmaLastProfilingOutputOperatorData &)*operator_state; - auto &data = (PragmaLastProfilingOutputData &)*bind_data_p; - if (!state.initialized) { - // create a ChunkCollection - auto collection = make_unique(); +unique_ptr PragmaTableInfoInit(ClientContext &context, const FunctionData *bind_data, + const vector &column_ids, + TableFilterCollection *filters) { + return make_unique(); +} - DataChunk chunk; - chunk.Initialize(data.types); - int operator_counter = 1; - if (!context.query_profiler_history.GetPrevProfilers().empty()) { - for (auto op : context.query_profiler_history.GetPrevProfilers().back().second.GetTreeMap()) { - SetValue(chunk, chunk.size(), operator_counter++, op.second->name, op.second->info.time, - op.second->info.elements, " "); - chunk.SetCardinality(chunk.size() + 1); - if (chunk.size() == STANDARD_VECTOR_SIZE) { - collection->Append(chunk); - chunk.Reset(); - } +static void CheckConstraints(TableCatalogEntry *table, idx_t oid, bool &out_not_null, bool &out_pk) { + out_not_null = false; + out_pk = false; + // check all constraints + // FIXME: this is pretty inefficient, it probably doesn't matter + for (auto &constraint : table->bound_constraints) { + switch (constraint->type) { + case ConstraintType::NOT_NULL: { + auto ¬_null = (BoundNotNullConstraint &)*constraint; + if (not_null.index == oid) { + out_not_null = true; } + break; + } + case ConstraintType::UNIQUE: { + auto &unique = (BoundUniqueConstraint &)*constraint; + if (unique.is_primary_key && unique.key_set.find(oid) != unique.key_set.end()) { + out_pk = true; + } + break; + } + default: + break; } - collection->Append(chunk); - data.collection = move(collection); - state.initialized = true; } +} - if (state.chunk_index >= data.collection->ChunkCount()) { - output.SetCardinality(0); +static void PragmaTableInfoTable(PragmaTableOperatorData &data, TableCatalogEntry *table, DataChunk &output) { + if (data.offset >= table->columns.size()) { + // finished returning values return; } - 
output.Reference(data.collection->GetChunk(state.chunk_index++)); + // start returning values + // either fill up the chunk or return all the remaining columns + idx_t next = MinValue(data.offset + STANDARD_VECTOR_SIZE, table->columns.size()); + output.SetCardinality(next - data.offset); + + for (idx_t i = data.offset; i < next; i++) { + bool not_null, pk; + auto index = i - data.offset; + auto &column = table->columns[i]; + D_ASSERT(column.oid < (idx_t)NumericLimits::Maximum()); + CheckConstraints(table, column.oid, not_null, pk); + + // return values: + // "cid", PhysicalType::INT32 + output.SetValue(0, index, Value::INTEGER((int32_t)column.oid)); + // "name", PhysicalType::VARCHAR + output.SetValue(1, index, Value(column.name)); + // "type", PhysicalType::VARCHAR + output.SetValue(2, index, Value(column.type.ToString())); + // "notnull", PhysicalType::BOOL + output.SetValue(3, index, Value::BOOLEAN(not_null)); + // "dflt_value", PhysicalType::VARCHAR + Value def_value = column.default_value ? 
Value(column.default_value->ToString()) : Value(); + output.SetValue(4, index, def_value); + // "pk", PhysicalType::BOOL + output.SetValue(5, index, Value::BOOLEAN(pk)); + } + data.offset = next; } -void PragmaLastProfilingOutput::RegisterFunction(BuiltinFunctions &set) { - set.AddFunction(TableFunction("pragma_last_profiling_output", {}, PragmaLastProfilingOutputFunction, - PragmaLastProfilingOutputBind, PragmaLastProfilingOutputInit)); +static void PragmaTableInfoView(PragmaTableOperatorData &data, ViewCatalogEntry *view, DataChunk &output) { + if (data.offset >= view->types.size()) { + // finished returning values + return; + } + // start returning values + // either fill up the chunk or return all the remaining columns + idx_t next = MinValue(data.offset + STANDARD_VECTOR_SIZE, view->types.size()); + output.SetCardinality(next - data.offset); + + for (idx_t i = data.offset; i < next; i++) { + auto index = i - data.offset; + auto type = view->types[index]; + auto &name = view->aliases[index]; + // return values: + // "cid", PhysicalType::INT32 + + output.SetValue(0, index, Value::INTEGER((int32_t)index)); + // "name", PhysicalType::VARCHAR + output.SetValue(1, index, Value(name)); + // "type", PhysicalType::VARCHAR + output.SetValue(2, index, Value(type.ToString())); + // "notnull", PhysicalType::BOOL + output.SetValue(3, index, Value::BOOLEAN(false)); + // "dflt_value", PhysicalType::VARCHAR + output.SetValue(4, index, Value()); + // "pk", PhysicalType::BOOL + output.SetValue(5, index, Value::BOOLEAN(false)); + } + data.offset = next; +} + +static void PragmaTableInfoFunction(ClientContext &context, const FunctionData *bind_data_p, + FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { + auto &bind_data = (PragmaTableFunctionData &)*bind_data_p; + auto &state = (PragmaTableOperatorData &)*operator_state; + switch (bind_data.entry->type) { + case CatalogType::TABLE_ENTRY: + PragmaTableInfoTable(state, (TableCatalogEntry *)bind_data.entry, 
output); + break; + case CatalogType::VIEW_ENTRY: + PragmaTableInfoView(state, (ViewCatalogEntry *)bind_data.entry, output); + break; + default: + throw NotImplementedException("Unimplemented catalog type for pragma_table_info"); + } +} + +void PragmaTableInfo::RegisterFunction(BuiltinFunctions &set) { + set.AddFunction(TableFunction("pragma_table_info", {LogicalType::VARCHAR}, PragmaTableInfoFunction, + PragmaTableInfoBind, PragmaTableInfoInit)); } } // namespace duckdb + + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/function/table/summary.hpp +// duckdb/parser/query_node/select_node.hpp // // //===----------------------------------------------------------------------===// @@ -76392,137 +85706,81 @@ void PragmaLastProfilingOutput::RegisterFunction(BuiltinFunctions &set) { -namespace duckdb { - -struct SummaryTableFunction { - static void RegisterFunction(BuiltinFunctions &set); -}; - -} // namespace duckdb - namespace duckdb { -struct RangeFunctionBindData : public TableFunctionData { - int64_t start; - int64_t end; - int64_t increment; +enum class AggregateHandling : uint8_t { + STANDARD_HANDLING, // standard handling as in the SELECT clause + NO_AGGREGATES_ALLOWED, // no aggregates allowed: any aggregates in this node will result in an error + FORCE_AGGREGATES // force aggregates: any non-aggregate select list entry will become a GROUP }; -template -static unique_ptr -RangeFunctionBind(ClientContext &context, vector &inputs, unordered_map &named_parameters, - vector &input_table_types, vector &input_table_names, - vector &return_types, vector &names) { - auto result = make_unique(); - if (inputs.size() < 2) { - // single argument: only the end is specified - result->start = 0; - result->end = inputs[0].GetValue(); - } else { - // two arguments: first two arguments are start and end - result->start = inputs[0].GetValue(); - result->end = inputs[1].GetValue(); - } - if (inputs.size() < 3) { - 
result->increment = 1; - } else { - result->increment = inputs[2].GetValue(); - } - if (result->increment == 0) { - throw BinderException("interval cannot be 0!"); - } - if (result->start > result->end && result->increment > 0) { - throw BinderException("start is bigger than end, but increment is positive: cannot generate infinite series"); - } else if (result->start < result->end && result->increment < 0) { - throw BinderException("start is smaller than end, but increment is negative: cannot generate infinite series"); - } - return_types.push_back(LogicalType::BIGINT); - if (GENERATE_SERIES) { - // generate_series has inclusive bounds on the RHS - if (result->increment < 0) { - result->end = result->end - 1; - } else { - result->end = result->end + 1; - } - names.emplace_back("generate_series"); - } else { - names.emplace_back("range"); +//! SelectNode represents a standard SELECT statement +class SelectNode : public QueryNode { +public: + SelectNode() : QueryNode(QueryNodeType::SELECT_NODE), aggregate_handling(AggregateHandling::STANDARD_HANDLING) { } - return move(result); -} -struct RangeFunctionState : public FunctionOperatorData { - RangeFunctionState() : current_idx(0) { + //! The projection list + vector> select_list; + //! The FROM clause + unique_ptr from_table; + //! The WHERE clause + unique_ptr where_clause; + //! list of groups + vector> groups; + //! HAVING clause + unique_ptr having; + //! Aggregate handling during binding + AggregateHandling aggregate_handling; + //! The SAMPLE clause + unique_ptr sample; + + const vector> &GetSelectList() const override { + return select_list; } - int64_t current_idx; +public: + bool Equals(const QueryNode *other) const override; + //! Create a copy of this SelectNode + unique_ptr Copy() override; + //! Serializes a SelectNode to a stand-alone binary blob + void Serialize(Serializer &serializer) override; + //! 
Deserializes a blob back into a SelectNode + static unique_ptr Deserialize(Deserializer &source); }; +} // namespace duckdb -static unique_ptr RangeFunctionInit(ClientContext &context, const FunctionData *bind_data, - vector &column_ids, - TableFilterCollection *filters) { - return make_unique(); -} -static void RangeFunction(ClientContext &context, const FunctionData *bind_data_p, FunctionOperatorData *state_p, - DataChunk *input, DataChunk &output) { - auto &bind_data = (RangeFunctionBindData &)*bind_data_p; - auto &state = (RangeFunctionState &)*state_p; - auto increment = bind_data.increment; - auto end = bind_data.end; - int64_t current_value = bind_data.start + (int64_t)increment * state.current_idx; - // set the result vector as a sequence vector - output.data[0].Sequence(current_value, increment); - idx_t remaining = MinValue((end - current_value) / increment, STANDARD_VECTOR_SIZE); - // increment the index pointer by the remaining count - state.current_idx += remaining; - output.SetCardinality(remaining); -} -unique_ptr RangeCardinality(ClientContext &context, const FunctionData *bind_data_p) { - auto &bind_data = (RangeFunctionBindData &)*bind_data_p; - idx_t cardinality = (bind_data.end - bind_data.start) / bind_data.increment; - return make_unique(cardinality, cardinality); -} -void RangeTableFunction::RegisterFunction(BuiltinFunctions &set) { - TableFunctionSet range("range"); - // single argument range: (end) - implicit start = 0 and increment = 1 - range.AddFunction(TableFunction({LogicalType::BIGINT}, RangeFunction, RangeFunctionBind, RangeFunctionInit, - nullptr, nullptr, nullptr, RangeCardinality)); - // two arguments range: (start, end) - implicit increment = 1 - range.AddFunction(TableFunction({LogicalType::BIGINT, LogicalType::BIGINT}, RangeFunction, RangeFunctionBind, - RangeFunctionInit, nullptr, nullptr, nullptr, RangeCardinality)); - // three arguments range: (start, end, increment) - range.AddFunction(TableFunction({LogicalType::BIGINT, 
LogicalType::BIGINT, LogicalType::BIGINT}, RangeFunction, - RangeFunctionBind, RangeFunctionInit, nullptr, nullptr, nullptr, - RangeCardinality)); - set.AddFunction(range); - // generate_series: similar to range, but inclusive instead of exclusive bounds on the RHS - TableFunctionSet generate_series("generate_series"); - generate_series.AddFunction(TableFunction({LogicalType::BIGINT}, RangeFunction, RangeFunctionBind, - RangeFunctionInit, nullptr, nullptr, nullptr, RangeCardinality)); - generate_series.AddFunction(TableFunction({LogicalType::BIGINT, LogicalType::BIGINT}, RangeFunction, - RangeFunctionBind, RangeFunctionInit, nullptr, nullptr, nullptr, - RangeCardinality)); - generate_series.AddFunction(TableFunction({LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, - RangeFunction, RangeFunctionBind, RangeFunctionInit, nullptr, - nullptr, nullptr, RangeCardinality)); - set.AddFunction(generate_series); -} +namespace duckdb { -void BuiltinFunctions::RegisterTableFunctions() { - CheckpointFunction::RegisterFunction(*this); - GlobTableFunction::RegisterFunction(*this); - RangeTableFunction::RegisterFunction(*this); - RepeatTableFunction::RegisterFunction(*this); - SummaryTableFunction::RegisterFunction(*this); +void BuiltinFunctions::RegisterSQLiteFunctions() { + PragmaVersion::RegisterFunction(*this); + PragmaFunctionPragma::RegisterFunction(*this); + PragmaCollations::RegisterFunction(*this); + PragmaTableInfo::RegisterFunction(*this); + PragmaStorageInfo::RegisterFunction(*this); + PragmaDatabaseSize::RegisterFunction(*this); + PragmaDatabaseList::RegisterFunction(*this); + PragmaLastProfilingOutput::RegisterFunction(*this); + PragmaDetailedProfilingOutput::RegisterFunction(*this); + + DuckDBColumnsFun::RegisterFunction(*this); + DuckDBConstraintsFun::RegisterFunction(*this); + DuckDBIndexesFun::RegisterFunction(*this); + DuckDBSchemasFun::RegisterFunction(*this); + DuckDBDependenciesFun::RegisterFunction(*this); + 
DuckDBSequencesFun::RegisterFunction(*this); + DuckDBTablesFun::RegisterFunction(*this); + DuckDBTypesFun::RegisterFunction(*this); + DuckDBViewsFun::RegisterFunction(*this); } } // namespace duckdb @@ -76538,256 +85796,350 @@ void BuiltinFunctions::RegisterTableFunctions() { -#include + + + + namespace duckdb { -static unique_ptr ReadCSVBind(ClientContext &context, vector &inputs, - unordered_map &named_parameters, - vector &input_table_types, vector &input_table_names, - vector &return_types, vector &names) { - auto result = make_unique(); - auto &options = result->options; +//===--------------------------------------------------------------------===// +// Table Scan +//===--------------------------------------------------------------------===// +bool TableScanParallelStateNext(ClientContext &context, const FunctionData *bind_data, + FunctionOperatorData *operator_state, ParallelState *parallel_state_p); - string file_pattern = inputs[0].str_value; +struct TableScanOperatorData : public FunctionOperatorData { + //! 
The current position in the scan + TableScanState scan_state; + vector column_ids; +}; - auto &fs = FileSystem::GetFileSystem(context); - result->files = fs.Glob(file_pattern); - if (result->files.empty()) { - throw IOException("No files found that match the pattern \"%s\"", file_pattern); - } +static unique_ptr TableScanInit(ClientContext &context, const FunctionData *bind_data_p, + const vector &column_ids, + TableFilterCollection *filters) { + auto result = make_unique(); + auto &transaction = Transaction::GetTransaction(context); + auto &bind_data = (const TableScanBindData &)*bind_data_p; + result->column_ids = column_ids; + result->scan_state.table_filters = filters->table_filters; + bind_data.table->storage->InitializeScan(transaction, result->scan_state, result->column_ids, + filters->table_filters); + return move(result); +} - for (auto &kv : named_parameters) { - if (kv.first == "auto_detect") { - options.auto_detect = kv.second.value_.boolean; - } else if (kv.first == "sep" || kv.first == "delim") { - options.delimiter = kv.second.str_value; - options.has_delimiter = true; - } else if (kv.first == "header") { - options.header = kv.second.value_.boolean; - options.has_header = true; - } else if (kv.first == "quote") { - options.quote = kv.second.str_value; - options.has_quote = true; - } else if (kv.first == "escape") { - options.escape = kv.second.str_value; - options.has_escape = true; - } else if (kv.first == "nullstr") { - options.null_str = kv.second.str_value; - } else if (kv.first == "sample_size") { - int64_t sample_size = kv.second.GetValue(); - if (sample_size < 1 && sample_size != -1) { - throw BinderException("Unsupported parameter for SAMPLE_SIZE: cannot be smaller than 1"); - } - if (sample_size == -1) { - options.sample_chunks = std::numeric_limits::max(); - options.sample_chunk_size = STANDARD_VECTOR_SIZE; - } else if (sample_size <= STANDARD_VECTOR_SIZE) { - options.sample_chunk_size = sample_size; - options.sample_chunks = 1; - } else { 
- options.sample_chunk_size = STANDARD_VECTOR_SIZE; - options.sample_chunks = sample_size / STANDARD_VECTOR_SIZE; - } - } else if (kv.first == "sample_chunk_size") { - options.sample_chunk_size = kv.second.GetValue(); - if (options.sample_chunk_size > STANDARD_VECTOR_SIZE) { - throw BinderException( - "Unsupported parameter for SAMPLE_CHUNK_SIZE: cannot be bigger than STANDARD_VECTOR_SIZE %d", - STANDARD_VECTOR_SIZE); - } else if (options.sample_chunk_size < 1) { - throw BinderException("Unsupported parameter for SAMPLE_CHUNK_SIZE: cannot be smaller than 1"); - } - } else if (kv.first == "sample_chunks") { - options.sample_chunks = kv.second.GetValue(); - if (options.sample_chunks < 1) { - throw BinderException("Unsupported parameter for SAMPLE_CHUNKS: cannot be smaller than 1"); - } - } else if (kv.first == "all_varchar") { - options.all_varchar = kv.second.value_.boolean; - } else if (kv.first == "dateformat") { - options.has_format[LogicalTypeId::DATE] = true; - auto &date_format = options.date_format[LogicalTypeId::DATE]; - date_format.format_specifier = kv.second.str_value; - string error = StrTimeFormat::ParseFormatSpecifier(date_format.format_specifier, date_format); - if (!error.empty()) { - throw InvalidInputException("Could not parse DATEFORMAT: %s", error.c_str()); - } - } else if (kv.first == "timestampformat") { - options.has_format[LogicalTypeId::TIMESTAMP] = true; - auto ×tamp_format = options.date_format[LogicalTypeId::TIMESTAMP]; - timestamp_format.format_specifier = kv.second.str_value; - string error = StrTimeFormat::ParseFormatSpecifier(timestamp_format.format_specifier, timestamp_format); - if (!error.empty()) { - throw InvalidInputException("Could not parse TIMESTAMPFORMAT: %s", error.c_str()); - } - } else if (kv.first == "columns") { - if (kv.second.type().id() != LogicalTypeId::STRUCT) { - throw BinderException("read_csv columns requires a a struct as input"); - } - for (auto &val : kv.second.struct_value) { - names.push_back(val.first); - 
if (val.second.type().id() != LogicalTypeId::VARCHAR) { - throw BinderException("read_csv requires a type specification as string"); - } - return_types.push_back(TransformStringToLogicalType(val.second.str_value.c_str())); - } - if (names.empty()) { - throw BinderException("read_csv requires at least a single column as input!"); - } - } else if (kv.first == "compression") { - options.compression = kv.second.str_value; - } else if (kv.first == "filename") { - result->include_file_name = kv.second.value_.boolean; - } else if (kv.first == "skip") { - options.skip_rows = kv.second.GetValue(); - } - } - if (!options.auto_detect && return_types.empty()) { - throw BinderException("read_csv requires columns to be specified. Use read_csv_auto or set read_csv(..., " - "AUTO_DETECT=TRUE) to automatically guess columns."); - } - if (!(options.compression == "infer" || options.compression == "gzip" || options.compression == "none" || - options.compression.empty())) { - throw BinderException("read_csv currently only supports 'gzip' compression."); +static unique_ptr TableScanStatistics(ClientContext &context, const FunctionData *bind_data_p, + column_t column_id) { + auto &bind_data = (const TableScanBindData &)*bind_data_p; + auto &transaction = Transaction::GetTransaction(context); + if (transaction.storage.Find(bind_data.table->storage.get())) { + // we don't emit any statistics for tables that have outstanding transaction-local data + return nullptr; } - if (options.auto_detect) { - options.file_path = result->files[0]; - auto initial_reader = make_unique(context, options); + return bind_data.table->storage->GetStatistics(context, column_id); +} - return_types.assign(initial_reader->sql_types.begin(), initial_reader->sql_types.end()); - names.assign(initial_reader->col_names.begin(), initial_reader->col_names.end()); - result->initial_reader = move(initial_reader); - } else { - result->sql_types = return_types; - D_ASSERT(return_types.size() == names.size()); - } - if 
(result->include_file_name) { - return_types.push_back(LogicalType::VARCHAR); - names.emplace_back("filename"); +static unique_ptr TableScanParallelInit(ClientContext &context, const FunctionData *bind_data_p, + ParallelState *state, const vector &column_ids, + TableFilterCollection *filters) { + auto result = make_unique(); + result->column_ids = column_ids; + result->scan_state.table_filters = filters->table_filters; + if (!TableScanParallelStateNext(context, bind_data_p, result.get(), state)) { + return nullptr; } return move(result); } -struct ReadCSVOperatorData : public FunctionOperatorData { - //! The CSV reader - unique_ptr csv_reader; - //! The index of the next file to read (i.e. current file + 1) - idx_t file_index; +static void TableScanFunc(ClientContext &context, const FunctionData *bind_data_p, FunctionOperatorData *operator_state, + DataChunk *, DataChunk &output) { + auto &bind_data = (TableScanBindData &)*bind_data_p; + auto &state = (TableScanOperatorData &)*operator_state; + auto &transaction = Transaction::GetTransaction(context); + bind_data.table->storage->Scan(transaction, output, state.scan_state, state.column_ids); + bind_data.chunk_count++; +} + +struct ParallelTableFunctionScanState : public ParallelState { + ParallelTableScanState state; + mutex lock; }; -static unique_ptr ReadCSVInit(ClientContext &context, const FunctionData *bind_data_p, - vector &column_ids, TableFilterCollection *filters) { - auto &bind_data = (ReadCSVData &)*bind_data_p; - auto result = make_unique(); - if (bind_data.initial_reader) { - result->csv_reader = move(bind_data.initial_reader); - } else { - bind_data.options.file_path = bind_data.files[0]; - result->csv_reader = make_unique(context, bind_data.options, bind_data.sql_types); - } - bind_data.bytes_read = 0; - bind_data.file_size = result->csv_reader->file_size; - result->file_index = 1; - return move(result); +idx_t TableScanMaxThreads(ClientContext &context, const FunctionData *bind_data_p) { + auto 
&bind_data = (const TableScanBindData &)*bind_data_p; + return bind_data.table->storage->MaxThreads(context); } -static unique_ptr ReadCSVAutoBind(ClientContext &context, vector &inputs, - unordered_map &named_parameters, - vector &input_table_types, - vector &input_table_names, vector &return_types, - vector &names) { - named_parameters["auto_detect"] = Value::BOOLEAN(true); - return ReadCSVBind(context, inputs, named_parameters, input_table_types, input_table_names, return_types, names); +unique_ptr TableScanInitParallelState(ClientContext &context, const FunctionData *bind_data_p) { + auto &bind_data = (const TableScanBindData &)*bind_data_p; + auto result = make_unique(); + bind_data.table->storage->InitializeParallelScan(result->state); + return move(result); } -static void ReadCSVFunction(ClientContext &context, const FunctionData *bind_data_p, - FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { - auto &bind_data = (ReadCSVData &)*bind_data_p; - auto &data = (ReadCSVOperatorData &)*operator_state; - do { - data.csv_reader->ParseCSV(output); - bind_data.bytes_read = data.csv_reader->bytes_in_chunk; - if (output.size() == 0 && data.file_index < bind_data.files.size()) { - // exhausted this file, but we have more files we can read - // open the next file and increment the counter - bind_data.options.file_path = bind_data.files[data.file_index]; - data.csv_reader = make_unique(context, bind_data.options, data.csv_reader->sql_types); - data.file_index++; - } else { - break; - } - } while (true); - if (bind_data.include_file_name) { - auto &col = output.data.back(); - col.SetValue(0, Value(data.csv_reader->options.file_path)); - col.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} +bool TableScanParallelStateNext(ClientContext &context, const FunctionData *bind_data_p, + FunctionOperatorData *operator_state, ParallelState *parallel_state_p) { + auto &bind_data = (const TableScanBindData &)*bind_data_p; + auto ¶llel_state = 
(ParallelTableFunctionScanState &)*parallel_state_p; + auto &state = (TableScanOperatorData &)*operator_state; -static void ReadCSVAddNamedParameters(TableFunction &table_function) { - table_function.named_parameters["sep"] = LogicalType::VARCHAR; - table_function.named_parameters["delim"] = LogicalType::VARCHAR; - table_function.named_parameters["quote"] = LogicalType::VARCHAR; - table_function.named_parameters["escape"] = LogicalType::VARCHAR; - table_function.named_parameters["nullstr"] = LogicalType::VARCHAR; - table_function.named_parameters["columns"] = LogicalType::ANY; - table_function.named_parameters["header"] = LogicalType::BOOLEAN; - table_function.named_parameters["auto_detect"] = LogicalType::BOOLEAN; - table_function.named_parameters["sample_size"] = LogicalType::BIGINT; - table_function.named_parameters["sample_chunk_size"] = LogicalType::BIGINT; - table_function.named_parameters["sample_chunks"] = LogicalType::BIGINT; - table_function.named_parameters["all_varchar"] = LogicalType::BOOLEAN; - table_function.named_parameters["dateformat"] = LogicalType::VARCHAR; - table_function.named_parameters["timestampformat"] = LogicalType::VARCHAR; - table_function.named_parameters["compression"] = LogicalType::VARCHAR; - table_function.named_parameters["filename"] = LogicalType::BOOLEAN; - table_function.named_parameters["skip"] = LogicalType::BIGINT; + lock_guard parallel_lock(parallel_state.lock); + return bind_data.table->storage->NextParallelScan(context, parallel_state.state, state.scan_state, + state.column_ids); } -int CSVReaderProgress(ClientContext &context, const FunctionData *bind_data_p) { - auto &bind_data = (ReadCSVData &)*bind_data_p; - if (bind_data.file_size == 0) { +int TableScanProgress(ClientContext &context, const FunctionData *bind_data_p) { + auto &bind_data = (TableScanBindData &)*bind_data_p; + idx_t total_rows = bind_data.table->storage->GetTotalRows(); + if (total_rows == 0 || total_rows < STANDARD_VECTOR_SIZE) { + //! 
Table is either empty or smaller than a vector size, so it is finished + return 100; + } + auto percentage = (bind_data.chunk_count * STANDARD_VECTOR_SIZE * 100) / total_rows; + if (percentage > 100) { + //! In case the last chunk has less elements than STANDARD_VECTOR_SIZE, if our percentage is over 100 + //! It means we finished this table. return 100; } - auto percentage = bind_data.bytes_read * 100 / bind_data.file_size; return percentage; } -TableFunction ReadCSVTableFunction::GetFunction() { - TableFunction read_csv("read_csv", {LogicalType::VARCHAR}, ReadCSVFunction, ReadCSVBind, ReadCSVInit); - read_csv.table_scan_progress = CSVReaderProgress; - ReadCSVAddNamedParameters(read_csv); - return read_csv; +void TableScanDependency(unordered_set &entries, const FunctionData *bind_data_p) { + auto &bind_data = (const TableScanBindData &)*bind_data_p; + entries.insert(bind_data.table); } -void ReadCSVTableFunction::RegisterFunction(BuiltinFunctions &set) { - set.AddFunction(ReadCSVTableFunction::GetFunction()); +unique_ptr TableScanCardinality(ClientContext &context, const FunctionData *bind_data_p) { + auto &bind_data = (const TableScanBindData &)*bind_data_p; + auto &transaction = Transaction::GetTransaction(context); + idx_t estimated_cardinality = + bind_data.table->storage->info->cardinality + transaction.storage.AddedRows(bind_data.table->storage.get()); + return make_unique(bind_data.table->storage->info->cardinality, estimated_cardinality); +} - TableFunction read_csv_auto("read_csv_auto", {LogicalType::VARCHAR}, ReadCSVFunction, ReadCSVAutoBind, ReadCSVInit); - read_csv_auto.table_scan_progress = CSVReaderProgress; - ReadCSVAddNamedParameters(read_csv_auto); - set.AddFunction(read_csv_auto); +//===--------------------------------------------------------------------===// +// Index Scan +//===--------------------------------------------------------------------===// +struct IndexScanOperatorData : public FunctionOperatorData { + explicit 
IndexScanOperatorData(data_ptr_t row_id_data) : row_ids(LOGICAL_ROW_TYPE, row_id_data) { + } + + Vector row_ids; + ColumnFetchState fetch_state; + LocalScanState local_storage_state; + vector column_ids; + bool finished; +}; + +static unique_ptr IndexScanInit(ClientContext &context, const FunctionData *bind_data_p, + const vector &column_ids, + TableFilterCollection *filters) { + auto &bind_data = (const TableScanBindData &)*bind_data_p; + data_ptr_t row_id_data = nullptr; + if (!bind_data.result_ids.empty()) { + row_id_data = (data_ptr_t)&bind_data.result_ids[0]; + } + auto result = make_unique(row_id_data); + auto &transaction = Transaction::GetTransaction(context); + result->column_ids = column_ids; + transaction.storage.InitializeScan(bind_data.table->storage.get(), result->local_storage_state, + filters->table_filters); + + result->finished = false; + return move(result); } -unique_ptr ReadCSVReplacement(const string &table_name, void *data) { - if (!StringUtil::EndsWith(table_name, ".csv") && !StringUtil::EndsWith(table_name, ".tsv") && - !StringUtil::EndsWith(table_name, ".csv.gz")) { - return nullptr; +static void IndexScanFunction(ClientContext &context, const FunctionData *bind_data_p, + FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { + auto &bind_data = (const TableScanBindData &)*bind_data_p; + auto &state = (IndexScanOperatorData &)*operator_state; + auto &transaction = Transaction::GetTransaction(context); + if (!state.finished) { + bind_data.table->storage->Fetch(transaction, output, state.column_ids, state.row_ids, + bind_data.result_ids.size(), state.fetch_state); + state.finished = true; + } + if (output.size() == 0) { + transaction.storage.Scan(state.local_storage_state, state.column_ids, output); } - auto table_function = make_unique(); - vector> children; - children.push_back(make_unique(Value(table_name))); - table_function->function = make_unique("read_csv_auto", children); - return table_function; } -void 
BuiltinFunctions::RegisterReadFunctions() { - CSVCopyFunction::RegisterFunction(*this); - ReadCSVTableFunction::RegisterFunction(*this); +static void RewriteIndexExpression(Index &index, LogicalGet &get, Expression &expr, bool &rewrite_possible) { + if (expr.type == ExpressionType::BOUND_COLUMN_REF) { + auto &bound_colref = (BoundColumnRefExpression &)expr; + // bound column ref: rewrite to fit in the current set of bound column ids + bound_colref.binding.table_index = get.table_index; + column_t referenced_column = index.column_ids[bound_colref.binding.column_index]; + // search for the referenced column in the set of column_ids + for (idx_t i = 0; i < get.column_ids.size(); i++) { + if (get.column_ids[i] == referenced_column) { + bound_colref.binding.column_index = i; + return; + } + } + // column id not found in bound columns in the LogicalGet: rewrite not possible + rewrite_possible = false; + } + ExpressionIterator::EnumerateChildren( + expr, [&](Expression &child) { RewriteIndexExpression(index, get, child, rewrite_possible); }); +} - auto &config = DBConfig::GetConfig(context); - config.replacement_scans.emplace_back(ReadCSVReplacement); +void TableScanPushdownComplexFilter(ClientContext &context, LogicalGet &get, FunctionData *bind_data_p, + vector> &filters) { + auto &bind_data = (TableScanBindData &)*bind_data_p; + auto table = bind_data.table; + auto &storage = *table->storage; + + if (bind_data.is_index_scan) { + return; + } + if (filters.empty()) { + // no indexes or no filters: skip the pushdown + return; + } + // behold + storage.info->indexes.Scan([&](Index &index) { + // first rewrite the index expression so the ColumnBindings align with the column bindings of the current table + if (index.unbound_expressions.size() > 1) { + return false; + } + auto index_expression = index.unbound_expressions[0]->Copy(); + bool rewrite_possible = true; + RewriteIndexExpression(index, get, *index_expression, rewrite_possible); + if (!rewrite_possible) { + // could 
not rewrite! + return false; + } + + Value low_value, high_value, equal_value; + ExpressionType low_comparison_type = ExpressionType::INVALID, high_comparison_type = ExpressionType::INVALID; + // try to find a matching index for any of the filter expressions + for (auto &filter : filters) { + auto expr = filter.get(); + + // create a matcher for a comparison with a constant + ComparisonExpressionMatcher matcher; + // match on a comparison type + matcher.expr_type = make_unique(); + // match on a constant comparison with the indexed expression + matcher.matchers.push_back(make_unique(index_expression.get())); + matcher.matchers.push_back(make_unique()); + + matcher.policy = SetMatcher::Policy::UNORDERED; + + vector bindings; + if (matcher.Match(expr, bindings)) { + // range or equality comparison with constant value + // we can use our index here + // bindings[0] = the expression + // bindings[1] = the index expression + // bindings[2] = the constant + auto comparison = (BoundComparisonExpression *)bindings[0]; + D_ASSERT(bindings[0]->GetExpressionClass() == ExpressionClass::BOUND_COMPARISON); + D_ASSERT(bindings[2]->type == ExpressionType::VALUE_CONSTANT); + + auto constant_value = ((BoundConstantExpression *)bindings[2])->value; + auto comparison_type = comparison->type; + if (comparison->left->type == ExpressionType::VALUE_CONSTANT) { + // the expression is on the right side, we flip them around + comparison_type = FlipComparisionExpression(comparison_type); + } + if (comparison_type == ExpressionType::COMPARE_EQUAL) { + // equality value + // equality overrides any other bounds so we just break here + equal_value = constant_value; + break; + } else if (comparison_type == ExpressionType::COMPARE_GREATERTHANOREQUALTO || + comparison_type == ExpressionType::COMPARE_GREATERTHAN) { + // greater than means this is a lower bound + low_value = constant_value; + low_comparison_type = comparison_type; + } else { + // smaller than means this is an upper bound + high_value 
= constant_value; + high_comparison_type = comparison_type; + } + } else if (expr->type == ExpressionType::COMPARE_BETWEEN) { + // BETWEEN expression + auto &between = (BoundBetweenExpression &)*expr; + if (!between.input->Equals(index_expression.get())) { + // expression doesn't match the current index expression + continue; + } + if (between.lower->type != ExpressionType::VALUE_CONSTANT || + between.upper->type != ExpressionType::VALUE_CONSTANT) { + // not a constant comparison + continue; + } + low_value = ((BoundConstantExpression &)*between.lower).value; + low_comparison_type = between.lower_inclusive ? ExpressionType::COMPARE_GREATERTHANOREQUALTO + : ExpressionType::COMPARE_GREATERTHAN; + high_value = ((BoundConstantExpression &)*between.upper).value; + high_comparison_type = between.upper_inclusive ? ExpressionType::COMPARE_LESSTHANOREQUALTO + : ExpressionType::COMPARE_LESSTHAN; + break; + } + } + if (!equal_value.is_null || !low_value.is_null || !high_value.is_null) { + // we can scan this index using this predicate: try a scan + auto &transaction = Transaction::GetTransaction(context); + unique_ptr index_state; + if (!equal_value.is_null) { + // equality predicate + index_state = + index.InitializeScanSinglePredicate(transaction, equal_value, ExpressionType::COMPARE_EQUAL); + } else if (!low_value.is_null && !high_value.is_null) { + // two-sided predicate + index_state = index.InitializeScanTwoPredicates(transaction, low_value, low_comparison_type, high_value, + high_comparison_type); + } else if (!low_value.is_null) { + // less than predicate + index_state = index.InitializeScanSinglePredicate(transaction, low_value, low_comparison_type); + } else { + D_ASSERT(!high_value.is_null); + index_state = index.InitializeScanSinglePredicate(transaction, high_value, high_comparison_type); + } + if (index.Scan(transaction, storage, *index_state, STANDARD_VECTOR_SIZE, bind_data.result_ids)) { + // use an index scan! 
+ bind_data.is_index_scan = true; + get.function.init = IndexScanInit; + get.function.function = IndexScanFunction; + get.function.max_threads = nullptr; + get.function.init_parallel_state = nullptr; + get.function.parallel_state_next = nullptr; + get.function.table_scan_progress = nullptr; + get.function.filter_pushdown = false; + } else { + bind_data.result_ids.clear(); + } + return true; + } + return false; + }); +} + +string TableScanToString(const FunctionData *bind_data_p) { + auto &bind_data = (const TableScanBindData &)*bind_data_p; + string result = bind_data.table->name; + return result; +} + +TableFunction TableScanFunction::GetFunction() { + TableFunction scan_function("seq_scan", {}, TableScanFunc); + scan_function.init = TableScanInit; + scan_function.statistics = TableScanStatistics; + scan_function.dependency = TableScanDependency; + scan_function.cardinality = TableScanCardinality; + scan_function.pushdown_complex_filter = TableScanPushdownComplexFilter; + scan_function.to_string = TableScanToString; + scan_function.max_threads = TableScanMaxThreads; + scan_function.init_parallel_state = TableScanInitParallelState; + scan_function.parallel_init = TableScanParallelInit; + scan_function.parallel_state_next = TableScanParallelStateNext; + scan_function.table_scan_progress = TableScanProgress; + scan_function.projection_pushdown = true; + scan_function.filter_pushdown = true; + return scan_function; } } // namespace duckdb @@ -76796,55 +86148,132 @@ void BuiltinFunctions::RegisterReadFunctions() { namespace duckdb { -struct RepeatFunctionData : public TableFunctionData { - RepeatFunctionData(Value value, idx_t target_count) : value(move(value)), target_count(target_count) { +struct UnnestFunctionData : public TableFunctionData { + explicit UnnestFunctionData(Value value) : value(move(value)) { } Value value; - idx_t target_count; }; -struct RepeatOperatorData : public FunctionOperatorData { - RepeatOperatorData() : current_count(0) { +struct 
UnnestOperatorData : public FunctionOperatorData { + UnnestOperatorData() : current_count(0) { } + idx_t current_count; }; -static unique_ptr RepeatBind(ClientContext &context, vector &inputs, +static unique_ptr UnnestBind(ClientContext &context, vector &inputs, unordered_map &named_parameters, vector &input_table_types, vector &input_table_names, vector &return_types, vector &names) { - // the repeat function returns the type of the first argument - return_types.push_back(inputs[0].type()); + return_types.push_back(ListType::GetChildType(inputs[0].type())); names.push_back(inputs[0].ToString()); - return make_unique(inputs[0], inputs[1].GetValue()); + return make_unique(inputs[0]); } -static unique_ptr RepeatInit(ClientContext &context, const FunctionData *bind_data, - vector &column_ids, TableFilterCollection *filters) { - return make_unique(); +static unique_ptr UnnestInit(ClientContext &context, const FunctionData *bind_data, + const vector &column_ids, TableFilterCollection *filters) { + return make_unique(); } -static void RepeatFunction(ClientContext &context, const FunctionData *bind_data_p, +static void UnnestFunction(ClientContext &context, const FunctionData *bind_data_p, FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { - auto &bind_data = (RepeatFunctionData &)*bind_data_p; - auto &state = (RepeatOperatorData &)*operator_state; + auto &bind_data = (UnnestFunctionData &)*bind_data_p; + auto &state = (UnnestOperatorData &)*operator_state; - idx_t remaining = MinValue(bind_data.target_count - state.current_count, STANDARD_VECTOR_SIZE); - output.data[0].Reference(bind_data.value); - output.SetCardinality(remaining); - state.current_count += remaining; + auto &list_value = bind_data.value.list_value; + idx_t count = 0; + for (; state.current_count < list_value.size() && count < STANDARD_VECTOR_SIZE; state.current_count++) { + output.data[0].SetValue(count, list_value[state.current_count]); + count++; + } + 
output.SetCardinality(count); } -static unique_ptr RepeatCardinality(ClientContext &context, const FunctionData *bind_data_p) { - auto &bind_data = (RepeatFunctionData &)*bind_data_p; - return make_unique(bind_data.target_count, bind_data.target_count); +void UnnestTableFunction::RegisterFunction(BuiltinFunctions &set) { + TableFunction unnest_function("unnest", {LogicalTypeId::LIST}, UnnestFunction, UnnestBind, UnnestInit); + set.AddFunction(unnest_function); } -void RepeatTableFunction::RegisterFunction(BuiltinFunctions &set) { - TableFunction repeat("repeat", {LogicalType::ANY, LogicalType::BIGINT}, RepeatFunction, RepeatBind, RepeatInit, - nullptr, nullptr, nullptr, RepeatCardinality); - set.AddFunction(repeat); +} // namespace duckdb + + + +namespace duckdb { + +struct PragmaVersionData : public FunctionOperatorData { + PragmaVersionData() : finished(false) { + } + bool finished; +}; + +static unique_ptr PragmaVersionBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, vector &return_types, + vector &names) { + names.emplace_back("library_version"); + return_types.push_back(LogicalType::VARCHAR); + names.emplace_back("source_id"); + return_types.push_back(LogicalType::VARCHAR); + return nullptr; +} + +static unique_ptr PragmaVersionInit(ClientContext &context, const FunctionData *bind_data, + const vector &column_ids, + TableFilterCollection *filters) { + return make_unique(); +} + +static void PragmaVersionFunction(ClientContext &context, const FunctionData *bind_data, + FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { + auto &data = (PragmaVersionData &)*operator_state; + if (data.finished) { + // finished returning values + return; + } + output.SetCardinality(1); + output.SetValue(0, 0, DuckDB::LibraryVersion()); + output.SetValue(1, 0, DuckDB::SourceID()); + data.finished = true; +} + +void PragmaVersion::RegisterFunction(BuiltinFunctions &set) 
{ + set.AddFunction(TableFunction("pragma_version", {}, PragmaVersionFunction, PragmaVersionBind, PragmaVersionInit)); +} + +const char *DuckDB::SourceID() { + return DUCKDB_SOURCE_ID; +} + +const char *DuckDB::LibraryVersion() { + return DUCKDB_VERSION; +} + +} // namespace duckdb + + + + + + + +namespace duckdb { + +void UDFWrapper::RegisterFunction(string name, vector args, LogicalType ret_type, + scalar_function_t udf_function, ClientContext &context, LogicalType varargs) { + + ScalarFunction scalar_function(move(name), move(args), move(ret_type), move(udf_function)); + scalar_function.varargs = move(varargs); + CreateScalarFunctionInfo info(scalar_function); + info.schema = DEFAULT_SCHEMA; + context.RegisterFunction(&info); +} + +void UDFWrapper::RegisterAggrFunction(AggregateFunction aggr_function, ClientContext &context, LogicalType varargs) { + aggr_function.varargs = move(varargs); + CreateAggregateFunctionInfo info(move(aggr_function)); + context.RegisterFunction(&info); } } // namespace duckdb @@ -76855,240 +86284,279 @@ void RepeatTableFunction::RegisterFunction(BuiltinFunctions &set) { -namespace duckdb { - -struct PragmaCollateData : public FunctionOperatorData { - PragmaCollateData() : offset(0) { - } - - vector entries; - idx_t offset; -}; -static unique_ptr PragmaCollateBind(ClientContext &context, vector &inputs, - unordered_map &named_parameters, - vector &input_table_types, - vector &input_table_names, vector &return_types, - vector &names) { - names.emplace_back("collname"); - return_types.push_back(LogicalType::VARCHAR); - return nullptr; -} -unique_ptr PragmaCollateInit(ClientContext &context, const FunctionData *bind_data, - vector &column_ids, TableFilterCollection *filters) { - auto result = make_unique(); - Catalog::GetCatalog(context).schemas->Scan(context, [&](CatalogEntry *entry) { - auto schema = (SchemaCatalogEntry *)entry; - schema->Scan(context, CatalogType::COLLATION_ENTRY, - [&](CatalogEntry *entry) { 
result->entries.push_back(entry->name); }); - }); - return move(result); -} +namespace duckdb { -static void PragmaCollateFunction(ClientContext &context, const FunctionData *bind_data, - FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { - auto &data = (PragmaCollateData &)*operator_state; - if (data.offset >= data.entries.size()) { - // finished returning values - return; +Appender::Appender(Connection &con, const string &schema_name, const string &table_name) + : context(con.context), column(0) { + description = con.TableInfo(schema_name, table_name); + if (!description) { + // table could not be found + throw CatalogException(StringUtil::Format("Table \"%s.%s\" could not be found", schema_name, table_name)); } - idx_t next = MinValue(data.offset + STANDARD_VECTOR_SIZE, data.entries.size()); - output.SetCardinality(next - data.offset); - for (idx_t i = data.offset; i < next; i++) { - auto index = i - data.offset; - output.SetValue(0, index, Value(data.entries[i])); + for (auto &column : description->columns) { + types.push_back(column.type); } - - data.offset = next; + InitializeChunk(); } -void PragmaCollations::RegisterFunction(BuiltinFunctions &set) { - set.AddFunction( - TableFunction("pragma_collations", {}, PragmaCollateFunction, PragmaCollateBind, PragmaCollateInit)); +Appender::Appender(Connection &con, const string &table_name) : Appender(con, DEFAULT_SCHEMA, table_name) { } -} // namespace duckdb - - - - -namespace duckdb { - -struct PragmaDatabaseListData : public FunctionOperatorData { - PragmaDatabaseListData() : finished(false) { +Appender::~Appender() { + // flush any remaining chunks + // wrapped in a try/catch because Close() can throw if the table was dropped in the meantime + try { + Close(); + } catch (...) 
{ } +} - bool finished; -}; - -static unique_ptr PragmaDatabaseListBind(ClientContext &context, vector &inputs, - unordered_map &named_parameters, - vector &input_table_types, - vector &input_table_names, - vector &return_types, vector &names) { - names.emplace_back("seq"); - return_types.push_back(LogicalType::INTEGER); - - names.emplace_back("name"); - return_types.push_back(LogicalType::VARCHAR); +void Appender::InitializeChunk() { + chunk = make_unique(); + chunk->Initialize(types); +} - names.emplace_back("file"); - return_types.push_back(LogicalType::VARCHAR); +void Appender::BeginRow() { +} - return nullptr; +void Appender::EndRow() { + // check that all rows have been appended to + if (column != chunk->ColumnCount()) { + throw InvalidInputException("Call to EndRow before all rows have been appended to!"); + } + column = 0; + chunk->SetCardinality(chunk->size() + 1); + if (chunk->size() >= STANDARD_VECTOR_SIZE) { + FlushChunk(); + } } -unique_ptr PragmaDatabaseListInit(ClientContext &context, const FunctionData *bind_data, - vector &column_ids, TableFilterCollection *filters) { - return make_unique(); +template +void Appender::AppendValueInternal(Vector &col, SRC input) { + FlatVector::GetData(col)[chunk->size()] = Cast::Operation(input); } -void PragmaDatabaseListFunction(ClientContext &context, const FunctionData *bind_data, - FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { - auto &data = (PragmaDatabaseListData &)*operator_state; - if (data.finished) { +template +void Appender::AppendValueInternal(T input) { + if (column >= types.size()) { + throw InvalidInputException("Too many appends for chunk!"); + } + auto &col = chunk->data[column]; + switch (col.GetType().InternalType()) { + case PhysicalType::BOOL: + AppendValueInternal(col, input); + break; + case PhysicalType::UINT8: + AppendValueInternal(col, input); + break; + case PhysicalType::INT8: + AppendValueInternal(col, input); + break; + case PhysicalType::UINT16: + 
AppendValueInternal(col, input); + break; + case PhysicalType::INT16: + AppendValueInternal(col, input); + break; + case PhysicalType::UINT32: + AppendValueInternal(col, input); + break; + case PhysicalType::INT32: + AppendValueInternal(col, input); + break; + case PhysicalType::UINT64: + AppendValueInternal(col, input); + break; + case PhysicalType::INT64: + AppendValueInternal(col, input); + break; + case PhysicalType::INT128: + AppendValueInternal(col, input); + break; + case PhysicalType::FLOAT: + AppendValueInternal(col, input); + break; + case PhysicalType::DOUBLE: + AppendValueInternal(col, input); + break; + case PhysicalType::VARCHAR: + FlatVector::GetData(col)[chunk->size()] = StringCast::Operation(input, col); + break; + default: + AppendValue(Value::CreateValue(input)); return; } + column++; +} - output.SetCardinality(1); - output.data[0].SetValue(0, Value::INTEGER(0)); - output.data[1].SetValue(0, Value("main")); - output.data[2].SetValue(0, Value(StorageManager::GetStorageManager(context).GetDBPath())); - - data.finished = true; +template <> +void Appender::Append(bool value) { + AppendValueInternal(value); } -void PragmaDatabaseList::RegisterFunction(BuiltinFunctions &set) { - set.AddFunction(TableFunction("pragma_database_list", {}, PragmaDatabaseListFunction, PragmaDatabaseListBind, - PragmaDatabaseListInit)); +template <> +void Appender::Append(int8_t value) { + AppendValueInternal(value); } -} // namespace duckdb +template <> +void Appender::Append(int16_t value) { + AppendValueInternal(value); +} +template <> +void Appender::Append(int32_t value) { + AppendValueInternal(value); +} +template <> +void Appender::Append(int64_t value) { + AppendValueInternal(value); +} +template <> +void Appender::Append(hugeint_t value) { + AppendValueInternal(value); +} +template <> +void Appender::Append(uint8_t value) { + AppendValueInternal(value); +} +template <> +void Appender::Append(uint16_t value) { + AppendValueInternal(value); +} +template <> +void 
Appender::Append(uint32_t value) { + AppendValueInternal(value); +} -namespace duckdb { +template <> +void Appender::Append(uint64_t value) { + AppendValueInternal(value); +} -struct PragmaDatabaseSizeData : public FunctionOperatorData { - PragmaDatabaseSizeData() : finished(false) { - } +template <> +void Appender::Append(const char *value) { + AppendValueInternal(string_t(value)); +} - bool finished; -}; +void Appender::Append(const char *value, uint32_t length) { + AppendValueInternal(string_t(value, length)); +} -static unique_ptr PragmaDatabaseSizeBind(ClientContext &context, vector &inputs, - unordered_map &named_parameters, - vector &input_table_types, - vector &input_table_names, - vector &return_types, vector &names) { - names.emplace_back("database_size"); - return_types.push_back(LogicalType::VARCHAR); +template <> +void Appender::Append(string_t value) { + AppendValueInternal(value); +} - names.emplace_back("block_size"); - return_types.push_back(LogicalType::BIGINT); +template <> +void Appender::Append(float value) { + if (!Value::FloatIsValid(value)) { + throw InvalidInputException("Float value is out of range!"); + } + AppendValueInternal(value); +} - names.emplace_back("total_blocks"); - return_types.push_back(LogicalType::BIGINT); +template <> +void Appender::Append(double value) { + if (!Value::DoubleIsValid(value)) { + throw InvalidInputException("Double value is out of range!"); + } + AppendValueInternal(value); +} - names.emplace_back("used_blocks"); - return_types.push_back(LogicalType::BIGINT); +template <> +void Appender::Append(date_t value) { + AppendValueInternal(value.days); +} - names.emplace_back("free_blocks"); - return_types.push_back(LogicalType::BIGINT); +template <> +void Appender::Append(dtime_t value) { + AppendValueInternal(value.micros); +} - names.emplace_back("wal_size"); - return_types.push_back(LogicalType::VARCHAR); +template <> +void Appender::Append(timestamp_t value) { + AppendValueInternal(value.value); +} - 
names.emplace_back("memory_usage"); - return_types.push_back(LogicalType::VARCHAR); +template <> +void Appender::Append(interval_t value) { + AppendValueInternal(value); +} - names.emplace_back("memory_limit"); - return_types.push_back(LogicalType::VARCHAR); +template <> +void Appender::Append(Value value) { // NOLINT: template shtuff + if (column >= chunk->ColumnCount()) { + throw InvalidInputException("Too many appends for chunk!"); + } + AppendValue(value); +} - return nullptr; +template <> +void Appender::Append(std::nullptr_t value) { + if (column >= chunk->ColumnCount()) { + throw InvalidInputException("Too many appends for chunk!"); + } + auto &col = chunk->data[column++]; + FlatVector::SetNull(col, chunk->size(), true); } -unique_ptr PragmaDatabaseSizeInit(ClientContext &context, const FunctionData *bind_data, - vector &column_ids, TableFilterCollection *filters) { - return make_unique(); +void Appender::AppendValue(const Value &value) { + chunk->SetValue(column, chunk->size(), value); + column++; } -static string BytesToHumanReadableString(idx_t bytes) { - string db_size; - auto kilobytes = bytes / 1000; - auto megabytes = kilobytes / 1000; - kilobytes -= megabytes * 1000; - auto gigabytes = megabytes / 1000; - megabytes -= gigabytes * 1000; - auto terabytes = gigabytes / 1000; - gigabytes -= terabytes * 1000; - if (terabytes > 0) { - return to_string(terabytes) + "." + to_string(gigabytes / 100) + "TB"; - } else if (gigabytes > 0) { - return to_string(gigabytes) + "." + to_string(megabytes / 100) + "GB"; - } else if (megabytes > 0) { - return to_string(megabytes) + "." 
+ to_string(kilobytes / 100) + "MB"; - } else if (kilobytes > 0) { - return to_string(kilobytes) + "KB"; - } else { - return to_string(bytes) + " bytes"; +void Appender::FlushChunk() { + if (chunk->size() == 0) { + return; + } + collection.Append(move(chunk)); + InitializeChunk(); + if (collection.ChunkCount() >= FLUSH_COUNT) { + Flush(); } } -void PragmaDatabaseSizeFunction(ClientContext &context, const FunctionData *bind_data, - FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { - auto &data = (PragmaDatabaseSizeData &)*operator_state; - if (data.finished) { - return; +void Appender::Flush() { + // check that all vectors have the same length before appending + if (column != 0) { + throw InvalidInputException("Failed to Flush appender: incomplete append to row!"); } - auto &storage = StorageManager::GetStorageManager(context); - auto &block_manager = BlockManager::GetBlockManager(context); - auto &buffer_manager = BufferManager::GetBufferManager(context); - output.SetCardinality(1); - if (!storage.InMemory()) { - auto total_blocks = block_manager.TotalBlocks(); - auto block_size = Storage::BLOCK_ALLOC_SIZE; - auto free_blocks = block_manager.FreeBlocks(); - auto used_blocks = total_blocks - free_blocks; - auto bytes = (total_blocks * block_size); - auto wal_size = storage.GetWriteAheadLog()->GetWALSize(); - output.data[0].SetValue(0, Value(BytesToHumanReadableString(bytes))); - output.data[1].SetValue(0, Value::BIGINT(block_size)); - output.data[2].SetValue(0, Value::BIGINT(total_blocks)); - output.data[3].SetValue(0, Value::BIGINT(used_blocks)); - output.data[4].SetValue(0, Value::BIGINT(free_blocks)); - output.data[5].SetValue(0, Value(BytesToHumanReadableString(wal_size))); - } else { - output.data[0].SetValue(0, Value()); - output.data[1].SetValue(0, Value()); - output.data[2].SetValue(0, Value()); - output.data[3].SetValue(0, Value()); - output.data[4].SetValue(0, Value()); - output.data[5].SetValue(0, Value()); + FlushChunk(); + if 
(collection.Count() == 0) { + return; } - output.data[6].SetValue(0, Value(BytesToHumanReadableString(buffer_manager.GetUsedMemory()))); - auto max_memory = buffer_manager.GetMaxMemory(); - output.data[7].SetValue(0, max_memory == (idx_t)-1 ? Value("Unlimited") - : Value(BytesToHumanReadableString(max_memory))); + context->Append(*description, collection); - data.finished = true; + collection.Reset(); + column = 0; } -void PragmaDatabaseSize::RegisterFunction(BuiltinFunctions &set) { - set.AddFunction(TableFunction("pragma_database_size", {}, PragmaDatabaseSizeFunction, PragmaDatabaseSizeBind, - PragmaDatabaseSizeInit)); +void Appender::Close() { + if (column == 0 || column == types.size()) { + Flush(); + } } } // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/main/capi_internal.hpp +// +// +//===----------------------------------------------------------------------===// @@ -77096,1238 +86564,1496 @@ void PragmaDatabaseSize::RegisterFunction(BuiltinFunctions &set) { +#include +#include +#ifdef _WIN32 +#ifndef strdup +#define strdup _strdup +#endif +#endif namespace duckdb { -struct PragmaFunctionsData : public FunctionOperatorData { - PragmaFunctionsData() : offset(0), offset_in_entry(0) { - } - - vector entries; - idx_t offset; - idx_t offset_in_entry; +struct DatabaseData { + unique_ptr database; }; -static unique_ptr PragmaFunctionsBind(ClientContext &context, vector &inputs, - unordered_map &named_parameters, - vector &input_table_types, - vector &input_table_names, - vector &return_types, vector &names) { - names.emplace_back("name"); - return_types.push_back(LogicalType::VARCHAR); - - names.emplace_back("type"); - return_types.push_back(LogicalType::VARCHAR); +struct PreparedStatementWrapper { + unique_ptr statement; + vector values; +}; - names.emplace_back("parameters"); - child_list_t child_types; - child_types.push_back(std::make_pair("", LogicalType::VARCHAR)); - LogicalType 
param_types(LogicalTypeId::LIST, move(child_types)); - return_types.push_back(move(param_types)); +struct ArrowResultWrapper { + unique_ptr result; + unique_ptr current_chunk; +}; - names.emplace_back("varargs"); - return_types.push_back(LogicalType::VARCHAR); +struct AppenderWrapper { + unique_ptr appender; + string error; +}; - names.emplace_back("return_type"); - return_types.push_back(LogicalType::VARCHAR); +duckdb_type ConvertCPPTypeToC(const LogicalType &type); +idx_t GetCTypeSize(duckdb_type type); +duckdb_state duckdb_translate_result(MaterializedQueryResult *result, duckdb_result *out); - names.emplace_back("side_effects"); - return_types.push_back(LogicalType::BOOLEAN); +} // namespace duckdb - return nullptr; -} -unique_ptr PragmaFunctionsInit(ClientContext &context, const FunctionData *bind_data, - vector &column_ids, TableFilterCollection *filters) { - auto result = make_unique(); +using duckdb::Appender; +using duckdb::AppenderWrapper; +using duckdb::Connection; +using duckdb::date_t; +using duckdb::dtime_t; +using duckdb::hugeint_t; +using duckdb::interval_t; +using duckdb::string_t; +using duckdb::timestamp_t; - Catalog::GetCatalog(context).schemas->Scan(context, [&](CatalogEntry *entry) { - auto schema = (SchemaCatalogEntry *)entry; - schema->Scan(context, CatalogType::SCALAR_FUNCTION_ENTRY, - [&](CatalogEntry *entry) { result->entries.push_back(entry); }); - }); +duckdb_state duckdb_appender_create(duckdb_connection connection, const char *schema, const char *table, + duckdb_appender *out_appender) { + Connection *conn = (Connection *)connection; - return move(result); + if (!connection || !table || !out_appender) { + return DuckDBError; + } + if (schema == nullptr) { + schema = DEFAULT_SCHEMA; + } + auto wrapper = new AppenderWrapper(); + *out_appender = (duckdb_appender)wrapper; + try { + wrapper->appender = duckdb::make_unique(*conn, schema, table); + } catch (std::exception &ex) { + wrapper->error = ex.what(); + return DuckDBError; + } catch 
(...) { // LCOV_EXCL_START + wrapper->error = "Unknown create appender error"; + return DuckDBError; + } // LCOV_EXCL_STOP + return DuckDBSuccess; } -void AddFunction(BaseScalarFunction &f, idx_t &count, DataChunk &output, bool is_aggregate) { - output.SetValue(0, count, Value(f.name)); - output.SetValue(1, count, Value(is_aggregate ? "AGGREGATE" : "SCALAR")); - ListVector::Initialize(output.data[2]); - auto result_data = FlatVector::GetData(output.data[2]); - result_data[count].offset = ListVector::GetListSize(output.data[2]); - result_data[count].length = f.arguments.size(); - string parameters; - for (idx_t i = 0; i < f.arguments.size(); i++) { - auto val = Value(f.arguments[i].ToString()); - ListVector::PushBack(output.data[2], val); +duckdb_state duckdb_appender_destroy(duckdb_appender *appender) { + if (!appender || !*appender) { + return DuckDBError; } + duckdb_appender_close(*appender); + auto wrapper = (AppenderWrapper *)*appender; + if (wrapper) { + delete wrapper; + } + *appender = nullptr; + return DuckDBSuccess; +} - output.SetValue(3, count, f.varargs.id() != LogicalTypeId::INVALID ? Value(f.varargs.ToString()) : Value()); - output.SetValue(4, count, f.return_type.ToString()); - output.SetValue(5, count, Value::BOOLEAN(f.has_side_effects)); - - count++; +template +duckdb_state duckdb_appender_run_function(duckdb_appender appender, FUN &&function) { + if (!appender) { + return DuckDBError; + } + auto wrapper = (AppenderWrapper *)appender; + if (!wrapper->appender) { + return DuckDBError; + } + try { + function(*wrapper->appender); + } catch (std::exception &ex) { + wrapper->error = ex.what(); + return DuckDBError; + } catch (...) 
{ // LCOV_EXCL_START + wrapper->error = "Unknown error"; + return DuckDBError; + } // LCOV_EXCL_STOP + return DuckDBSuccess; } -static void PragmaFunctionsFunction(ClientContext &context, const FunctionData *bind_data, - FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { - auto &data = (PragmaFunctionsData &)*operator_state; - if (data.offset >= data.entries.size()) { - // finished returning values - return; +const char *duckdb_appender_error(duckdb_appender appender) { + if (!appender) { + return nullptr; } - idx_t count = 0; - while (count < STANDARD_VECTOR_SIZE && data.offset < data.entries.size()) { - auto &entry = data.entries[data.offset]; - switch (entry->type) { - case CatalogType::SCALAR_FUNCTION_ENTRY: { - auto &func = (ScalarFunctionCatalogEntry &)*entry; - if (data.offset_in_entry >= func.functions.size()) { - data.offset++; - data.offset_in_entry = 0; - break; - } - AddFunction(func.functions[data.offset_in_entry++], count, output, false); - break; - } - case CatalogType::AGGREGATE_FUNCTION_ENTRY: { - auto &aggr = (AggregateFunctionCatalogEntry &)*entry; - if (data.offset_in_entry >= aggr.functions.size()) { - data.offset++; - data.offset_in_entry = 0; - break; - } - AddFunction(aggr.functions[data.offset_in_entry++], count, output, true); - break; - } - default: - data.offset++; - break; - } + auto wrapper = (AppenderWrapper *)appender; + if (wrapper->error.empty()) { + return nullptr; } - output.SetCardinality(count); + return strdup(wrapper->error.c_str()); } -void PragmaFunctionPragma::RegisterFunction(BuiltinFunctions &set) { - set.AddFunction( - TableFunction("pragma_functions", {}, PragmaFunctionsFunction, PragmaFunctionsBind, PragmaFunctionsInit)); +duckdb_state duckdb_appender_begin_row(duckdb_appender appender) { + return DuckDBSuccess; } -} // namespace duckdb +duckdb_state duckdb_appender_end_row(duckdb_appender appender) { + return duckdb_appender_run_function(appender, [&](Appender &appender) { appender.EndRow(); 
}); +} +template +duckdb_state duckdb_append_internal(duckdb_appender appender, T value) { + if (!appender) { + return DuckDBError; + } + auto *appender_instance = (AppenderWrapper *)appender; + try { + appender_instance->appender->Append(value); + } catch (...) { + return DuckDBError; + } + return DuckDBSuccess; +} +duckdb_state duckdb_append_bool(duckdb_appender appender, bool value) { + return duckdb_append_internal(appender, value); +} +duckdb_state duckdb_append_int8(duckdb_appender appender, int8_t value) { + return duckdb_append_internal(appender, value); +} +duckdb_state duckdb_append_int16(duckdb_appender appender, int16_t value) { + return duckdb_append_internal(appender, value); +} +duckdb_state duckdb_append_int32(duckdb_appender appender, int32_t value) { + return duckdb_append_internal(appender, value); +} +duckdb_state duckdb_append_int64(duckdb_appender appender, int64_t value) { + return duckdb_append_internal(appender, value); +} +duckdb_state duckdb_append_hugeint(duckdb_appender appender, duckdb_hugeint value) { + hugeint_t internal; + internal.lower = value.lower; + internal.upper = value.upper; + return duckdb_append_internal(appender, internal); +} +duckdb_state duckdb_append_uint8(duckdb_appender appender, uint8_t value) { + return duckdb_append_internal(appender, value); +} +duckdb_state duckdb_append_uint16(duckdb_appender appender, uint16_t value) { + return duckdb_append_internal(appender, value); +} +duckdb_state duckdb_append_uint32(duckdb_appender appender, uint32_t value) { + return duckdb_append_internal(appender, value); +} +duckdb_state duckdb_append_uint64(duckdb_appender appender, uint64_t value) { + return duckdb_append_internal(appender, value); +} -#include +duckdb_state duckdb_append_float(duckdb_appender appender, float value) { + return duckdb_append_internal(appender, value); +} -namespace duckdb { +duckdb_state duckdb_append_double(duckdb_appender appender, double value) { + return duckdb_append_internal(appender, 
value); +} -struct PragmaTableFunctionData : public TableFunctionData { - explicit PragmaTableFunctionData(CatalogEntry *entry_p) : entry(entry_p) { - } +duckdb_state duckdb_append_date(duckdb_appender appender, duckdb_date value) { + return duckdb_append_internal(appender, date_t(value.days)); +} - CatalogEntry *entry; -}; +duckdb_state duckdb_append_time(duckdb_appender appender, duckdb_time value) { + return duckdb_append_internal(appender, dtime_t(value.micros)); +} -struct PragmaTableOperatorData : public FunctionOperatorData { - PragmaTableOperatorData() : offset(0) { - } - idx_t offset; -}; +duckdb_state duckdb_append_timestamp(duckdb_appender appender, duckdb_timestamp value) { + return duckdb_append_internal(appender, timestamp_t(value.micros)); +} -static unique_ptr PragmaTableInfoBind(ClientContext &context, vector &inputs, - unordered_map &named_parameters, - vector &input_table_types, - vector &input_table_names, - vector &return_types, vector &names) { - names.emplace_back("cid"); - return_types.push_back(LogicalType::INTEGER); +duckdb_state duckdb_append_interval(duckdb_appender appender, duckdb_interval value) { + interval_t interval; + interval.months = value.months; + interval.days = value.days; + interval.micros = value.micros; + return duckdb_append_internal(appender, interval); +} - names.emplace_back("name"); - return_types.push_back(LogicalType::VARCHAR); +duckdb_state duckdb_append_null(duckdb_appender appender) { + return duckdb_append_internal(appender, nullptr); +} - names.emplace_back("type"); - return_types.push_back(LogicalType::VARCHAR); +duckdb_state duckdb_append_varchar(duckdb_appender appender, const char *val) { + return duckdb_append_internal(appender, val); +} - names.emplace_back("notnull"); - return_types.push_back(LogicalType::BOOLEAN); +duckdb_state duckdb_append_varchar_length(duckdb_appender appender, const char *val, idx_t length) { + return duckdb_append_internal(appender, string_t(val, length)); +} +duckdb_state 
duckdb_append_blob(duckdb_appender appender, const void *data, idx_t length) { + return duckdb_append_internal(appender, string_t((const char *)data, length)); +} - names.emplace_back("dflt_value"); - return_types.push_back(LogicalType::VARCHAR); +duckdb_state duckdb_appender_flush(duckdb_appender appender) { + return duckdb_appender_run_function(appender, [&](Appender &appender) { appender.Flush(); }); +} - names.emplace_back("pk"); - return_types.push_back(LogicalType::BOOLEAN); +duckdb_state duckdb_appender_close(duckdb_appender appender) { + return duckdb_appender_run_function(appender, [&](Appender &appender) { appender.Close(); }); +} - auto qname = QualifiedName::Parse(inputs[0].GetValue()); - // look up the table name in the catalog - auto &catalog = Catalog::GetCatalog(context); - auto entry = catalog.GetEntry(context, CatalogType::TABLE_ENTRY, qname.schema, qname.name); - return make_unique(entry); -} +using duckdb::ArrowResultWrapper; +using duckdb::Connection; +using duckdb::DataChunk; +using duckdb::LogicalType; +using duckdb::MaterializedQueryResult; +using duckdb::PreparedStatementWrapper; +using duckdb::QueryResult; +using duckdb::QueryResultType; -unique_ptr PragmaTableInfoInit(ClientContext &context, const FunctionData *bind_data, - vector &column_ids, TableFilterCollection *filters) { - return make_unique(); +duckdb_state duckdb_query_arrow(duckdb_connection connection, const char *query, duckdb_arrow *out_result) { + Connection *conn = (Connection *)connection; + auto wrapper = new ArrowResultWrapper(); + wrapper->result = conn->Query(query); + *out_result = (duckdb_arrow)wrapper; + return wrapper->result->success ? 
DuckDBSuccess : DuckDBError; } -static void CheckConstraints(TableCatalogEntry *table, idx_t oid, bool &out_not_null, bool &out_pk) { - out_not_null = false; - out_pk = false; - // check all constraints - // FIXME: this is pretty inefficient, it probably doesn't matter - for (auto &constraint : table->bound_constraints) { - switch (constraint->type) { - case ConstraintType::NOT_NULL: { - auto ¬_null = (BoundNotNullConstraint &)*constraint; - if (not_null.index == oid) { - out_not_null = true; - } - break; - } - case ConstraintType::UNIQUE: { - auto &unique = (BoundUniqueConstraint &)*constraint; - if (unique.is_primary_key && unique.keys.find(oid) != unique.keys.end()) { - out_pk = true; - } - break; - } - default: - break; - } +duckdb_state duckdb_query_arrow_schema(duckdb_arrow result, duckdb_arrow_schema *out_schema) { + if (!out_schema) { + return DuckDBSuccess; } + auto wrapper = (ArrowResultWrapper *)result; + wrapper->result->ToArrowSchema((ArrowSchema *)*out_schema); + return DuckDBSuccess; } -static void PragmaTableInfoTable(PragmaTableOperatorData &data, TableCatalogEntry *table, DataChunk &output) { - if (data.offset >= table->columns.size()) { - // finished returning values - return; +duckdb_state duckdb_query_arrow_array(duckdb_arrow result, duckdb_arrow_array *out_array) { + if (!out_array) { + return DuckDBSuccess; } - // start returning values - // either fill up the chunk or return all the remaining columns - idx_t next = MinValue(data.offset + STANDARD_VECTOR_SIZE, table->columns.size()); - output.SetCardinality(next - data.offset); - - for (idx_t i = data.offset; i < next; i++) { - bool not_null, pk; - auto index = i - data.offset; - auto &column = table->columns[i]; - D_ASSERT(column.oid < (idx_t)NumericLimits::Maximum()); - CheckConstraints(table, column.oid, not_null, pk); - - // return values: - // "cid", PhysicalType::INT32 - output.SetValue(0, index, Value::INTEGER((int32_t)column.oid)); - // "name", PhysicalType::VARCHAR - 
output.SetValue(1, index, Value(column.name)); - // "type", PhysicalType::VARCHAR - output.SetValue(2, index, Value(column.type.ToString())); - // "notnull", PhysicalType::BOOL - output.SetValue(3, index, Value::BOOLEAN(not_null)); - // "dflt_value", PhysicalType::VARCHAR - Value def_value = column.default_value ? Value(column.default_value->ToString()) : Value(); - output.SetValue(4, index, def_value); - // "pk", PhysicalType::BOOL - output.SetValue(5, index, Value::BOOLEAN(pk)); + auto wrapper = (ArrowResultWrapper *)result; + auto success = wrapper->result->TryFetch(wrapper->current_chunk, wrapper->result->error); + if (!success) { // LCOV_EXCL_START + return DuckDBError; + } // LCOV_EXCL_STOP + if (!wrapper->current_chunk || wrapper->current_chunk->size() == 0) { + return DuckDBSuccess; } - data.offset = next; + wrapper->current_chunk->ToArrowArray((ArrowArray *)*out_array); + return DuckDBSuccess; } -static void PragmaTableInfoView(PragmaTableOperatorData &data, ViewCatalogEntry *view, DataChunk &output) { - if (data.offset >= view->types.size()) { - // finished returning values - return; - } - // start returning values - // either fill up the chunk or return all the remaining columns - idx_t next = MinValue(data.offset + STANDARD_VECTOR_SIZE, view->types.size()); - output.SetCardinality(next - data.offset); - - for (idx_t i = data.offset; i < next; i++) { - auto index = i - data.offset; - auto type = view->types[index]; - auto &name = view->aliases[index]; - // return values: - // "cid", PhysicalType::INT32 +idx_t duckdb_arrow_row_count(duckdb_arrow result) { + auto wrapper = (ArrowResultWrapper *)result; + return wrapper->result->collection.Count(); +} - output.SetValue(0, index, Value::INTEGER((int32_t)index)); - // "name", PhysicalType::VARCHAR - output.SetValue(1, index, Value(name)); - // "type", PhysicalType::VARCHAR - output.SetValue(2, index, Value(type.ToString())); - // "notnull", PhysicalType::BOOL - output.SetValue(3, index, 
Value::BOOLEAN(false)); - // "dflt_value", PhysicalType::VARCHAR - output.SetValue(4, index, Value()); - // "pk", PhysicalType::BOOL - output.SetValue(5, index, Value::BOOLEAN(false)); - } - data.offset = next; +idx_t duckdb_arrow_column_count(duckdb_arrow result) { + auto wrapper = (ArrowResultWrapper *)result; + return wrapper->result->types.size(); } -static void PragmaTableInfoFunction(ClientContext &context, const FunctionData *bind_data_p, - FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { - auto &bind_data = (PragmaTableFunctionData &)*bind_data_p; - auto &state = (PragmaTableOperatorData &)*operator_state; - switch (bind_data.entry->type) { - case CatalogType::TABLE_ENTRY: - PragmaTableInfoTable(state, (TableCatalogEntry *)bind_data.entry, output); - break; - case CatalogType::VIEW_ENTRY: - PragmaTableInfoView(state, (ViewCatalogEntry *)bind_data.entry, output); - break; - default: - throw NotImplementedException("Unimplemented catalog type for pragma_table_info"); +idx_t duckdb_arrow_rows_changed(duckdb_arrow result) { + auto wrapper = (ArrowResultWrapper *)result; + idx_t rows_changed = 0; + idx_t row_count = wrapper->result->collection.Count(); + if (row_count > 0 && StatementTypeReturnChanges(wrapper->result->statement_type)) { + auto row_changes = wrapper->result->GetValue(0, 0); + if (!row_changes.is_null && row_changes.TryCastAs(LogicalType::BIGINT)) { + rows_changed = row_changes.GetValue(); + } } + return rows_changed; } -void PragmaTableInfo::RegisterFunction(BuiltinFunctions &set) { - set.AddFunction(TableFunction("pragma_table_info", {LogicalType::VARCHAR}, PragmaTableInfoFunction, - PragmaTableInfoBind, PragmaTableInfoInit)); +const char *duckdb_query_arrow_error(duckdb_arrow result) { + auto wrapper = (ArrowResultWrapper *)result; + return strdup(wrapper->result->error.c_str()); } -} // namespace duckdb - - - +void duckdb_destroy_arrow(duckdb_arrow *result) { + if (*result) { + auto wrapper = (ArrowResultWrapper 
*)*result; + delete wrapper; + *result = nullptr; + } +} +duckdb_state duckdb_execute_prepared_arrow(duckdb_prepared_statement prepared_statement, duckdb_arrow *out_result) { + auto wrapper = (PreparedStatementWrapper *)prepared_statement; + if (!wrapper || !wrapper->statement || !wrapper->statement->success || !out_result) { + return DuckDBError; + } + auto arrow_wrapper = new ArrowResultWrapper(); + auto result = wrapper->statement->Execute(wrapper->values, false); + D_ASSERT(result->type == QueryResultType::MATERIALIZED_RESULT); + arrow_wrapper->result = duckdb::unique_ptr_cast(move(result)); + *out_result = (duckdb_arrow)arrow_wrapper; + return arrow_wrapper->result->success ? DuckDBSuccess : DuckDBError; +} +using duckdb::DBConfig; +using duckdb::Value; -#include +// config +duckdb_state duckdb_create_config(duckdb_config *out_config) { + if (!out_config) { + return DuckDBError; + } + DBConfig *config; + try { + config = new DBConfig(); + } catch (...) { // LCOV_EXCL_START + return DuckDBError; + } // LCOV_EXCL_STOP + *out_config = (duckdb_config)config; + return DuckDBSuccess; +} -namespace duckdb { +size_t duckdb_config_count() { + return DBConfig::GetOptionCount(); +} -struct SQLiteMasterData : public FunctionOperatorData { - SQLiteMasterData() : offset(0) { +duckdb_state duckdb_get_config_flag(size_t index, const char **out_name, const char **out_description) { + auto option = DBConfig::GetOptionByIndex(index); + if (!option) { + return DuckDBError; + } + if (out_name) { + *out_name = option->name; } + if (out_description) { + *out_description = option->description; + } + return DuckDBSuccess; +} - vector entries; - idx_t offset; -}; +duckdb_state duckdb_set_config(duckdb_config config, const char *name, const char *option) { + if (!config || !name || !option) { + return DuckDBError; + } + auto config_option = DBConfig::GetOptionByName(name); + if (!config_option) { + return DuckDBError; + } + try { + auto db_config = (DBConfig *)config; + 
db_config->SetOption(*config_option, Value(option)); + } catch (...) { + return DuckDBError; + } + return DuckDBSuccess; +} -static unique_ptr SQLiteMasterBind(ClientContext &context, vector &inputs, - unordered_map &named_parameters, - vector &input_table_types, - vector &input_table_names, vector &return_types, - vector &names) { - names.emplace_back("type"); - return_types.push_back(LogicalType::VARCHAR); +void duckdb_destroy_config(duckdb_config *config) { + if (!config) { + return; + } + if (*config) { + auto db_config = (DBConfig *)*config; + delete db_config; + *config = nullptr; + } +} - names.emplace_back("name"); - return_types.push_back(LogicalType::VARCHAR); - names.emplace_back("tbl_name"); - return_types.push_back(LogicalType::VARCHAR); - names.emplace_back("rootpage"); - return_types.push_back(LogicalType::INTEGER); - names.emplace_back("sql"); - return_types.push_back(LogicalType::VARCHAR); - return nullptr; -} +using duckdb::Date; +using duckdb::Time; +using duckdb::Timestamp; -unique_ptr SQLiteMasterInit(ClientContext &context, const FunctionData *bind_data, - vector &column_ids, TableFilterCollection *filters) { - auto result = make_unique(); +using duckdb::date_t; +using duckdb::dtime_t; +using duckdb::timestamp_t; - // scan all the schemas for tables and views and collect them - Catalog::GetCatalog(context).schemas->Scan(context, [&](CatalogEntry *entry) { - auto schema = (SchemaCatalogEntry *)entry; - schema->Scan(context, CatalogType::TABLE_ENTRY, [&](CatalogEntry *entry) { result->entries.push_back(entry); }); - schema->Scan(context, CatalogType::INDEX_ENTRY, [&](CatalogEntry *entry) { result->entries.push_back(entry); }); - }); +duckdb_date_struct duckdb_from_date(duckdb_date date) { + int32_t year, month, day; + Date::Convert(date_t(date.days), year, month, day); - return move(result); + duckdb_date_struct result; + result.year = year; + result.month = month; + result.day = day; + return result; } -void SQLiteMasterFunction(ClientContext 
&context, const FunctionData *bind_data, FunctionOperatorData *operator_state, - DataChunk *input, DataChunk &output) { - auto &data = (SQLiteMasterData &)*operator_state; - if (data.offset >= data.entries.size()) { - // finished returning values - return; - } +duckdb_date duckdb_to_date(duckdb_date_struct date) { + duckdb_date result; + result.days = Date::FromDate(date.year, date.month, date.day).days; + return result; +} - // start returning values - // either fill up the chunk or return all the remaining columns - idx_t count = 0; - while (data.offset < data.entries.size() && count < STANDARD_VECTOR_SIZE) { - auto &entry = data.entries[data.offset++]; - if (entry->internal) { - continue; - } +duckdb_time_struct duckdb_from_time(duckdb_time time) { + int32_t hour, minute, second, micros; + Time::Convert(dtime_t(time.micros), hour, minute, second, micros); - // return values: - // "type", PhysicalType::VARCHAR - string table_name = entry->name; - const char *type_str; - switch (entry->type) { - case CatalogType::TABLE_ENTRY: - type_str = "table"; - break; - case CatalogType::SCHEMA_ENTRY: - type_str = "schema"; - break; - case CatalogType::TABLE_FUNCTION_ENTRY: - type_str = "function"; - break; - case CatalogType::VIEW_ENTRY: - type_str = "view"; - break; - case CatalogType::INDEX_ENTRY: { - auto &index = (IndexCatalogEntry &)*entry; - table_name = index.info->table; - type_str = "index"; - break; - } - default: - type_str = "unknown"; - } - output.SetValue(0, count, Value(type_str)); - // "name", PhysicalType::VARCHAR - output.SetValue(1, count, Value(entry->name)); - // "tbl_name", PhysicalType::VARCHAR - output.SetValue(2, count, Value(table_name)); - // "rootpage", PhysicalType::INT32 - output.SetValue(3, count, Value::INTEGER(0)); - // "sql", PhysicalType::VARCHAR - output.SetValue(4, count, Value(entry->ToSQL())); - count++; - } - output.SetCardinality(count); + duckdb_time_struct result; + result.hour = hour; + result.min = minute; + result.sec = second; + 
result.micros = micros; + return result; } -void SQLiteMaster::RegisterFunction(BuiltinFunctions &set) { - set.AddFunction(TableFunction("sqlite_master", {}, SQLiteMasterFunction, SQLiteMasterBind, SQLiteMasterInit)); +duckdb_time duckdb_to_time(duckdb_time_struct time) { + duckdb_time result; + result.micros = Time::FromTime(time.hour, time.min, time.sec, time.micros).micros; + return result; } -} // namespace duckdb - - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/parser/query_node/select_node.hpp -// -// -//===----------------------------------------------------------------------===// - +duckdb_timestamp_struct duckdb_from_timestamp(duckdb_timestamp ts) { + date_t date; + dtime_t time; + Timestamp::Convert(timestamp_t(ts.micros), date, time); + duckdb_date ddate; + ddate.days = date.days; + duckdb_time dtime; + dtime.micros = time.micros; + duckdb_timestamp_struct result; + result.date = duckdb_from_date(ddate); + result.time = duckdb_from_time(dtime); + return result; +} +duckdb_timestamp duckdb_to_timestamp(duckdb_timestamp_struct ts) { + date_t date = date_t(duckdb_to_date(ts.date).days); + dtime_t time = dtime_t(duckdb_to_time(ts.time).micros); + duckdb_timestamp result; + result.micros = Timestamp::FromDatetime(date, time).value; + return result; +} +using duckdb::Connection; +using duckdb::DatabaseData; +using duckdb::DBConfig; +using duckdb::DuckDB; -namespace duckdb { +duckdb_state duckdb_open_ext(const char *path, duckdb_database *out, duckdb_config config, char **error) { + auto wrapper = new DatabaseData(); + try { + auto db_config = (DBConfig *)config; + wrapper->database = duckdb::make_unique(path, db_config); + } catch (std::exception &ex) { + if (error) { + *error = strdup(ex.what()); + } + delete wrapper; + return DuckDBError; + } catch (...) 
{ // LCOV_EXCL_START + if (error) { + *error = strdup("Unknown error"); + } + delete wrapper; + return DuckDBError; + } // LCOV_EXCL_STOP + *out = (duckdb_database)wrapper; + return DuckDBSuccess; +} -enum class AggregateHandling : uint8_t { - STANDARD_HANDLING, // standard handling as in the SELECT clause - NO_AGGREGATES_ALLOWED, // no aggregates allowed: any aggregates in this node will result in an error - FORCE_AGGREGATES // force aggregates: any non-aggregate select list entry will become a GROUP -}; +duckdb_state duckdb_open(const char *path, duckdb_database *out) { + return duckdb_open_ext(path, out, nullptr, nullptr); +} -//! SelectNode represents a standard SELECT statement -class SelectNode : public QueryNode { -public: - SelectNode() : QueryNode(QueryNodeType::SELECT_NODE), aggregate_handling(AggregateHandling::STANDARD_HANDLING) { +void duckdb_close(duckdb_database *database) { + if (database && *database) { + auto wrapper = (DatabaseData *)*database; + delete wrapper; + *database = nullptr; } +} - //! The projection list - vector> select_list; - //! The FROM clause - unique_ptr from_table; - //! The WHERE clause - unique_ptr where_clause; - //! list of groups - vector> groups; - //! HAVING clause - unique_ptr having; - //! Aggregate handling during binding - AggregateHandling aggregate_handling; - //! The SAMPLE clause - unique_ptr sample; +duckdb_state duckdb_connect(duckdb_database database, duckdb_connection *out) { + if (!database || !out) { + return DuckDBError; + } + auto wrapper = (DatabaseData *)database; + Connection *connection; + try { + connection = new Connection(*wrapper->database); + } catch (...) 
{ // LCOV_EXCL_START + return DuckDBError; + } // LCOV_EXCL_STOP + *out = (duckdb_connection)connection; + return DuckDBSuccess; +} - const vector> &GetSelectList() const override { - return select_list; +void duckdb_disconnect(duckdb_connection *connection) { + if (connection && *connection) { + Connection *conn = (Connection *)*connection; + delete conn; + *connection = nullptr; } +} -public: - bool Equals(const QueryNode *other) const override; - //! Create a copy of this SelectNode - unique_ptr Copy() override; - //! Serializes a SelectNode to a stand-alone binary blob - void Serialize(Serializer &serializer) override; - //! Deserializes a blob back into a SelectNode - static unique_ptr Deserialize(Deserializer &source); -}; -} // namespace duckdb +duckdb_state duckdb_query(duckdb_connection connection, const char *query, duckdb_result *out) { + Connection *conn = (Connection *)connection; + auto result = conn->Query(query); + return duckdb_translate_result(result.get(), out); +} +namespace duckdb { +duckdb_type ConvertCPPTypeToC(const LogicalType &sql_type) { + switch (sql_type.id()) { + case LogicalTypeId::BOOLEAN: + return DUCKDB_TYPE_BOOLEAN; + case LogicalTypeId::TINYINT: + return DUCKDB_TYPE_TINYINT; + case LogicalTypeId::SMALLINT: + return DUCKDB_TYPE_SMALLINT; + case LogicalTypeId::INTEGER: + return DUCKDB_TYPE_INTEGER; + case LogicalTypeId::BIGINT: + return DUCKDB_TYPE_BIGINT; + case LogicalTypeId::UTINYINT: + return DUCKDB_TYPE_UTINYINT; + case LogicalTypeId::USMALLINT: + return DUCKDB_TYPE_USMALLINT; + case LogicalTypeId::UINTEGER: + return DUCKDB_TYPE_UINTEGER; + case LogicalTypeId::UBIGINT: + return DUCKDB_TYPE_UBIGINT; + case LogicalTypeId::HUGEINT: + return DUCKDB_TYPE_HUGEINT; + case LogicalTypeId::FLOAT: + return DUCKDB_TYPE_FLOAT; + case LogicalTypeId::DOUBLE: + return DUCKDB_TYPE_DOUBLE; + case LogicalTypeId::TIMESTAMP: + case LogicalTypeId::TIMESTAMP_SEC: + case LogicalTypeId::TIMESTAMP_MS: + case LogicalTypeId::TIMESTAMP_NS: + return 
DUCKDB_TYPE_TIMESTAMP; + case LogicalTypeId::DATE: + return DUCKDB_TYPE_DATE; + case LogicalTypeId::TIME: + return DUCKDB_TYPE_TIME; + case LogicalTypeId::VARCHAR: + return DUCKDB_TYPE_VARCHAR; + case LogicalTypeId::BLOB: + return DUCKDB_TYPE_BLOB; + case LogicalTypeId::INTERVAL: + return DUCKDB_TYPE_INTERVAL; + default: // LCOV_EXCL_START + D_ASSERT(0); + return DUCKDB_TYPE_INVALID; + } // LCOV_EXCL_STOP +} +idx_t GetCTypeSize(duckdb_type type) { + switch (type) { + case DUCKDB_TYPE_BOOLEAN: + return sizeof(bool); + case DUCKDB_TYPE_TINYINT: + return sizeof(int8_t); + case DUCKDB_TYPE_SMALLINT: + return sizeof(int16_t); + case DUCKDB_TYPE_INTEGER: + return sizeof(int32_t); + case DUCKDB_TYPE_BIGINT: + return sizeof(int64_t); + case DUCKDB_TYPE_UTINYINT: + return sizeof(uint8_t); + case DUCKDB_TYPE_USMALLINT: + return sizeof(uint16_t); + case DUCKDB_TYPE_UINTEGER: + return sizeof(uint32_t); + case DUCKDB_TYPE_UBIGINT: + return sizeof(uint64_t); + case DUCKDB_TYPE_HUGEINT: + return sizeof(duckdb_hugeint); + case DUCKDB_TYPE_FLOAT: + return sizeof(float); + case DUCKDB_TYPE_DOUBLE: + return sizeof(double); + case DUCKDB_TYPE_DATE: + return sizeof(duckdb_date); + case DUCKDB_TYPE_TIME: + return sizeof(duckdb_time); + case DUCKDB_TYPE_TIMESTAMP: + return sizeof(duckdb_timestamp); + case DUCKDB_TYPE_VARCHAR: + return sizeof(const char *); + case DUCKDB_TYPE_BLOB: + return sizeof(duckdb_blob); + case DUCKDB_TYPE_INTERVAL: + return sizeof(duckdb_interval); + default: // LCOV_EXCL_START + // unsupported type + D_ASSERT(0); + return sizeof(const char *); + } // LCOV_EXCL_STOP +} +} // namespace duckdb +void *duckdb_malloc(size_t size) { + return malloc(size); +} -namespace duckdb { +void duckdb_free(void *ptr) { + free(ptr); +} -void BuiltinFunctions::RegisterSQLiteFunctions() { - PragmaVersion::RegisterFunction(*this); - PragmaFunctionPragma::RegisterFunction(*this); - PragmaCollations::RegisterFunction(*this); - PragmaTableInfo::RegisterFunction(*this); - 
SQLiteMaster::RegisterFunction(*this); - PragmaDatabaseSize::RegisterFunction(*this); - PragmaDatabaseList::RegisterFunction(*this); - PragmaLastProfilingOutput::RegisterFunction(*this); - PragmaDetailedProfilingOutput::RegisterFunction(*this); - // CreateViewInfo info; - // info.schema = DEFAULT_SCHEMA; - // info.view_name = "sqlite_master"; - // info.on_conflict = OnCreateConflict::REPLACE; - // auto select = make_unique(); - // select->select_list.push_back(make_unique()); - // vector> children; +using duckdb::Hugeint; +using duckdb::hugeint_t; +using duckdb::Value; - // auto function = make_unique(DEFAULT_SCHEMA, "sqlite_master", children); - // auto function_expr = make_unique(); - // function_expr->function = move(function); - // select->from_table = move(function_expr); - // info.query = move(select); - // catalog.CreateView(transaction, &info); +double duckdb_hugeint_to_double(duckdb_hugeint val) { + hugeint_t internal; + internal.lower = val.lower; + internal.upper = val.upper; + return Hugeint::Cast(internal); } -} // namespace duckdb - - +duckdb_hugeint duckdb_double_to_hugeint(double val) { + hugeint_t internal_result; + if (!Value::DoubleIsValid(val) || !Hugeint::TryConvert(val, internal_result)) { + internal_result.lower = 0; + internal_result.upper = 0; + } + duckdb_hugeint result; + result.lower = internal_result.lower; + result.upper = internal_result.upper; + return result; +} -// this function makes not that much sense on its own but is a demo for table-parameter table-producing functions -namespace duckdb { -static unique_ptr SummaryFunctionBind(ClientContext &context, vector &inputs, - unordered_map &named_parameters, - vector &input_table_types, - vector &input_table_names, - vector &return_types, vector &names) { - return_types.push_back(LogicalType::VARCHAR); - names.emplace_back("summary"); +using duckdb::Connection; +using duckdb::date_t; +using duckdb::dtime_t; +using duckdb::hugeint_t; +using duckdb::MaterializedQueryResult; +using 
duckdb::PreparedStatementWrapper; +using duckdb::QueryResultType; +using duckdb::timestamp_t; +using duckdb::Value; - for (idx_t i = 0; i < input_table_types.size(); i++) { - return_types.push_back(input_table_types[i]); - names.emplace_back(input_table_names[i]); +duckdb_state duckdb_prepare(duckdb_connection connection, const char *query, + duckdb_prepared_statement *out_prepared_statement) { + if (!connection || !query || !out_prepared_statement) { + return DuckDBError; } + auto wrapper = new PreparedStatementWrapper(); + Connection *conn = (Connection *)connection; + wrapper->statement = conn->Prepare(query); + *out_prepared_statement = (duckdb_prepared_statement)wrapper; + return wrapper->statement->success ? DuckDBSuccess : DuckDBError; +} - return make_unique(); +const char *duckdb_prepare_error(duckdb_prepared_statement prepared_statement) { + auto wrapper = (PreparedStatementWrapper *)prepared_statement; + if (!wrapper || !wrapper->statement || wrapper->statement->success) { + return nullptr; + } + return wrapper->statement->error.c_str(); } -static void SummaryFunction(ClientContext &context, const FunctionData *bind_data_p, FunctionOperatorData *state_p, - DataChunk *input, DataChunk &output) { - D_ASSERT(input); - output.SetCardinality(input->size()); +idx_t duckdb_nparams(duckdb_prepared_statement prepared_statement) { + auto wrapper = (PreparedStatementWrapper *)prepared_statement; + if (!wrapper || !wrapper->statement || !wrapper->statement->success) { + return 0; + } + return wrapper->statement->n_param; +} - for (idx_t row_idx = 0; row_idx < input->size(); row_idx++) { - string summary_val = "["; +duckdb_type duckdb_param_type(duckdb_prepared_statement prepared_statement, idx_t param_idx) { + auto wrapper = (PreparedStatementWrapper *)prepared_statement; + if (!wrapper || !wrapper->statement || !wrapper->statement->success) { + return DUCKDB_TYPE_INVALID; + } + auto entry = wrapper->statement->data->value_map.find(param_idx); + if (entry == 
wrapper->statement->data->value_map.end()) { + return DUCKDB_TYPE_INVALID; + } + return ConvertCPPTypeToC(entry->second[0]->type()); +} - for (idx_t col_idx = 0; col_idx < input->ColumnCount(); col_idx++) { - summary_val += input->GetValue(col_idx, row_idx).ToString(); - if (col_idx < input->ColumnCount() - 1) { - summary_val += ", "; - } - } - summary_val += "]"; - output.SetValue(0, row_idx, Value(summary_val)); +static duckdb_state duckdb_bind_value(duckdb_prepared_statement prepared_statement, idx_t param_idx, Value val) { + auto wrapper = (PreparedStatementWrapper *)prepared_statement; + if (!wrapper || !wrapper->statement || !wrapper->statement->success) { + return DuckDBError; } - for (idx_t col_idx = 0; col_idx < input->ColumnCount(); col_idx++) { - output.data[col_idx + 1].Reference(input->data[col_idx]); + if (param_idx > wrapper->statement->n_param) { + return DuckDBError; + } + if (param_idx > wrapper->values.size()) { + wrapper->values.resize(param_idx); } + wrapper->values[param_idx - 1] = val; + return DuckDBSuccess; } -void SummaryTableFunction::RegisterFunction(BuiltinFunctions &set) { - TableFunctionSet summary("summary"); - summary.AddFunction(TableFunction({LogicalType::TABLE}, SummaryFunction, SummaryFunctionBind)); - set.AddFunction(summary); +duckdb_state duckdb_bind_boolean(duckdb_prepared_statement prepared_statement, idx_t param_idx, bool val) { + return duckdb_bind_value(prepared_statement, param_idx, Value::BOOLEAN(val)); } -} // namespace duckdb - - - - - - - - - - - - +duckdb_state duckdb_bind_int8(duckdb_prepared_statement prepared_statement, idx_t param_idx, int8_t val) { + return duckdb_bind_value(prepared_statement, param_idx, Value::TINYINT(val)); +} +duckdb_state duckdb_bind_int16(duckdb_prepared_statement prepared_statement, idx_t param_idx, int16_t val) { + return duckdb_bind_value(prepared_statement, param_idx, Value::SMALLINT(val)); +} +duckdb_state duckdb_bind_int32(duckdb_prepared_statement prepared_statement, idx_t 
param_idx, int32_t val) { + return duckdb_bind_value(prepared_statement, param_idx, Value::INTEGER(val)); +} +duckdb_state duckdb_bind_int64(duckdb_prepared_statement prepared_statement, idx_t param_idx, int64_t val) { + return duckdb_bind_value(prepared_statement, param_idx, Value::BIGINT(val)); +} +duckdb_state duckdb_bind_hugeint(duckdb_prepared_statement prepared_statement, idx_t param_idx, duckdb_hugeint val) { + hugeint_t internal; + internal.lower = val.lower; + internal.upper = val.upper; + return duckdb_bind_value(prepared_statement, param_idx, Value::HUGEINT(internal)); +} -namespace duckdb { +duckdb_state duckdb_bind_uint8(duckdb_prepared_statement prepared_statement, idx_t param_idx, uint8_t val) { + return duckdb_bind_value(prepared_statement, param_idx, Value::UTINYINT(val)); +} -//===--------------------------------------------------------------------===// -// Table Scan -//===--------------------------------------------------------------------===// -bool TableScanParallelStateNext(ClientContext &context, const FunctionData *bind_data, - FunctionOperatorData *operator_state, ParallelState *parallel_state_p); +duckdb_state duckdb_bind_uint16(duckdb_prepared_statement prepared_statement, idx_t param_idx, uint16_t val) { + return duckdb_bind_value(prepared_statement, param_idx, Value::USMALLINT(val)); +} -struct TableScanOperatorData : public FunctionOperatorData { - //! 
The current position in the scan - TableScanState scan_state; - vector column_ids; -}; +duckdb_state duckdb_bind_uint32(duckdb_prepared_statement prepared_statement, idx_t param_idx, uint32_t val) { + return duckdb_bind_value(prepared_statement, param_idx, Value::UINTEGER(val)); +} -static unique_ptr TableScanInit(ClientContext &context, const FunctionData *bind_data_p, - vector &column_ids, TableFilterCollection *filters) { - auto result = make_unique(); - auto &transaction = Transaction::GetTransaction(context); - auto &bind_data = (const TableScanBindData &)*bind_data_p; - result->column_ids = column_ids; - result->scan_state.table_filters = filters->table_filters; - bind_data.table->storage->InitializeScan(transaction, result->scan_state, result->column_ids, - filters->table_filters); - return move(result); +duckdb_state duckdb_bind_uint64(duckdb_prepared_statement prepared_statement, idx_t param_idx, uint64_t val) { + return duckdb_bind_value(prepared_statement, param_idx, Value::UBIGINT(val)); } -static unique_ptr TableScanStatistics(ClientContext &context, const FunctionData *bind_data_p, - column_t column_id) { - auto &bind_data = (const TableScanBindData &)*bind_data_p; - auto &transaction = Transaction::GetTransaction(context); - if (transaction.storage.Find(bind_data.table->storage.get())) { - // we don't emit any statistics for tables that have outstanding transaction-local data - return nullptr; +duckdb_state duckdb_bind_float(duckdb_prepared_statement prepared_statement, idx_t param_idx, float val) { + if (!Value::FloatIsValid(val)) { + return DuckDBError; } - return bind_data.table->storage->GetStatistics(context, column_id); + return duckdb_bind_value(prepared_statement, param_idx, Value::FLOAT(val)); } -static unique_ptr TableScanParallelInit(ClientContext &context, const FunctionData *bind_data_p, - ParallelState *state, vector &column_ids, - TableFilterCollection *filters) { - auto result = make_unique(); - result->column_ids = column_ids; - 
result->scan_state.table_filters = filters->table_filters; - if (!TableScanParallelStateNext(context, bind_data_p, result.get(), state)) { - return nullptr; +duckdb_state duckdb_bind_double(duckdb_prepared_statement prepared_statement, idx_t param_idx, double val) { + if (!Value::DoubleIsValid(val)) { + return DuckDBError; } - return move(result); + return duckdb_bind_value(prepared_statement, param_idx, Value::DOUBLE(val)); } -static void TableScanFunc(ClientContext &context, const FunctionData *bind_data_p, FunctionOperatorData *operator_state, - DataChunk *, DataChunk &output) { - auto &bind_data = (TableScanBindData &)*bind_data_p; - auto &state = (TableScanOperatorData &)*operator_state; - auto &transaction = Transaction::GetTransaction(context); - bind_data.table->storage->Scan(transaction, output, state.scan_state, state.column_ids); - bind_data.chunk_count++; +duckdb_state duckdb_bind_date(duckdb_prepared_statement prepared_statement, idx_t param_idx, duckdb_date val) { + return duckdb_bind_value(prepared_statement, param_idx, Value::DATE(date_t(val.days))); } -struct ParallelTableFunctionScanState : public ParallelState { - ParallelTableScanState state; - std::mutex lock; -}; - -idx_t TableScanMaxThreads(ClientContext &context, const FunctionData *bind_data_p) { - auto &bind_data = (const TableScanBindData &)*bind_data_p; - return bind_data.table->storage->MaxThreads(context); +duckdb_state duckdb_bind_time(duckdb_prepared_statement prepared_statement, idx_t param_idx, duckdb_time val) { + return duckdb_bind_value(prepared_statement, param_idx, Value::TIME(dtime_t(val.micros))); } -unique_ptr TableScanInitParallelState(ClientContext &context, const FunctionData *bind_data_p) { - auto &bind_data = (const TableScanBindData &)*bind_data_p; - auto result = make_unique(); - bind_data.table->storage->InitializeParallelScan(result->state); - return move(result); +duckdb_state duckdb_bind_timestamp(duckdb_prepared_statement prepared_statement, idx_t param_idx, + 
duckdb_timestamp val) { + return duckdb_bind_value(prepared_statement, param_idx, Value::TIMESTAMP(timestamp_t(val.micros))); } -bool TableScanParallelStateNext(ClientContext &context, const FunctionData *bind_data_p, - FunctionOperatorData *operator_state, ParallelState *parallel_state_p) { - auto &bind_data = (const TableScanBindData &)*bind_data_p; - auto ¶llel_state = (ParallelTableFunctionScanState &)*parallel_state_p; - auto &state = (TableScanOperatorData &)*operator_state; - - lock_guard parallel_lock(parallel_state.lock); - return bind_data.table->storage->NextParallelScan(context, parallel_state.state, state.scan_state, - state.column_ids); +duckdb_state duckdb_bind_interval(duckdb_prepared_statement prepared_statement, idx_t param_idx, duckdb_interval val) { + return duckdb_bind_value(prepared_statement, param_idx, Value::INTERVAL(val.months, val.days, val.micros)); } -int TableScanProgress(ClientContext &context, const FunctionData *bind_data_p) { - auto &bind_data = (TableScanBindData &)*bind_data_p; - idx_t total_rows = bind_data.table->storage->GetTotalRows(); - if (total_rows == 0 || total_rows < STANDARD_VECTOR_SIZE) { - //! Table is either empty or smaller than a vector size, so it is finished - return 100; - } - auto percentage = (bind_data.chunk_count * STANDARD_VECTOR_SIZE * 100) / total_rows; - if (percentage > 100) { - //! In case the last chunk has less elements than STANDARD_VECTOR_SIZE, if our percentage is over 100 - //! It means we finished this table. 
- return 100; - } - return percentage; +duckdb_state duckdb_bind_varchar(duckdb_prepared_statement prepared_statement, idx_t param_idx, const char *val) { + return duckdb_bind_value(prepared_statement, param_idx, Value(val)); } -void TableScanDependency(unordered_set &entries, const FunctionData *bind_data_p) { - auto &bind_data = (const TableScanBindData &)*bind_data_p; - entries.insert(bind_data.table); +duckdb_state duckdb_bind_varchar_length(duckdb_prepared_statement prepared_statement, idx_t param_idx, const char *val, + idx_t length) { + return duckdb_bind_value(prepared_statement, param_idx, Value(std::string(val, length))); } -unique_ptr TableScanCardinality(ClientContext &context, const FunctionData *bind_data_p) { - auto &bind_data = (const TableScanBindData &)*bind_data_p; - auto &transaction = Transaction::GetTransaction(context); - idx_t estimated_cardinality = - bind_data.table->storage->info->cardinality + transaction.storage.AddedRows(bind_data.table->storage.get()); - return make_unique(bind_data.table->storage->info->cardinality, estimated_cardinality); +duckdb_state duckdb_bind_blob(duckdb_prepared_statement prepared_statement, idx_t param_idx, const void *data, + idx_t length) { + return duckdb_bind_value(prepared_statement, param_idx, Value::BLOB((duckdb::const_data_ptr_t)data, length)); } -//===--------------------------------------------------------------------===// -// Index Scan -//===--------------------------------------------------------------------===// -struct IndexScanOperatorData : public FunctionOperatorData { - Vector row_ids; - ColumnFetchState fetch_state; - LocalScanState local_storage_state; - vector column_ids; - bool finished; -}; +duckdb_state duckdb_bind_null(duckdb_prepared_statement prepared_statement, idx_t param_idx) { + return duckdb_bind_value(prepared_statement, param_idx, Value()); +} -static unique_ptr IndexScanInit(ClientContext &context, const FunctionData *bind_data_p, - vector &column_ids, TableFilterCollection 
*filters) { - auto result = make_unique(); - auto &transaction = Transaction::GetTransaction(context); - auto &bind_data = (const TableScanBindData &)*bind_data_p; - result->column_ids = column_ids; - result->row_ids.SetType(LOGICAL_ROW_TYPE); - if (!bind_data.result_ids.empty()) { - FlatVector::SetData(result->row_ids, (data_ptr_t)&bind_data.result_ids[0]); +duckdb_state duckdb_execute_prepared(duckdb_prepared_statement prepared_statement, duckdb_result *out_result) { + auto wrapper = (PreparedStatementWrapper *)prepared_statement; + if (!wrapper || !wrapper->statement || !wrapper->statement->success) { + return DuckDBError; } - transaction.storage.InitializeScan(bind_data.table->storage.get(), result->local_storage_state, - filters->table_filters); - - result->finished = false; - return move(result); + auto result = wrapper->statement->Execute(wrapper->values, false); + D_ASSERT(result->type == QueryResultType::MATERIALIZED_RESULT); + auto mat_res = (MaterializedQueryResult *)result.get(); + return duckdb_translate_result(mat_res, out_result); } -static void IndexScanFunction(ClientContext &context, const FunctionData *bind_data_p, - FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { - auto &bind_data = (const TableScanBindData &)*bind_data_p; - auto &state = (IndexScanOperatorData &)*operator_state; - auto &transaction = Transaction::GetTransaction(context); - if (!state.finished) { - bind_data.table->storage->Fetch(transaction, output, state.column_ids, state.row_ids, - bind_data.result_ids.size(), state.fetch_state); - state.finished = true; +void duckdb_destroy_prepare(duckdb_prepared_statement *prepared_statement) { + if (!prepared_statement) { + return; } - if (output.size() == 0) { - transaction.storage.Scan(state.local_storage_state, state.column_ids, output); + auto wrapper = (PreparedStatementWrapper *)*prepared_statement; + if (wrapper) { + delete wrapper; } + *prepared_statement = nullptr; } -static void 
RewriteIndexExpression(Index &index, LogicalGet &get, Expression &expr, bool &rewrite_possible) { - if (expr.type == ExpressionType::BOUND_COLUMN_REF) { - auto &bound_colref = (BoundColumnRefExpression &)expr; - // bound column ref: rewrite to fit in the current set of bound column ids - bound_colref.binding.table_index = get.table_index; - column_t referenced_column = index.column_ids[bound_colref.binding.column_index]; - // search for the referenced column in the set of column_ids - for (idx_t i = 0; i < get.column_ids.size(); i++) { - if (get.column_ids[i] == referenced_column) { - bound_colref.binding.column_index = i; - return; + + +namespace duckdb { + +template +void WriteData(duckdb_result *out, ChunkCollection &source, idx_t col) { + idx_t row = 0; + auto target = (T *)out->columns[col].data; + for (auto &chunk : source.Chunks()) { + auto source = FlatVector::GetData(chunk->data[col]); + auto &mask = FlatVector::Validity(chunk->data[col]); + + for (idx_t k = 0; k < chunk->size(); k++, row++) { + if (!mask.RowIsValid(k)) { + continue; } + target[row] = source[k]; } - // column id not found in bound columns in the LogicalGet: rewrite not possible - rewrite_possible = false; } - ExpressionIterator::EnumerateChildren( - expr, [&](Expression &child) { RewriteIndexExpression(index, get, child, rewrite_possible); }); } -void TableScanPushdownComplexFilter(ClientContext &context, LogicalGet &get, FunctionData *bind_data_p, - vector> &filters) { - auto &bind_data = (TableScanBindData &)*bind_data_p; - auto table = bind_data.table; - auto &storage = *table->storage; - - if (bind_data.is_index_scan) { - return; +duckdb_state duckdb_translate_result(MaterializedQueryResult *result, duckdb_result *out) { + D_ASSERT(result); + if (!out) { + // no result to write to, only return the status + return result->success ? 
DuckDBSuccess : DuckDBError; } - if (filters.empty() || storage.info->indexes.empty()) { - // no indexes or no filters: skip the pushdown - return; + memset(out, 0, sizeof(duckdb_result)); + if (!result->success) { + // write the error message + out->error_message = strdup(result->error.c_str()); + return DuckDBError; + } + // copy the data + // first write the meta data + out->column_count = result->types.size(); + out->row_count = result->collection.Count(); + out->rows_changed = 0; + if (out->row_count > 0 && StatementTypeReturnChanges(result->statement_type)) { + // update total changes + auto row_changes = result->GetValue(0, 0); + if (!row_changes.is_null && row_changes.TryCastAs(LogicalType::BIGINT)) { + out->rows_changed = row_changes.GetValue(); + } } - // check all the indexes - for (size_t j = 0; j < storage.info->indexes.size(); j++) { - auto &index = storage.info->indexes[j]; + out->columns = (duckdb_column *)duckdb_malloc(sizeof(duckdb_column) * out->column_count); + if (!out->columns) { // LCOV_EXCL_START + // malloc failure + return DuckDBError; + } // LCOV_EXCL_STOP - // first rewrite the index expression so the ColumnBindings align with the column bindings of the current table - if (index->unbound_expressions.size() > 1) { - continue; + // zero initialize the columns (so we can cleanly delete it in case a malloc fails) + memset(out->columns, 0, sizeof(duckdb_column) * out->column_count); + for (idx_t i = 0; i < out->column_count; i++) { + out->columns[i].type = ConvertCPPTypeToC(result->types[i]); + out->columns[i].name = strdup(result->names[i].c_str()); + out->columns[i].nullmask = (bool *)duckdb_malloc(sizeof(bool) * out->row_count); + out->columns[i].data = duckdb_malloc(GetCTypeSize(out->columns[i].type) * out->row_count); + if (!out->columns[i].nullmask || !out->columns[i].name || !out->columns[i].data) { // LCOV_EXCL_START + // malloc failure + return DuckDBError; + } // LCOV_EXCL_STOP + } + // now write the data + for (idx_t col = 0; col < 
out->column_count; col++) { + // first set the nullmask + idx_t row = 0; + for (auto &chunk : result->collection.Chunks()) { + for (idx_t k = 0; k < chunk->size(); k++) { + out->columns[col].nullmask[row++] = FlatVector::IsNull(chunk->data[col], k); + } } - auto index_expression = index->unbound_expressions[0]->Copy(); - bool rewrite_possible = true; - RewriteIndexExpression(*index, get, *index_expression, rewrite_possible); - if (!rewrite_possible) { - // could not rewrite! - continue; + // then write the data + switch (result->types[col].id()) { + case LogicalTypeId::BOOLEAN: + WriteData(out, result->collection, col); + break; + case LogicalTypeId::TINYINT: + WriteData(out, result->collection, col); + break; + case LogicalTypeId::SMALLINT: + WriteData(out, result->collection, col); + break; + case LogicalTypeId::INTEGER: + WriteData(out, result->collection, col); + break; + case LogicalTypeId::BIGINT: + WriteData(out, result->collection, col); + break; + case LogicalTypeId::UTINYINT: + WriteData(out, result->collection, col); + break; + case LogicalTypeId::USMALLINT: + WriteData(out, result->collection, col); + break; + case LogicalTypeId::UINTEGER: + WriteData(out, result->collection, col); + break; + case LogicalTypeId::UBIGINT: + WriteData(out, result->collection, col); + break; + case LogicalTypeId::FLOAT: + WriteData(out, result->collection, col); + break; + case LogicalTypeId::DOUBLE: + WriteData(out, result->collection, col); + break; + case LogicalTypeId::DATE: + WriteData(out, result->collection, col); + break; + case LogicalTypeId::TIME: + WriteData(out, result->collection, col); + break; + case LogicalTypeId::TIMESTAMP: + WriteData(out, result->collection, col); + break; + case LogicalTypeId::VARCHAR: { + idx_t row = 0; + auto target = (const char **)out->columns[col].data; + for (auto &chunk : result->collection.Chunks()) { + auto source = FlatVector::GetData(chunk->data[col]); + for (idx_t k = 0; k < chunk->size(); k++) { + if 
(!FlatVector::IsNull(chunk->data[col], k)) { + target[row] = (char *)duckdb_malloc(source[k].GetSize() + 1); + assert(target[row]); + memcpy((void *)target[row], source[k].GetDataUnsafe(), source[k].GetSize()); + auto write_arr = (char *)target[row]; + write_arr[source[k].GetSize()] = '\0'; + } else { + target[row] = nullptr; + } + row++; + } + } + break; } - - Value low_value, high_value, equal_value; - ExpressionType low_comparison_type = ExpressionType::INVALID, high_comparison_type = ExpressionType::INVALID; - // try to find a matching index for any of the filter expressions - for (auto &filter : filters) { - auto expr = filter.get(); - - // create a matcher for a comparison with a constant - ComparisonExpressionMatcher matcher; - // match on a comparison type - matcher.expr_type = make_unique(); - // match on a constant comparison with the indexed expression - matcher.matchers.push_back(make_unique(index_expression.get())); - matcher.matchers.push_back(make_unique()); - - matcher.policy = SetMatcher::Policy::UNORDERED; - - vector bindings; - if (matcher.Match(expr, bindings)) { - // range or equality comparison with constant value - // we can use our index here - // bindings[0] = the expression - // bindings[1] = the index expression - // bindings[2] = the constant - auto comparison = (BoundComparisonExpression *)bindings[0]; - D_ASSERT(bindings[0]->GetExpressionClass() == ExpressionClass::BOUND_COMPARISON); - D_ASSERT(bindings[2]->type == ExpressionType::VALUE_CONSTANT); - - auto constant_value = ((BoundConstantExpression *)bindings[2])->value; - auto comparison_type = comparison->type; - if (comparison->left->type == ExpressionType::VALUE_CONSTANT) { - // the expression is on the right side, we flip them around - comparison_type = FlipComparisionExpression(comparison_type); + case LogicalTypeId::BLOB: { + idx_t row = 0; + auto target = (duckdb_blob *)out->columns[col].data; + for (auto &chunk : result->collection.Chunks()) { + auto source = 
FlatVector::GetData(chunk->data[col]); + for (idx_t k = 0; k < chunk->size(); k++) { + if (!FlatVector::IsNull(chunk->data[col], k)) { + target[row].data = (char *)duckdb_malloc(source[k].GetSize()); + target[row].size = source[k].GetSize(); + assert(target[row].data); + memcpy((void *)target[row].data, source[k].GetDataUnsafe(), source[k].GetSize()); + } else { + target[row].data = nullptr; + target[row].size = 0; + } + row++; } - if (comparison_type == ExpressionType::COMPARE_EQUAL) { - // equality value - // equality overrides any other bounds so we just break here - equal_value = constant_value; - break; - } else if (comparison_type == ExpressionType::COMPARE_GREATERTHANOREQUALTO || - comparison_type == ExpressionType::COMPARE_GREATERTHAN) { - // greater than means this is a lower bound - low_value = constant_value; - low_comparison_type = comparison_type; - } else { - // smaller than means this is an upper bound - high_value = constant_value; - high_comparison_type = comparison_type; + } + break; + } + case LogicalTypeId::TIMESTAMP_NS: + case LogicalTypeId::TIMESTAMP_MS: + case LogicalTypeId::TIMESTAMP_SEC: { + idx_t row = 0; + auto target = (timestamp_t *)out->columns[col].data; + for (auto &chunk : result->collection.Chunks()) { + auto source = FlatVector::GetData(chunk->data[col]); + + for (idx_t k = 0; k < chunk->size(); k++) { + if (!FlatVector::IsNull(chunk->data[col], k)) { + if (result->types[col].id() == LogicalTypeId::TIMESTAMP_NS) { + target[row] = Timestamp::FromEpochNanoSeconds(source[k].value); + } else if (result->types[col].id() == LogicalTypeId::TIMESTAMP_MS) { + target[row] = Timestamp::FromEpochMs(source[k].value); + } else { + D_ASSERT(result->types[col].id() == LogicalTypeId::TIMESTAMP_SEC); + target[row] = Timestamp::FromEpochSeconds(source[k].value); + } + } + row++; } - } else if (expr->type == ExpressionType::COMPARE_BETWEEN) { - // BETWEEN expression - auto &between = (BoundBetweenExpression &)*expr; - if 
(!between.input->Equals(index_expression.get())) { - // expression doesn't match the current index expression - continue; + } + break; + } + case LogicalTypeId::HUGEINT: { + idx_t row = 0; + auto target = (duckdb_hugeint *)out->columns[col].data; + for (auto &chunk : result->collection.Chunks()) { + auto source = FlatVector::GetData(chunk->data[col]); + for (idx_t k = 0; k < chunk->size(); k++) { + if (!FlatVector::IsNull(chunk->data[col], k)) { + target[row].lower = source[k].lower; + target[row].upper = source[k].upper; + } + row++; } - if (between.lower->type != ExpressionType::VALUE_CONSTANT || - between.upper->type != ExpressionType::VALUE_CONSTANT) { - // not a constant comparison - continue; + } + break; + } + case LogicalTypeId::INTERVAL: { + idx_t row = 0; + auto target = (duckdb_interval *)out->columns[col].data; + for (auto &chunk : result->collection.Chunks()) { + auto source = FlatVector::GetData(chunk->data[col]); + for (idx_t k = 0; k < chunk->size(); k++) { + if (!FlatVector::IsNull(chunk->data[col], k)) { + target[row].days = source[k].days; + target[row].months = source[k].months; + target[row].micros = source[k].micros; + } + row++; } - low_value = ((BoundConstantExpression &)*between.lower).value; - low_comparison_type = between.lower_inclusive ? ExpressionType::COMPARE_GREATERTHANOREQUALTO - : ExpressionType::COMPARE_GREATERTHAN; - high_value = ((BoundConstantExpression &)*between.upper).value; - high_comparison_type = between.upper_inclusive ? 
ExpressionType::COMPARE_LESSTHANOREQUALTO - : ExpressionType::COMPARE_LESSTHAN; - break; } + break; } - if (!equal_value.is_null || !low_value.is_null || !high_value.is_null) { - // we can scan this index using this predicate: try a scan - auto &transaction = Transaction::GetTransaction(context); - unique_ptr index_state; - if (!equal_value.is_null) { - // equality predicate - index_state = - index->InitializeScanSinglePredicate(transaction, equal_value, ExpressionType::COMPARE_EQUAL); - } else if (!low_value.is_null && !high_value.is_null) { - // two-sided predicate - index_state = index->InitializeScanTwoPredicates(transaction, low_value, low_comparison_type, - high_value, high_comparison_type); - } else if (!low_value.is_null) { - // less than predicate - index_state = index->InitializeScanSinglePredicate(transaction, low_value, low_comparison_type); - } else { - D_ASSERT(!high_value.is_null); - index_state = index->InitializeScanSinglePredicate(transaction, high_value, high_comparison_type); + default: // LCOV_EXCL_START + // unsupported type for C API + D_ASSERT(0); + return DuckDBError; + } // LCOV_EXCL_STOP + } + return DuckDBSuccess; +} + +} // namespace duckdb + +static void duckdb_destroy_column(duckdb_column column, idx_t count) { + if (column.data) { + if (column.type == DUCKDB_TYPE_VARCHAR) { + // varchar, delete individual strings + auto data = (char **)column.data; + for (idx_t i = 0; i < count; i++) { + if (data[i]) { + duckdb_free(data[i]); + } } - if (index->Scan(transaction, storage, *index_state, STANDARD_VECTOR_SIZE, bind_data.result_ids)) { - // use an index scan! 
- bind_data.is_index_scan = true; - get.function.init = IndexScanInit; - get.function.function = IndexScanFunction; - get.function.max_threads = nullptr; - get.function.init_parallel_state = nullptr; - get.function.parallel_state_next = nullptr; - get.function.table_scan_progress = nullptr; - get.function.filter_pushdown = false; - } else { - bind_data.result_ids.clear(); + } else if (column.type == DUCKDB_TYPE_BLOB) { + // blob, delete individual blobs + auto data = (duckdb_blob *)column.data; + for (idx_t i = 0; i < count; i++) { + if (data[i].data) { + duckdb_free((void *)data[i].data); + } } - return; } + duckdb_free(column.data); + } + if (column.nullmask) { + duckdb_free(column.nullmask); + } + if (column.name) { + duckdb_free(column.name); } } -string TableScanToString(const FunctionData *bind_data_p) { - auto &bind_data = (const TableScanBindData &)*bind_data_p; - string result = bind_data.table->name; - return result; +void duckdb_destroy_result(duckdb_result *result) { + if (result->error_message) { + duckdb_free(result->error_message); + } + if (result->columns) { + for (idx_t i = 0; i < result->column_count; i++) { + duckdb_destroy_column(result->columns[i], result->row_count); + } + duckdb_free(result->columns); + } + memset(result, 0, sizeof(duckdb_result)); } -TableFunction TableScanFunction::GetFunction() { - TableFunction scan_function("seq_scan", {}, TableScanFunc); - scan_function.init = TableScanInit; - scan_function.statistics = TableScanStatistics; - scan_function.dependency = TableScanDependency; - scan_function.cardinality = TableScanCardinality; - scan_function.pushdown_complex_filter = TableScanPushdownComplexFilter; - scan_function.to_string = TableScanToString; - scan_function.max_threads = TableScanMaxThreads; - scan_function.init_parallel_state = TableScanInitParallelState; - scan_function.parallel_init = TableScanParallelInit; - scan_function.parallel_state_next = TableScanParallelStateNext; - scan_function.table_scan_progress = 
TableScanProgress; - scan_function.projection_pushdown = true; - scan_function.filter_pushdown = true; - return scan_function; +const char *duckdb_column_name(duckdb_result *result, idx_t col) { + if (!result || col >= result->column_count) { + return nullptr; + } + return result->columns[col].name; } -} // namespace duckdb - - - -namespace duckdb { - -struct PragmaVersionData : public FunctionOperatorData { - PragmaVersionData() : finished(false) { +duckdb_type duckdb_column_type(duckdb_result *result, idx_t col) { + if (!result || col >= result->column_count) { + return DUCKDB_TYPE_INVALID; } - bool finished; -}; + return result->columns[col].type; +} -static unique_ptr PragmaVersionBind(ClientContext &context, vector &inputs, - unordered_map &named_parameters, - vector &input_table_types, - vector &input_table_names, vector &return_types, - vector &names) { - names.emplace_back("library_version"); - return_types.push_back(LogicalType::VARCHAR); - names.emplace_back("source_id"); - return_types.push_back(LogicalType::VARCHAR); - return nullptr; +idx_t duckdb_column_count(duckdb_result *result) { + if (!result) { + return 0; + } + return result->column_count; } -static unique_ptr PragmaVersionInit(ClientContext &context, const FunctionData *bind_data, - vector &column_ids, - TableFilterCollection *filters) { - return make_unique(); +idx_t duckdb_row_count(duckdb_result *result) { + if (!result) { + return 0; + } + return result->row_count; } -static void PragmaVersionFunction(ClientContext &context, const FunctionData *bind_data, - FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { - auto &data = (PragmaVersionData &)*operator_state; - if (data.finished) { - // finished returning values - return; +idx_t duckdb_rows_changed(duckdb_result *result) { + if (!result) { + return 0; } - output.SetCardinality(1); - output.SetValue(0, 0, DuckDB::LibraryVersion()); - output.SetValue(1, 0, DuckDB::SourceID()); - data.finished = true; + return 
result->rows_changed; } -void PragmaVersion::RegisterFunction(BuiltinFunctions &set) { - set.AddFunction(TableFunction("pragma_version", {}, PragmaVersionFunction, PragmaVersionBind, PragmaVersionInit)); +void *duckdb_column_data(duckdb_result *result, idx_t col) { + if (!result || col >= result->column_count) { + return nullptr; + } + return result->columns[col].data; } -const char *DuckDB::SourceID() { - return DUCKDB_SOURCE_ID; +bool *duckdb_nullmask_data(duckdb_result *result, idx_t col) { + if (!result || col >= result->column_count) { + return nullptr; + } + return result->columns[col].nullmask; } -const char *DuckDB::LibraryVersion() { - return DUCKDB_VERSION; +char *duckdb_result_error(duckdb_result *result) { + if (!result) { + return nullptr; + } + return result->error_message; } -} // namespace duckdb +using duckdb::const_data_ptr_t; +using duckdb::Date; +using duckdb::date_t; +using duckdb::dtime_t; +using duckdb::hugeint_t; +using duckdb::interval_t; +using duckdb::LogicalType; +using duckdb::string; +using duckdb::string_t; +using duckdb::Time; +using duckdb::Timestamp; +using duckdb::timestamp_t; +using duckdb::Value; +using duckdb::Vector; namespace duckdb { -void UDFWrapper::RegisterFunction(string name, vector args, LogicalType ret_type, - scalar_function_t udf_function, ClientContext &context, LogicalType varargs) { - - ScalarFunction scalar_function(move(name), move(args), move(ret_type), move(udf_function)); - scalar_function.varargs = move(varargs); - CreateScalarFunctionInfo info(scalar_function); - info.schema = DEFAULT_SCHEMA; - context.RegisterFunction(&info); +//===--------------------------------------------------------------------===// +// Unsafe Fetch (for internal use only) +//===--------------------------------------------------------------------===// +template +T UnsafeFetch(duckdb_result *result, idx_t col, idx_t row) { + D_ASSERT(row < result->row_count); + return ((T *)result->columns[col].data)[row]; } -void 
UDFWrapper::RegisterAggrFunction(AggregateFunction aggr_function, ClientContext &context, LogicalType varargs) { - aggr_function.varargs = move(varargs); - CreateAggregateFunctionInfo info(move(aggr_function)); - context.RegisterFunction(&info); -} +//===--------------------------------------------------------------------===// +// Fetch Default Value +//===--------------------------------------------------------------------===// +struct FetchDefaultValue { + template + static T Operation() { + return 0; + } +}; -} // namespace duckdb +template <> +date_t FetchDefaultValue::Operation() { + date_t result; + result.days = 0; + return result; +} +template <> +dtime_t FetchDefaultValue::Operation() { + dtime_t result; + result.micros = 0; + return result; +} +template <> +timestamp_t FetchDefaultValue::Operation() { + timestamp_t result; + result.value = 0; + return result; +} +template <> +interval_t FetchDefaultValue::Operation() { + interval_t result; + result.months = 0; + result.days = 0; + result.micros = 0; + return result; +} +template <> +char *FetchDefaultValue::Operation() { + return nullptr; +} +template <> +duckdb_blob FetchDefaultValue::Operation() { + duckdb_blob result; + result.data = nullptr; + result.size = 0; + return result; +} +//===--------------------------------------------------------------------===// +// String Casts +//===--------------------------------------------------------------------===// +template +struct FromCStringCastWrapper { + template + static bool Operation(SOURCE_TYPE input_str, RESULT_TYPE &result) { + string_t input(input_str); + return OP::template Operation(input, result); + } +}; +template +struct ToCStringCastWrapper { + template + static bool Operation(SOURCE_TYPE input, RESULT_TYPE &result) { + Vector result_vector(LogicalType::VARCHAR, nullptr); + auto result_string = OP::template Operation(input, result_vector); + auto result_size = result_string.GetSize(); + auto result_data = result_string.GetDataUnsafe(); + + 
result = (char *)duckdb_malloc(result_size + 1); + memcpy(result, result_data, result_size); + result[result_size] = '\0'; + return true; + } +}; +//===--------------------------------------------------------------------===// +// Blob Casts +//===--------------------------------------------------------------------===// +struct FromCBlobCastWrapper { + template + static bool Operation(SOURCE_TYPE input_str, RESULT_TYPE &result) { + return false; + } +}; +template <> +bool FromCBlobCastWrapper::Operation(duckdb_blob input, char *&result) { + string_t input_str((const char *)input.data, input.size); + return ToCStringCastWrapper::template Operation(input_str, result); +} +} // namespace duckdb -namespace duckdb { +using duckdb::FetchDefaultValue; +using duckdb::FromCBlobCastWrapper; +using duckdb::FromCStringCastWrapper; +using duckdb::ToCStringCastWrapper; +using duckdb::UnsafeFetch; -Appender::Appender(Connection &con, const string &schema_name, const string &table_name) - : context(con.context), column(0) { - description = con.TableInfo(schema_name, table_name); - if (!description) { - // table could not be found - throw CatalogException(StringUtil::Format("Table \"%s.%s\" could not be found", schema_name, table_name)); - } else { - vector types; - for (auto &column : description->columns) { - types.push_back(column.type); - } - chunk.Initialize(types); +//===--------------------------------------------------------------------===// +// Templated Casts +//===--------------------------------------------------------------------===// +template +RESULT_TYPE TryCastCInternal(duckdb_result *result, idx_t col, idx_t row) { + RESULT_TYPE result_value; + if (!OP::template Operation(UnsafeFetch(result, col, row), result_value)) { + return FetchDefaultValue::Operation(); } + return result_value; } -Appender::Appender(Connection &con, const string &table_name) : Appender(con, DEFAULT_SCHEMA, table_name) { +static bool CanFetchValue(duckdb_result *result, idx_t col, idx_t row) { 
+ if (!result || col >= result->column_count || row >= result->row_count) { + return false; + } + if (result->columns[col].nullmask[row]) { + return false; + } + return true; } -Appender::~Appender() { - // flush any remaining chunks - // wrapped in a try/catch because Close() can throw if the table was dropped in the meantime - try { - Close(); - } catch (...) { +template +static RESULT_TYPE GetInternalCValue(duckdb_result *result, idx_t col, idx_t row) { + if (!CanFetchValue(result, col, row)) { + return FetchDefaultValue::Operation(); } + switch (result->columns[col].type) { + case DUCKDB_TYPE_BOOLEAN: + return TryCastCInternal(result, col, row); + case DUCKDB_TYPE_TINYINT: + return TryCastCInternal(result, col, row); + case DUCKDB_TYPE_SMALLINT: + return TryCastCInternal(result, col, row); + case DUCKDB_TYPE_INTEGER: + return TryCastCInternal(result, col, row); + case DUCKDB_TYPE_BIGINT: + return TryCastCInternal(result, col, row); + case DUCKDB_TYPE_UTINYINT: + return TryCastCInternal(result, col, row); + case DUCKDB_TYPE_USMALLINT: + return TryCastCInternal(result, col, row); + case DUCKDB_TYPE_UINTEGER: + return TryCastCInternal(result, col, row); + case DUCKDB_TYPE_UBIGINT: + return TryCastCInternal(result, col, row); + case DUCKDB_TYPE_FLOAT: + return TryCastCInternal(result, col, row); + case DUCKDB_TYPE_DOUBLE: + return TryCastCInternal(result, col, row); + case DUCKDB_TYPE_DATE: + return TryCastCInternal(result, col, row); + case DUCKDB_TYPE_TIME: + return TryCastCInternal(result, col, row); + case DUCKDB_TYPE_TIMESTAMP: + return TryCastCInternal(result, col, row); + case DUCKDB_TYPE_HUGEINT: + return TryCastCInternal(result, col, row); + case DUCKDB_TYPE_INTERVAL: + return TryCastCInternal(result, col, row); + case DUCKDB_TYPE_VARCHAR: + return TryCastCInternal>(result, col, row); + case DUCKDB_TYPE_BLOB: + return TryCastCInternal(result, col, row); + default: // LCOV_EXCL_START + // invalid type for C to C++ conversion + D_ASSERT(0); + return 
FetchDefaultValue::Operation(); + } // LCOV_EXCL_STOP } -void Appender::BeginRow() { +//===--------------------------------------------------------------------===// +// duckdb_value_ functions +//===--------------------------------------------------------------------===// +bool duckdb_value_boolean(duckdb_result *result, idx_t col, idx_t row) { + return GetInternalCValue(result, col, row); } -void Appender::EndRow() { - // check that all rows have been appended to - if (column != chunk.ColumnCount()) { - throw InvalidInputException("Call to EndRow before all rows have been appended to!"); - } - column = 0; - chunk.SetCardinality(chunk.size() + 1); - if (chunk.size() >= STANDARD_VECTOR_SIZE) { - Flush(); - } +int8_t duckdb_value_int8(duckdb_result *result, idx_t col, idx_t row) { + return GetInternalCValue(result, col, row); } -template -void Appender::AppendValueInternal(Vector &col, SRC input) { - FlatVector::GetData(col)[chunk.size()] = Cast::Operation(input); +int16_t duckdb_value_int16(duckdb_result *result, idx_t col, idx_t row) { + return GetInternalCValue(result, col, row); } -template -void Appender::AppendValueInternal(T input) { - if (column >= chunk.ColumnCount()) { - throw InvalidInputException("Too many appends for chunk!"); - } - auto &col = chunk.data[column]; - switch (col.GetType().InternalType()) { - case PhysicalType::BOOL: - AppendValueInternal(col, input); - break; - case PhysicalType::UINT8: - AppendValueInternal(col, input); - break; - case PhysicalType::INT8: - AppendValueInternal(col, input); - break; - case PhysicalType::UINT16: - AppendValueInternal(col, input); - break; - case PhysicalType::INT16: - AppendValueInternal(col, input); - break; - case PhysicalType::UINT32: - AppendValueInternal(col, input); - break; - case PhysicalType::INT32: - AppendValueInternal(col, input); - break; - case PhysicalType::UINT64: - AppendValueInternal(col, input); - break; - case PhysicalType::INT64: - AppendValueInternal(col, input); - break; - case 
PhysicalType::FLOAT: - AppendValueInternal(col, input); - break; - case PhysicalType::DOUBLE: - AppendValueInternal(col, input); - break; - default: - AppendValue(Value::CreateValue(input)); - return; - } - column++; +int32_t duckdb_value_int32(duckdb_result *result, idx_t col, idx_t row) { + return GetInternalCValue(result, col, row); } -template <> -void Appender::Append(bool value) { - AppendValueInternal(value); +int64_t duckdb_value_int64(duckdb_result *result, idx_t col, idx_t row) { + return GetInternalCValue(result, col, row); } -template <> -void Appender::Append(int8_t value) { - AppendValueInternal(value); +duckdb_hugeint duckdb_value_hugeint(duckdb_result *result, idx_t col, idx_t row) { + duckdb_hugeint result_value; + auto internal_value = GetInternalCValue(result, col, row); + result_value.lower = internal_value.lower; + result_value.upper = internal_value.upper; + return result_value; } -template <> -void Appender::Append(int16_t value) { - AppendValueInternal(value); +uint8_t duckdb_value_uint8(duckdb_result *result, idx_t col, idx_t row) { + return GetInternalCValue(result, col, row); } -template <> -void Appender::Append(int32_t value) { - AppendValueInternal(value); +uint16_t duckdb_value_uint16(duckdb_result *result, idx_t col, idx_t row) { + return GetInternalCValue(result, col, row); } -template <> -void Appender::Append(int64_t value) { - AppendValueInternal(value); +uint32_t duckdb_value_uint32(duckdb_result *result, idx_t col, idx_t row) { + return GetInternalCValue(result, col, row); } -template <> -void Appender::Append(uint8_t value) { - AppendValueInternal(value); +uint64_t duckdb_value_uint64(duckdb_result *result, idx_t col, idx_t row) { + return GetInternalCValue(result, col, row); } -template <> -void Appender::Append(uint16_t value) { - AppendValueInternal(value); +float duckdb_value_float(duckdb_result *result, idx_t col, idx_t row) { + return GetInternalCValue(result, col, row); } -template <> -void Appender::Append(uint32_t 
value) { - AppendValueInternal(value); +double duckdb_value_double(duckdb_result *result, idx_t col, idx_t row) { + return GetInternalCValue(result, col, row); } -template <> -void Appender::Append(uint64_t value) { - AppendValueInternal(value); +duckdb_date duckdb_value_date(duckdb_result *result, idx_t col, idx_t row) { + duckdb_date result_value; + result_value.days = GetInternalCValue(result, col, row).days; + return result_value; } -template <> -void Appender::Append(const char *value) { - AppendValueInternal(string_t(value)); +duckdb_time duckdb_value_time(duckdb_result *result, idx_t col, idx_t row) { + duckdb_time result_value; + result_value.micros = GetInternalCValue(result, col, row).micros; + return result_value; } -void Appender::Append(const char *value, uint32_t length) { - AppendValueInternal(string_t(value, length)); +duckdb_timestamp duckdb_value_timestamp(duckdb_result *result, idx_t col, idx_t row) { + duckdb_timestamp result_value; + result_value.micros = GetInternalCValue(result, col, row).value; + return result_value; } -template <> -void Appender::Append(float value) { - if (!Value::FloatIsValid(value)) { - throw InvalidInputException("Float value is out of range!"); - } - AppendValueInternal(value); +duckdb_interval duckdb_value_interval(duckdb_result *result, idx_t col, idx_t row) { + duckdb_interval result_value; + auto ival = GetInternalCValue(result, col, row); + result_value.months = ival.months; + result_value.days = ival.days; + result_value.micros = ival.micros; + return result_value; } -template <> -void Appender::Append(double value) { - if (!Value::DoubleIsValid(value)) { - throw InvalidInputException("Double value is out of range!"); - } - AppendValueInternal(value); +char *duckdb_value_varchar(duckdb_result *result, idx_t col, idx_t row) { + return GetInternalCValue>(result, col, row); } -template <> -void Appender::Append(Value value) { // NOLINT: template shtuff - if (column >= chunk.ColumnCount()) { - throw 
InvalidInputException("Too many appends for chunk!"); +char *duckdb_value_varchar_internal(duckdb_result *result, idx_t col, idx_t row) { + if (!CanFetchValue(result, col, row)) { + return nullptr; } - AppendValue(value); -} - -template <> -void Appender::Append(std::nullptr_t value) { - if (column >= chunk.ColumnCount()) { - throw InvalidInputException("Too many appends for chunk!"); + if (duckdb_column_type(result, col) != DUCKDB_TYPE_VARCHAR) { + return nullptr; } - auto &col = chunk.data[column++]; - FlatVector::SetNull(col, chunk.size(), true); + return UnsafeFetch(result, col, row); } -void Appender::AppendValue(const Value &value) { - chunk.SetValue(column, chunk.size(), value); - column++; -} - -void Appender::Flush() { - // check that all vectors have the same length before appending - if (column != 0) { - throw InvalidInputException("Failed to Flush appender: incomplete append to row!"); - } +duckdb_blob duckdb_value_blob(duckdb_result *result, idx_t col, idx_t row) { + if (CanFetchValue(result, col, row) && result->columns[col].type == DUCKDB_TYPE_BLOB) { + auto internal_result = UnsafeFetch(result, col, row); - if (chunk.size() == 0) { - return; + duckdb_blob result_blob; + result_blob.data = malloc(internal_result.size); + result_blob.size = internal_result.size; + memcpy(result_blob.data, internal_result.data, internal_result.size); + return result_blob; } - context->Append(*description, chunk); - - chunk.Reset(); - column = 0; + return FetchDefaultValue::Operation(); } -void Appender::Close() { - if (column == 0 || column == chunk.ColumnCount()) { - Flush(); +bool duckdb_value_is_null(duckdb_result *result, idx_t col, idx_t row) { + if (!result || col >= result->column_count || row >= result->row_count) { + return false; } + return result->columns[col].nullmask[row]; } -} // namespace duckdb - @@ -78470,6 +88196,9 @@ class ExpressionRewriter : public LogicalOperatorVisitor { + +#include + namespace duckdb { class Binder; @@ -78482,12 +88211,16 @@ 
class Optimizer { ClientContext &context; Binder &binder; ExpressionRewriter rewriter; + +private: + void RunOptimizer(OptimizerType type, const std::function &callback); }; } // namespace duckdb + //===----------------------------------------------------------------------===// // DuckDB // @@ -78752,6 +88485,8 @@ class PragmaHandler { + + namespace duckdb { class ClientContextLock { @@ -78767,10 +88502,13 @@ class ClientContextLock { }; ClientContext::ClientContext(shared_ptr database) - : db(move(database)), transaction(db->GetTransactionManager(), *this), interrupted(false), executor(*this), + : profiler(make_unique()), query_profiler_history(make_unique()), + db(move(database)), transaction(db->GetTransactionManager(), *this), interrupted(false), executor(*this), temporary_objects(make_unique(&db->GetCatalog(), TEMP_SCHEMA, true)), open_result(nullptr) { std::random_device rd; random_engine.seed(rd()); + + progress_bar = make_unique(&executor, wait_time); } ClientContext::~ClientContext() { @@ -78800,35 +88538,38 @@ void ClientContext::Cleanup() { unique_ptr ClientContext::Fetch() { auto lock = LockContext(); if (!open_result) { - // no result to fetch from - throw Exception("Fetch was called, but there is no open result (or the result was previously closed)"); + throw InternalException("Fetch was called, but there is no open result (or the result was previously closed)"); } try { // fetch the chunk and return it auto chunk = FetchInternal(*lock); return chunk; - } catch (Exception &ex) { + } catch (std::exception &ex) { open_result->error = ex.what(); - } catch (...) { + } catch (...) 
{ // LCOV_EXCL_START open_result->error = "Unhandled exception in Fetch"; - } + } // LCOV_EXCL_STOP open_result->success = false; CleanupInternal(*lock); return nullptr; } string ClientContext::FinalizeQuery(ClientContextLock &lock, bool success) { - profiler.EndQuery(); + profiler->EndQuery(); executor.Reset(); string error; if (transaction.HasActiveTransaction()) { ActiveTransaction().active_query = MAXIMUM_QUERY_ID; - query_profiler_history.GetPrevProfilers().emplace_back(transaction.ActiveTransaction().active_query, - move(profiler)); - profiler.save_location = query_profiler_history.GetPrevProfilers().back().second.save_location; - if (query_profiler_history.GetPrevProfilers().size() >= query_profiler_history.GetPrevProfilersSize()) { - query_profiler_history.GetPrevProfilers().pop_front(); + // Move the query profiler into the history + auto &prev_profilers = query_profiler_history->GetPrevProfilers(); + prev_profilers.emplace_back(transaction.ActiveTransaction().active_query, move(profiler)); + // Reinitialize the query profiler + profiler = make_unique(); + // Propagate settings of the saved query into the new profiler. + profiler->Propagate(*prev_profilers.back().second); + if (prev_profilers.size() >= query_profiler_history->GetPrevProfilersSize()) { + prev_profilers.pop_front(); } try { if (transaction.IsAutoCommit()) { @@ -78840,11 +88581,11 @@ string ClientContext::FinalizeQuery(ClientContextLock &lock, bool success) { transaction.Rollback(); } } - } catch (Exception &ex) { + } catch (std::exception &ex) { error = ex.what(); - } catch (...) { + } catch (...) 
{ // LCOV_EXCL_START error = "Unhandled exception!"; - } + } // LCOV_EXCL_STOP } return error; } @@ -78877,13 +88618,16 @@ shared_ptr ClientContext::CreatePreparedStatement(ClientC StatementType statement_type = statement->type; auto result = make_shared(statement_type); - profiler.StartPhase("planner"); + profiler->StartPhase("planner"); Planner planner(*this); planner.CreatePlan(move(statement)); D_ASSERT(planner.plan); - profiler.EndPhase(); + profiler->EndPhase(); auto plan = move(planner.plan); +#ifdef DEBUG + plan->Verify(); +#endif // extract the result column names from the plan result->read_only = planner.read_only; result->requires_valid_transaction = planner.requires_valid_transaction; @@ -78894,19 +88638,26 @@ shared_ptr ClientContext::CreatePreparedStatement(ClientC result->catalog_version = Transaction::GetTransaction(*this).catalog_version; if (enable_optimizer) { - profiler.StartPhase("optimizer"); + profiler->StartPhase("optimizer"); Optimizer optimizer(*planner.binder, *this); plan = optimizer.Optimize(move(plan)); D_ASSERT(plan); - profiler.EndPhase(); + profiler->EndPhase(); + +#ifdef DEBUG + plan->Verify(); +#endif } - profiler.StartPhase("physical_planner"); + profiler->StartPhase("physical_planner"); // now convert logical query plan into a physical query plan PhysicalPlanGenerator physical_planner(*this); auto physical_plan = physical_planner.CreatePlan(move(plan)); - profiler.EndPhase(); + profiler->EndPhase(); +#ifdef DEBUG + D_ASSERT(!physical_plan->ToString().empty()); +#endif result->plan = move(physical_plan); return result; } @@ -78936,10 +88687,7 @@ unique_ptr ClientContext::ExecutePreparedStatement(ClientContextLoc bool create_stream_result = statement.allow_stream_result && allow_stream_result; if (enable_progress_bar) { - if (progress_bar) { - progress_bar.reset(); - } - progress_bar = make_unique(&executor, wait_time); + progress_bar->Initialize(wait_time); progress_bar->Start(); } // store the physical plan in the context for 
calls to Fetch() @@ -78950,7 +88698,7 @@ unique_ptr ClientContext::ExecutePreparedStatement(ClientContextLoc D_ASSERT(types == statement.types); if (create_stream_result) { - if (progress_bar) { + if (enable_progress_bar) { progress_bar->Stop(); } // successfully compiled SELECT clause and it is the last statement @@ -78974,7 +88722,7 @@ unique_ptr ClientContext::ExecutePreparedStatement(ClientContextLoc #endif result->collection.Append(*chunk); } - if (progress_bar) { + if (enable_progress_bar) { progress_bar->Stop(); } return move(result); @@ -79008,6 +88756,35 @@ void ClientContext::HandlePragmaStatements(vector> &sta handler.HandlePragmaStatements(*lock, statements); } +unique_ptr ClientContext::ExtractPlan(const string &query) { + auto lock = LockContext(); + + auto statements = ParseStatementsInternal(*lock, query); + if (statements.size() != 1) { + throw Exception("ExtractPlan can only prepare a single statement"); + } + + unique_ptr plan; + RunFunctionInTransactionInternal(*lock, [&]() { + Planner planner(*this); + planner.CreatePlan(move(statements[0])); + D_ASSERT(planner.plan); + + plan = move(planner.plan); + + if (enable_optimizer) { + Optimizer optimizer(*planner.binder, *this); + plan = optimizer.Optimize(move(plan)); + } + + ColumnBindingResolver resolver; + resolver.VisitOperator(*plan); + + plan->ResolveOperatorTypes(); + }); + return plan; +} + unique_ptr ClientContext::PrepareInternal(ClientContextLock &lock, unique_ptr statement) { auto n_param = statement->n_param; @@ -79105,7 +88882,7 @@ unique_ptr ClientContext::RunStatementOrPreparedStatement(ClientCon statement = move(copied_statement); } // start the profiler - profiler.StartQuery(query); + profiler->StartQuery(query); try { if (statement) { result = RunStatementInternal(lock, query, move(statement), allow_stream_result); @@ -79188,12 +88965,22 @@ unique_ptr ClientContext::RunStatements(ClientContextLock &lock, co void ClientContext::LogQueryInternal(ClientContextLock &, const string 
&query) { if (!log_query_writer) { +#ifdef DUCKDB_FORCE_QUERY_LOG + try { + string log_path(DUCKDB_FORCE_QUERY_LOG); + log_query_writer = make_unique(FileSystem::GetFileSystem(*this), log_path); + } catch (...) { + return; + } +#else return; +#endif } // log query path is set: log the query log_query_writer->WriteData((const_data_ptr_t)query.c_str(), query.size()); log_query_writer->WriteData((const_data_ptr_t) "\n", 1); log_query_writer->Flush(); + log_query_writer->Sync(); } unique_ptr ClientContext::Query(unique_ptr statement, bool allow_stream_result) { @@ -79233,12 +89020,12 @@ void ClientContext::Interrupt() { void ClientContext::EnableProfiling() { auto lock = LockContext(); - profiler.Enable(); + profiler->Enable(); } void ClientContext::DisableProfiling() { auto lock = LockContext(); - profiler.Disable(); + profiler->Disable(); } string ClientContext::VerifyQuery(ClientContextLock &lock, const string &query, unique_ptr statement) { @@ -79283,6 +89070,8 @@ string ClientContext::VerifyQuery(ClientContextLock &lock, const string &query, // check that the hashes are equivalent too D_ASSERT(orig_expr_list[i]->Hash() == de_expr_list[i]->Hash()); D_ASSERT(orig_expr_list[i]->Hash() == cp_expr_list[i]->Hash()); + + D_ASSERT(!orig_expr_list[i]->Equals(nullptr)); } // now perform additional checking within the expressions for (idx_t outer_idx = 0; outer_idx < orig_expr_list.size(); outer_idx++) { @@ -79298,9 +89087,9 @@ string ClientContext::VerifyQuery(ClientContextLock &lock, const string &query, #endif // disable profiling if it is enabled - bool profiling_is_enabled = profiler.IsEnabled(); + bool profiling_is_enabled = profiler->IsEnabled(); if (profiling_is_enabled) { - profiler.Disable(); + profiler->Disable(); } // see below @@ -79331,9 +89120,9 @@ string ClientContext::VerifyQuery(ClientContextLock &lock, const string &query, auto explain_stmt = make_unique(move(statement_copy_for_explain)); try { RunStatementInternal(lock, explain_q, move(explain_stmt), 
false); - } catch (std::exception &ex) { + } catch (std::exception &ex) { // LCOV_EXCL_START return "EXPLAIN failed but query did not (" + string(ex.what()) + ")"; - } + } // LCOV_EXCL_STOP } // now execute the copied statement @@ -79367,7 +89156,7 @@ string ClientContext::VerifyQuery(ClientContextLock &lock, const string &query, enable_optimizer = true; if (profiling_is_enabled) { - profiler.Enable(); + profiler->Enable(); } // now compare the results @@ -79378,26 +89167,58 @@ string ClientContext::VerifyQuery(ClientContextLock &lock, const string &query, results.push_back(move(unoptimized_result)); vector names = {"Copied Result", "Deserialized Result", "Unoptimized Result"}; for (idx_t i = 0; i < results.size(); i++) { - if (original_result->success != results[i]->success) { + if (original_result->success != results[i]->success) { // LCOV_EXCL_START string result = names[i] + " differs from original result!\n"; result += "Original Result:\n" + original_result->ToString(); result += names[i] + ":\n" + results[i]->ToString(); return result; - } - if (!original_result->collection.Equals(results[i]->collection)) { + } // LCOV_EXCL_STOP + if (!original_result->collection.Equals(results[i]->collection)) { // LCOV_EXCL_START string result = names[i] + " differs from original result!\n"; result += "Original Result:\n" + original_result->ToString(); result += names[i] + ":\n" + results[i]->ToString(); return result; - } + } // LCOV_EXCL_STOP } return ""; } +bool ClientContext::UpdateFunctionInfoFromEntry(ScalarFunctionCatalogEntry *existing_function, + CreateScalarFunctionInfo *new_info) { + if (new_info->functions.empty()) { + throw std::runtime_error("Registering function without scalar function definitions!"); + } + bool need_rewrite_entry = false; + idx_t size_new_func = new_info->functions.size(); + for (idx_t exist_idx = 0; exist_idx < existing_function->functions.size(); ++exist_idx) { + bool can_add = true; + for (idx_t new_idx = 0; new_idx < size_new_func; 
++new_idx) { + if (new_info->functions[new_idx].Equal(existing_function->functions[exist_idx])) { + can_add = false; + break; + } + } + if (can_add) { + new_info->functions.push_back(existing_function->functions[exist_idx]); + need_rewrite_entry = true; + } + } + return need_rewrite_entry; +} + void ClientContext::RegisterFunction(CreateFunctionInfo *info) { RunFunctionInTransaction([&]() { auto &catalog = Catalog::GetCatalog(*this); + ScalarFunctionCatalogEntry *existing_function = (ScalarFunctionCatalogEntry *)catalog.GetEntry( + *this, CatalogType::SCALAR_FUNCTION_ENTRY, info->schema, info->name, true); + if (existing_function) { + if (UpdateFunctionInfoFromEntry(existing_function, (CreateScalarFunctionInfo *)info)) { + // function info was updated from catalog entry, rewrite is needed + info->on_conflict = OnCreateConflict::REPLACE_ON_CONFLICT; + } + } + // create function catalog.CreateFunction(*this, info); }); } @@ -79409,25 +89230,26 @@ void ClientContext::RunFunctionInTransactionInternal(ClientContextLock &lock, co throw Exception("Failed: transaction has been invalidated!"); } // check if we are on AutoCommit. 
In this case we should start a transaction - if (transaction.IsAutoCommit()) { + bool require_new_transaction = transaction.IsAutoCommit() && !transaction.HasActiveTransaction(); + if (require_new_transaction) { transaction.BeginTransaction(); } try { fun(); } catch (StandardException &ex) { - if (transaction.IsAutoCommit()) { + if (require_new_transaction) { transaction.Rollback(); } throw; } catch (std::exception &ex) { - if (transaction.IsAutoCommit()) { + if (require_new_transaction) { transaction.Rollback(); } else { ActiveTransaction().Invalidate(); } throw; } - if (transaction.IsAutoCommit()) { + if (require_new_transaction) { transaction.Commit(); } } @@ -79457,7 +89279,7 @@ unique_ptr ClientContext::TableInfo(const string &schema_name, return result; } -void ClientContext::Append(TableDescription &description, DataChunk &chunk) { +void ClientContext::Append(TableDescription &description, ChunkCollection &collection) { RunFunctionInTransaction([&]() { auto &catalog = Catalog::GetCatalog(*this); auto table_entry = catalog.GetEntry(*this, description.schema, description.table); @@ -79470,11 +89292,17 @@ void ClientContext::Append(TableDescription &description, DataChunk &chunk) { throw Exception("Failed to append: table entry has different number of columns!"); } } - table_entry->storage->Append(*table_entry, *this, chunk); + for (auto &chunk : collection.Chunks()) { + table_entry->storage->Append(*table_entry, *this, *chunk); + } }); } void ClientContext::TryBindRelation(Relation &relation, vector &result_columns) { +#ifdef DEBUG + D_ASSERT(!relation.GetAlias().empty()); + D_ASSERT(!relation.ToString().empty()); +#endif RunFunctionInTransaction([&]() { // bind the expressions auto binder = Binder::CreateBinder(*this); @@ -79488,10 +89316,13 @@ void ClientContext::TryBindRelation(Relation &relation, vector unique_ptr ClientContext::Execute(const shared_ptr &relation) { auto lock = LockContext(); + InitialCleanup(*lock); + string query; if 
(query_verification_enabled) { // run the ToString method of any relation we run, mostly to ensure it doesn't crash relation->ToString(); + relation->GetAlias(); if (relation->IsReadOnly()) { // verify read only statements by running a select statement auto select = make_unique(); @@ -79520,9 +89351,11 @@ unique_ptr ClientContext::Execute(const shared_ptr &relat } } // result mismatch - string err_str = "Result mismatch in query!\nExpected the following columns: "; + string err_str = "Result mismatch in query!\nExpected the following columns: ["; for (idx_t i = 0; i < expected_columns.size(); i++) { - err_str += i == 0 ? "[" : ", "; + if (i > 0) { + err_str += ", "; + } err_str += expected_columns[i].name + " " + expected_columns[i].type.ToString(); } err_str += "]\nBut result contained the following: "; @@ -79539,6 +89372,179 @@ unique_ptr ClientContext::Execute(const shared_ptr &relat +namespace duckdb { + +static ConfigurationOption internal_options[] = { + {ConfigurationOptionType::ACCESS_MODE, "access_mode", + "Access mode of the database ([AUTOMATIC], READ_ONLY or READ_WRITE)", LogicalTypeId::VARCHAR}, + {ConfigurationOptionType::DEFAULT_ORDER_TYPE, "default_order", + "The order type used when none is specified ([ASC] or DESC)", LogicalTypeId::VARCHAR}, + {ConfigurationOptionType::DEFAULT_NULL_ORDER, "default_null_order", + "Null ordering used when none is specified ([NULLS_FIRST] or NULLS_LAST)", LogicalTypeId::VARCHAR}, + {ConfigurationOptionType::ENABLE_EXTERNAL_ACCESS, "enable_external_access", + "Allow the database to access external state (through e.g. COPY TO/FROM, CSV readers, pandas replacement scans, " + "etc)", + LogicalTypeId::BOOLEAN}, + {ConfigurationOptionType::ENABLE_OBJECT_CACHE, "enable_object_cache", + "Whether or not object cache is used to cache e.g. Parquet metadata", LogicalTypeId::BOOLEAN}, + {ConfigurationOptionType::MAXIMUM_MEMORY, "max_memory", "The maximum memory of the system (e.g. 
1GB)", + LogicalTypeId::VARCHAR}, + {ConfigurationOptionType::THREADS, "threads", "The number of total threads used by the system", + LogicalTypeId::BIGINT}, + {ConfigurationOptionType::INVALID, nullptr, nullptr, LogicalTypeId::INVALID}}; + +vector DBConfig::GetOptions() { + vector options; + for (idx_t index = 0; internal_options[index].name; index++) { + options.push_back(internal_options[index]); + } + return options; +} + +idx_t DBConfig::GetOptionCount() { + idx_t count = 0; + for (idx_t index = 0; internal_options[index].name; index++) { + count++; + } + return count; +} + +ConfigurationOption *DBConfig::GetOptionByIndex(idx_t target_index) { + for (idx_t index = 0; internal_options[index].name; index++) { + if (index == target_index) { + return internal_options + index; + } + } + return nullptr; +} + +ConfigurationOption *DBConfig::GetOptionByName(const string &name) { + for (idx_t index = 0; internal_options[index].name; index++) { + if (internal_options[index].name == name) { + return internal_options + index; + } + } + return nullptr; +} + +void DBConfig::SetOption(const ConfigurationOption &option, const Value &value) { + switch (option.type) { + case ConfigurationOptionType::ACCESS_MODE: { + auto parameter = StringUtil::Lower(value.ToString()); + if (parameter == "automatic") { + access_mode = AccessMode::AUTOMATIC; + } else if (parameter == "read_only") { + access_mode = AccessMode::READ_ONLY; + } else if (parameter == "read_write") { + access_mode = AccessMode::READ_WRITE; + } else { + throw InvalidInputException( + "Unrecognized parameter for option ACCESS_MODE \"%s\". 
Expected READ_ONLY or READ_WRITE.", parameter); + } + break; + } + case ConfigurationOptionType::DEFAULT_ORDER_TYPE: { + auto parameter = StringUtil::Lower(value.ToString()); + if (parameter == "asc") { + default_order_type = OrderType::ASCENDING; + } else if (parameter == "desc") { + default_order_type = OrderType::DESCENDING; + } else { + throw InvalidInputException("Unrecognized parameter for option DEFAULT_ORDER \"%s\". Expected ASC or DESC.", + parameter); + } + break; + } + case ConfigurationOptionType::DEFAULT_NULL_ORDER: { + auto parameter = StringUtil::Lower(value.ToString()); + if (parameter == "nulls_first") { + default_null_order = OrderByNullType::NULLS_FIRST; + } else if (parameter == "nulls_last") { + default_null_order = OrderByNullType::NULLS_LAST; + } else { + throw InvalidInputException( + "Unrecognized parameter for option NULL_ORDER \"%s\". Expected NULLS_FIRST or NULLS_LAST.", parameter); + } + break; + } + case ConfigurationOptionType::ENABLE_EXTERNAL_ACCESS: { + enable_external_access = value.CastAs(LogicalType::BOOLEAN).GetValueUnsafe(); + break; + } + case ConfigurationOptionType::ENABLE_OBJECT_CACHE: { + object_cache_enable = value.CastAs(LogicalType::BOOLEAN).GetValueUnsafe(); + break; + } + case ConfigurationOptionType::MAXIMUM_MEMORY: { + maximum_memory = ParseMemoryLimit(value.ToString()); + break; + } + case ConfigurationOptionType::THREADS: { + maximum_threads = value.GetValue(); + break; + } + default: // LCOV_EXCL_START + break; + } // LCOV_EXCL_STOP +} + +idx_t DBConfig::ParseMemoryLimit(const string &arg) { + if (arg[0] == '-' || arg == "null" || arg == "none") { + return INVALID_INDEX; + } + // split based on the number/non-number + idx_t idx = 0; + while (StringUtil::CharacterIsSpace(arg[idx])) { + idx++; + } + idx_t num_start = idx; + while ((arg[idx] >= '0' && arg[idx] <= '9') || arg[idx] == '.' 
|| arg[idx] == 'e' || arg[idx] == 'E' || + arg[idx] == '-') { + idx++; + } + if (idx == num_start) { + throw ParserException("Memory limit must have a number (e.g. PRAGMA memory_limit=1GB"); + } + string number = arg.substr(num_start, idx - num_start); + + // try to parse the number + double limit = Cast::Operation(string_t(number)); + + // now parse the memory limit unit (e.g. bytes, gb, etc) + while (StringUtil::CharacterIsSpace(arg[idx])) { + idx++; + } + idx_t start = idx; + while (idx < arg.size() && !StringUtil::CharacterIsSpace(arg[idx])) { + idx++; + } + if (limit < 0) { + // limit < 0, set limit to infinite + return (idx_t)-1; + } + string unit = StringUtil::Lower(arg.substr(start, idx - start)); + idx_t multiplier; + if (unit == "byte" || unit == "bytes" || unit == "b") { + multiplier = 1; + } else if (unit == "kilobyte" || unit == "kilobytes" || unit == "kb" || unit == "k") { + multiplier = 1000LL; + } else if (unit == "megabyte" || unit == "megabytes" || unit == "mb" || unit == "m") { + multiplier = 1000LL * 1000LL; + } else if (unit == "gigabyte" || unit == "gigabytes" || unit == "gb" || unit == "g") { + multiplier = 1000LL * 1000LL * 1000LL; + } else if (unit == "terabyte" || unit == "terabytes" || unit == "tb" || unit == "t") { + multiplier = 1000LL * 1000LL * 1000LL * 1000LL; + } else { + throw ParserException("Unknown unit for memory_limit: %s (expected: b, mb, gb or tb)", unit); + } + return (idx_t)multiplier * limit; +} + +} // namespace duckdb + + + + //===----------------------------------------------------------------------===// // DuckDB @@ -79811,6 +89817,7 @@ class ConnectionManager { } // namespace duckdb + namespace duckdb { Connection::Connection(DatabaseInstance &database) : context(make_shared(database.shared_from_this())) { @@ -79825,9 +89832,9 @@ Connection::Connection(DuckDB &database) : Connection(*database.instance) { string Connection::GetProfilingInformation(ProfilerPrintFormat format) { if (format == ProfilerPrintFormat::JSON) 
{ - return context->profiler.ToJSON(); + return context->profiler->ToJSON(); } else { - return context->profiler.ToString(); + return context->profiler->ToString(); } } @@ -79899,8 +89906,18 @@ vector> Connection::ExtractStatements(const string &que return context->ParseStatements(query); } +unique_ptr Connection::ExtractPlan(const string &query) { + return context->ExtractPlan(query); +} + void Connection::Append(TableDescription &description, DataChunk &chunk) { - context->Append(description, chunk); + ChunkCollection collection; + collection.Append(chunk); + Append(description, collection); +} + +void Connection::Append(TableDescription &description, ChunkCollection &collection) { + context->Append(description, collection); } shared_ptr Connection::Table(const string &table_name) { @@ -79954,6 +89971,7 @@ shared_ptr Connection::Values(const string &values, const vector Connection::ReadCSV(const string &csv_file) { BufferedCSVReaderOptions options; options.file_path = csv_file; + options.auto_detect = true; BufferedCSVReader reader(*context, options); vector column_list; for (idx_t i = 0; i < reader.sql_types.size(); i++) { @@ -80020,8 +90038,17 @@ bool Connection::IsAutoCommit() { + +#ifndef DUCKDB_NO_THREADS + +#endif + namespace duckdb { +DBConfig::DBConfig() { + compression_functions = make_unique(); +} + DBConfig::~DBConfig() { } @@ -80124,6 +90151,9 @@ void DatabaseInstance::Initialize(const char *path, DBConfig *new_config) { // initialize the database storage->Initialize(); + + // only increase thread count after storage init because we get races on catalog otherwise + scheduler->SetThreads(config.maximum_threads); } DuckDB::DuckDB(const char *path, DBConfig *new_config) : instance(make_shared()) { @@ -80168,30 +90198,51 @@ FileSystem &DuckDB::GetFileSystem() { return instance->GetFileSystem(); } +Allocator &Allocator::Get(ClientContext &context) { + return Allocator::Get(*context.db); +} + +Allocator &Allocator::Get(DatabaseInstance &db) { + return 
db.config.allocator; +} + void DatabaseInstance::Configure(DBConfig &new_config) { + config.access_mode = AccessMode::READ_WRITE; if (new_config.access_mode != AccessMode::UNDEFINED) { config.access_mode = new_config.access_mode; - } else { - config.access_mode = AccessMode::READ_WRITE; } if (new_config.file_system) { config.file_system = move(new_config.file_system); } else { config.file_system = make_unique(); } - if (new_config.maximum_memory == (idx_t)-1) { + config.maximum_memory = new_config.maximum_memory; + if (config.maximum_memory == (idx_t)-1) { config.maximum_memory = config.file_system->GetAvailableMemory() * 8 / 10; + } + if (new_config.maximum_threads == (idx_t)-1) { +#ifndef DUCKDB_NO_THREADS + config.maximum_threads = 1; + // FIXME: next release + // config.maximum_threads = std::thread::hardware_concurrency(); +#else + config.maximum_threads = 1; +#endif } else { - config.maximum_memory = new_config.maximum_memory; + config.maximum_threads = new_config.maximum_threads; } + config.force_compression = new_config.force_compression; + config.allocator = move(new_config.allocator); config.checkpoint_wal_size = new_config.checkpoint_wal_size; config.use_direct_io = new_config.use_direct_io; config.temporary_directory = new_config.temporary_directory; config.collation = new_config.collation; config.default_order_type = new_config.default_order_type; config.default_null_order = new_config.default_null_order; - config.enable_copy = new_config.enable_copy; + config.enable_external_access = new_config.enable_external_access; config.replacement_scans = move(new_config.replacement_scans); + config.initialize_default_database = new_config.initialize_default_database; + config.disabled_optimizers = move(new_config.disabled_optimizers); } DBConfig &DBConfig::GetConfig(ClientContext &context) { @@ -80210,887 +90261,6 @@ idx_t DuckDB::NumberOfThreads() { - - - - - - -#include -#include - -#ifdef _WIN32 -#define strdup _strdup -#endif - -#ifdef GetCValue -#undef 
GetCValue -#endif - -using namespace duckdb; - -static duckdb_type ConvertCPPTypeToC(LogicalType type); -static idx_t GetCTypeSize(duckdb_type type); -namespace duckdb { -struct DatabaseData { - DatabaseData() : database(nullptr) { - } - ~DatabaseData() { - if (database) { - delete database; - } - } - - DuckDB *database; -}; -} // namespace duckdb -duckdb_state duckdb_open(const char *path, duckdb_database *out) { - auto wrapper = new DatabaseData(); - try { - wrapper->database = new DuckDB(path); - } catch (...) { - delete wrapper; - return DuckDBError; - } - *out = (duckdb_database)wrapper; - return DuckDBSuccess; -} - -void duckdb_close(duckdb_database *database) { - if (*database) { - auto wrapper = (DatabaseData *)*database; - delete wrapper; - *database = nullptr; - } -} - -duckdb_state duckdb_connect(duckdb_database database, duckdb_connection *out) { - auto wrapper = (DatabaseData *)database; - Connection *connection; - try { - connection = new Connection(*wrapper->database); - } catch (...) { - return DuckDBError; - } - *out = (duckdb_connection)connection; - return DuckDBSuccess; -} - -void duckdb_disconnect(duckdb_connection *connection) { - if (*connection) { - Connection *conn = (Connection *)*connection; - delete conn; - *connection = nullptr; - } -} - -template -void WriteData(duckdb_result *out, ChunkCollection &source, idx_t col) { - idx_t row = 0; - auto target = (T *)out->columns[col].data; - for (auto &chunk : source.Chunks()) { - auto source = FlatVector::GetData(chunk->data[col]); - auto &mask = FlatVector::Validity(chunk->data[col]); - - for (idx_t k = 0; k < chunk->size(); k++, row++) { - if (!mask.RowIsValid(k)) { - continue; - } - target[row] = source[k]; - } - } -} - -static duckdb_state duckdb_translate_result(MaterializedQueryResult *result, duckdb_result *out) { - D_ASSERT(result); - if (!out) { - // no result to write to, only return the status - return result->success ? 
DuckDBSuccess : DuckDBError; - } - out->error_message = nullptr; - if (!result->success) { - // write the error message - out->error_message = strdup(result->error.c_str()); - return DuckDBError; - } - // copy the data - // first write the meta data - out->column_count = result->types.size(); - out->row_count = result->collection.Count(); - out->columns = (duckdb_column *)malloc(sizeof(duckdb_column) * out->column_count); - if (!out->columns) { - return DuckDBError; - } - // zero initialize the columns (so we can cleanly delete it in case a malloc fails) - memset(out->columns, 0, sizeof(duckdb_column) * out->column_count); - for (idx_t i = 0; i < out->column_count; i++) { - out->columns[i].type = ConvertCPPTypeToC(result->types[i]); - out->columns[i].name = strdup(result->names[i].c_str()); - out->columns[i].nullmask = (bool *)malloc(sizeof(bool) * out->row_count); - out->columns[i].data = malloc(GetCTypeSize(out->columns[i].type) * out->row_count); - if (!out->columns[i].nullmask || !out->columns[i].name || !out->columns[i].data) { - // malloc failure - return DuckDBError; - } - // memset data to 0 for VARCHAR columns for safe deletion later - if (result->types[i].InternalType() == PhysicalType::VARCHAR) { - memset(out->columns[i].data, 0, GetCTypeSize(out->columns[i].type) * out->row_count); - } - } - // now write the data - for (idx_t col = 0; col < out->column_count; col++) { - // first set the nullmask - idx_t row = 0; - for (auto &chunk : result->collection.Chunks()) { - for (idx_t k = 0; k < chunk->size(); k++) { - out->columns[col].nullmask[row++] = FlatVector::IsNull(chunk->data[col], k); - } - } - // then write the data - switch (result->types[col].id()) { - case LogicalTypeId::BOOLEAN: - WriteData(out, result->collection, col); - break; - case LogicalTypeId::TINYINT: - WriteData(out, result->collection, col); - break; - case LogicalTypeId::SMALLINT: - WriteData(out, result->collection, col); - break; - case LogicalTypeId::INTEGER: - WriteData(out, 
result->collection, col); - break; - case LogicalTypeId::BIGINT: - WriteData(out, result->collection, col); - break; - case LogicalTypeId::FLOAT: - WriteData(out, result->collection, col); - break; - case LogicalTypeId::DOUBLE: - WriteData(out, result->collection, col); - break; - case LogicalTypeId::VARCHAR: { - idx_t row = 0; - auto target = (const char **)out->columns[col].data; - for (auto &chunk : result->collection.Chunks()) { - auto source = FlatVector::GetData(chunk->data[col]); - for (idx_t k = 0; k < chunk->size(); k++) { - if (!FlatVector::IsNull(chunk->data[col], k)) { - target[row] = (char *)malloc(source[k].GetSize() + 1); - assert(target[row]); - memcpy((void *)target[row], source[k].GetDataUnsafe(), source[k].GetSize()); - auto write_arr = (char *)target[row]; - write_arr[source[k].GetSize()] = '\0'; - } - row++; - } - } - break; - } - case LogicalTypeId::BLOB: { - idx_t row = 0; - auto target = (duckdb_blob *)out->columns[col].data; - for (auto &chunk : result->collection.Chunks()) { - auto source = FlatVector::GetData(chunk->data[col]); - for (idx_t k = 0; k < chunk->size(); k++) { - if (!FlatVector::IsNull(chunk->data[col], k)) { - target[row].data = (char *)malloc(source[k].GetSize()); - target[row].size = source[k].GetSize(); - assert(target[row].data); - memcpy((void *)target[row].data, source[k].GetDataUnsafe(), source[k].GetSize()); - } - row++; - } - } - break; - } - case LogicalTypeId::DATE: { - idx_t row = 0; - auto target = (duckdb_date *)out->columns[col].data; - for (auto &chunk : result->collection.Chunks()) { - auto source = FlatVector::GetData(chunk->data[col]); - for (idx_t k = 0; k < chunk->size(); k++) { - if (!FlatVector::IsNull(chunk->data[col], k)) { - int32_t year, month, day; - Date::Convert(source[k], year, month, day); - target[row].year = year; - target[row].month = month; - target[row].day = day; - } - row++; - } - } - break; - } - case LogicalTypeId::TIME: { - idx_t row = 0; - auto target = (duckdb_time 
*)out->columns[col].data; - for (auto &chunk : result->collection.Chunks()) { - auto source = FlatVector::GetData(chunk->data[col]); - for (idx_t k = 0; k < chunk->size(); k++) { - if (!FlatVector::IsNull(chunk->data[col], k)) { - int32_t hour, min, sec, micros; - Time::Convert(source[k], hour, min, sec, micros); - target[row].hour = hour; - target[row].min = min; - target[row].sec = sec; - target[row].micros = micros; - } - row++; - } - } - break; - } - case LogicalTypeId::TIMESTAMP: { - idx_t row = 0; - auto target = (duckdb_timestamp *)out->columns[col].data; - for (auto &chunk : result->collection.Chunks()) { - auto source = FlatVector::GetData(chunk->data[col]); - for (idx_t k = 0; k < chunk->size(); k++) { - if (!FlatVector::IsNull(chunk->data[col], k)) { - date_t date; - dtime_t time; - Timestamp::Convert(source[k], date, time); - - int32_t year, month, day; - Date::Convert(date, year, month, day); - - int32_t hour, min, sec, micros; - Time::Convert(time, hour, min, sec, micros); - - target[row].date.year = year; - target[row].date.month = month; - target[row].date.day = day; - target[row].time.hour = hour; - target[row].time.min = min; - target[row].time.sec = sec; - target[row].time.micros = micros; - } - row++; - } - } - break; - } - case LogicalTypeId::HUGEINT: { - idx_t row = 0; - auto target = (duckdb_hugeint *)out->columns[col].data; - for (auto &chunk : result->collection.Chunks()) { - auto source = FlatVector::GetData(chunk->data[col]); - for (idx_t k = 0; k < chunk->size(); k++) { - if (!FlatVector::IsNull(chunk->data[col], k)) { - target[row].lower = source[k].lower; - target[row].upper = source[k].upper; - } - row++; - } - } - break; - } - case LogicalTypeId::INTERVAL: { - idx_t row = 0; - auto target = (duckdb_interval *)out->columns[col].data; - for (auto &chunk : result->collection.Chunks()) { - auto source = FlatVector::GetData(chunk->data[col]); - for (idx_t k = 0; k < chunk->size(); k++) { - if (!FlatVector::IsNull(chunk->data[col], k)) { - 
target[row].days = source[k].days; - target[row].months = source[k].months; - target[row].micros = source[k].micros; - } - row++; - } - } - break; - } - default: - // unsupported type for C API - D_ASSERT(0); - return DuckDBError; - } - } - return DuckDBSuccess; -} - -duckdb_state duckdb_query(duckdb_connection connection, const char *query, duckdb_result *out) { - Connection *conn = (Connection *)connection; - auto result = conn->Query(query); - return duckdb_translate_result(result.get(), out); -} - -static void duckdb_destroy_column(duckdb_column column, idx_t count) { - if (column.data) { - if (column.type == DUCKDB_TYPE_VARCHAR) { - // varchar, delete individual strings - auto data = (char **)column.data; - for (idx_t i = 0; i < count; i++) { - if (data[i]) { - free(data[i]); - } - } - } else if (column.type == DUCKDB_TYPE_BLOB) { - // blob, delete individual blobs - auto data = (duckdb_blob *)column.data; - for (idx_t i = 0; i < count; i++) { - if (data[i].data) { - free((void *)data[i].data); - } - } - } - free(column.data); - } - if (column.nullmask) { - free(column.nullmask); - } - if (column.name) { - free(column.name); - } -} - -void duckdb_destroy_result(duckdb_result *result) { - if (result->error_message) { - free(result->error_message); - } - if (result->columns) { - for (idx_t i = 0; i < result->column_count; i++) { - duckdb_destroy_column(result->columns[i], result->row_count); - } - free(result->columns); - } - memset(result, 0, sizeof(duckdb_result)); -} -namespace duckdb { -struct PreparedStatementWrapper { - PreparedStatementWrapper() : statement(nullptr) { - } - ~PreparedStatementWrapper() { - } - unique_ptr statement; - vector values; -}; -} // namespace duckdb -duckdb_state duckdb_prepare(duckdb_connection connection, const char *query, - duckdb_prepared_statement *out_prepared_statement) { - if (!connection || !query) { - return DuckDBError; - } - auto wrapper = new PreparedStatementWrapper(); - Connection *conn = (Connection *)connection; 
- wrapper->statement = conn->Prepare(query); - *out_prepared_statement = (duckdb_prepared_statement)wrapper; - return wrapper->statement->success ? DuckDBSuccess : DuckDBError; -} - -duckdb_state duckdb_nparams(duckdb_prepared_statement prepared_statement, idx_t *nparams_out) { - auto wrapper = (PreparedStatementWrapper *)prepared_statement; - if (!wrapper || !wrapper->statement || !wrapper->statement->success) { - return DuckDBError; - } - *nparams_out = wrapper->statement->n_param; - return DuckDBSuccess; -} - -static duckdb_state duckdb_bind_value(duckdb_prepared_statement prepared_statement, idx_t param_idx, Value val) { - auto wrapper = (PreparedStatementWrapper *)prepared_statement; - if (!wrapper || !wrapper->statement || !wrapper->statement->success) { - return DuckDBError; - } - if (param_idx > wrapper->statement->n_param) { - return DuckDBError; - } - if (param_idx > wrapper->values.size()) { - wrapper->values.resize(param_idx); - } - wrapper->values[param_idx - 1] = val; - return DuckDBSuccess; -} - -duckdb_state duckdb_bind_boolean(duckdb_prepared_statement prepared_statement, idx_t param_idx, bool val) { - return duckdb_bind_value(prepared_statement, param_idx, Value::BOOLEAN(val)); -} - -duckdb_state duckdb_bind_int8(duckdb_prepared_statement prepared_statement, idx_t param_idx, int8_t val) { - return duckdb_bind_value(prepared_statement, param_idx, Value::TINYINT(val)); -} - -duckdb_state duckdb_bind_int16(duckdb_prepared_statement prepared_statement, idx_t param_idx, int16_t val) { - return duckdb_bind_value(prepared_statement, param_idx, Value::SMALLINT(val)); -} - -duckdb_state duckdb_bind_int32(duckdb_prepared_statement prepared_statement, idx_t param_idx, int32_t val) { - return duckdb_bind_value(prepared_statement, param_idx, Value::INTEGER(val)); -} - -duckdb_state duckdb_bind_int64(duckdb_prepared_statement prepared_statement, idx_t param_idx, int64_t val) { - return duckdb_bind_value(prepared_statement, param_idx, Value::BIGINT(val)); -} - 
-duckdb_state duckdb_bind_uint8(duckdb_prepared_statement prepared_statement, idx_t param_idx, uint8_t val) { - return duckdb_bind_value(prepared_statement, param_idx, Value::UTINYINT(val)); -} - -duckdb_state duckdb_bind_uint16(duckdb_prepared_statement prepared_statement, idx_t param_idx, uint16_t val) { - return duckdb_bind_value(prepared_statement, param_idx, Value::USMALLINT(val)); -} - -duckdb_state duckdb_bind_uint32(duckdb_prepared_statement prepared_statement, idx_t param_idx, uint32_t val) { - return duckdb_bind_value(prepared_statement, param_idx, Value::UINTEGER(val)); -} - -duckdb_state duckdb_bind_uint64(duckdb_prepared_statement prepared_statement, idx_t param_idx, uint64_t val) { - return duckdb_bind_value(prepared_statement, param_idx, Value::UBIGINT(val)); -} - -duckdb_state duckdb_bind_float(duckdb_prepared_statement prepared_statement, idx_t param_idx, float val) { - return duckdb_bind_value(prepared_statement, param_idx, Value(val)); -} - -duckdb_state duckdb_bind_double(duckdb_prepared_statement prepared_statement, idx_t param_idx, double val) { - return duckdb_bind_value(prepared_statement, param_idx, Value(val)); -} - -duckdb_state duckdb_bind_varchar(duckdb_prepared_statement prepared_statement, idx_t param_idx, const char *val) { - return duckdb_bind_value(prepared_statement, param_idx, Value(val)); -} - -duckdb_state duckdb_bind_varchar_length(duckdb_prepared_statement prepared_statement, idx_t param_idx, const char *val, - idx_t length) { - return duckdb_bind_value(prepared_statement, param_idx, Value(string(val, length))); -} - -duckdb_state duckdb_bind_blob(duckdb_prepared_statement prepared_statement, idx_t param_idx, const void *data, - idx_t length) { - return duckdb_bind_value(prepared_statement, param_idx, Value::BLOB((const_data_ptr_t)data, length)); -} - -duckdb_state duckdb_bind_null(duckdb_prepared_statement prepared_statement, idx_t param_idx) { - return duckdb_bind_value(prepared_statement, param_idx, Value()); -} - 
-duckdb_state duckdb_execute_prepared(duckdb_prepared_statement prepared_statement, duckdb_result *out_result) { - auto wrapper = (PreparedStatementWrapper *)prepared_statement; - if (!wrapper || !wrapper->statement || !wrapper->statement->success) { - return DuckDBError; - } - auto result = wrapper->statement->Execute(wrapper->values, false); - D_ASSERT(result->type == QueryResultType::MATERIALIZED_RESULT); - auto mat_res = (MaterializedQueryResult *)result.get(); - return duckdb_translate_result(mat_res, out_result); -} - -void duckdb_destroy_prepare(duckdb_prepared_statement *prepared_statement) { - if (!prepared_statement) { - return; - } - auto wrapper = (PreparedStatementWrapper *)*prepared_statement; - if (wrapper) { - delete wrapper; - } - *prepared_statement = nullptr; -} - -duckdb_type ConvertCPPTypeToC(LogicalType sql_type) { - switch (sql_type.id()) { - case LogicalTypeId::BOOLEAN: - return DUCKDB_TYPE_BOOLEAN; - case LogicalTypeId::TINYINT: - return DUCKDB_TYPE_TINYINT; - case LogicalTypeId::SMALLINT: - return DUCKDB_TYPE_SMALLINT; - case LogicalTypeId::INTEGER: - return DUCKDB_TYPE_INTEGER; - case LogicalTypeId::BIGINT: - return DUCKDB_TYPE_BIGINT; - case LogicalTypeId::HUGEINT: - return DUCKDB_TYPE_HUGEINT; - case LogicalTypeId::FLOAT: - return DUCKDB_TYPE_FLOAT; - case LogicalTypeId::DOUBLE: - return DUCKDB_TYPE_DOUBLE; - case LogicalTypeId::TIMESTAMP: - return DUCKDB_TYPE_TIMESTAMP; - case LogicalTypeId::DATE: - return DUCKDB_TYPE_DATE; - case LogicalTypeId::TIME: - return DUCKDB_TYPE_TIME; - case LogicalTypeId::VARCHAR: - return DUCKDB_TYPE_VARCHAR; - case LogicalTypeId::BLOB: - return DUCKDB_TYPE_BLOB; - case LogicalTypeId::INTERVAL: - return DUCKDB_TYPE_INTERVAL; - default: - return DUCKDB_TYPE_INVALID; - } -} - -idx_t GetCTypeSize(duckdb_type type) { - switch (type) { - case DUCKDB_TYPE_BOOLEAN: - return sizeof(bool); - case DUCKDB_TYPE_TINYINT: - return sizeof(int8_t); - case DUCKDB_TYPE_SMALLINT: - return sizeof(int16_t); - case 
DUCKDB_TYPE_INTEGER: - return sizeof(int32_t); - case DUCKDB_TYPE_BIGINT: - return sizeof(int64_t); - case DUCKDB_TYPE_HUGEINT: - return sizeof(duckdb_hugeint); - case DUCKDB_TYPE_FLOAT: - return sizeof(float); - case DUCKDB_TYPE_DOUBLE: - return sizeof(double); - case DUCKDB_TYPE_DATE: - return sizeof(duckdb_date); - case DUCKDB_TYPE_TIME: - return sizeof(duckdb_time); - case DUCKDB_TYPE_TIMESTAMP: - return sizeof(duckdb_timestamp); - case DUCKDB_TYPE_VARCHAR: - return sizeof(const char *); - case DUCKDB_TYPE_BLOB: - return sizeof(duckdb_blob); - case DUCKDB_TYPE_INTERVAL: - return sizeof(duckdb_interval); - default: - // unsupported type - D_ASSERT(0); - return sizeof(const char *); - } -} - -template -T UnsafeFetch(duckdb_result *result, idx_t col, idx_t row) { - D_ASSERT(row < result->row_count); - return ((T *)result->columns[col].data)[row]; -} - -static Value GetCValue(duckdb_result *result, idx_t col, idx_t row) { - if (col >= result->column_count) { - return Value(); - } - if (row >= result->row_count) { - return Value(); - } - if (result->columns[col].nullmask[row]) { - return Value(); - } - switch (result->columns[col].type) { - case DUCKDB_TYPE_BOOLEAN: - return Value::BOOLEAN(UnsafeFetch(result, col, row)); - case DUCKDB_TYPE_TINYINT: - return Value::TINYINT(UnsafeFetch(result, col, row)); - case DUCKDB_TYPE_SMALLINT: - return Value::SMALLINT(UnsafeFetch(result, col, row)); - case DUCKDB_TYPE_INTEGER: - return Value::INTEGER(UnsafeFetch(result, col, row)); - case DUCKDB_TYPE_BIGINT: - return Value::BIGINT(UnsafeFetch(result, col, row)); - case DUCKDB_TYPE_FLOAT: - return Value(UnsafeFetch(result, col, row)); - case DUCKDB_TYPE_DOUBLE: - return Value(UnsafeFetch(result, col, row)); - case DUCKDB_TYPE_DATE: { - auto date = UnsafeFetch(result, col, row); - return Value::DATE(date.year, date.month, date.day); - } - case DUCKDB_TYPE_TIME: { - auto time = UnsafeFetch(result, col, row); - return Value::TIME(time.hour, time.min, time.sec, time.micros); - } - 
case DUCKDB_TYPE_TIMESTAMP: { - auto timestamp = UnsafeFetch(result, col, row); - return Value::TIMESTAMP(timestamp.date.year, timestamp.date.month, timestamp.date.day, timestamp.time.hour, - timestamp.time.min, timestamp.time.sec, timestamp.time.micros); - } - case DUCKDB_TYPE_HUGEINT: { - hugeint_t val; - auto hugeint = UnsafeFetch(result, col, row); - val.lower = hugeint.lower; - val.upper = hugeint.upper; - return Value::HUGEINT(val); - } - case DUCKDB_TYPE_INTERVAL: { - interval_t val; - auto interval = UnsafeFetch(result, col, row); - val.days = interval.days; - val.months = interval.months; - val.micros = interval.micros; - return Value::INTERVAL(val); - } - case DUCKDB_TYPE_VARCHAR: - return Value(string(UnsafeFetch(result, col, row))); - case DUCKDB_TYPE_BLOB: { - auto blob = UnsafeFetch(result, col, row); - return Value::BLOB((const_data_ptr_t)blob.data, blob.size); - } - default: - // invalid type for C to C++ conversion - D_ASSERT(0); - return Value(); - } -} - -const char *duckdb_column_name(duckdb_result *result, idx_t col) { - if (!result || col >= result->column_count) { - return nullptr; - } - return result->columns[col].name; -} - -bool duckdb_value_boolean(duckdb_result *result, idx_t col, idx_t row) { - Value val = GetCValue(result, col, row); - if (val.is_null) { - return false; - } else { - return val.GetValue(); - } -} - -int8_t duckdb_value_int8(duckdb_result *result, idx_t col, idx_t row) { - Value val = GetCValue(result, col, row); - if (val.is_null) { - return 0; - } else { - return val.GetValue(); - } -} - -int16_t duckdb_value_int16(duckdb_result *result, idx_t col, idx_t row) { - Value val = GetCValue(result, col, row); - if (val.is_null) { - return 0; - } else { - return val.GetValue(); - } -} - -int32_t duckdb_value_int32(duckdb_result *result, idx_t col, idx_t row) { - Value val = GetCValue(result, col, row); - if (val.is_null) { - return 0; - } else { - return val.GetValue(); - } -} - -int64_t duckdb_value_int64(duckdb_result 
*result, idx_t col, idx_t row) { - Value val = GetCValue(result, col, row); - if (val.is_null) { - return 0; - } else { - return val.GetValue(); - } -} - -uint8_t duckdb_value_uint8(duckdb_result *result, idx_t col, idx_t row) { - Value val = GetCValue(result, col, row); - if (val.is_null) { - return 0; - } else { - return val.GetValue(); - } -} - -uint16_t duckdb_value_uint16(duckdb_result *result, idx_t col, idx_t row) { - Value val = GetCValue(result, col, row); - if (val.is_null) { - return 0; - } else { - return val.GetValue(); - } -} - -uint32_t duckdb_value_uint32(duckdb_result *result, idx_t col, idx_t row) { - Value val = GetCValue(result, col, row); - if (val.is_null) { - return 0; - } else { - return val.GetValue(); - } -} - -uint64_t duckdb_value_uint64(duckdb_result *result, idx_t col, idx_t row) { - Value val = GetCValue(result, col, row); - if (val.is_null) { - return 0; - } else { - return val.GetValue(); - } -} - -float duckdb_value_float(duckdb_result *result, idx_t col, idx_t row) { - Value val = GetCValue(result, col, row); - if (val.is_null) { - return 0.0; - } else { - return val.GetValue(); - } -} - -double duckdb_value_double(duckdb_result *result, idx_t col, idx_t row) { - Value val = GetCValue(result, col, row); - if (val.is_null) { - return 0.0; - } else { - return val.GetValue(); - } -} - -char *duckdb_value_varchar(duckdb_result *result, idx_t col, idx_t row) { - Value val = GetCValue(result, col, row); - return strdup(val.ToString().c_str()); -} - -duckdb_blob duckdb_value_blob(duckdb_result *result, idx_t col, idx_t row) { - duckdb_blob blob; - Value val = GetCValue(result, col, row).CastAs(LogicalType::BLOB); - if (val.is_null) { - blob.data = nullptr; - blob.size = 0; - } else { - blob.data = malloc(val.str_value.size()); - memcpy((void *)blob.data, val.str_value.c_str(), val.str_value.size()); - blob.size = val.str_value.size(); - } - return blob; -} - -duckdb_state duckdb_appender_create(duckdb_connection connection, const char 
*schema, const char *table, - duckdb_appender *out_appender) { - Connection *conn = (Connection *)connection; - - if (!connection || !table || !out_appender) { - return DuckDBError; - } - if (schema == nullptr) { - - schema = DEFAULT_SCHEMA; - } - try { - auto *appender = new Appender(*conn, schema, table); - *out_appender = appender; - } catch (...) { - return DuckDBError; - } - return DuckDBSuccess; -} - -duckdb_state duckdb_appender_destroy(duckdb_appender *appender) { - if (!appender || !*appender) { - return DuckDBError; - } - auto *appender_instance = *((Appender **)appender); - delete appender_instance; - *appender = nullptr; - return DuckDBSuccess; -} - -#define APPENDER_CALL(FUN) \ - if (!appender) { \ - return DuckDBError; \ - } \ - auto *appender_instance = (Appender *)appender; \ - try { \ - appender_instance->FUN(); \ - } catch (...) { \ - return DuckDBError; \ - } \ - return DuckDBSuccess; - -#define APPENDER_CALL_PARAM(FUN, PARAM) \ - if (!appender) { \ - return DuckDBError; \ - } \ - auto *appender_instance = (Appender *)appender; \ - try { \ - appender_instance->FUN(PARAM); \ - } catch (...) 
{ \ - return DuckDBError; \ - } \ - return DuckDBSuccess; - -duckdb_state duckdb_appender_begin_row(duckdb_appender appender) { - APPENDER_CALL(BeginRow); -} - -duckdb_state duckdb_appender_end_row(duckdb_appender appender) { - APPENDER_CALL(EndRow); -} - -duckdb_state duckdb_append_bool(duckdb_appender appender, bool value) { - APPENDER_CALL_PARAM(Append, value); -} - -duckdb_state duckdb_append_int8(duckdb_appender appender, int8_t value) { - APPENDER_CALL_PARAM(Append, value); -} - -duckdb_state duckdb_append_int16(duckdb_appender appender, int16_t value) { - APPENDER_CALL_PARAM(Append, value); -} - -duckdb_state duckdb_append_int32(duckdb_appender appender, int32_t value) { - APPENDER_CALL_PARAM(Append, value); -} - -duckdb_state duckdb_append_int64(duckdb_appender appender, int64_t value) { - APPENDER_CALL_PARAM(Append, value); -} - -duckdb_state duckdb_append_uint8(duckdb_appender appender, uint8_t value) { - APPENDER_CALL_PARAM(Append, value); -} - -duckdb_state duckdb_append_uint16(duckdb_appender appender, uint16_t value) { - APPENDER_CALL_PARAM(Append, value); -} - -duckdb_state duckdb_append_uint32(duckdb_appender appender, uint32_t value) { - APPENDER_CALL_PARAM(Append, value); -} - -duckdb_state duckdb_append_uint64(duckdb_appender appender, uint64_t value) { - APPENDER_CALL_PARAM(Append, value); -} - -duckdb_state duckdb_append_float(duckdb_appender appender, float value) { - APPENDER_CALL_PARAM(Append, value); -} - -duckdb_state duckdb_append_double(duckdb_appender appender, double value) { - APPENDER_CALL_PARAM(Append, value); -} - -duckdb_state duckdb_append_null(duckdb_appender appender) { - APPENDER_CALL_PARAM(Append, nullptr); -} - -duckdb_state duckdb_append_varchar(duckdb_appender appender, const char *val) { - auto string_val = Value(val); - APPENDER_CALL_PARAM(Append, string_val); -} - -duckdb_state duckdb_append_varchar_length(duckdb_appender appender, const char *val, idx_t length) { - auto string_val = Value(string(val, length)); // TODO 
this copies orr - APPENDER_CALL_PARAM(Append, string_val); -} -duckdb_state duckdb_append_blob(duckdb_appender appender, const void *data, idx_t length) { - auto blob_val = Value::BLOB((const_data_ptr_t)data, length); - APPENDER_CALL_PARAM(Append, blob_val); -} - -duckdb_state duckdb_appender_flush(duckdb_appender appender) { - APPENDER_CALL(Flush); -} - -duckdb_state duckdb_appender_close(duckdb_appender appender) { - APPENDER_CALL(Close); -} - - - namespace duckdb { MaterializedQueryResult::MaterializedQueryResult(StatementType statement_type) @@ -81138,7 +90308,7 @@ unique_ptr MaterializedQueryResult::Fetch() { unique_ptr MaterializedQueryResult::FetchRaw() { if (!success) { - throw InvalidInputException("Attempting to fetch from an unsuccessful query result"); + throw InvalidInputException("Attempting to fetch from an unsuccessful query result\nError: %s", error); } return collection.Fetch(); } @@ -81165,7 +90335,7 @@ PreparedStatement::~PreparedStatement() { idx_t PreparedStatement::ColumnCount() { D_ASSERT(data); - return data ? 
data->types.size() : 0; + return data->types.size(); } StatementType PreparedStatement::GetStatementType() { @@ -81218,13 +90388,11 @@ void PreparedStatementData::Bind(vector values) { if (it == value_map.end()) { throw BinderException("Could not find parameter with index %llu", i + 1); } - if (it->second.empty()) { - throw BinderException("No value found for parameter with index %llu", i + 1); - } + D_ASSERT(!it->second.empty()); if (!values[i].TryCastAs(it->second[0]->type())) { throw BinderException( "Type mismatch for binding parameter with index %llu, expected type %s but got type %s", i + 1, - values[i].type().ToString().c_str(), it->second[0]->type().ToString().c_str()); + it->second[0]->type().ToString().c_str(), values[i].type().ToString().c_str()); } for (auto &target : it->second) { *target = values[i]; @@ -81237,15 +90405,34 @@ LogicalType PreparedStatementData::GetType(idx_t param_idx) { if (it == value_map.end()) { throw BinderException("Could not find parameter with index %llu", param_idx); } - if (it->second.empty()) { - throw BinderException("No value found for parameter with index %llu", param_idx); - } + D_ASSERT(!it->second.empty()); return it->second[0]->type(); } } // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/fstream.hpp +// +// +//===----------------------------------------------------------------------===// + + + +#include +#include + +namespace duckdb { +using std::endl; +using std::fstream; +using std::ifstream; +using std::ios; +using std::ios_base; +using std::ofstream; +} // namespace duckdb @@ -81283,20 +90470,16 @@ bool QueryProfiler::OperatorRequiresProfiling(PhysicalOperatorType op_type) { case PhysicalOperatorType::STREAMING_SAMPLE: case PhysicalOperatorType::LIMIT: case PhysicalOperatorType::TOP_N: - case PhysicalOperatorType::AGGREGATE: case PhysicalOperatorType::WINDOW: case PhysicalOperatorType::UNNEST: case 
PhysicalOperatorType::SIMPLE_AGGREGATE: case PhysicalOperatorType::HASH_GROUP_BY: - case PhysicalOperatorType::SORT_GROUP_BY: case PhysicalOperatorType::FILTER: case PhysicalOperatorType::PROJECTION: case PhysicalOperatorType::COPY_TO_FILE: case PhysicalOperatorType::TABLE_SCAN: case PhysicalOperatorType::CHUNK_SCAN: case PhysicalOperatorType::DELIM_SCAN: - case PhysicalOperatorType::EXTERNAL_FILE_SCAN: - case PhysicalOperatorType::QUERY_DERIVED_SCAN: case PhysicalOperatorType::EXPRESSION_SCAN: case PhysicalOperatorType::BLOCKWISE_NL_JOIN: case PhysicalOperatorType::NESTED_LOOP_JOIN: @@ -81402,10 +90585,10 @@ void QueryProfiler::Initialize(PhysicalOperator *root_op) { } OperatorProfiler::OperatorProfiler(bool enabled_p) : enabled(enabled_p) { - execution_stack = std::stack(); + execution_stack = std::stack(); } -void OperatorProfiler::StartOperator(PhysicalOperator *phys_op) { +void OperatorProfiler::StartOperator(const PhysicalOperator *phys_op) { if (!enabled) { return; } @@ -81442,43 +90625,61 @@ void OperatorProfiler::EndOperator(DataChunk *chunk) { } } -void OperatorProfiler::AddTiming(PhysicalOperator *op, double time, idx_t elements) { +void OperatorProfiler::AddTiming(const PhysicalOperator *op, double time, idx_t elements) { if (!enabled) { return; } - + if (!Value::DoubleIsValid(time)) { + return; + } auto entry = timings.find(op); if (entry == timings.end()) { // add new entry - timings[op] = OperatorTimingInformation(time, elements); + timings[op] = OperatorInformation(time, elements); } else { // add to existing entry entry->second.time += time; entry->second.elements += elements; } } -void OperatorProfiler::Flush(PhysicalOperator *phys_op, ExpressionExecutor *expression_executor) { +void OperatorProfiler::Flush(const PhysicalOperator *phys_op, ExpressionExecutor *expression_executor, + const string &name, int id) { auto entry = timings.find(phys_op); - if (entry != timings.end()) { - auto &operator_timing = timings.find(phys_op)->second; - 
operator_timing.executors_info = make_unique(*expression_executor); - operator_timing.has_executor = true; + if (entry == timings.end()) { + return; + } + auto &operator_timing = timings.find(phys_op)->second; + if (int(operator_timing.executors_info.size()) <= id) { + operator_timing.executors_info.resize(id + 1); } + operator_timing.executors_info[id] = make_unique(*expression_executor, name, id); + operator_timing.name = phys_op->GetName(); } void QueryProfiler::Flush(OperatorProfiler &profiler) { if (!enabled || !running) { return; } - + lock_guard guard(flush_lock); for (auto &node : profiler.timings) { auto entry = tree_map.find(node.first); D_ASSERT(entry != tree_map.end()); entry->second->info.time += node.second.time; entry->second->info.elements += node.second.elements; - entry->second->info.executors_info = move(node.second.executors_info); - entry->second->info.has_executor = node.second.has_executor; + if (!detailed_enabled) { + continue; + } + for (auto &info : node.second.executors_info) { + if (!info) { + continue; + } + if (int(entry->second->info.executors_info.size()) <= info->id) { + entry->second->info.executors_info.resize(info->id + 1); + } + const auto& id = info->id; + entry->second->info.executors_info[id] = move(info); + } } } @@ -81584,27 +90785,84 @@ void QueryProfiler::ToStream(std::ostream &ss, bool print_optimizer_output) cons } } +// Print a row +static void PrintRow(std::ostream &ss, const string &annotation, int id, const string &name, double time, + int sample_counter, int tuple_counter, string extra_info, int depth) { + ss << string(depth * 3, ' ') << " {\n"; + ss << string(depth * 3, ' ') << " \"annotation\": \"" + annotation + "\",\n"; + ss << string(depth * 3, ' ') << " \"id\": " + to_string(id) + ",\n"; + ss << string(depth * 3, ' ') << " \"name\": \"" + name + "\",\n"; +#if defined(RDTSC) + ss << string(depth * 3, ' ') << " \"timing\": \"NULL\" ,\n"; + ss << string(depth * 3, ' ') << " \"cycles_per_tuple\": " + 
StringUtil::Format("%.4f", time) + ",\n"; +#else + ss << string(depth * 3, ' ') << " \"timing\":" + to_string(time) + ",\n"; + ss << string(depth * 3, ' ') << " \"cycles_per_tuple\": \"NULL\" ,\n"; +#endif + ss << string(depth * 3, ' ') << " \"sample_size\": " << to_string(sample_counter) + ",\n"; + ss << string(depth * 3, ' ') << " \"input_size\": " << to_string(tuple_counter) + ",\n"; + ss << string(depth * 3, ' ') << " \"extra_info\": \"" + << StringUtil::Replace(std::move(extra_info), "\n", "\\n") + "\"\n"; + ss << string(depth * 3, ' ') << " },\n"; +} + +static void ExtractFunctions(std::ostream &ss, ExpressionInfo &info, int &fun_id, int depth) { + if (info.hasfunction) { + D_ASSERT(info.sample_tuples_count != 0); + PrintRow(ss, "Function", fun_id++, info.function_name, + int(info.function_time) / double(info.sample_tuples_count), info.sample_tuples_count, + info.tuples_count, "", depth); + } + if (info.children.empty()) { + return; + } + // extract the children of this node + for (auto &child : info.children) { + ExtractFunctions(ss, *child, fun_id, depth); + } +} + static void ToJSONRecursive(QueryProfiler::TreeNode &node, std::ostream &ss, int depth = 1) { - ss << "{\n"; - ss << string(depth * 3, ' ') << "\"name\": \"" + node.name + "\",\n"; - ss << string(depth * 3, ' ') << "\"timing\":" + StringUtil::Format("%.2f", node.info.time) + ",\n"; - ss << string(depth * 3, ' ') << "\"cardinality\":" + to_string(node.info.elements) + ",\n"; - ss << string(depth * 3, ' ') << "\"extra_info\": \"" + StringUtil::Replace(node.extra_info, "\n", "\\n") + "\",\n"; - ss << string(depth * 3, ' ') << "\"children\": ["; + ss << string(depth * 3, ' ') << " {\n"; + ss << string(depth * 3, ' ') << " \"name\": \"" + node.name + "\",\n"; + ss << string(depth * 3, ' ') << " \"timing\":" + to_string(node.info.time) + ",\n"; + ss << string(depth * 3, ' ') << " \"cardinality\":" + to_string(node.info.elements) + ",\n"; + ss << string(depth * 3, ' ') + << " \"extra_info\": \"" + 
StringUtil::Replace(node.extra_info, "\n", "\\n") + "\",\n"; + ss << string(depth * 3, ' ') << " \"timings\": ["; + int32_t function_counter = 1; + int32_t expression_counter = 1; + ss << "\n "; + for (auto &expr_executor : node.info.executors_info) { + // For each Expression tree + if (!expr_executor) { + continue; + } + for (auto &expr_timer : expr_executor->roots) { + D_ASSERT(expr_timer->sample_tuples_count != 0); + PrintRow(ss, "ExpressionRoot", expression_counter++, expr_timer->name, + int(expr_timer->time) / double(expr_timer->sample_tuples_count), expr_timer->sample_tuples_count, + expr_timer->tuples_count, expr_timer->extra_info, depth + 1); + // Extract all functions inside the tree + ExtractFunctions(ss, *expr_timer->root, function_counter, depth + 1); + } + } + ss.seekp(-2, ss.cur); + ss << "\n"; + ss << string(depth * 3, ' ') << " ],\n"; + ss << string(depth * 3, ' ') << " \"children\": [\n"; if (node.children.empty()) { - ss << "]\n"; + ss << string(depth * 3, ' ') << " ]\n"; } else { for (idx_t i = 0; i < node.children.size(); i++) { if (i > 0) { - ss << ","; + ss << ",\n"; } - ss << "\n" << string(depth * 3, ' '); ToJSONRecursive(*node.children[i], ss, depth + 1); } - ss << "\n"; - ss << string(depth * 3, ' ') << "]\n"; + ss << string(depth * 3, ' ') << " ]\n"; } - ss << string(depth * 3, ' ') << "}"; + ss << string(depth * 3, ' ') << " }\n"; } string QueryProfiler::ToJSON() const { @@ -81619,24 +90877,33 @@ string QueryProfiler::ToJSON() const { } std::stringstream ss; ss << "{\n"; + ss << " \"name\": \"Query\", \n"; ss << " \"result\": " + to_string(main_query.Elapsed()) + ",\n"; + ss << " \"timing\": " + to_string(main_query.Elapsed()) + ",\n"; + ss << " \"cardinality\": " + to_string(root->info.elements) + ",\n"; + // JSON cannot have literal control characters in string literals + string extra_info = StringUtil::Replace(query, "\t", "\\t"); + extra_info = StringUtil::Replace(extra_info, "\n", "\\n"); + ss << " \"extra-info\": \"" + extra_info + 
"\", \n"; // print the phase timings - ss << " \"timings\": {\n"; + ss << " \"timings\": [\n"; const auto &ordered_phase_timings = GetOrderedPhaseTimings(); for (idx_t i = 0; i < ordered_phase_timings.size(); i++) { if (i > 0) { ss << ",\n"; } - ss << " \""; - ss << ordered_phase_timings[i].first; - ss << "\": "; - ss << to_string(ordered_phase_timings[i].second); + ss << " {\n"; + ss << " \"annotation\": \"" + ordered_phase_timings[i].first + "\", \n"; + ss << " \"timing\": " + to_string(ordered_phase_timings[i].second) + "\n"; + ss << " }"; } - ss << "\n },\n"; + ss << "\n"; + ss << " ],\n"; // recursively print the physical operator tree - ss << " \"tree\": "; + ss << " \"children\": [\n"; ToJSONRecursive(*root, ss); - ss << "\n}"; + ss << " ]\n"; + ss << "}"; return ss.str(); } @@ -81644,6 +90911,10 @@ void QueryProfiler::WriteToFile(const char *path, string &info) const { ofstream out(path); out << info; out.close(); + // throw an IO exception if it fails to write the file + if (out.fail()) { + throw IOException(strerror(errno)); + } } unique_ptr QueryProfiler::CreateTree(PhysicalOperator *root, idx_t depth) { @@ -81709,39 +90980,61 @@ vector QueryProfiler::GetOrderedPhaseTimings() c } return result; } +void QueryProfiler::Propagate(QueryProfiler &qp) { + this->automatic_print_format = qp.automatic_print_format; + this->save_location = qp.save_location; + this->enabled = qp.enabled; + this->detailed_enabled = qp.detailed_enabled; +} -void ExpressionInformation::ExtractExpressionsRecursive(unique_ptr &state) { +void ExpressionInfo::ExtractExpressionsRecursive(unique_ptr &state) { if (state->child_states.empty()) { return; } // extract the children of this node for (auto &child : state->child_states) { - auto expression_info_p = make_unique(child.get()->name, child.get()->time); + auto expr_info = make_unique(); if (child->expr.expression_class == ExpressionClass::BOUND_FUNCTION) { - expression_info_p->hasfunction = true; - expression_info_p->function_name = 
((BoundFunctionExpression &)child->expr).function.name; + expr_info->hasfunction = true; + expr_info->function_name = ((BoundFunctionExpression &)child->expr).function.ToString(); + expr_info->function_time = child->profiler.time; + expr_info->sample_tuples_count = child->profiler.sample_tuples_count; + expr_info->tuples_count = child->profiler.tuples_count; } - expression_info_p->ExtractExpressionsRecursive(child); - children.push_back(move(expression_info_p)); + expr_info->ExtractExpressionsRecursive(child); + children.push_back(move(expr_info)); } return; } -ExpressionExecutorInformation::ExpressionExecutorInformation(ExpressionExecutor &executor) - : total_count(executor.total_count), current_count(executor.current_count), sample_count(executor.sample_count), - sample_tuples_count(executor.sample_tuples_count), tuples_count(executor.tuples_count) { +ExpressionExecutorInfo::ExpressionExecutorInfo(ExpressionExecutor &executor, const string &name, int id) : id(id) { + // Extract Expression Root Information from ExpressionExecutorStats for (auto &state : executor.GetStates()) { - auto expression_info_p = - make_unique(state.get()->root_state->name, state.get()->root_state.get()->time); - if (state->root_state->expr.expression_class == ExpressionClass::BOUND_FUNCTION) { - expression_info_p->hasfunction = true; - expression_info_p->function_name = ((BoundFunctionExpression &)state->root_state->expr).function.name; - } - expression_info_p->ExtractExpressionsRecursive(state.get()->root_state); - roots.push_back(move(expression_info_p)); + roots.push_back(make_unique(*state, name)); + } +} + +ExpressionRootInfo::ExpressionRootInfo(ExpressionExecutorState &state, string name) + : current_count(state.profiler.current_count), sample_count(state.profiler.sample_count), + sample_tuples_count(state.profiler.sample_tuples_count), tuples_count(state.profiler.tuples_count), + name(state.name), time(state.profiler.time) { + // Use the name of expression-tree as extra-info + 
extra_info = move(name); + auto expression_info_p = make_unique(); + // Maybe root has a function + if (state.root_state->expr.expression_class == ExpressionClass::BOUND_FUNCTION) { + expression_info_p->hasfunction = true; + expression_info_p->function_name = ((BoundFunctionExpression &)state.root_state->expr).function.name; + expression_info_p->function_time = state.root_state->profiler.time; + expression_info_p->sample_tuples_count = state.root_state->profiler.sample_tuples_count; + expression_info_p->tuples_count = state.root_state->profiler.tuples_count; } + expression_info_p->ExtractExpressionsRecursive(state.root_state); + root = move(expression_info_p); } } // namespace duckdb +#include + @@ -81835,9 +91128,14 @@ string QueryResult::HeaderToString() { struct DuckDBArrowSchemaHolder { // unused in children - vector children = {}; + vector children; // unused in children - vector children_ptrs = {}; + vector children_ptrs; + //! used for nested structures + std::list> nested_children; + std::list> nested_children_ptr; + //! This holds strings created to represent decimal types + vector> owned_type_names; }; static void ReleaseDuckDBArrowSchema(ArrowSchema *schema) { @@ -81849,6 +91147,173 @@ static void ReleaseDuckDBArrowSchema(ArrowSchema *schema) { delete holder; } +void InitializeChild(ArrowSchema &child, const string &name = "") { + //! Child is cleaned up by parent + child.private_data = nullptr; + child.release = ReleaseDuckDBArrowSchema; + + //! Store the child schema + child.flags = ARROW_FLAG_NULLABLE; + child.name = name.c_str(); + child.n_children = 0; + child.children = nullptr; + child.metadata = nullptr; + child.dictionary = nullptr; +} +void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type); + +void SetArrowMapFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type) { + child.format = "+m"; + //! 
Map has one child which is a struct + child.n_children = 1; + root_holder.nested_children.emplace_back(); + root_holder.nested_children.back().resize(1); + root_holder.nested_children_ptr.emplace_back(); + root_holder.nested_children_ptr.back().push_back(&root_holder.nested_children.back()[0]); + InitializeChild(root_holder.nested_children.back()[0]); + child.children = &root_holder.nested_children_ptr.back()[0]; + child.children[0]->name = "entries"; + child_list_t struct_child_types; + struct_child_types.push_back(std::make_pair("key", ListType::GetChildType(StructType::GetChildType(type, 0)))); + struct_child_types.push_back(std::make_pair("value", ListType::GetChildType(StructType::GetChildType(type, 1)))); + auto struct_type = LogicalType::STRUCT(move(struct_child_types)); + SetArrowFormat(root_holder, *child.children[0], struct_type); +} + +void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type) { + switch (type.id()) { + case LogicalTypeId::BOOLEAN: + child.format = "b"; + break; + case LogicalTypeId::TINYINT: + child.format = "c"; + break; + case LogicalTypeId::SMALLINT: + child.format = "s"; + break; + case LogicalTypeId::INTEGER: + child.format = "i"; + break; + case LogicalTypeId::BIGINT: + child.format = "l"; + break; + case LogicalTypeId::UTINYINT: + child.format = "C"; + break; + case LogicalTypeId::USMALLINT: + child.format = "S"; + break; + case LogicalTypeId::UINTEGER: + child.format = "I"; + break; + case LogicalTypeId::UBIGINT: + child.format = "L"; + break; + case LogicalTypeId::FLOAT: + child.format = "f"; + break; + case LogicalTypeId::HUGEINT: + child.format = "d:38,0"; + break; + case LogicalTypeId::DOUBLE: + child.format = "g"; + break; + case LogicalTypeId::VARCHAR: + child.format = "u"; + break; + case LogicalTypeId::DATE: + child.format = "tdD"; + break; + case LogicalTypeId::TIME: + child.format = "ttu"; + break; + case LogicalTypeId::TIMESTAMP: + child.format = "tsu:"; + break; + case 
LogicalTypeId::TIMESTAMP_SEC: + child.format = "tss:"; + break; + case LogicalTypeId::TIMESTAMP_NS: + child.format = "tsn:"; + break; + case LogicalTypeId::TIMESTAMP_MS: + child.format = "tsm:"; + break; + case LogicalTypeId::INTERVAL: + child.format = "tDm"; + break; + case LogicalTypeId::DECIMAL: { + uint8_t width, scale; + type.GetDecimalProperties(width, scale); + string format = "d:" + to_string(width) + "," + to_string(scale); + unique_ptr format_ptr = unique_ptr(new char[format.size() + 1]); + for (size_t i = 0; i < format.size(); i++) { + format_ptr[i] = format[i]; + } + format_ptr[format.size()] = '\0'; + root_holder.owned_type_names.push_back(move(format_ptr)); + child.format = root_holder.owned_type_names.back().get(); + break; + } + case LogicalTypeId::SQLNULL: { + child.format = "n"; + break; + } + case LogicalTypeId::BLOB: { + child.format = "z"; + break; + } + case LogicalTypeId::LIST: { + child.format = "+l"; + child.n_children = 1; + root_holder.nested_children.emplace_back(); + root_holder.nested_children.back().resize(1); + root_holder.nested_children_ptr.emplace_back(); + root_holder.nested_children_ptr.back().push_back(&root_holder.nested_children.back()[0]); + InitializeChild(root_holder.nested_children.back()[0]); + child.children = &root_holder.nested_children_ptr.back()[0]; + child.children[0]->name = "l"; + SetArrowFormat(root_holder, **child.children, ListType::GetChildType(type)); + break; + } + case LogicalTypeId::STRUCT: { + child.format = "+s"; + auto &child_types = StructType::GetChildTypes(type); + child.n_children = child_types.size(); + root_holder.nested_children.emplace_back(); + root_holder.nested_children.back().resize(child_types.size()); + root_holder.nested_children_ptr.emplace_back(); + root_holder.nested_children_ptr.back().resize(child_types.size()); + for (idx_t type_idx = 0; type_idx < child_types.size(); type_idx++) { + root_holder.nested_children_ptr.back()[type_idx] = &root_holder.nested_children.back()[type_idx]; + 
} + child.children = &root_holder.nested_children_ptr.back()[0]; + for (size_t type_idx = 0; type_idx < child_types.size(); type_idx++) { + + InitializeChild(*child.children[type_idx]); + + auto &struct_col_name = child_types[type_idx].first; + unique_ptr name_ptr = unique_ptr(new char[struct_col_name.size() + 1]); + for (size_t i = 0; i < struct_col_name.size(); i++) { + name_ptr[i] = struct_col_name[i]; + } + name_ptr[struct_col_name.size()] = '\0'; + root_holder.owned_type_names.push_back(move(name_ptr)); + + child.children[type_idx]->name = root_holder.owned_type_names.back().get(); + SetArrowFormat(root_holder, *child.children[type_idx], child_types[type_idx].second); + } + break; + } + case LogicalTypeId::MAP: { + SetArrowMapFormat(root_holder, child, type); + break; + } + default: + throw InternalException("Unsupported Arrow type " + type.ToString()); + } +} + void QueryResult::ToArrowSchema(ArrowSchema *out_schema) { D_ASSERT(out_schema); @@ -81873,73 +91338,10 @@ void QueryResult::ToArrowSchema(ArrowSchema *out_schema) { // Configure all child schemas for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) { - auto &child = root_holder->children[col_idx]; - - // Child is cleaned up by parent - child.private_data = nullptr; - child.release = ReleaseDuckDBArrowSchema; - // Store the child schema - child.flags = ARROW_FLAG_NULLABLE; - child.name = names[col_idx].c_str(); - child.n_children = 0; - child.children = nullptr; - child.flags = 0; - child.metadata = nullptr; - child.dictionary = nullptr; - - switch (types[col_idx].id()) { - case LogicalTypeId::BOOLEAN: - child.format = "b"; - break; - case LogicalTypeId::TINYINT: - child.format = "c"; - break; - case LogicalTypeId::SMALLINT: - child.format = "s"; - break; - case LogicalTypeId::INTEGER: - child.format = "i"; - break; - case LogicalTypeId::BIGINT: - child.format = "l"; - break; - case LogicalTypeId::UTINYINT: - child.format = "C"; - break; - case LogicalTypeId::USMALLINT: - child.format = "S"; - 
break; - case LogicalTypeId::UINTEGER: - child.format = "I"; - break; - case LogicalTypeId::UBIGINT: - child.format = "L"; - break; - case LogicalTypeId::FLOAT: - child.format = "f"; - break; - case LogicalTypeId::HUGEINT: - child.format = "d:38,0"; - break; - case LogicalTypeId::DOUBLE: - child.format = "g"; - break; - case LogicalTypeId::VARCHAR: - child.format = "u"; - break; - case LogicalTypeId::DATE: - child.format = "tdD"; - break; - case LogicalTypeId::TIME: - child.format = "ttm"; - break; - case LogicalTypeId::TIMESTAMP: - child.format = "tsn:"; - break; - default: - throw NotImplementedException("Unsupported Arrow type " + types[col_idx].ToString()); - } + auto &child = root_holder->children[col_idx]; + InitializeChild(child, names[col_idx]); + SetArrowFormat(*root_holder, child, types[col_idx]); } // Release ownership to caller @@ -82087,7 +91489,6 @@ class CreateTableRelation : public Relation { vector columns; public: - unique_ptr GetQueryNode() override; BoundStatement Bind(Binder &binder) override; const vector &Columns() override; string ToString(idx_t depth) override; @@ -82138,10 +91539,6 @@ CreateTableRelation::CreateTableRelation(shared_ptr child_p, string sc context.TryBindRelation(*this, this->columns); } -unique_ptr CreateTableRelation::GetQueryNode() { - throw InternalException("Cannot create a query node from a CreateTableRelation!"); -} - BoundStatement CreateTableRelation::Bind(Binder &binder) { auto select = make_unique(); select->node = child->GetQueryNode(); @@ -82161,7 +91558,7 @@ const vector &CreateTableRelation::Columns() { } string CreateTableRelation::ToString(idx_t depth) { - string str = RenderWhitespace(depth) + "Create View\n"; + string str = RenderWhitespace(depth) + "Create Table\n"; return str + child->ToString(depth + 1); } @@ -82191,7 +91588,6 @@ class CreateViewRelation : public Relation { vector columns; public: - unique_ptr GetQueryNode() override; BoundStatement Bind(Binder &binder) override; const vector &Columns() 
override; string ToString(idx_t depth) override; @@ -82216,10 +91612,6 @@ CreateViewRelation::CreateViewRelation(shared_ptr child_p, string view context.TryBindRelation(*this, this->columns); } -unique_ptr CreateViewRelation::GetQueryNode() { - throw InternalException("Cannot create a query node from a CreateViewRelation!"); -} - BoundStatement CreateViewRelation::Bind(Binder &binder) { auto select = make_unique(); select->node = child->GetQueryNode(); @@ -82271,7 +91663,6 @@ class DeleteRelation : public Relation { string table_name; public: - unique_ptr GetQueryNode() override; BoundStatement Bind(Binder &binder) override; const vector &Columns() override; string ToString(idx_t depth) override; @@ -82325,10 +91716,6 @@ DeleteRelation::DeleteRelation(ClientContext &context, unique_ptrcolumns); } -unique_ptr DeleteRelation::GetQueryNode() { - throw InternalException("Cannot create a query node from a DeleteRelation!"); -} - BoundStatement DeleteRelation::Bind(Binder &binder) { auto basetable = make_unique(); basetable->schema_name = schema_name; @@ -82445,7 +91832,6 @@ class ExplainRelation : public Relation { vector columns; public: - unique_ptr GetQueryNode() override; BoundStatement Bind(Binder &binder) override; const vector &Columns() override; string ToString(idx_t depth) override; @@ -82469,10 +91855,6 @@ ExplainRelation::ExplainRelation(shared_ptr child_p) context.TryBindRelation(*this, this->columns); } -unique_ptr ExplainRelation::GetQueryNode() { - throw InternalException("Cannot create a query node from a ExplainRelation!"); -} - BoundStatement ExplainRelation::Bind(Binder &binder) { auto select = make_unique(); select->node = child->GetQueryNode(); @@ -82669,7 +92051,6 @@ class InsertRelation : public Relation { vector columns; public: - unique_ptr GetQueryNode() override; BoundStatement Bind(Binder &binder) override; const vector &Columns() override; string ToString(idx_t depth) override; @@ -82728,10 +92109,6 @@ 
InsertRelation::InsertRelation(shared_ptr child_p, string schema_name, context.TryBindRelation(*this, this->columns); } -unique_ptr InsertRelation::GetQueryNode() { - throw Exception("Cannot create a query node from a InsertRelation!"); -} - BoundStatement InsertRelation::Bind(Binder &binder) { InsertStatement stmt; auto select = make_unique(); @@ -83215,7 +92592,7 @@ unique_ptr ReadCSVRelation::GetTableRef() { make_unique("auto_detect"), make_unique(Value::BOOLEAN(true)))); } - table_ref->function = make_unique("read_csv", children); + table_ref->function = make_unique("read_csv", move(children)); return move(table_ref); } @@ -83309,7 +92686,7 @@ string SetOpRelation::ToString(idx_t depth) { str += "Intersect"; break; default: - throw Exception("Unknown setop type"); + throw InternalException("Unknown setop type"); } return str + "\n" + left->ToString(depth + 1) + right->ToString(depth + 1); } @@ -83422,7 +92799,7 @@ unique_ptr TableFunctionRelation::GetTableRef() { } auto table_function = make_unique(); - auto function = make_unique(name, children); + auto function = make_unique(name, move(children)); table_function->function = move(function); return move(table_function); } @@ -83481,7 +92858,6 @@ class UpdateRelation : public Relation { vector> expressions; public: - unique_ptr GetQueryNode() override; BoundStatement Bind(Binder &binder) override; const vector &Columns() override; string ToString(idx_t depth) override; @@ -83604,10 +92980,6 @@ UpdateRelation::UpdateRelation(ClientContext &context, unique_ptrcolumns); } -unique_ptr UpdateRelation::GetQueryNode() { - throw InternalException("Cannot create a query node from a UpdateRelation!"); -} - BoundStatement UpdateRelation::Bind(Binder &binder) { auto basetable = make_unique(); basetable->schema_name = schema_name; @@ -83793,7 +93165,6 @@ class WriteCSVRelation : public Relation { vector columns; public: - unique_ptr GetQueryNode() override; BoundStatement Bind(Binder &binder) override; const vector 
&Columns() override; string ToString(idx_t depth) override; @@ -83844,10 +93215,6 @@ WriteCSVRelation::WriteCSVRelation(shared_ptr child_p, string csv_file context.TryBindRelation(*this, this->columns); } -unique_ptr WriteCSVRelation::GetQueryNode() { - throw InternalException("Cannot create a query node from a WriteCSVRelation!"); -} - BoundStatement WriteCSVRelation::Bind(Binder &binder) { CopyStatement copy; copy.select_statement = child->GetQueryNode(); @@ -83946,9 +93313,8 @@ shared_ptr Relation::Filter(const string &expression) { shared_ptr Relation::Filter(const vector &expressions) { // if there are multiple expressions, we AND them together auto expression_list = StringListToExpressionList(expressions); - if (expression_list.empty()) { - throw ParserException("Zero filter conditions provided"); - } + D_ASSERT(!expression_list.empty()); + auto expr = move(expression_list[0]); for (idx_t i = 1; i < expression_list.size(); i++) { expr = @@ -83983,9 +93349,8 @@ shared_ptr Relation::Order(const vector &expressions) { shared_ptr Relation::Join(const shared_ptr &other, const string &condition, JoinType type) { auto expression_list = Parser::ParseExpressionList(condition); - if (expression_list.empty()) { - throw ParserException("Expected a single expression as join condition"); - } + D_ASSERT(!expression_list.empty()); + if (expression_list.size() > 1 || expression_list[0]->type == ExpressionType::COLUMN_REF) { // multiple columns or single column ref: the condition is a USING list vector using_columns; @@ -84106,11 +93471,6 @@ void Relation::WriteCSV(const string &csv_file) { } } -void Relation::Head(idx_t limit) { - auto limit_node = Limit(limit); - limit_node->Execute()->Print(); -} - shared_ptr Relation::CreateView(const string &name, bool replace, bool temporary) { auto view = make_shared(shared_from_this(), name, replace, temporary); auto res = view->Execute(); @@ -84142,8 +93502,8 @@ void Relation::Delete(const string &condition) { throw Exception("DELETE 
can only be used on base tables!"); } -shared_ptr Relation::TableFunction(const std::string &fname, vector &values) { - return make_shared(context, fname, values, shared_from_this()); +shared_ptr Relation::TableFunction(const std::string &fname, vector values) { + return make_shared(context, fname, move(values), shared_from_this()); } string Relation::ToString() { @@ -84163,6 +93523,17 @@ string Relation::ToString() { return str; } +// LCOV_EXCL_START +unique_ptr Relation::GetQueryNode() { + throw InternalException("Cannot create a query node from this node type"); +} + +void Relation::Head(idx_t limit) { + auto limit_node = Limit(limit); + limit_node->Execute()->Print(); +} +// LCOV_EXCL_STOP + void Relation::Print() { Printer::Print(ToString()); } @@ -84203,7 +93574,8 @@ string StreamQueryResult::ToString() { unique_ptr StreamQueryResult::FetchRaw() { if (!success || !is_open) { - throw InvalidInputException("Attempting to fetch from an unsuccessful or closed streaming query result"); + throw InvalidInputException( + "Attempting to fetch from an unsuccessful or closed streaming query result\nError: %s", error); } auto chunk = context->Fetch(); if (!chunk || chunk->ColumnCount() == 0 || chunk->size() == 0) { @@ -84221,10 +93593,13 @@ unique_ptr StreamQueryResult::Materialize() { while (true) { auto chunk = Fetch(); if (!chunk || chunk->size() == 0) { - return result; + break; } result->collection.Append(*chunk); } + if (!success) { + return make_unique(error); + } return result; } @@ -85394,20 +94769,22 @@ class FilterCombiner { void GenerateFilters(const std::function filter)> &callback); bool HasFilters(); - vector GenerateTableScanFilters(vector &column_ids); - vector GenerateZonemapChecks(vector &column_ids, vector &pushed_filters); + TableFilterSet GenerateTableScanFilters(vector &column_ids); + // vector> GenerateZonemapChecks(vector &column_ids, vector> + // &pushed_filters); private: FilterResult AddFilter(Expression *expr); FilterResult 
AddBoundComparisonFilter(Expression *expr); FilterResult AddTransitiveFilters(BoundComparisonExpression &comparison); unique_ptr FindTransitiveFilter(Expression *expr); - unordered_map> - FindZonemapChecks(vector &column_ids, unordered_set ¬_constants, Expression *filter); + // unordered_map> + // FindZonemapChecks(vector &column_ids, unordered_set ¬_constants, Expression *filter); Expression *GetNode(Expression *expr); idx_t GetEquivalenceSet(Expression *expr); FilterResult AddConstantComparison(vector &info_list, ExpressionValueInformation info); +private: vector> remaining_filters; expression_map_t> stored_expressions; @@ -85434,6 +94811,8 @@ class FilterCombiner { + + namespace duckdb { using ExpressionValueInformation = FilterCombiner::ExpressionValueInformation; @@ -85578,228 +94957,227 @@ bool FilterCombiner::HasFilters() { return has_filters; } -unordered_map> MergeAnd(unordered_map> &f_1, - unordered_map> &f_2) { - unordered_map> result; - for (auto &f : f_1) { - auto it = f_2.find(f.first); - if (it == f_2.end()) { - result[f.first] = f.second; - } else { - Value *min = nullptr, *max = nullptr; - if (it->second.first && f.second.first) { - if (*f.second.first > *it->second.first) { - min = f.second.first; - } else { - min = it->second.first; - } - - } else if (it->second.first) { - min = it->second.first; - } else if (f.second.first) { - min = f.second.first; - } else { - min = nullptr; - } - if (it->second.second && f.second.second) { - if (*f.second.second < *it->second.second) { - max = f.second.second; - } else { - max = it->second.second; - } - } else if (it->second.second) { - max = it->second.second; - } else if (f.second.second) { - max = f.second.second; - } else { - max = nullptr; - } - result[f.first] = {min, max}; - f_2.erase(f.first); - } - } - for (auto &f : f_2) { - result[f.first] = f.second; - } - return result; -} - -unordered_map> MergeOr(unordered_map> &f_1, - unordered_map> &f_2) { - unordered_map> result; - for (auto &f : f_1) { - 
auto it = f_2.find(f.first); - if (it != f_2.end()) { - Value *min = nullptr, *max = nullptr; - if (it->second.first && f.second.first) { - if (*f.second.first < *it->second.first) { - min = f.second.first; - } else { - min = it->second.first; - } - } - if (it->second.second && f.second.second) { - if (*f.second.second > *it->second.second) { - max = f.second.second; - } else { - max = it->second.second; - } - } - result[f.first] = {min, max}; - f_2.erase(f.first); - } - } - return result; -} - -unordered_map> -FilterCombiner::FindZonemapChecks(vector &column_ids, unordered_set ¬_constants, Expression *filter) { - unordered_map> checks; - switch (filter->type) { - case ExpressionType::CONJUNCTION_OR: { - //! For a filter to - auto &or_exp = (BoundConjunctionExpression &)*filter; - checks = FindZonemapChecks(column_ids, not_constants, or_exp.children[0].get()); - for (size_t i = 1; i < or_exp.children.size(); ++i) { - auto child_check = FindZonemapChecks(column_ids, not_constants, or_exp.children[i].get()); - checks = MergeOr(checks, child_check); - } - return checks; - } - case ExpressionType::CONJUNCTION_AND: { - auto &and_exp = (BoundConjunctionExpression &)*filter; - checks = FindZonemapChecks(column_ids, not_constants, and_exp.children[0].get()); - for (size_t i = 1; i < and_exp.children.size(); ++i) { - auto child_check = FindZonemapChecks(column_ids, not_constants, and_exp.children[i].get()); - checks = MergeAnd(checks, child_check); - } - return checks; - } - case ExpressionType::COMPARE_IN: { - auto &comp_in_exp = (BoundOperatorExpression &)*filter; - if (comp_in_exp.children[0]->type == ExpressionType::BOUND_COLUMN_REF) { - Value *min = nullptr, *max = nullptr; - auto &column_ref = (BoundColumnRefExpression &)*comp_in_exp.children[0].get(); - for (size_t i {1}; i < comp_in_exp.children.size(); i++) { - if (comp_in_exp.children[i]->type != ExpressionType::VALUE_CONSTANT) { - //! 
This indicates the column has a comparison that is not with a constant - not_constants.insert(column_ids[column_ref.binding.column_index]); - break; - } else { - auto &const_value_expr = (BoundConstantExpression &)*comp_in_exp.children[i].get(); - if (const_value_expr.value.is_null) { - return checks; - } - if (!min && !max) { - min = &const_value_expr.value; - max = min; - } else { - if (*min > const_value_expr.value) { - min = &const_value_expr.value; - } - if (*max < const_value_expr.value) { - max = &const_value_expr.value; - } - } - } - } - checks[column_ids[column_ref.binding.column_index]] = {min, max}; - } - return checks; - } - case ExpressionType::COMPARE_EQUAL: { - auto &comp_exp = (BoundComparisonExpression &)*filter; - if ((comp_exp.left->expression_class == ExpressionClass::BOUND_COLUMN_REF && - comp_exp.right->expression_class == ExpressionClass::BOUND_CONSTANT)) { - auto &column_ref = (BoundColumnRefExpression &)*comp_exp.left; - auto &constant_value_expr = (BoundConstantExpression &)*comp_exp.right; - checks[column_ids[column_ref.binding.column_index]] = {&constant_value_expr.value, - &constant_value_expr.value}; - } - if ((comp_exp.left->expression_class == ExpressionClass::BOUND_CONSTANT && - comp_exp.right->expression_class == ExpressionClass::BOUND_COLUMN_REF)) { - auto &column_ref = (BoundColumnRefExpression &)*comp_exp.right; - auto &constant_value_expr = (BoundConstantExpression &)*comp_exp.left; - checks[column_ids[column_ref.binding.column_index]] = {&constant_value_expr.value, - &constant_value_expr.value}; - } - return checks; - } - case ExpressionType::COMPARE_LESSTHAN: - case ExpressionType::COMPARE_LESSTHANOREQUALTO: { - auto &comp_exp = (BoundComparisonExpression &)*filter; - if ((comp_exp.left->expression_class == ExpressionClass::BOUND_COLUMN_REF && - comp_exp.right->expression_class == ExpressionClass::BOUND_CONSTANT)) { - auto &column_ref = (BoundColumnRefExpression &)*comp_exp.left; - auto &constant_value_expr = 
(BoundConstantExpression &)*comp_exp.right; - checks[column_ids[column_ref.binding.column_index]] = {nullptr, &constant_value_expr.value}; - } - if ((comp_exp.left->expression_class == ExpressionClass::BOUND_CONSTANT && - comp_exp.right->expression_class == ExpressionClass::BOUND_COLUMN_REF)) { - auto &column_ref = (BoundColumnRefExpression &)*comp_exp.right; - auto &constant_value_expr = (BoundConstantExpression &)*comp_exp.left; - checks[column_ids[column_ref.binding.column_index]] = {&constant_value_expr.value, nullptr}; - } - return checks; - } - case ExpressionType::COMPARE_GREATERTHANOREQUALTO: - case ExpressionType::COMPARE_GREATERTHAN: { - auto &comp_exp = (BoundComparisonExpression &)*filter; - if ((comp_exp.left->expression_class == ExpressionClass::BOUND_COLUMN_REF && - comp_exp.right->expression_class == ExpressionClass::BOUND_CONSTANT)) { - auto &column_ref = (BoundColumnRefExpression &)*comp_exp.left; - auto &constant_value_expr = (BoundConstantExpression &)*comp_exp.right; - checks[column_ids[column_ref.binding.column_index]] = {&constant_value_expr.value, nullptr}; - } - if ((comp_exp.left->expression_class == ExpressionClass::BOUND_CONSTANT && - comp_exp.right->expression_class == ExpressionClass::BOUND_COLUMN_REF)) { - auto &column_ref = (BoundColumnRefExpression &)*comp_exp.right; - auto &constant_value_expr = (BoundConstantExpression &)*comp_exp.left; - checks[column_ids[column_ref.binding.column_index]] = {nullptr, &constant_value_expr.value}; - } - return checks; - } - default: - return checks; - } -} - -vector FilterCombiner::GenerateZonemapChecks(vector &column_ids, - vector &pushed_filters) { - vector zonemap_checks; - unordered_set not_constants; - //! 
We go through the remaining filters and capture their min max - if (remaining_filters.empty()) { - return zonemap_checks; - } - - auto checks = FindZonemapChecks(column_ids, not_constants, remaining_filters[0].get()); - for (size_t i = 1; i < remaining_filters.size(); ++i) { - auto child_check = FindZonemapChecks(column_ids, not_constants, remaining_filters[i].get()); - checks = MergeAnd(checks, child_check); - } - //! We construct the equivalent filters - for (auto not_constant : not_constants) { - checks.erase(not_constant); - } - for (const auto &pushed_filter : pushed_filters) { - checks.erase(column_ids[pushed_filter.column_index]); - } - for (const auto &check : checks) { - if (check.second.first) { - zonemap_checks.emplace_back(check.second.first->Copy(), ExpressionType::COMPARE_GREATERTHANOREQUALTO, - check.first); - } - if (check.second.second) { - zonemap_checks.emplace_back(check.second.second->Copy(), ExpressionType::COMPARE_LESSTHANOREQUALTO, - check.first); - } - } - return zonemap_checks; -} - -vector FilterCombiner::GenerateTableScanFilters(vector &column_ids) { - vector table_filters; +// unordered_map> MergeAnd(unordered_map> &f_1, +// unordered_map> &f_2) { +// unordered_map> result; +// for (auto &f : f_1) { +// auto it = f_2.find(f.first); +// if (it == f_2.end()) { +// result[f.first] = f.second; +// } else { +// Value *min = nullptr, *max = nullptr; +// if (it->second.first && f.second.first) { +// if (*f.second.first > *it->second.first) { +// min = f.second.first; +// } else { +// min = it->second.first; +// } + +// } else if (it->second.first) { +// min = it->second.first; +// } else if (f.second.first) { +// min = f.second.first; +// } else { +// min = nullptr; +// } +// if (it->second.second && f.second.second) { +// if (*f.second.second < *it->second.second) { +// max = f.second.second; +// } else { +// max = it->second.second; +// } +// } else if (it->second.second) { +// max = it->second.second; +// } else if (f.second.second) { +// 
max = f.second.second; +// } else { +// max = nullptr; +// } +// result[f.first] = {min, max}; +// f_2.erase(f.first); +// } +// } +// for (auto &f : f_2) { +// result[f.first] = f.second; +// } +// return result; +// } + +// unordered_map> MergeOr(unordered_map> &f_1, +// unordered_map> &f_2) { +// unordered_map> result; +// for (auto &f : f_1) { +// auto it = f_2.find(f.first); +// if (it != f_2.end()) { +// Value *min = nullptr, *max = nullptr; +// if (it->second.first && f.second.first) { +// if (*f.second.first < *it->second.first) { +// min = f.second.first; +// } else { +// min = it->second.first; +// } +// } +// if (it->second.second && f.second.second) { +// if (*f.second.second > *it->second.second) { +// max = f.second.second; +// } else { +// max = it->second.second; +// } +// } +// result[f.first] = {min, max}; +// f_2.erase(f.first); +// } +// } +// return result; +// } + +// unordered_map> +// FilterCombiner::FindZonemapChecks(vector &column_ids, unordered_set ¬_constants, Expression *filter) +// { unordered_map> checks; switch (filter->type) { case +// ExpressionType::CONJUNCTION_OR: { +// //! 
For a filter to +// auto &or_exp = (BoundConjunctionExpression &)*filter; +// checks = FindZonemapChecks(column_ids, not_constants, or_exp.children[0].get()); +// for (size_t i = 1; i < or_exp.children.size(); ++i) { +// auto child_check = FindZonemapChecks(column_ids, not_constants, or_exp.children[i].get()); +// checks = MergeOr(checks, child_check); +// } +// return checks; +// } +// case ExpressionType::CONJUNCTION_AND: { +// auto &and_exp = (BoundConjunctionExpression &)*filter; +// checks = FindZonemapChecks(column_ids, not_constants, and_exp.children[0].get()); +// for (size_t i = 1; i < and_exp.children.size(); ++i) { +// auto child_check = FindZonemapChecks(column_ids, not_constants, and_exp.children[i].get()); +// checks = MergeAnd(checks, child_check); +// } +// return checks; +// } +// case ExpressionType::COMPARE_IN: { +// auto &comp_in_exp = (BoundOperatorExpression &)*filter; +// if (comp_in_exp.children[0]->type == ExpressionType::BOUND_COLUMN_REF) { +// Value *min = nullptr, *max = nullptr; +// auto &column_ref = (BoundColumnRefExpression &)*comp_in_exp.children[0].get(); +// for (size_t i {1}; i < comp_in_exp.children.size(); i++) { +// if (comp_in_exp.children[i]->type != ExpressionType::VALUE_CONSTANT) { +// //! 
This indicates the column has a comparison that is not with a constant +// not_constants.insert(column_ids[column_ref.binding.column_index]); +// break; +// } else { +// auto &const_value_expr = (BoundConstantExpression &)*comp_in_exp.children[i].get(); +// if (const_value_expr.value.is_null) { +// return checks; +// } +// if (!min && !max) { +// min = &const_value_expr.value; +// max = min; +// } else { +// if (*min > const_value_expr.value) { +// min = &const_value_expr.value; +// } +// if (*max < const_value_expr.value) { +// max = &const_value_expr.value; +// } +// } +// } +// } +// checks[column_ids[column_ref.binding.column_index]] = {min, max}; +// } +// return checks; +// } +// case ExpressionType::COMPARE_EQUAL: { +// auto &comp_exp = (BoundComparisonExpression &)*filter; +// if ((comp_exp.left->expression_class == ExpressionClass::BOUND_COLUMN_REF && +// comp_exp.right->expression_class == ExpressionClass::BOUND_CONSTANT)) { +// auto &column_ref = (BoundColumnRefExpression &)*comp_exp.left; +// auto &constant_value_expr = (BoundConstantExpression &)*comp_exp.right; +// checks[column_ids[column_ref.binding.column_index]] = {&constant_value_expr.value, +// &constant_value_expr.value}; +// } +// if ((comp_exp.left->expression_class == ExpressionClass::BOUND_CONSTANT && +// comp_exp.right->expression_class == ExpressionClass::BOUND_COLUMN_REF)) { +// auto &column_ref = (BoundColumnRefExpression &)*comp_exp.right; +// auto &constant_value_expr = (BoundConstantExpression &)*comp_exp.left; +// checks[column_ids[column_ref.binding.column_index]] = {&constant_value_expr.value, +// &constant_value_expr.value}; +// } +// return checks; +// } +// case ExpressionType::COMPARE_LESSTHAN: +// case ExpressionType::COMPARE_LESSTHANOREQUALTO: { +// auto &comp_exp = (BoundComparisonExpression &)*filter; +// if ((comp_exp.left->expression_class == ExpressionClass::BOUND_COLUMN_REF && +// comp_exp.right->expression_class == ExpressionClass::BOUND_CONSTANT)) { +// auto 
&column_ref = (BoundColumnRefExpression &)*comp_exp.left; +// auto &constant_value_expr = (BoundConstantExpression &)*comp_exp.right; +// checks[column_ids[column_ref.binding.column_index]] = {nullptr, &constant_value_expr.value}; +// } +// if ((comp_exp.left->expression_class == ExpressionClass::BOUND_CONSTANT && +// comp_exp.right->expression_class == ExpressionClass::BOUND_COLUMN_REF)) { +// auto &column_ref = (BoundColumnRefExpression &)*comp_exp.right; +// auto &constant_value_expr = (BoundConstantExpression &)*comp_exp.left; +// checks[column_ids[column_ref.binding.column_index]] = {&constant_value_expr.value, nullptr}; +// } +// return checks; +// } +// case ExpressionType::COMPARE_GREATERTHANOREQUALTO: +// case ExpressionType::COMPARE_GREATERTHAN: { +// auto &comp_exp = (BoundComparisonExpression &)*filter; +// if ((comp_exp.left->expression_class == ExpressionClass::BOUND_COLUMN_REF && +// comp_exp.right->expression_class == ExpressionClass::BOUND_CONSTANT)) { +// auto &column_ref = (BoundColumnRefExpression &)*comp_exp.left; +// auto &constant_value_expr = (BoundConstantExpression &)*comp_exp.right; +// checks[column_ids[column_ref.binding.column_index]] = {&constant_value_expr.value, nullptr}; +// } +// if ((comp_exp.left->expression_class == ExpressionClass::BOUND_CONSTANT && +// comp_exp.right->expression_class == ExpressionClass::BOUND_COLUMN_REF)) { +// auto &column_ref = (BoundColumnRefExpression &)*comp_exp.right; +// auto &constant_value_expr = (BoundConstantExpression &)*comp_exp.left; +// checks[column_ids[column_ref.binding.column_index]] = {nullptr, &constant_value_expr.value}; +// } +// return checks; +// } +// default: +// return checks; +// } +// } + +// vector FilterCombiner::GenerateZonemapChecks(vector &column_ids, +// vector &pushed_filters) { +// vector zonemap_checks; +// unordered_set not_constants; +// //! 
We go through the remaining filters and capture their min max +// if (remaining_filters.empty()) { +// return zonemap_checks; +// } + +// auto checks = FindZonemapChecks(column_ids, not_constants, remaining_filters[0].get()); +// for (size_t i = 1; i < remaining_filters.size(); ++i) { +// auto child_check = FindZonemapChecks(column_ids, not_constants, remaining_filters[i].get()); +// checks = MergeAnd(checks, child_check); +// } +// //! We construct the equivalent filters +// for (auto not_constant : not_constants) { +// checks.erase(not_constant); +// } +// for (const auto &pushed_filter : pushed_filters) { +// checks.erase(column_ids[pushed_filter.column_index]); +// } +// for (const auto &check : checks) { +// if (check.second.first) { +// zonemap_checks.emplace_back(check.second.first->Copy(), ExpressionType::COMPARE_GREATERTHANOREQUALTO, +// check.first); +// } +// if (check.second.second) { +// zonemap_checks.emplace_back(check.second.second->Copy(), ExpressionType::COMPARE_LESSTHANOREQUALTO, +// check.first); +// } +// } +// return zonemap_checks; +// } + +TableFilterSet FilterCombiner::GenerateTableScanFilters(vector &column_ids) { + TableFilterSet table_filters; //! 
First, we figure the filters that have constant expressions that we can push down to the table scan for (auto &constant_value : constant_values) { if (!constant_value.second.empty()) { @@ -85815,7 +95193,8 @@ vector FilterCombiner::GenerateTableScanFilters(vector &colu filter_exp = equivalence_map.find(constant_value.first); if (filter_exp->second.size() == 1 && filter_exp->second[0]->type == ExpressionType::BOUND_COLUMN_REF) { auto filter_col_exp = static_cast(filter_exp->second[0]); - if (column_ids[filter_col_exp->binding.column_index] == COLUMN_IDENTIFIER_ROW_ID) { + auto column_index = column_ids[filter_col_exp->binding.column_index]; + if (column_index == COLUMN_IDENTIFIER_ROW_ID) { break; } auto equivalence_set = filter_exp->first; @@ -85825,10 +95204,11 @@ vector FilterCombiner::GenerateTableScanFilters(vector &colu for (idx_t i = 0; i < entries.size(); i++) { // for each entry also create a comparison with each constant for (idx_t k = 0; k < constant_list.size(); k++) { - table_filters.emplace_back(constant_value.second[k].constant, - constant_value.second[k].comparison_type, - filter_col_exp->binding.column_index); + auto constant_filter = make_unique(constant_value.second[k].comparison_type, + constant_value.second[k].constant); + table_filters.PushFilter(column_index, move(constant_filter)); } + table_filters.PushFilter(column_index, make_unique()); } equivalence_map.erase(filter_exp); } @@ -85850,14 +95230,17 @@ vector FilterCombiner::GenerateTableScanFilters(vector &colu if (like_string.empty()) { continue; } + auto column_index = column_ids[column_ref.binding.column_index]; auto const_value = constant_value_expr.value.Copy(); const_value.str_value = like_string; //! 
Here the like must be transformed to a BOUND COMPARISON geq le - table_filters.emplace_back(const_value, ExpressionType::COMPARE_GREATERTHANOREQUALTO, - column_ref.binding.column_index); + auto lower_bound = + make_unique(ExpressionType::COMPARE_GREATERTHANOREQUALTO, const_value); const_value.str_value[const_value.str_value.size() - 1]++; - table_filters.emplace_back(const_value, ExpressionType::COMPARE_LESSTHAN, - column_ref.binding.column_index); + auto upper_bound = make_unique(ExpressionType::COMPARE_LESSTHAN, const_value); + table_filters.PushFilter(column_index, move(lower_bound)); + table_filters.PushFilter(column_index, move(upper_bound)); + table_filters.PushFilter(column_index, make_unique()); } if (func.function.name == "~~" && func.children[0]->expression_class == ExpressionClass::BOUND_COLUMN_REF && func.children[1]->type == ExpressionType::VALUE_CONSTANT) { @@ -85880,17 +95263,21 @@ vector FilterCombiner::GenerateTableScanFilters(vector &colu prefix += c; } const_value.str_value = prefix; + auto column_index = column_ids[column_ref.binding.column_index]; if (equality) { //! Here the like can be transformed to an equality query - table_filters.emplace_back(const_value, ExpressionType::COMPARE_EQUAL, - column_ref.binding.column_index); + auto equal_filter = make_unique(ExpressionType::COMPARE_EQUAL, const_value); + table_filters.PushFilter(column_index, move(equal_filter)); + table_filters.PushFilter(column_index, make_unique()); } else { //! 
Here the like must be transformed to a BOUND COMPARISON geq le - table_filters.emplace_back(const_value, ExpressionType::COMPARE_GREATERTHANOREQUALTO, - column_ref.binding.column_index); + auto lower_bound = + make_unique(ExpressionType::COMPARE_GREATERTHANOREQUALTO, const_value); const_value.str_value[const_value.str_value.size() - 1]++; - table_filters.emplace_back(const_value, ExpressionType::COMPARE_LESSTHAN, - column_ref.binding.column_index); + auto upper_bound = make_unique(ExpressionType::COMPARE_LESSTHAN, const_value); + table_filters.PushFilter(column_index, move(lower_bound)); + table_filters.PushFilter(column_index, move(upper_bound)); + table_filters.PushFilter(column_index, make_unique()); } } } else if (remaining_filter->type == ExpressionType::COMPARE_IN) { @@ -85901,7 +95288,8 @@ vector FilterCombiner::GenerateTableScanFilters(vector &colu continue; } auto &column_ref = (BoundColumnRefExpression &)*func.children[0].get(); - if (column_ids[column_ref.binding.column_index] == COLUMN_IDENTIFIER_ROW_ID) { + auto column_index = column_ids[column_ref.binding.column_index]; + if (column_index == COLUMN_IDENTIFIER_ROW_ID) { break; } //! check if all children are const expr @@ -85915,32 +95303,44 @@ vector FilterCombiner::GenerateTableScanFilters(vector &colu continue; } auto &fst_const_value_expr = (BoundConstantExpression &)*func.children[1].get(); + //! Check if values are consecutive, if yes transform them to >= <= (only for integers) + // e.g. 
if we have x IN (1, 2, 3, 4, 5) we transform this into x >= 1 AND x <= 5 if (!fst_const_value_expr.value.type().IsIntegral()) { continue; } + bool can_simplify_in_clause = true; for (idx_t i = 1; i < func.children.size(); i++) { auto &const_value_expr = (BoundConstantExpression &)*func.children[i].get(); + if (const_value_expr.value.is_null) { + can_simplify_in_clause = false; + break; + } in_values.push_back(const_value_expr.value); } + if (!can_simplify_in_clause || in_values.empty()) { + continue; + } Value one(1); sort(in_values.begin(), in_values.end()); - bool is_consecutive = true; for (idx_t in_val_idx = 1; in_val_idx < in_values.size(); in_val_idx++) { if (in_values[in_val_idx] - in_values[in_val_idx - 1] > one || in_values[in_val_idx - 1].is_null) { - is_consecutive = false; + can_simplify_in_clause = false; + break; } } - if (!is_consecutive || in_values.empty()) { + if (!can_simplify_in_clause) { continue; } - table_filters.emplace_back(in_values.front(), ExpressionType::COMPARE_GREATERTHANOREQUALTO, - column_ref.binding.column_index); - table_filters.emplace_back(in_values.back(), ExpressionType::COMPARE_LESSTHANOREQUALTO, - column_ref.binding.column_index); + auto lower_bound = + make_unique(ExpressionType::COMPARE_GREATERTHANOREQUALTO, in_values.front()); + auto upper_bound = make_unique(ExpressionType::COMPARE_LESSTHANOREQUALTO, in_values.back()); + table_filters.PushFilter(column_index, move(lower_bound)); + table_filters.PushFilter(column_index, move(upper_bound)); + table_filters.PushFilter(column_index, make_unique()); remaining_filters.erase(remaining_filters.begin() + rem_fil_idx); } @@ -86505,8 +95905,7 @@ unique_ptr FilterPullup::PullupJoin(unique_ptr } unique_ptr FilterPullup::PullupInnerJoin(unique_ptr op) { - auto &join = (LogicalJoin &)*op; - D_ASSERT(join.join_type == JoinType::INNER); + D_ASSERT(((LogicalJoin &)*op).join_type == JoinType::INNER); D_ASSERT(op->type != LogicalOperatorType::LOGICAL_DELIM_JOIN); return 
PullupBothSide(move(op)); } @@ -86697,6 +96096,7 @@ void FilterPushdown::PushFilters() { } filters.clear(); } + FilterResult FilterPushdown::AddFilter(unique_ptr expr) { PushFilters(); // split up the filters by AND predicate @@ -86893,19 +96293,6 @@ unique_ptr InClauseRewriter::VisitReplace(BoundOperatorExpression &e } } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/optimizer/join_order/query_graph.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - //===----------------------------------------------------------------------===// // DuckDB // @@ -86967,8 +96354,8 @@ class JoinRelationSetManager { JoinRelationSet *GetJoinRelation(unique_ptr relations, idx_t count); //! Union two sets of relations together and create a new relation set JoinRelationSet *Union(JoinRelationSet *left, JoinRelationSet *right); - //! Create the set difference of left \ right (i.e. all elements in left that are not in right) - JoinRelationSet *Difference(JoinRelationSet *left, JoinRelationSet *right); + // //! Create the set difference of left \ right (i.e. all elements in left that are not in right) + // JoinRelationSet *Difference(JoinRelationSet *left, JoinRelationSet *right); private: JoinRelationTreeNode root; @@ -86978,6 +96365,165 @@ class JoinRelationSetManager { + +#include + +namespace duckdb { + +using JoinRelationTreeNode = JoinRelationSetManager::JoinRelationTreeNode; + +// LCOV_EXCL_START +string JoinRelationSet::ToString() const { + string result = "["; + result += StringUtil::Join(relations, count, ", ", [](const idx_t &relation) { return to_string(relation); }); + result += "]"; + return result; +} +// LCOV_EXCL_STOP + +//! 
Returns true if sub is a subset of super +bool JoinRelationSet::IsSubset(JoinRelationSet *super, JoinRelationSet *sub) { + D_ASSERT(sub->count > 0); + if (sub->count > super->count) { + return false; + } + idx_t j = 0; + for (idx_t i = 0; i < super->count; i++) { + if (sub->relations[j] == super->relations[i]) { + j++; + if (j == sub->count) { + return true; + } + } + } + return false; +} + +JoinRelationSet *JoinRelationSetManager::GetJoinRelation(unique_ptr relations, idx_t count) { + // now look it up in the tree + JoinRelationTreeNode *info = &root; + for (idx_t i = 0; i < count; i++) { + auto entry = info->children.find(relations[i]); + if (entry == info->children.end()) { + // node not found, create it + auto insert_it = info->children.insert(make_pair(relations[i], make_unique())); + entry = insert_it.first; + } + // move to the next node + info = entry->second.get(); + } + // now check if the JoinRelationSet has already been created + if (!info->relation) { + // if it hasn't we need to create it + info->relation = make_unique(move(relations), count); + } + return info->relation.get(); +} + +//! Create or get a JoinRelationSet from a single node with the given index +JoinRelationSet *JoinRelationSetManager::GetJoinRelation(idx_t index) { + // create a sorted vector of the relations + auto relations = unique_ptr(new idx_t[1]); + relations[0] = index; + idx_t count = 1; + return GetJoinRelation(move(relations), count); +} + +JoinRelationSet *JoinRelationSetManager::GetJoinRelation(unordered_set &bindings) { + // create a sorted vector of the relations + unique_ptr relations = bindings.empty() ? 
nullptr : unique_ptr(new idx_t[bindings.size()]); + idx_t count = 0; + for (auto &entry : bindings) { + relations[count++] = entry; + } + std::sort(relations.get(), relations.get() + count); + return GetJoinRelation(move(relations), count); +} + +JoinRelationSet *JoinRelationSetManager::Union(JoinRelationSet *left, JoinRelationSet *right) { + auto relations = unique_ptr(new idx_t[left->count + right->count]); + idx_t count = 0; + // move through the left and right relations, eliminating duplicates + idx_t i = 0, j = 0; + while (true) { + if (i == left->count) { + // exhausted left relation, add remaining of right relation + for (; j < right->count; j++) { + relations[count++] = right->relations[j]; + } + break; + } else if (j == right->count) { + // exhausted right relation, add remaining of left + for (; i < left->count; i++) { + relations[count++] = left->relations[i]; + } + break; + } else if (left->relations[i] == right->relations[j]) { + // equivalent, add only one of the two pairs + relations[count++] = left->relations[i]; + i++; + j++; + } else if (left->relations[i] < right->relations[j]) { + // left is smaller, progress left and add it to the set + relations[count++] = left->relations[i]; + i++; + } else { + // right is smaller, progress right and add it to the set + relations[count++] = right->relations[j]; + j++; + } + } + return GetJoinRelation(move(relations), count); +} + +// JoinRelationSet *JoinRelationSetManager::Difference(JoinRelationSet *left, JoinRelationSet *right) { +// auto relations = unique_ptr(new idx_t[left->count]); +// idx_t count = 0; +// // move through the left and right relations +// idx_t i = 0, j = 0; +// while (true) { +// if (i == left->count) { +// // exhausted left relation, we are done +// break; +// } else if (j == right->count) { +// // exhausted right relation, add remaining of left +// for (; i < left->count; i++) { +// relations[count++] = left->relations[i]; +// } +// break; +// } else if (left->relations[i] == 
right->relations[j]) { +// // equivalent, add nothing +// i++; +// j++; +// } else if (left->relations[i] < right->relations[j]) { +// // left is smaller, progress left and add it to the set +// relations[count++] = left->relations[i]; +// i++; +// } else { +// // right is smaller, progress right +// j++; +// } +// } +// return GetJoinRelation(move(relations), count); +// } + +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/optimizer/join_order/query_graph.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + + + #include namespace duckdb { @@ -87043,6 +96589,7 @@ namespace duckdb { using QueryEdge = QueryGraph::QueryEdge; +// LCOV_EXCL_START static string QueryEdgeToString(const QueryEdge *info, vector prefix) { string result = ""; string source = "["; @@ -87065,6 +96612,11 @@ string QueryGraph::ToString() const { return QueryEdgeToString(&root, {}); } +void QueryGraph::Print() { + Printer::Print(ToString()); +} +// LCOV_EXCL_STOP + QueryEdge *QueryGraph::GetQueryEdge(JoinRelationSet *left) { D_ASSERT(left && left->count > 0); // find the EdgeInfo corresponding to the left set @@ -87156,156 +96708,6 @@ NeighborInfo *QueryGraph::GetConnection(JoinRelationSet *node, JoinRelationSet * return connection; } -void QueryGraph::Print() { - Printer::Print(ToString()); -} - -} // namespace duckdb - - - - -#include - -namespace duckdb { - -using JoinRelationTreeNode = JoinRelationSetManager::JoinRelationTreeNode; - -string JoinRelationSet::ToString() const { - string result = "["; - result += StringUtil::Join(relations, count, ", ", [](const idx_t &relation) { return to_string(relation); }); - result += "]"; - return result; -} - -//! 
Returns true if sub is a subset of super -bool JoinRelationSet::IsSubset(JoinRelationSet *super, JoinRelationSet *sub) { - if (sub->count == 0) { - return false; - } - if (sub->count > super->count) { - return false; - } - idx_t j = 0; - for (idx_t i = 0; i < super->count; i++) { - if (sub->relations[j] == super->relations[i]) { - j++; - if (j == sub->count) { - return true; - } - } - } - return false; -} - -JoinRelationSet *JoinRelationSetManager::GetJoinRelation(unique_ptr relations, idx_t count) { - // now look it up in the tree - JoinRelationTreeNode *info = &root; - for (idx_t i = 0; i < count; i++) { - auto entry = info->children.find(relations[i]); - if (entry == info->children.end()) { - // node not found, create it - auto insert_it = info->children.insert(make_pair(relations[i], make_unique())); - entry = insert_it.first; - } - // move to the next node - info = entry->second.get(); - } - // now check if the JoinRelationSet has already been created - if (!info->relation) { - // if it hasn't we need to create it - info->relation = make_unique(move(relations), count); - } - return info->relation.get(); -} - -//! Create or get a JoinRelationSet from a single node with the given index -JoinRelationSet *JoinRelationSetManager::GetJoinRelation(idx_t index) { - // create a sorted vector of the relations - auto relations = unique_ptr(new idx_t[1]); - relations[0] = index; - idx_t count = 1; - return GetJoinRelation(move(relations), count); -} - -JoinRelationSet *JoinRelationSetManager::GetJoinRelation(unordered_set &bindings) { - // create a sorted vector of the relations - unique_ptr relations = bindings.empty() ? 
nullptr : unique_ptr(new idx_t[bindings.size()]); - idx_t count = 0; - for (auto &entry : bindings) { - relations[count++] = entry; - } - std::sort(relations.get(), relations.get() + count); - return GetJoinRelation(move(relations), count); -} - -JoinRelationSet *JoinRelationSetManager::Union(JoinRelationSet *left, JoinRelationSet *right) { - auto relations = unique_ptr(new idx_t[left->count + right->count]); - idx_t count = 0; - // move through the left and right relations, eliminating duplicates - idx_t i = 0, j = 0; - while (true) { - if (i == left->count) { - // exhausted left relation, add remaining of right relation - for (; j < right->count; j++) { - relations[count++] = right->relations[j]; - } - break; - } else if (j == right->count) { - // exhausted right relation, add remaining of left - for (; i < left->count; i++) { - relations[count++] = left->relations[i]; - } - break; - } else if (left->relations[i] == right->relations[j]) { - // equivalent, add only one of the two pairs - relations[count++] = left->relations[i]; - i++; - j++; - } else if (left->relations[i] < right->relations[j]) { - // left is smaller, progress left and add it to the set - relations[count++] = left->relations[i]; - i++; - } else { - // right is smaller, progress right and add it to the set - relations[count++] = right->relations[j]; - j++; - } - } - return GetJoinRelation(move(relations), count); -} - -JoinRelationSet *JoinRelationSetManager::Difference(JoinRelationSet *left, JoinRelationSet *right) { - auto relations = unique_ptr(new idx_t[left->count]); - idx_t count = 0; - // move through the left and right relations - idx_t i = 0, j = 0; - while (true) { - if (i == left->count) { - // exhausted left relation, we are done - break; - } else if (j == right->count) { - // exhausted right relation, add remaining of left - for (; i < left->count; i++) { - relations[count++] = left->relations[i]; - } - break; - } else if (left->relations[i] == right->relations[j]) { - // equivalent, 
add nothing - i++; - j++; - } else if (left->relations[i] < right->relations[j]) { - // left is smaller, progress left and add it to the set - relations[count++] = left->relations[i]; - i++; - } else { - // right is smaller, progress right - j++; - } - } - return GetJoinRelation(move(relations), count); -} - } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB @@ -88201,16 +97603,6 @@ unique_ptr JoinOrderOptimizer::Optimize(unique_ptrright_set, filter_info->left_set, filter_info); } else { continue; - // the sets are not disjoint, we create two sets of edges - // auto left_difference = set_manager.Difference(filter_info->left_set, filter_info->right_set); - // auto right_difference = set_manager.Difference(filter_info->right_set, - // filter_info->left_set); - // // -> LEFT <-> RIGHT \ LEFT - // query_graph.CreateEdge(filter_info->left_set, right_difference, filter_info); - // query_graph.CreateEdge(right_difference, filter_info->left_set, filter_info); - // // -> RIGHT <-> LEFT \ RIGHT - // query_graph.CreateEdge(left_difference, filter_info->right_set, filter_info); - // query_graph.CreateEdge(filter_info->right_set, left_difference, filter_info); } continue; } @@ -88297,17 +97689,6 @@ bool CaseExpressionMatcher::Match(Expression *expr_p, vector &bind return true; } -bool CastExpressionMatcher::Match(Expression *expr_p, vector &bindings) { - if (!ExpressionMatcher::Match(expr_p, bindings)) { - return false; - } - auto expr = (BoundCastExpression *)expr_p; - if (child && !child->Match(expr->child.get(), bindings)) { - return false; - } - return true; -} - bool ComparisonExpressionMatcher::Match(Expression *expr_p, vector &bindings) { if (!ExpressionMatcher::Match(expr_p, bindings)) { return false; @@ -88322,6 +97703,9 @@ bool InClauseExpressionMatcher::Match(Expression *expr_p, vector & return false; } auto expr = (BoundOperatorExpression *)expr_p; + if (expr->type != ExpressionType::COMPARE_IN || 
expr->type == ExpressionType::COMPARE_NOT_IN) { + return false; + } return SetMatcher::Match(matchers, expr->children, bindings, policy); } @@ -88336,14 +97720,6 @@ bool ConjunctionExpressionMatcher::Match(Expression *expr_p, vector &bindings) { - if (!ExpressionMatcher::Match(expr_p, bindings)) { - return false; - } - auto expr = (BoundOperatorExpression *)expr_p; - return SetMatcher::Match(matchers, expr->children, bindings, policy); -} - bool FunctionExpressionMatcher::Match(Expression *expr_p, vector &bindings) { if (!ExpressionMatcher::Match(expr_p, bindings)) { return false; @@ -88731,17 +98107,11 @@ class MoveConstantsRule : public Rule { + namespace duckdb { class ClientContext; class LogicalOperator; - -enum class FilterPropagateResult : uint8_t { - NO_PRUNING_POSSIBLE = 0, - FILTER_ALWAYS_TRUE = 1, - FILTER_ALWAYS_FALSE = 2, - FILTER_TRUE_OR_NULL = 3, - FILTER_FALSE_OR_NULL = 4 -}; +class TableFilter; class StatisticsPropagator { public: @@ -88784,6 +98154,11 @@ class StatisticsPropagator { //! Set the statistics of a specific column binding to not contain null values void SetStatisticsNotNull(ColumnBinding binding); + //! Run a comparison between the statistics and the table filter; returns the prune result + FilterPropagateResult PropagateTableFilter(BaseStatistics &stats, TableFilter &filter); + //! Update filter statistics from a TableFilter + void UpdateFilterStatistics(BaseStatistics &input, TableFilter &filter); + //! Add cardinalities together (i.e. new max is stats.max + new_stats.max): used for union void AddCardinalities(unique_ptr &stats, NodeStatistics &new_stats); //! Multiply the cardinalities together (i.e. 
new max cardinality is stats.max * new_stats.max): used for @@ -88844,6 +98219,7 @@ class TopN { + //===----------------------------------------------------------------------===// // DuckDB // @@ -88892,86 +98268,95 @@ Optimizer::Optimizer(Binder &binder, ClientContext &context) : context(context), #endif } +void Optimizer::RunOptimizer(OptimizerType type, const std::function &callback) { + auto &config = DBConfig::GetConfig(context); + if (config.disabled_optimizers.find(type) != config.disabled_optimizers.end()) { + // optimizer is marked as disabled: skip + return; + } + context.profiler->StartPhase(OptimizerTypeToString(type)); + callback(); + context.profiler->EndPhase(); +} + unique_ptr Optimizer::Optimize(unique_ptr plan) { // first we perform expression rewrites using the ExpressionRewriter // this does not change the logical plan structure, but only simplifies the expression trees - context.profiler.StartPhase("expression_rewriter"); - rewriter.VisitOperator(*plan); - context.profiler.EndPhase(); + RunOptimizer(OptimizerType::EXPRESSION_REWRITER, [&]() { rewriter.VisitOperator(*plan); }); // perform filter pullup - context.profiler.StartPhase("filter_pullup"); - FilterPullup filter_pullup; - plan = filter_pullup.Rewrite(move(plan)); - context.profiler.EndPhase(); + RunOptimizer(OptimizerType::FILTER_PULLUP, [&]() { + FilterPullup filter_pullup; + plan = filter_pullup.Rewrite(move(plan)); + }); // perform filter pushdown - context.profiler.StartPhase("filter_pushdown"); - FilterPushdown filter_pushdown(*this); - plan = filter_pushdown.Rewrite(move(plan)); - context.profiler.EndPhase(); + RunOptimizer(OptimizerType::FILTER_PUSHDOWN, [&]() { + FilterPushdown filter_pushdown(*this); + plan = filter_pushdown.Rewrite(move(plan)); + }); - context.profiler.StartPhase("regex_range"); - RegexRangeFilter regex_opt; - plan = regex_opt.Rewrite(move(plan)); - context.profiler.EndPhase(); + RunOptimizer(OptimizerType::REGEX_RANGE, [&]() { + RegexRangeFilter regex_opt; + 
plan = regex_opt.Rewrite(move(plan)); + }); - context.profiler.StartPhase("in_clause"); - InClauseRewriter rewriter(*this); - plan = rewriter.Rewrite(move(plan)); - context.profiler.EndPhase(); + RunOptimizer(OptimizerType::IN_CLAUSE, [&]() { + InClauseRewriter rewriter(*this); + plan = rewriter.Rewrite(move(plan)); + }); // then we perform the join ordering optimization // this also rewrites cross products + filters into joins and performs filter pushdowns - context.profiler.StartPhase("join_order"); - JoinOrderOptimizer optimizer(context); - plan = optimizer.Optimize(move(plan)); - context.profiler.EndPhase(); + RunOptimizer(OptimizerType::JOIN_ORDER, [&]() { + JoinOrderOptimizer optimizer(context); + plan = optimizer.Optimize(move(plan)); + }); // removes any redundant DelimGets/DelimJoins - context.profiler.StartPhase("deliminator"); - Deliminator deliminator; - plan = deliminator.Optimize(move(plan)); - context.profiler.EndPhase(); + RunOptimizer(OptimizerType::DELIMINATOR, [&]() { + Deliminator deliminator; + plan = deliminator.Optimize(move(plan)); + }); - context.profiler.StartPhase("unused_columns"); - RemoveUnusedColumns unused(binder, context, true); - unused.VisitOperator(*plan); - context.profiler.EndPhase(); + RunOptimizer(OptimizerType::UNUSED_COLUMNS, [&]() { + RemoveUnusedColumns unused(binder, context, true); + unused.VisitOperator(*plan); + }); // perform statistics propagation - context.profiler.StartPhase("statistics_propagation"); - StatisticsPropagator propagator(context); - propagator.PropagateStatistics(plan); - context.profiler.EndPhase(); + RunOptimizer(OptimizerType::STATISTICS_PROPAGATION, [&]() { + StatisticsPropagator propagator(context); + propagator.PropagateStatistics(plan); + }); // then we extract common subexpressions inside the different operators - context.profiler.StartPhase("common_subexpressions"); - CommonSubExpressionOptimizer cse_optimizer(binder); - cse_optimizer.VisitOperator(*plan); - context.profiler.EndPhase(); + 
RunOptimizer(OptimizerType::COMMON_SUBEXPRESSIONS, [&]() { + CommonSubExpressionOptimizer cse_optimizer(binder); + cse_optimizer.VisitOperator(*plan); + }); - context.profiler.StartPhase("common_aggregate"); - CommonAggregateOptimizer common_aggregate; - common_aggregate.VisitOperator(*plan); - context.profiler.EndPhase(); + RunOptimizer(OptimizerType::COMMON_AGGREGATE, [&]() { + CommonAggregateOptimizer common_aggregate; + common_aggregate.VisitOperator(*plan); + }); - context.profiler.StartPhase("column_lifetime"); - ColumnLifetimeAnalyzer column_lifetime(true); - column_lifetime.VisitOperator(*plan); - context.profiler.EndPhase(); + RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() { + ColumnLifetimeAnalyzer column_lifetime(true); + column_lifetime.VisitOperator(*plan); + }); // transform ORDER BY + LIMIT to TopN - context.profiler.StartPhase("top_n"); - TopN topn; - plan = topn.Optimize(move(plan)); - context.profiler.EndPhase(); + RunOptimizer(OptimizerType::TOP_N, [&]() { + TopN topn; + plan = topn.Optimize(move(plan)); + }); // apply simple expression heuristics to get an initial reordering - context.profiler.StartPhase("reorder_filter"); - ExpressionHeuristics expression_heuristics(*this); - plan = expression_heuristics.Rewrite(move(plan)); - context.profiler.EndPhase(); + RunOptimizer(OptimizerType::REORDER_FILTER, [&]() { + ExpressionHeuristics expression_heuristics(*this); + plan = expression_heuristics.Rewrite(move(plan)); + }); return plan; } @@ -89347,7 +98732,8 @@ unique_ptr FilterPushdown::PushdownGet(unique_ptr FilterPushdown::PushdownGet(unique_ptr DatePartSimplificationRule::Apply(LogicalOperator &op, ve case DatePartSpecifier::WEEK: new_function_name = "week"; break; + case DatePartSpecifier::YEARWEEK: + new_function_name = "yearweek"; + break; case DatePartSpecifier::DOW: new_function_name = "dayofweek"; break; @@ -90602,11 +99992,21 @@ unique_ptr DistributivityRule::Apply(LogicalOperator &op, vectorchildren.push_back(move(result)); - // 
remove any expressions that were set to nullptr - for (idx_t i = 0; i < initial_or->children.size(); i++) { - if (!initial_or->children[i]) { - initial_or->children.erase(initial_or->children.begin() + i); - i--; + } + + // check if we completely erased one of the children of the OR + // this happens if we have an OR in the form of "X OR (X AND A)" + // the left child will be completely empty, as it only contains common expressions + // in this case, any other children are not useful: + // X OR (X AND A) is the same as "X" + // since (1) only tuples that do not qualify "X" will not pass this predicate + // and (2) all tuples that qualify "X" will pass this predicate + for (idx_t i = 0; i < initial_or->children.size(); i++) { + if (!initial_or->children[i]) { + if (new_root->children.size() <= 1) { + return move(new_root->children[0]); + } else { + return move(new_root); } } } @@ -90853,6 +100253,9 @@ unique_ptr MoveConstantsRule::Apply(LogicalOperator &op, vectorreturn_type.IsNumeric()) { return nullptr; } + if (inner_constant->value.is_null || outer_constant->value.is_null) { + return make_unique(Value(comparison->return_type)); + } int arithmetic_child_index = arithmetic->children[0].get() == inner_constant ? 
1 : 0; auto &op_type = arithmetic->function.name; @@ -91060,6 +100463,7 @@ unique_ptr StatisticsPropagator::PropagateExpression(BoundCastEx if (!child_stats) { return nullptr; } + unique_ptr result_stats; switch (cast.child->return_type.InternalType()) { case PhysicalType::INT8: case PhysicalType::INT16: @@ -91068,10 +100472,15 @@ unique_ptr StatisticsPropagator::PropagateExpression(BoundCastEx case PhysicalType::INT128: case PhysicalType::FLOAT: case PhysicalType::DOUBLE: - return StatisticsNumericCastSwitch(child_stats.get(), cast.return_type); + result_stats = StatisticsNumericCastSwitch(child_stats.get(), cast.return_type); + break; default: return nullptr; } + if (cast.try_cast && result_stats) { + result_stats->validity_stats = make_unique(true, true); + } + return result_stats; } } // namespace duckdb @@ -91223,6 +100632,8 @@ unique_ptr StatisticsPropagator::PropagateExpression(BoundCompar + + namespace duckdb { unique_ptr StatisticsPropagator::StatisticsFromValue(const Value &input) { @@ -91236,14 +100647,48 @@ unique_ptr StatisticsPropagator::StatisticsFromValue(const Value case PhysicalType::FLOAT: case PhysicalType::DOUBLE: { auto result = make_unique(input.type(), input, input); - result->validity_stats = make_unique(input.is_null); + result->validity_stats = make_unique(input.is_null, !input.is_null); return move(result); } case PhysicalType::VARCHAR: { auto result = make_unique(input.type()); - result->validity_stats = make_unique(input.is_null); - string_t str(input.str_value.c_str(), input.str_value.size()); - result->Update(str); + result->validity_stats = make_unique(input.is_null, !input.is_null); + if (!input.is_null) { + string_t str(input.str_value.c_str(), input.str_value.size()); + result->Update(str); + } + return move(result); + } + case PhysicalType::STRUCT: { + auto result = make_unique(input.type()); + result->validity_stats = make_unique(input.is_null, !input.is_null); + if (input.is_null) { + for (auto &child_stat : 
result->child_stats) { + child_stat.reset(); + } + } else { + D_ASSERT(result->child_stats.size() == input.struct_value.size()); + for (idx_t i = 0; i < result->child_stats.size(); i++) { + result->child_stats[i] = StatisticsFromValue(input.struct_value[i]); + } + } + return move(result); + } + case PhysicalType::LIST: { + auto result = make_unique(input.type()); + result->validity_stats = make_unique(input.is_null, !input.is_null); + if (input.is_null) { + result->child_stats.reset(); + } else { + for (auto &child_element : input.list_value) { + auto child_element_stats = StatisticsFromValue(child_element); + if (child_element_stats) { + result->child_stats->Merge(*child_element_stats); + } else { + result->child_stats.reset(); + } + } + } return move(result); } default: @@ -91617,8 +101062,35 @@ unique_ptr StatisticsPropagator::PropagateStatistics(LogicalFilt + + + namespace duckdb { +FilterPropagateResult StatisticsPropagator::PropagateTableFilter(BaseStatistics &stats, TableFilter &filter) { + return filter.CheckStatistics(stats); +} + +void StatisticsPropagator::UpdateFilterStatistics(BaseStatistics &input, TableFilter &filter) { + // FIXME: update stats... 
+ switch (filter.filter_type) { + case TableFilterType::CONJUNCTION_AND: { + auto &conjunction_and = (ConjunctionAndFilter &)filter; + for (auto &child_filter : conjunction_and.child_filters) { + UpdateFilterStatistics(input, *child_filter); + } + break; + } + case TableFilterType::CONSTANT_COMPARISON: { + auto &constant_filter = (ConstantFilter &)filter; + UpdateFilterStatistics(input, constant_filter.comparison_type, constant_filter.constant); + break; + } + default: + break; + } +} + unique_ptr StatisticsPropagator::PropagateStatistics(LogicalGet &get, unique_ptr *node_ptr) { if (get.function.cardinality) { @@ -91636,15 +101108,22 @@ unique_ptr StatisticsPropagator::PropagateStatistics(LogicalGet } } // push table filters into the statistics - for (idx_t i = 0; i < get.table_filters.size(); i++) { - auto &table_filter = get.table_filters[i]; + vector column_indexes; + column_indexes.reserve(get.table_filters.filters.size()); + for (auto &kv : get.table_filters.filters) { + column_indexes.push_back(kv.first); + } + + for (auto &table_filter_column : column_indexes) { idx_t column_index; for (column_index = 0; column_index < get.column_ids.size(); column_index++) { - if (get.column_ids[column_index] == table_filter.column_index) { + if (get.column_ids[column_index] == table_filter_column) { break; } } - D_ASSERT(get.column_ids[column_index] == table_filter.column_index); + D_ASSERT(column_index < get.column_ids.size()); + D_ASSERT(get.column_ids[column_index] == table_filter_column); + // find the stats ColumnBinding stats_binding(get.table_index, column_index); auto entry = statistics_map.find(stats_binding); @@ -91652,17 +101131,17 @@ unique_ptr StatisticsPropagator::PropagateStatistics(LogicalGet // no stats for this entry continue; } - auto constant_stats = StatisticsFromValue(table_filter.constant); - if (!constant_stats) { - continue; - } - auto propagate_result = PropagateComparison(*entry->second, *constant_stats, table_filter.comparison_type); + auto 
&stats = *entry->second; + + // fetch the table filter + D_ASSERT(get.table_filters.filters.count(table_filter_column) > 0); + auto &filter = get.table_filters.filters[table_filter_column]; + auto propagate_result = PropagateTableFilter(stats, *filter); switch (propagate_result) { case FilterPropagateResult::FILTER_ALWAYS_TRUE: // filter is always true; it is useless to execute it // erase this condition - get.table_filters.erase(get.table_filters.begin() + i); - i--; + get.table_filters.filters.erase(table_filter_column); break; case FilterPropagateResult::FILTER_FALSE_OR_NULL: case FilterPropagateResult::FILTER_ALWAYS_FALSE: @@ -91671,7 +101150,7 @@ unique_ptr StatisticsPropagator::PropagateStatistics(LogicalGet return make_unique(0, 0); default: // general case: filter can be true or false, update this columns' statistics - UpdateFilterStatistics(*entry->second, table_filter.comparison_type, table_filter.constant); + UpdateFilterStatistics(stats, *filter); break; } } @@ -91896,12 +101375,25 @@ unique_ptr StatisticsPropagator::PropagateStatistics(LogicalLimi + namespace duckdb { unique_ptr StatisticsPropagator::PropagateStatistics(LogicalOrder &order, unique_ptr *node_ptr) { - // propagate statistics in the child node - return PropagateStatistics(order.children[0]); + // first propagate to the child + node_stats = PropagateStatistics(order.children[0]); + + // then propagate to each of the order expressions + for (idx_t i = 0; i < order.orders.size(); i++) { + auto &expr = order.orders[i].expression; + PropagateExpression(expr); + if (expr->stats) { + order.statistics.push_back(expr->stats->Copy()); + } else { + order.statistics.push_back(nullptr); + } + } + return move(node_stats); } } // namespace duckdb @@ -92023,6 +101515,7 @@ unique_ptr StatisticsPropagator::PropagateStatistics(LogicalWind + namespace duckdb { StatisticsPropagator::StatisticsPropagator(ClientContext &context) : context(context) { @@ -92061,6 +101554,8 @@ unique_ptr 
StatisticsPropagator::PropagateStatistics(LogicalOper case LogicalOperatorType::LOGICAL_EXCEPT: case LogicalOperatorType::LOGICAL_INTERSECT: return PropagateStatistics((LogicalSetOperation &)node, node_ptr); + case LogicalOperatorType::LOGICAL_ORDER_BY: + return PropagateStatistics((LogicalOrder &)node, node_ptr); default: return PropagateChildren(node, node_ptr); } @@ -92163,21 +101658,27 @@ Executor::~Executor() { void Executor::Initialize(PhysicalOperator *plan) { Reset(); - physical_plan = plan; - physical_state = physical_plan->GetOperatorState(); - - context.profiler.Initialize(physical_plan); auto &scheduler = TaskScheduler::GetScheduler(context); - this->producer = scheduler.CreateProducer(); + { + lock_guard elock(executor_lock); + physical_plan = plan; + physical_state = physical_plan->GetOperatorState(); - BuildPipelines(physical_plan, nullptr); + context.profiler->Initialize(physical_plan); + this->producer = scheduler.CreateProducer(); - this->total_pipelines = pipelines.size(); + BuildPipelines(physical_plan, nullptr); - // schedule pipelines that do not have dependents - for (auto &pipeline : pipelines) { - if (!pipeline->HasDependencies()) { - pipeline->Schedule(); + this->total_pipelines = pipelines.size(); + + // schedule pipelines that do not have dependents + for (auto &pipeline : pipelines) { +#ifdef DEBUG + D_ASSERT(!pipeline->ToString().empty()); +#endif + if (!pipeline->HasDependencies()) { + pipeline->Schedule(); + } } } @@ -92188,8 +101689,38 @@ void Executor::Initialize(PhysicalOperator *plan) { task->Execute(); task.reset(); } + string exception; + if (!GetError(exception)) { + // no exceptions: continue + continue; + } + + // an exception has occurred executing one of the pipelines + // we need to wait until all threads are finished + // we do this by creating weak pointers to all pipelines + // then clearing our references to the pipelines + // and waiting until all pipelines have been destroyed + vector> weak_references; + { + 
lock_guard elock(executor_lock); + weak_references.reserve(pipelines.size()); + for (auto &pipeline : pipelines) { + weak_references.push_back(weak_ptr(pipeline)); + } + pipelines.clear(); + } + for (auto &weak_ref : weak_references) { + while (true) { + auto weak = weak_ref.lock(); + if (!weak) { + break; + } + } + } + throw Exception(exception); } + lock_guard elock(executor_lock); pipelines.clear(); if (!exceptions.empty()) { // an exception has occurred executing one of the pipelines @@ -92198,6 +101729,7 @@ void Executor::Initialize(PhysicalOperator *plan) { } void Executor::Reset() { + lock_guard elock(executor_lock); delim_join_dependencies.clear(); recursive_cte = nullptr; physical_plan = nullptr; @@ -92211,12 +101743,12 @@ void Executor::Reset() { void Executor::BuildPipelines(PhysicalOperator *op, Pipeline *parent) { if (op->IsSink()) { // operator is a sink, build a pipeline - auto pipeline = make_unique(*this, *producer); + auto pipeline = make_shared(*this, *producer); pipeline->sink = (PhysicalSink *)op; pipeline->sink_state = pipeline->sink->GetGlobalState(context); if (parent) { // the parent is dependent on this pipeline to complete - parent->AddDependency(pipeline.get()); + parent->AddDependency(pipeline); } switch (op->type) { case PhysicalOperatorType::CREATE_TABLE_AS: @@ -92255,7 +101787,8 @@ void Executor::BuildPipelines(PhysicalOperator *op, Pipeline *parent) { } // recurse into the pipeline child BuildPipelines(pipeline->child, pipeline.get()); - for (auto &dependency : pipeline->GetDependencies()) { + for (auto &entry : pipeline->dependencies) { + auto dependency = entry.second.lock(); auto dependency_cte = dependency->GetRecursiveCTE(); if (dependency_cte) { pipeline->SetRecursiveCTE(dependency_cte); @@ -92291,7 +101824,8 @@ void Executor::BuildPipelines(PhysicalOperator *op, Pipeline *parent) { // this chunk scan introduces a dependency to the current pipeline // namely a dependency on the duplicate elimination pipeline to finish 
D_ASSERT(parent); - parent->AddDependency(entry->second); + auto delim_dependency = entry->second->shared_from_this(); + parent->AddDependency(delim_dependency); break; } case PhysicalOperatorType::EXECUTE: { @@ -92313,7 +101847,7 @@ void Executor::BuildPipelines(PhysicalOperator *op, Pipeline *parent) { BuildPipelines(op->children[1].get(), parent); // re-order the pipelines such that they are executed in the correct order of dependencies for (idx_t i = 0; i < cte_node.pipelines.size(); i++) { - auto &deps = cte_node.pipelines[i]->GetDependencies(); + auto &deps = cte_node.pipelines[i]->dependencies; for (idx_t j = i + 1; j < cte_node.pipelines.size(); j++) { if (deps.find(cte_node.pipelines[j].get()) != deps.end()) { // pipeline "i" depends on pipeline "j" but pipeline "i" is scheduled to be executed before @@ -92367,12 +101901,23 @@ void Executor::PushError(const string &exception) { exceptions.push_back(exception); } +bool Executor::GetError(string &exception) { + lock_guard elock(executor_lock); + if (exceptions.empty()) { + return false; + } + exception = exceptions[0]; + return true; +} + void Executor::Flush(ThreadContext &tcontext) { lock_guard elock(executor_lock); - context.profiler.Flush(tcontext.profiler); + context.profiler->Flush(tcontext.profiler); } bool Executor::GetPipelinesProgress(int ¤t_progress) { + lock_guard elock(executor_lock); + if (!pipelines.empty()) { return pipelines.back()->GetProgress(current_progress); } else { @@ -92390,10 +101935,10 @@ unique_ptr Executor::FetchChunk() { auto chunk = make_unique(); // run the plan to get the next chunks - physical_plan->InitializeChunkEmpty(*chunk); + physical_plan->InitializeChunk(*chunk); physical_plan->GetChunk(econtext, *chunk, physical_state.get()); physical_plan->FinalizeOperatorState(*physical_state, econtext); - context.profiler.Flush(thread.profiler); + context.profiler->Flush(thread.profiler); return chunk; } @@ -92419,11 +101964,11 @@ namespace duckdb { class PipelineTask : public 
Task { public: - explicit PipelineTask(Pipeline *pipeline_p) : pipeline(pipeline_p) { + explicit PipelineTask(shared_ptr pipeline_p) : pipeline(move(pipeline_p)) { } TaskContext task; - Pipeline *pipeline; + shared_ptr pipeline; public: void Execute() override { @@ -92493,7 +102038,7 @@ void Pipeline::Execute(TaskContext &task) { auto lstate = sink->GetLocalSinkState(context); // incrementally process the pipeline DataChunk intermediate; - child->InitializeChunkEmpty(intermediate); + child->InitializeChunk(intermediate); while (true) { child->GetChunk(context, intermediate, state.get()); thread.profiler.StartOperator(sink); @@ -92507,24 +102052,26 @@ void Pipeline::Execute(TaskContext &task) { child->FinalizeOperatorState(*state, context); } catch (std::exception &ex) { executor.PushError(ex.what()); - } catch (...) { + } catch (...) { // LCOV_EXCL_START executor.PushError("Unknown exception in pipeline!"); - } + } // LCOV_EXCL_STOP executor.Flush(thread); } void Pipeline::FinishTask() { D_ASSERT(finished_tasks < total_tasks); + idx_t current_tasks = total_tasks; idx_t current_finished = ++finished_tasks; - if (current_finished == total_tasks) { + if (current_finished == current_tasks) { + bool finish_pipeline = false; try { - sink->Finalize(*this, executor.context, move(sink_state)); + finish_pipeline = sink->Finalize(*this, executor.context, move(sink_state)); } catch (std::exception &ex) { executor.PushError(ex.what()); - } catch (...) { + } catch (...) 
{ // LCOV_EXCL_START executor.PushError("Unknown exception in Finalize!"); - } - if (current_finished == total_tasks) { + } // LCOV_EXCL_STOP + if (finish_pipeline) { Finish(); } } @@ -92532,7 +102079,7 @@ void Pipeline::FinishTask() { void Pipeline::ScheduleSequentialTask() { auto &scheduler = TaskScheduler::GetScheduler(executor.context); - auto task = make_unique(this); + auto task = make_unique(shared_from_this()); this->total_tasks = 1; scheduler.ScheduleTask(*executor.producer, move(task)); @@ -92555,7 +102102,7 @@ bool Pipeline::LaunchScanTasks(PhysicalOperator *op, idx_t max_threads, unique_p // launch a task for every thread this->total_tasks = max_threads; for (idx_t i = 0; i < max_threads; i++) { - auto task = make_unique(this); + auto task = make_unique(shared_from_this()); scheduler.ScheduleTask(*executor.producer, move(task)); } @@ -92567,12 +102114,19 @@ bool Pipeline::ScheduleOperator(PhysicalOperator *op) { case PhysicalOperatorType::UNNEST: case PhysicalOperatorType::FILTER: case PhysicalOperatorType::PROJECTION: - case PhysicalOperatorType::HASH_JOIN: case PhysicalOperatorType::CROSS_PRODUCT: case PhysicalOperatorType::STREAMING_SAMPLE: case PhysicalOperatorType::INOUT_FUNCTION: // filter, projection or hash probe: continue in children return ScheduleOperator(op->children[0].get()); + case PhysicalOperatorType::HASH_JOIN: { + // hash join; for now we can't safely parallelize right or full outer join probes + auto &join = (PhysicalHashJoin &)*op; + if (IsRightOuterJoin(join.join_type)) { + return false; + } + return ScheduleOperator(op->children[0].get()); + } case PhysicalOperatorType::TABLE_SCAN: { auto &get = (PhysicalTableScan &)*op; if (!get.function.max_threads) { @@ -92585,12 +102139,6 @@ bool Pipeline::ScheduleOperator(PhysicalOperator *op) { auto pstate = get.function.init_parallel_state(executor.context, get.bind_data.get()); return LaunchScanTasks(op, max_threads, move(pstate)); } - case PhysicalOperatorType::ORDER_BY: { - auto &ord = 
(PhysicalOrder &)*op; - idx_t max_threads = ord.MaxThreads(executor.context); - auto pstate = ord.GetParallelState(); - return LaunchScanTasks(op, max_threads, move(pstate)); - } case PhysicalOperatorType::WINDOW: { auto &win = (PhysicalWindow &)*op; idx_t max_threads = win.MaxThreads(executor.context); @@ -92608,10 +102156,18 @@ bool Pipeline::ScheduleOperator(PhysicalOperator *op) { } void Pipeline::ClearParents() { - for (auto &parent : parents) { + for (auto &parent_entry : parents) { + auto parent = parent_entry.second.lock(); + if (!parent) { + continue; + } parent->dependencies.erase(this); } - for (auto &dep : dependencies) { + for (auto &dep_entry : dependencies) { + auto dep = dep_entry.second.lock(); + if (!dep) { + continue; + } dep->parents.erase(this); } parents.clear(); @@ -92691,9 +102247,12 @@ void Pipeline::Schedule() { ScheduleSequentialTask(); } -void Pipeline::AddDependency(Pipeline *pipeline) { - this->dependencies.insert(pipeline); - pipeline->parents.insert(this); +void Pipeline::AddDependency(shared_ptr &pipeline) { + if (!pipeline) { + return; + } + dependencies[pipeline.get()] = weak_ptr(pipeline); + pipeline->parents[this] = weak_ptr(shared_from_this()); } void Pipeline::CompleteDependency() { @@ -92709,7 +102268,11 @@ void Pipeline::Finish() { D_ASSERT(!finished); finished = true; // finished processing the pipeline, now we can schedule pipelines that depend on this pipeline - for (auto &parent : parents) { + for (auto &parent_entry : parents) { + auto parent = parent_entry.second.lock(); + if (!parent) { + continue; + } // mark a dependency as completed for each of the parents parent->CompleteDependency(); } @@ -92721,7 +102284,7 @@ string Pipeline::ToString() const { auto node = this->child; while (node) { str = PhysicalOperatorToString(node->type) + " -> " + str; - node = node->children[0].get(); + node = node->children.empty() ? 
nullptr : node->children[0].get(); } return str; } @@ -92741,7 +102304,7 @@ void Pipeline::Print() const { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 // See the end of this file for a list // Provides a C++11 implementation of a multi-producer, multi-consumer lock-free queue. @@ -92829,7 +102392,7 @@ namespace duckdb_moodycamel { namespace details { #elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE) namespace duckdb_moodycamel { namespace details { static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes"); - + typedef std::thread::id thread_id_t; static const thread_id_t invalid_thread_id; // Default ctor creates invalid ID @@ -92947,7 +102510,7 @@ namespace duckdb_moodycamel { namespace details { #endif #endif -// VS2012 doesn't support deleted functions. +// VS2012 doesn't support deleted functions. // In this case, we declare the function normally but don't define it. A link error will be generated if the function is called. 
#ifndef MOODYCAMEL_DELETE_FUNCTION #if defined(_MSC_VER) && _MSC_VER < 1800 @@ -92972,12 +102535,13 @@ namespace duckdb_moodycamel { namespace details { // Compiler-specific likely/unlikely hints namespace duckdb_moodycamel { namespace details { + #if defined(__GNUC__) static inline bool (likely)(bool x) { return __builtin_expect((x), true); } - static inline bool (unlikely)(bool x) { return __builtin_expect((x), false); } +// static inline bool (unlikely)(bool x) { return __builtin_expect((x), false); } #else static inline bool (likely)(bool x) { return x; } - static inline bool (unlikely)(bool x) { return x; } +// static inline bool (unlikely)(bool x) { return x; } #endif } } @@ -93016,7 +102580,7 @@ struct ConcurrentQueueDefaultTraits { // General-purpose size type. std::size_t is strongly recommended. typedef std::size_t size_t; - + // The type used for the enqueue and dequeue indices. Must be at least as // large as size_t. Should be significantly larger than the number of elements // you expect to hold at once, especially if you have a high turnover rate; @@ -93028,47 +102592,47 @@ struct ConcurrentQueueDefaultTraits // whether the queue is lock-free with a 64-int type depends on the whether // std::atomic is lock-free, which is platform-specific. typedef std::size_t index_t; - + // Internally, all elements are enqueued and dequeued from multi-element // blocks; this is the smallest controllable unit. If you expect few elements // but many producers, a smaller block size should be favoured. For few producers // and/or many elements, a larger block size is preferred. A sane default // is provided. Must be a power of 2. static const size_t BLOCK_SIZE = 32; - + // For explicit producers (i.e. when using a producer token), the block is // checked for being empty by iterating through a list of flags, one per element. // For large block sizes, this is too inefficient, and switching to an atomic // counter-based approach is faster. 
The switch is made for block sizes strictly // larger than this threshold. static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32; - + // How many full blocks can be expected for a single explicit producer? This should // reflect that number's maximum for optimal performance. Must be a power of 2. static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32; - + // How many full blocks can be expected for a single implicit producer? This should // reflect that number's maximum for optimal performance. Must be a power of 2. static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32; - + // The initial size of the hash table mapping thread IDs to implicit producers. // Note that the hash is resized every time it becomes half full. // Must be a power of two, and either 0 or at least 1. If 0, implicit production // (using the enqueue methods without an explicit producer token) is disabled. static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32; - + // Controls the number of items that an explicit consumer (i.e. one with a token) // must consume before it causes all consumers to rotate and move on to the next // internal queue. static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256; - + // The maximum number of elements (inclusive) that can be enqueued to a sub-queue. // Enqueue operations that would cause this limit to be surpassed will fail. Note // that this limit is enforced at the block level (for performance reasons), i.e. // it's rounded up to the nearest block size. static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max::value; - - + + #ifndef MCDBGQ_USE_RELACY // Memory allocation can be customized if needed. // malloc should return nullptr on failure, and handle alignment like std::malloc. 
@@ -93114,13 +102678,13 @@ namespace details ConcurrentQueueProducerTypelessBase* next; std::atomic inactive; ProducerToken* token; - + ConcurrentQueueProducerTypelessBase() : next(nullptr), inactive(false), token(nullptr) { } }; - + template struct _hash_32_or_64 { static inline std::uint32_t hash(std::uint32_t h) { @@ -93146,14 +102710,14 @@ namespace details } }; template struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> { }; - + static inline size_t hash_thread_id(thread_id_t id) { static_assert(sizeof(thread_id_t) <= 8, "Expected a platform where thread IDs are at most 64-bit values"); return static_cast(hash_32_or_64::thread_id_hash_t)>::hash( thread_id_converter::prehash(id))); } - + template static inline bool circular_less_than(T a, T b) { @@ -93167,7 +102731,7 @@ namespace details #pragma warning(pop) #endif } - + template static inline char* align_for(char* ptr) { @@ -93191,7 +102755,7 @@ namespace details ++x; return x; } - + template static inline void swap_relaxed(std::atomic& left, std::atomic& right) { @@ -93199,13 +102763,13 @@ namespace details left.store(std::move(right.load(std::memory_order_relaxed)), std::memory_order_relaxed); right.store(std::move(temp), std::memory_order_relaxed); } - + template static inline T const& nomove(T const& x) { return x; } - + template struct nomove_if { @@ -93215,7 +102779,7 @@ namespace details return x; } }; - + template<> struct nomove_if { @@ -93226,19 +102790,19 @@ namespace details return std::forward(x); } }; - + template static inline auto deref_noexcept(It& it) MOODYCAMEL_NOEXCEPT -> decltype(*it) { return *it; } - + #if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) template struct is_trivially_destructible : std::is_trivially_destructible { }; #else template struct is_trivially_destructible : std::has_trivial_destructor { }; #endif - + #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED #ifdef MCDBGQ_USE_RELACY typedef RelacyThreadExitListener 
ThreadExitListener; @@ -93249,11 +102813,11 @@ namespace details typedef void (*callback_t)(void*); callback_t callback; void* userData; - + ThreadExitListener* next; // reserved for use by the ThreadExitNotifier }; - - + + class ThreadExitNotifier { public: @@ -93263,7 +102827,7 @@ namespace details listener->next = tlsInst.tail; tlsInst.tail = listener; } - + static void unsubscribe(ThreadExitListener* listener) { auto& tlsInst = instance(); @@ -93276,12 +102840,12 @@ namespace details prev = &ptr->next; } } - + private: ThreadExitNotifier() : tail(nullptr) { } ThreadExitNotifier(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; ThreadExitNotifier& operator=(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; - + ~ThreadExitNotifier() { // This thread is about to exit, let everyone know! @@ -93290,20 +102854,20 @@ namespace details ptr->callback(ptr->userData); } } - + // Thread-local static inline ThreadExitNotifier& instance() { static thread_local ThreadExitNotifier notifier; return notifier; } - + private: ThreadExitListener* tail; }; #endif #endif - + template struct static_is_lock_free_num { enum { value = 0 }; }; template<> struct static_is_lock_free_num { enum { value = ATOMIC_CHAR_LOCK_FREE }; }; template<> struct static_is_lock_free_num { enum { value = ATOMIC_SHORT_LOCK_FREE }; }; @@ -93320,10 +102884,10 @@ struct ProducerToken { template explicit ProducerToken(ConcurrentQueue& queue); - + template explicit ProducerToken(BlockingConcurrentQueue& queue); - + ProducerToken(ProducerToken&& other) MOODYCAMEL_NOEXCEPT : producer(other.producer) { @@ -93332,13 +102896,13 @@ struct ProducerToken producer->token = this; } } - + inline ProducerToken& operator=(ProducerToken&& other) MOODYCAMEL_NOEXCEPT { swap(other); return *this; } - + void swap(ProducerToken& other) MOODYCAMEL_NOEXCEPT { std::swap(producer, other.producer); @@ -93349,7 +102913,7 @@ struct ProducerToken other.producer->token = &other; } } - + // A token is always valid unless: // 1) 
Memory allocation failed during construction // 2) It was moved via the move constructor @@ -93359,7 +102923,7 @@ struct ProducerToken // that the token is valid for use with a specific queue, // but not which one; that's up to the user to track. inline bool valid() const { return producer != nullptr; } - + ~ProducerToken() { if (producer != nullptr) { @@ -93367,15 +102931,15 @@ struct ProducerToken producer->inactive.store(true, std::memory_order_release); } } - + // Disable copying and assignment ProducerToken(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; ProducerToken& operator=(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; - + private: template friend class ConcurrentQueue; friend class ConcurrentQueueTests; - + protected: details::ConcurrentQueueProducerTypelessBase* producer; }; @@ -93385,21 +102949,21 @@ struct ConsumerToken { template explicit ConsumerToken(ConcurrentQueue& q); - + template explicit ConsumerToken(BlockingConcurrentQueue& q); - + ConsumerToken(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT : initialOffset(other.initialOffset), lastKnownGlobalOffset(other.lastKnownGlobalOffset), itemsConsumedFromCurrent(other.itemsConsumedFromCurrent), currentProducer(other.currentProducer), desiredProducer(other.desiredProducer) { } - + inline ConsumerToken& operator=(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT { swap(other); return *this; } - + void swap(ConsumerToken& other) MOODYCAMEL_NOEXCEPT { std::swap(initialOffset, other.initialOffset); @@ -93408,7 +102972,7 @@ struct ConsumerToken std::swap(currentProducer, other.currentProducer); std::swap(desiredProducer, other.desiredProducer); } - + // Disable copying and assignment ConsumerToken(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; ConsumerToken& operator=(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; @@ -93416,7 +102980,7 @@ struct ConsumerToken private: template friend class ConcurrentQueue; friend class ConcurrentQueueTests; - + private: // but shared with ConcurrentQueue std::uint32_t 
initialOffset; std::uint32_t lastKnownGlobalOffset; @@ -93437,10 +103001,10 @@ class ConcurrentQueue public: typedef ::duckdb_moodycamel::ProducerToken producer_token_t; typedef ::duckdb_moodycamel::ConsumerToken consumer_token_t; - + typedef typename Traits::index_t index_t; typedef typename Traits::size_t size_t; - + static const size_t BLOCK_SIZE = static_cast(Traits::BLOCK_SIZE); static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = static_cast(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD); static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast(Traits::EXPLICIT_INITIAL_INDEX_SIZE); @@ -93488,7 +103052,7 @@ class ConcurrentQueue implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); populate_initial_implicit_producer_hash(); populate_initial_block_list(capacity / BLOCK_SIZE + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1)); - + #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG // Track all the producers using a fully-resolved typed list for // each kind; this makes it possible to debug them starting from @@ -93498,7 +103062,7 @@ class ConcurrentQueue implicitProducers.store(nullptr, std::memory_order_relaxed); #endif } - + // Computes the correct amount of pre-allocated blocks for you based // on the minimum number of elements you want available at any given // time, and the maximum concurrent number of each type of producer. @@ -93513,13 +103077,13 @@ class ConcurrentQueue populate_initial_implicit_producer_hash(); size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1) + 2 * (maxExplicitProducers + maxImplicitProducers); populate_initial_block_list(blocks); - + #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG explicitProducers.store(nullptr, std::memory_order_relaxed); implicitProducers.store(nullptr, std::memory_order_relaxed); #endif } - + // Note: The queue should not be accessed concurrently while it's // being deleted. It's up to the user to synchronize this. // This method is not thread safe. 
@@ -93535,7 +103099,7 @@ class ConcurrentQueue destroy(ptr); ptr = next; } - + // Destroy implicit producer hash tables MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) { auto hash = implicitProducerHash.load(std::memory_order_relaxed); @@ -93551,7 +103115,7 @@ class ConcurrentQueue hash = prev; } } - + // Destroy global free list auto block = freeList.head_unsafe(); while (block != nullptr) { @@ -93561,7 +103125,7 @@ class ConcurrentQueue } block = next; } - + // Destroy initial free list destroy_array(initialBlockPool, initialBlockPoolSize); } @@ -93569,7 +103133,7 @@ class ConcurrentQueue // Disable copying and copy assignment ConcurrentQueue(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; ConcurrentQueue& operator=(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; - + // Moving is supported, but note that it is *not* a thread-safe operation. // Nobody can use the queue while it's being moved, and the memory effects // of that move must be propagated to other threads before they can use it. 
@@ -93590,31 +103154,31 @@ class ConcurrentQueue implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); populate_initial_implicit_producer_hash(); swap_implicit_producer_hashes(other); - + other.producerListTail.store(nullptr, std::memory_order_relaxed); other.producerCount.store(0, std::memory_order_relaxed); other.nextExplicitConsumerId.store(0, std::memory_order_relaxed); other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed); - + #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG explicitProducers.store(other.explicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); other.explicitProducers.store(nullptr, std::memory_order_relaxed); implicitProducers.store(other.implicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); other.implicitProducers.store(nullptr, std::memory_order_relaxed); #endif - + other.initialBlockPoolIndex.store(0, std::memory_order_relaxed); other.initialBlockPoolSize = 0; other.initialBlockPool = nullptr; - + reown_producers(); } - + inline ConcurrentQueue& operator=(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT { return swap_internal(other); } - + // Swaps this queue's state with the other's. Not thread-safe. 
// Swapping two queues does not invalidate their tokens, however // the tokens that were created for one queue must be used with @@ -93624,14 +103188,14 @@ class ConcurrentQueue { swap_internal(other); } - + private: ConcurrentQueue& swap_internal(ConcurrentQueue& other) { if (this == &other) { return *this; } - + details::swap_relaxed(producerListTail, other.producerListTail); details::swap_relaxed(producerCount, other.producerCount); details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex); @@ -93640,20 +103204,20 @@ class ConcurrentQueue freeList.swap(other.freeList); details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId); details::swap_relaxed(globalExplicitConsumerOffset, other.globalExplicitConsumerOffset); - + swap_implicit_producer_hashes(other); - + reown_producers(); other.reown_producers(); - + #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG details::swap_relaxed(explicitProducers, other.explicitProducers); details::swap_relaxed(implicitProducers, other.implicitProducers); #endif - + return *this; } - + public: // Enqueues a single item (by copying it). // Allocates memory if required. Only fails if memory allocation fails (or implicit @@ -93665,7 +103229,7 @@ class ConcurrentQueue MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; else return inner_enqueue(item); } - + // Enqueues a single item (by moving it, if possible). // Allocates memory if required. Only fails if memory allocation fails (or implicit // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, @@ -93676,7 +103240,7 @@ class ConcurrentQueue MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; else return inner_enqueue(std::move(item)); } - + // Enqueues a single item (by copying it) using an explicit producer token. // Allocates memory if required. Only fails if memory allocation fails (or // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). 
@@ -93685,7 +103249,7 @@ class ConcurrentQueue { return inner_enqueue(token, item); } - + // Enqueues a single item (by moving it, if possible) using an explicit producer token. // Allocates memory if required. Only fails if memory allocation fails (or // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). @@ -93694,7 +103258,7 @@ class ConcurrentQueue { return inner_enqueue(token, std::move(item)); } - + // Enqueues several items. // Allocates memory if required. Only fails if memory allocation fails (or // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE @@ -93707,7 +103271,7 @@ class ConcurrentQueue MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; else return inner_enqueue_bulk(itemFirst, count); } - + // Enqueues several items using an explicit producer token. // Allocates memory if required. Only fails if memory allocation fails // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). @@ -93719,7 +103283,7 @@ class ConcurrentQueue { return inner_enqueue_bulk(token, itemFirst, count); } - + // Enqueues a single item (by copying it). // Does not allocate memory. Fails if not enough room to enqueue (or implicit // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE @@ -93730,7 +103294,7 @@ class ConcurrentQueue MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; else return inner_enqueue(item); } - + // Enqueues a single item (by moving it, if possible). // Does not allocate memory (except for one-time implicit producer). // Fails if not enough room to enqueue (or implicit production is @@ -93741,7 +103305,7 @@ class ConcurrentQueue MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; else return inner_enqueue(std::move(item)); } - + // Enqueues a single item (by copying it) using an explicit producer token. // Does not allocate memory. Fails if not enough room to enqueue. 
// Thread-safe. @@ -93749,7 +103313,7 @@ class ConcurrentQueue { return inner_enqueue(token, item); } - + // Enqueues a single item (by moving it, if possible) using an explicit producer token. // Does not allocate memory. Fails if not enough room to enqueue. // Thread-safe. @@ -93757,7 +103321,7 @@ class ConcurrentQueue { return inner_enqueue(token, std::move(item)); } - + // Enqueues several items. // Does not allocate memory (except for one-time implicit producer). // Fails if not enough room to enqueue (or implicit production is @@ -93771,7 +103335,7 @@ class ConcurrentQueue MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; else return inner_enqueue_bulk(itemFirst, count); } - + // Enqueues several items using an explicit producer token. // Does not allocate memory. Fails if not enough room to enqueue. // Note: Use std::make_move_iterator if the elements should be moved @@ -93782,9 +103346,9 @@ class ConcurrentQueue { return inner_enqueue_bulk(token, itemFirst, count); } - - - + + + // Attempts to dequeue from the queue. // Returns false if all producer streams appeared empty at the time they // were checked (so, the queue is likely but not guaranteed to be empty). @@ -93807,7 +103371,7 @@ class ConcurrentQueue ++nonEmptyCount; } } - + // If there was at least one non-empty queue but it appears empty at the time // we try to dequeue from it, we need to make sure every queue's been tried if (nonEmptyCount > 0) { @@ -93822,7 +103386,7 @@ class ConcurrentQueue } return false; } - + // Attempts to dequeue from the queue. // Returns false if all producer streams appeared empty at the time they // were checked (so, the queue is likely but not guaranteed to be empty). @@ -93842,7 +103406,7 @@ class ConcurrentQueue } return false; } - + // Attempts to dequeue from the queue using an explicit consumer token. 
// Returns false if all producer streams appeared empty at the time they // were checked (so, the queue is likely but not guaranteed to be empty). @@ -93855,13 +103419,13 @@ class ConcurrentQueue // If you see that the global offset has changed, you must reset your consumption counter and move to your designated place // If there's no items where you're supposed to be, keep moving until you find a producer with some items // If the global offset has not changed but you've run out of items to consume, move over from your current position until you find an producer with something in it - + if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { if (!update_current_producer_after_rotation(token)) { return false; } } - + // If there was at least one non-empty queue but it appears empty at the time // we try to dequeue from it, we need to make sure every queue's been tried if (static_cast(token.currentProducer)->dequeue(item)) { @@ -93870,7 +103434,7 @@ class ConcurrentQueue } return true; } - + auto tail = producerListTail.load(std::memory_order_acquire); auto ptr = static_cast(token.currentProducer)->next_prod(); if (ptr == nullptr) { @@ -93889,7 +103453,7 @@ class ConcurrentQueue } return false; } - + // Attempts to dequeue several elements from the queue. // Returns the number of items actually dequeued. // Returns 0 if all producer streams appeared empty at the time they @@ -93907,7 +103471,7 @@ class ConcurrentQueue } return count; } - + // Attempts to dequeue several elements from the queue using an explicit consumer token. // Returns the number of items actually dequeued. 
// Returns 0 if all producer streams appeared empty at the time they @@ -93921,7 +103485,7 @@ class ConcurrentQueue return 0; } } - + size_t count = static_cast(token.currentProducer)->dequeue_bulk(itemFirst, max); if (count == max) { if ((token.itemsConsumedFromCurrent += static_cast(max)) >= EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { @@ -93931,7 +103495,7 @@ class ConcurrentQueue } token.itemsConsumedFromCurrent += static_cast(count); max -= count; - + auto tail = producerListTail.load(std::memory_order_acquire); auto ptr = static_cast(token.currentProducer)->next_prod(); if (ptr == nullptr) { @@ -93955,9 +103519,9 @@ class ConcurrentQueue } return count; } - - - + + + // Attempts to dequeue from a specific producer's inner queue. // If you happen to know which producer you want to dequeue from, this // is significantly faster than using the general-case try_dequeue methods. @@ -93969,7 +103533,7 @@ class ConcurrentQueue { return static_cast(producer.producer)->dequeue(item); } - + // Attempts to dequeue several elements from a specific producer's inner queue. // Returns the number of items actually dequeued. // If you happen to know which producer you want to dequeue from, this @@ -93982,8 +103546,8 @@ class ConcurrentQueue { return static_cast(producer.producer)->dequeue_bulk(itemFirst, max); } - - + + // Returns an estimate of the total number of elements currently in the queue. This // estimate is only accurate if the queue has completely stabilized before it is called // (i.e. all enqueue and dequeue operations have completed and their memory effects are @@ -93998,8 +103562,8 @@ class ConcurrentQueue } return size; } - - + + // Returns true if the underlying atomic variables used by // the queue are lock-free (they should be on most platforms). // Thread-safe. 
@@ -94023,40 +103587,40 @@ class ConcurrentQueue struct ImplicitProducer; friend struct ImplicitProducer; friend class ConcurrentQueueTests; - + enum AllocationMode { CanAlloc, CannotAlloc }; - - + + /////////////////////////////// // Queue methods /////////////////////////////// - + template inline bool inner_enqueue(producer_token_t const& token, U&& element) { return static_cast(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue(std::forward(element)); } - + template inline bool inner_enqueue(U&& element) { auto producer = get_or_add_implicit_producer(); return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue(std::forward(element)); } - + template inline bool inner_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) { return static_cast(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_bulk(itemFirst, count); } - + template inline bool inner_enqueue_bulk(It itemFirst, size_t count) { auto producer = get_or_add_implicit_producer(); return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk(itemFirst, count); } - + inline bool update_current_producer_after_rotation(consumer_token_t& token) { // Ah, there's been a rotation, figure out where we should be! @@ -94066,7 +103630,7 @@ class ConcurrentQueue } auto prodCount = producerCount.load(std::memory_order_relaxed); auto globalOffset = globalExplicitConsumerOffset.load(std::memory_order_relaxed); - if ((details::unlikely)(token.desiredProducer == nullptr)) { + if (token.desiredProducer == nullptr) { // Aha, first time we're dequeueing anything. 
// Figure out our local position // Note: offset is from start, not end, but we're traversing from end -- subtract from count first @@ -94079,7 +103643,7 @@ class ConcurrentQueue } } } - + std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset; if (delta >= prodCount) { delta = delta % prodCount; @@ -94090,27 +103654,27 @@ class ConcurrentQueue token.desiredProducer = tail; } } - + token.lastKnownGlobalOffset = globalOffset; token.currentProducer = token.desiredProducer; token.itemsConsumedFromCurrent = 0; return true; } - - + + /////////////////////////// // Free list /////////////////////////// - + template struct FreeListNode { FreeListNode() : freeListRefs(0), freeListNext(nullptr) { } - + std::atomic freeListRefs; std::atomic freeListNext; }; - + // A simple CAS-based lock-free free list. Not the fastest thing in the world under heavy contention, but // simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly // speedy under low contention. 
@@ -94120,15 +103684,15 @@ class ConcurrentQueue FreeList() : freeListHead(nullptr) { } FreeList(FreeList&& other) : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) { other.freeListHead.store(nullptr, std::memory_order_relaxed); } void swap(FreeList& other) { details::swap_relaxed(freeListHead, other.freeListHead); } - + FreeList(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; FreeList& operator=(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; - + inline void add(N* node) { #ifdef MCDBGQ_NOLOCKFREE_FREELIST debug::DebugLock lock(mutex); -#endif +#endif // We know that the should-be-on-freelist bit is 0 at this point, so it's safe to // set it using a fetch_add if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, std::memory_order_acq_rel) == 0) { @@ -94137,12 +103701,12 @@ class ConcurrentQueue add_knowing_refcount_is_zero(node); } } - + inline N* try_get() { #ifdef MCDBGQ_NOLOCKFREE_FREELIST debug::DebugLock lock(mutex); -#endif +#endif auto head = freeListHead.load(std::memory_order_acquire); while (head != nullptr) { auto prevHead = head; @@ -94151,7 +103715,7 @@ class ConcurrentQueue head = freeListHead.load(std::memory_order_acquire); continue; } - + // Good, reference count has been incremented (it wasn't at zero), which means we can read the // next and not worry about it changing between now and the time we do the CAS auto next = head->freeListNext.load(std::memory_order_relaxed); @@ -94159,12 +103723,12 @@ class ConcurrentQueue // Yay, got the node. This means it was on the list, which means shouldBeOnFreeList must be false no // matter the refcount (because nobody else knows it's been taken off yet, it can't have been put back on). 
assert((head->freeListRefs.load(std::memory_order_relaxed) & SHOULD_BE_ON_FREELIST) == 0); - + // Decrease refcount twice, once for our ref, and once for the list's ref head->freeListRefs.fetch_sub(2, std::memory_order_release); return head; } - + // OK, the head must have changed on us, but we still need to decrease the refcount we increased. // Note that we don't need to release any memory effects, but we do need to ensure that the reference // count decrement happens-after the CAS on the head. @@ -94173,13 +103737,13 @@ class ConcurrentQueue add_knowing_refcount_is_zero(prevHead); } } - + return nullptr; } - + // Useful for traversing the list when there's no contention (e.g. to destroy remaining nodes) N* head_unsafe() const { return freeListHead.load(std::memory_order_relaxed); } - + private: inline void add_knowing_refcount_is_zero(N* node) { @@ -94204,26 +103768,26 @@ class ConcurrentQueue return; } } - + private: // Implemented like a stack, but where node order doesn't matter (nodes are inserted out of order under contention) std::atomic freeListHead; - + static const std::uint32_t REFS_MASK = 0x7FFFFFFF; static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000; - + #ifdef MCDBGQ_NOLOCKFREE_FREELIST debug::DebugMutex mutex; #endif }; - - + + /////////////////////////// // Block /////////////////////////// - + enum InnerQueueContext { implicit_context = 0, explicit_context = 1 }; - + struct Block { Block() @@ -94233,7 +103797,7 @@ class ConcurrentQueue owner = nullptr; #endif } - + template inline bool is_empty() const { @@ -94244,7 +103808,7 @@ class ConcurrentQueue return false; } } - + // Aha, empty; make sure we have all other memory effects that happened before the empty flags were set std::atomic_thread_fence(std::memory_order_acquire); return true; @@ -94259,7 +103823,7 @@ class ConcurrentQueue return false; } } - + // Returns true if the block is now empty (does not apply in explicit context) template inline bool 
set_empty(MOODYCAMEL_MAYBE_UNUSED index_t i) @@ -94277,7 +103841,7 @@ class ConcurrentQueue return prevVal == BLOCK_SIZE - 1; } } - + // Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0). // Returns true if the block is now empty (does not apply in explicit context). template @@ -94300,7 +103864,7 @@ class ConcurrentQueue return prevVal + count == BLOCK_SIZE; } } - + template inline void set_all_empty() { @@ -94315,7 +103879,7 @@ class ConcurrentQueue elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed); } } - + template inline void reset_empty() { @@ -94330,10 +103894,10 @@ class ConcurrentQueue elementsCompletelyDequeued.store(0, std::memory_order_relaxed); } } - + inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT { return static_cast(static_cast(elements)) + static_cast(idx & static_cast(BLOCK_SIZE - 1)); } inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT { return static_cast(static_cast(elements)) + static_cast(idx & static_cast(BLOCK_SIZE - 1)); } - + private: static_assert(std::alignment_of::value <= sizeof(T), "The queue does not support types with an alignment greater than their size at this time"); MOODYCAMEL_ALIGNAS(MOODYCAMEL_ALIGNOF(T)) char elements[sizeof(T) * BLOCK_SIZE]; @@ -94346,7 +103910,7 @@ class ConcurrentQueue std::atomic freeListNext; std::atomic shouldBeOnFreeList; bool dynamicallyAllocated; // Perhaps a better name for this would be 'isNotPartOfInitialBlockPool' - + #ifdef MCDBGQ_TRACKMEM void* owner; #endif @@ -94359,11 +103923,11 @@ class ConcurrentQueue struct MemStats; private: #endif - + /////////////////////////// // Producer base /////////////////////////// - + struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase { ProducerBase(ConcurrentQueue* parent_, bool isExplicit_) : @@ -94376,9 +103940,9 @@ class ConcurrentQueue parent(parent_) { } - + virtual ~ProducerBase() { }; - + template inline bool dequeue(U& element) { @@ -94389,7 
+103953,7 @@ class ConcurrentQueue return static_cast(this)->dequeue(element); } } - + template inline size_t dequeue_bulk(It& itemFirst, size_t max) { @@ -94400,41 +103964,41 @@ class ConcurrentQueue return static_cast(this)->dequeue_bulk(itemFirst, max); } } - + inline ProducerBase* next_prod() const { return static_cast(next); } - + inline size_t size_approx() const { auto tail = tailIndex.load(std::memory_order_relaxed); auto head = headIndex.load(std::memory_order_relaxed); return details::circular_less_than(head, tail) ? static_cast(tail - head) : 0; } - + inline index_t getTail() const { return tailIndex.load(std::memory_order_relaxed); } protected: std::atomic tailIndex; // Where to enqueue to next std::atomic headIndex; // Where to dequeue from next - + std::atomic dequeueOptimisticCount; std::atomic dequeueOvercommit; - + Block* tailBlock; - + public: bool isExplicit; ConcurrentQueue* parent; - + protected: #ifdef MCDBGQ_TRACKMEM friend struct MemStats; #endif }; - - + + /////////////////////////// // Explicit queue /////////////////////////// - + struct ExplicitProducer : public ProducerBase { explicit ExplicitProducer(ConcurrentQueue* parent_) : @@ -94450,10 +104014,10 @@ class ConcurrentQueue if (poolBasedIndexSize > pr_blockIndexSize) { pr_blockIndexSize = poolBasedIndexSize; } - + new_block_index(0); // This creates an index with double the number of current entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE } - + ~ExplicitProducer() { // Destruct any elements not yet dequeued. 
@@ -94472,7 +104036,7 @@ class ConcurrentQueue assert(details::circular_less_than(pr_blockIndexEntries[i].base, this->headIndex.load(std::memory_order_relaxed))); halfDequeuedBlock = pr_blockIndexEntries[i].block; } - + // Start at the head block (note the first line in the loop gives us the head from the tail on the first iteration) auto block = this->tailBlock; do { @@ -94480,12 +104044,12 @@ class ConcurrentQueue if (block->ConcurrentQueue::Block::template is_empty()) { continue; } - + size_t i = 0; // Offset into block if (block == halfDequeuedBlock) { i = static_cast(this->headIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)); } - + // Walk through all the items in the block; if this is the tail block, we need to stop when we reach the tail index auto lastValidIndex = (this->tailIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)) == 0 ? BLOCK_SIZE : static_cast(this->tailIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)); while (i != BLOCK_SIZE && (block != this->tailBlock || i != lastValidIndex)) { @@ -94493,7 +104057,7 @@ class ConcurrentQueue } } while (block != this->tailBlock); } - + // Destroy all blocks that we own if (this->tailBlock != nullptr) { auto block = this->tailBlock; @@ -94508,7 +104072,7 @@ class ConcurrentQueue block = nextBlock; } while (block != this->tailBlock); } - + // Destroy the block indices auto header = static_cast(pr_blockIndexRaw); while (header != nullptr) { @@ -94518,7 +104082,7 @@ class ConcurrentQueue header = prev; } } - + template inline bool enqueue(U&& element) { @@ -94529,10 +104093,10 @@ class ConcurrentQueue auto startBlock = this->tailBlock; auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; if (this->tailBlock != nullptr && this->tailBlock->next->ConcurrentQueue::Block::template is_empty()) { - // We can re-use the block ahead of us, it's empty! + // We can re-use the block ahead of us, it's empty! 
this->tailBlock = this->tailBlock->next; this->tailBlock->ConcurrentQueue::Block::template reset_empty(); - + // We'll put the block on the block index (guaranteed to be room since we're conceptually removing the // last block from it first -- except instead of removing then adding, we can just overwrite). // Note that there must be a valid block index here, since even if allocation failed in the ctor, @@ -94557,7 +104121,7 @@ class ConcurrentQueue // Hmm, the circular block index is already full -- we'll need // to allocate a new index. Note pr_blockIndexRaw can only be nullptr if // the initial allocation failed in the constructor. - + MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { return false; } @@ -94565,7 +104129,7 @@ class ConcurrentQueue return false; } } - + // Insert a new block in the circular linked list auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); if (newBlock == nullptr) { @@ -94604,27 +104168,27 @@ class ConcurrentQueue (void)startBlock; (void)originalBlockIndexSlotsUsed; } - + // Add block to block index auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; entry.base = currentTailIndex; entry.block = this->tailBlock; blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release); pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); - + if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new ((T*)nullptr) T(std::forward(element)))) { this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } } - + // Enqueue new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); - + this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } - + template bool dequeue(U& element) { @@ -94632,10 +104196,10 @@ class ConcurrentQueue auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); if (details::circular_less_than(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - 
overcommit, tail)) { // Might be something to dequeue, let's give it a try - + // Note that this if is purely for performance purposes in the common case when the queue is // empty and the values are eventually consistent -- we may enter here spuriously. - + // Note that whatever the values of overcommit and tail are, they are not going to change (unless we // change them) and must be the same value at this point (inside the if) as when the if condition was // evaluated. @@ -94648,24 +104212,24 @@ class ConcurrentQueue // unfortunately that can't be shown to be correct using only the C++11 standard. // See http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case std::atomic_thread_fence(std::memory_order_acquire); - + // Increment optimistic counter, then check if it went over the boundary auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); - + // Note that since dequeueOvercommit must be <= dequeueOptimisticCount (because dequeueOvercommit is only ever // incremented after dequeueOptimisticCount -- this is enforced in the `else` block below), and since we now // have a version of dequeueOptimisticCount that is at least as recent as overcommit (due to the release upon // incrementing dequeueOvercommit and the acquire above that synchronizes with it), overcommit <= myDequeueCount. // However, we can't assert this since both dequeueOptimisticCount and dequeueOvercommit may (independently) // overflow; in such a case, though, the logic still holds since the difference between the two is maintained. - + // Note that we reload tail here in case it changed; it will be the same value as before or greater, since // this load is sequenced after (happens after) the earlier load above. 
This is supported by read-read // coherency (as defined in the standard), explained here: http://en.cppreference.com/w/cpp/atomic/memory_order tail = this->tailIndex.load(std::memory_order_acquire); if ((details::likely)(details::circular_less_than(myDequeueCount - overcommit, tail))) { // Guaranteed to be at least one element to dequeue! - + // Get the index. Note that since there's guaranteed to be at least one element, this // will never exceed tail. We need to do an acquire-release fence here since it's possible // that whatever condition got us to this point was for an earlier enqueued element (that @@ -94675,13 +104239,13 @@ class ConcurrentQueue // place with the more current condition (they must have acquired a tail that is at least // as recent). auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); - - + + // Determine which block the element is in - + auto localBlockIndex = blockIndex.load(std::memory_order_acquire); auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); - + // We need to be careful here about subtracting and dividing because of index wrap-around. 
// When an index wraps, we need to preserve the sign of the offset when dividing it by the // block size (in order to get a correct signed block count offset in all cases): @@ -94689,7 +104253,7 @@ class ConcurrentQueue auto blockBaseIndex = index & ~static_cast(BLOCK_SIZE - 1); auto offset = static_cast(static_cast::type>(blockBaseIndex - headBase) / BLOCK_SIZE); auto block = localBlockIndex->entries[(localBlockIndexHead + offset) & (localBlockIndex->size - 1)].block; - + // Dequeue auto& el = *((*block)[index]); if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) { @@ -94698,7 +104262,7 @@ class ConcurrentQueue struct Guard { Block* block; index_t index; - + ~Guard() { (*block)[index]->~T(); @@ -94713,7 +104277,7 @@ class ConcurrentQueue el.~T(); // NOLINT block->ConcurrentQueue::Block::template set_empty(index); } - + return true; } else { @@ -94721,10 +104285,10 @@ class ConcurrentQueue this->dequeueOvercommit.fetch_add(1, std::memory_order_release); // Release so that the fetch_add on dequeueOptimisticCount is guaranteed to happen before this write } } - + return false; } - + template bool enqueue_bulk(It itemFirst, size_t count) { @@ -94735,9 +104299,9 @@ class ConcurrentQueue auto startBlock = this->tailBlock; auto originalBlockIndexFront = pr_blockIndexFront; auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; - + Block* firstAllocatedBlock = nullptr; - + // Figure out how many blocks we'll need to allocate, and do so size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); index_t currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); @@ -94746,21 +104310,21 @@ class ConcurrentQueue while (blockBaseDiff > 0 && this->tailBlock != nullptr && this->tailBlock->next != firstAllocatedBlock && this->tailBlock->next->ConcurrentQueue::Block::template is_empty()) { blockBaseDiff -= static_cast(BLOCK_SIZE); currentTailIndex += 
static_cast(BLOCK_SIZE); - + this->tailBlock = this->tailBlock->next; firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock; - + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; entry.base = currentTailIndex; entry.block = this->tailBlock; pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); } - + // Now allocate as many blocks as necessary from the block pool while (blockBaseDiff > 0) { blockBaseDiff -= static_cast(BLOCK_SIZE); currentTailIndex += static_cast(BLOCK_SIZE); - + auto head = this->headIndex.load(std::memory_order_relaxed); assert(!details::circular_less_than(currentTailIndex, head)); bool full = !details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); @@ -94779,13 +104343,13 @@ class ConcurrentQueue this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; return false; } - + // pr_blockIndexFront is updated inside new_block_index, so we need to // update our fallback value too (since we keep the new index even if we // later fail) originalBlockIndexFront = originalBlockIndexSlotsUsed; } - + // Insert a new block in the circular linked list auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); if (newBlock == nullptr) { @@ -94794,7 +104358,7 @@ class ConcurrentQueue this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; return false; } - + #ifdef MCDBGQ_TRACKMEM newBlock->owner = this; #endif @@ -94808,15 +104372,15 @@ class ConcurrentQueue } this->tailBlock = newBlock; firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
this->tailBlock : firstAllocatedBlock; - + ++pr_blockIndexSlotsUsed; - + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; entry.base = currentTailIndex; entry.block = this->tailBlock; pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); } - + // Excellent, all allocations succeeded. Reset each block's emptiness before we fill them up, and // publish the new block index front auto block = firstAllocatedBlock; @@ -94827,12 +104391,12 @@ class ConcurrentQueue } block = block->next; } - + if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))) { blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); } } - + // Enqueue, one block at a time index_t newTailIndex = startTailIndex + static_cast(count); currentTailIndex = startTailIndex; @@ -94873,11 +104437,11 @@ class ConcurrentQueue // any allocated blocks in our linked list for later, though). auto constructedStopIndex = currentTailIndex; auto lastBlockEnqueued = this->tailBlock; - + pr_blockIndexFront = originalBlockIndexFront; pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; this->tailBlock = startBlock == nullptr ? 
firstAllocatedBlock : startBlock; - + if (!details::is_trivially_destructible::value) { auto block = startBlock; if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { @@ -94901,22 +104465,22 @@ class ConcurrentQueue MOODYCAMEL_RETHROW; } } - + if (this->tailBlock == endBlock) { assert(currentTailIndex == newTailIndex); break; } this->tailBlock = this->tailBlock->next; } - + if (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst))) && firstAllocatedBlock != nullptr) { blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); } - + this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } - + template size_t dequeue_bulk(It& itemFirst, size_t max) { @@ -94926,9 +104490,9 @@ class ConcurrentQueue if (details::circular_less_than(0, desiredCount)) { desiredCount = desiredCount < max ? desiredCount : max; std::atomic_thread_fence(std::memory_order_acquire); - + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);; - + tail = this->tailIndex.load(std::memory_order_acquire); auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); if (details::circular_less_than(0, actualCount)) { @@ -94936,20 +104500,20 @@ class ConcurrentQueue if (actualCount < desiredCount) { this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); } - + // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this // will never exceed tail. 
auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); - + // Determine which block the first element is in auto localBlockIndex = blockIndex.load(std::memory_order_acquire); auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); - + auto headBase = localBlockIndex->entries[localBlockIndexHead].base; auto firstBlockBaseIndex = firstIndex & ~static_cast(BLOCK_SIZE - 1); auto offset = static_cast(static_cast::type>(firstBlockBaseIndex - headBase) / BLOCK_SIZE); auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1); - + // Iterate the blocks and dequeue auto index = firstIndex; do { @@ -94986,19 +104550,19 @@ class ConcurrentQueue } block->ConcurrentQueue::Block::template set_many_empty(firstIndexInBlock, static_cast(endIndex - firstIndexInBlock)); indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); - + firstIndexInBlock = index; endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? 
firstIndex + static_cast(actualCount) : endIndex; } while (index != firstIndex + actualCount); - + MOODYCAMEL_RETHROW; } } block->ConcurrentQueue::Block::template set_many_empty(firstIndexInBlock, static_cast(endIndex - firstIndexInBlock)); indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); } while (index != firstIndex + actualCount); - + return actualCount; } else { @@ -95006,17 +104570,17 @@ class ConcurrentQueue this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); } } - + return 0; } - + private: struct BlockIndexEntry { index_t base; Block* block; }; - + struct BlockIndexHeader { size_t size; @@ -95024,12 +104588,12 @@ class ConcurrentQueue BlockIndexEntry* entries; void* prev; }; - - + + bool new_block_index(size_t numberOfFilledSlotsToExpose) { auto prevBlockSizeMask = pr_blockIndexSize - 1; - + // Create the new block pr_blockIndexSize <<= 1; auto newRawPtr = static_cast((Traits::malloc)(sizeof(BlockIndexHeader) + std::alignment_of::value - 1 + sizeof(BlockIndexEntry) * pr_blockIndexSize)); @@ -95037,9 +104601,9 @@ class ConcurrentQueue pr_blockIndexSize >>= 1; // Reset to allow graceful retry return false; } - + auto newBlockIndexEntries = reinterpret_cast(details::align_for(newRawPtr + sizeof(BlockIndexHeader))); - + // Copy in all the old indices, if any size_t j = 0; if (pr_blockIndexSlotsUsed != 0) { @@ -95049,50 +104613,50 @@ class ConcurrentQueue i = (i + 1) & prevBlockSizeMask; } while (i != pr_blockIndexFront); } - + // Update everything auto header = new (newRawPtr) BlockIndexHeader; header->size = pr_blockIndexSize; header->front.store(numberOfFilledSlotsToExpose - 1, std::memory_order_relaxed); header->entries = newBlockIndexEntries; header->prev = pr_blockIndexRaw; // we link the new block to the old one so we can free it later - + pr_blockIndexFront = j; pr_blockIndexEntries = newBlockIndexEntries; pr_blockIndexRaw = newRawPtr; blockIndex.store(header, std::memory_order_release); - + return true; } - + private: 
std::atomic blockIndex; - + // To be used by producer only -- consumer must use the ones in referenced by blockIndex size_t pr_blockIndexSlotsUsed; size_t pr_blockIndexSize; size_t pr_blockIndexFront; // Next slot (not current) BlockIndexEntry* pr_blockIndexEntries; void* pr_blockIndexRaw; - + #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG public: ExplicitProducer* nextExplicitProducer; private: #endif - + #ifdef MCDBGQ_TRACKMEM friend struct MemStats; #endif }; - - + + ////////////////////////////////// // Implicit queue ////////////////////////////////// - + struct ImplicitProducer : public ProducerBase - { + { ImplicitProducer(ConcurrentQueue* parent_) : ProducerBase(parent_, false), nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE), @@ -95100,21 +104664,21 @@ class ConcurrentQueue { new_block_index(); } - + ~ImplicitProducer() { // Note that since we're in the destructor we can assume that all enqueue/dequeue operations // completed already; this means that all undequeued elements are placed contiguously across // contiguous blocks, and that only the first and last remaining blocks can be only partially // empty (all other remaining blocks must be completely full). - + #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED // Unregister ourselves for thread termination notification if (!this->inactive.load(std::memory_order_relaxed)) { details::ThreadExitNotifier::unsubscribe(&threadExitListener); } #endif - + // Destroy all remaining elements! 
auto tail = this->tailIndex.load(std::memory_order_relaxed); auto index = this->headIndex.load(std::memory_order_relaxed); @@ -95127,10 +104691,10 @@ class ConcurrentQueue // Free the old block this->parent->add_block_to_free_list(block); } - + block = get_block_index_entry_for_index(index)->value.load(std::memory_order_relaxed); } - + ((*block)[index])->~T(); ++index; } @@ -95140,7 +104704,7 @@ class ConcurrentQueue if (this->tailBlock != nullptr && (forceFreeLastBlock || (tail & static_cast(BLOCK_SIZE - 1)) != 0)) { this->parent->add_block_to_free_list(this->tailBlock); } - + // Destroy block index auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); if (localBlockIndex != nullptr) { @@ -95155,7 +104719,7 @@ class ConcurrentQueue } while (localBlockIndex != nullptr); } } - + template inline bool enqueue(U&& element) { @@ -95176,7 +104740,7 @@ class ConcurrentQueue if (!insert_block_index_entry(idxEntry, currentTailIndex)) { return false; } - + // Get ahold of a new block auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); if (newBlock == nullptr) { @@ -95188,7 +104752,7 @@ class ConcurrentQueue newBlock->owner = this; #endif newBlock->ConcurrentQueue::Block::template reset_empty(); - + if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new ((T*)nullptr) T(std::forward(element)))) { // May throw, try to insert now before we publish the fact that we have this new block MOODYCAMEL_TRY { @@ -95201,25 +104765,25 @@ class ConcurrentQueue MOODYCAMEL_RETHROW; } } - + // Insert the new block into the index idxEntry->value.store(newBlock, std::memory_order_relaxed); - + this->tailBlock = newBlock; - + if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new ((T*)nullptr) T(std::forward(element)))) { this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } } - + // Enqueue new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); - + this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } - + template bool 
dequeue(U& element) { @@ -95228,19 +104792,19 @@ class ConcurrentQueue index_t overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); if (details::circular_less_than(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) { std::atomic_thread_fence(std::memory_order_acquire); - + index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); tail = this->tailIndex.load(std::memory_order_acquire); if ((details::likely)(details::circular_less_than(myDequeueCount - overcommit, tail))) { index_t index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); - + // Determine which block the element is in auto entry = get_block_index_entry_for_index(index); - + // Dequeue auto block = entry->value.load(std::memory_order_relaxed); auto& el = *((*block)[index]); - + if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) { #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX // Note: Acquiring the mutex with every dequeue instead of only when a block @@ -95252,7 +104816,7 @@ class ConcurrentQueue index_t index; BlockIndexEntry* entry; ConcurrentQueue* parent; - + ~Guard() { (*block)[index]->~T(); @@ -95280,34 +104844,34 @@ class ConcurrentQueue this->parent->add_block_to_free_list(block); // releases the above store } } - + return true; } else { this->dequeueOvercommit.fetch_add(1, std::memory_order_release); } } - + return false; } - + template bool enqueue_bulk(It itemFirst, size_t count) { // First, we need to make sure we have enough room to enqueue all of the elements; // this means pre-allocating blocks and putting them in the block index (but only if // all the allocations succeeded). - + // Note that the tailBlock we start off with may not be owned by us any more; // this happens if it was filled up exactly to the top (setting tailIndex to // the first index of the next block which is not yet allocated), then dequeued // completely (putting it on the free list) before we enqueue again. 
- + index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); auto startBlock = this->tailBlock; Block* firstAllocatedBlock = nullptr; auto endBlock = this->tailBlock; - + // Figure out how many blocks we'll need to allocate, and do so size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); index_t currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); @@ -95318,7 +104882,7 @@ class ConcurrentQueue do { blockBaseDiff -= static_cast(BLOCK_SIZE); currentTailIndex += static_cast(BLOCK_SIZE); - + // Find out where we'll be inserting this block in the block index BlockIndexEntry* idxEntry = nullptr; // initialization here unnecessary but compiler can't always tell Block* newBlock; @@ -95342,19 +104906,19 @@ class ConcurrentQueue } this->parent->add_blocks_to_free_list(firstAllocatedBlock); this->tailBlock = startBlock; - + return false; } - + #ifdef MCDBGQ_TRACKMEM newBlock->owner = this; #endif newBlock->ConcurrentQueue::Block::template reset_empty(); newBlock->next = nullptr; - + // Insert the new block into the index idxEntry->value.store(newBlock, std::memory_order_relaxed); - + // Store the chain of blocks so that we can undo if later allocations fail, // and so that we can find the blocks when we do the actual enqueueing if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr) { @@ -95366,7 +104930,7 @@ class ConcurrentQueue firstAllocatedBlock = firstAllocatedBlock == nullptr ? newBlock : firstAllocatedBlock; } while (blockBaseDiff > 0); } - + // Enqueue, one block at a time index_t newTailIndex = startTailIndex + static_cast(count); currentTailIndex = startTailIndex; @@ -95396,7 +104960,7 @@ class ConcurrentQueue MOODYCAMEL_CATCH (...) 
{ auto constructedStopIndex = currentTailIndex; auto lastBlockEnqueued = this->tailBlock; - + if (!details::is_trivially_destructible::value) { auto block = startBlock; if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { @@ -95417,7 +104981,7 @@ class ConcurrentQueue block = block->next; } } - + currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) { currentTailIndex += static_cast(BLOCK_SIZE); @@ -95430,7 +104994,7 @@ class ConcurrentQueue MOODYCAMEL_RETHROW; } } - + if (this->tailBlock == endBlock) { assert(currentTailIndex == newTailIndex); break; @@ -95440,7 +105004,7 @@ class ConcurrentQueue this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } - + template size_t dequeue_bulk(It& itemFirst, size_t max) { @@ -95450,9 +105014,9 @@ class ConcurrentQueue if (details::circular_less_than(0, desiredCount)) { desiredCount = desiredCount < max ? desiredCount : max; std::atomic_thread_fence(std::memory_order_acquire); - + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); - + tail = this->tailIndex.load(std::memory_order_acquire); auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); if (details::circular_less_than(0, actualCount)) { @@ -95460,11 +105024,11 @@ class ConcurrentQueue if (actualCount < desiredCount) { this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); } - + // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this // will never exceed tail. 
auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); - + // Iterate the blocks and dequeue auto index = firstIndex; BlockIndexHeader* localBlockIndex; @@ -95473,7 +105037,7 @@ class ConcurrentQueue auto blockStartIndex = index; auto endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; - + auto entry = localBlockIndex->index[indexIndex]; auto block = entry->value.load(std::memory_order_relaxed); if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) { @@ -95501,7 +105065,7 @@ class ConcurrentQueue while (index != endIndex) { (*block)[index++]->~T(); } - + if (block->ConcurrentQueue::Block::template set_many_empty(blockStartIndex, static_cast(endIndex - blockStartIndex))) { #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX debug::DebugLock lock(mutex); @@ -95510,12 +105074,12 @@ class ConcurrentQueue this->parent->add_block_to_free_list(block); } indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); - + blockStartIndex = index; endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? 
firstIndex + static_cast(actualCount) : endIndex; } while (index != firstIndex + actualCount); - + MOODYCAMEL_RETHROW; } } @@ -95532,27 +105096,27 @@ class ConcurrentQueue } indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); } while (index != firstIndex + actualCount); - + return actualCount; } else { this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); } } - + return 0; } - + private: // The block size must be > 1, so any number with the low bit set is an invalid block base index static const index_t INVALID_BLOCK_BASE = 1; - + struct BlockIndexEntry { std::atomic key; std::atomic value; }; - + struct BlockIndexHeader { size_t capacity; @@ -95561,7 +105125,7 @@ class ConcurrentQueue BlockIndexEntry** index; BlockIndexHeader* prev; }; - + template inline bool insert_block_index_entry(BlockIndexEntry*& idxEntry, index_t blockStartIndex) { @@ -95573,12 +105137,12 @@ class ConcurrentQueue idxEntry = localBlockIndex->index[newTail]; if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE || idxEntry->value.load(std::memory_order_relaxed) == nullptr) { - + idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); localBlockIndex->tail.store(newTail, std::memory_order_release); return true; } - + // No room in the old block index, try to allocate another one! 
MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { return false; @@ -95594,20 +105158,20 @@ class ConcurrentQueue localBlockIndex->tail.store(newTail, std::memory_order_release); return true; } - + inline void rewind_block_index_tail() { auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); localBlockIndex->tail.store((localBlockIndex->tail.load(std::memory_order_relaxed) - 1) & (localBlockIndex->capacity - 1), std::memory_order_relaxed); } - + inline BlockIndexEntry* get_block_index_entry_for_index(index_t index) const { BlockIndexHeader* localBlockIndex; auto idx = get_block_index_index_for_index(index, localBlockIndex); return localBlockIndex->index[idx]; } - + inline size_t get_block_index_index_for_index(index_t index, BlockIndexHeader*& localBlockIndex) const { #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX @@ -95625,7 +105189,7 @@ class ConcurrentQueue assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) == index && localBlockIndex->index[idx]->value.load(std::memory_order_relaxed) != nullptr); return idx; } - + bool new_block_index() { auto prev = blockIndex.load(std::memory_order_relaxed); @@ -95638,7 +105202,7 @@ class ConcurrentQueue if (raw == nullptr) { return false; } - + auto header = new (raw) BlockIndexHeader; auto entries = reinterpret_cast(details::align_for(raw + sizeof(BlockIndexHeader))); auto index = reinterpret_cast(details::align_for(reinterpret_cast(entries) + sizeof(BlockIndexEntry) * entryCount)); @@ -95662,14 +105226,14 @@ class ConcurrentQueue header->index = index; header->capacity = nextBlockIndexCapacity; header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1), std::memory_order_relaxed); - + blockIndex.store(header, std::memory_order_release); - + nextBlockIndexCapacity <<= 1; - + return true; } - + private: size_t nextBlockIndexCapacity; std::atomic blockIndex; @@ -95679,7 +105243,7 @@ class ConcurrentQueue details::ThreadExitListener threadExitListener; private: #endif - + #ifdef 
MOODYCAMEL_QUEUE_INTERNAL_DEBUG public: ImplicitProducer* nextImplicitProducer; @@ -95693,12 +105257,12 @@ class ConcurrentQueue friend struct MemStats; #endif }; - - + + ////////////////////////////////// // Block pool manipulation ////////////////////////////////// - + void populate_initial_block_list(size_t blockCount) { initialBlockPoolSize = blockCount; @@ -95706,7 +105270,7 @@ class ConcurrentQueue initialBlockPool = nullptr; return; } - + initialBlockPool = create_array(blockCount); if (initialBlockPool == nullptr) { initialBlockPoolSize = 0; @@ -95715,18 +105279,18 @@ class ConcurrentQueue initialBlockPool[i].dynamicallyAllocated = false; } } - + inline Block* try_get_block_from_initial_pool() { if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= initialBlockPoolSize) { return nullptr; } - + auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed); - + return index < initialBlockPoolSize ? (initialBlockPool + index) : nullptr; } - + inline void add_block_to_free_list(Block* block) { #ifdef MCDBGQ_TRACKMEM @@ -95734,7 +105298,7 @@ class ConcurrentQueue #endif freeList.add(block); } - + inline void add_blocks_to_free_list(Block* block) { while (block != nullptr) { @@ -95743,12 +105307,12 @@ class ConcurrentQueue block = next; } } - + inline Block* try_get_block_from_free_list() { return freeList.try_get(); } - + // Gets a free block from one of the memory pools, or allocates a new one (if applicable) template Block* requisition_block() @@ -95757,12 +105321,12 @@ class ConcurrentQueue if (block != nullptr) { return block; } - + block = try_get_block_from_free_list(); if (block != nullptr) { return block; } - + MOODYCAMEL_CONSTEXPR_IF (canAlloc == CanAlloc) { return create(); } @@ -95770,7 +105334,7 @@ class ConcurrentQueue return nullptr; } } - + #ifdef MCDBGQ_TRACKMEM public: @@ -95787,28 +105351,28 @@ class ConcurrentQueue size_t queueClassBytes; size_t implicitBlockIndexBytes; size_t explicitBlockIndexBytes; - + friend class 
ConcurrentQueue; - + private: static MemStats getFor(ConcurrentQueue* q) { MemStats stats = { 0 }; - + stats.elementsEnqueued = q->size_approx(); - + auto block = q->freeList.head_unsafe(); while (block != nullptr) { ++stats.allocatedBlocks; ++stats.freeBlocks; block = block->freeListNext.load(std::memory_order_relaxed); } - + for (auto ptr = q->producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { bool implicit = dynamic_cast(ptr) != nullptr; stats.implicitProducers += implicit ? 1 : 0; stats.explicitProducers += implicit ? 0 : 1; - + if (implicit) { auto prod = static_cast(ptr); stats.queueClassBytes += sizeof(ImplicitProducer); @@ -95856,18 +105420,18 @@ class ConcurrentQueue } } } - + auto freeOnInitialPool = q->initialBlockPoolIndex.load(std::memory_order_relaxed) >= q->initialBlockPoolSize ? 0 : q->initialBlockPoolSize - q->initialBlockPoolIndex.load(std::memory_order_relaxed); stats.allocatedBlocks += freeOnInitialPool; stats.freeBlocks += freeOnInitialPool; - + stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks; stats.queueClassBytes += sizeof(ConcurrentQueue); - + return stats; } }; - + // For debugging only. Not thread-safe. MemStats getMemStats() { @@ -95876,18 +105440,18 @@ class ConcurrentQueue private: friend struct MemStats; #endif - - + + ////////////////////////////////// // Producer list manipulation - ////////////////////////////////// - + ////////////////////////////////// + ProducerBase* recycle_or_create_producer(bool isExplicit) { bool recycled; return recycle_or_create_producer(isExplicit, recycled); } - + ProducerBase* recycle_or_create_producer(bool isExplicit, bool& recycled) { #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH @@ -95904,26 +105468,26 @@ class ConcurrentQueue } } } - + recycled = false; return add_producer(isExplicit ? 
static_cast(create(this)) : create(this)); } - + ProducerBase* add_producer(ProducerBase* producer) { // Handle failed memory allocation if (producer == nullptr) { return nullptr; } - + producerCount.fetch_add(1, std::memory_order_relaxed); - + // Add it to the lock-free list auto prevTail = producerListTail.load(std::memory_order_relaxed); do { producer->next = prevTail; } while (!producerListTail.compare_exchange_weak(prevTail, producer, std::memory_order_release, std::memory_order_relaxed)); - + #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG if (producer->isExplicit) { auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed); @@ -95938,10 +105502,10 @@ class ConcurrentQueue } while (!implicitProducers.compare_exchange_weak(prevTailImplicit, static_cast(producer), std::memory_order_release, std::memory_order_relaxed)); } #endif - + return producer; } - + void reown_producers() { // After another instance is moved-into/swapped-with this one, all the @@ -95951,31 +105515,31 @@ class ConcurrentQueue ptr->parent = this; } } - - + + ////////////////////////////////// // Implicit producer hash ////////////////////////////////// - + struct ImplicitProducerKVP { std::atomic key; ImplicitProducer* value; // No need for atomicity since it's only read by the thread that sets it in the first place - + ImplicitProducerKVP() : value(nullptr) { } - + ImplicitProducerKVP(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT { key.store(other.key.load(std::memory_order_relaxed), std::memory_order_relaxed); value = other.value; } - + inline ImplicitProducerKVP& operator=(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT { swap(other); return *this; } - + inline void swap(ImplicitProducerKVP& other) MOODYCAMEL_NOEXCEPT { if (this != &other) { @@ -95984,17 +105548,17 @@ class ConcurrentQueue } } }; - + template friend void duckdb_moodycamel::swap(typename ConcurrentQueue::ImplicitProducerKVP&, typename ConcurrentQueue::ImplicitProducerKVP&) MOODYCAMEL_NOEXCEPT; - + struct 
ImplicitProducerHash { size_t capacity; ImplicitProducerKVP* entries; ImplicitProducerHash* prev; }; - + inline void populate_initial_implicit_producer_hash() { MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { @@ -96012,7 +105576,7 @@ class ConcurrentQueue implicitProducerHash.store(hash, std::memory_order_relaxed); } } - + void swap_implicit_producer_hashes(ConcurrentQueue& other) { MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { @@ -96023,9 +105587,9 @@ class ConcurrentQueue initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries); initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0]; other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0]; - + details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount); - + details::swap_relaxed(implicitProducerHash, other.implicitProducerHash); if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) { implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed); @@ -96049,27 +105613,27 @@ class ConcurrentQueue } } } - + // Only fails (returns nullptr) if memory allocation fails ImplicitProducer* get_or_add_implicit_producer() { // Note that since the data is essentially thread-local (key is thread ID), // there's a reduced need for fences (memory ordering is already consistent // for any individual thread), except for the current table itself. - + // Start by looking for the thread ID in the current and all previous hash tables. // If it's not found, it must not be in there yet, since this same thread would // have added it previously to one of the tables that we traversed. 
- + // Code and algorithm adapted from http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table - + #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH debug::DebugLock lock(implicitProdMutex); #endif - + auto id = details::thread_id(); auto hashedId = details::hash_thread_id(id); - + auto mainHash = implicitProducerHash.load(std::memory_order_acquire); assert(mainHash != nullptr); // silence clang-tidy and MSVC warnings (hash cannot be null) for (auto hash = mainHash; hash != nullptr; hash = hash->prev) { @@ -96077,7 +105641,7 @@ class ConcurrentQueue auto index = hashedId; while (true) { // Not an infinite loop because at least one slot is free in the hash table index &= hash->capacity - 1; - + auto probedKey = hash->entries[index].key.load(std::memory_order_relaxed); if (probedKey == id) { // Found it! If we had to search several hashes deep, though, we should lazily add it @@ -96105,7 +105669,7 @@ class ConcurrentQueue ++index; } } - + return value; } if (probedKey == details::invalid_thread_id) { @@ -96114,7 +105678,7 @@ class ConcurrentQueue ++index; } } - + // Insert! 
auto newCount = 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed); while (true) { @@ -96137,7 +105701,7 @@ class ConcurrentQueue implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); return nullptr; } - + auto newHash = new (raw) ImplicitProducerHash; newHash->capacity = newCapacity; newHash->entries = reinterpret_cast(details::align_for(raw + sizeof(ImplicitProducerHash))); @@ -96154,7 +105718,7 @@ class ConcurrentQueue implicitProducerHashResizeInProgress.clear(std::memory_order_release); } } - + // If it's < three-quarters full, add to the old one anyway so that we don't have to wait for the next table // to finish being allocated by another thread (and if we just finished allocating above, the condition will // always be true) @@ -96168,18 +105732,18 @@ class ConcurrentQueue if (recycled) { implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); } - + #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED producer->threadExitListener.callback = &ConcurrentQueue::implicit_producer_thread_exited_callback; producer->threadExitListener.userData = producer; details::ThreadExitNotifier::subscribe(&producer->threadExitListener); #endif - + auto index = hashedId; while (true) { index &= mainHash->capacity - 1; auto probedKey = mainHash->entries[index].key.load(std::memory_order_relaxed); - + auto empty = details::invalid_thread_id; #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED auto reusable = details::invalid_thread_id2; @@ -96195,20 +105759,20 @@ class ConcurrentQueue } return producer; } - + // Hmm, the old hash is quite full and somebody else is busy allocating a new one. // We need to wait for the allocating thread to finish (if it succeeds, we add, if not, // we try to allocate ourselves). 
mainHash = implicitProducerHash.load(std::memory_order_acquire); } } - + #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED void implicit_producer_thread_exited(ImplicitProducer* producer) { // Remove from thread exit listeners details::ThreadExitNotifier::unsubscribe(&producer->threadExitListener); - + // Remove from hash #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH debug::DebugLock lock(implicitProdMutex); @@ -96218,7 +105782,7 @@ class ConcurrentQueue auto id = details::thread_id(); auto hashedId = details::hash_thread_id(id); details::thread_id_t probedKey; - + // We need to traverse all the hashes just in case other threads aren't on the current one yet and are // trying to add an entry thinking there's a free slot (because they reused a producer) for (; hash != nullptr; hash = hash->prev) { @@ -96233,11 +105797,11 @@ class ConcurrentQueue ++index; } while (probedKey != details::invalid_thread_id); // Can happen if the hash has changed but we weren't put back in it yet, or if we weren't added to this hash in the first place } - + // Mark the queue as being recyclable producer->inactive.store(true, std::memory_order_release); } - + static void implicit_producer_thread_exited_callback(void* userData) { auto producer = static_cast(userData); @@ -96245,7 +105809,7 @@ class ConcurrentQueue queue->implicit_producer_thread_exited(producer); } #endif - + ////////////////////////////////// // Utility functions ////////////////////////////////// @@ -96321,30 +105885,30 @@ class ConcurrentQueue private: std::atomic producerListTail; std::atomic producerCount; - + std::atomic initialBlockPoolIndex; Block* initialBlockPool; size_t initialBlockPoolSize; - + #ifndef MCDBGQ_USEDEBUGFREELIST FreeList freeList; #else debug::DebugFreeList freeList; #endif - + std::atomic implicitProducerHash; std::atomic implicitProducerHashCount; // Number of slots logically used ImplicitProducerHash initialImplicitProducerHash; std::array initialImplicitProducerHashEntries; std::atomic_flag 
implicitProducerHashResizeInProgress; - + std::atomic nextExplicitConsumerId; std::atomic globalExplicitConsumerOffset; - + #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH debug::DebugMutex implicitProdMutex; #endif - + #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG std::atomic explicitProducers; std::atomic implicitProducers; @@ -96417,7 +105981,7 @@ inline void swap(typename ConcurrentQueue::ImplicitProducerKVP& a, ty // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 // See the end of this file for a list // Provides an efficient implementation of a semaphore (LightweightSemaphore). @@ -96853,21 +106417,6 @@ class LightweightSemaphore // LICENSE_CHANGE_END -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/thread.hpp -// -// -//===----------------------------------------------------------------------===// - - - -#include - -namespace duckdb { -using std::thread; -} #else #include @@ -96981,7 +106530,7 @@ bool TaskScheduler::GetTaskFromProducer(ProducerToken &token, unique_ptr & return queue->DequeueFromProducer(token, task); } -void TaskScheduler::ExecuteForever(bool *marker) { +void TaskScheduler::ExecuteForever(atomic *marker) { #ifndef DUCKDB_NO_THREADS unique_ptr task; // loop until the marker is set to false @@ -96999,7 +106548,7 @@ void TaskScheduler::ExecuteForever(bool *marker) { } #ifndef DUCKDB_NO_THREADS -static void ThreadExecuteTasks(TaskScheduler *scheduler, bool *marker) { +static void ThreadExecuteTasks(TaskScheduler *scheduler, atomic *marker) { scheduler->ExecuteForever(marker); } #endif @@ -97015,7 +106564,9 @@ void TaskScheduler::SetThreads(int32_t n) { } SetThreadsInternal(n); #else - throw NotImplementedException("DuckDB was compiled without threads! 
Setting threads is not allowed."); + if (n != 1) { + throw NotImplementedException("DuckDB was compiled without threads! Setting threads > 1 is not allowed."); + } #endif } @@ -97030,7 +106581,7 @@ void TaskScheduler::SetThreadsInternal(int32_t n) { idx_t create_new_threads = new_thread_count - threads.size(); for (idx_t i = 0; i < create_new_threads; i++) { // launch a thread and assign it a cancellation marker - auto marker = unique_ptr(new bool(true)); + auto marker = unique_ptr>(new atomic(true)); auto worker_thread = make_unique(ThreadExecuteTasks, this, marker.get()); auto thread_wrapper = make_unique(move(worker_thread)); @@ -97060,7 +106611,7 @@ void TaskScheduler::SetThreadsInternal(int32_t n) { namespace duckdb { -ThreadContext::ThreadContext(ClientContext &context) : profiler(context.profiler.IsEnabled()) { +ThreadContext::ThreadContext(ClientContext &context) : profiler(context.profiler->IsEnabled()) { } } // namespace duckdb @@ -97137,8 +106688,7 @@ unique_ptr Constraint::Deserialize(Deserializer &source) { case ConstraintType::UNIQUE: return UniqueConstraint::Deserialize(source); default: - // don't know how to serialize this constraint type - return nullptr; + throw InternalException("Unrecognized constraint type for serialization"); } } @@ -97179,7 +106729,7 @@ unique_ptr CheckConstraint::Deserialize(Deserializer &source) { namespace duckdb { string NotNullConstraint::ToString() const { - return "NOT NULL Constraint"; + return "NOT NULL"; } unique_ptr NotNullConstraint::Copy() { @@ -97220,8 +106770,9 @@ unique_ptr UniqueConstraint::Copy() { if (index == INVALID_INDEX) { return make_unique(columns, is_primary_key); } else { - D_ASSERT(columns.size() == 0); - return make_unique(index, is_primary_key); + auto result = make_unique(index, is_primary_key); + result->columns = columns; + return move(result); } } @@ -97240,24 +106791,73 @@ unique_ptr UniqueConstraint::Deserialize(Deserializer &source) { auto is_primary_key = source.Read(); auto index = 
source.Read(); auto column_count = source.Read(); + vector columns; + for (uint32_t i = 0; i < column_count; i++) { + auto column_name = source.Read(); + columns.push_back(column_name); + } if (index != INVALID_INDEX) { // single column parsed constraint - return make_unique(index, is_primary_key); + auto result = make_unique(index, is_primary_key); + result->columns = move(columns); + return move(result); } else { // column list parsed constraint - vector columns; - for (uint32_t i = 0; i < column_count; i++) { - auto column_name = source.Read(); - columns.push_back(column_name); - } - return make_unique(columns, is_primary_key); + return make_unique(move(columns), is_primary_key); } } } // namespace duckdb +namespace duckdb { + +BetweenExpression::BetweenExpression(unique_ptr input_p, unique_ptr lower_p, + unique_ptr upper_p) + : ParsedExpression(ExpressionType::COMPARE_BETWEEN, ExpressionClass::BETWEEN), input(move(input_p)), + lower(move(lower_p)), upper(move(upper_p)) { +} + +string BetweenExpression::ToString() const { + return input->ToString() + " BETWEEN " + lower->ToString() + " AND " + upper->ToString(); +} + +bool BetweenExpression::Equals(const BetweenExpression *a, const BetweenExpression *b) { + if (!a->input->Equals(b->input.get())) { + return false; + } + if (!a->lower->Equals(b->lower.get())) { + return false; + } + if (!a->upper->Equals(b->upper.get())) { + return false; + } + return true; +} + +unique_ptr BetweenExpression::Copy() const { + auto copy = make_unique(input->Copy(), lower->Copy(), upper->Copy()); + copy->CopyProperties(*this); + return move(copy); +} + +void BetweenExpression::Serialize(Serializer &serializer) { + ParsedExpression::Serialize(serializer); + input->Serialize(serializer); + lower->Serialize(serializer); + upper->Serialize(serializer); +} + +unique_ptr BetweenExpression::Deserialize(ExpressionType type, Deserializer &source) { + auto input = ParsedExpression::Deserialize(source); + auto lower = 
ParsedExpression::Deserialize(source); + auto upper = ParsedExpression::Deserialize(source); + return make_unique(move(input), move(lower), move(upper)); +} + +} // namespace duckdb + @@ -97346,7 +106946,7 @@ CastExpression::CastExpression(LogicalType target, unique_ptr } string CastExpression::ToString() const { - return "CAST(" + child->ToString() + " AS " + cast_type.ToString() + ")"; + return (try_cast ? "TRY_CAST(" : "CAST(") + child->ToString() + " AS " + cast_type.ToString() + ")"; } bool CastExpression::Equals(const CastExpression *a, const CastExpression *b) { @@ -97621,10 +107221,11 @@ bool ConjunctionExpression::Equals(const ConjunctionExpression *a, const Conjunc } unique_ptr ConjunctionExpression::Copy() const { - auto copy = make_unique(type); + vector> copy_children; for (auto &expr : children) { - copy->children.push_back(expr->Copy()); + copy_children.push_back(expr->Copy()); } + auto copy = make_unique(type, move(copy_children)); copy->CopyProperties(*this); return move(copy); } @@ -97658,7 +107259,7 @@ string ConstantExpression::ToString() const { } bool ConstantExpression::Equals(const ConstantExpression *a, const ConstantExpression *b) { - return a->value == b->value; + return !ValueOperations::DistinctFrom(a->value, b->value); } hash_t ConstantExpression::Hash() const { @@ -97678,7 +107279,7 @@ void ConstantExpression::Serialize(Serializer &serializer) { unique_ptr ConstantExpression::Deserialize(ExpressionType type, Deserializer &source) { Value value = Value::Deserialize(source); - return make_unique(value); + return make_unique(move(value)); } } // namespace duckdb @@ -97717,19 +107318,16 @@ unique_ptr DefaultExpression::Deserialize(ExpressionType type, namespace duckdb { FunctionExpression::FunctionExpression(string schema, const string &function_name, - vector> &children, + vector> children_p, unique_ptr filter, bool distinct, bool is_operator) : ParsedExpression(ExpressionType::FUNCTION, ExpressionClass::FUNCTION), 
schema(std::move(schema)), - function_name(StringUtil::Lower(function_name)), is_operator(is_operator), distinct(distinct), - filter(move(filter)) { - for (auto &child : children) { - this->children.push_back(move(child)); - } + function_name(StringUtil::Lower(function_name)), is_operator(is_operator), children(move(children_p)), + distinct(distinct), filter(move(filter)) { } -FunctionExpression::FunctionExpression(const string &function_name, vector> &children, +FunctionExpression::FunctionExpression(const string &function_name, vector> children_p, unique_ptr filter, bool distinct, bool is_operator) - : FunctionExpression(INVALID_SCHEMA, function_name, children, move(filter), distinct, is_operator) { + : FunctionExpression(INVALID_SCHEMA, function_name, move(children_p), move(filter), distinct, is_operator) { } string FunctionExpression::ToString() const { @@ -97783,7 +107381,8 @@ unique_ptr FunctionExpression::Copy() const { if (filter) { filter_copy = filter->Copy(); } - auto copy = make_unique(function_name, copy_children, move(filter_copy), distinct, is_operator); + auto copy = + make_unique(function_name, move(copy_children), move(filter_copy), distinct, is_operator); copy->schema = schema; copy->CopyProperties(*this); return move(copy); @@ -97808,7 +107407,7 @@ unique_ptr FunctionExpression::Deserialize(ExpressionType type auto distinct = source.Read(); auto is_operator = source.Read(); unique_ptr function; - function = make_unique(function_name, children, move(filter), distinct, is_operator); + function = make_unique(function_name, move(children), move(filter), distinct, is_operator); function->schema = schema; return move(function); } @@ -98171,6 +107770,7 @@ WindowExpression::WindowExpression(ExpressionType type, string schema, const str case ExpressionType::WINDOW_ROW_NUMBER: case ExpressionType::WINDOW_FIRST_VALUE: case ExpressionType::WINDOW_LAST_VALUE: + case ExpressionType::WINDOW_NTH_VALUE: case ExpressionType::WINDOW_RANK: case 
ExpressionType::WINDOW_RANK_DENSE: case ExpressionType::WINDOW_PERCENT_RANK: @@ -98184,8 +107784,121 @@ WindowExpression::WindowExpression(ExpressionType type, string schema, const str } } +string WindowExpression::GetName() const { + return !alias.empty() ? alias : function_name; +} + string WindowExpression::ToString() const { - return "WINDOW"; + // Start with function call + string result = function_name + "("; + result += StringUtil::Join(children, children.size(), ", ", + [](const unique_ptr &child) { return child->ToString(); }); + // Lead/Lag extra arguments + if (offset_expr.get()) { + result += ", "; + result += offset_expr->ToString(); + } + if (default_expr.get()) { + result += ", "; + result += default_expr->ToString(); + } + // Over clause + result += ") OVER("; + string sep; + + // Partitions + if (!partitions.empty()) { + result += "PARTITION BY "; + result += StringUtil::Join(partitions, partitions.size(), ", ", + [](const unique_ptr &partition) { return partition->ToString(); }); + sep = " "; + } + + // Orders + if (!orders.empty()) { + result += sep; + result += "ORDER BY "; + result += + StringUtil::Join(orders, orders.size(), ", ", [](const OrderByNode &order) { return order.ToString(); }); + sep = " "; + } + + // Rows/Range + string units = "ROWS"; + string from; + switch (start) { + case WindowBoundary::CURRENT_ROW_RANGE: + case WindowBoundary::CURRENT_ROW_ROWS: + from = "CURRENT ROW"; + units = (start == WindowBoundary::CURRENT_ROW_RANGE) ? "RANGE" : "ROWS"; + break; + case WindowBoundary::UNBOUNDED_PRECEDING: + if (end != WindowBoundary::CURRENT_ROW_RANGE) { + from = "UNBOUNDED PRECEDING"; + } + break; + case WindowBoundary::EXPR_PRECEDING_ROWS: + case WindowBoundary::EXPR_PRECEDING_RANGE: + from = start_expr->GetName() + " PRECEDING"; + units = (start == WindowBoundary::EXPR_PRECEDING_RANGE) ? 
"RANGE" : "ROWS"; + break; + case WindowBoundary::EXPR_FOLLOWING_ROWS: + case WindowBoundary::EXPR_FOLLOWING_RANGE: + from = start_expr->GetName() + " FOLLOWING"; + units = (start == WindowBoundary::EXPR_FOLLOWING_RANGE) ? "RANGE" : "ROWS"; + break; + default: + break; + } + + string to; + switch (end) { + case WindowBoundary::CURRENT_ROW_RANGE: + if (start != WindowBoundary::UNBOUNDED_PRECEDING) { + to = "CURRENT ROW"; + units = "RANGE"; + } + break; + case WindowBoundary::CURRENT_ROW_ROWS: + to = "CURRENT ROW"; + units = "ROWS"; + break; + case WindowBoundary::UNBOUNDED_PRECEDING: + to = "UNBOUNDED PRECEDING"; + break; + case WindowBoundary::EXPR_PRECEDING_ROWS: + case WindowBoundary::EXPR_PRECEDING_RANGE: + to = end_expr->GetName() + " PRECEDING"; + units = (start == WindowBoundary::EXPR_PRECEDING_RANGE) ? "RANGE" : "ROWS"; + break; + case WindowBoundary::EXPR_FOLLOWING_ROWS: + case WindowBoundary::EXPR_FOLLOWING_RANGE: + to = end_expr->GetName() + " FOLLOWING"; + units = (start == WindowBoundary::EXPR_FOLLOWING_RANGE) ? 
"RANGE" : "ROWS"; + break; + default: + break; + } + + if (!from.empty() || !to.empty()) { + result += sep + units; + } + if (!from.empty() && !to.empty()) { + result += " BETWEEN "; + result += from; + result += " AND "; + result += to; + } else if (!from.empty()) { + result += " "; + result += from; + } else if (!to.empty()) { + result += " "; + result += to; + } + + result += ")"; + + return result; } bool WindowExpression::Equals(const WindowExpression *a, const WindowExpression *b) { @@ -98718,14 +108431,12 @@ bool ParsedExpression::HasSubquery() const { } bool ParsedExpression::Equals(const BaseExpression *other) const { - if (other->expression_class == ExpressionClass::BOUND_EXPRESSION) { - auto bound_expr = (BoundExpression *)other; - other = bound_expr->parsed_expr.get(); - } if (!BaseExpression::Equals(other)) { return false; } switch (expression_class) { + case ExpressionClass::BETWEEN: + return BetweenExpression::Equals((BetweenExpression *)this, (BetweenExpression *)other); case ExpressionClass::CASE: return CaseExpression::Equals((CaseExpression *)this, (CaseExpression *)other); case ExpressionClass::CAST: @@ -98785,6 +108496,9 @@ unique_ptr ParsedExpression::Deserialize(Deserializer &source) auto alias = source.Read(); unique_ptr result; switch (expression_class) { + case ExpressionClass::BETWEEN: + result = BetweenExpression::Deserialize(type, source); + break; case ExpressionClass::CASE: result = CaseExpression::Deserialize(type, source); break; @@ -98852,17 +108566,30 @@ namespace duckdb { void ParsedExpressionIterator::EnumerateChildren(const ParsedExpression &expression, const std::function &callback) { - EnumerateChildren((ParsedExpression &)expression, [&](unique_ptr &child) { callback(*child); }); + EnumerateChildren((ParsedExpression &)expression, [&](unique_ptr &child) { + D_ASSERT(child); + callback(*child); + }); } void ParsedExpressionIterator::EnumerateChildren(ParsedExpression &expr, const std::function &callback) { - 
EnumerateChildren(expr, [&](unique_ptr &child) { callback(*child); }); + EnumerateChildren(expr, [&](unique_ptr &child) { + D_ASSERT(child); + callback(*child); + }); } void ParsedExpressionIterator::EnumerateChildren( ParsedExpression &expr, const std::function &child)> &callback) { switch (expr.expression_class) { + case ExpressionClass::BETWEEN: { + auto &cast_expr = (BetweenExpression &)expr; + callback(cast_expr.input); + callback(cast_expr.lower); + callback(cast_expr.upper); + break; + } case ExpressionClass::CASE: { auto &case_expr = (CaseExpression &)expr; for (auto &check : case_expr.case_checks) { @@ -98936,6 +108663,12 @@ void ParsedExpressionIterator::EnumerateChildren( for (auto &child : window_expr.children) { callback(child); } + if (window_expr.start_expr) { + callback(window_expr.start_expr); + } + if (window_expr.end_expr) { + callback(window_expr.end_expr); + } if (window_expr.offset_expr) { callback(window_expr.offset_expr); } @@ -98983,7 +108716,7 @@ void ParsedExpressionIterator::EnumerateChildren( // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list // this is a bit of a mess from c.h, port.h and some others. 
Upside is it makes the parser compile with minimal @@ -99095,7 +108828,7 @@ typedef enum PGPostgresAttributIdentityTypes { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*------------------------------------------------------------------------- @@ -99124,7 +108857,7 @@ typedef enum PGPostgresAttributIdentityTypes { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*------------------------------------------------------------------------- @@ -99235,7 +108968,7 @@ uint32_t bms_hash_value(const PGBitmapset *a); // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*------------------------------------------------------------------------- @@ -99288,7 +109021,7 @@ typedef enum PGLockWaitPolicy { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*------------------------------------------------------------------------- @@ -99312,7 +109045,7 @@ typedef enum PGLockWaitPolicy { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*------------------------------------------------------------------------- @@ -99385,7 +109118,7 @@ typedef int16_t PGAttrNumber; // 
LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*------------------------------------------------------------------------- @@ -99429,7 +109162,7 @@ typedef int16_t PGAttrNumber; // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*------------------------------------------------------------------------- @@ -101875,7 +111608,7 @@ typedef struct PGOnConflictExpr { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*------------------------------------------------------------------------- @@ -103913,8 +113646,11 @@ struct CommonTableExpressionInfo; //! The transformer class is responsible for transforming the internal Postgres //! parser representation into the DuckDB representation class Transformer { + static constexpr const idx_t DEFAULT_MAX_EXPRESSION_DEPTH = 1000; + public: - explicit Transformer(Transformer *parent = nullptr) : parent(parent) { + explicit Transformer(Transformer *parent = nullptr, idx_t max_expression_depth_p = DEFAULT_MAX_EXPRESSION_DEPTH) + : parent(parent), max_expression_depth(parent ? parent->max_expression_depth : max_expression_depth_p) { } //! Transforms a Postgres parse tree into a set of SQL Statements @@ -103927,6 +113663,7 @@ class Transformer { private: Transformer *parent; + idx_t max_expression_depth; //! The current prepared statement parameter index idx_t prepared_statement_parameter_index = 0; //! Holds window expressions defined by name. 
We need those when transforming the expressions referring to them. @@ -104008,43 +113745,44 @@ class Transformer { // Expression Transform //===--------------------------------------------------------------------===// //! Transform a Postgres boolean expression into an Expression - unique_ptr TransformBoolExpr(duckdb_libpgquery::PGBoolExpr *root); + unique_ptr TransformBoolExpr(duckdb_libpgquery::PGBoolExpr *root, idx_t depth); //! Transform a Postgres case expression into an Expression - unique_ptr TransformCase(duckdb_libpgquery::PGCaseExpr *root); + unique_ptr TransformCase(duckdb_libpgquery::PGCaseExpr *root, idx_t depth); //! Transform a Postgres type cast into an Expression - unique_ptr TransformTypeCast(duckdb_libpgquery::PGTypeCast *root); + unique_ptr TransformTypeCast(duckdb_libpgquery::PGTypeCast *root, idx_t depth); //! Transform a Postgres coalesce into an Expression - unique_ptr TransformCoalesce(duckdb_libpgquery::PGAExpr *root); + unique_ptr TransformCoalesce(duckdb_libpgquery::PGAExpr *root, idx_t depth); //! Transform a Postgres column reference into an Expression - unique_ptr TransformColumnRef(duckdb_libpgquery::PGColumnRef *root); + unique_ptr TransformColumnRef(duckdb_libpgquery::PGColumnRef *root, idx_t depth); //! Transform a Postgres constant value into an Expression - unique_ptr TransformValue(duckdb_libpgquery::PGValue val); + unique_ptr TransformValue(duckdb_libpgquery::PGValue val, idx_t depth); //! Transform a Postgres operator into an Expression - unique_ptr TransformAExpr(duckdb_libpgquery::PGAExpr *root); + unique_ptr TransformAExpr(duckdb_libpgquery::PGAExpr *root, idx_t depth); //! Transform a Postgres abstract expression into an Expression - unique_ptr TransformExpression(duckdb_libpgquery::PGNode *node); + unique_ptr TransformExpression(duckdb_libpgquery::PGNode *node, idx_t depth); //! 
Transform a Postgres function call into an Expression - unique_ptr TransformFuncCall(duckdb_libpgquery::PGFuncCall *root); + unique_ptr TransformFuncCall(duckdb_libpgquery::PGFuncCall *root, idx_t depth); //! Transform a Postgres boolean expression into an Expression - unique_ptr TransformInterval(duckdb_libpgquery::PGIntervalConstant *root); + unique_ptr TransformInterval(duckdb_libpgquery::PGIntervalConstant *root, idx_t depth); //! Transform a Postgres lambda node [e.g. (x, y) -> x + y] into a lambda expression - unique_ptr TransformLambda(duckdb_libpgquery::PGLambdaFunction *node); + unique_ptr TransformLambda(duckdb_libpgquery::PGLambdaFunction *node, idx_t depth); //! Transform a Postgres array access node (e.g. x[1] or x[1:3]) - unique_ptr TransformArrayAccess(duckdb_libpgquery::PGAIndirection *node); + unique_ptr TransformArrayAccess(duckdb_libpgquery::PGAIndirection *node, idx_t depth); //! Transform a positional reference (e.g. #1) - unique_ptr TransformPositionalReference(duckdb_libpgquery::PGPositionalReference *node); + unique_ptr TransformPositionalReference(duckdb_libpgquery::PGPositionalReference *node, + idx_t depth); //! 
Transform a Postgres constant value into an Expression - unique_ptr TransformConstant(duckdb_libpgquery::PGAConst *c); + unique_ptr TransformConstant(duckdb_libpgquery::PGAConst *c, idx_t depth); - unique_ptr TransformResTarget(duckdb_libpgquery::PGResTarget *root); - unique_ptr TransformNullTest(duckdb_libpgquery::PGNullTest *root); - unique_ptr TransformParamRef(duckdb_libpgquery::PGParamRef *node); - unique_ptr TransformNamedArg(duckdb_libpgquery::PGNamedArgExpr *root); + unique_ptr TransformResTarget(duckdb_libpgquery::PGResTarget *root, idx_t depth); + unique_ptr TransformNullTest(duckdb_libpgquery::PGNullTest *root, idx_t depth); + unique_ptr TransformParamRef(duckdb_libpgquery::PGParamRef *node, idx_t depth); + unique_ptr TransformNamedArg(duckdb_libpgquery::PGNamedArgExpr *root, idx_t depth); - unique_ptr TransformSQLValueFunction(duckdb_libpgquery::PGSQLValueFunction *node); + unique_ptr TransformSQLValueFunction(duckdb_libpgquery::PGSQLValueFunction *node, idx_t depth); - unique_ptr TransformSubquery(duckdb_libpgquery::PGSubLink *root); + unique_ptr TransformSubquery(duckdb_libpgquery::PGSubLink *root, idx_t depth); //===--------------------------------------------------------------------===// // Constraints transform //===--------------------------------------------------------------------===// @@ -104056,7 +113794,7 @@ class Transformer { //===--------------------------------------------------------------------===// // Collation transform //===--------------------------------------------------------------------===// - unique_ptr TransformCollateExpr(duckdb_libpgquery::PGCollateClause *collate); + unique_ptr TransformCollateExpr(duckdb_libpgquery::PGCollateClause *collate, idx_t depth); string TransformCollation(duckdb_libpgquery::PGCollateClause *collate); @@ -104104,12 +113842,13 @@ class Transformer { bool TransformOrderBy(duckdb_libpgquery::PGList *order, vector &result); //! 
Transform a Postgres SELECT clause into a list of Expressions - bool TransformExpressionList(duckdb_libpgquery::PGList *list, vector> &result); + void TransformExpressionList(duckdb_libpgquery::PGList &list, vector> &result, + idx_t depth); //! Transform a Postgres PARTITION BY/ORDER BY specification into lists of expressions - void TransformWindowDef(duckdb_libpgquery::PGWindowDef *window_spec, WindowExpression *expr); + void TransformWindowDef(duckdb_libpgquery::PGWindowDef *window_spec, WindowExpression *expr, idx_t depth); //! Transform a Postgres window frame specification into frame expressions - void TransformWindowFrame(duckdb_libpgquery::PGWindowDef *window_spec, WindowExpression *expr); + void TransformWindowFrame(duckdb_libpgquery::PGWindowDef *window_spec, WindowExpression *expr, idx_t depth); unique_ptr TransformSampleOptions(duckdb_libpgquery::PGNode *options); }; @@ -104125,7 +113864,7 @@ class Transformer { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list //===----------------------------------------------------------------------===// @@ -104144,7 +113883,7 @@ class Transformer { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list @@ -104199,7 +113938,7 @@ class PostgresParser { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*------------------------------------------------------------------------- @@ -104305,10 +114044,11 @@ vector Parser::Tokenize(const string &query) { case 
duckdb_libpgquery::PGSimplifiedTokenType::PG_SIMPLIFIED_TOKEN_KEYWORD: token.type = SimplifiedTokenType::SIMPLIFIED_TOKEN_KEYWORD; break; - case duckdb_libpgquery::PGSimplifiedTokenType::PG_SIMPLIFIED_TOKEN_COMMENT: + // comments are not supported by our tokenizer right now + case duckdb_libpgquery::PGSimplifiedTokenType::PG_SIMPLIFIED_TOKEN_COMMENT: // LCOV_EXCL_START token.type = SimplifiedTokenType::SIMPLIFIED_TOKEN_COMMENT; break; - } + } // LCOV_EXCL_STOP token.start = pg_token.start; result.push_back(token); } @@ -104349,11 +114089,12 @@ vector Parser::ParseOrderList(const string &select_list) { } auto &select = (SelectStatement &)*parser.statements[0]; if (select.node->type != QueryNodeType::SELECT_NODE) { - throw ParserException("Expected a single SELECT node"); + throw InternalException("Expected a single SELECT node"); } auto &select_node = (SelectNode &)*select.node; - if (select_node.modifiers.empty() || select_node.modifiers[0]->type != ResultModifierType::ORDER_MODIFIER) { - throw ParserException("Expected a single ORDER clause"); + if (select_node.modifiers.empty() || select_node.modifiers[0]->type != ResultModifierType::ORDER_MODIFIER || + select_node.modifiers.size() != 1) { + throw InternalException("Expected a single ORDER clause"); } auto &order = (OrderModifier &)*select_node.modifiers[0]; return move(order.orders); @@ -104391,7 +114132,7 @@ vector>> Parser::ParseValuesList(const strin } auto &select_node = (SelectNode &)*select.node; if (!select_node.from_table || select_node.from_table->type != TableReferenceType::EXPRESSION_LIST) { - throw ParserException("Expected a single VALUES statement"); + throw InternalException("Expected a single VALUES statement"); } auto &values_list = (ExpressionListRef &)*select_node.from_table; return move(values_list.values); @@ -104406,7 +114147,7 @@ vector Parser::ParseColumnList(const string &column_list) { } auto &create = (CreateStatement &)*parser.statements[0]; if (create.info->type != 
CatalogType::TABLE_ENTRY) { - throw ParserException("Expected a single CREATE TABLE statement"); + throw InternalException("Expected a single CREATE TABLE statement"); } auto &info = ((CreateTableInfo &)*create.info); return move(info.columns); @@ -104463,14 +114204,14 @@ string QueryErrorContext::Format(const string &query, const string &error_messag render_widths.push_back(char_render_width); cpos = Utf8Proc::NextGraphemeCluster(buf, len, cpos); } - } else { + } else { // LCOV_EXCL_START // invalid utf-8, we can't do much at this point // we just assume every character is a character, and every character has a render width of 1 for (idx_t cpos = 0; cpos < len; cpos++) { positions.push_back(cpos); render_widths.push_back(1); } - } + } // LCOV_EXCL_STOP // now we want to find the (unicode aware) start and end position idx_t epos = 0; // start by finding the error location inside the array @@ -104525,7 +114266,7 @@ string QueryErrorContext::Format(const string &query, const string &error_messag } string QueryErrorContext::FormatErrorRecursive(const string &msg, vector &values) { - string error_message = ExceptionFormatValue::Format(msg, values); + string error_message = values.empty() ? 
msg : ExceptionFormatValue::Format(msg, values); if (!statement || query_location >= statement->query.size()) { // no statement provided or query location out of range return error_message; @@ -104825,7 +114566,7 @@ bool QueryNode::Equals(const QueryNode *other) const { return other->type == type; } -void QueryNode::CopyProperties(QueryNode &other) { +void QueryNode::CopyProperties(QueryNode &other) const { for (auto &modifier : modifiers) { other.modifiers.push_back(modifier->Copy()); } @@ -104919,7 +114660,7 @@ unique_ptr ResultModifier::Deserialize(Deserializer &source) { case ResultModifierType::DISTINCT_MODIFIER: return DistinctModifier::Deserialize(source); default: - return nullptr; + throw InternalException("Unrecognized ResultModifierType for Deserialization"); } } @@ -105018,6 +114759,22 @@ unique_ptr OrderModifier::Copy() { return move(copy); } +string OrderByNode::ToString() const { + auto str = expression->ToString(); + str += (type == OrderType::ASCENDING) ? " ASC" : " DESC"; + switch (null_order) { + case OrderByNullType::NULLS_FIRST: + str += " NULLS FIRST"; + break; + case OrderByNullType::NULLS_LAST: + str += " NULLS LAST"; + break; + default: + break; + } + return str; +} + void OrderByNode::Serialize(Serializer &serializer) { serializer.Write(type); serializer.Write(null_order); @@ -105585,6 +115342,10 @@ unique_ptr VacuumStatement::Copy() const { namespace duckdb { +string BaseTableRef::ToString() const { + return "GET(" + schema_name + "." 
+ table_name + ")"; +} + bool BaseTableRef::Equals(const TableRef *other_p) const { if (!TableRef::Equals(other_p)) { return false; @@ -105889,6 +115650,10 @@ unique_ptr SubqueryRef::Deserialize(Deserializer &source) { namespace duckdb { +string TableFunctionRef::ToString() const { + return function->ToString(); +} + bool TableFunctionRef::Equals(const TableRef *other_p) const { if (!TableRef::Equals(other_p)) { return false; @@ -105933,6 +115698,10 @@ unique_ptr TableFunctionRef::Copy() { namespace duckdb { +string TableRef::ToString() const { + return string(); +} + bool TableRef::Equals(const TableRef *other) const { return other && type == other->type && alias == other->alias && SampleOptions::Equals(sample.get(), other->sample.get()); @@ -105974,14 +115743,14 @@ unique_ptr TableRef::Deserialize(Deserializer &source) { break; case TableReferenceType::CTE: case TableReferenceType::INVALID: - return nullptr; + throw InternalException("Unsupported type for TableRef::Deserialize"); } result->alias = alias; result->sample = move(sample); return result; } -void TableRef::CopyProperties(TableRef &target) { +void TableRef::CopyProperties(TableRef &target) const { D_ASSERT(type == target.type); target.alias = alias; target.query_location = query_location; @@ -106013,14 +115782,11 @@ unique_ptr Transformer::TransformConstraint(duckdb_libpgquery::PGLis return make_unique(columns, is_primary_key); } case duckdb_libpgquery::PG_CONSTR_CHECK: { - auto expression = TransformExpression(constraint->raw_expr); + auto expression = TransformExpression(constraint->raw_expr, 0); if (expression->HasSubquery()) { throw ParserException("subqueries prohibited in CHECK constraints"); } - if (expression->IsAggregate()) { - throw ParserException("aggregates prohibited in CHECK constraints"); - } - return make_unique(TransformExpression(constraint->raw_expr)); + return make_unique(TransformExpression(constraint->raw_expr, 0)); } default: throw NotImplementedException("Constraint type not 
handled yet!"); @@ -106043,7 +115809,7 @@ unique_ptr Transformer::TransformConstraint(duckdb_libpgquery::PGLis case duckdb_libpgquery::PG_CONSTR_NULL: return nullptr; case duckdb_libpgquery::PG_CONSTR_DEFAULT: - column.default_value = TransformExpression(constraint->raw_expr); + column.default_value = TransformExpression(constraint->raw_expr, 0); return nullptr; case duckdb_libpgquery::PG_CONSTR_FOREIGN: default: @@ -106060,19 +115826,19 @@ unique_ptr Transformer::TransformConstraint(duckdb_libpgquery::PGLis namespace duckdb { -unique_ptr Transformer::TransformArrayAccess(duckdb_libpgquery::PGAIndirection *indirection_node) { +unique_ptr Transformer::TransformArrayAccess(duckdb_libpgquery::PGAIndirection *indirection_node, + idx_t depth) { // transform the source expression unique_ptr result; - result = TransformExpression(indirection_node->arg); + result = TransformExpression(indirection_node->arg, depth + 1); // now go over the indices // note that a single indirection node can contain multiple indices // this happens for e.g. more complex accesses (e.g. (foo).field1[42]) for (auto node = indirection_node->indirection->head; node != nullptr; node = node->next) { auto target = reinterpret_cast(node->data.ptr_value); - if (!target) { - break; - } + D_ASSERT(target); + switch (target->type) { case duckdb_libpgquery::T_PGAIndices: { // index access (either slice or extract) @@ -106082,15 +115848,15 @@ unique_ptr Transformer::TransformArrayAccess(duckdb_libpgquery if (index->is_slice) { // slice children.push_back(!index->lidx ? make_unique(Value()) - : TransformExpression(index->lidx)); + : TransformExpression(index->lidx, depth + 1)); children.push_back(!index->uidx ? 
make_unique(Value()) - : TransformExpression(index->uidx)); + : TransformExpression(index->uidx, depth + 1)); result = make_unique(ExpressionType::ARRAY_SLICE, move(children)); } else { // array access D_ASSERT(!index->lidx); D_ASSERT(index->uidx); - children.push_back(TransformExpression(index->uidx)); + children.push_back(TransformExpression(index->uidx, depth + 1)); result = make_unique(ExpressionType::ARRAY_EXTRACT, move(children)); } break; @@ -106099,11 +115865,10 @@ unique_ptr Transformer::TransformArrayAccess(duckdb_libpgquery auto val = (duckdb_libpgquery::PGValue *)target; vector> children; children.push_back(move(result)); - children.push_back(TransformValue(*val)); + children.push_back(TransformValue(*val, depth + 1)); result = make_unique(ExpressionType::STRUCT_EXTRACT, move(children)); break; } - case duckdb_libpgquery::T_PGAStar: default: throw NotImplementedException("Unimplemented subscript type"); } @@ -106118,10 +115883,10 @@ unique_ptr Transformer::TransformArrayAccess(duckdb_libpgquery namespace duckdb { -unique_ptr Transformer::TransformBoolExpr(duckdb_libpgquery::PGBoolExpr *root) { +unique_ptr Transformer::TransformBoolExpr(duckdb_libpgquery::PGBoolExpr *root, idx_t depth) { unique_ptr result; for (auto node = root->args->head; node != nullptr; node = node->next) { - auto next = TransformExpression(reinterpret_cast(node->data.ptr_value)); + auto next = TransformExpression(reinterpret_cast(node->data.ptr_value), depth + 1); switch (root->boolop) { case duckdb_libpgquery::PG_AND_EXPR: { @@ -106169,33 +115934,30 @@ unique_ptr Transformer::TransformBoolExpr(duckdb_libpgquery::P namespace duckdb { -unique_ptr Transformer::TransformCase(duckdb_libpgquery::PGCaseExpr *root) { - if (!root) { - return nullptr; - } - // CASE expression WHEN value THEN result [WHEN ...] ELSE result uses this, - // but we rewrite to CASE WHEN expression = value THEN result ... to only - // have to handle one case downstream. 
+unique_ptr Transformer::TransformCase(duckdb_libpgquery::PGCaseExpr *root, idx_t depth) { + D_ASSERT(root); + auto case_node = make_unique(); for (auto cell = root->args->head; cell != nullptr; cell = cell->next) { CaseCheck case_check; auto w = reinterpret_cast(cell->data.ptr_value); - auto test_raw = TransformExpression(reinterpret_cast(w->expr)); + auto test_raw = TransformExpression(reinterpret_cast(w->expr), depth + 1); unique_ptr test; - auto arg = TransformExpression(reinterpret_cast(root->arg)); + auto arg = TransformExpression(reinterpret_cast(root->arg), depth + 1); if (arg) { case_check.when_expr = make_unique(ExpressionType::COMPARE_EQUAL, move(arg), move(test_raw)); } else { case_check.when_expr = move(test_raw); } - case_check.then_expr = TransformExpression(reinterpret_cast(w->result)); + case_check.then_expr = TransformExpression(reinterpret_cast(w->result), depth + 1); case_node->case_checks.push_back(move(case_check)); } if (root->defresult) { - case_node->else_expr = TransformExpression(reinterpret_cast(root->defresult)); + case_node->else_expr = + TransformExpression(reinterpret_cast(root->defresult), depth + 1); } else { case_node->else_expr = make_unique(Value(LogicalType::SQLNULL)); } @@ -106211,23 +115973,22 @@ unique_ptr Transformer::TransformCase(duckdb_libpgquery::PGCas namespace duckdb { -unique_ptr Transformer::TransformTypeCast(duckdb_libpgquery::PGTypeCast *root) { - if (!root) { - return nullptr; - } +unique_ptr Transformer::TransformTypeCast(duckdb_libpgquery::PGTypeCast *root, idx_t depth) { + D_ASSERT(root); + // get the type to cast to auto type_name = root->typeName; LogicalType target_type = TransformTypeName(type_name); // check for a constant BLOB value, then return ConstantExpression with BLOB - if (target_type == LogicalType::BLOB && root->arg->type == duckdb_libpgquery::T_PGAConst) { + if (!root->tryCast && target_type == LogicalType::BLOB && root->arg->type == duckdb_libpgquery::T_PGAConst) { auto c = 
reinterpret_cast(root->arg); if (c->val.type == duckdb_libpgquery::T_PGString) { return make_unique(Value::BLOB(string(c->val.val.str))); } } // transform the expression node - auto expression = TransformExpression(root->arg); + auto expression = TransformExpression(root->arg, depth + 1); bool try_cast = root->tryCast; // now create a cast operation @@ -106242,17 +116003,17 @@ namespace duckdb { // COALESCE(a,b,c) returns the first argument that is NOT NULL, so // rewrite into CASE(a IS NOT NULL, a, CASE(b IS NOT NULL, b, c)) -unique_ptr Transformer::TransformCoalesce(duckdb_libpgquery::PGAExpr *root) { - if (!root) { - return nullptr; - } +unique_ptr Transformer::TransformCoalesce(duckdb_libpgquery::PGAExpr *root, idx_t depth) { + D_ASSERT(root); + auto coalesce_args = reinterpret_cast(root->lexpr); D_ASSERT(coalesce_args->length > 0); // parser ensures this already auto coalesce_op = make_unique(ExpressionType::OPERATOR_COALESCE); for (auto cell = coalesce_args->head; cell; cell = cell->next) { // get the value of the COALESCE - auto value_expr = TransformExpression(reinterpret_cast(cell->data.ptr_value)); + auto value_expr = + TransformExpression(reinterpret_cast(cell->data.ptr_value), depth + 1); coalesce_op->children.push_back(move(value_expr)); } return move(coalesce_op); @@ -106267,12 +116028,12 @@ unique_ptr Transformer::TransformCoalesce(duckdb_libpgquery::P namespace duckdb { -unique_ptr Transformer::TransformColumnRef(duckdb_libpgquery::PGColumnRef *root) { +unique_ptr Transformer::TransformColumnRef(duckdb_libpgquery::PGColumnRef *root, idx_t depth) { auto fields = root->fields; switch ((reinterpret_cast(fields->head->data.ptr_value))->type) { case duckdb_libpgquery::T_PGString: { if (fields->length < 1) { - throw ParserException("Unexpected field length"); + throw InternalException("Unexpected field length"); } string column_name, table_name; if (fields->length == 1) { @@ -106304,9 +116065,8 @@ unique_ptr 
Transformer::TransformColumnRef(duckdb_libpgquery:: return make_unique(); } default: - break; + throw NotImplementedException("ColumnRef not implemented!"); } - throw NotImplementedException("ColumnRef not implemented!"); } } // namespace duckdb @@ -106318,7 +116078,7 @@ unique_ptr Transformer::TransformColumnRef(duckdb_libpgquery:: namespace duckdb { -unique_ptr Transformer::TransformValue(duckdb_libpgquery::PGValue val) { +unique_ptr Transformer::TransformValue(duckdb_libpgquery::PGValue val, idx_t depth) { switch (val.type) { case duckdb_libpgquery::T_PGInteger: D_ASSERT(val.val.ival <= NumericLimits::Maximum()); @@ -106367,7 +116127,7 @@ unique_ptr Transformer::TransformValue(duckdb_libpgquery::PG if (width <= Decimal::MAX_WIDTH_DECIMAL) { // we can cast the value as a decimal Value val = Value(str_val); - val = val.CastAs(LogicalType(LogicalTypeId::DECIMAL, width, scale)); + val = val.CastAs(LogicalType::DECIMAL(width, scale)); return make_unique(move(val)); } } @@ -106385,8 +116145,8 @@ unique_ptr Transformer::TransformValue(duckdb_libpgquery::PG } } -unique_ptr Transformer::TransformConstant(duckdb_libpgquery::PGAConst *c) { - return TransformValue(c->val); +unique_ptr Transformer::TransformConstant(duckdb_libpgquery::PGAConst *c, idx_t depth) { + return TransformValue(c->val, depth + 1); } } // namespace duckdb @@ -106396,11 +116156,10 @@ unique_ptr Transformer::TransformConstant(duckdb_libpgquery::P namespace duckdb { -unique_ptr Transformer::TransformResTarget(duckdb_libpgquery::PGResTarget *root) { - if (!root) { - return nullptr; - } - auto expr = TransformExpression(root->val); +unique_ptr Transformer::TransformResTarget(duckdb_libpgquery::PGResTarget *root, idx_t depth) { + D_ASSERT(root); + + auto expr = TransformExpression(root->val, depth + 1); if (!expr) { return nullptr; } @@ -106410,85 +116169,82 @@ unique_ptr Transformer::TransformResTarget(duckdb_libpgquery:: return expr; } -unique_ptr 
Transformer::TransformNamedArg(duckdb_libpgquery::PGNamedArgExpr *root) { - if (!root) { - return nullptr; - } - auto expr = TransformExpression((duckdb_libpgquery::PGNode *)root->arg); +unique_ptr Transformer::TransformNamedArg(duckdb_libpgquery::PGNamedArgExpr *root, idx_t depth) { + D_ASSERT(root); + + auto expr = TransformExpression((duckdb_libpgquery::PGNode *)root->arg, depth + 1); if (root->name) { expr->alias = string(root->name); } return expr; } -unique_ptr Transformer::TransformExpression(duckdb_libpgquery::PGNode *node) { +unique_ptr Transformer::TransformExpression(duckdb_libpgquery::PGNode *node, idx_t depth) { if (!node) { return nullptr; } + if (depth > max_expression_depth) { + throw ParserException("Expression tree is too deep (maximum depth %d)", max_expression_depth); + } + switch (node->type) { case duckdb_libpgquery::T_PGColumnRef: - return TransformColumnRef(reinterpret_cast(node)); + return TransformColumnRef(reinterpret_cast(node), depth); case duckdb_libpgquery::T_PGAConst: - return TransformConstant(reinterpret_cast(node)); + return TransformConstant(reinterpret_cast(node), depth); case duckdb_libpgquery::T_PGAExpr: - return TransformAExpr(reinterpret_cast(node)); + return TransformAExpr(reinterpret_cast(node), depth); case duckdb_libpgquery::T_PGFuncCall: - return TransformFuncCall(reinterpret_cast(node)); + return TransformFuncCall(reinterpret_cast(node), depth); case duckdb_libpgquery::T_PGBoolExpr: - return TransformBoolExpr(reinterpret_cast(node)); + return TransformBoolExpr(reinterpret_cast(node), depth); case duckdb_libpgquery::T_PGTypeCast: - return TransformTypeCast(reinterpret_cast(node)); + return TransformTypeCast(reinterpret_cast(node), depth); case duckdb_libpgquery::T_PGCaseExpr: - return TransformCase(reinterpret_cast(node)); + return TransformCase(reinterpret_cast(node), depth); case duckdb_libpgquery::T_PGSubLink: - return TransformSubquery(reinterpret_cast(node)); + return TransformSubquery(reinterpret_cast(node), 
depth); case duckdb_libpgquery::T_PGCoalesceExpr: - return TransformCoalesce(reinterpret_cast(node)); + return TransformCoalesce(reinterpret_cast(node), depth); case duckdb_libpgquery::T_PGNullTest: - return TransformNullTest(reinterpret_cast(node)); + return TransformNullTest(reinterpret_cast(node), depth); case duckdb_libpgquery::T_PGResTarget: - return TransformResTarget(reinterpret_cast(node)); + return TransformResTarget(reinterpret_cast(node), depth); case duckdb_libpgquery::T_PGParamRef: - return TransformParamRef(reinterpret_cast(node)); + return TransformParamRef(reinterpret_cast(node), depth); case duckdb_libpgquery::T_PGNamedArgExpr: - return TransformNamedArg(reinterpret_cast(node)); + return TransformNamedArg(reinterpret_cast(node), depth); case duckdb_libpgquery::T_PGSQLValueFunction: - return TransformSQLValueFunction(reinterpret_cast(node)); + return TransformSQLValueFunction(reinterpret_cast(node), depth); case duckdb_libpgquery::T_PGSetToDefault: return make_unique(); case duckdb_libpgquery::T_PGCollateClause: - return TransformCollateExpr(reinterpret_cast(node)); + return TransformCollateExpr(reinterpret_cast(node), depth); case duckdb_libpgquery::T_PGIntervalConstant: - return TransformInterval(reinterpret_cast(node)); + return TransformInterval(reinterpret_cast(node), depth); case duckdb_libpgquery::T_PGLambdaFunction: - return TransformLambda(reinterpret_cast(node)); + return TransformLambda(reinterpret_cast(node), depth); case duckdb_libpgquery::T_PGAIndirection: - return TransformArrayAccess(reinterpret_cast(node)); + return TransformArrayAccess(reinterpret_cast(node), depth); case duckdb_libpgquery::T_PGPositionalReference: - return TransformPositionalReference(reinterpret_cast(node)); + return TransformPositionalReference(reinterpret_cast(node), depth); default: throw NotImplementedException("Expr of type %d not implemented\n", (int)node->type); } } -bool Transformer::TransformExpressionList(duckdb_libpgquery::PGList *list, - vector> 
&result) { - if (!list) { - return false; - } - for (auto node = list->head; node != nullptr; node = node->next) { +void Transformer::TransformExpressionList(duckdb_libpgquery::PGList &list, vector> &result, + idx_t depth) { + for (auto node = list.head; node != nullptr; node = node->next) { auto target = reinterpret_cast(node->data.ptr_value); - if (!target) { - return false; - } - auto expr = TransformExpression(target); - if (!expr) { - return false; - } + D_ASSERT(target); + + auto expr = TransformExpression(target, depth + 1); + D_ASSERT(expr); + result.push_back(move(expr)); } - return true; } } // namespace duckdb @@ -106518,6 +116274,8 @@ static ExpressionType WindowToExpressionType(string &fun_name) { return ExpressionType::WINDOW_FIRST_VALUE; } else if (fun_name == "last_value" || fun_name == "last") { return ExpressionType::WINDOW_LAST_VALUE; + } else if (fun_name == "nth_value" || fun_name == "last") { + return ExpressionType::WINDOW_NTH_VALUE; } else if (fun_name == "cume_dist") { return ExpressionType::WINDOW_CUME_DIST; } else if (fun_name == "lead") { @@ -106531,71 +116289,63 @@ static ExpressionType WindowToExpressionType(string &fun_name) { return ExpressionType::WINDOW_AGGREGATE; } -void Transformer::TransformWindowDef(duckdb_libpgquery::PGWindowDef *window_spec, WindowExpression *expr) { +void Transformer::TransformWindowDef(duckdb_libpgquery::PGWindowDef *window_spec, WindowExpression *expr, idx_t depth) { D_ASSERT(window_spec); D_ASSERT(expr); // next: partitioning/ordering expressions - TransformExpressionList(window_spec->partitionClause, expr->partitions); + if (window_spec->partitionClause) { + TransformExpressionList(*window_spec->partitionClause, expr->partitions, depth); + } TransformOrderBy(window_spec->orderClause, expr->orders); } -void Transformer::TransformWindowFrame(duckdb_libpgquery::PGWindowDef *window_spec, WindowExpression *expr) { +void Transformer::TransformWindowFrame(duckdb_libpgquery::PGWindowDef *window_spec, 
WindowExpression *expr, + idx_t depth) { D_ASSERT(window_spec); D_ASSERT(expr); // finally: specifics of bounds - expr->start_expr = TransformExpression(window_spec->startOffset); - expr->end_expr = TransformExpression(window_spec->endOffset); + expr->start_expr = TransformExpression(window_spec->startOffset, depth + 1); + expr->end_expr = TransformExpression(window_spec->endOffset, depth + 1); if ((window_spec->frameOptions & FRAMEOPTION_END_UNBOUNDED_PRECEDING) || (window_spec->frameOptions & FRAMEOPTION_START_UNBOUNDED_FOLLOWING)) { - throw Exception( + throw InternalException( "Window frames starting with unbounded following or ending in unbounded preceding make no sense"); } + const bool rangeMode = (window_spec->frameOptions & FRAMEOPTION_RANGE) != 0; if (window_spec->frameOptions & FRAMEOPTION_START_UNBOUNDED_PRECEDING) { expr->start = WindowBoundary::UNBOUNDED_PRECEDING; - } else if (window_spec->frameOptions & FRAMEOPTION_START_UNBOUNDED_FOLLOWING) { - expr->start = WindowBoundary::UNBOUNDED_FOLLOWING; } else if (window_spec->frameOptions & FRAMEOPTION_START_VALUE_PRECEDING) { - expr->start = WindowBoundary::EXPR_PRECEDING; + expr->start = rangeMode ? WindowBoundary::EXPR_PRECEDING_RANGE : WindowBoundary::EXPR_PRECEDING_ROWS; } else if (window_spec->frameOptions & FRAMEOPTION_START_VALUE_FOLLOWING) { - expr->start = WindowBoundary::EXPR_FOLLOWING; - } else if ((window_spec->frameOptions & FRAMEOPTION_START_CURRENT_ROW) && - (window_spec->frameOptions & FRAMEOPTION_RANGE)) { - expr->start = WindowBoundary::CURRENT_ROW_RANGE; - } else if ((window_spec->frameOptions & FRAMEOPTION_START_CURRENT_ROW) && - (window_spec->frameOptions & FRAMEOPTION_ROWS)) { - expr->start = WindowBoundary::CURRENT_ROW_ROWS; - } - - if (window_spec->frameOptions & FRAMEOPTION_END_UNBOUNDED_PRECEDING) { - expr->end = WindowBoundary::UNBOUNDED_PRECEDING; - } else if (window_spec->frameOptions & FRAMEOPTION_END_UNBOUNDED_FOLLOWING) { + expr->start = rangeMode ? 
WindowBoundary::EXPR_FOLLOWING_RANGE : WindowBoundary::EXPR_FOLLOWING_ROWS; + } else if (window_spec->frameOptions & FRAMEOPTION_START_CURRENT_ROW) { + expr->start = rangeMode ? WindowBoundary::CURRENT_ROW_RANGE : WindowBoundary::CURRENT_ROW_ROWS; + } + + if (window_spec->frameOptions & FRAMEOPTION_END_UNBOUNDED_FOLLOWING) { expr->end = WindowBoundary::UNBOUNDED_FOLLOWING; } else if (window_spec->frameOptions & FRAMEOPTION_END_VALUE_PRECEDING) { - expr->end = WindowBoundary::EXPR_PRECEDING; + expr->end = rangeMode ? WindowBoundary::EXPR_PRECEDING_RANGE : WindowBoundary::EXPR_PRECEDING_ROWS; } else if (window_spec->frameOptions & FRAMEOPTION_END_VALUE_FOLLOWING) { - expr->end = WindowBoundary::EXPR_FOLLOWING; - } else if ((window_spec->frameOptions & FRAMEOPTION_END_CURRENT_ROW) && - (window_spec->frameOptions & FRAMEOPTION_RANGE)) { - expr->end = WindowBoundary::CURRENT_ROW_RANGE; - } else if ((window_spec->frameOptions & FRAMEOPTION_END_CURRENT_ROW) && - (window_spec->frameOptions & FRAMEOPTION_ROWS)) { - expr->end = WindowBoundary::CURRENT_ROW_ROWS; + expr->end = rangeMode ? WindowBoundary::EXPR_FOLLOWING_RANGE : WindowBoundary::EXPR_FOLLOWING_ROWS; + } else if (window_spec->frameOptions & FRAMEOPTION_END_CURRENT_ROW) { + expr->end = rangeMode ? 
WindowBoundary::CURRENT_ROW_RANGE : WindowBoundary::CURRENT_ROW_ROWS; } D_ASSERT(expr->start != WindowBoundary::INVALID && expr->end != WindowBoundary::INVALID); - if (((expr->start == WindowBoundary::EXPR_PRECEDING || expr->start == WindowBoundary::EXPR_PRECEDING) && + if (((window_spec->frameOptions & (FRAMEOPTION_START_VALUE_PRECEDING | FRAMEOPTION_START_VALUE_FOLLOWING)) && !expr->start_expr) || - ((expr->end == WindowBoundary::EXPR_PRECEDING || expr->end == WindowBoundary::EXPR_PRECEDING) && + ((window_spec->frameOptions & (FRAMEOPTION_END_VALUE_PRECEDING | FRAMEOPTION_END_VALUE_FOLLOWING)) && !expr->end_expr)) { - throw Exception("Failed to transform window boundary expression"); + throw InternalException("Failed to transform window boundary expression"); } } -unique_ptr Transformer::TransformFuncCall(duckdb_libpgquery::PGFuncCall *root) { +unique_ptr Transformer::TransformFuncCall(duckdb_libpgquery::PGFuncCall *root, idx_t depth) { auto name = root->funcname; string schema, function_name; if (name->length == 2) { @@ -106622,17 +116372,15 @@ unique_ptr Transformer::TransformFuncCall(duckdb_libpgquery::P auto win_fun_type = WindowToExpressionType(lowercase_name); if (win_fun_type == ExpressionType::INVALID) { - throw Exception("Unknown/unsupported window function"); + throw InternalException("Unknown/unsupported window function"); } auto expr = make_unique(win_fun_type, schema, lowercase_name); if (root->args) { vector> function_list; - auto res = TransformExpressionList(root->args, function_list); - if (!res) { - throw Exception("Failed to transform window function children"); - } + TransformExpressionList(*root->args, function_list, depth); + if (win_fun_type == ExpressionType::WINDOW_AGGREGATE) { for (auto &child : function_list) { expr->children.push_back(move(child)); @@ -106641,15 +116389,28 @@ unique_ptr Transformer::TransformFuncCall(duckdb_libpgquery::P if (!function_list.empty()) { expr->children.push_back(move(function_list[0])); } - if 
(function_list.size() > 1) { - D_ASSERT(win_fun_type == ExpressionType::WINDOW_LEAD || win_fun_type == ExpressionType::WINDOW_LAG); - expr->offset_expr = move(function_list[1]); - } - if (function_list.size() > 2) { - D_ASSERT(win_fun_type == ExpressionType::WINDOW_LEAD || win_fun_type == ExpressionType::WINDOW_LAG); - expr->default_expr = move(function_list[2]); + if (win_fun_type == ExpressionType::WINDOW_LEAD || win_fun_type == ExpressionType::WINDOW_LAG) { + if (function_list.size() > 1) { + expr->offset_expr = move(function_list[1]); + } + if (function_list.size() > 2) { + expr->default_expr = move(function_list[2]); + } + if (function_list.size() > 3) { + throw ParserException("Incorrect number of parameters for function %s", lowercase_name); + } + } else if (win_fun_type == ExpressionType::WINDOW_NTH_VALUE) { + if (function_list.size() > 1) { + expr->children.push_back(move(function_list[1])); + } + if (function_list.size() > 2) { + throw ParserException("Incorrect number of parameters for function %s", lowercase_name); + } + } else { + if (function_list.size() > 1) { + throw ParserException("Incorrect number of parameters for function %s", lowercase_name); + } } - D_ASSERT(function_list.size() <= 3); } } auto window_spec = reinterpret_cast(root->over); @@ -106670,8 +116431,8 @@ unique_ptr Transformer::TransformFuncCall(duckdb_libpgquery::P window_ref = it->second; D_ASSERT(window_ref); } - TransformWindowDef(window_ref, expr.get()); - TransformWindowFrame(window_spec, expr.get()); + TransformWindowDef(window_ref, expr.get(), depth); + TransformWindowFrame(window_spec, expr.get(), depth); return move(expr); } @@ -106680,13 +116441,13 @@ unique_ptr Transformer::TransformFuncCall(duckdb_libpgquery::P vector> children; if (root->args != nullptr) { for (auto node = root->args->head; node != nullptr; node = node->next) { - auto child_expr = TransformExpression((duckdb_libpgquery::PGNode *)node->data.ptr_value); + auto child_expr = 
TransformExpression((duckdb_libpgquery::PGNode *)node->data.ptr_value, depth + 1); children.push_back(move(child_expr)); } } unique_ptr filter_expr; if (root->agg_filter) { - filter_expr = TransformExpression(root->agg_filter); + filter_expr = TransformExpression(root->agg_filter, depth + 1); } // star gets eaten in the parser @@ -106721,7 +116482,7 @@ unique_ptr Transformer::TransformFuncCall(duckdb_libpgquery::P return move(coalesce_op); } - auto function = make_unique(schema, lowercase_name.c_str(), children, move(filter_expr), + auto function = make_unique(schema, lowercase_name.c_str(), move(children), move(filter_expr), root->agg_distinct); function->query_location = root->location; return move(function); @@ -106760,17 +116521,16 @@ static string SQLValueOpToString(duckdb_libpgquery::PGSQLValueFunctionOp op) { case duckdb_libpgquery::PG_SVFOP_CURRENT_SCHEMA: return "current_schema"; default: - throw Exception("Could not find named SQL value function specification " + to_string((int)op)); + throw InternalException("Could not find named SQL value function specification " + to_string((int)op)); } } -unique_ptr Transformer::TransformSQLValueFunction(duckdb_libpgquery::PGSQLValueFunction *node) { - if (!node) { - return nullptr; - } +unique_ptr Transformer::TransformSQLValueFunction(duckdb_libpgquery::PGSQLValueFunction *node, + idx_t depth) { + D_ASSERT(node); vector> children; auto fname = SQLValueOpToString(node->op); - return make_unique(DEFAULT_SCHEMA, fname, children); + return make_unique(DEFAULT_SCHEMA, fname, move(children)); } } // namespace duckdb @@ -106782,7 +116542,7 @@ unique_ptr Transformer::TransformSQLValueFunction(duckdb_libpg namespace duckdb { -unique_ptr Transformer::TransformInterval(duckdb_libpgquery::PGIntervalConstant *node) { +unique_ptr Transformer::TransformInterval(duckdb_libpgquery::PGIntervalConstant *node, idx_t depth) { // handle post-fix notation of INTERVAL // three scenarios @@ -106792,7 +116552,7 @@ unique_ptr 
Transformer::TransformInterval(duckdb_libpgquery::P unique_ptr expr; switch (node->val_type) { case duckdb_libpgquery::T_PGAExpr: - expr = TransformExpression(node->eval); + expr = TransformExpression(node->eval, depth + 1); break; case duckdb_libpgquery::T_PGString: expr = make_unique(Value(node->sval)); @@ -106801,7 +116561,7 @@ unique_ptr Transformer::TransformInterval(duckdb_libpgquery::P expr = make_unique(Value(node->ival)); break; default: - throw ParserException("Unsupported interval transformation"); + throw InternalException("Unsupported interval transformation"); } if (!node->typmods) { @@ -106882,14 +116642,14 @@ unique_ptr Transformer::TransformInterval(duckdb_libpgquery::P fname = "to_microseconds"; target_type = LogicalType::BIGINT; } else { - throw ParserException("Unsupported interval post-fix"); + throw InternalException("Unsupported interval post-fix"); } // first push a cast to the target type expr = make_unique(target_type, move(expr)); // now push the operation vector> children; children.push_back(move(expr)); - return make_unique(fname, children); + return make_unique(fname, move(children)); } } // namespace duckdb @@ -106899,9 +116659,9 @@ unique_ptr Transformer::TransformInterval(duckdb_libpgquery::P namespace duckdb { -unique_ptr Transformer::TransformNullTest(duckdb_libpgquery::PGNullTest *root) { +unique_ptr Transformer::TransformNullTest(duckdb_libpgquery::PGNullTest *root, idx_t depth) { D_ASSERT(root); - auto arg = TransformExpression(reinterpret_cast(root->arg)); + auto arg = TransformExpression(reinterpret_cast(root->arg), depth + 1); if (root->argisrow) { throw NotImplementedException("IS NULL argisrow"); } @@ -106931,18 +116691,16 @@ static string ExtractColumnFromLambda(ParsedExpression &expr) { return colref.column_name; } -unique_ptr Transformer::TransformLambda(duckdb_libpgquery::PGLambdaFunction *node) { +unique_ptr Transformer::TransformLambda(duckdb_libpgquery::PGLambdaFunction *node, idx_t depth) { vector> 
parameter_expressions; - if (!TransformExpressionList(node->parameters, parameter_expressions)) { - throw ParserException("Failed to transform expression list"); - } + TransformExpressionList(*node->parameters, parameter_expressions, depth + 1); vector parameters; parameters.reserve(parameter_expressions.size()); for (auto &expr : parameter_expressions) { parameters.push_back(ExtractColumnFromLambda(*expr)); } - auto lambda_function = TransformExpression(node->function); + auto lambda_function = TransformExpression(node->function, depth + 1); return make_unique(move(parameters), move(lambda_function)); } @@ -106956,6 +116714,11 @@ unique_ptr Transformer::TransformLambda(duckdb_libpgquery::PGL + + + + + namespace duckdb { ExpressionType Transformer::OperatorToExpressionType(const string &op) { @@ -106982,7 +116745,7 @@ unique_ptr Transformer::TransformUnaryOperator(const string &o children.push_back(move(child)); // built-in operator function - auto result = make_unique(schema, op, children); + auto result = make_unique(schema, op, move(children)); result->is_operator = true; return move(result); } @@ -106999,7 +116762,7 @@ unique_ptr Transformer::TransformBinaryOperator(const string & // rewrite 'asdf' SIMILAR TO '.*sd.*' into regexp_full_match('asdf', '.*sd.*') bool invert_similar = op == "!~"; - auto result = make_unique(schema, "regexp_full_match", children); + auto result = make_unique(schema, "regexp_full_match", move(children)); if (invert_similar) { return make_unique(ExpressionType::OPERATOR_NOT, move(result)); } else { @@ -107012,21 +116775,49 @@ unique_ptr Transformer::TransformBinaryOperator(const string & return make_unique(target_type, move(children[0]), move(children[1])); } // not a special operator: convert to a function expression - auto result = make_unique(schema, op, children); + auto result = make_unique(schema, op, move(children)); result->is_operator = true; return move(result); } } -unique_ptr 
Transformer::TransformAExpr(duckdb_libpgquery::PGAExpr *root) { - if (!root) { - return nullptr; - } +unique_ptr Transformer::TransformAExpr(duckdb_libpgquery::PGAExpr *root, idx_t depth) { + D_ASSERT(root); auto name = string((reinterpret_cast(root->name->head->data.ptr_value))->val.str); switch (root->kind) { + case duckdb_libpgquery::PG_AEXPR_OP_ALL: + case duckdb_libpgquery::PG_AEXPR_OP_ANY: { + // left=ANY(right) + // we turn this into left=ANY((SELECT UNNEST(right))) + auto left_expr = TransformExpression(root->lexpr, depth + 1); + auto right_expr = TransformExpression(root->rexpr, depth + 1); + + auto subquery_expr = make_unique(); + auto select_statement = make_unique(); + auto select_node = make_unique(); + vector> children; + children.push_back(move(right_expr)); + + select_node->select_list.push_back(make_unique("UNNEST", move(children))); + select_node->from_table = make_unique(); + select_statement->node = move(select_node); + subquery_expr->subquery = move(select_statement); + subquery_expr->subquery_type = SubqueryType::ANY; + subquery_expr->child = move(left_expr); + subquery_expr->comparison_type = OperatorToExpressionType(name); + + if (root->kind == duckdb_libpgquery::PG_AEXPR_OP_ALL) { + // ALL sublink is equivalent to NOT(ANY) with inverted comparison + // e.g. 
[= ALL()] is equivalent to [NOT(<> ANY())] + // first invert the comparison type + subquery_expr->comparison_type = NegateComparisionExpression(subquery_expr->comparison_type); + return make_unique(ExpressionType::OPERATOR_NOT, move(subquery_expr)); + } + return move(subquery_expr); + } case duckdb_libpgquery::PG_AEXPR_IN: { - auto left_expr = TransformExpression(root->lexpr); + auto left_expr = TransformExpression(root->lexpr, depth + 1); ExpressionType operator_type; // this looks very odd, but seems to be the way to find out its NOT IN if (name == "<>") { @@ -107037,15 +116828,15 @@ unique_ptr Transformer::TransformAExpr(duckdb_libpgquery::PGAE operator_type = ExpressionType::COMPARE_IN; } auto result = make_unique(operator_type, move(left_expr)); - TransformExpressionList((duckdb_libpgquery::PGList *)root->rexpr, result->children); + TransformExpressionList(*((duckdb_libpgquery::PGList *)root->rexpr), result->children, depth); return move(result); } // rewrite NULLIF(a, b) into CASE WHEN a=b THEN NULL ELSE a END case duckdb_libpgquery::PG_AEXPR_NULLIF: { vector> children; - children.push_back(TransformExpression(root->lexpr)); - children.push_back(TransformExpression(root->rexpr)); - return make_unique("nullif", children); + children.push_back(TransformExpression(root->lexpr, depth + 1)); + children.push_back(TransformExpression(root->rexpr, depth + 1)); + return make_unique("nullif", move(children)); } // rewrite (NOT) X BETWEEN A AND B into (NOT) AND(GREATERTHANOREQUALTO(X, // A), LESSTHANOREQUALTO(X, B)) @@ -107053,20 +116844,16 @@ unique_ptr Transformer::TransformAExpr(duckdb_libpgquery::PGAE case duckdb_libpgquery::PG_AEXPR_NOT_BETWEEN: { auto between_args = reinterpret_cast(root->rexpr); if (between_args->length != 2 || !between_args->head->data.ptr_value || !between_args->tail->data.ptr_value) { - throw Exception("(NOT) BETWEEN needs two args"); + throw InternalException("(NOT) BETWEEN needs two args"); } - auto between_left = - 
TransformExpression(reinterpret_cast(between_args->head->data.ptr_value)); - auto between_right = - TransformExpression(reinterpret_cast(between_args->tail->data.ptr_value)); + auto input = TransformExpression(root->lexpr, depth + 1); + auto between_left = TransformExpression( + reinterpret_cast(between_args->head->data.ptr_value), depth + 1); + auto between_right = TransformExpression( + reinterpret_cast(between_args->tail->data.ptr_value), depth + 1); - auto compare_left = make_unique(ExpressionType::COMPARE_GREATERTHANOREQUALTO, - TransformExpression(root->lexpr), move(between_left)); - auto compare_right = make_unique(ExpressionType::COMPARE_LESSTHANOREQUALTO, - TransformExpression(root->lexpr), move(between_right)); - auto compare_between = make_unique(ExpressionType::CONJUNCTION_AND, move(compare_left), - move(compare_right)); + auto compare_between = make_unique(move(input), move(between_left), move(between_right)); if (root->kind == duckdb_libpgquery::PG_AEXPR_BETWEEN) { return move(compare_between); } else { @@ -107075,8 +116862,8 @@ unique_ptr Transformer::TransformAExpr(duckdb_libpgquery::PGAE } // rewrite SIMILAR TO into regexp_full_match('asdf', '.*sd.*') case duckdb_libpgquery::PG_AEXPR_SIMILAR: { - auto left_expr = TransformExpression(root->lexpr); - auto right_expr = TransformExpression(root->rexpr); + auto left_expr = TransformExpression(root->lexpr, depth + 1); + auto right_expr = TransformExpression(root->rexpr, depth + 1); vector> children; children.push_back(move(left_expr)); @@ -107102,7 +116889,7 @@ unique_ptr Transformer::TransformAExpr(duckdb_libpgquery::PGAE } const auto schema = DEFAULT_SCHEMA; const auto regex_function = "regexp_full_match"; - auto result = make_unique(schema, regex_function, children); + auto result = make_unique(schema, regex_function, move(children)); if (invert_similar) { return make_unique(ExpressionType::OPERATOR_NOT, move(result)); @@ -107111,14 +116898,14 @@ unique_ptr 
Transformer::TransformAExpr(duckdb_libpgquery::PGAE } } case duckdb_libpgquery::PG_AEXPR_NOT_DISTINCT: { - auto left_expr = TransformExpression(root->lexpr); - auto right_expr = TransformExpression(root->rexpr); + auto left_expr = TransformExpression(root->lexpr, depth + 1); + auto right_expr = TransformExpression(root->rexpr, depth + 1); return make_unique(ExpressionType::COMPARE_NOT_DISTINCT_FROM, move(left_expr), move(right_expr)); } case duckdb_libpgquery::PG_AEXPR_DISTINCT: { - auto left_expr = TransformExpression(root->lexpr); - auto right_expr = TransformExpression(root->rexpr); + auto left_expr = TransformExpression(root->lexpr, depth + 1); + auto right_expr = TransformExpression(root->rexpr, depth + 1); return make_unique(ExpressionType::COMPARE_DISTINCT_FROM, move(left_expr), move(right_expr)); } @@ -107126,8 +116913,8 @@ unique_ptr Transformer::TransformAExpr(duckdb_libpgquery::PGAE default: break; } - auto left_expr = TransformExpression(root->lexpr); - auto right_expr = TransformExpression(root->rexpr); + auto left_expr = TransformExpression(root->lexpr, depth + 1); + auto right_expr = TransformExpression(root->rexpr, depth + 1); if (!left_expr) { // prefix operator @@ -107147,10 +116934,8 @@ unique_ptr Transformer::TransformAExpr(duckdb_libpgquery::PGAE namespace duckdb { -unique_ptr Transformer::TransformParamRef(duckdb_libpgquery::PGParamRef *node) { - if (!node) { - return nullptr; - } +unique_ptr Transformer::TransformParamRef(duckdb_libpgquery::PGParamRef *node, idx_t depth) { + D_ASSERT(node); auto expr = make_unique(); if (node->number == 0) { expr->parameter_nr = ParamCount() + 1; @@ -107168,7 +116953,8 @@ unique_ptr Transformer::TransformParamRef(duckdb_libpgquery::P namespace duckdb { -unique_ptr Transformer::TransformPositionalReference(duckdb_libpgquery::PGPositionalReference *node) { +unique_ptr Transformer::TransformPositionalReference(duckdb_libpgquery::PGPositionalReference *node, + idx_t depth) { if (node->position <= 0) { throw 
ParserException("Positional reference node needs to be >= 1"); } @@ -107184,15 +116970,12 @@ unique_ptr Transformer::TransformPositionalReference(duckdb_li namespace duckdb { -unique_ptr Transformer::TransformSubquery(duckdb_libpgquery::PGSubLink *root) { - if (!root) { - return nullptr; - } +unique_ptr Transformer::TransformSubquery(duckdb_libpgquery::PGSubLink *root, idx_t depth) { + D_ASSERT(root); auto subquery_expr = make_unique(); + subquery_expr->subquery = TransformSelect(root->subselect); - if (!subquery_expr->subquery) { - return nullptr; - } + D_ASSERT(subquery_expr->subquery); D_ASSERT(subquery_expr->subquery->node->GetSelectList().size() > 0); switch (root->subLinkType) { @@ -107204,7 +116987,7 @@ unique_ptr Transformer::TransformSubquery(duckdb_libpgquery::P case duckdb_libpgquery::PG_ALL_SUBLINK: { // comparison with ANY() or ALL() subquery_expr->subquery_type = SubqueryType::ANY; - subquery_expr->child = TransformExpression(root->testexpr); + subquery_expr->child = TransformExpression(root->testexpr, depth + 1); // get the operator name if (!root->operName) { // simple IN @@ -107246,7 +117029,7 @@ unique_ptr Transformer::TransformSubquery(duckdb_libpgquery::P namespace duckdb { -std::string Transformer::NodetypeToString(duckdb_libpgquery::PGNodeTag type) { +std::string Transformer::NodetypeToString(duckdb_libpgquery::PGNodeTag type) { // LCOV_EXCL_START switch (type) { case duckdb_libpgquery::T_PGInvalid: return "T_Invalid"; @@ -108057,10 +117840,9 @@ std::string Transformer::NodetypeToString(duckdb_libpgquery::PGNodeTag type) { case duckdb_libpgquery::T_PGForeignKeyCacheInfo: return "T_ForeignKeyCacheInfo"; default: - D_ASSERT(0); - return ""; + return "(UNKNOWN)"; } -} +} // LCOV_EXCL_STOP } // namespace duckdb @@ -108119,7 +117901,7 @@ void Transformer::TransformCTE(duckdb_libpgquery::PGWithClause *de_with_clause, } // we need a query if (!cte->ctequery || cte->ctequery->type != duckdb_libpgquery::T_PGSelectStmt) { - throw Exception("A CTE needs 
a SELECT"); + throw InternalException("A CTE needs a SELECT"); } // CTE transformation can either result in inlining for non recursive CTEs, or in recursive CTE bindings @@ -108129,16 +117911,13 @@ void Transformer::TransformCTE(duckdb_libpgquery::PGWithClause *de_with_clause, } else { info->query = TransformSelect(cte->ctequery); } - - if (!info->query) { - throw Exception("A CTE needs a SELECT"); - } + D_ASSERT(info->query); auto cte_name = string(cte->ctename); auto it = select.cte_map.find(cte_name); if (it != select.cte_map.end()) { // can't have two CTEs with same name - throw Exception("A CTE needs an unique name"); + throw ParserException("Duplicate CTE name \"%s\"", cte_name); } select.cte_map[cte_name] = move(info); } @@ -108162,23 +117941,11 @@ unique_ptr Transformer::TransformRecursiveCTE(duckdb_libpgquery result->right = TransformSelectNode(stmt->rarg); result->aliases = info.aliases; - if (!result->left || !result->right) { - throw Exception("Failed to transform recursive CTE children."); - } + D_ASSERT(result->left); + D_ASSERT(result->right); - bool select_distinct = true; - switch (stmt->op) { - case duckdb_libpgquery::PG_SETOP_UNION: - // We don't need a DISTINCT operation on top of a recursive UNION CTE. - select_distinct = false; - break; - default: - throw Exception("Unexpected setop type for recursive CTE"); - } - // if we compute the distinct result here, we do not have to do this in - // the children. This saves a bunch of unnecessary DISTINCTs. 
- if (select_distinct) { - result->modifiers.push_back(make_unique()); + if (stmt->op != duckdb_libpgquery::PG_SETOP_UNION) { + throw ParserException("Unsupported setop type for recursive CTE: only UNION or UNION ALL are supported"); } break; } @@ -108210,7 +117977,7 @@ bool Transformer::TransformGroupBy(duckdb_libpgquery::PGList *group, vectorhead; node != nullptr; node = node->next) { auto n = reinterpret_cast(node->data.ptr_value); - result.push_back(TransformExpression(n)); + result.push_back(TransformExpression(n, 0)); } return true; } @@ -108252,7 +118019,7 @@ bool Transformer::TransformOrderBy(duckdb_libpgquery::PGList *order, vectortype); @@ -108289,7 +118056,7 @@ unique_ptr Transformer::TransformSampleOptions(duckdb_libpgquery: auto result = make_unique(); auto &sample_options = (duckdb_libpgquery::PGSampleOptions &)*options; auto &sample_size = (duckdb_libpgquery::PGSampleSize &)*sample_options.sample_size; - auto sample_value = TransformValue(sample_size.sample_size)->value; + auto sample_value = TransformValue(sample_size.sample_size, 0)->value; result->is_percentage = sample_size.is_percentage; if (sample_size.is_percentage) { // sample size is given in sample_size: use system sampling @@ -108311,7 +118078,7 @@ unique_ptr Transformer::TransformSampleOptions(duckdb_libpgquery: if (sample_options.method) { result->method = GetSampleMethod(sample_options.method); } - result->seed = sample_options.seed; + result->seed = sample_options.seed == 0 ? 
-1 : sample_options.seed; return result; } @@ -108328,10 +118095,13 @@ namespace duckdb { LogicalType Transformer::TransformTypeName(duckdb_libpgquery::PGTypeName *type_name) { auto name = (reinterpret_cast(type_name->names->tail->data.ptr_value)->val.str); // transform it to the SQL type - LogicalType base_type = TransformStringToLogicalType(name); + LogicalTypeId base_type = TransformStringToLogicalType(name); + LogicalType result_type; if (base_type == LogicalTypeId::STRUCT) { - D_ASSERT(type_name->typmods && type_name->typmods->length > 0); + if (!type_name->typmods || type_name->typmods->length == 0) { + throw ParserException("Struct needs a name and entries"); + } child_list_t children; unordered_set name_collision_set; @@ -108348,7 +118118,7 @@ LogicalType Transformer::TransformTypeName(duckdb_libpgquery::PGTypeName *type_n D_ASSERT(!entry_name.empty()); if (name_collision_set.find(entry_name) != name_collision_set.end()) { - throw ParserException("Duplicate struct entry name"); + throw ParserException("Duplicate struct entry name \"%s\"", entry_name); } name_collision_set.insert(entry_name); @@ -108356,87 +118126,99 @@ LogicalType Transformer::TransformTypeName(duckdb_libpgquery::PGTypeName *type_n children.push_back(make_pair(entry_name, entry_type)); } D_ASSERT(!children.empty()); - return LogicalType(base_type.id(), children); - } + result_type = LogicalType::STRUCT(move(children)); + } else if (base_type == LogicalTypeId::MAP) { + //! 
We transform MAP to STRUCT, LIST> - if (base_type == LogicalTypeId::MAP) { if (!type_name->typmods || type_name->typmods->length != 2) { throw ParserException("Map type needs exactly two entries, key and value type"); } child_list_t children; - unordered_set name_collision_set; - auto key_type = TransformTypeName((duckdb_libpgquery::PGTypeName *)type_name->typmods->head->data.ptr_value); auto value_type = TransformTypeName((duckdb_libpgquery::PGTypeName *)type_name->typmods->tail->data.ptr_value); - children.push_back(make_pair("key", key_type)); - children.push_back(make_pair("value", value_type)); + children.push_back({"key", LogicalType::LIST(key_type)}); + children.push_back({"value", LogicalType::LIST(value_type)}); - // for now we just transform MAP to LIST> D_ASSERT(children.size() == 2); - auto struct_type = LogicalType(LogicalTypeId::STRUCT, children); - child_list_t list_children; - list_children.push_back(make_pair("", struct_type)); - return LogicalType(LogicalTypeId::LIST, list_children); - } - - int8_t width = base_type.width(), scale = base_type.scale(); - // check any modifiers - int modifier_idx = 0; - if (type_name->typmods) { - for (auto node = type_name->typmods->head; node; node = node->next) { - auto &const_val = *((duckdb_libpgquery::PGAConst *)node->data.ptr_value); - if (const_val.type != duckdb_libpgquery::T_PGAConst || - const_val.val.type != duckdb_libpgquery::T_PGInteger) { - throw ParserException("Expected an integer constant as type modifier"); - } - if (const_val.val.val.ival < 0) { - throw ParserException("Negative modifier not supported"); - } - if (modifier_idx == 0) { - width = const_val.val.val.ival; - } else if (modifier_idx == 1) { - scale = const_val.val.val.ival; - } else { - throw ParserException("A maximum of two modifiers is supported"); - } - modifier_idx++; - } - } - switch (base_type.id()) { - case LogicalTypeId::VARCHAR: - if (modifier_idx > 1) { - throw ParserException("VARCHAR only supports a single modifier"); - } 
- // FIXME: create CHECK constraint based on varchar width - width = 0; - break; - case LogicalTypeId::DECIMAL: - if (modifier_idx == 1) { - // only width is provided: set scale to 0 + result_type = LogicalType::MAP(move(children)); + } else { + int8_t width, scale; + if (base_type == LogicalTypeId::DECIMAL) { + // default decimal width/scale + width = 18; + scale = 3; + } else { + width = 0; scale = 0; } - if (width <= 0 || width > Decimal::MAX_WIDTH_DECIMAL) { - throw ParserException("Width must be between 1 and %d!", (int)Decimal::MAX_WIDTH_DECIMAL); - } - if (scale > width) { - throw ParserException("Scale cannot be bigger than width"); + // check any modifiers + int modifier_idx = 0; + if (type_name->typmods) { + for (auto node = type_name->typmods->head; node; node = node->next) { + auto &const_val = *((duckdb_libpgquery::PGAConst *)node->data.ptr_value); + if (const_val.type != duckdb_libpgquery::T_PGAConst || + const_val.val.type != duckdb_libpgquery::T_PGInteger) { + throw ParserException("Expected an integer constant as type modifier"); + } + if (const_val.val.val.ival < 0) { + throw ParserException("Negative modifier not supported"); + } + if (modifier_idx == 0) { + width = const_val.val.val.ival; + } else if (modifier_idx == 1) { + scale = const_val.val.val.ival; + } else { + throw ParserException("A maximum of two modifiers is supported"); + } + modifier_idx++; + } } - break; - case LogicalTypeId::INTERVAL: - if (modifier_idx > 1) { - throw ParserException("INTERVAL only supports a single modifier"); + switch (base_type) { + case LogicalTypeId::VARCHAR: + if (modifier_idx > 1) { + throw ParserException("VARCHAR only supports a single modifier"); + } + // FIXME: create CHECK constraint based on varchar width + width = 0; + result_type = LogicalType::VARCHAR; + break; + case LogicalTypeId::DECIMAL: + if (modifier_idx == 1) { + // only width is provided: set scale to 0 + scale = 0; + } + if (width <= 0 || width > Decimal::MAX_WIDTH_DECIMAL) { + throw 
ParserException("Width must be between 1 and %d!", (int)Decimal::MAX_WIDTH_DECIMAL); + } + if (scale > width) { + throw ParserException("Scale cannot be bigger than width"); + } + result_type = LogicalType::DECIMAL(width, scale); + break; + case LogicalTypeId::INTERVAL: + if (modifier_idx > 1) { + throw ParserException("INTERVAL only supports a single modifier"); + } + width = 0; + result_type = LogicalType::INTERVAL; + break; + default: + if (modifier_idx > 0) { + throw ParserException("Type %s does not support any modifiers!", LogicalType(base_type).ToString()); + } + result_type = LogicalType(base_type); + break; } - width = 0; - break; - default: - if (modifier_idx > 0) { - throw ParserException("Type %s does not support any modifiers!", base_type.ToString()); + } + if (type_name->arrayBounds) { + // array bounds: turn the type into a list + for (auto cell = type_name->arrayBounds->head; cell != nullptr; cell = cell->next) { + result_type = LogicalType::LIST(move(result_type)); } } - - return LogicalType(base_type.id(), width, scale); + return result_type; } } // namespace duckdb @@ -108481,7 +118263,7 @@ unique_ptr Transformer::TransformAlter(duckdb_libpgquery::PGNode break; } case duckdb_libpgquery::PG_AT_ColumnDefault: { - auto expr = TransformExpression(command->def); + auto expr = TransformExpression(command->def, 0); result->info = make_unique(qname.schema, qname.name, command->name, move(expr)); break; } @@ -108491,7 +118273,7 @@ unique_ptr Transformer::TransformAlter(duckdb_libpgquery::PGNode unique_ptr expr; if (cdef->raw_default) { - expr = TransformExpression(cdef->raw_default); + expr = TransformExpression(cdef->raw_default, 0); } else { auto colref = make_unique(command->name); expr = make_unique(column_definition.type, move(colref)); @@ -108521,7 +118303,7 @@ unique_ptr Transformer::TransformCall(duckdb_libpgquery::PGNode * D_ASSERT(stmt); auto result = make_unique(); - result->function = TransformFuncCall((duckdb_libpgquery::PGFuncCall 
*)stmt->func); + result->function = TransformFuncCall((duckdb_libpgquery::PGFuncCall *)stmt->func, 0); return result; } @@ -108538,7 +118320,8 @@ unique_ptr Transformer::TransformCheckpoint(duckdb_libpgquery::PGN vector> children; // transform into "CALL checkpoint()" or "CALL force_checkpoint()" auto result = make_unique(); - result->function = make_unique(checkpoint->force ? "force_checkpoint" : "checkpoint", children); + result->function = + make_unique(checkpoint->force ? "force_checkpoint" : "checkpoint", move(children)); return move(result); } @@ -108594,7 +118377,7 @@ void Transformer::TransformCopyOptions(CopyInfo &info, duckdb_libpgquery::PGList break; default: info.options[def_elem->defname].push_back( - TransformValue(*((duckdb_libpgquery::PGValue *)def_elem->arg))->value); + TransformValue(*((duckdb_libpgquery::PGValue *)def_elem->arg), 0)->value); break; } } @@ -108659,16 +118442,13 @@ unique_ptr Transformer::TransformCreateFunction(duckdb_libpgque info->schema = qname.schema; info->name = qname.name; - auto function = TransformExpression(stmt->function); + auto function = TransformExpression(stmt->function, 0); D_ASSERT(function); auto macro_func = make_unique(move(function)); if (stmt->params) { vector> parameters; - auto res = TransformExpressionList(stmt->params, parameters); - if (!res) { - throw ParserException("Failed to transform macro parameters!"); - } + TransformExpressionList(*stmt->params, parameters, 0); for (auto ¶m : parameters) { if (param->type == ExpressionType::COMPARE_EQUAL) { // parameters with default value @@ -108748,7 +118528,7 @@ unique_ptr Transformer::TransformCreateIndex(duckdb_libpgquery: } else { // parse the index expression D_ASSERT(index_element->expr); - info->expressions.push_back(TransformExpression(index_element->expr)); + info->expressions.push_back(TransformExpression(index_element->expr, 0)); } } @@ -108919,8 +118699,9 @@ string Transformer::TransformCollation(duckdb_libpgquery::PGCollateClause *colla return 
collation; } -unique_ptr Transformer::TransformCollateExpr(duckdb_libpgquery::PGCollateClause *collate) { - auto child = TransformExpression(collate->arg); +unique_ptr Transformer::TransformCollateExpr(duckdb_libpgquery::PGCollateClause *collate, + idx_t depth) { + auto child = TransformExpression(collate->arg, depth + 1); auto collation = TransformCollation(collate); return make_unique(collation, move(child)); } @@ -108935,7 +118716,7 @@ ColumnDefinition Transformer::TransformColumnDefinition(duckdb_libpgquery::PGCol if (target_type.id() != LogicalTypeId::VARCHAR) { throw ParserException("Only VARCHAR columns can have collations!"); } - target_type = LogicalType(LogicalTypeId::VARCHAR, TransformCollation(cdef->collClause)); + target_type = LogicalType::VARCHAR_COLLATION(TransformCollation(cdef->collClause)); } return ColumnDefinition(colname, target_type); @@ -109102,7 +118883,7 @@ unique_ptr Transformer::TransformDelete(duckdb_libpgquery::PGNo D_ASSERT(stmt); auto result = make_unique(); - result->condition = TransformExpression(stmt->whereClause); + result->condition = TransformExpression(stmt->whereClause, 0); result->table = TransformRangeVar(stmt->relation); if (result->table->type != TableReferenceType::BASE_TABLE) { throw Exception("Can only delete from base tables!"); @@ -109232,9 +119013,7 @@ unique_ptr Transformer::TransformValuesList(duckdb_libpgquery::PGList auto target = (duckdb_libpgquery::PGList *)(value_list->data.ptr_value); vector> insert_values; - if (!TransformExpressionList(target, insert_values)) { - throw ParserException("Could not parse expression list!"); - } + TransformExpressionList(*target, insert_values, 0); if (!result->values.empty()) { if (result->values[0].size() != insert_values.size()) { throw ParserException("VALUES lists must all be the same length"); @@ -109249,6 +119028,9 @@ unique_ptr Transformer::TransformValuesList(duckdb_libpgquery::PGList unique_ptr Transformer::TransformInsert(duckdb_libpgquery::PGNode *node) { auto 
stmt = reinterpret_cast(node); D_ASSERT(stmt); + if (stmt->onConflictClause && stmt->onConflictClause->action != duckdb_libpgquery::PG_ONCONFLICT_NONE) { + throw ParserException("ON CONFLICT IGNORE/UPDATE clauses are not supported"); + } auto result = make_unique(); @@ -109304,13 +119086,13 @@ unique_ptr Transformer::TransformPragma(duckdb_libpgquery::PGNo if (stmt->args) { for (auto cell = stmt->args->head; cell != nullptr; cell = cell->next) { auto node = reinterpret_cast(cell->data.ptr_value); - auto expr = TransformExpression(node); + auto expr = TransformExpression(node, 0); if (expr->type == ExpressionType::COMPARE_EQUAL) { auto &comp = (ComparisonExpression &)*expr; info.named_parameters[comp.left->ToString()] = Value(comp.right->ToString()); } else if (node->type == duckdb_libpgquery::T_PGAConst) { - auto constant = TransformConstant((duckdb_libpgquery::PGAConst *)node); + auto constant = TransformConstant((duckdb_libpgquery::PGAConst *)node, 0); info.parameters.push_back(((ConstantExpression &)*constant).value); } else { info.parameters.emplace_back(expr->ToString()); @@ -109372,7 +119154,9 @@ unique_ptr Transformer::TransformExecute(duckdb_libpgquery::PG auto result = make_unique(); result->name = string(stmt->name); - TransformExpressionList(stmt->params, result->values); + if (stmt->params) { + TransformExpressionList(*stmt->params, result->values, 0); + } for (auto &expr : result->values) { if (!expr->IsScalar()) { throw Exception("Only scalar parameters or NULL supported for EXECUTE"); @@ -109536,9 +119320,7 @@ unique_ptr Transformer::TransformSelectNode(duckdb_libpgquery::PGSele auto target = reinterpret_cast(stmt->distinctClause->head->data.ptr_value); if (target) { // add the columns defined in the ON clause to the select list - if (!TransformExpressionList(stmt->distinctClause, modifier->distinct_on_targets)) { - throw Exception("Failed to transform expression list from DISTINCT ON."); - } + TransformExpressionList(*stmt->distinctClause, 
modifier->distinct_on_targets, 0); } result->modifiers.push_back(move(modifier)); } @@ -109554,18 +119336,16 @@ unique_ptr Transformer::TransformSelectNode(duckdb_libpgquery::PGSele throw ParserException("SELECT clause without selection list"); } // select list - if (!TransformExpressionList(stmt->targetList, result->select_list)) { - throw InternalException("Failed to transform expression list."); - } + TransformExpressionList(*stmt->targetList, result->select_list, 0); result->from_table = TransformFrom(stmt->fromClause); } // where - result->where_clause = TransformExpression(stmt->whereClause); + result->where_clause = TransformExpression(stmt->whereClause, 0); // group by TransformGroupBy(stmt->groupClause, result->groups); // having - result->having = TransformExpression(stmt->havingClause); + result->having = TransformExpression(stmt->havingClause, 0); // sample result->sample = TransformSampleOptions(stmt->sampleOptions); break; @@ -109622,10 +119402,10 @@ unique_ptr Transformer::TransformSelectNode(duckdb_libpgquery::PGSele if (stmt->limitCount || stmt->limitOffset) { auto limit_modifier = make_unique(); if (stmt->limitCount) { - limit_modifier->limit = TransformExpression(stmt->limitCount); + limit_modifier->limit = TransformExpression(stmt->limitCount, 0); } if (stmt->limitOffset) { - limit_modifier->offset = TransformExpression(stmt->limitOffset); + limit_modifier->offset = TransformExpression(stmt->limitOffset, 0); } node->modifiers.push_back(move(limit_modifier)); } @@ -109654,7 +119434,7 @@ unique_ptr Transformer::TransformSet(duckdb_libpgquery::PGNode *no D_ASSERT(stmt->args->head && stmt->args->head->data.ptr_value); D_ASSERT(((duckdb_libpgquery::PGNode *)stmt->args->head->data.ptr_value)->type == duckdb_libpgquery::T_PGAConst); - auto value = TransformValue(((duckdb_libpgquery::PGAConst *)stmt->args->head->data.ptr_value)->val)->value; + auto value = TransformValue(((duckdb_libpgquery::PGAConst *)stmt->args->head->data.ptr_value)->val, 0)->value; 
return make_unique(name, value); } @@ -109748,10 +119528,10 @@ unique_ptr Transformer::TransformUpdate(duckdb_libpgquery::PGNo for (auto cell = root->head; cell != nullptr; cell = cell->next) { auto target = (duckdb_libpgquery::PGResTarget *)(cell->data.ptr_value); result->columns.emplace_back(target->name); - result->expressions.push_back(TransformExpression(target->val)); + result->expressions.push_back(TransformExpression(target->val, 0)); } - result->condition = TransformExpression(stmt->whereClause); + result->condition = TransformExpression(stmt->whereClause, 0); return result; } @@ -109905,7 +119685,7 @@ unique_ptr Transformer::TransformJoin(duckdb_libpgquery::PGJoinExpr *r cross->right = move(result->right); return move(cross); } - result->condition = TransformExpression(root->quals); + result->condition = TransformExpression(root->quals, 0); return move(result); } @@ -109954,16 +119734,25 @@ unique_ptr Transformer::TransformRangeFunction(duckdb_libpgquery::PGRa auto call_tree = (duckdb_libpgquery::PGNode *)function_sublist->head->data.ptr_value; auto coldef = function_sublist->head->next->data.ptr_value; - D_ASSERT(call_tree->type == duckdb_libpgquery::T_PGFuncCall); if (coldef) { throw NotImplementedException("Explicit column definition not supported yet"); } - auto func_call = (duckdb_libpgquery::PGFuncCall *)call_tree; // transform the function call auto result = make_unique(); - result->function = TransformFuncCall(func_call); + switch (call_tree->type) { + case duckdb_libpgquery::T_PGFuncCall: { + auto func_call = (duckdb_libpgquery::PGFuncCall *)call_tree; + result->function = TransformFuncCall(func_call, 0); + result->query_location = func_call->location; + break; + } + case duckdb_libpgquery::T_PGSQLValueFunction: + result->function = TransformSQLValueFunction((duckdb_libpgquery::PGSQLValueFunction *)call_tree, 0); + break; + default: + throw ParserException("Not a function call or value function"); + } result->alias = TransformAlias(root->alias, 
result->column_name_alias); - result->query_location = func_call->location; if (root->sample) { result->sample = TransformSampleOptions(root->sample); } @@ -110025,10 +119814,7 @@ bool Transformer::TransformParseTree(duckdb_libpgquery::PGList *tree, vectorhead; entry != nullptr; entry = entry->next) { SetParamCount(0); auto stmt = TransformStatement((duckdb_libpgquery::PGNode *)entry->data.ptr_value); - if (!stmt) { - statements.clear(); - return false; - } + D_ASSERT(stmt); stmt->n_param = ParamCount(); statements.push_back(move(stmt)); } @@ -110747,6 +120533,65 @@ BindResult SelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFuncti + + +namespace duckdb { + +BindResult ExpressionBinder::BindExpression(BetweenExpression &expr, idx_t depth) { + // first try to bind the children of the case expression + string error; + BindChild(expr.input, depth, error); + BindChild(expr.lower, depth, error); + BindChild(expr.upper, depth, error); + if (!error.empty()) { + return BindResult(error); + } + // the children have been successfully resolved + auto &input = (BoundExpression &)*expr.input; + auto &lower = (BoundExpression &)*expr.lower; + auto &upper = (BoundExpression &)*expr.upper; + + auto input_sql_type = input.expr->return_type; + auto lower_sql_type = lower.expr->return_type; + auto upper_sql_type = upper.expr->return_type; + + // cast the input types to the same type + // now obtain the result type of the input types + auto input_type = BoundComparisonExpression::BindComparison(input_sql_type, lower_sql_type); + input_type = BoundComparisonExpression::BindComparison(input_type, upper_sql_type); + // add casts (if necessary) + input.expr = BoundCastExpression::AddCastToType(move(input.expr), input_type); + lower.expr = BoundCastExpression::AddCastToType(move(lower.expr), input_type); + upper.expr = BoundCastExpression::AddCastToType(move(upper.expr), input_type); + if (input_type.id() == LogicalTypeId::VARCHAR) { + // handle collation + auto collation = 
StringType::GetCollation(input_type); + input.expr = PushCollation(context, move(input.expr), collation, false); + lower.expr = PushCollation(context, move(lower.expr), collation, false); + upper.expr = PushCollation(context, move(upper.expr), collation, false); + } + if (!input.expr->HasSideEffects() && !input.expr->HasParameter()) { + // the expression does not have side effects: create two comparisons + auto left_compare = make_unique(ExpressionType::COMPARE_GREATERTHANOREQUALTO, + input.expr->Copy(), move(lower.expr)); + auto right_compare = make_unique(ExpressionType::COMPARE_LESSTHANOREQUALTO, + move(input.expr), move(upper.expr)); + return BindResult(make_unique(ExpressionType::CONJUNCTION_AND, move(left_compare), + move(right_compare))); + } else { + // expression has side effects: we cannot duplicate it + // create a bound_between directly + return BindResult( + make_unique(move(input.expr), move(lower.expr), move(upper.expr), true, true)); + } +} + +} // namespace duckdb + + + + + namespace duckdb { BindResult ExpressionBinder::BindExpression(CaseExpression &expr, idx_t depth) { @@ -110809,18 +120654,23 @@ BindResult ExpressionBinder::BindExpression(CastExpression &expr, idx_t depth) { if (!error.empty()) { return BindResult(error); } - if (expr.try_cast) { - throw NotImplementedException("TRY_CAST not implemented yet in binder"); - } // the children have been successfully resolved auto &child = (BoundExpression &)*expr.child; - if (child.expr->type == ExpressionType::VALUE_PARAMETER) { - auto ¶meter = (BoundParameterExpression &)*child.expr; - // parameter: move types into the parameter expression itself - parameter.return_type = expr.cast_type; + if (expr.try_cast) { + if (child.expr->return_type == expr.cast_type) { + // no cast required: type matches + return BindResult(move(child.expr)); + } + child.expr = make_unique(move(child.expr), expr.cast_type, true); } else { - // otherwise add a cast to the target type - child.expr = 
BoundCastExpression::AddCastToType(move(child.expr), expr.cast_type); + if (child.expr->type == ExpressionType::VALUE_PARAMETER) { + auto ¶meter = (BoundParameterExpression &)*child.expr; + // parameter: move types into the parameter expression itself + parameter.return_type = expr.cast_type; + } else { + // otherwise add a cast to the target type + child.expr = BoundCastExpression::AddCastToType(move(child.expr), expr.cast_type); + } } return BindResult(move(child.expr)); } @@ -110840,7 +120690,7 @@ BindResult ExpressionBinder::BindExpression(CollateExpression &expr, idx_t depth if (child.expr->return_type.id() != LogicalTypeId::VARCHAR) { throw BinderException("collations are only supported for type varchar"); } - child.expr->return_type = LogicalType(LogicalTypeId::VARCHAR, expr.collation); + child.expr->return_type = LogicalType::VARCHAR_COLLATION(expr.collation); return BindResult(move(child.expr)); } @@ -110887,9 +120737,9 @@ BindResult ExpressionBinder::BindExpression(ColumnRefExpression &colref, idx_t d if (colref.table_name.empty()) { auto similar_bindings = binder.bind_context.GetSimilarBindings(colref.column_name); string candidate_str = StringUtil::CandidatesMessage(similar_bindings, "Candidate bindings"); - return BindResult( - binder.FormatError(colref, StringUtil::Format("Referenced column \"%s\" not found in FROM clause!%s", - colref.column_name.c_str(), candidate_str))); + return BindResult(binder.FormatError(colref.query_location, + "Referenced column \"%s\" not found in FROM clause!%s", + colref.column_name.c_str(), candidate_str)); } } // if it was a macro parameter, let macro_binding bind it to the argument @@ -110990,7 +120840,7 @@ LogicalType BoundComparisonExpression::BindComparison(LogicalType left_type, Log // target width does not fit in decimal: truncate the scale (if possible) to try and make it fit max_width = Decimal::MAX_WIDTH_DECIMAL; } - return LogicalType(LogicalTypeId::DECIMAL, max_width, max_scale); + return 
LogicalType::DECIMAL(max_width, max_scale); } case LogicalTypeId::VARCHAR: // for comparison with strings, we prefer to bind to the numeric types @@ -111000,8 +120850,9 @@ LogicalType BoundComparisonExpression::BindComparison(LogicalType left_type, Log return right_type; } else { // else: check if collations are compatible - if (!left_type.collation().empty() && !right_type.collation().empty() && - left_type.collation() != right_type.collation()) { + auto left_collation = StringType::GetCollation(left_type); + auto right_collation = StringType::GetCollation(right_type); + if (!left_collation.empty() && !right_collation.empty() && left_collation != right_collation) { throw BinderException("Cannot combine types with different collation!"); } } @@ -111034,10 +120885,9 @@ BindResult ExpressionBinder::BindExpression(ComparisonExpression &expr, idx_t de right.expr = BoundCastExpression::AddCastToType(move(right.expr), input_type); if (input_type.id() == LogicalTypeId::VARCHAR) { // handle collation - left.expr = - PushCollation(context, move(left.expr), input_type.collation(), expr.type == ExpressionType::COMPARE_EQUAL); - right.expr = PushCollation(context, move(right.expr), input_type.collation(), - expr.type == ExpressionType::COMPARE_EQUAL); + auto collation = StringType::GetCollation(input_type); + left.expr = PushCollation(context, move(left.expr), collation, expr.type == ExpressionType::COMPARE_EQUAL); + right.expr = PushCollation(context, move(right.expr), collation, expr.type == ExpressionType::COMPARE_EQUAL); } // now create the bound comparison expression return BindResult(make_unique(expr.type, move(left.expr), move(right.expr))); @@ -111199,6 +121049,7 @@ BindResult ExpressionBinder::BindExpression(LambdaExpression &expr, idx_t depth) + namespace duckdb { void ExpressionBinder::ReplaceMacroParametersRecursive(unique_ptr &expr) { @@ -111308,24 +121159,6 @@ void ExpressionBinder::ReplaceMacroParametersRecursive(ParsedExpression &expr, Q } } -void 
ExpressionBinder::CheckForSideEffects(FunctionExpression &function, idx_t depth, string &error) { - for (idx_t i = 0; i < function.children.size(); i++) { - auto arg_copy = function.children[i]->Copy(); - BindChild(arg_copy, depth, error); - if (!error.empty()) { - return; - } - auto &bound_expr = (BoundExpression &)*arg_copy; - if (bound_expr.expr->HasSideEffects()) { - QueryErrorContext error_context(binder.root_statement, function.query_location); - error = StringUtil::Format("Arguments with side-effects are not supported ('%s()' was supplied). As a " - "workaround, try creating a CTE that evaluates the argument with side-effects.", - arg_copy->ToString()); - return; - } - } -} - BindResult ExpressionBinder::BindMacro(FunctionExpression &function, MacroCatalogEntry *macro_func, idx_t depth, unique_ptr *expr) { auto ¯o_def = *macro_func->function; @@ -111337,12 +121170,6 @@ BindResult ExpressionBinder::BindMacro(FunctionExpression &function, MacroCatalo return BindResult(binder.FormatError(*expr->get(), error)); } - // check for arguments with side-effects TODO: to support this, a projection must be pushed - // CheckForSideEffects(function, depth, error); - // if (!error.empty()) { - // return BindResult(error); - // } - // create a MacroBinding to bind this macro's parameters to its arguments vector types; vector names; @@ -111433,9 +121260,16 @@ BindResult ExpressionBinder::BindExpression(OperatorExpression &op, idx_t depth) // all children bound successfully string function_name; switch (op.type) { - case ExpressionType::ARRAY_EXTRACT: - function_name = "array_extract"; + case ExpressionType::ARRAY_EXTRACT: { + D_ASSERT(op.children[0]->expression_class == ExpressionClass::BOUND_EXPRESSION); + auto &b_exp = (BoundExpression &)*op.children[0]; + if (b_exp.expr->return_type.id() == LogicalTypeId::MAP) { + function_name = "map_extract"; + } else { + function_name = "array_extract"; + } break; + } case ExpressionType::ARRAY_SLICE: function_name = "array_slice"; 
break; @@ -111449,7 +121283,7 @@ BindResult ExpressionBinder::BindExpression(OperatorExpression &op, idx_t depth) break; } if (!function_name.empty()) { - auto function = make_unique(function_name, op.children); + auto function = make_unique(function_name, move(op.children)); return BindExpression(*function, depth, nullptr); } @@ -111551,6 +121385,7 @@ BindResult ExpressionBinder::BindExpression(SubqueryExpression &expr, idx_t dept D_ASSERT(depth == 0); // first bind the actual subquery in a new binder auto subquery_binder = Binder::CreateBinder(context, &binder); + subquery_binder->can_contain_nulls = true; auto bound_node = subquery_binder->BindNode(*expr.subquery->node); // check the correlated columns of the subquery for correlated columns with depth > 1 for (idx_t i = 0; i < subquery_binder->correlated_columns.size(); i++) { @@ -111631,18 +121466,23 @@ BindResult SelectBinder::BindUnnest(FunctionExpression &function, idx_t depth) { } BindChild(function.children[0], depth, error); if (!error.empty()) { - return BindResult(error); + // failed to bind + // try to bind correlated columns manually + if (!BindCorrelatedColumns(function.children[0])) { + return BindResult(error); + } + auto bound_expr = (BoundExpression *)function.children[0].get(); + ExtractCorrelatedExpressions(binder, *bound_expr->expr); } auto &child = (BoundExpression &)*function.children[0]; - LogicalType child_type = child.expr->return_type; + auto &child_type = child.expr->return_type; if (child_type.id() != LogicalTypeId::LIST) { return BindResult(binder.FormatError(function, "Unnest() can only be applied to lists")); } - LogicalType return_type = LogicalType::ANY; - D_ASSERT(child_type.child_types().size() <= 1); - if (child_type.child_types().size() == 1) { - return_type = child_type.child_types()[0].second; + if (depth > 0) { + throw BinderException(binder.FormatError(function, "Unnest() for correlated expressions is not supported yet")); } + auto &return_type = 
ListType::GetChildType(child_type); auto result = make_unique(return_type); result->child = move(child.expr); @@ -111652,11 +121492,10 @@ BindResult SelectBinder::BindUnnest(FunctionExpression &function, idx_t depth) { // TODO what if we have multiple unnests in the same projection list? ignore for now - // now create a column reference referring to the aggregate + // now create a column reference referring to the unnest auto colref = make_unique( function.alias.empty() ? node.unnests[unnest_index]->ToString() : function.alias, return_type, ColumnBinding(node.unnest_index, unnest_index), depth); - // move the aggregate expression into the set of bound aggregates return BindResult(move(colref)); } @@ -111673,6 +121512,9 @@ BindResult SelectBinder::BindUnnest(FunctionExpression &function, idx_t depth) { + + + namespace duckdb { static LogicalType ResolveWindowExpressionType(ExpressionType window_type, LogicalType child_type) { @@ -111687,6 +121529,7 @@ static LogicalType ResolveWindowExpressionType(ExpressionType window_type, Logic return LogicalType::BIGINT; case ExpressionType::WINDOW_FIRST_VALUE: case ExpressionType::WINDOW_LAST_VALUE: + case ExpressionType::WINDOW_NTH_VALUE: D_ASSERT(child_type.id() != LogicalTypeId::INVALID); // "Window function needs an expression" return child_type; case ExpressionType::WINDOW_LEAD: @@ -111697,6 +121540,14 @@ static LogicalType ResolveWindowExpressionType(ExpressionType window_type, Logic } } +static inline OrderType ResolveOrderType(const DBConfig &config, OrderType type) { + return (type == OrderType::ORDER_DEFAULT) ? config.default_order_type : type; +} + +static inline OrderByNullType ResolveNullOrder(const DBConfig &config, OrderByNullType null_order) { + return (null_order == OrderByNullType::ORDER_DEFAULT) ? 
config.default_null_order : null_order; +} + static unique_ptr GetExpression(unique_ptr &expr) { if (!expr) { return nullptr; @@ -111706,6 +121557,43 @@ static unique_ptr GetExpression(unique_ptr &expr) return move(((BoundExpression &)*expr).expr); } +static unique_ptr CastWindowExpression(unique_ptr &expr, const LogicalType &type) { + if (!expr) { + return nullptr; + } + D_ASSERT(expr.get()); + D_ASSERT(expr->expression_class == ExpressionClass::BOUND_EXPRESSION); + + auto &bound = (BoundExpression &)*expr; + bound.expr = BoundCastExpression::AddCastToType(move(bound.expr), type); + + return move(bound.expr); +} + +static LogicalType BindRangeExpression(ClientContext &context, const string &name, unique_ptr &expr, + unique_ptr &order_expr) { + + vector> children; + + D_ASSERT(order_expr.get()); + D_ASSERT(order_expr->expression_class == ExpressionClass::BOUND_EXPRESSION); + auto &bound_order = (BoundExpression &)*order_expr; + children.emplace_back(bound_order.expr->Copy()); + + D_ASSERT(expr.get()); + D_ASSERT(expr->expression_class == ExpressionClass::BOUND_EXPRESSION); + auto &bound = (BoundExpression &)*expr; + children.emplace_back(move(bound.expr)); + + string error; + auto function = ScalarFunction::BindScalarFunction(context, DEFAULT_SCHEMA, name, move(children), error, true); + if (!function) { + throw BinderException(error); + } + bound.expr = move(function); + return bound.expr->return_type; +} + BindResult SelectBinder::BindWindow(WindowExpression &window, idx_t depth) { if (inside_window) { throw BinderException("window function calls cannot be nested"); @@ -111713,6 +121601,12 @@ BindResult SelectBinder::BindWindow(WindowExpression &window, idx_t depth) { if (depth > 0) { throw BinderException("correlated columns in window functions not supported"); } + // If we have range expressions, then only one order by clause is allowed. 
+ if ((window.start == WindowBoundary::EXPR_PRECEDING_RANGE || window.start == WindowBoundary::EXPR_FOLLOWING_RANGE || + window.end == WindowBoundary::EXPR_PRECEDING_RANGE || window.end == WindowBoundary::EXPR_FOLLOWING_RANGE) && + window.orders.size() != 1) { + throw BinderException("RANGE frames must have only one ORDER BY expression"); + } // bind inside the children of the window function // we set the inside_window flag to true to prevent binding nested window functions this->inside_window = true; @@ -111730,6 +121624,7 @@ BindResult SelectBinder::BindWindow(WindowExpression &window, idx_t depth) { BindChild(window.end_expr, depth, error); BindChild(window.offset_expr, depth, error); BindChild(window.default_expr, depth, error); + this->inside_window = false; if (!error.empty()) { // failed to bind children of window function @@ -111742,6 +121637,23 @@ BindResult SelectBinder::BindWindow(WindowExpression &window, idx_t depth) { D_ASSERT(child.get()); D_ASSERT(child->expression_class == ExpressionClass::BOUND_EXPRESSION); auto &bound = (BoundExpression &)*child; + // Add casts for positional arguments + const auto argno = children.size(); + switch (window.type) { + case ExpressionType::WINDOW_NTILE: + // ntile(bigint) + if (argno == 0) { + bound.expr = BoundCastExpression::AddCastToType(move(bound.expr), LogicalType::BIGINT); + } + break; + case ExpressionType::WINDOW_NTH_VALUE: + // nth_value(, index) + if (argno == 1) { + bound.expr = BoundCastExpression::AddCastToType(move(bound.expr), LogicalType::BIGINT); + } + default: + break; + } types.push_back(bound.expr->return_type); children.push_back(move(bound.expr)); } @@ -111780,18 +121692,71 @@ BindResult SelectBinder::BindWindow(WindowExpression &window, idx_t depth) { for (auto &child : window.partitions) { result->partitions.push_back(GetExpression(child)); } + + // Convert RANGE boundary expressions to ORDER +/- expressions. 
+ // Note that PRECEEDING and FOLLOWING refer to the sequential order in the frame, + // not the natural ordering of the type. This means that the offset arithmetic must be reversed + // for ORDER BY DESC. auto &config = DBConfig::GetConfig(context); + auto range_sense = OrderType::INVALID; + auto start_type = LogicalType::BIGINT; + if (window.start == WindowBoundary::EXPR_PRECEDING_RANGE) { + D_ASSERT(window.orders.size() == 1); + range_sense = ResolveOrderType(config, window.orders[0].type); + const auto name = (range_sense == OrderType::ASCENDING) ? "-" : "+"; + start_type = BindRangeExpression(context, name, window.start_expr, window.orders[0].expression); + } else if (window.start == WindowBoundary::EXPR_FOLLOWING_RANGE) { + D_ASSERT(window.orders.size() == 1); + range_sense = ResolveOrderType(config, window.orders[0].type); + const auto name = (range_sense == OrderType::ASCENDING) ? "+" : "-"; + start_type = BindRangeExpression(context, name, window.start_expr, window.orders[0].expression); + } + + auto end_type = LogicalType::BIGINT; + if (window.end == WindowBoundary::EXPR_PRECEDING_RANGE) { + D_ASSERT(window.orders.size() == 1); + range_sense = ResolveOrderType(config, window.orders[0].type); + const auto name = (range_sense == OrderType::ASCENDING) ? "-" : "+"; + end_type = BindRangeExpression(context, name, window.end_expr, window.orders[0].expression); + } else if (window.end == WindowBoundary::EXPR_FOLLOWING_RANGE) { + D_ASSERT(window.orders.size() == 1); + range_sense = ResolveOrderType(config, window.orders[0].type); + const auto name = (range_sense == OrderType::ASCENDING) ? 
"+" : "-"; + end_type = BindRangeExpression(context, name, window.end_expr, window.orders[0].expression); + } + + // Cast ORDER and boundary expressions to the same type + if (range_sense != OrderType::INVALID) { + D_ASSERT(window.orders.size() == 1); + + auto &order_expr = window.orders[0].expression; + D_ASSERT(order_expr.get()); + D_ASSERT(order_expr->expression_class == ExpressionClass::BOUND_EXPRESSION); + auto &bound_order = (BoundExpression &)*order_expr; + auto order_type = bound_order.expr->return_type; + if (window.start_expr) { + order_type = LogicalType::MaxLogicalType(order_type, start_type); + } + if (window.end_expr) { + order_type = LogicalType::MaxLogicalType(order_type, end_type); + } + + // Cast all three to match + bound_order.expr = BoundCastExpression::AddCastToType(move(bound_order.expr), order_type); + start_type = end_type = order_type; + } + for (auto &order : window.orders) { - auto type = order.type == OrderType::ORDER_DEFAULT ? config.default_order_type : order.type; - auto null_order = - order.null_order == OrderByNullType::ORDER_DEFAULT ? 
config.default_null_order : order.null_order; + auto type = ResolveOrderType(config, order.type); + auto null_order = ResolveNullOrder(config, order.null_order); auto expression = GetExpression(order.expression); result->orders.emplace_back(type, null_order, move(expression)); } - result->start_expr = GetExpression(window.start_expr); - result->end_expr = GetExpression(window.end_expr); - result->offset_expr = GetExpression(window.offset_expr); - result->default_expr = GetExpression(window.default_expr); + + result->start_expr = CastWindowExpression(window.start_expr, start_type); + result->end_expr = CastWindowExpression(window.end_expr, end_type); + result->offset_expr = CastWindowExpression(window.offset_expr, LogicalType::BIGINT); + result->default_expr = CastWindowExpression(window.default_expr, result->return_type); result->start = window.start; result->end = window.end; @@ -112122,10 +122087,9 @@ unique_ptr Binder::BindOrderExpression(OrderBinder &order_binder, un return bound_expr; } -unique_ptr BindDelimiter(ClientContext &context, unique_ptr delimiter, - int64_t &delimiter_value) { - - auto new_binder = Binder::CreateBinder(context); +unique_ptr Binder::BindDelimiter(ClientContext &context, unique_ptr delimiter, + int64_t &delimiter_value) { + auto new_binder = Binder::CreateBinder(context, this, true); ExpressionBinder expr_binder(*new_binder, context); expr_binder.target_type = LogicalType::UBIGINT; auto expr = expr_binder.Bind(delimiter); @@ -112224,8 +122188,8 @@ void Binder::BindModifierTypes(BoundQueryNode &result, const vector auto &bound_colref = (BoundColumnRefExpression &)*target_distinct; auto sql_type = sql_types[bound_colref.binding.column_index]; if (sql_type.id() == LogicalTypeId::VARCHAR) { - target_distinct = - ExpressionBinder::PushCollation(context, move(target_distinct), sql_type.collation(), true); + target_distinct = ExpressionBinder::PushCollation(context, move(target_distinct), + StringType::GetCollation(sql_type), true); } } 
break; @@ -112243,8 +122207,8 @@ void Binder::BindModifierTypes(BoundQueryNode &result, const vector auto sql_type = sql_types[bound_colref.binding.column_index]; bound_colref.return_type = sql_types[bound_colref.binding.column_index]; if (sql_type.id() == LogicalTypeId::VARCHAR) { - order_node.expression = - ExpressionBinder::PushCollation(context, move(order_node.expression), sql_type.collation()); + order_node.expression = ExpressionBinder::PushCollation(context, move(order_node.expression), + StringType::GetCollation(sql_type)); } } break; @@ -112335,7 +122299,8 @@ unique_ptr Binder::BindNode(SelectNode &statement) { D_ASSERT(bound_expr->return_type.id() != LogicalTypeId::INVALID); // push a potential collation, if necessary - bound_expr = ExpressionBinder::PushCollation(context, move(bound_expr), group_type.collation(), true); + bound_expr = + ExpressionBinder::PushCollation(context, move(bound_expr), StringType::GetCollation(group_type), true); result->groups.push_back(move(bound_expr)); // in the unbound expression we DO bind the table names of any ColumnRefs @@ -112517,9 +122482,11 @@ unique_ptr Binder::BindNode(SetOperationNode &statement) { result->setop_index = GenerateTableIndex(); result->left_binder = Binder::CreateBinder(context, this); + result->left_binder->can_contain_nulls = true; result->left = result->left_binder->BindNode(*statement.left); result->right_binder = Binder::CreateBinder(context, this); + result->right_binder->can_contain_nulls = true; result->right = result->right_binder->BindNode(*statement.right); if (!statement.modifiers.empty()) { @@ -112552,6 +122519,11 @@ unique_ptr Binder::BindNode(SetOperationNode &statement) { // figure out the types of the setop result by picking the max of both for (idx_t i = 0; i < result->left->types.size(); i++) { auto result_type = LogicalType::MaxLogicalType(result->left->types[i], result->right->types[i]); + if (!can_contain_nulls) { + if (ExpressionBinder::ContainsNullType(result_type)) { + 
result_type = ExpressionBinder::ExchangeNullType(result_type); + } + } result->types.push_back(result_type); } @@ -113334,7 +123306,7 @@ namespace duckdb { BoundStatement Binder::BindCopyTo(CopyStatement &stmt) { // COPY TO a file auto &config = DBConfig::GetConfig(context); - if (!config.enable_copy) { + if (!config.enable_external_access) { throw Exception("COPY TO is disabled by configuration"); } BoundStatement result; @@ -113364,7 +123336,7 @@ BoundStatement Binder::BindCopyTo(CopyStatement &stmt) { BoundStatement Binder::BindCopyFrom(CopyStatement &stmt) { auto &config = DBConfig::GetConfig(context); - if (!config.enable_copy) { + if (!config.enable_external_access) { throw Exception("COPY FROM is disabled by configuration"); } BoundStatement result; @@ -113439,6 +123411,7 @@ BoundStatement Binder::Bind(CopyStatement &stmt) { } stmt.select_statement = move(statement); } + this->allow_stream_result = false; if (stmt.info->is_from) { return BindCopyFrom(stmt); } else { @@ -113586,6 +123559,8 @@ void Binder::BindCreateViewInfo(CreateViewInfo &base) { // bind the view as if it were a query so we can catch errors // note that we bind the original, and replace the original with a copy // this is because the original has + this->can_contain_nulls = true; + auto copy = base.query->Copy(); auto query_node = Bind(*base.query); base.query = unique_ptr_cast(move(copy)); @@ -113658,7 +123633,6 @@ BoundStatement Binder::Bind(CreateStatement &stmt) { auto &base = (CreateViewInfo &)*stmt.info; // bind the schema auto schema = BindSchema(*stmt.info); - BindCreateViewInfo(base); result.plan = make_unique(LogicalOperatorType::LOGICAL_CREATE_VIEW, move(stmt.info), schema); break; @@ -113797,7 +123771,7 @@ static void BindConstraints(Binder &binder, BoundCreateTableInfo &info) { auto &base = (CreateTableInfo &)*info.base; bool has_primary_key = false; - unordered_set primary_keys; + vector primary_keys; for (idx_t i = 0; i < base.constraints.size(); i++) { auto &cond = 
base.constraints[i]; switch (cond->type) { @@ -113823,11 +123797,14 @@ static void BindConstraints(Binder &binder, BoundCreateTableInfo &info) { case ConstraintType::UNIQUE: { auto &unique = (UniqueConstraint &)*cond; // have to resolve columns of the unique constraint - unordered_set keys; + vector keys; + unordered_set key_set; if (unique.index != INVALID_INDEX) { D_ASSERT(unique.index < base.columns.size()); // unique constraint is given by single index - keys.insert(unique.index); + unique.columns.push_back(base.columns[unique.index].name); + keys.push_back(unique.index); + key_set.insert(unique.index); } else { // unique constraint is given by list of names // have to resolve names @@ -113837,12 +123814,13 @@ static void BindConstraints(Binder &binder, BoundCreateTableInfo &info) { if (entry == info.name_map.end()) { throw ParserException("column \"%s\" named in key does not exist", keyname); } - if (keys.find(entry->second) != keys.end()) { + if (key_set.find(entry->second) != key_set.end()) { throw ParserException("column \"%s\" appears twice in " "primary key constraint", keyname); } - keys.insert(entry->second); + keys.push_back(entry->second); + key_set.insert(entry->second); } } @@ -113854,7 +123832,8 @@ static void BindConstraints(Binder &binder, BoundCreateTableInfo &info) { has_primary_key = true; primary_keys = keys; } - info.bound_constraints.push_back(make_unique(keys, unique.is_primary_key)); + info.bound_constraints.push_back( + make_unique(move(keys), move(key_set), unique.is_primary_key)); break; } default: @@ -113917,8 +123896,9 @@ unique_ptr Binder::BindCreateTableInfo(unique_ptrallow_stream_result = false; return result; } @@ -114003,6 +123983,7 @@ BoundStatement Binder::Bind(DeleteStatement &stmt) { result.plan = move(del); result.names = {"Count"}; result.types = {LogicalType::BIGINT}; + this->allow_stream_result = false; return result; } @@ -114052,6 +124033,7 @@ BoundStatement Binder::Bind(DropStatement &stmt) { result.plan = 
make_unique(LogicalOperatorType::LOGICAL_DROP, move(stmt.info)); result.names = {"Success"}; result.types = {LogicalType::BOOLEAN}; + this->allow_stream_result = false; return result; } @@ -114090,14 +124072,40 @@ BoundStatement Binder::Bind(ExplainStatement &stmt) { + + #include namespace duckdb { +//! Sanitizes a string to have only low case chars and underscores +string SanitizeExportIdentifier(const string &str) { + // Copy the original string to result + string result(str); + + for (idx_t i = 0; i < str.length(); ++i) { + auto c = str[i]; + if (c >= 'a' && c <= 'z') { + // If it is lower case just continue + continue; + } + + if (c >= 'A' && c <= 'Z') { + // To lowercase + result[i] = tolower(c); + } else { + // Substitute to underscore + result[i] = '_'; + } + } + + return result; +} + BoundStatement Binder::Bind(ExportStatement &stmt) { // COPY TO a file auto &config = DBConfig::GetConfig(context); - if (!config.enable_copy) { + if (!config.enable_external_access) { throw Exception("COPY TO is disabled by configuration"); } BoundStatement result; @@ -114113,36 +124121,52 @@ BoundStatement Binder::Bind(ExportStatement &stmt) { // gather a list of all the tables vector tables; - Catalog::GetCatalog(context).schemas->Scan(context, [&](CatalogEntry *entry) { - auto schema = (SchemaCatalogEntry *)entry; + auto schemas = catalog.schemas->GetEntries(context); + for (auto &schema : schemas) { schema->Scan(context, CatalogType::TABLE_ENTRY, [&](CatalogEntry *entry) { if (entry->type == CatalogType::TABLE_ENTRY) { tables.push_back((TableCatalogEntry *)entry); } }); - }); + } // now generate the COPY statements for each of the tables auto &fs = FileSystem::GetFileSystem(context); unique_ptr child_operator; + + BoundExportData exported_tables; + + idx_t id = 0; // Id for table for (auto &table : tables) { auto info = make_unique(); // we copy the options supplied to the EXPORT info->format = stmt.info->format; info->options = stmt.info->options; // set up the file name 
for the COPY TO + + auto exported_data = ExportedTableData(); if (table->schema->name == DEFAULT_SCHEMA) { - info->file_path = fs.JoinPath(stmt.info->file_path, - StringUtil::Format("%s.%s", table->name, copy_function->function.extension)); - } else { info->file_path = - fs.JoinPath(stmt.info->file_path, StringUtil::Format("%s.%s.%s", table->schema->name, table->name, - copy_function->function.extension)); + fs.JoinPath(stmt.info->file_path, + StringUtil::Format("%s_%s.%s", to_string(id), SanitizeExportIdentifier(table->name), + copy_function->function.extension)); + } else { + info->file_path = fs.JoinPath( + stmt.info->file_path, + StringUtil::Format("%s_%s_%s.%s", SanitizeExportIdentifier(table->schema->name), to_string(id), + SanitizeExportIdentifier(table->name), copy_function->function.extension)); } info->is_from = false; info->schema = table->schema->name; info->table = table->name; + exported_data.table_name = info->table; + exported_data.schema_name = info->schema; + exported_data.file_path = info->file_path; + + exported_tables.data[table] = exported_data; + id++; + // generate the copy statement and bind it CopyStatement copy_stmt; copy_stmt.info = move(info); @@ -114167,13 +124191,14 @@ BoundStatement Binder::Bind(ExportStatement &stmt) { } // create the export node - auto export_node = make_unique(copy_function->function, move(stmt.info)); + auto export_node = make_unique(copy_function->function, move(stmt.info), exported_tables); if (child_operator) { export_node->children.push_back(move(child_operator)); } result.plan = move(export_node); + this->allow_stream_result = false; return result; } @@ -114332,6 +124357,7 @@ BoundStatement Binder::Bind(InsertStatement &stmt) { insert->AddChild(move(root)); result.plan = move(insert); + this->allow_stream_result = false; return result; } @@ -114349,6 +124375,7 @@ BoundStatement Binder::Bind(LoadStatement &stmt) { result.names = {"Success"}; result.plan = make_unique(LogicalOperatorType::LOGICAL_LOAD, 
move(stmt.info)); + this->allow_stream_result = false; return result; } @@ -114608,6 +124635,26 @@ static void BindExtraColumns(TableCatalogEntry &table, LogicalGet &get, LogicalP } } +static bool TypeSupportsRegularUpdate(const LogicalType &type) { + switch (type.id()) { + case LogicalTypeId::LIST: + case LogicalTypeId::MAP: + // lists and maps don't support updates directly + return false; + case LogicalTypeId::STRUCT: { + auto &child_types = StructType::GetChildTypes(type); + for (auto &entry : child_types) { + if (!TypeSupportsRegularUpdate(entry.second)) { + return false; + } + } + return true; + } + default: + return true; + } +} + static void BindUpdateConstraints(TableCatalogEntry &table, LogicalGet &get, LogicalProjection &proj, LogicalUpdate &update) { // check the constraints and indexes of the table to see if we need to project any additional columns @@ -114622,16 +124669,26 @@ static void BindUpdateConstraints(TableCatalogEntry &table, LogicalGet &get, Log BindExtraColumns(table, get, proj, update, check.bound_columns); } } - // for index updates, we do the same, however, for index updates we always turn any update into an insert and a - // delete for the insert, we thus need all the columns to be available, hence we check if the update touches any - // index columns - update.is_index_update = false; - for (auto &index : table.storage->info->indexes) { - if (index->IndexIsUpdated(update.columns)) { - update.is_index_update = true; + // for index updates we always turn any update into an insert and a delete + // we thus need all the columns to be available, hence we check if the update touches any index columns + update.update_is_del_and_insert = false; + table.storage->info->indexes.Scan([&](Index &index) { + if (index.IndexIsUpdated(update.columns)) { + update.update_is_del_and_insert = true; + return true; + } + return false; + }); + + // we also convert any updates on LIST columns into delete + insert + for (auto &col : update.columns) { + if 
(!TypeSupportsRegularUpdate(table.columns[col].type)) { + update.update_is_del_and_insert = true; + break; } } - if (update.is_index_update) { + + if (update.update_is_del_and_insert) { // the update updates a column required by an index, push projections for all columns unordered_set all_columns; for (idx_t i = 0; i < table.storage->types.size(); i++) { @@ -114731,6 +124788,7 @@ BoundStatement Binder::Bind(UpdateStatement &stmt) { result.names = {"Count"}; result.types = {LogicalType::BIGINT}; result.plan = move(update); + this->allow_stream_result = false; return result; } @@ -114881,7 +124939,7 @@ unique_ptr Binder::Bind(BaseTableRef &ref) { for (auto &scan : config.replacement_scans) { auto replacement_function = scan.function(ref.table_name, scan.data); if (replacement_function) { - replacement_function->alias = ref.alias; + replacement_function->alias = ref.alias.empty() ? ref.table_name : ref.alias; replacement_function->column_name_alias = ref.column_name_alias; return Bind(*replacement_function); } @@ -114920,6 +124978,7 @@ unique_ptr Binder::Bind(BaseTableRef &ref) { // for the view and for the current query bool inherit_ctes = false; auto view_binder = Binder::CreateBinder(context, this, inherit_ctes); + view_binder->can_contain_nulls = true; SubqueryRef subquery(unique_ptr_cast(view_catalog_entry->query->Copy())); subquery.alias = ref.alias.empty() ? 
ref.table_name : ref.alias; subquery.column_name_alias = @@ -115034,6 +125093,7 @@ class BoundExpressionListRef : public BoundTableRef { + namespace duckdb { unique_ptr Binder::Bind(ExpressionListRef &expr) { @@ -115053,23 +125113,38 @@ unique_ptr Binder::Bind(ExpressionListRef &expr) { } vector> list; - if (result->types.empty()) { - // for the first list, we set the expected types as the types of these expressions - for (idx_t val_idx = 0; val_idx < expression_list.size(); val_idx++) { - LogicalType result_type; - auto expr = binder.Bind(expression_list[val_idx], &result_type); - result->types.push_back(result_type); - list.push_back(move(expr)); - } - } else { - // for subsequent lists, we apply the expected types we found in the first list - for (idx_t val_idx = 0; val_idx < expression_list.size(); val_idx++) { + for (idx_t val_idx = 0; val_idx < expression_list.size(); val_idx++) { + if (!result->types.empty()) { + D_ASSERT(result->types.size() == expression_list.size()); binder.target_type = result->types[val_idx]; - list.push_back(binder.Bind(expression_list[val_idx])); } + auto expr = binder.Bind(expression_list[val_idx]); + list.push_back(move(expr)); } result->values.push_back(move(list)); } + if (result->types.empty() && !expr.values.empty()) { + // there are no types specified + // we have to figure out the result types + // for each column, we iterate over all of the expressions and select the max logical type + // we initialize all types to SQLNULL + result->types.resize(expr.values[0].size(), LogicalType::SQLNULL); + // now loop over the lists and select the max logical type + for (idx_t list_idx = 0; list_idx < result->values.size(); list_idx++) { + auto &list = result->values[list_idx]; + for (idx_t val_idx = 0; val_idx < list.size(); val_idx++) { + result->types[val_idx] = + LogicalType::MaxLogicalType(result->types[val_idx], list[val_idx]->return_type); + } + } + // finally do another loop over the expressions and add casts where required + for 
(idx_t list_idx = 0; list_idx < result->values.size(); list_idx++) { + auto &list = result->values[list_idx]; + for (idx_t val_idx = 0; val_idx < list.size(); val_idx++) { + list[val_idx] = BoundCastExpression::AddCastToType(move(list[val_idx]), result->types[val_idx]); + } + } + } result->bind_index = GenerateTableIndex(); bind_context.AddGenericBinding(result->bind_index, expr.alias, result->names, result->types); return move(result); @@ -115130,7 +125205,7 @@ static unique_ptr BindColumn(Binder &binder, ClientContext &co auto expr = make_unique_base(column_name, alias); ExpressionBinder expr_binder(binder, context); auto result = expr_binder.Bind(expr); - return make_unique(move(result), nullptr); + return make_unique(move(result)); } static unique_ptr AddCondition(ClientContext &context, Binder &left_binder, Binder &right_binder, @@ -115394,6 +125469,7 @@ namespace duckdb { unique_ptr Binder::Bind(SubqueryRef &ref, CommonTableExpressionInfo *cte) { auto binder = Binder::CreateBinder(context, this); + binder->can_contain_nulls = true; if (cte) { binder->bound_ctes.insert(cte); } @@ -115516,7 +125592,8 @@ unique_ptr Binder::Bind(TableFunctionRef &ref) { // cast the parameters to the type of the function for (idx_t i = 0; i < arguments.size(); i++) { if (table_function.arguments[i] != LogicalType::ANY && table_function.arguments[i] != LogicalType::TABLE && - table_function.arguments[i] != LogicalType::POINTER) { + table_function.arguments[i] != LogicalType::POINTER && + table_function.arguments[i].id() != LogicalTypeId::LIST) { parameters[i] = parameters[i].CastAs(table_function.arguments[i]); } } @@ -115921,6 +125998,7 @@ shared_ptr Binder::CreateBinder(ClientContext &context, Binder *parent, Binder::Binder(bool, ClientContext &context, shared_ptr parent_p, bool inherit_ctes_p) : context(context), read_only(true), requires_valid_transaction(true), allow_stream_result(false), parent(move(parent_p)), bound_tables(0), inherit_ctes(inherit_ctes_p) { + parameters = 
nullptr; if (parent) { // We have to inherit macro parameter bindings from the parent binder, if there is a parent. macro_binding = parent->macro_binding; @@ -116188,9 +126266,9 @@ string Binder::FormatError(TableRef &ref_context, const string &message) { return FormatError(ref_context.query_location, message); } -string Binder::FormatError(idx_t query_location, const string &message) { +string Binder::FormatErrorRecursive(idx_t query_location, const string &message, vector &values) { QueryErrorContext context(root_statement, query_location); - return context.FormatError(message); + return context.FormatErrorRecursive(message, values); } } // namespace duckdb @@ -116246,7 +126324,7 @@ bool BoundAggregateExpression::Equals(const BaseExpression *other_p) const { if (children.size() != other->children.size()) { return false; } - if (other->filter != filter) { + if (!Expression::Equals(other->filter.get(), filter.get())) { return false; } for (idx_t i = 0; i < children.size(); i++) { @@ -116265,8 +126343,8 @@ unique_ptr BoundAggregateExpression::Copy() { for (auto &child : children) { new_children.push_back(child->Copy()); } - auto new_bind_info = bind_info->Copy(); - auto new_filter = filter->Copy(); + auto new_bind_info = bind_info ? bind_info->Copy() : nullptr; + auto new_filter = filter ? 
filter->Copy() : nullptr; auto copy = make_unique(function, move(new_children), move(new_filter), move(new_bind_info), distinct); copy->CopyProperties(*this); @@ -116363,9 +126441,9 @@ unique_ptr BoundCaseExpression::Copy() { namespace duckdb { -BoundCastExpression::BoundCastExpression(unique_ptr child_p, LogicalType target_type_p) - : Expression(ExpressionType::OPERATOR_CAST, ExpressionClass::BOUND_CAST, move(target_type_p)), - child(move(child_p)) { +BoundCastExpression::BoundCastExpression(unique_ptr child_p, LogicalType target_type_p, bool try_cast_p) + : Expression(ExpressionType::OPERATOR_CAST, ExpressionClass::BOUND_CAST, move(target_type_p)), child(move(child_p)), + try_cast(try_cast_p) { } unique_ptr BoundCastExpression::AddCastToType(unique_ptr expr, const LogicalType &target_type) { @@ -116379,10 +126457,8 @@ unique_ptr BoundCastExpression::AddCastToType(unique_ptr } else if (expr->return_type != target_type) { auto &expr_type = expr->return_type; if (target_type.id() == LogicalTypeId::LIST && expr_type.id() == LogicalTypeId::LIST) { - D_ASSERT(!target_type.child_types().empty()); - D_ASSERT(!expr_type.child_types().empty()); - auto &target_list = target_type.child_types()[0].second; - auto &expr_list = expr_type.child_types()[0].second; + auto &target_list = ListType::GetChildType(target_type); + auto &expr_list = ListType::GetChildType(expr_type); if (target_list.id() == LogicalTypeId::ANY || expr_list == target_list) { return expr; } @@ -116402,17 +126478,37 @@ bool BoundCastExpression::CastIsInvertible(const LogicalType &source_type, const if (source_type.id() == LogicalTypeId::DOUBLE || target_type.id() == LogicalTypeId::DOUBLE) { return false; } + if (source_type.id() == LogicalTypeId::DECIMAL || target_type.id() == LogicalTypeId::DECIMAL) { + uint8_t source_width, target_width; + uint8_t source_scale, target_scale; + // cast to or from decimal + // cast is only invertible if the cast is strictly widening + if 
(!source_type.GetDecimalProperties(source_width, source_scale)) { + return false; + } + if (!target_type.GetDecimalProperties(target_width, target_scale)) { + return false; + } + if (target_scale < source_scale) { + return false; + } + return true; + } if (source_type.id() == LogicalTypeId::VARCHAR) { - return target_type.id() == LogicalTypeId::DATE || target_type.id() == LogicalTypeId::TIMESTAMP; + return target_type.id() == LogicalTypeId::DATE || target_type.id() == LogicalTypeId::TIMESTAMP || + target_type.id() == LogicalTypeId::TIMESTAMP_NS || target_type.id() == LogicalTypeId::TIMESTAMP_MS || + target_type.id() == LogicalTypeId::TIMESTAMP_SEC; } if (target_type.id() == LogicalTypeId::VARCHAR) { - return source_type.id() == LogicalTypeId::DATE || source_type.id() == LogicalTypeId::TIMESTAMP; + return source_type.id() == LogicalTypeId::DATE || source_type.id() == LogicalTypeId::TIMESTAMP || + source_type.id() == LogicalTypeId::TIMESTAMP_NS || source_type.id() == LogicalTypeId::TIMESTAMP_MS || + source_type.id() == LogicalTypeId::TIMESTAMP_SEC; } return true; } string BoundCastExpression::ToString() const { - return "CAST(" + child->GetName() + " AS " + return_type.ToString() + ")"; + return (try_cast ? 
"TRY_CAST(" : "CAST(") + child->GetName() + " AS " + return_type.ToString() + ")"; } bool BoundCastExpression::Equals(const BaseExpression *other_p) const { @@ -116423,11 +126519,14 @@ bool BoundCastExpression::Equals(const BaseExpression *other_p) const { if (!Expression::Equals(child.get(), other->child.get())) { return false; } + if (try_cast != other->try_cast) { + return false; + } return true; } unique_ptr BoundCastExpression::Copy() { - auto copy = make_unique(child->Copy(), return_type); + auto copy = make_unique(child->Copy(), return_type, try_cast); copy->CopyProperties(*this); return move(copy); } @@ -116571,7 +126670,7 @@ bool BoundConstantExpression::Equals(const BaseExpression *other_p) const { return false; } auto other = (BoundConstantExpression *)other_p; - return value == other->value; + return !ValueOperations::DistinctFrom(value, other->value); } hash_t BoundConstantExpression::Hash() const { @@ -116592,6 +126691,7 @@ unique_ptr BoundConstantExpression::Copy() { + namespace duckdb { BoundFunctionExpression::BoundFunctionExpression(LogicalType return_type, ScalarFunction bound_function, @@ -116631,14 +126731,9 @@ bool BoundFunctionExpression::Equals(const BaseExpression *other_p) const { if (other->function != function) { return false; } - if (children.size() != other->children.size()) { + if (!ExpressionUtil::ListEquals(children, other->children)) { return false; } - for (idx_t i = 0; i < children.size(); i++) { - if (!Expression::Equals(children[i].get(), other->children[i].get())) { - return false; - } - } if (!FunctionData::Equals(bind_info.get(), other->bind_info.get())) { return false; } @@ -116662,6 +126757,7 @@ unique_ptr BoundFunctionExpression::Copy() { + namespace duckdb { BoundOperatorExpression::BoundOperatorExpression(ExpressionType type, LogicalType return_type) @@ -116691,14 +126787,9 @@ bool BoundOperatorExpression::Equals(const BaseExpression *other_p) const { return false; } auto other = (BoundOperatorExpression *)other_p; - if 
(children.size() != other->children.size()) { + if (!ExpressionUtil::ListEquals(children, other->children)) { return false; } - for (idx_t i = 0; i < children.size(); i++) { - if (!Expression::Equals(children[i].get(), other->children[i].get())) { - return false; - } - } return true; } @@ -116866,6 +126957,7 @@ unique_ptr BoundUnnestExpression::Copy() { + namespace duckdb { BoundWindowExpression::BoundWindowExpression(ExpressionType type, LogicalType return_type, @@ -116876,7 +126968,129 @@ BoundWindowExpression::BoundWindowExpression(ExpressionType type, LogicalType re } string BoundWindowExpression::ToString() const { - return "WINDOW"; + string result = aggregate.get() ? aggregate->name : ExpressionTypeToString(type); + result += "("; + result += StringUtil::Join(children, children.size(), ", ", + [](const unique_ptr &child) { return child->GetName(); }); + // Lead/Lag extra arguments + if (offset_expr.get()) { + result += ", "; + result += offset_expr->GetName(); + } + if (default_expr.get()) { + result += ", "; + result += default_expr->GetName(); + } + + // Over clause + result += ") OVER("; + string sep; + + // Partitions + if (!partitions.empty()) { + result += "PARTITION BY "; + result += StringUtil::Join(partitions, partitions.size(), ", ", + [](const unique_ptr &partition) { return partition->GetName(); }); + sep = " "; + } + + // Orders + if (!orders.empty()) { + result += sep; + result += "ORDER BY "; + result += StringUtil::Join(orders, orders.size(), ", ", [](const BoundOrderByNode &order) { + auto str = order.expression->GetName(); + str += (order.type == OrderType::ASCENDING) ? 
" ASC" : " DESC"; + switch (order.null_order) { + case OrderByNullType::NULLS_FIRST: + str += " NULLS FIRST"; + break; + case OrderByNullType::NULLS_LAST: + str += " NULLS LAST"; + break; + default: + break; + } + return str; + }); + sep = " "; + } + + // Rows/Range + string units = "ROWS"; + string from; + switch (start) { + case WindowBoundary::CURRENT_ROW_RANGE: + case WindowBoundary::CURRENT_ROW_ROWS: + from = "CURRENT ROW"; + units = (start == WindowBoundary::CURRENT_ROW_RANGE) ? "RANGE" : "ROWS"; + break; + case WindowBoundary::UNBOUNDED_PRECEDING: + if (end != WindowBoundary::CURRENT_ROW_RANGE) { + from = "UNBOUNDED PRECEDING"; + } + break; + case WindowBoundary::EXPR_PRECEDING_ROWS: + case WindowBoundary::EXPR_PRECEDING_RANGE: + from = start_expr->GetName() + " PRECEDING"; + units = (start == WindowBoundary::EXPR_PRECEDING_RANGE) ? "RANGE" : "ROWS"; + break; + case WindowBoundary::EXPR_FOLLOWING_ROWS: + case WindowBoundary::EXPR_FOLLOWING_RANGE: + from = start_expr->GetName() + " FOLLOWING"; + units = (start == WindowBoundary::EXPR_FOLLOWING_RANGE) ? "RANGE" : "ROWS"; + break; + default: + break; + } + + string to; + switch (end) { + case WindowBoundary::CURRENT_ROW_RANGE: + if (start != WindowBoundary::UNBOUNDED_PRECEDING) { + to = "CURRENT ROW"; + units = "RANGE"; + } + break; + case WindowBoundary::CURRENT_ROW_ROWS: + to = "CURRENT ROW"; + units = "ROWS"; + break; + case WindowBoundary::UNBOUNDED_PRECEDING: + to = "UNBOUNDED PRECEDING"; + break; + case WindowBoundary::EXPR_PRECEDING_ROWS: + case WindowBoundary::EXPR_PRECEDING_RANGE: + to = end_expr->GetName() + " PRECEDING"; + units = (start == WindowBoundary::EXPR_PRECEDING_RANGE) ? "RANGE" : "ROWS"; + break; + case WindowBoundary::EXPR_FOLLOWING_ROWS: + case WindowBoundary::EXPR_FOLLOWING_RANGE: + to = end_expr->GetName() + " FOLLOWING"; + units = (start == WindowBoundary::EXPR_FOLLOWING_RANGE) ? 
"RANGE" : "ROWS"; + break; + default: + break; + } + + if (!from.empty() || !to.empty()) { + result += sep + units; + } + if (!from.empty() && !to.empty()) { + result += " BETWEEN "; + result += from; + result += " AND "; + result += to; + } else if (!from.empty()) { + result += " "; + result += from; + } else if (!to.empty()) { + result += " "; + result += to; + } + + result += ")"; + return result; } bool BoundWindowExpression::Equals(const BaseExpression *other_p) const { @@ -117074,11 +127288,12 @@ string AggregateBinder::UnsupportedAggregateMessage() { + namespace duckdb { -AlterBinder::AlterBinder(Binder &binder, ClientContext &context, string table, vector &columns, +AlterBinder::AlterBinder(Binder &binder, ClientContext &context, TableCatalogEntry &table, vector &bound_columns, LogicalType target_type) - : ExpressionBinder(binder, context), table(move(table)), columns(columns), bound_columns(bound_columns) { + : ExpressionBinder(binder, context), table(table), bound_columns(bound_columns) { this->target_type = move(target_type); } @@ -117101,17 +127316,16 @@ string AlterBinder::UnsupportedAggregateMessage() { } BindResult AlterBinder::BindColumn(ColumnRefExpression &colref) { - if (!colref.table_name.empty() && colref.table_name != table) { + if (!colref.table_name.empty() && colref.table_name != table.name) { throw BinderException("Cannot reference table %s from within alter statement for table %s!", colref.table_name, - table); + table.name); } - for (idx_t i = 0; i < columns.size(); i++) { - if (colref.column_name == columns[i].name) { - bound_columns.push_back(i); - return BindResult(make_unique(columns[i].type, bound_columns.size() - 1)); - } + auto idx = table.GetColumnIndex(colref.column_name, true); + if (idx == INVALID_INDEX) { + throw BinderException("Table does not contain column %s referenced in alter statement!", colref.column_name); } - throw BinderException("Table does not contain column %s referenced in alter statement!", 
colref.column_name); + bound_columns.push_back(idx); + return BindResult(make_unique(table.columns[idx].type, bound_columns.size() - 1)); } } // namespace duckdb @@ -117705,6 +127919,8 @@ ExpressionBinder::~ExpressionBinder() { BindResult ExpressionBinder::BindExpression(unique_ptr *expr, idx_t depth, bool root_expression) { auto &expr_ref = **expr; switch (expr_ref.expression_class) { + case ExpressionClass::BETWEEN: + return BindExpression((BetweenExpression &)expr_ref, depth); case ExpressionClass::CASE: return BindExpression((CaseExpression &)expr_ref, depth); case ExpressionClass::CAST: @@ -117780,6 +127996,48 @@ void ExpressionBinder::ExtractCorrelatedExpressions(Binder &binder, Expression & [&](Expression &child) { ExtractCorrelatedExpressions(binder, child); }); } +bool ExpressionBinder::ContainsNullType(const LogicalType &type) { + switch (type.id()) { + case LogicalTypeId::STRUCT: + case LogicalTypeId::MAP: { + auto child_count = StructType::GetChildCount(type); + for (idx_t i = 0; i < child_count; i++) { + if (ContainsNullType(StructType::GetChildType(type, i))) { + return true; + } + } + return false; + } + case LogicalTypeId::LIST: + return ContainsNullType(ListType::GetChildType(type)); + case LogicalTypeId::SQLNULL: + return true; + default: + return false; + } +} + +LogicalType ExpressionBinder::ExchangeNullType(const LogicalType &type) { + switch (type.id()) { + case LogicalTypeId::STRUCT: + case LogicalTypeId::MAP: { + // we make a copy of the child types of the struct here + auto child_types = StructType::GetChildTypes(type); + for (auto &child_type : child_types) { + child_type.second = ExchangeNullType(child_type.second); + } + return type.id() == LogicalTypeId::MAP ? 
LogicalType::MAP(move(child_types)) + : LogicalType::STRUCT(move(child_types)); + } + case LogicalTypeId::LIST: + return LogicalType::LIST(ExchangeNullType(ListType::GetChildType(type))); + case LogicalTypeId::SQLNULL: + return LogicalType::INTEGER; + default: + return type; + } +} + unique_ptr ExpressionBinder::Bind(unique_ptr &expr, LogicalType *result_type, bool root_expression) { // bind the main expression @@ -117800,10 +128058,13 @@ unique_ptr ExpressionBinder::Bind(unique_ptr &expr // the binder has a specific target type: add a cast to that type result = BoundCastExpression::AddCastToType(move(result), target_type); } else { - if (result->return_type.id() == LogicalTypeId::SQLNULL) { - // SQL NULL type is only used internally in the binder - // cast to INTEGER if we encounter it outside of the binder - result = BoundCastExpression::AddCastToType(move(result), LogicalType::INTEGER); + // SQL NULL type is only used internally in the binder + // cast to INTEGER if we encounter it outside of the binder + if (!binder.can_contain_nulls) { + if (ContainsNullType(result->return_type)) { + auto result_type = ExchangeNullType(result->return_type); + result = BoundCastExpression::AddCastToType(move(result), result_type); + } } } if (result_type) { @@ -117826,7 +128087,7 @@ string ExpressionBinder::Bind(unique_ptr *expr, idx_t depth, b return result.error; } else { // successfully bound: replace the node with a BoundExpression - *expr = make_unique(move(result.expression), move(*expr)); + *expr = make_unique(move(result.expression)); auto be = (BoundExpression *)expr->get(); D_ASSERT(be); be->alias = alias; @@ -117971,6 +128232,12 @@ void ExpressionIterator::EnumerateChildren(Expression &expr, for (auto &child : window_expr.children) { callback(child); } + if (window_expr.start_expr) { + callback(window_expr.start_expr); + } + if (window_expr.end_expr) { + callback(window_expr.end_expr); + } if (window_expr.offset_expr) { callback(window_expr.offset_expr); } @@ 
-117992,9 +128259,7 @@ void ExpressionIterator::EnumerateChildren(Expression &expr, // these node types have no children break; default: - // called on non BoundExpression type! - D_ASSERT(0); - break; + throw InternalException("ExpressionIterator used on unbound expression"); } } @@ -118059,6 +128324,9 @@ void ExpressionIterator::EnumerateQueryNodeChildren(BoundQueryNode &node, for (idx_t i = 0; i < bound_select.aggregates.size(); i++) { EnumerateExpression(bound_select.aggregates[i], callback); } + for (idx_t i = 0; i < bound_select.unnests.size(); i++) { + EnumerateExpression(bound_select.unnests[i], callback); + } for (idx_t i = 0; i < bound_select.windows.size(); i++) { EnumerateExpression(bound_select.windows[i], callback); } @@ -118088,6 +128356,150 @@ void ExpressionIterator::EnumerateQueryNodeChildren(BoundQueryNode &node, } // namespace duckdb +namespace duckdb { + +ConjunctionOrFilter::ConjunctionOrFilter() : TableFilter(TableFilterType::CONJUNCTION_OR) { +} + +FilterPropagateResult ConjunctionOrFilter::CheckStatistics(BaseStatistics &stats) { + // the OR filter is true if ANY of the children is true + D_ASSERT(!child_filters.empty()); + for (auto &filter : child_filters) { + auto prune_result = filter->CheckStatistics(stats); + if (prune_result == FilterPropagateResult::NO_PRUNING_POSSIBLE) { + return FilterPropagateResult::NO_PRUNING_POSSIBLE; + } else if (prune_result == FilterPropagateResult::FILTER_ALWAYS_TRUE) { + return FilterPropagateResult::FILTER_ALWAYS_TRUE; + } + } + return FilterPropagateResult::FILTER_ALWAYS_FALSE; +} + +string ConjunctionOrFilter::ToString(const string &column_name) { + string result; + for (idx_t i = 0; i < child_filters.size(); i++) { + if (i > 0) { + result += " OR "; + } + result += child_filters[i]->ToString(column_name); + } + return result; +} + +ConjunctionAndFilter::ConjunctionAndFilter() : TableFilter(TableFilterType::CONJUNCTION_AND) { +} + +FilterPropagateResult 
ConjunctionAndFilter::CheckStatistics(BaseStatistics &stats) { + // the OR filter is true if ALL of the children is true + D_ASSERT(!child_filters.empty()); + auto result = FilterPropagateResult::FILTER_ALWAYS_TRUE; + for (auto &filter : child_filters) { + auto prune_result = filter->CheckStatistics(stats); + if (prune_result == FilterPropagateResult::FILTER_ALWAYS_FALSE) { + return FilterPropagateResult::FILTER_ALWAYS_FALSE; + } else if (prune_result != result) { + result = FilterPropagateResult::NO_PRUNING_POSSIBLE; + } + } + return result; +} + +string ConjunctionAndFilter::ToString(const string &column_name) { + string result; + for (idx_t i = 0; i < child_filters.size(); i++) { + if (i > 0) { + result += " AND "; + } + result += child_filters[i]->ToString(column_name); + } + return result; +} + +} // namespace duckdb + + + + +namespace duckdb { + +ConstantFilter::ConstantFilter(ExpressionType comparison_type_p, Value constant_p) + : TableFilter(TableFilterType::CONSTANT_COMPARISON), comparison_type(comparison_type_p), + constant(move(constant_p)) { +} + +FilterPropagateResult ConstantFilter::CheckStatistics(BaseStatistics &stats) { + D_ASSERT(constant.type().id() == stats.type.id()); + switch (constant.type().InternalType()) { + case PhysicalType::UINT8: + case PhysicalType::UINT16: + case PhysicalType::UINT32: + case PhysicalType::UINT64: + case PhysicalType::INT8: + case PhysicalType::INT16: + case PhysicalType::INT32: + case PhysicalType::INT64: + case PhysicalType::INT128: + case PhysicalType::FLOAT: + case PhysicalType::DOUBLE: + return ((NumericStatistics &)stats).CheckZonemap(comparison_type, constant); + case PhysicalType::VARCHAR: + return ((StringStatistics &)stats).CheckZonemap(comparison_type, constant.ToString()); + default: + return FilterPropagateResult::NO_PRUNING_POSSIBLE; + } +} + +string ConstantFilter::ToString(const string &column_name) { + return column_name + ExpressionTypeToOperator(comparison_type) + constant.ToString(); +} + +} // 
namespace duckdb + + + +namespace duckdb { + +IsNullFilter::IsNullFilter() : TableFilter(TableFilterType::IS_NULL) { +} + +FilterPropagateResult IsNullFilter::CheckStatistics(BaseStatistics &stats) { + if (!stats.CanHaveNull()) { + // no null values are possible: always false + return FilterPropagateResult::FILTER_ALWAYS_FALSE; + } + if (!stats.CanHaveNoNull()) { + // no non-null values are possible: always true + return FilterPropagateResult::FILTER_ALWAYS_TRUE; + } + return FilterPropagateResult::NO_PRUNING_POSSIBLE; +} + +string IsNullFilter::ToString(const string &column_name) { + return column_name + "IS NULL"; +} + +IsNotNullFilter::IsNotNullFilter() : TableFilter(TableFilterType::IS_NOT_NULL) { +} + +FilterPropagateResult IsNotNullFilter::CheckStatistics(BaseStatistics &stats) { + if (!stats.CanHaveNoNull()) { + // no non-null values are possible: always false + return FilterPropagateResult::FILTER_ALWAYS_FALSE; + } + if (!stats.CanHaveNull()) { + // no null values are possible: always true + return FilterPropagateResult::FILTER_ALWAYS_TRUE; + } + return FilterPropagateResult::NO_PRUNING_POSSIBLE; +} + +string IsNotNullFilter::ToString(const string &column_name) { + return column_name + " IS NOT NULL"; +} + +} // namespace duckdb + + @@ -118250,6 +128662,37 @@ string LogicalOperator::ToString(idx_t depth) const { return renderer.ToString(*this); } +void LogicalOperator::Verify() { +#ifdef DEBUG + // verify expressions + for (idx_t expr_idx = 0; expr_idx < expressions.size(); expr_idx++) { + // verify that we can (correctly) copy this expression + auto copy = expressions[expr_idx]->Copy(); + auto original_hash = expressions[expr_idx]->Hash(); + auto copy_hash = copy->Hash(); + // copy should be identical to original + D_ASSERT(expressions[expr_idx]->ToString() == copy->ToString()); + D_ASSERT(original_hash == copy_hash); + D_ASSERT(Expression::Equals(expressions[expr_idx].get(), copy.get())); + + D_ASSERT(!Expression::Equals(expressions[expr_idx].get(), 
nullptr)); + for (idx_t other_idx = 0; other_idx < expr_idx; other_idx++) { + // comparison with other expressions + auto other_hash = expressions[other_idx]->Hash(); + bool expr_equal = Expression::Equals(expressions[expr_idx].get(), expressions[other_idx].get()); + if (original_hash != other_hash) { + // if the hashes are not equal the expressions should not be equal either + D_ASSERT(!expr_equal); + } + } + } + D_ASSERT(!ToString().empty()); + for (auto &child : children) { + child->Verify(); + } +#endif +} + void LogicalOperator::Print() { Printer::Print(ToString()); } @@ -118409,7 +128852,7 @@ void LogicalOperatorVisitor::VisitExpression(unique_ptr *expression) result = VisitReplace((BoundUnnestExpression &)expr, expression); break; default: - D_ASSERT(0); + throw InternalException("Unrecognized expression type in logical operator visitor"); } if (result) { *expression = move(result); @@ -118423,6 +128866,9 @@ void LogicalOperatorVisitor::VisitExpressionChildren(Expression &expr) { ExpressionIterator::EnumerateChildren(expr, [&](unique_ptr &expr) { VisitExpression(&expr); }); } +// these are all default methods that can be overriden +// we don't care about coverage here +// LCOV_EXCL_START unique_ptr LogicalOperatorVisitor::VisitReplace(BoundAggregateExpression &expr, unique_ptr *expr_ptr) { return nullptr; @@ -118503,6 +128949,8 @@ unique_ptr LogicalOperatorVisitor::VisitReplace(BoundUnnestExpressio return nullptr; } +// LCOV_EXCL_STOP + } // namespace duckdb @@ -118705,9 +129153,12 @@ string LogicalGet::GetName() const { string LogicalGet::ParamsToString() const { string result; - for (auto &filter : table_filters) { - result += - names[filter.column_index] + ExpressionTypeToOperator(filter.comparison_type) + filter.constant.ToString(); + for (auto &kv : table_filters.filters) { + auto &column_index = kv.first; + auto &filter = kv.second; + if (column_index < names.size()) { + result += filter->ToString(names[column_index]); + } result += "\n"; } if 
(!function.to_string) { @@ -118897,10 +129348,10 @@ void Planner::CreatePlan(SQLStatement &statement) { vector bound_parameters; // first bind the tables and columns to the catalog - context.profiler.StartPhase("binder"); + context.profiler->StartPhase("binder"); binder->parameters = &bound_parameters; auto bound_statement = binder->Bind(statement); - context.profiler.EndPhase(); + context.profiler->EndPhase(); this->read_only = binder->read_only; this->requires_valid_transaction = binder->requires_valid_transaction; @@ -119288,6 +129739,7 @@ unique_ptr FlattenDependentJoins::PushDownDependentJoinInternal return move(cross_product); } switch (plan->type) { + case LogicalOperatorType::LOGICAL_UNNEST: case LogicalOperatorType::LOGICAL_FILTER: { // filter // first we flatten the dependent join in the child of the filter @@ -119485,6 +129937,9 @@ unique_ptr FlattenDependentJoins::PushDownDependentJoinInternal if (limit.offset_val > 0) { throw ParserException("OFFSET not supported in correlated subquery"); } + if (limit.limit) { + throw ParserException("Non-constant limit not supported in correlated subquery"); + } plan->children[0] = PushDownDependentJoinInternal(move(plan->children[0])); if (limit.limit_val == 0) { // limit = 0 means we return zero columns here @@ -119529,8 +129984,7 @@ unique_ptr FlattenDependentJoins::PushDownDependentJoinInternal case LogicalOperatorType::LOGICAL_ORDER_BY: throw ParserException("ORDER BY not supported in correlated subquery"); default: - throw NotImplementedException("Logical operator type \"%s\" for dependent join", - LogicalOperatorToString(plan->type)); + throw InternalException("Logical operator type \"%s\" for dependent join", LogicalOperatorToString(plan->type)); } } @@ -119608,6 +130062,8 @@ unique_ptr RewriteCorrelatedExpressions::VisitReplace(BoundColumnRef } // correlated column reference // replace with the entry referring to the duplicate eliminated scan + // if this assertion occurs it generally means the correlated 
expressions were not propagated correctly + // through different binders D_ASSERT(expr.depth == 1); auto entry = correlated_map.find(expr.binding); D_ASSERT(entry != correlated_map.end()); @@ -119662,7 +130118,6 @@ void RewriteCorrelatedExpressions::RewriteCorrelatedRecursive::RewriteCorrelated if (entry != correlated_map.end()) { // we found the column in the correlated map! // update the binding and reduce the depth by 1 - bound_colref.binding = ColumnBinding(base_binding.table_index, base_binding.column_index + entry->second); bound_colref.depth--; } @@ -119724,23 +130179,38 @@ Binding::Binding(const string &alias, vector coltypes, vectorsecond; + return true; + } + // no match found: try to lowercase the column name + entry = name_map.find(StringUtil::Lower(column_name)); + if (entry != name_map.end()) { + result = entry->second; + return true; + } + return false; +} + +bool Binding::HasMatchingBinding(const string &column_name) { + column_t result; + return TryGetBindingIndex(column_name, result); } BindResult Binding::Bind(ColumnRefExpression &colref, idx_t depth) { - auto column_entry = name_map.find(colref.column_name); - if (column_entry == name_map.end()) { + column_t column_index; + if (!TryGetBindingIndex(colref.column_name, column_index)) { return BindResult(StringUtil::Format("Values list \"%s\" does not have a column named \"%s\"", alias.c_str(), colref.column_name.c_str())); } ColumnBinding binding; binding.table_index = index; - binding.column_index = column_entry->second; - LogicalType sql_type = types[column_entry->second]; + binding.column_index = column_index; + LogicalType sql_type = types[column_index]; if (colref.alias.empty()) { - colref.alias = names[column_entry->second]; + colref.alias = names[column_index]; } return BindResult(make_unique(colref.GetName(), sql_type, binding, depth)); } @@ -119756,22 +130226,21 @@ TableBinding::TableBinding(const string &alias, vector types_p, vec } BindResult TableBinding::Bind(ColumnRefExpression 
&colref, idx_t depth) { - auto entry = name_map.find(colref.column_name); - if (entry == name_map.end()) { + column_t column_index; + if (!TryGetBindingIndex(colref.column_name, column_index)) { return BindResult(StringUtil::Format("Table \"%s\" does not have a column named \"%s\"", colref.table_name, colref.column_name)); } - auto col_index = entry->second; // fetch the type of the column LogicalType col_type; - if (entry->second == COLUMN_IDENTIFIER_ROW_ID) { + if (column_index == COLUMN_IDENTIFIER_ROW_ID) { // row id: BIGINT type col_type = LogicalType::BIGINT; } else { // normal column: fetch type from base column - col_type = types[col_index]; + col_type = types[column_index]; if (colref.alias.empty()) { - colref.alias = names[entry->second]; + colref.alias = names[column_index]; } } @@ -119781,14 +130250,14 @@ BindResult TableBinding::Bind(ColumnRefExpression &colref, idx_t depth) { binding.column_index = column_ids.size(); for (idx_t i = 0; i < column_ids.size(); i++) { - if (column_ids[i] == col_index) { + if (column_ids[i] == column_index) { binding.column_index = i; break; } } if (binding.column_index == column_ids.size()) { // column binding not found: add it to the list of bindings - column_ids.push_back(col_index); + column_ids.push_back(column_index); } binding.table_index = index; return BindResult(make_unique(colref.GetName(), col_type, binding, depth)); @@ -119799,25 +130268,25 @@ MacroBinding::MacroBinding(vector types_p, vector names_p, } BindResult MacroBinding::Bind(ColumnRefExpression &colref, idx_t depth) { - auto entry = name_map.find(colref.column_name); - if (entry == name_map.end()) { + column_t column_index; + if (!TryGetBindingIndex(colref.column_name, column_index)) { return BindResult( StringUtil::Format("Macro \"%s\" does not have a parameter named \"%s\"", macro_name, colref.column_name)); } ColumnBinding binding; binding.table_index = index; - binding.column_index = entry->second; + binding.column_index = column_index; // we are 
binding a parameter to create the macro, no arguments are supplied - return BindResult(make_unique(colref.GetName(), types[entry->second], binding, depth)); + return BindResult(make_unique(colref.GetName(), types[column_index], binding, depth)); } unique_ptr MacroBinding::ParamToArg(ColumnRefExpression &colref) { - auto entry = name_map.find(colref.column_name); - if (entry == name_map.end()) { + column_t column_index; + if (!TryGetBindingIndex(colref.column_name, column_index)) { throw BinderException("Macro \"%s\" does not have a parameter named \"%s\"", macro_name, colref.column_name); } - auto arg = arguments[entry->second]->Copy(); + auto arg = arguments[column_index]->Copy(); arg->alias = colref.alias; return arg; } @@ -119825,9 +130294,26 @@ unique_ptr MacroBinding::ParamToArg(ColumnRefExpression &colre } // namespace duckdb + namespace duckdb { -Block::Block(block_id_t id) : FileBuffer(FileBufferType::BLOCK, Storage::BLOCK_ALLOC_SIZE), id(id) { +void TableFilterSet::PushFilter(idx_t column_index, unique_ptr filter) { + auto entry = filters.find(column_index); + if (entry == filters.end()) { + // no filter yet: push the filter directly + filters[column_index] = move(filter); + } else { + // there is already a filter: AND it together + if (entry->second->filter_type == TableFilterType::CONJUNCTION_AND) { + auto &and_filter = (ConjunctionAndFilter &)*entry->second; + and_filter.child_filters.push_back(move(filter)); + } else { + auto and_filter = make_unique(); + and_filter->child_filters.push_back(move(entry->second)); + and_filter->child_filters.push_back(move(filter)); + filters[column_index] = move(and_filter); + } + } } } // namespace duckdb @@ -119836,100 +130322,46 @@ Block::Block(block_id_t id) : FileBuffer(FileBufferType::BLOCK, Storage::BLOCK_A namespace duckdb { -BufferHandle::BufferHandle(shared_ptr handle, FileBuffer *node) : handle(move(handle)), node(node) { +Block::Block(Allocator &allocator, block_id_t id) + : FileBuffer(allocator, 
FileBufferType::BLOCK, Storage::BLOCK_ALLOC_SIZE), id(id) { } -BufferHandle::~BufferHandle() { - auto &buffer_manager = BufferManager::GetBufferManager(handle->db); - buffer_manager.Unpin(handle); -} - -data_ptr_t BufferHandle::Ptr() { - return node->buffer; +Block::Block(FileBuffer &source, block_id_t id) : FileBuffer(source, FileBufferType::BLOCK), id(id) { + D_ASSERT(GetMallocedSize() == Storage::BLOCK_ALLOC_SIZE); + D_ASSERT(size == Storage::BLOCK_SIZE); } } // namespace duckdb - namespace duckdb { -unique_ptr BufferList::Pop() { - if (!root) { - // no root: return nullptr - return nullptr; - } - // fetch root - auto entry = move(root); - root = move(entry->next); - if (root) { - // new root no longer has prev pointer - root->prev = nullptr; - } else { - last = nullptr; - } - count--; - return entry; +BufferHandle::BufferHandle(shared_ptr handle, FileBuffer *node) : handle(move(handle)), node(node) { } -unique_ptr BufferList::Erase(BufferEntry *entry) { - D_ASSERT(entry->prev || entry == root.get()); - D_ASSERT(entry->next || entry == last); - // first get the entry, either from the previous entry or from the root node - auto current = entry->prev ? 
move(entry->prev->next) : move(root); - auto prev = entry->prev; - if (entry == last) { - // entry was last entry: last is now the previous entry - last = prev; - } - // now set up prev/next pointers correctly - auto next = move(entry->next); - if (!prev) { - // no prev: entry was root - root = move(next); - if (root) { - // new root no longer has prev pointer - root->prev = nullptr; - } else { - last = nullptr; - } - D_ASSERT(!root || !root->prev); - } else if (prev != last) { - D_ASSERT(next); - next->prev = prev; - prev->next = move(next); - } - count--; - return current; +BufferHandle::~BufferHandle() { + auto &buffer_manager = BufferManager::GetBufferManager(handle->db); + buffer_manager.Unpin(handle); } -void BufferList::Append(unique_ptr entry) { - D_ASSERT(!entry->next); - if (!last) { - // empty list: set as root - entry->prev = nullptr; - root = move(entry); - last = root.get(); - } else { - // non-empty list: append to last entry and set entry as last - entry->prev = last; - last->next = move(entry); - last = last->next.get(); - } - count++; +data_ptr_t BufferHandle::Ptr() { + return node->buffer; } } // namespace duckdb + + + namespace duckdb { ManagedBuffer::ManagedBuffer(DatabaseInstance &db, idx_t size, bool can_destroy, block_id_t id) - : FileBuffer(FileBufferType::MANAGED_BUFFER, size), db(db), can_destroy(can_destroy), id(id) { + : FileBuffer(Allocator::Get(db), FileBufferType::MANAGED_BUFFER, size), db(db), can_destroy(can_destroy), id(id) { D_ASSERT(id >= MAXIMUM_BLOCK); - D_ASSERT(size >= Storage::BLOCK_ALLOC_SIZE); + D_ASSERT(size >= Storage::BLOCK_SIZE); } } // namespace duckdb @@ -119937,7 +130369,6 @@ ManagedBuffer::ManagedBuffer(DatabaseInstance &db, idx_t size, bool can_destroy, - //===----------------------------------------------------------------------===// // DuckDB // @@ -120012,29 +130443,23 @@ class ConcurrentQueue { #endif + namespace duckdb { -BlockHandle::BlockHandle(DatabaseInstance &db, block_id_t block_id_p) : db(db) { - 
block_id = block_id_p; - readers = 0; - buffer = nullptr; +BlockHandle::BlockHandle(DatabaseInstance &db, block_id_t block_id_p) + : db(db), readers(0), block_id(block_id_p), buffer(nullptr), eviction_timestamp(0), can_destroy(false) { eviction_timestamp = 0; state = BlockState::BLOCK_UNLOADED; - can_destroy = false; memory_usage = Storage::BLOCK_ALLOC_SIZE; } BlockHandle::BlockHandle(DatabaseInstance &db, block_id_t block_id_p, unique_ptr buffer_p, - bool can_destroy_p, idx_t alloc_size) - : db(db) { - D_ASSERT(alloc_size >= Storage::BLOCK_SIZE); - block_id = block_id_p; - readers = 0; + bool can_destroy_p, idx_t block_size) + : db(db), readers(0), block_id(block_id_p), eviction_timestamp(0), can_destroy(can_destroy_p) { + D_ASSERT(block_size >= Storage::BLOCK_SIZE); buffer = move(buffer_p); - eviction_timestamp = 0; state = BlockState::BLOCK_LOADED; - can_destroy = can_destroy_p; - memory_usage = alloc_size; + memory_usage = block_size + Storage::BLOCK_HEADER_SIZE; } BlockHandle::~BlockHandle() { @@ -120054,16 +130479,11 @@ unique_ptr BlockHandle::Load(shared_ptr &handle) { D_ASSERT(handle->buffer); return make_unique(handle, handle->buffer.get()); } - handle->state = BlockState::BLOCK_LOADED; auto &buffer_manager = BufferManager::GetBufferManager(handle->db); auto &block_manager = BlockManager::GetBlockManager(handle->db); if (handle->block_id < MAXIMUM_BLOCK) { - // FIXME: currently we still require a lock for reading blocks from disk - // this is mainly down to the block manager only having a single pointer into the file - // this is relatively easy to fix later on - lock_guard buffer_lock(buffer_manager.manager_lock); - auto block = make_unique(handle->block_id); + auto block = make_unique(Allocator::Get(handle->db), handle->block_id); block_manager.Read(*block); handle->buffer = move(block); } else { @@ -120073,6 +130493,7 @@ unique_ptr BlockHandle::Load(shared_ptr &handle) { handle->buffer = buffer_manager.ReadTemporaryBuffer(handle->block_id); } } + 
handle->state = BlockState::BLOCK_LOADED; return make_unique(handle, handle->buffer.get()); } @@ -120082,8 +130503,7 @@ void BlockHandle::Unload() { return; } D_ASSERT(CanUnload()); - D_ASSERT(memory_usage >= Storage::BLOCK_SIZE); - state = BlockState::BLOCK_UNLOADED; + D_ASSERT(memory_usage >= Storage::BLOCK_ALLOC_SIZE); auto &buffer_manager = BufferManager::GetBufferManager(db); if (block_id >= MAXIMUM_BLOCK && !can_destroy) { @@ -120092,6 +130512,7 @@ void BlockHandle::Unload() { } buffer.reset(); buffer_manager.current_memory -= memory_usage; + state = BlockState::BLOCK_UNLOADED; } bool BlockHandle::CanUnload() { @@ -120137,20 +130558,39 @@ struct EvictionQueue { eviction_queue_t q; }; +class TemporaryDirectoryHandle { +public: + TemporaryDirectoryHandle(DatabaseInstance &db, string path_p) : db(db), temp_directory(move(path_p)) { + auto &fs = FileSystem::GetFileSystem(db); + if (!temp_directory.empty()) { + fs.CreateDirectory(temp_directory); + } + } + ~TemporaryDirectoryHandle() { + auto &fs = FileSystem::GetFileSystem(db); + if (!temp_directory.empty()) { + fs.RemoveDirectory(temp_directory); + } + } + +private: + DatabaseInstance &db; + string temp_directory; +}; + +void BufferManager::SetTemporaryDirectory(string new_dir) { + if (temp_directory_handle) { + throw NotImplementedException("Cannot switch temporary directory after the current one has been used"); + } + this->temp_directory = move(new_dir); +} + BufferManager::BufferManager(DatabaseInstance &db, string tmp, idx_t maximum_memory) : db(db), current_memory(0), maximum_memory(maximum_memory), temp_directory(move(tmp)), queue(make_unique()), temporary_id(MAXIMUM_BLOCK) { - auto &fs = FileSystem::GetFileSystem(db); - if (!temp_directory.empty()) { - fs.CreateDirectory(temp_directory); - } } BufferManager::~BufferManager() { - auto &fs = FileSystem::GetFileSystem(db); - if (!temp_directory.empty()) { - fs.RemoveDirectory(temp_directory); - } } shared_ptr BufferManager::RegisterBlock(block_id_t 
block_id) { @@ -120172,53 +130612,125 @@ shared_ptr BufferManager::RegisterBlock(block_id_t block_id) { return result; } -shared_ptr BufferManager::RegisterMemory(idx_t alloc_size, bool can_destroy) { +shared_ptr BufferManager::ConvertToPersistent(BlockManager &block_manager, block_id_t block_id, + shared_ptr old_block) { + // pin the old block to ensure we have it loaded in memory + auto old_handle = Pin(old_block); + D_ASSERT(old_block->state == BlockState::BLOCK_LOADED); + D_ASSERT(old_block->buffer); + + // register a block with the new block id + auto new_block = RegisterBlock(block_id); + D_ASSERT(new_block->state == BlockState::BLOCK_UNLOADED); + D_ASSERT(new_block->readers == 0); + + // move the data from the old block into data for the new block + new_block->state = BlockState::BLOCK_LOADED; + new_block->buffer = make_unique(*old_block->buffer, block_id); + + // clear the old buffer and unload it + old_handle.reset(); + old_block->buffer.reset(); + old_block->state = BlockState::BLOCK_UNLOADED; + old_block->memory_usage = 0; + old_block.reset(); + + // persist the new block to disk + block_manager.Write(*new_block->buffer, block_id); + + AddToEvictionQueue(new_block); + + return new_block; +} + +shared_ptr BufferManager::RegisterMemory(idx_t block_size, bool can_destroy) { + auto alloc_size = block_size + Storage::BLOCK_HEADER_SIZE; // first evict blocks until we have enough memory to store this buffer if (!EvictBlocks(alloc_size, maximum_memory)) { - throw OutOfRangeException("Not enough memory to complete operation: could not allocate block of %lld bytes", - alloc_size); + throw OutOfMemoryException("could not allocate block of %lld bytes", alloc_size); } // allocate the buffer auto temp_id = ++temporary_id; - auto buffer = make_unique(db, alloc_size, can_destroy, temp_id); + auto buffer = make_unique(db, block_size, can_destroy, temp_id); // create a new block pointer for this block - return make_shared(db, temp_id, move(buffer), can_destroy, 
alloc_size); + auto result = make_shared(db, temp_id, move(buffer), can_destroy, block_size); + return result; } -unique_ptr BufferManager::Allocate(idx_t alloc_size) { - auto block = RegisterMemory(alloc_size, true); +unique_ptr BufferManager::Allocate(idx_t block_size) { + auto block = RegisterMemory(block_size, true); return Pin(block); } +void BufferManager::ReAllocate(shared_ptr &handle, idx_t block_size) { + D_ASSERT(block_size >= Storage::BLOCK_SIZE); + lock_guard lock(handle->lock); + D_ASSERT(handle->state == BlockState::BLOCK_LOADED); + auto alloc_size = block_size + Storage::BLOCK_HEADER_SIZE; + int64_t required_memory = alloc_size - handle->memory_usage; + if (required_memory == 0) { + return; + } else if (required_memory > 0) { + // evict blocks until we have space to resize this block + if (!EvictBlocks(required_memory, maximum_memory)) { + throw OutOfMemoryException("failed to resize block from %lld to %lld", handle->memory_usage, alloc_size); + } + } else { + // no need to evict blocks + current_memory -= idx_t(-required_memory); + } + // resize and adjust current memory + handle->buffer->Resize(block_size); + handle->memory_usage = alloc_size; +} + unique_ptr BufferManager::Pin(shared_ptr &handle) { - // lock the block + idx_t required_memory; + { + // lock the block + lock_guard lock(handle->lock); + // check if the block is already loaded + if (handle->state == BlockState::BLOCK_LOADED) { + // the block is loaded, increment the reader count and return a pointer to the handle + handle->readers++; + return handle->Load(handle); + } + required_memory = handle->memory_usage; + } + // evict blocks until we have space for the current block + if (!EvictBlocks(required_memory, maximum_memory)) { + throw OutOfMemoryException("failed to pin block of size %lld", required_memory); + } + // lock the handle again and repeat the check (in case anybody loaded in the mean time) lock_guard lock(handle->lock); // check if the block is already loaded if 
(handle->state == BlockState::BLOCK_LOADED) { // the block is loaded, increment the reader count and return a pointer to the handle handle->readers++; + current_memory -= required_memory; return handle->Load(handle); } - // evict blocks until we have space for the current block - if (!EvictBlocks(handle->memory_usage, maximum_memory)) { - throw OutOfRangeException("Not enough memory to complete operation: failed to pin block"); - } // now we can actually load the current block D_ASSERT(handle->readers == 0); handle->readers = 1; return handle->Load(handle); } +void BufferManager::AddToEvictionQueue(shared_ptr &handle) { + D_ASSERT(handle->readers == 0); + handle->eviction_timestamp++; + queue->q.enqueue(make_unique(weak_ptr(handle), handle->eviction_timestamp)); + // FIXME: do some house-keeping to prevent the queue from being flooded with many old blocks +} + void BufferManager::Unpin(shared_ptr &handle) { lock_guard lock(handle->lock); D_ASSERT(handle->readers > 0); handle->readers--; if (handle->readers == 0) { - handle->eviction_timestamp++; - queue->q.enqueue(make_unique(weak_ptr(handle), handle->eviction_timestamp)); - // FIXME: do some house-keeping to prevent the queue from being flooded with many old blocks + AddToEvictionQueue(handle); } } @@ -120266,13 +130778,13 @@ void BufferManager::UnregisterBlock(block_id_t block_id, bool can_destroy) { blocks.erase(block_id); } } + void BufferManager::SetLimit(idx_t limit) { lock_guard buffer_lock(manager_lock); // try to evict until the limit is reached if (!EvictBlocks(0, limit)) { - throw OutOfRangeException( - "Failed to change memory limit to new limit %lld: could not free up enough memory for the new limit", - limit); + throw OutOfMemoryException( + "Failed to change memory limit to %lld: could not free up enough memory for the new limit", limit); } idx_t old_limit = maximum_memory; // set the global maximum memory to the new limit if successful @@ -120281,9 +130793,8 @@ void BufferManager::SetLimit(idx_t 
limit) { if (!EvictBlocks(0, limit)) { // failed: go back to old limit maximum_memory = old_limit; - throw OutOfRangeException( - "Failed to change memory limit to new limit %lld: could not free up enough memory for the new limit", - limit); + throw OutOfMemoryException( + "Failed to change memory limit to %lld: could not free up enough memory for the new limit", limit); } } @@ -120292,9 +130803,23 @@ string BufferManager::GetTemporaryPath(block_id_t id) { return fs.JoinPath(temp_directory, to_string(id) + ".block"); } +void BufferManager::RequireTemporaryDirectory() { + if (temp_directory.empty()) { + throw Exception( + "Out-of-memory: cannot write buffer because no temporary directory is specified!\nTo enable " + "temporary buffer eviction set a temporary directory using PRAGMA temp_directory='/path/to/tmp.tmp'"); + } + lock_guard temp_handle_guard(temp_handle_lock); + if (!temp_directory_handle) { + // temp directory has not been created yet: initialize it + temp_directory_handle = make_unique(db, temp_directory); + } +} + void BufferManager::WriteTemporaryBuffer(ManagedBuffer &buffer) { - D_ASSERT(!temp_directory.empty()); - D_ASSERT(buffer.size + Storage::BLOCK_HEADER_SIZE >= Storage::BLOCK_ALLOC_SIZE); + RequireTemporaryDirectory(); + + D_ASSERT(buffer.size >= Storage::BLOCK_SIZE); // get the path to write to auto path = GetTemporaryPath(buffer.id); // create the file and write the size followed by the buffer contents @@ -120305,24 +130830,28 @@ void BufferManager::WriteTemporaryBuffer(ManagedBuffer &buffer) { } unique_ptr BufferManager::ReadTemporaryBuffer(block_id_t id) { - if (temp_directory.empty()) { - throw Exception("Out-of-memory: cannot read buffer because no temporary directory is specified!\nTo enable " - "temporary buffer eviction set a temporary directory in the configuration"); - } - idx_t alloc_size; + D_ASSERT(!temp_directory.empty()); + D_ASSERT(temp_directory_handle.get()); + idx_t block_size; // open the temporary file and read the size 
auto path = GetTemporaryPath(id); auto &fs = FileSystem::GetFileSystem(db); auto handle = fs.OpenFile(path, FileFlags::FILE_FLAGS_READ); - handle->Read(&alloc_size, sizeof(idx_t), 0); + handle->Read(&block_size, sizeof(idx_t), 0); // now allocate a buffer of this size and read the data into that buffer - auto buffer = make_unique(db, alloc_size + Storage::BLOCK_HEADER_SIZE, false, id); + auto buffer = make_unique(db, block_size, false, id); buffer->Read(*handle, sizeof(idx_t)); + + handle.reset(); + DeleteTemporaryFile(id); return move(buffer); } void BufferManager::DeleteTemporaryFile(block_id_t id) { + if (temp_directory.empty() || !temp_directory_handle) { + return; + } auto &fs = FileSystem::GetFileSystem(db); auto path = GetTemporaryPath(id); if (fs.FileExists(path)) { @@ -120384,6 +130913,7 @@ class MetaBlockWriter : public Serializer { idx_t offset; public: + BlockPointer GetBlockPointer(); void Flush(); void WriteData(const_data_ptr_t buffer, idx_t write_size) override; @@ -120443,12 +130973,11 @@ struct BoundCreateTableInfo; //! 
The table data reader is responsible for reading the data of a table from the block manager class TableDataReader { public: - TableDataReader(DatabaseInstance &db, MetaBlockReader &reader, BoundCreateTableInfo &info); + TableDataReader(MetaBlockReader &reader, BoundCreateTableInfo &info); void ReadTableData(); private: - DatabaseInstance &db; MetaBlockReader &reader; BoundCreateTableInfo &info; }; @@ -120506,165 +131035,11 @@ class MetaBlockReader : public Deserializer { -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/storage/table/morsel_info.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/storage/table/chunk_info.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - -namespace duckdb { -class MorselInfo; -struct SelectionVector; -class Transaction; - -enum class ChunkInfoType : uint8_t { CONSTANT_INFO, VECTOR_INFO, EMPTY_INFO }; - -class ChunkInfo { -public: - ChunkInfo(idx_t start, MorselInfo &morsel, ChunkInfoType type) : start(start), morsel(morsel), type(type) { - } - virtual ~ChunkInfo() { - } - - //! The row index of the first row - idx_t start; - //! The morsel the chunk info belongs to - MorselInfo &morsel; - //! The ChunkInfo type - ChunkInfoType type; - -public: - //! Gets up to max_count entries from the chunk info. If the ret is 0>ret>max_count, the selection vector is filled - //! with the tuples - virtual idx_t GetSelVector(Transaction &transaction, SelectionVector &sel_vector, idx_t max_count) = 0; - //! 
Returns whether or not a single row in the ChunkInfo should be used or not for the given transaction - virtual bool Fetch(Transaction &transaction, row_t row) = 0; - virtual void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) = 0; - - virtual void Serialize(Serializer &serialize) = 0; - static unique_ptr Deserialize(MorselInfo &morsel, Deserializer &source); -}; - -class ChunkConstantInfo : public ChunkInfo { -public: - ChunkConstantInfo(idx_t start, MorselInfo &morsel); - - transaction_t insert_id; - transaction_t delete_id; - -public: - idx_t GetSelVector(Transaction &transaction, SelectionVector &sel_vector, idx_t max_count) override; - bool Fetch(Transaction &transaction, row_t row) override; - void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) override; - - void Serialize(Serializer &serialize) override; - static unique_ptr Deserialize(MorselInfo &morsel, Deserializer &source); -}; - -class ChunkVectorInfo : public ChunkInfo { -public: - ChunkVectorInfo(idx_t start, MorselInfo &morsel); - - //! The transaction ids of the transactions that inserted the tuples (if any) - transaction_t inserted[STANDARD_VECTOR_SIZE]; - transaction_t insert_id; - bool same_inserted_id; - - //! 
The transaction ids of the transactions that deleted the tuples (if any) - transaction_t deleted[STANDARD_VECTOR_SIZE]; - bool any_deleted; - -public: - idx_t GetSelVector(transaction_t start_time, transaction_t transaction_id, SelectionVector &sel_vector, - idx_t max_count); - idx_t GetSelVector(Transaction &transaction, SelectionVector &sel_vector, idx_t max_count) override; - bool Fetch(Transaction &transaction, row_t row) override; - void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) override; - - void Append(idx_t start, idx_t end, transaction_t commit_id); - void Delete(Transaction &transaction, row_t rows[], idx_t count); - void CommitDelete(transaction_t commit_id, row_t rows[], idx_t count); - - void Serialize(Serializer &serialize) override; - static unique_ptr Deserialize(MorselInfo &morsel, Deserializer &source); -}; - -} // namespace duckdb - - - -namespace duckdb { -class DataTable; -class Vector; -struct VersionNode; - -class MorselInfo : public SegmentBase { -public: - static constexpr const idx_t MORSEL_VECTOR_COUNT = 100; - static constexpr const idx_t MORSEL_SIZE = STANDARD_VECTOR_SIZE * MORSEL_VECTOR_COUNT; - - static constexpr const idx_t MORSEL_LAYER_COUNT = 10; - static constexpr const idx_t MORSEL_LAYER_SIZE = MORSEL_SIZE / MORSEL_LAYER_COUNT; - -public: - MorselInfo(idx_t start, idx_t count) : SegmentBase(start, count) { - } - - unique_ptr root; - -public: - idx_t GetSelVector(Transaction &transaction, idx_t vector_idx, SelectionVector &sel_vector, idx_t max_count); - - //! For a specific row, returns true if it should be used for the transaction and false otherwise. - bool Fetch(Transaction &transaction, idx_t row); - - //! Append count rows to the morsel info - void Append(Transaction &transaction, idx_t start, idx_t count, transaction_t commit_id); - - void CommitAppend(transaction_t commit_id, idx_t start, idx_t count); - - //! 
Delete the given set of rows in the version manager - void Delete(Transaction &transaction, DataTable *table, Vector &row_ids, idx_t count); - - void RevertAppend(idx_t start); - -private: - ChunkInfo *GetChunkInfo(idx_t vector_idx); - -private: - mutex morsel_lock; -}; - -struct VersionNode { - unique_ptr info[MorselInfo::MORSEL_VECTOR_COUNT]; -}; - -} // namespace duckdb namespace duckdb { -TableDataReader::TableDataReader(DatabaseInstance &db, MetaBlockReader &reader, BoundCreateTableInfo &info) - : db(db), reader(reader), info(info) { +TableDataReader::TableDataReader(MetaBlockReader &reader, BoundCreateTableInfo &info) : reader(reader), info(info) { info.data = make_unique(info.Base().columns.size()); } @@ -120672,32 +131047,18 @@ void TableDataReader::ReadTableData() { auto &columns = info.Base().columns; D_ASSERT(columns.size() > 0); - idx_t table_count = 0; - for (idx_t col = 0; col < columns.size(); col++) { - auto &column = columns[col]; - info.data->column_data[col] = ColumnData::Deserialize(db, reader, column.type); - if (col == 0) { - table_count = info.data->column_data[col]->total_rows; - } else if (table_count != info.data->column_data[col]->total_rows) { - throw Exception("Column length mismatch in table load!"); - } + // deserialize the total table statistics + info.data->column_stats.reserve(columns.size()); + for (idx_t i = 0; i < columns.size(); i++) { + info.data->column_stats.push_back(BaseStatistics::Deserialize(reader, columns[i].type)); } - auto total_rows = table_count; - // create the version tree - info.data->versions = make_shared(); - for (idx_t i = 0; i < total_rows; i += MorselInfo::MORSEL_SIZE) { - auto segment = make_unique(i, MorselInfo::MORSEL_SIZE); - // check how many chunk infos we need to read - auto chunk_info_count = reader.Read(); - if (chunk_info_count > 0) { - segment->root = make_unique(); - for (idx_t i = 0; i < chunk_info_count; i++) { - idx_t vector_index = reader.Read(); - segment->root->info[vector_index] = 
ChunkInfo::Deserialize(*segment, reader); - } - } - info.data->versions->AppendSegment(move(segment)); + // deserialize each of the individual row groups + auto row_group_count = reader.Read(); + info.data->row_groups.reserve(row_group_count); + for (idx_t i = 0; i < row_group_count; i++) { + auto row_group_pointer = RowGroup::Deserialize(reader, columns); + info.data->row_groups.push_back(move(row_group_pointer)); } } @@ -120716,8 +131077,8 @@ void TableDataReader::ReadTableData() { namespace duckdb { class ColumnData; -class UncompressedSegment; -class MorselInfo; +class ColumnSegment; +class RowGroup; class BaseStatistics; class SegmentStatistics; @@ -120729,17 +131090,13 @@ class TableDataWriter { TableDataWriter(DatabaseInstance &db, TableCatalogEntry &table, MetaBlockWriter &meta_writer); ~TableDataWriter(); - void WriteTableData(); - - void CheckpointColumn(ColumnData &col_data, idx_t col_idx); - void CheckpointDeletes(MorselInfo *info); + BlockPointer WriteTableData(); MetaBlockWriter &GetMetaWriter() { return meta_writer; } private: - DatabaseInstance &db; TableCatalogEntry &table; MetaBlockWriter &meta_writer; }; @@ -120753,56 +131110,35 @@ class TableDataWriter { +namespace duckdb { + +TableDataWriter::TableDataWriter(DatabaseInstance &, TableCatalogEntry &table, MetaBlockWriter &meta_writer) + : table(table), meta_writer(meta_writer) { +} + +TableDataWriter::~TableDataWriter() { +} + +BlockPointer TableDataWriter::WriteTableData() { + // start scanning the table and append the data to the uncompressed segments + return table.storage->Checkpoint(*this); +} + +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/numeric_segment.hpp +// duckdb/storage/checkpoint/write_overflow_strings_to_disk.hpp // // //===----------------------------------------------------------------------===// - - -namespace duckdb { -class DatabaseInstance; - -class NumericSegment : public 
UncompressedSegment { -public: - NumericSegment(DatabaseInstance &db, PhysicalType type, idx_t row_start, block_id_t block_id = INVALID_BLOCK); - - //! The size of this type - idx_t type_size; - -public: - void InitializeScan(ColumnScanState &state) override; - - //! Fetch a single value and append it to the vector - void FetchRow(ColumnFetchState &state, row_t row_id, Vector &result, idx_t result_idx) override; - - //! Append a part of a vector to the uncompressed segment with the given append state, updating the provided stats - //! in the process. Returns the amount of tuples appended. If this is less than `count`, the uncompressed segment is - //! full. - idx_t Append(SegmentStatistics &stats, VectorData &data, idx_t offset, idx_t count) override; - -protected: - void FetchBaseData(ColumnScanState &state, idx_t vector_index, Vector &result) override; - -public: - typedef void (*append_function_t)(SegmentStatistics &stats, data_ptr_t target, idx_t target_offset, - VectorData &source, idx_t offset, idx_t count); - -private: - append_function_t append_function; -}; - -} // namespace duckdb - //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/string_segment.hpp +// duckdb/storage/checkpoint/string_checkpoint_state.hpp // // //===----------------------------------------------------------------------===// @@ -120811,8 +131147,10 @@ class NumericSegment : public UncompressedSegment { + + + namespace duckdb { -class StorageManager; class OverflowStringWriter { public: @@ -120841,10 +131179,8 @@ struct string_location_t { int32_t offset; }; -class StringSegment : public UncompressedSegment { -public: - StringSegment(DatabaseInstance &db, idx_t row_start, block_id_t block_id = INVALID_BLOCK); - ~StringSegment() override; +struct UncompressedStringSegmentState : public CompressedSegmentState { + ~UncompressedStringSegmentState(); //! The string block holding strings that do not fit in the main block //! 
FIXME: this should be replaced by a heap that also allows freeing of unused strings @@ -120853,337 +131189,10 @@ class StringSegment : public UncompressedSegment { unique_ptr overflow_writer; //! Map of block id to string block unordered_map overflow_blocks; - -public: - void InitializeScan(ColumnScanState &state) override; - - //! Fetch a single value and append it to the vector - void FetchRow(ColumnFetchState &state, row_t row_id, Vector &result, idx_t result_idx) override; - - //! Append a part of a vector to the uncompressed segment with the given append state, updating the provided stats - //! in the process. Returns the amount of tuples appended. If this is less than `count`, the uncompressed segment is - //! full. - idx_t Append(SegmentStatistics &stats, VectorData &data, idx_t offset, idx_t count) override; - - void ToTemporary() override; - -protected: - void FetchBaseData(ColumnScanState &state, idx_t vector_index, Vector &result) override; - -private: - void AppendData(BufferHandle &handle, SegmentStatistics &stats, data_ptr_t target, data_ptr_t end, - idx_t target_offset, VectorData &source, idx_t offset, idx_t count); - - //! Fetch all the strings of a vector from the base table and place their locations in the result vector - void FetchBaseData(ColumnScanState &state, data_ptr_t base_data, idx_t vector_index, Vector &result, idx_t count); - - string_location_t FetchStringLocation(data_ptr_t baseptr, int32_t dict_offset); - string_t FetchString(Vector &result, data_ptr_t baseptr, string_location_t location); - //! Fetch a single string from the dictionary and returns it, potentially pins a buffer manager page and adds it to - //! the set of pinned pages - string_t FetchStringFromDict(Vector &result, data_ptr_t baseptr, int32_t dict_offset); - - //! 
Fetch string locations for a subset of the strings - void FetchStringLocations(data_ptr_t baseptr, row_t *ids, idx_t vector_index, idx_t vector_offset, idx_t count, - string_location_t result[]); - - void WriteString(string_t string, block_id_t &result_block, int32_t &result_offset); - string_t ReadString(Vector &result, block_id_t block, int32_t offset); - string_t ReadString(data_ptr_t target, int32_t offset); - - void WriteStringMemory(string_t string, block_id_t &result_block, int32_t &result_offset); - - void WriteStringMarker(data_ptr_t target, block_id_t block_id, int32_t offset); - void ReadStringMarker(data_ptr_t target, block_id_t &block_id, int32_t &offset); - - //! Expand the string segment, adding an additional maximum vector to the segment - void ExpandStringSegment(data_ptr_t baseptr); - - //! The amount of bytes remaining to store in the block - idx_t RemainingSpace(BufferHandle &handle); - - void ReadString(string_t *result_data, Vector &result, data_ptr_t baseptr, int32_t *dict_offset, idx_t src_idx, - idx_t res_idx, idx_t &update_idx, size_t vector_index); - - void SetDictionaryOffset(BufferHandle &handle, idx_t offset); - idx_t GetDictionaryOffset(BufferHandle &handle); - -private: - //! The max string size that is allowed within a block. Strings bigger than this will be labeled as a BIG STRING and - //! offloaded to the overflow blocks. - static constexpr uint16_t STRING_BLOCK_LIMIT = 4096; - //! Marker used in length field to indicate the presence of a big string - static constexpr uint16_t BIG_STRING_MARKER = (uint16_t)-1; - //! Base size of big string marker (block id + offset) - static constexpr idx_t BIG_STRING_MARKER_BASE_SIZE = sizeof(block_id_t) + sizeof(int32_t); - //! 
The marker size of the big string - static constexpr idx_t BIG_STRING_MARKER_SIZE = BIG_STRING_MARKER_BASE_SIZE + sizeof(uint16_t); }; } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/storage/table/validity_segment.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - - -namespace duckdb { -class BlockHandle; -class DatabaseInstance; -class SegmentStatistics; -class Vector; -struct VectorData; - -class ValiditySegment : public UncompressedSegment { -public: - ValiditySegment(DatabaseInstance &db, idx_t row_start, block_id_t block_id = INVALID_BLOCK); - ~ValiditySegment(); - -public: - void InitializeScan(ColumnScanState &state) override; - void FetchRow(ColumnFetchState &state, row_t row_id, Vector &result, idx_t result_idx) override; - idx_t Append(SegmentStatistics &stats, VectorData &data, idx_t offset, idx_t count) override; - void RevertAppend(idx_t start_row) override; - -protected: - void FetchBaseData(ColumnScanState &state, idx_t vector_index, Vector &result) override; -}; - -} // namespace duckdb - - - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/storage/table/transient_segment.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - - -namespace duckdb { -struct ColumnAppendState; -class DatabaseInstance; -class PersistentSegment; - -class TransientSegment : public ColumnSegment { -public: - TransientSegment(DatabaseInstance &db, const LogicalType &type, idx_t start); - TransientSegment(PersistentSegment &segment); - - //! The storage manager - DatabaseInstance &db; - //! The uncompressed segment holding the data - unique_ptr data; - -public: - void InitializeScan(ColumnScanState &state) override; - //! 
Scan one vector from this transient segment - void Scan(ColumnScanState &state, idx_t vector_index, Vector &result) override; - //! Fetch the base table vector index that belongs to this row - void Fetch(ColumnScanState &state, idx_t vector_index, Vector &result) override; - //! Fetch a value of the specific row id and append it to the result - void FetchRow(ColumnFetchState &state, row_t row_id, Vector &result, idx_t result_idx) override; - - //! Initialize an append of this transient segment - void InitializeAppend(ColumnAppendState &state); - //! Appends a (part of) vector to the transient segment, returns the amount of entries successfully appended - idx_t Append(ColumnAppendState &state, VectorData &data, idx_t offset, idx_t count); - //! Revert an append made to this transient segment - void RevertAppend(idx_t start_row); -}; - -} // namespace duckdb - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/storage/table/update_segment.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - - - -namespace duckdb { -class ColumnData; -class DataTable; -class Vector; -struct UpdateInfo; -struct UpdateNode; - -class UpdateSegment : public SegmentBase { -public: - static constexpr const idx_t MORSEL_VECTOR_COUNT = MorselInfo::MORSEL_VECTOR_COUNT; - static constexpr const idx_t MORSEL_SIZE = MorselInfo::MORSEL_SIZE; - - static constexpr const idx_t MORSEL_LAYER_COUNT = MorselInfo::MORSEL_LAYER_COUNT; - static constexpr const idx_t MORSEL_LAYER_SIZE = MorselInfo::MORSEL_LAYER_SIZE; - -public: - UpdateSegment(ColumnData &column_data, idx_t start, idx_t count); - ~UpdateSegment(); - - ColumnData &column_data; - -public: - bool HasUpdates() const; - bool HasUncommittedUpdates(idx_t vector_index); - bool HasUpdates(idx_t vector_index) const; - bool HasUpdates(idx_t start_vector_index, idx_t end_vector_index) const; - UpdateSegment *FindSegment(idx_t end_vector_index) 
const; - void ClearUpdates(); - - void FetchUpdates(Transaction &transaction, idx_t vector_index, Vector &result); - void FetchCommitted(idx_t vector_index, Vector &result); - void Update(Transaction &transaction, Vector &update, row_t *ids, idx_t count, Vector &base_data); - void FetchRow(Transaction &transaction, idx_t row_id, Vector &result, idx_t result_idx); - - void RollbackUpdate(UpdateInfo *info); - void CleanupUpdateInternal(const StorageLockKey &lock, UpdateInfo *info); - void CleanupUpdate(UpdateInfo *info); - - SegmentStatistics &GetStatistics() { - return stats; - } - StringHeap &GetStringHeap() { - return heap; - } - -private: - //! The lock for the update segment - StorageLock lock; - //! The root node (if any) - unique_ptr root; - //! Update statistics - SegmentStatistics stats; - //! Internal type size - idx_t type_size; - //! String heap, only used for strings - StringHeap heap; - -public: - typedef void (*initialize_update_function_t)(SegmentStatistics &stats, UpdateInfo *base_info, Vector &base_data, - UpdateInfo *update_info, Vector &update, const SelectionVector &sel); - typedef void (*merge_update_function_t)(SegmentStatistics &stats, UpdateInfo *base_info, Vector &base_data, - UpdateInfo *update_info, Vector &update, row_t *ids, idx_t count, - const SelectionVector &sel); - typedef void (*fetch_update_function_t)(transaction_t start_time, transaction_t transaction_id, UpdateInfo *info, - Vector &result); - typedef void (*fetch_committed_function_t)(UpdateInfo *info, Vector &result); - typedef void (*fetch_row_function_t)(transaction_t start_time, transaction_t transaction_id, UpdateInfo *info, - idx_t row_idx, Vector &result, idx_t result_idx); - typedef void (*rollback_update_function_t)(UpdateInfo *base_info, UpdateInfo *rollback_info); - typedef idx_t (*statistics_update_function_t)(UpdateSegment *segment, SegmentStatistics &stats, Vector &update, - idx_t count, SelectionVector &sel); - -private: - initialize_update_function_t 
initialize_update_function; - merge_update_function_t merge_update_function; - fetch_update_function_t fetch_update_function; - fetch_committed_function_t fetch_committed_function; - fetch_row_function_t fetch_row_function; - rollback_update_function_t rollback_update_function; - statistics_update_function_t statistics_update_function; - -private: - void InitializeUpdateInfo(UpdateInfo &info, row_t *ids, const SelectionVector &sel, idx_t count, idx_t vector_index, - idx_t vector_offset); -}; - -struct UpdateNodeData { - unique_ptr info; - unique_ptr tuples; - unique_ptr tuple_data; -}; - -struct UpdateNode { - unique_ptr info[UpdateSegment::MORSEL_VECTOR_COUNT]; -}; - -} // namespace duckdb - - - - -namespace duckdb { - -TableDataWriter::TableDataWriter(DatabaseInstance &db, TableCatalogEntry &table, MetaBlockWriter &meta_writer) - : db(db), table(table), meta_writer(meta_writer) { -} - -TableDataWriter::~TableDataWriter() { -} - -void TableDataWriter::WriteTableData() { - // start scanning the table and append the data to the uncompressed segments - table.storage->Checkpoint(*this); - - // then we checkpoint the deleted tuples - table.storage->CheckpointDeletes(*this); -} - -void TableDataWriter::CheckpointDeletes(MorselInfo *morsel_info) { - // deletes! 
write them after the data pointers - while (morsel_info) { - if (morsel_info->root) { - // first count how many ChunkInfo's we need to deserialize - idx_t chunk_info_count = 0; - for (idx_t vector_idx = 0; vector_idx < MorselInfo::MORSEL_VECTOR_COUNT; vector_idx++) { - auto chunk_info = morsel_info->root->info[vector_idx].get(); - if (!chunk_info) { - continue; - } - chunk_info_count++; - } - meta_writer.Write(chunk_info_count); - for (idx_t vector_idx = 0; vector_idx < MorselInfo::MORSEL_VECTOR_COUNT; vector_idx++) { - auto chunk_info = morsel_info->root->info[vector_idx].get(); - if (!chunk_info) { - continue; - } - meta_writer.Write(vector_idx); - chunk_info->Serialize(meta_writer); - } - } else { - meta_writer.Write(0); - } - morsel_info = (MorselInfo *)morsel_info->next.get(); - } -} - -} // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/storage/checkpoint/write_overflow_strings_to_disk.hpp -// -// -//===----------------------------------------------------------------------===// - - - - namespace duckdb { @@ -121214,6 +131223,8 @@ class WriteOverflowStringsToDisk : public OverflowStringWriter { } // namespace duckdb + + namespace duckdb { WriteOverflowStringsToDisk::WriteOverflowStringsToDisk(DatabaseInstance &db) @@ -121231,7 +131242,7 @@ void WriteOverflowStringsToDisk::WriteString(string_t string, block_id_t &result auto &buffer_manager = BufferManager::GetBufferManager(db); auto &block_manager = BlockManager::GetBlockManager(db); if (!handle) { - handle = buffer_manager.Allocate(Storage::BLOCK_ALLOC_SIZE); + handle = buffer_manager.Allocate(Storage::BLOCK_SIZE); } // first write the length of the string if (block_id == INVALID_BLOCK || offset + sizeof(uint32_t) >= STRING_SPACE) { @@ -121411,6 +131422,9 @@ void CheckpointManager::WriteSchema(SchemaCatalogEntry &schema) { vector tables; vector views; schema.Scan(CatalogType::TABLE_ENTRY, [&](CatalogEntry *entry) { + if 
(entry->internal) { + return; + } if (entry->type == CatalogType::TABLE_ENTRY) { tables.push_back((TableCatalogEntry *)entry); } else if (entry->type == CatalogType::VIEW_ENTRY) { @@ -121420,11 +131434,16 @@ void CheckpointManager::WriteSchema(SchemaCatalogEntry &schema) { } }); vector sequences; - schema.Scan(CatalogType::SEQUENCE_ENTRY, - [&](CatalogEntry *entry) { sequences.push_back((SequenceCatalogEntry *)entry); }); + schema.Scan(CatalogType::SEQUENCE_ENTRY, [&](CatalogEntry *entry) { + D_ASSERT(!entry->internal); + sequences.push_back((SequenceCatalogEntry *)entry); + }); vector macros; schema.Scan(CatalogType::SCALAR_FUNCTION_ENTRY, [&](CatalogEntry *entry) { + if (entry->internal) { + return; + } if (entry->type == CatalogType::MACRO_ENTRY) { macros.push_back((MacroCatalogEntry *)entry); } @@ -121481,141 +131500,1177 @@ void CheckpointManager::ReadSchema(ClientContext &context, MetaBlockReader &read for (uint32_t i = 0; i < macro_count; i++) { ReadMacro(context, reader); } -} +} + +//===--------------------------------------------------------------------===// +// Views +//===--------------------------------------------------------------------===// +void CheckpointManager::WriteView(ViewCatalogEntry &view) { + view.Serialize(*metadata_writer); +} + +void CheckpointManager::ReadView(ClientContext &context, MetaBlockReader &reader) { + auto info = ViewCatalogEntry::Deserialize(reader); + + auto &catalog = Catalog::GetCatalog(db); + catalog.CreateView(context, info.get()); +} + +//===--------------------------------------------------------------------===// +// Sequences +//===--------------------------------------------------------------------===// +void CheckpointManager::WriteSequence(SequenceCatalogEntry &seq) { + seq.Serialize(*metadata_writer); +} + +void CheckpointManager::ReadSequence(ClientContext &context, MetaBlockReader &reader) { + auto info = SequenceCatalogEntry::Deserialize(reader); + + auto &catalog = Catalog::GetCatalog(db); + 
catalog.CreateSequence(context, info.get()); +} + +//===--------------------------------------------------------------------===// +// Macro's +//===--------------------------------------------------------------------===// +void CheckpointManager::WriteMacro(MacroCatalogEntry ¯o) { + macro.Serialize(*metadata_writer); +} + +void CheckpointManager::ReadMacro(ClientContext &context, MetaBlockReader &reader) { + auto info = MacroCatalogEntry::Deserialize(reader); + + auto &catalog = Catalog::GetCatalog(db); + catalog.CreateFunction(context, info.get()); +} + +//===--------------------------------------------------------------------===// +// Table Metadata +//===--------------------------------------------------------------------===// +void CheckpointManager::WriteTable(TableCatalogEntry &table) { + // write the table meta data + table.Serialize(*metadata_writer); + // now we need to write the table data + TableDataWriter writer(db, table, *tabledata_writer); + auto pointer = writer.WriteTableData(); + + //! 
write the block pointer for the table info + metadata_writer->Write(pointer.block_id); + metadata_writer->Write(pointer.offset); +} + +void CheckpointManager::ReadTable(ClientContext &context, MetaBlockReader &reader) { + // deserialize the table meta data + auto info = TableCatalogEntry::Deserialize(reader); + // bind the info + auto binder = Binder::CreateBinder(context); + auto bound_info = binder->BindCreateTableInfo(move(info)); + + // now read the actual table data and place it into the create table info + auto block_id = reader.Read(); + auto offset = reader.Read(); + MetaBlockReader table_data_reader(db, block_id); + table_data_reader.offset = offset; + TableDataReader data_reader(table_data_reader, *bound_info); + data_reader.ReadTableData(); + + // finally create the table in the catalog + auto &catalog = Catalog::GetCatalog(db); + catalog.CreateTable(context, bound_info.get()); +} + +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/segment/uncompressed.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { +class DatabaseInstance; + +struct UncompressedFunctions { + static unique_ptr InitCompression(ColumnDataCheckpointer &checkpointer, + unique_ptr state); + static void Compress(CompressionState &state_p, Vector &data, idx_t count); + static void FinalizeCompress(CompressionState &state_p); + static void EmptySkip(ColumnSegment &segment, ColumnScanState &state, idx_t skip_count) { + } +}; + +struct FixedSizeUncompressed { + static CompressionFunction GetFunction(PhysicalType data_type); +}; + +struct ValidityUncompressed { +public: + static CompressionFunction GetFunction(PhysicalType data_type); + +public: + static const validity_t LOWER_MASKS[65]; + static const validity_t UPPER_MASKS[65]; +}; + +struct StringUncompressed { +public: + static CompressionFunction GetFunction(PhysicalType 
data_type); + +public: + //! The max string size that is allowed within a block. Strings bigger than this will be labeled as a BIG STRING and + //! offloaded to the overflow blocks. + static constexpr uint16_t STRING_BLOCK_LIMIT = 4096; +}; + +} // namespace duckdb + + + + + + + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/table/column_data_checkpointer.hpp +// +// +//===----------------------------------------------------------------------===// + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/table/column_data.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/table/column_checkpoint_state.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + + + +namespace duckdb { +class ColumnData; +class DatabaseInstance; +class RowGroup; +class TableDataWriter; + +struct ColumnCheckpointState { + ColumnCheckpointState(RowGroup &row_group, ColumnData &column_data, TableDataWriter &writer); + virtual ~ColumnCheckpointState(); + + RowGroup &row_group; + ColumnData &column_data; + TableDataWriter &writer; + SegmentTree new_tree; + vector data_pointers; + unique_ptr global_stats; + +public: + virtual unique_ptr GetStatistics() { + return global_stats->Copy(); + } + + virtual void FlushSegment(unique_ptr segment); + virtual void FlushToDisk(); +}; + +} // namespace duckdb + + + +namespace duckdb { +class ColumnData; +class ColumnSegment; +class DatabaseInstance; +class RowGroup; +class TableDataWriter; +class Transaction; + +struct DataTableInfo; + +class ColumnData { + friend class ColumnDataCheckpointer; + +public: + ColumnData(DataTableInfo &info, idx_t column_index, idx_t start_row, LogicalType type, 
ColumnData *parent); + virtual ~ColumnData(); + + //! Table info for the column + DataTableInfo &info; + //! The column index of the column, either within the parent table or within the parent + idx_t column_index; + //! The start row + idx_t start; + //! The type of the column + LogicalType type; + //! The parent column (if any) + ColumnData *parent; + +public: + virtual bool CheckZonemap(ColumnScanState &state, TableFilter &filter) = 0; + + DatabaseInstance &GetDatabase() const; + DataTableInfo &GetTableInfo() const; + virtual idx_t GetMaxEntry(); + + //! The root type of the column + const LogicalType &RootType() const; + + //! Initialize a scan of the column + virtual void InitializeScan(ColumnScanState &state); + //! Initialize a scan starting at the specified offset + virtual void InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx); + //! Scan the next vector from the column + virtual idx_t Scan(Transaction &transaction, idx_t vector_index, ColumnScanState &state, Vector &result); + virtual idx_t ScanCommitted(idx_t vector_index, ColumnScanState &state, Vector &result, bool allow_updates); + virtual void ScanCommittedRange(idx_t row_group_start, idx_t offset_in_row_group, idx_t count, Vector &result); + virtual idx_t ScanCount(ColumnScanState &state, Vector &result, idx_t count); + //! Select + virtual void Select(Transaction &transaction, idx_t vector_index, ColumnScanState &state, Vector &result, + SelectionVector &sel, idx_t &count, const TableFilter &filter); + virtual void FilterScan(Transaction &transaction, idx_t vector_index, ColumnScanState &state, Vector &result, + SelectionVector &sel, idx_t count); + virtual void FilterScanCommitted(idx_t vector_index, ColumnScanState &state, Vector &result, SelectionVector &sel, + idx_t count, bool allow_updates); + + //! Skip the scan forward by "count" rows + virtual void Skip(ColumnScanState &state, idx_t count = STANDARD_VECTOR_SIZE); + + //! 
Initialize an appending phase for this column + virtual void InitializeAppend(ColumnAppendState &state); + //! Append a vector of type [type] to the end of the column + virtual void Append(BaseStatistics &stats, ColumnAppendState &state, Vector &vector, idx_t count); + virtual void AppendData(BaseStatistics &stats, ColumnAppendState &state, VectorData &vdata, idx_t count); + //! Revert a set of appends to the ColumnData + virtual void RevertAppend(row_t start_row); + + //! Fetch the vector from the column data that belongs to this specific row + virtual idx_t Fetch(ColumnScanState &state, row_t row_id, Vector &result); + //! Fetch a specific row id and append it to the vector + virtual void FetchRow(Transaction &transaction, ColumnFetchState &state, row_t row_id, Vector &result, + idx_t result_idx); + + virtual void Update(Transaction &transaction, idx_t column_index, Vector &update_vector, row_t *row_ids, + idx_t offset, idx_t update_count); + virtual void UpdateColumn(Transaction &transaction, const vector &column_path, Vector &update_vector, + row_t *row_ids, idx_t update_count, idx_t depth); + virtual unique_ptr GetUpdateStatistics(); + + virtual void CommitDropColumn(); + + virtual unique_ptr CreateCheckpointState(RowGroup &row_group, TableDataWriter &writer); + virtual unique_ptr Checkpoint(RowGroup &row_group, TableDataWriter &writer); + + virtual void CheckpointScan(ColumnSegment *segment, ColumnScanState &state, idx_t row_group_start, idx_t count, + Vector &scan_vector); + + virtual void DeserializeColumn(Deserializer &source); + static shared_ptr Deserialize(DataTableInfo &info, idx_t column_index, idx_t start_row, + Deserializer &source, const LogicalType &type, ColumnData *parent); + + virtual void GetStorageInfo(idx_t row_group_index, vector col_path, vector> &result); + virtual void Verify(RowGroup &parent); + + static shared_ptr CreateColumn(DataTableInfo &info, idx_t column_index, idx_t start_row, + const LogicalType &type, ColumnData *parent = 
nullptr); + static unique_ptr CreateColumnUnique(DataTableInfo &info, idx_t column_index, idx_t start_row, + const LogicalType &type, ColumnData *parent = nullptr); + +protected: + //! Append a transient segment + void AppendTransientSegment(idx_t start_row); + + //! Scans a base vector from the column + idx_t ScanVector(ColumnScanState &state, Vector &result, idx_t remaining); + //! Scans a vector from the column merged with any potential updates + //! If ALLOW_UPDATES is set to false, the function will instead throw an exception if any updates are found + template + idx_t ScanVector(Transaction *transaction, idx_t vector_index, ColumnScanState &state, Vector &result); + +protected: + //! The segments holding the data of this column segment + SegmentTree data; + //! The lock for the updates + mutex update_lock; + //! The updates for this column segment + unique_ptr updates; +}; + +} // namespace duckdb + + + +namespace duckdb { + +class ColumnDataCheckpointer { +public: + ColumnDataCheckpointer(ColumnData &col_data_p, RowGroup &row_group_p, ColumnCheckpointState &state_p); + +public: + DatabaseInstance &GetDatabase(); + const LogicalType &GetType() const; + ColumnData &GetColumnData(); + RowGroup &GetRowGroup(); + ColumnCheckpointState &GetCheckpointState(); + + void Checkpoint(unique_ptr segment); + +private: + void ScanSegments(const std::function &callback); + unique_ptr DetectBestCompressionMethod(idx_t &compression_idx); + void WriteToDisk(); + bool HasChanges(); + void WritePersistentSegments(); + +private: + ColumnData &col_data; + RowGroup &row_group; + ColumnCheckpointState &state; + bool is_validity; + Vector intermediate; + unique_ptr owned_segment; + vector compression_functions; +}; + +} // namespace duckdb + + +namespace duckdb { + +//===--------------------------------------------------------------------===// +// Analyze +//===--------------------------------------------------------------------===// +struct FixedSizeAnalyzeState : public 
AnalyzeState { + FixedSizeAnalyzeState() : count(0) { + } + + idx_t count; +}; + +unique_ptr FixedSizeInitAnalyze(ColumnData &col_data, PhysicalType type) { + return make_unique(); +} + +bool FixedSizeAnalyze(AnalyzeState &state_p, Vector &input, idx_t count) { + auto &state = (FixedSizeAnalyzeState &)state_p; + state.count += count; + return true; +} + +template +idx_t FixedSizeFinalAnalyze(AnalyzeState &state_p) { + auto &state = (FixedSizeAnalyzeState &)state_p; + return sizeof(T) * state.count; +} + +//===--------------------------------------------------------------------===// +// Compress +//===--------------------------------------------------------------------===// +struct UncompressedCompressState : public CompressionState { + explicit UncompressedCompressState(ColumnDataCheckpointer &checkpointer) : checkpointer(checkpointer) { + CreateEmptySegment(checkpointer.GetRowGroup().start); + } + + void CreateEmptySegment(idx_t row_start) { + auto &db = checkpointer.GetDatabase(); + auto &type = checkpointer.GetType(); + auto compressed_segment = ColumnSegment::CreateTransientSegment(db, type, row_start); + if (type.InternalType() == PhysicalType::VARCHAR) { + auto &state = (UncompressedStringSegmentState &)*compressed_segment->GetSegmentState(); + state.overflow_writer = make_unique(db); + } + current_segment = move(compressed_segment); + } + + void FlushSegment() { + auto &state = checkpointer.GetCheckpointState(); + state.FlushSegment(move(current_segment)); + } + + void Finalize() { + FlushSegment(); + current_segment.reset(); + } + + ColumnDataCheckpointer &checkpointer; + unique_ptr current_segment; +}; + +unique_ptr UncompressedFunctions::InitCompression(ColumnDataCheckpointer &checkpointer, + unique_ptr state) { + return make_unique(checkpointer); +} + +void UncompressedFunctions::Compress(CompressionState &state_p, Vector &data, idx_t count) { + auto &state = (UncompressedCompressState &)state_p; + VectorData vdata; + data.Orrify(count, vdata); + + 
ColumnAppendState append_state; + idx_t offset = 0; + while (count > 0) { + idx_t appended = state.current_segment->Append(append_state, vdata, offset, count); + if (appended == count) { + // appended everything: finished + return; + } + auto next_start = state.current_segment->start + state.current_segment->count; + // the segment is full: flush it to disk + state.FlushSegment(); + + // now create a new segment and continue appending + state.CreateEmptySegment(next_start); + offset += appended; + count -= appended; + } +} + +void UncompressedFunctions::FinalizeCompress(CompressionState &state_p) { + auto &state = (UncompressedCompressState &)state_p; + state.Finalize(); +} + +//===--------------------------------------------------------------------===// +// Scan +//===--------------------------------------------------------------------===// +struct FixedSizeScanState : public SegmentScanState { + unique_ptr handle; +}; + +unique_ptr FixedSizeInitScan(ColumnSegment &segment) { + auto result = make_unique(); + auto &buffer_manager = BufferManager::GetBufferManager(segment.db); + result->handle = buffer_manager.Pin(segment.block); + return move(result); +} + +//===--------------------------------------------------------------------===// +// Scan base data +//===--------------------------------------------------------------------===// +template +void FixedSizeScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result, + idx_t result_offset) { + auto &scan_state = (FixedSizeScanState &)*state.scan_state; + auto start = segment.GetRelativeIndex(state.row_index); + + auto data = scan_state.handle->node->buffer; + auto source_data = data + start * sizeof(T); + + // copy the data from the base table + result.SetVectorType(VectorType::FLAT_VECTOR); + memcpy(FlatVector::GetData(result) + result_offset * sizeof(T), source_data, scan_count * sizeof(T)); +} + +template +void FixedSizeScan(ColumnSegment &segment, ColumnScanState &state, idx_t 
scan_count, Vector &result) { + // FIXME: we should be able to do a zero-copy here + FixedSizeScanPartial(segment, state, scan_count, result, 0); +} + +//===--------------------------------------------------------------------===// +// Fetch +//===--------------------------------------------------------------------===// +template +void FixedSizeFetchRow(ColumnSegment &segment, ColumnFetchState &state, row_t row_id, Vector &result, + idx_t result_idx) { + auto &buffer_manager = BufferManager::GetBufferManager(segment.db); + auto handle = buffer_manager.Pin(segment.block); + + // first fetch the data from the base table + auto data_ptr = handle->node->buffer + row_id * sizeof(T); + + memcpy(FlatVector::GetData(result) + result_idx * sizeof(T), data_ptr, sizeof(T)); +} + +//===--------------------------------------------------------------------===// +// Append +//===--------------------------------------------------------------------===// +template +static void AppendLoop(SegmentStatistics &stats, data_ptr_t target, idx_t target_offset, VectorData &adata, + idx_t offset, idx_t count) { + auto sdata = (T *)adata.data; + auto tdata = (T *)target; + if (!adata.validity.AllValid()) { + for (idx_t i = 0; i < count; i++) { + auto source_idx = adata.sel->get_index(offset + i); + auto target_idx = target_offset + i; + bool is_null = !adata.validity.RowIsValid(source_idx); + if (!is_null) { + NumericStatistics::Update(stats, sdata[source_idx]); + tdata[target_idx] = sdata[source_idx]; + } else { + // we insert a NullValue in the null gap for debuggability + // this value should never be used or read anywhere + tdata[target_idx] = NullValue(); + } + } + } else { + for (idx_t i = 0; i < count; i++) { + auto source_idx = adata.sel->get_index(offset + i); + auto target_idx = target_offset + i; + NumericStatistics::Update(stats, sdata[source_idx]); + tdata[target_idx] = sdata[source_idx]; + } + } +} + +template <> +void AppendLoop(SegmentStatistics &stats, data_ptr_t target, idx_t 
target_offset, VectorData &adata, + idx_t offset, idx_t count) { + auto sdata = (list_entry_t *)adata.data; + auto tdata = (list_entry_t *)target; + for (idx_t i = 0; i < count; i++) { + auto source_idx = adata.sel->get_index(offset + i); + auto target_idx = target_offset + i; + tdata[target_idx] = sdata[source_idx]; + } +} + +template +idx_t FixedSizeAppend(ColumnSegment &segment, SegmentStatistics &stats, VectorData &data, idx_t offset, idx_t count) { + auto &buffer_manager = BufferManager::GetBufferManager(segment.db); + auto handle = buffer_manager.Pin(segment.block); + + auto target_ptr = handle->node->buffer; + idx_t max_tuple_count = Storage::BLOCK_SIZE / sizeof(T); + idx_t copy_count = MinValue(count, max_tuple_count - segment.count); + + AppendLoop(stats, target_ptr, segment.count, data, offset, copy_count); + segment.count += copy_count; + return copy_count; +} + +//===--------------------------------------------------------------------===// +// Get Function +//===--------------------------------------------------------------------===// +template +CompressionFunction FixedSizeGetFunction(PhysicalType data_type) { + return CompressionFunction(CompressionType::COMPRESSION_UNCOMPRESSED, data_type, FixedSizeInitAnalyze, + FixedSizeAnalyze, FixedSizeFinalAnalyze, UncompressedFunctions::InitCompression, + UncompressedFunctions::Compress, UncompressedFunctions::FinalizeCompress, + FixedSizeInitScan, FixedSizeScan, FixedSizeScanPartial, FixedSizeFetchRow, + UncompressedFunctions::EmptySkip, nullptr, FixedSizeAppend, nullptr); +} + +CompressionFunction FixedSizeUncompressed::GetFunction(PhysicalType data_type) { + switch (data_type) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + return FixedSizeGetFunction(data_type); + case PhysicalType::INT16: + return FixedSizeGetFunction(data_type); + case PhysicalType::INT32: + return FixedSizeGetFunction(data_type); + case PhysicalType::INT64: + return FixedSizeGetFunction(data_type); + case PhysicalType::UINT8: + 
return FixedSizeGetFunction(data_type); + case PhysicalType::UINT16: + return FixedSizeGetFunction(data_type); + case PhysicalType::UINT32: + return FixedSizeGetFunction(data_type); + case PhysicalType::UINT64: + return FixedSizeGetFunction(data_type); + case PhysicalType::INT128: + return FixedSizeGetFunction(data_type); + case PhysicalType::FLOAT: + return FixedSizeGetFunction(data_type); + case PhysicalType::DOUBLE: + return FixedSizeGetFunction(data_type); + case PhysicalType::INTERVAL: + return FixedSizeGetFunction(data_type); + case PhysicalType::LIST: + return FixedSizeGetFunction(data_type); + default: + throw InternalException("Unsupported type for FixedSizeUncompressed::GetFunction"); + } +} + +} // namespace duckdb + + + + + + + + + +namespace duckdb { + +//===--------------------------------------------------------------------===// +// Scan +//===--------------------------------------------------------------------===// +unique_ptr ConstantInitScan(ColumnSegment &segment) { + return nullptr; +} + +//===--------------------------------------------------------------------===// +// Scan base data +//===--------------------------------------------------------------------===// +void ConstantScanFunctionValidity(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result) { + auto &validity = (ValidityStatistics &)*segment.stats.statistics; + if (validity.has_null) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + ConstantVector::SetNull(result, true); + } +} + +template +void ConstantScanFunction(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result) { + auto &nstats = (NumericStatistics &)*segment.stats.statistics; + + auto data = FlatVector::GetData(result); + data[0] = nstats.min.GetValueUnsafe(); + result.SetVectorType(VectorType::CONSTANT_VECTOR); +} + +//===--------------------------------------------------------------------===// +// Scan Partial 
+//===--------------------------------------------------------------------===// +void ConstantFillFunctionValidity(ColumnSegment &segment, Vector &result, idx_t start_idx, idx_t count) { + auto &validity = (ValidityStatistics &)*segment.stats.statistics; + if (validity.has_null) { + auto &mask = FlatVector::Validity(result); + for (idx_t i = 0; i < count; i++) { + mask.SetInvalid(start_idx + i); + } + } +} + +template +void ConstantFillFunction(ColumnSegment &segment, Vector &result, idx_t start_idx, idx_t count) { + auto &nstats = (NumericStatistics &)*segment.stats.statistics; + + auto data = FlatVector::GetData(result); + auto constant_value = nstats.min.GetValueUnsafe(); + for (idx_t i = 0; i < count; i++) { + data[start_idx + i] = constant_value; + } +} + +void ConstantScanPartialValidity(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result, + idx_t result_offset) { + ConstantFillFunctionValidity(segment, result, result_offset, scan_count); +} + +template +void ConstantScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result, + idx_t result_offset) { + ConstantFillFunction(segment, result, result_offset, scan_count); +} + +//===--------------------------------------------------------------------===// +// Fetch +//===--------------------------------------------------------------------===// +void ConstantFetchRowValidity(ColumnSegment &segment, ColumnFetchState &state, row_t row_id, Vector &result, + idx_t result_idx) { + ConstantFillFunctionValidity(segment, result, result_idx, 1); +} + +template +void ConstantFetchRow(ColumnSegment &segment, ColumnFetchState &state, row_t row_id, Vector &result, idx_t result_idx) { + ConstantFillFunction(segment, result, result_idx, 1); +} + +//===--------------------------------------------------------------------===// +// Get Function +//===--------------------------------------------------------------------===// +CompressionFunction 
ConstantGetFunctionValidity(PhysicalType data_type) { + D_ASSERT(data_type == PhysicalType::BIT); + return CompressionFunction(CompressionType::COMPRESSION_CONSTANT, data_type, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, ConstantInitScan, ConstantScanFunctionValidity, + ConstantScanPartialValidity, ConstantFetchRowValidity, UncompressedFunctions::EmptySkip, + nullptr, nullptr, nullptr); +} + +template +CompressionFunction ConstantGetFunction(PhysicalType data_type) { + return CompressionFunction(CompressionType::COMPRESSION_CONSTANT, data_type, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, ConstantInitScan, ConstantScanFunction, ConstantScanPartial, + ConstantFetchRow, UncompressedFunctions::EmptySkip, nullptr, nullptr, nullptr); +} + +CompressionFunction ConstantFun::GetFunction(PhysicalType data_type) { + switch (data_type) { + case PhysicalType::BIT: + return ConstantGetFunctionValidity(data_type); + case PhysicalType::BOOL: + case PhysicalType::INT8: + return ConstantGetFunction(data_type); + case PhysicalType::INT16: + return ConstantGetFunction(data_type); + case PhysicalType::INT32: + return ConstantGetFunction(data_type); + case PhysicalType::INT64: + return ConstantGetFunction(data_type); + case PhysicalType::UINT8: + return ConstantGetFunction(data_type); + case PhysicalType::UINT16: + return ConstantGetFunction(data_type); + case PhysicalType::UINT32: + return ConstantGetFunction(data_type); + case PhysicalType::UINT64: + return ConstantGetFunction(data_type); + case PhysicalType::INT128: + return ConstantGetFunction(data_type); + case PhysicalType::FLOAT: + return ConstantGetFunction(data_type); + case PhysicalType::DOUBLE: + return ConstantGetFunction(data_type); + default: + throw InternalException("Unsupported type for ConstantUncompressed::GetFunction"); + } +} + +bool ConstantFun::TypeIsSupported(PhysicalType type) { + switch (type) { + case PhysicalType::BIT: + case PhysicalType::BOOL: + case PhysicalType::INT8: + case 
PhysicalType::INT16: + case PhysicalType::INT32: + case PhysicalType::INT64: + case PhysicalType::UINT8: + case PhysicalType::UINT16: + case PhysicalType::UINT32: + case PhysicalType::UINT64: + case PhysicalType::INT128: + case PhysicalType::FLOAT: + case PhysicalType::DOUBLE: + return true; + default: + throw InternalException("Unsupported type for constant function"); + } +} + +} // namespace duckdb + + + + + + + + +#include + +namespace duckdb { + +using rle_count_t = uint16_t; + +//===--------------------------------------------------------------------===// +// Analyze +//===--------------------------------------------------------------------===// +struct EmptyRLEWriter { + template + static void Operation(VALUE_TYPE value, rle_count_t count, void *dataptr, bool is_null) { + } +}; + +template +struct RLEState { + RLEState() : seen_count(0), last_value(NullValue()), last_seen_count(0), dataptr(nullptr) { + } + + idx_t seen_count; + T last_value; + rle_count_t last_seen_count; + void *dataptr; + bool all_null = true; + +public: + template + void Flush() { + OP::template Operation(last_value, last_seen_count, dataptr, all_null); + } + + template + void Update(T *data, ValidityMask &validity, idx_t idx) { + if (validity.RowIsValid(idx)) { + all_null = false; + if (seen_count == 0) { + // no value seen yet + // assign the current value, and set the seen_count to 1 + // note that we increment last_seen_count rather than setting it to 1 + // this is intentional: this is the first VALID value we see + // but it might not be the first value in case of nulls! 
+ last_value = data[idx]; + seen_count = 1; + last_seen_count++; + } else if (last_value == data[idx]) { + // the last value is identical to this value: increment the last_seen_count + last_seen_count++; + } else { + // the values are different + // issue the callback on the last value + Flush(); + + // increment the seen_count and put the new value into the RLE slot + last_value = data[idx]; + seen_count++; + last_seen_count = 1; + } + } else { + // NULL value: we merely increment the last_seen_count + last_seen_count++; + } + if (last_seen_count == NumericLimits::Maximum()) { + // we have seen the same value so many times in a row we are at the limit of what fits in our count + // write away the value and move to the next value + Flush(); + last_seen_count = 0; + seen_count++; + } + } +}; -//===--------------------------------------------------------------------===// -// Views -//===--------------------------------------------------------------------===// -void CheckpointManager::WriteView(ViewCatalogEntry &view) { - view.Serialize(*metadata_writer); -} +template +struct RLEAnalyzeState : public AnalyzeState { + RLEAnalyzeState() { + } -void CheckpointManager::ReadView(ClientContext &context, MetaBlockReader &reader) { - auto info = ViewCatalogEntry::Deserialize(reader); + RLEState state; +}; - auto &catalog = Catalog::GetCatalog(db); - catalog.CreateView(context, info.get()); +template +unique_ptr RLEInitAnalyze(ColumnData &col_data, PhysicalType type) { + return make_unique>(); } -//===--------------------------------------------------------------------===// -// Sequences -//===--------------------------------------------------------------------===// -void CheckpointManager::WriteSequence(SequenceCatalogEntry &seq) { - seq.Serialize(*metadata_writer); -} +template +bool RLEAnalyze(AnalyzeState &state, Vector &input, idx_t count) { + auto &rle_state = (RLEAnalyzeState &)state; + VectorData vdata; + input.Orrify(count, vdata); -void 
CheckpointManager::ReadSequence(ClientContext &context, MetaBlockReader &reader) { - auto info = SequenceCatalogEntry::Deserialize(reader); + auto data = (T *)vdata.data; + for (idx_t i = 0; i < count; i++) { + auto idx = vdata.sel->get_index(i); + rle_state.state.Update(data, vdata.validity, idx); + } + return true; +} - auto &catalog = Catalog::GetCatalog(db); - catalog.CreateSequence(context, info.get()); +template +idx_t RLEFinalAnalyze(AnalyzeState &state) { + auto &rle_state = (RLEAnalyzeState &)state; + return (sizeof(rle_count_t) + sizeof(T)) * rle_state.state.seen_count; } //===--------------------------------------------------------------------===// -// Macro's +// Compress //===--------------------------------------------------------------------===// -void CheckpointManager::WriteMacro(MacroCatalogEntry ¯o) { - macro.Serialize(*metadata_writer); -} +template +struct RLECompressState : public CompressionState { + struct RLEWriter { + template + static void Operation(VALUE_TYPE value, rle_count_t count, void *dataptr, bool is_null) { + auto state = (RLECompressState *)dataptr; + state->WriteValue(value, count, is_null); + } + }; -void CheckpointManager::ReadMacro(ClientContext &context, MetaBlockReader &reader) { - auto info = MacroCatalogEntry::Deserialize(reader); + static idx_t MaxRLECount() { + auto entry_size = sizeof(T) + sizeof(rle_count_t); + auto entry_count = Storage::BLOCK_SIZE / entry_size; + auto max_vector_count = entry_count / STANDARD_VECTOR_SIZE; + return max_vector_count * STANDARD_VECTOR_SIZE; + } - auto &catalog = Catalog::GetCatalog(db); - catalog.CreateFunction(context, info.get()); -} + explicit RLECompressState(ColumnDataCheckpointer &checkpointer_p) : checkpointer(checkpointer_p) { + auto &db = checkpointer.GetDatabase(); + auto &type = checkpointer.GetType(); + auto &config = DBConfig::GetConfig(db); + function = config.GetCompressionFunction(CompressionType::COMPRESSION_RLE, type.InternalType()); + 
CreateEmptySegment(checkpointer.GetRowGroup().start); -//===--------------------------------------------------------------------===// -// Table Metadata -//===--------------------------------------------------------------------===// -void CheckpointManager::WriteTable(TableCatalogEntry &table) { - // write the table meta data - table.Serialize(*metadata_writer); - //! write the blockId for the table info - metadata_writer->Write(tabledata_writer->block->id); - //! and the offset to where the info starts - metadata_writer->Write(tabledata_writer->offset); - // now we need to write the table data - TableDataWriter writer(db, table, *tabledata_writer); - writer.WriteTableData(); -} + state.dataptr = (void *)this; + max_rle_count = MaxRLECount(); + } -void CheckpointManager::ReadTable(ClientContext &context, MetaBlockReader &reader) { - // deserialize the table meta data - auto info = TableCatalogEntry::Deserialize(reader); - // bind the info - auto binder = Binder::CreateBinder(context); - auto bound_info = binder->BindCreateTableInfo(move(info)); + void CreateEmptySegment(idx_t row_start) { + auto &db = checkpointer.GetDatabase(); + auto &type = checkpointer.GetType(); + auto column_segment = ColumnSegment::CreateTransientSegment(db, type, row_start); + column_segment->function = function; + current_segment = move(column_segment); + auto &buffer_manager = BufferManager::GetBufferManager(db); + handle = buffer_manager.Pin(current_segment->block); + } - // now read the actual table data and place it into the create table info - auto block_id = reader.Read(); - auto offset = reader.Read(); - MetaBlockReader table_data_reader(db, block_id); - table_data_reader.offset = offset; - TableDataReader data_reader(db, table_data_reader, *bound_info); - data_reader.ReadTableData(); + void Append(VectorData &vdata, idx_t count) { + auto data = (T *)vdata.data; + for (idx_t i = 0; i < count; i++) { + auto idx = vdata.sel->get_index(i); + state.template Update::RLEWriter>(data, 
vdata.validity, idx); + } + } - // finally create the table in the catalog - auto &catalog = Catalog::GetCatalog(db); - catalog.CreateTable(context, bound_info.get()); -} + void WriteValue(T value, rle_count_t count, bool is_null) { + // write the RLE entry + auto handle_ptr = handle->Ptr(); + auto data_pointer = (T *)handle_ptr; + auto index_pointer = (rle_count_t *)(handle_ptr + max_rle_count * sizeof(T)); + data_pointer[entry_count] = value; + index_pointer[entry_count] = count; + entry_count++; -} // namespace duckdb + // update meta data + if (!is_null) { + NumericStatistics::Update(current_segment->stats, value); + } + current_segment->count += count; + + if (entry_count == max_rle_count) { + // we have finished writing this segment: flush it and create a new segment + auto row_start = current_segment->start + current_segment->count; + FlushSegment(); + CreateEmptySegment(row_start); + entry_count = 0; + } + } + + void FlushSegment() { + handle.reset(); + auto &state = checkpointer.GetCheckpointState(); + state.FlushSegment(move(current_segment)); + } + void Finalize() { + state.template Flush::RLEWriter>(); + FlushSegment(); + current_segment.reset(); + } + ColumnDataCheckpointer &checkpointer; + CompressionFunction *function; + unique_ptr current_segment; + unique_ptr handle; + RLEState state; + idx_t entry_count = 0; + idx_t max_rle_count; +}; +template +unique_ptr RLEInitCompression(ColumnDataCheckpointer &checkpointer, unique_ptr state) { + return make_unique>(checkpointer); +} +template +void RLECompress(CompressionState &state_p, Vector &scan_vector, idx_t count) { + auto &state = (RLECompressState &)state_p; + VectorData vdata; + scan_vector.Orrify(count, vdata); + state.Append(vdata, count); +} +template +void RLEFinalizeCompress(CompressionState &state_p) { + auto &state = (RLECompressState &)state_p; + state.Finalize(); +} +//===--------------------------------------------------------------------===// +// Scan 
+//===--------------------------------------------------------------------===// +template +struct RLEScanState : public SegmentScanState { + explicit RLEScanState(ColumnSegment &segment) { + auto &buffer_manager = BufferManager::GetBufferManager(segment.db); + handle = buffer_manager.Pin(segment.block); + entry_pos = 0; + position_in_entry = 0; + max_rle_count = RLECompressState::MaxRLECount(); + } + void Skip(idx_t skip_count) { + auto data = handle->node->buffer; + auto index_pointer = (rle_count_t *)(data + (max_rle_count * sizeof(T))); + for (idx_t i = 0; i < skip_count; i++) { + // assign the current value + position_in_entry++; + if (position_in_entry >= index_pointer[entry_pos]) { + // handled all entries in this RLE value + // move to the next entry + entry_pos++; + position_in_entry = 0; + } + } + } + unique_ptr handle; + idx_t entry_pos; + idx_t position_in_entry; + idx_t max_rle_count; +}; +template +unique_ptr RLEInitScan(ColumnSegment &segment) { + auto result = make_unique>(segment); + return move(result); +} +//===--------------------------------------------------------------------===// +// Scan base data +//===--------------------------------------------------------------------===// +template +void RLESkip(ColumnSegment &segment, ColumnScanState &state, idx_t skip_count) { + auto &scan_state = (RLEScanState &)*state.scan_state; + scan_state.Skip(skip_count); +} -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/storage/table/validity_column_data.hpp -// -// -//===----------------------------------------------------------------------===// +template +void RLEScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result, + idx_t result_offset) { + auto &scan_state = (RLEScanState &)*state.scan_state; + auto data = scan_state.handle->node->buffer; + auto data_pointer = (T *)data; + auto index_pointer = (rle_count_t *)(data + (scan_state.max_rle_count * sizeof(T))); + 
auto result_data = FlatVector::GetData(result); + result.SetVectorType(VectorType::FLAT_VECTOR); + for (idx_t i = 0; i < scan_count; i++) { + // assign the current value + result_data[result_offset + i] = data_pointer[scan_state.entry_pos]; + scan_state.position_in_entry++; + if (scan_state.position_in_entry >= index_pointer[scan_state.entry_pos]) { + // handled all entries in this RLE value + // move to the next entry + scan_state.entry_pos++; + scan_state.position_in_entry = 0; + } + } +} +template +void RLEScan(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result) { + // FIXME: emit constant vector if repetition of single value is >= scan_count + RLEScanPartial(segment, state, scan_count, result, 0); +} +//===--------------------------------------------------------------------===// +// Fetch +//===--------------------------------------------------------------------===// +template +void RLEFetchRow(ColumnSegment &segment, ColumnFetchState &state, row_t row_id, Vector &result, idx_t result_idx) { + RLEScanState scan_state(segment); + scan_state.Skip(row_id); -namespace duckdb { + auto data = scan_state.handle->node->buffer; + auto data_pointer = (T *)data; + auto result_data = FlatVector::GetData(result); + result_data[result_idx] = data_pointer[scan_state.entry_pos]; +} -//! Validity column data represents the validity data (i.e. 
which values are null) -class ValidityColumnData : public ColumnData { -public: - ValidityColumnData(DatabaseInstance &db, DataTableInfo &table_info, idx_t column_idx); +//===--------------------------------------------------------------------===// +// Get Function +//===--------------------------------------------------------------------===// +template +CompressionFunction GetRLEFunction(PhysicalType data_type) { + return CompressionFunction(CompressionType::COMPRESSION_RLE, data_type, RLEInitAnalyze, RLEAnalyze, + RLEFinalAnalyze, RLEInitCompression, RLECompress, RLEFinalizeCompress, + RLEInitScan, RLEScan, RLEScanPartial, RLEFetchRow, RLESkip, nullptr, + nullptr, nullptr); +} -public: - bool CheckZonemap(ColumnScanState &state, TableFilter &filter) override; - void InitializeScan(ColumnScanState &state) override; - void InitializeScanWithOffset(ColumnScanState &state, idx_t vector_idx) override; - void Scan(Transaction &transaction, ColumnScanState &state, Vector &result) override; - void IndexScan(ColumnScanState &state, Vector &result, bool allow_pending_updates) override; - void Update(Transaction &transaction, Vector &updates, Vector &row_ids, idx_t count) override; +CompressionFunction RLEFun::GetFunction(PhysicalType type) { + switch (type) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + return GetRLEFunction(type); + case PhysicalType::INT16: + return GetRLEFunction(type); + case PhysicalType::INT32: + return GetRLEFunction(type); + case PhysicalType::INT64: + return GetRLEFunction(type); + case PhysicalType::INT128: + return GetRLEFunction(type); + case PhysicalType::UINT8: + return GetRLEFunction(type); + case PhysicalType::UINT16: + return GetRLEFunction(type); + case PhysicalType::UINT32: + return GetRLEFunction(type); + case PhysicalType::UINT64: + return GetRLEFunction(type); + case PhysicalType::FLOAT: + return GetRLEFunction(type); + case PhysicalType::DOUBLE: + return GetRLEFunction(type); + default: + throw 
InternalException("Unsupported type for RLE"); + } +} - static unique_ptr Deserialize(DatabaseInstance &db, Deserializer &source); -}; +bool RLEFun::TypeIsSupported(PhysicalType type) { + switch (type) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + case PhysicalType::INT16: + case PhysicalType::INT32: + case PhysicalType::INT64: + case PhysicalType::INT128: + case PhysicalType::UINT8: + case PhysicalType::UINT16: + case PhysicalType::UINT32: + case PhysicalType::UINT64: + case PhysicalType::FLOAT: + case PhysicalType::DOUBLE: + return true; + default: + return false; + } +} } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/storage/table/standard_column_data.hpp -// -// -//===----------------------------------------------------------------------===// + + + + + + @@ -121624,537 +132679,952 @@ class ValidityColumnData : public ColumnData { namespace duckdb { -//! Standard column data represents a regular flat column (e.g. a column of type INTEGER or STRING) -class StandardColumnData : public ColumnData { +//===--------------------------------------------------------------------===// +// Storage Class +//===--------------------------------------------------------------------===// +UncompressedStringSegmentState::~UncompressedStringSegmentState() { + while (head) { + // prevent deep recursion here + head = move(head->next); + } +} + +struct UncompressedStringStorage { public: - StandardColumnData(DatabaseInstance &db, DataTableInfo &table_info, LogicalType type, idx_t column_idx); + //! Marker used in length field to indicate the presence of a big string + static constexpr uint16_t BIG_STRING_MARKER = (uint16_t)-1; + //! Base size of big string marker (block id + offset) + static constexpr idx_t BIG_STRING_MARKER_BASE_SIZE = sizeof(block_id_t) + sizeof(int32_t); + //! 
The marker size of the big string + static constexpr idx_t BIG_STRING_MARKER_SIZE = BIG_STRING_MARKER_BASE_SIZE + sizeof(uint16_t); - //! The validity column data - ValidityColumnData validity; +public: + static unique_ptr StringInitAnalyze(ColumnData &col_data, PhysicalType type); + static bool StringAnalyze(AnalyzeState &state_p, Vector &input, idx_t count); + static idx_t StringFinalAnalyze(AnalyzeState &state_p); + static unique_ptr StringInitScan(ColumnSegment &segment); + static void StringScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result, + idx_t result_offset); + static void StringScan(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result); + static void StringFetchRow(ColumnSegment &segment, ColumnFetchState &state, row_t row_id, Vector &result, + idx_t result_idx); + static unique_ptr StringInitSegment(ColumnSegment &segment, block_id_t block_id); + static idx_t StringAppend(ColumnSegment &segment, SegmentStatistics &stats, VectorData &data, idx_t offset, + idx_t count); public: - bool CheckZonemap(ColumnScanState &state, TableFilter &filter) override; - void InitializeScan(ColumnScanState &state) override; - void InitializeScanWithOffset(ColumnScanState &state, idx_t vector_idx) override; - void Scan(Transaction &transaction, ColumnScanState &state, Vector &result) override; - void IndexScan(ColumnScanState &state, Vector &result, bool allow_pending_updates) override; - void InitializeAppend(ColumnAppendState &state) override; - void AppendData(ColumnAppendState &state, VectorData &vdata, idx_t count) override; - void RevertAppend(row_t start_row) override; - void Update(Transaction &transaction, Vector &updates, Vector &row_ids, idx_t count) override; - void Fetch(ColumnScanState &state, row_t row_id, Vector &result) override; - void FetchRow(ColumnFetchState &state, Transaction &transaction, row_t row_id, Vector &result, - idx_t result_idx) override; + static inline void 
UpdateStringStats(SegmentStatistics &stats, const string_t &new_value) { + auto &sstats = (StringStatistics &)*stats.statistics; + sstats.Update(new_value); + } - unique_ptr GetStatistics() override; + static void SetDictionaryOffset(BufferHandle &handle, idx_t offset); + static idx_t GetDictionaryOffset(BufferHandle &handle); + static idx_t RemainingSpace(ColumnSegment &segment, BufferHandle &handle); + static void WriteString(ColumnSegment &segment, string_t string, block_id_t &result_block, int32_t &result_offset); + static void WriteStringMemory(ColumnSegment &segment, string_t string, block_id_t &result_block, + int32_t &result_offset); + static string_t ReadString(ColumnSegment &segment, Vector &result, block_id_t block, int32_t offset); + static string_t ReadString(data_ptr_t target, int32_t offset); + static void WriteStringMarker(data_ptr_t target, block_id_t block_id, int32_t offset); + static void ReadStringMarker(data_ptr_t target, block_id_t &block_id, int32_t &offset); - void CommitDropColumn() override; - void Initialize(PersistentColumnData &column_data) override; - void Checkpoint(TableDataWriter &writer) override; - static unique_ptr Deserialize(DatabaseInstance &db, Deserializer &source, - const LogicalType &type); + static string_location_t FetchStringLocation(data_ptr_t baseptr, int32_t dict_offset); + static string_t FetchStringFromDict(ColumnSegment &segment, Vector &result, data_ptr_t baseptr, + int32_t dict_offset); + static string_t FetchString(ColumnSegment &segment, Vector &result, data_ptr_t baseptr, string_location_t location); }; -} // namespace duckdb - +//===--------------------------------------------------------------------===// +// Analyze +//===--------------------------------------------------------------------===// +struct StringAnalyzeState : public AnalyzeState { + StringAnalyzeState() : count(0), total_string_size(0), overflow_strings(0) { + } -namespace duckdb { + idx_t count; + idx_t total_string_size; + idx_t 
overflow_strings; +}; -ColumnData::ColumnData(DatabaseInstance &db, DataTableInfo &table_info, LogicalType type, idx_t column_idx) - : table_info(table_info), type(move(type)), db(db), column_idx(column_idx), persistent_rows(0) { - statistics = BaseStatistics::CreateEmpty(type); +unique_ptr UncompressedStringStorage::StringInitAnalyze(ColumnData &col_data, PhysicalType type) { + return make_unique(); } -void ColumnData::FilterScan(Transaction &transaction, ColumnScanState &state, Vector &result, SelectionVector &sel, - idx_t &approved_tuple_count) { - Scan(transaction, state, result); - result.Slice(sel, approved_tuple_count); -} +bool UncompressedStringStorage::StringAnalyze(AnalyzeState &state_p, Vector &input, idx_t count) { + auto &state = (StringAnalyzeState &)state_p; + VectorData vdata; + input.Orrify(count, vdata); -void ColumnData::Select(Transaction &transaction, ColumnScanState &state, Vector &result, SelectionVector &sel, - idx_t &approved_tuple_count, vector &table_filters) { - Scan(transaction, state, result); - for (auto &filter : table_filters) { - UncompressedSegment::FilterSelection(sel, result, filter, approved_tuple_count, FlatVector::Validity(result)); + state.count += count; + auto data = (string_t *)vdata.data; + for (idx_t i = 0; i < count; i++) { + auto idx = vdata.sel->get_index(i); + if (vdata.validity.RowIsValid(idx)) { + auto string_size = data[idx].GetSize(); + state.total_string_size += string_size; + if (string_size >= StringUncompressed::STRING_BLOCK_LIMIT) { + state.overflow_strings++; + } + } } + return true; } -void ColumnScanState::Next() { - //! 
There is no column segment - if (!current) { - return; - } - vector_index++; - if (vector_index * STANDARD_VECTOR_SIZE >= current->count) { - current = (ColumnSegment *)current->next.get(); - vector_index = 0; - initialized = false; - segment_checked = false; - } - vector_index_updates++; - if (vector_index_updates >= MorselInfo::MORSEL_VECTOR_COUNT) { - updates = (UpdateSegment *)updates->next.get(); - vector_index_updates = 0; - } - for (auto &child_state : child_states) { - child_state.Next(); - } +idx_t UncompressedStringStorage::StringFinalAnalyze(AnalyzeState &state_p) { + auto &state = (StringAnalyzeState &)state_p; + return state.count * sizeof(uint32_t) + state.total_string_size + state.overflow_strings * BIG_STRING_MARKER_SIZE; +} + +//===--------------------------------------------------------------------===// +// Scan +//===--------------------------------------------------------------------===// +struct StringScanState : public SegmentScanState { + unique_ptr handle; +}; + +unique_ptr UncompressedStringStorage::StringInitScan(ColumnSegment &segment) { + auto result = make_unique(); + auto &buffer_manager = BufferManager::GetBufferManager(segment.db); + result->handle = buffer_manager.Pin(segment.block); + return move(result); } -void TableScanState::NextVector() { - //! 
nothing to scan for this vector, skip the entire vector - for (idx_t j = 0; j < column_count; j++) { - column_scans[j].Next(); +//===--------------------------------------------------------------------===// +// Scan base data +//===--------------------------------------------------------------------===// +void UncompressedStringStorage::StringScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, + Vector &result, idx_t result_offset) { + // clear any previously locked buffers and get the primary buffer handle + auto &scan_state = (StringScanState &)*state.scan_state; + auto start = segment.GetRelativeIndex(state.row_index); + + auto baseptr = scan_state.handle->node->buffer; + auto base_data = (int32_t *)scan_state.handle->node->buffer; + auto result_data = FlatVector::GetData(result); + + for (idx_t i = 0; i < scan_count; i++) { + result_data[result_offset + i] = FetchStringFromDict(segment, result, baseptr, base_data[start + i]); } } -void ColumnData::Append(ColumnAppendState &state, Vector &vector, idx_t count) { - VectorData vdata; - vector.Orrify(count, vdata); - AppendData(state, vdata, count); +void UncompressedStringStorage::StringScan(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, + Vector &result) { + StringScanPartial(segment, state, scan_count, result, 0); } -void ColumnData::InitializeAppend(ColumnAppendState &state) { - lock_guard tree_lock(data.node_lock); - if (data.nodes.empty()) { - // no transient segments yet, append one - AppendTransientSegment(persistent_rows); - } - if (updates.nodes.empty()) { - AppendUpdateSegment(0); - } - auto segment = (ColumnSegment *)data.GetLastSegment(); - if (segment->segment_type == ColumnSegmentType::PERSISTENT) { - // cannot append to persistent segment, convert the last segment into a transient segment - auto transient = make_unique((PersistentSegment &)*segment); - state.current = (TransientSegment *)transient.get(); - data.nodes.back().node = (SegmentBase 
*)transient.get(); - if (data.root_node.get() == segment) { - data.root_node = move(transient); - } else { - D_ASSERT(data.nodes.size() >= 2); - data.nodes[data.nodes.size() - 2].node->next = move(transient); - } +//===--------------------------------------------------------------------===// +// Fetch +//===--------------------------------------------------------------------===// +void UncompressedStringStorage::StringFetchRow(ColumnSegment &segment, ColumnFetchState &state, row_t row_id, + Vector &result, idx_t result_idx) { + data_ptr_t baseptr; + + // fetch a single row from the string segment + // first pin the main buffer if it is not already pinned + auto primary_id = segment.block->BlockId(); + + auto entry = state.handles.find(primary_id); + if (entry == state.handles.end()) { + // not pinned yet: pin it + auto &buffer_manager = BufferManager::GetBufferManager(segment.db); + auto handle = buffer_manager.Pin(segment.block); + baseptr = handle->node->buffer; + state.handles[primary_id] = move(handle); } else { - state.current = (TransientSegment *)segment; + // already pinned: use the pinned handle + baseptr = entry->second->node->buffer; } - state.updates = (UpdateSegment *)updates.nodes.back().node; - D_ASSERT(state.current->segment_type == ColumnSegmentType::TRANSIENT); - state.current->InitializeAppend(state); + auto base_data = (int32_t *)baseptr; + auto result_data = FlatVector::GetData(result); + + result_data[result_idx] = FetchStringFromDict(segment, result, baseptr, base_data[row_id]); } -void ColumnData::AppendData(ColumnAppendState &state, VectorData &vdata, idx_t count) { - // append to update segments - idx_t remaining_update_count = count; - while (remaining_update_count > 0) { - idx_t to_append_elements = - MinValue(remaining_update_count, UpdateSegment::MORSEL_SIZE - state.updates->count); - state.updates->count += to_append_elements; - if (state.updates->count == UpdateSegment::MORSEL_SIZE) { - // have to append a new segment - 
AppendUpdateSegment(state.updates->start + state.updates->count); - state.updates = (UpdateSegment *)updates.nodes.back().node; - } - remaining_update_count -= to_append_elements; +//===--------------------------------------------------------------------===// +// Append +//===--------------------------------------------------------------------===// +unique_ptr UncompressedStringStorage::StringInitSegment(ColumnSegment &segment, + block_id_t block_id) { + auto &buffer_manager = BufferManager::GetBufferManager(segment.db); + if (block_id == INVALID_BLOCK) { + auto handle = buffer_manager.Pin(segment.block); + SetDictionaryOffset(*handle, sizeof(idx_t)); } + return make_unique(); +} - idx_t offset = 0; - while (true) { - // append the data from the vector - idx_t copied_elements = state.current->Append(state, vdata, offset, count); - MergeStatistics(*state.current->stats.statistics); - if (copied_elements == count) { - // finished copying everything - break; +idx_t UncompressedStringStorage::StringAppend(ColumnSegment &segment, SegmentStatistics &stats, VectorData &data, + idx_t offset, idx_t count) { + auto &buffer_manager = BufferManager::GetBufferManager(segment.db); + auto handle = buffer_manager.Pin(segment.block); + + auto source_data = (string_t *)data.data; + auto result_data = (int32_t *)handle->node->buffer; + auto end = handle->node->buffer + Storage::BLOCK_SIZE; + for (idx_t i = 0; i < count; i++) { + auto source_idx = data.sel->get_index(offset + i); + auto target_idx = segment.count.load(); + idx_t remaining_space = RemainingSpace(segment, *handle); + if (remaining_space < sizeof(int32_t)) { + // string index does not fit in the block at all + return i; } + remaining_space -= sizeof(int32_t); + if (!data.validity.RowIsValid(source_idx)) { + // null value is stored as -1 + result_data[target_idx] = 0; + } else { + auto dictionary_offset = GetDictionaryOffset(*handle); + D_ASSERT(dictionary_offset < Storage::BLOCK_SIZE); + // non-null value, check if we 
can fit it within the block + idx_t string_length = source_data[source_idx].GetSize(); + idx_t dictionary_length = string_length + sizeof(uint16_t); + + // determine whether or not we have space in the block for this string + bool use_overflow_block = false; + idx_t required_space = dictionary_length; + if (required_space >= StringUncompressed::STRING_BLOCK_LIMIT) { + // string exceeds block limit, store in overflow block and only write a marker here + required_space = BIG_STRING_MARKER_SIZE; + use_overflow_block = true; + } + if (required_space > remaining_space) { + // no space remaining: return how many tuples we ended up writing + return i; + } + // we have space: write the string + UpdateStringStats(stats, source_data[source_idx]); + + if (use_overflow_block) { + // write to overflow blocks + block_id_t block; + int32_t offset; + // write the string into the current string block + WriteString(segment, source_data[source_idx], block, offset); + dictionary_offset += BIG_STRING_MARKER_SIZE; + auto dict_pos = end - dictionary_offset; - // we couldn't fit everything we wanted in the current column segment, create a new one - { - lock_guard tree_lock(data.node_lock); - AppendTransientSegment(state.current->start + state.current->count); - state.current = (TransientSegment *)data.GetLastSegment(); - state.current->InitializeAppend(state); + // write a big string marker into the dictionary + WriteStringMarker(dict_pos, block, offset); + } else { + // string fits in block, append to dictionary and increment dictionary position + D_ASSERT(string_length < NumericLimits::Maximum()); + dictionary_offset += required_space; + auto dict_pos = end - dictionary_offset; // first write the length as u16 + Store(string_length, dict_pos); + // now write the actual string data into the dictionary + memcpy(dict_pos + sizeof(uint16_t), source_data[source_idx].GetDataUnsafe(), string_length); + } + D_ASSERT(RemainingSpace(segment, *handle) <= Storage::BLOCK_SIZE); + // place the 
dictionary offset into the set of vectors + D_ASSERT(dictionary_offset <= Storage::BLOCK_SIZE); + result_data[target_idx] = dictionary_offset; + SetDictionaryOffset(*handle, dictionary_offset); } - offset += copied_elements; - count -= copied_elements; + segment.count++; } + return count; } -void ColumnData::RevertAppend(row_t start_row) { - lock_guard tree_lock(data.node_lock); - // check if this row is in the segment tree at all - if (idx_t(start_row) >= data.nodes.back().row_start + data.nodes.back().node->count) { - // the start row is equal to the final portion of the column data: nothing was ever appended here - D_ASSERT(idx_t(start_row) == data.nodes.back().row_start + data.nodes.back().node->count); - return; - } - // find the segment index that the current row belongs to - idx_t segment_index = data.GetSegmentIndex(start_row); - auto segment = data.nodes[segment_index].node; - auto &transient = (TransientSegment &)*segment; - D_ASSERT(transient.segment_type == ColumnSegmentType::TRANSIENT); +//===--------------------------------------------------------------------===// +// Get Function +//===--------------------------------------------------------------------===// +CompressionFunction StringUncompressed::GetFunction(PhysicalType data_type) { + D_ASSERT(data_type == PhysicalType::VARCHAR); + return CompressionFunction(CompressionType::COMPRESSION_UNCOMPRESSED, data_type, + UncompressedStringStorage::StringInitAnalyze, UncompressedStringStorage::StringAnalyze, + UncompressedStringStorage::StringFinalAnalyze, UncompressedFunctions::InitCompression, + UncompressedFunctions::Compress, UncompressedFunctions::FinalizeCompress, + UncompressedStringStorage::StringInitScan, UncompressedStringStorage::StringScan, + UncompressedStringStorage::StringScanPartial, UncompressedStringStorage::StringFetchRow, + UncompressedFunctions::EmptySkip, UncompressedStringStorage::StringInitSegment, + UncompressedStringStorage::StringAppend, nullptr); +} - // remove any segments 
AFTER this segment: they should be deleted entirely - if (segment_index < data.nodes.size() - 1) { - data.nodes.erase(data.nodes.begin() + segment_index + 1, data.nodes.end()); - } - segment->next = nullptr; - transient.RevertAppend(start_row); +//===--------------------------------------------------------------------===// +// Helper Functions +//===--------------------------------------------------------------------===// +void UncompressedStringStorage::SetDictionaryOffset(BufferHandle &handle, idx_t offset) { + Store(offset, handle.node->buffer + Storage::BLOCK_SIZE - sizeof(idx_t)); +} - // do the same with the update segments - idx_t update_segment_idx = updates.GetSegmentIndex(start_row); - auto update_segment = updates.nodes[update_segment_idx].node; - // remove any segments AFTER this segment - if (update_segment_idx < updates.nodes.size() - 1) { - updates.nodes.erase(updates.nodes.begin() + update_segment_idx + 1, updates.nodes.end()); - } - // truncate this segment - update_segment->next = nullptr; - update_segment->count = start_row - update_segment->start; +idx_t UncompressedStringStorage::GetDictionaryOffset(BufferHandle &handle) { + return Load(handle.node->buffer + Storage::BLOCK_SIZE - sizeof(idx_t)); } -void ColumnData::Fetch(ColumnScanState &state, row_t row_id, Vector &result) { - // perform the fetch within the segment - auto segment = (ColumnSegment *)data.GetSegment(row_id); - auto vector_index = (row_id - segment->start) / STANDARD_VECTOR_SIZE; - segment->Fetch(state, vector_index, result); +idx_t UncompressedStringStorage::RemainingSpace(ColumnSegment &segment, BufferHandle &handle) { + idx_t used_space = GetDictionaryOffset(handle) + segment.count * sizeof(int32_t); + D_ASSERT(Storage::BLOCK_SIZE >= used_space); + return Storage::BLOCK_SIZE - used_space; +} - // merge any updates - auto update_segment = (UpdateSegment *)updates.GetSegment(row_id); - auto update_vector_index = (row_id - update_segment->start) / STANDARD_VECTOR_SIZE; - 
update_segment->FetchCommitted(update_vector_index, result); +void UncompressedStringStorage::WriteString(ColumnSegment &segment, string_t string, block_id_t &result_block, + int32_t &result_offset) { + auto &state = (UncompressedStringSegmentState &)*segment.GetSegmentState(); + if (state.overflow_writer) { + // overflow writer is set: write string there + state.overflow_writer->WriteString(string, result_block, result_offset); + } else { + // default overflow behavior: use in-memory buffer to store the overflow string + WriteStringMemory(segment, string, result_block, result_offset); + } } -void ColumnData::FetchRow(ColumnFetchState &state, Transaction &transaction, row_t row_id, Vector &result, - idx_t result_idx) { - auto segment = (ColumnSegment *)data.GetSegment(row_id); - auto update_segment = (UpdateSegment *)updates.GetSegment(row_id); +void UncompressedStringStorage::WriteStringMemory(ColumnSegment &segment, string_t string, block_id_t &result_block, + int32_t &result_offset) { + uint32_t total_length = string.GetSize() + sizeof(uint32_t); + shared_ptr block; + unique_ptr handle; - // now perform the fetch within the segment - segment->FetchRow(state, row_id, result, result_idx); - // fetch any (potential) updates - update_segment->FetchRow(transaction, row_id, result, result_idx); -} + auto &buffer_manager = BufferManager::GetBufferManager(segment.db); + auto &state = (UncompressedStringSegmentState &)*segment.GetSegmentState(); + // check if the string fits in the current block + if (!state.head || state.head->offset + total_length >= state.head->size) { + // string does not fit, allocate space for it + // create a new string block + idx_t alloc_size = MaxValue(total_length, Storage::BLOCK_SIZE); + auto new_block = make_unique(); + new_block->offset = 0; + new_block->size = alloc_size; + // allocate an in-memory buffer for it + block = buffer_manager.RegisterMemory(alloc_size, false); + handle = buffer_manager.Pin(block); + 
state.overflow_blocks[block->BlockId()] = new_block.get(); + new_block->block = move(block); + new_block->next = move(state.head); + state.head = move(new_block); + } else { + // string fits, copy it into the current block + handle = buffer_manager.Pin(state.head->block); + } -void ColumnData::AppendTransientSegment(idx_t start_row) { - auto new_segment = make_unique(db, type, start_row); - data.AppendSegment(move(new_segment)); -} + result_block = state.head->block->BlockId(); + result_offset = state.head->offset; -void ColumnData::AppendUpdateSegment(idx_t start_row, idx_t count) { - auto new_segment = make_unique(*this, start_row, count); - updates.AppendSegment(move(new_segment)); + // copy the string and the length there + auto ptr = handle->node->buffer + state.head->offset; + Store(string.GetSize(), ptr); + ptr += sizeof(uint32_t); + memcpy(ptr, string.GetDataUnsafe(), string.GetSize()); + state.head->offset += total_length; } -void ColumnData::SetStatistics(unique_ptr new_stats) { - lock_guard slock(stats_lock); - this->statistics = move(new_stats); -} +string_t UncompressedStringStorage::ReadString(ColumnSegment &segment, Vector &result, block_id_t block, + int32_t offset) { + D_ASSERT(block != INVALID_BLOCK); + D_ASSERT(offset < Storage::BLOCK_SIZE); -void ColumnData::MergeStatistics(BaseStatistics &other) { - lock_guard slock(stats_lock); - statistics->Merge(other); -} + auto &buffer_manager = BufferManager::GetBufferManager(segment.db); + auto &state = (UncompressedStringSegmentState &)*segment.GetSegmentState(); + if (block < MAXIMUM_BLOCK) { + // read the overflow string from disk + // pin the initial handle and read the length + auto block_handle = buffer_manager.RegisterBlock(block); + auto handle = buffer_manager.Pin(block_handle); -unique_ptr ColumnData::GetStatistics() { - lock_guard slock(stats_lock); - return statistics->Copy(); -} + uint32_t length = Load(handle->node->buffer + offset); + uint32_t remaining = length; + offset += 
sizeof(uint32_t); -void ColumnData::CommitDropColumn() { - auto &block_manager = BlockManager::GetBlockManager(db); - auto segment = (ColumnSegment *)data.GetRootSegment(); - while (segment) { - if (segment->segment_type == ColumnSegmentType::PERSISTENT) { - auto &persistent = (PersistentSegment &)*segment; - block_manager.MarkBlockAsModified(persistent.block_id); + // allocate a buffer to store the string + auto alloc_size = MaxValue(Storage::BLOCK_SIZE, length + sizeof(uint32_t)); + auto target_handle = buffer_manager.Allocate(alloc_size); + auto target_ptr = target_handle->node->buffer; + // write the length in this block as well + Store(length, target_ptr); + target_ptr += sizeof(uint32_t); + // now append the string to the single buffer + while (remaining > 0) { + idx_t to_write = MinValue(remaining, Storage::BLOCK_SIZE - sizeof(block_id_t) - offset); + memcpy(target_ptr, handle->node->buffer + offset, to_write); + + remaining -= to_write; + offset += to_write; + target_ptr += to_write; + if (remaining > 0) { + // read the next block + block_id_t next_block = Load(handle->node->buffer + offset); + block_handle = buffer_manager.RegisterBlock(next_block); + handle = buffer_manager.Pin(block_handle); + offset = 0; + } } - segment = (ColumnSegment *)segment->next.get(); + + auto final_buffer = target_handle->node->buffer; + StringVector::AddHandle(result, move(target_handle)); + return ReadString(final_buffer, 0); + } else { + // read the overflow string from memory + // first pin the handle, if it is not pinned yet + auto entry = state.overflow_blocks.find(block); + D_ASSERT(entry != state.overflow_blocks.end()); + auto handle = buffer_manager.Pin(entry->second->block); + auto final_buffer = handle->node->buffer; + StringVector::AddHandle(result, move(handle)); + return ReadString(final_buffer, offset); } } -unique_ptr ColumnData::CreateCheckpointState(TableDataWriter &writer) { - return make_unique(*this, writer); +string_t 
UncompressedStringStorage::ReadString(data_ptr_t target, int32_t offset) { + auto ptr = target + offset; + auto str_length = Load(ptr); + auto str_ptr = (char *)(ptr + sizeof(uint32_t)); + return string_t(str_ptr, str_length); } -ColumnCheckpointState::ColumnCheckpointState(ColumnData &column_data, TableDataWriter &writer) - : column_data(column_data), writer(writer) { +void UncompressedStringStorage::WriteStringMarker(data_ptr_t target, block_id_t block_id, int32_t offset) { + uint16_t length = BIG_STRING_MARKER; + memcpy(target, &length, sizeof(uint16_t)); + target += sizeof(uint16_t); + memcpy(target, &block_id, sizeof(block_id_t)); + target += sizeof(block_id_t); + memcpy(target, &offset, sizeof(int32_t)); } -ColumnCheckpointState::~ColumnCheckpointState() { +void UncompressedStringStorage::ReadStringMarker(data_ptr_t target, block_id_t &block_id, int32_t &offset) { + target += sizeof(uint16_t); + memcpy(&block_id, target, sizeof(block_id_t)); + target += sizeof(block_id_t); + memcpy(&offset, target, sizeof(int32_t)); } -void ColumnCheckpointState::CreateEmptySegment() { - auto type_id = column_data.type.InternalType(); - if (type_id == PhysicalType::VARCHAR) { - auto string_segment = make_unique(column_data.db, 0); - string_segment->overflow_writer = make_unique(column_data.db); - current_segment = move(string_segment); - } else if (type_id == PhysicalType::BIT) { - current_segment = make_unique(column_data.db, 0); +string_location_t UncompressedStringStorage::FetchStringLocation(data_ptr_t baseptr, int32_t dict_offset) { + D_ASSERT(dict_offset >= 0 && dict_offset <= Storage::BLOCK_SIZE); + if (dict_offset == 0) { + return string_location_t(INVALID_BLOCK, 0); + } + // look up result in dictionary + auto dict_end = baseptr + Storage::BLOCK_SIZE; + auto dict_pos = dict_end - dict_offset; + auto string_length = Load(dict_pos); + string_location_t result; + if (string_length == BIG_STRING_MARKER) { + ReadStringMarker(dict_pos, result.block_id, result.offset); } 
else { - current_segment = make_unique(column_data.db, type_id, 0); + result.block_id = INVALID_BLOCK; + result.offset = dict_offset; } - segment_stats = make_unique(column_data.type, GetTypeIdSize(type_id)); + return result; } -void ColumnCheckpointState::AppendData(Vector &data, idx_t count) { - VectorData vdata; - data.Orrify(count, vdata); +string_t UncompressedStringStorage::FetchStringFromDict(ColumnSegment &segment, Vector &result, data_ptr_t baseptr, + int32_t dict_offset) { + // fetch base data + D_ASSERT(dict_offset <= Storage::BLOCK_SIZE); + string_location_t location = FetchStringLocation(baseptr, dict_offset); + return FetchString(segment, result, baseptr, location); +} - idx_t offset = 0; - while (count > 0) { - idx_t appended = current_segment->Append(*segment_stats, vdata, offset, count); - if (appended == count) { - // appended everything: finished - return; +string_t UncompressedStringStorage::FetchString(ColumnSegment &segment, Vector &result, data_ptr_t baseptr, + string_location_t location) { + if (location.block_id != INVALID_BLOCK) { + // big string marker: read from separate block + return ReadString(segment, result, location.block_id, location.offset); + } else { + if (location.offset == 0) { + return string_t(nullptr, 0); } - // the segment is full: flush it to disk - FlushSegment(); + // normal string: read string from this block + auto dict_end = baseptr + Storage::BLOCK_SIZE; + auto dict_pos = dict_end - location.offset; + auto string_length = Load(dict_pos); - // now create a new segment and continue appending - CreateEmptySegment(); - offset += appended; - count -= appended; + auto str_ptr = (char *)(dict_pos + sizeof(uint16_t)); + return string_t(str_ptr, string_length); } } -void ColumnCheckpointState::FlushSegment() { - auto tuple_count = current_segment->tuple_count; - if (tuple_count == 0) { - return; +} // namespace duckdb + + + +namespace duckdb { + +CompressionFunction UncompressedFun::GetFunction(PhysicalType type) { + switch 
(type) { + case PhysicalType::BOOL: + case PhysicalType::INT8: + case PhysicalType::INT16: + case PhysicalType::INT32: + case PhysicalType::INT64: + case PhysicalType::INT128: + case PhysicalType::UINT8: + case PhysicalType::UINT16: + case PhysicalType::UINT32: + case PhysicalType::UINT64: + case PhysicalType::FLOAT: + case PhysicalType::DOUBLE: + case PhysicalType::LIST: + case PhysicalType::INTERVAL: + return FixedSizeUncompressed::GetFunction(type); + case PhysicalType::BIT: + return ValidityUncompressed::GetFunction(type); + case PhysicalType::VARCHAR: + return StringUncompressed::GetFunction(type); + default: + throw InternalException("Unsupported type for Uncompressed"); } +} - // get the buffer of the segment and pin it - auto &buffer_manager = BufferManager::GetBufferManager(column_data.db); - auto &block_manager = BlockManager::GetBlockManager(column_data.db); +bool UncompressedFun::TypeIsSupported(PhysicalType type) { + return true; +} - auto handle = buffer_manager.Pin(current_segment->block); +} // namespace duckdb - // get a free block id to write to - auto block_id = block_manager.GetFreeBlockId(); - // construct the data pointer - uint32_t offset_in_block = 0; - DataPointer data_pointer; - data_pointer.block_id = block_id; - data_pointer.offset = offset_in_block; - data_pointer.row_start = 0; - if (!data_pointers.empty()) { - auto &last_pointer = data_pointers.back(); - data_pointer.row_start = last_pointer.row_start + last_pointer.tuple_count; - } - data_pointer.tuple_count = tuple_count; - data_pointer.statistics = segment_stats->statistics->Copy(); - // construct a persistent segment that points to this block, and append it to the new segment tree - auto persistent_segment = make_unique( - column_data.db, block_id, offset_in_block, column_data.type, data_pointer.row_start, data_pointer.tuple_count, - segment_stats->statistics->Copy()); - new_tree.AppendSegment(move(persistent_segment)); - data_pointers.push_back(move(data_pointer)); - // write the 
block to disk - block_manager.Write(*handle->node, block_id); - // merge the segment stats into the global stats - global_stats->Merge(*segment_stats->statistics); - handle.reset(); - current_segment.reset(); - segment_stats.reset(); -} -void ColumnCheckpointState::FlushToDisk() { - auto &meta_writer = writer.GetMetaWriter(); - // serialize the global stats of the column - global_stats->Serialize(meta_writer); - meta_writer.Write(data_pointers.size()); - // then write the data pointers themselves - for (idx_t k = 0; k < data_pointers.size(); k++) { - auto &data_pointer = data_pointers[k]; - meta_writer.Write(data_pointer.row_start); - meta_writer.Write(data_pointer.tuple_count); - meta_writer.Write(data_pointer.block_id); - meta_writer.Write(data_pointer.offset); - data_pointer.statistics->Serialize(meta_writer); +namespace duckdb { + +//===--------------------------------------------------------------------===// +// Mask constants +//===--------------------------------------------------------------------===// +// LOWER_MASKS contains masks with all the lower bits set until a specific value +// LOWER_MASKS[0] has the 0 lowest bits set, i.e.: +// 0b0000000000000000000000000000000000000000000000000000000000000000, +// LOWER_MASKS[10] has the 10 lowest bits set, i.e.: +// 0b0000000000000000000000000000000000000000000000000000000111111111, +// etc... +// 0b0000000000000000000000000000000000000001111111111111111111111111, +// ... 
+// 0b0000000000000000000001111111111111111111111111111111111111111111, +// until LOWER_MASKS[64], which has all bits set: +// 0b1111111111111111111111111111111111111111111111111111111111111111 +// generated with this python snippet: +// for i in range(65): +// print(hex(int((64 - i) * '0' + i * '1', 2)) + ",") +const validity_t ValidityUncompressed::LOWER_MASKS[] = {0x0, + 0x1, + 0x3, + 0x7, + 0xf, + 0x1f, + 0x3f, + 0x7f, + 0xff, + 0x1ff, + 0x3ff, + 0x7ff, + 0xfff, + 0x1fff, + 0x3fff, + 0x7fff, + 0xffff, + 0x1ffff, + 0x3ffff, + 0x7ffff, + 0xfffff, + 0x1fffff, + 0x3fffff, + 0x7fffff, + 0xffffff, + 0x1ffffff, + 0x3ffffff, + 0x7ffffff, + 0xfffffff, + 0x1fffffff, + 0x3fffffff, + 0x7fffffff, + 0xffffffff, + 0x1ffffffff, + 0x3ffffffff, + 0x7ffffffff, + 0xfffffffff, + 0x1fffffffff, + 0x3fffffffff, + 0x7fffffffff, + 0xffffffffff, + 0x1ffffffffff, + 0x3ffffffffff, + 0x7ffffffffff, + 0xfffffffffff, + 0x1fffffffffff, + 0x3fffffffffff, + 0x7fffffffffff, + 0xffffffffffff, + 0x1ffffffffffff, + 0x3ffffffffffff, + 0x7ffffffffffff, + 0xfffffffffffff, + 0x1fffffffffffff, + 0x3fffffffffffff, + 0x7fffffffffffff, + 0xffffffffffffff, + 0x1ffffffffffffff, + 0x3ffffffffffffff, + 0x7ffffffffffffff, + 0xfffffffffffffff, + 0x1fffffffffffffff, + 0x3fffffffffffffff, + 0x7fffffffffffffff, + 0xffffffffffffffff}; + +// UPPER_MASKS contains masks with all the highest bits set until a specific value +// UPPER_MASKS[0] has the 0 highest bits set, i.e.: +// 0b0000000000000000000000000000000000000000000000000000000000000000, +// UPPER_MASKS[10] has the 10 highest bits set, i.e.: +// 0b1111111111110000000000000000000000000000000000000000000000000000, +// etc... +// 0b1111111111111111111111110000000000000000000000000000000000000000, +// ... 
+// 0b1111111111111111111111111111111111111110000000000000000000000000, +// until UPPER_MASKS[64], which has all bits set: +// 0b1111111111111111111111111111111111111111111111111111111111111111 +// generated with this python snippet: +// for i in range(65): +// print(hex(int(i * '1' + (64 - i) * '0', 2)) + ",") +const validity_t ValidityUncompressed::UPPER_MASKS[] = {0x0, + 0x8000000000000000, + 0xc000000000000000, + 0xe000000000000000, + 0xf000000000000000, + 0xf800000000000000, + 0xfc00000000000000, + 0xfe00000000000000, + 0xff00000000000000, + 0xff80000000000000, + 0xffc0000000000000, + 0xffe0000000000000, + 0xfff0000000000000, + 0xfff8000000000000, + 0xfffc000000000000, + 0xfffe000000000000, + 0xffff000000000000, + 0xffff800000000000, + 0xffffc00000000000, + 0xffffe00000000000, + 0xfffff00000000000, + 0xfffff80000000000, + 0xfffffc0000000000, + 0xfffffe0000000000, + 0xffffff0000000000, + 0xffffff8000000000, + 0xffffffc000000000, + 0xffffffe000000000, + 0xfffffff000000000, + 0xfffffff800000000, + 0xfffffffc00000000, + 0xfffffffe00000000, + 0xffffffff00000000, + 0xffffffff80000000, + 0xffffffffc0000000, + 0xffffffffe0000000, + 0xfffffffff0000000, + 0xfffffffff8000000, + 0xfffffffffc000000, + 0xfffffffffe000000, + 0xffffffffff000000, + 0xffffffffff800000, + 0xffffffffffc00000, + 0xffffffffffe00000, + 0xfffffffffff00000, + 0xfffffffffff80000, + 0xfffffffffffc0000, + 0xfffffffffffe0000, + 0xffffffffffff0000, + 0xffffffffffff8000, + 0xffffffffffffc000, + 0xffffffffffffe000, + 0xfffffffffffff000, + 0xfffffffffffff800, + 0xfffffffffffffc00, + 0xfffffffffffffe00, + 0xffffffffffffff00, + 0xffffffffffffff80, + 0xffffffffffffffc0, + 0xffffffffffffffe0, + 0xfffffffffffffff0, + 0xfffffffffffffff8, + 0xfffffffffffffffc, + 0xfffffffffffffffe, + 0xffffffffffffffff}; + +//===--------------------------------------------------------------------===// +// Analyze +//===--------------------------------------------------------------------===// +struct ValidityAnalyzeState : public 
AnalyzeState { + ValidityAnalyzeState() : count(0) { } + + idx_t count; +}; + +unique_ptr ValidityInitAnalyze(ColumnData &col_data, PhysicalType type) { + return make_unique(); } -void ColumnData::Checkpoint(TableDataWriter &writer) { - // scan the segments of the column data - // set up the checkpoint state - auto checkpoint_state = CreateCheckpointState(writer); - checkpoint_state->global_stats = BaseStatistics::CreateEmpty(type); +bool ValidityAnalyze(AnalyzeState &state_p, Vector &input, idx_t count) { + auto &state = (ValidityAnalyzeState &)state_p; + state.count += count; + return true; +} - if (!data.root_node) { - // empty table: flush the empty list - checkpoint_state->FlushToDisk(); - return; - } +idx_t ValidityFinalAnalyze(AnalyzeState &state_p) { + auto &state = (ValidityAnalyzeState &)state_p; + return (state.count + 7) / 8; +} - auto &block_manager = BlockManager::GetBlockManager(db); - checkpoint_state->CreateEmptySegment(); - Vector intermediate(type); - // we create a new segment tree with all the new segments - // we do this by scanning the current segments of the column and checking for changes - // if there are any changes (e.g. 
updates or appends) we write the new changes - // otherwise we simply write out the current data pointers - auto owned_segment = move(data.root_node); - auto segment = (ColumnSegment *)owned_segment.get(); - auto update_segment = (UpdateSegment *)updates.root_node.get(); - idx_t update_vector_index = 0; - while (segment) { - if (segment->segment_type == ColumnSegmentType::PERSISTENT) { - auto &persistent = (PersistentSegment &)*segment; - // persistent segment; check if there were any updates in this segment - idx_t start_vector_index = persistent.start / STANDARD_VECTOR_SIZE; - idx_t end_vector_index = (persistent.start + persistent.count) / STANDARD_VECTOR_SIZE; - bool has_updates = update_segment->HasUpdates(start_vector_index, end_vector_index); - if (has_updates) { - // persistent segment has updates: mark it as modified and rewrite the block with the merged updates - block_manager.MarkBlockAsModified(persistent.block_id); - } else { - // unchanged persistent segment: no need to write the data +//===--------------------------------------------------------------------===// +// Scan +//===--------------------------------------------------------------------===// +struct ValidityScanState : public SegmentScanState { + unique_ptr handle; +}; - // flush any segments preceding this persistent segment - if (checkpoint_state->current_segment->tuple_count > 0) { - checkpoint_state->FlushSegment(); - checkpoint_state->CreateEmptySegment(); - } +unique_ptr ValidityInitScan(ColumnSegment &segment) { + auto result = make_unique(); + auto &buffer_manager = BufferManager::GetBufferManager(segment.db); + result->handle = buffer_manager.Pin(segment.block); + return move(result); +} - // set up the data pointer directly using the data from the persistent segment - DataPointer pointer; - pointer.block_id = persistent.block_id; - pointer.offset = 0; - pointer.row_start = segment->start; - pointer.tuple_count = persistent.count; - pointer.statistics = 
persistent.stats.statistics->Copy(); +//===--------------------------------------------------------------------===// +// Scan base data +//===--------------------------------------------------------------------===// +void ValidityScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result, + idx_t result_offset) { + auto start = segment.GetRelativeIndex(state.row_index); - // merge the persistent stats into the global column stats - checkpoint_state->global_stats->Merge(*persistent.stats.statistics); + static_assert(sizeof(validity_t) == sizeof(uint64_t), "validity_t should be 64-bit"); + auto &scan_state = (ValidityScanState &)*state.scan_state; + + auto &result_mask = FlatVector::Validity(result); + auto input_data = (validity_t *)scan_state.handle->node->buffer; - // directly append the current segment to the new tree - checkpoint_state->new_tree.AppendSegment(move(owned_segment)); +#ifdef DEBUG + // this method relies on all the bits we are going to write to being set to valid + for (idx_t i = 0; i < scan_count; i++) { + D_ASSERT(result_mask.RowIsValid(result_offset + i)); + } +#endif +#if STANDARD_VECTOR_SIZE < 128 + // fallback for tiny vector sizes + // the bitwise ops we use below don't work if the vector size is too small + ValidityMask source_mask(input_data); + for (idx_t i = 0; i < scan_count; i++) { + result_mask.Set(result_offset + i, source_mask.RowIsValid(start + i)); + } +#else + // the code below does what the fallback code above states, but using bitwise ops: + auto result_data = (validity_t *)result_mask.GetData(); - checkpoint_state->data_pointers.push_back(move(pointer)); + // set up the initial positions + // we need to find the validity_entry to modify, together with the bit-index WITHIN the validity entry + idx_t result_entry = result_offset / ValidityMask::BITS_PER_VALUE; + idx_t result_idx = result_offset - result_entry * ValidityMask::BITS_PER_VALUE; - // move to the next segment in the list - 
owned_segment = move(segment->next); - segment = (ColumnSegment *)owned_segment.get(); + // same for the input: find the validity_entry we are pulling from, together with the bit-index WITHIN that entry + idx_t input_entry = start / ValidityMask::BITS_PER_VALUE; + idx_t input_idx = start - input_entry * ValidityMask::BITS_PER_VALUE; - // move the update segment forward - update_vector_index = end_vector_index; - update_segment = update_segment->FindSegment(end_vector_index); - continue; - } + // now start the bit games + idx_t pos = 0; + while (pos < scan_count) { + // these are the current validity entries we are dealing with + idx_t current_result_idx = result_entry; + idx_t offset; + validity_t input_mask = input_data[input_entry]; + + // construct the mask to AND together with the result + if (result_idx < input_idx) { + // we have to shift the input RIGHT if the result_idx is smaller than the input_idx + auto shift_amount = input_idx - result_idx; + D_ASSERT(shift_amount > 0 && shift_amount <= ValidityMask::BITS_PER_VALUE); + + input_mask = input_mask >> shift_amount; + + // now the upper "shift_amount" bits are set to 0 + // we need them to be set to 1 + // otherwise the subsequent bitwise & will modify values outside of the range of values we want to alter + input_mask |= ValidityUncompressed::UPPER_MASKS[shift_amount]; + + // after this, we move to the next input_entry + offset = ValidityMask::BITS_PER_VALUE - input_idx; + input_entry++; + input_idx = 0; + result_idx += offset; + } else if (result_idx > input_idx) { + // we have to shift the input LEFT if the result_idx is bigger than the input_idx + auto shift_amount = result_idx - input_idx; + D_ASSERT(shift_amount > 0 && shift_amount <= ValidityMask::BITS_PER_VALUE); + + // to avoid overflows, we set the upper "shift_amount" values to 0 first + input_mask = (input_mask & ~ValidityUncompressed::UPPER_MASKS[shift_amount]) << shift_amount; + + // now the lower "shift_amount" bits are set to 0 + // we need 
them to be set to 1 + // otherwise the subsequent bitwise & will modify values outside of the range of values we want to alter + input_mask |= ValidityUncompressed::LOWER_MASKS[shift_amount]; + + // after this, we move to the next result_entry + offset = ValidityMask::BITS_PER_VALUE - result_idx; + result_entry++; + result_idx = 0; + input_idx += offset; + } else { + // if the input_idx is equal to result_idx they are already aligned + // we just move to the next entry for both after this + offset = ValidityMask::BITS_PER_VALUE - result_idx; + input_entry++; + result_entry++; + result_idx = input_idx = 0; + } + // now we need to check if we should include the ENTIRE mask + // OR if we need to mask from the right side + pos += offset; + if (pos > scan_count) { + // we need to set any bits that are past the scan_count on the right-side to 1 + // this is required so we don't influence any bits that are not part of the scan + input_mask |= ValidityUncompressed::UPPER_MASKS[pos - scan_count]; + } + // now finally we can merge the input mask with the result mask + if (input_mask != ValidityMask::ValidityBuffer::MAX_ENTRY) { + if (!result_data) { + result_mask.Initialize(MaxValue(STANDARD_VECTOR_SIZE, result_offset + scan_count)); + result_data = (validity_t *)result_mask.GetData(); + } + result_data[current_result_idx] &= input_mask; } - // not persisted yet: scan the segment and write it to disk - ColumnScanState state; - segment->InitializeScan(state); + } +#endif - Vector scan_vector(type); - idx_t base_update_index = update_segment->start / STANDARD_VECTOR_SIZE; - for (idx_t vector_index = 0; vector_index * STANDARD_VECTOR_SIZE < segment->count; vector_index++) { - scan_vector.Reference(intermediate); +#ifdef DEBUG + // verify that we actually accomplished the bitwise ops equivalent that we wanted to do + ValidityMask input_mask(input_data); + for (idx_t i = 0; i < scan_count; i++) { + D_ASSERT(result_mask.RowIsValid(result_offset + i) == input_mask.RowIsValid(start 
+ i)); + } +#endif +} - idx_t count = MinValue(segment->count - vector_index * STANDARD_VECTOR_SIZE, STANDARD_VECTOR_SIZE); +void ValidityScan(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result) { + result.Normalify(scan_count); - segment->Scan(state, vector_index, scan_vector); - update_segment->FetchCommitted(update_vector_index - base_update_index, scan_vector); + auto start = segment.GetRelativeIndex(state.row_index); + if (start % ValidityMask::BITS_PER_VALUE == 0) { + auto &scan_state = (ValidityScanState &)*state.scan_state; - checkpoint_state->AppendData(scan_vector, count); - update_vector_index++; - if (update_vector_index - base_update_index >= UpdateSegment::MORSEL_VECTOR_COUNT) { - base_update_index += UpdateSegment::MORSEL_VECTOR_COUNT; - update_segment = (UpdateSegment *)update_segment->next.get(); + // aligned scan: no need to do anything fancy + // note: this is only an optimization which avoids having to do messy bitshifting in the common case + // it is not required for correctness + auto &result_mask = FlatVector::Validity(result); + auto input_data = (validity_t *)scan_state.handle->node->buffer; + auto result_data = (validity_t *)result_mask.GetData(); + idx_t start_offset = start / ValidityMask::BITS_PER_VALUE; + idx_t entry_scan_count = (scan_count + ValidityMask::BITS_PER_VALUE - 1) / ValidityMask::BITS_PER_VALUE; + for (idx_t i = 0; i < entry_scan_count; i++) { + auto input_entry = input_data[start_offset + i]; + if (!result_data && input_entry == ValidityMask::ValidityBuffer::MAX_ENTRY) { + continue; } + if (!result_data) { + result_mask.Initialize(MaxValue(STANDARD_VECTOR_SIZE, scan_count)); + result_data = (validity_t *)result_mask.GetData(); + } + result_data[i] = input_entry; } - // move to the next segment in the list - owned_segment = move(segment->next); - segment = (ColumnSegment *)owned_segment.get(); + } else { + // unaligned scan: fall back to scan_partial which does bitshift tricks + 
ValidityScanPartial(segment, state, scan_count, result, 0); } - // flush the final segment - checkpoint_state->FlushSegment(); - // replace the old tree with the new one - data.Replace(checkpoint_state->new_tree); +} - // flush the meta information/data pointers to disk - checkpoint_state->FlushToDisk(); +//===--------------------------------------------------------------------===// +// Fetch +//===--------------------------------------------------------------------===// +void ValidityFetchRow(ColumnSegment &segment, ColumnFetchState &state, row_t row_id, Vector &result, idx_t result_idx) { + D_ASSERT(row_id >= 0 && row_id < row_t(segment.count)); + auto &buffer_manager = BufferManager::GetBufferManager(segment.db); + auto handle = buffer_manager.Pin(segment.block); + ValidityMask mask((validity_t *)handle->node->buffer); + auto &result_mask = FlatVector::Validity(result); + if (!mask.RowIsValidUnsafe(row_id)) { + result_mask.SetInvalid(result_idx); + } +} - // reset all the updates: they have been persisted to disk and included in the new segments - update_segment = (UpdateSegment *)updates.root_node.get(); - while (update_segment) { - update_segment->ClearUpdates(); - update_segment = (UpdateSegment *)update_segment->next.get(); +//===--------------------------------------------------------------------===// +// Append +//===--------------------------------------------------------------------===// +unique_ptr ValidityInitSegment(ColumnSegment &segment, block_id_t block_id) { + auto &buffer_manager = BufferManager::GetBufferManager(segment.db); + if (block_id == INVALID_BLOCK) { + auto handle = buffer_manager.Pin(segment.block); + memset(handle->node->buffer, 0xFF, Storage::BLOCK_SIZE); } + return nullptr; } -void ColumnData::Initialize(PersistentColumnData &column_data) { - // set up statistics - SetStatistics(move(column_data.stats)); +idx_t ValidityAppend(ColumnSegment &segment, SegmentStatistics &stats, VectorData &data, idx_t offset, idx_t vcount) { + auto 
&validity_stats = (ValidityStatistics &)*stats.statistics; - persistent_rows = column_data.total_rows; - // load persistent segments - idx_t segment_rows = 0; - for (auto &segment : column_data.segments) { - segment_rows += segment->count; - data.AppendSegment(move(segment)); - } - if (segment_rows != persistent_rows) { - throw Exception("Segment rows does not match total rows stored in column..."); + auto max_tuples = Storage::BLOCK_SIZE / ValidityMask::STANDARD_MASK_SIZE * STANDARD_VECTOR_SIZE; + idx_t append_count = MinValue(vcount, max_tuples - segment.count); + if (data.validity.AllValid()) { + // no null values: skip append + segment.count += append_count; + validity_stats.has_no_null = true; + return append_count; } + auto &buffer_manager = BufferManager::GetBufferManager(segment.db); + auto handle = buffer_manager.Pin(segment.block); - // set up the (empty) update segments - idx_t row_count = 0; - while (row_count < persistent_rows) { - idx_t next = MinValue(row_count + UpdateSegment::MORSEL_SIZE, persistent_rows); - AppendUpdateSegment(row_count, next - row_count); - row_count = next; - } - if (row_count % UpdateSegment::MORSEL_SIZE == 0) { - AppendUpdateSegment(row_count, 0); + ValidityMask mask((validity_t *)handle->node->buffer); + for (idx_t i = 0; i < append_count; i++) { + auto idx = data.sel->get_index(offset + i); + if (!data.validity.RowIsValidUnsafe(idx)) { + mask.SetInvalidUnsafe(segment.count + i); + validity_stats.has_null = true; + } else { + validity_stats.has_no_null = true; + } } + segment.count += append_count; + return append_count; } -void ColumnData::BaseDeserialize(DatabaseInstance &db, Deserializer &source, const LogicalType &type, - PersistentColumnData &result) { - // load the column statistics - result.stats = BaseStatistics::Deserialize(source, type); - result.total_rows = 0; - - // load the data pointers for the column - idx_t data_pointer_count = source.Read(); - for (idx_t data_ptr = 0; data_ptr < data_pointer_count; 
data_ptr++) { - // read the data pointer - DataPointer data_pointer; - data_pointer.row_start = source.Read(); - data_pointer.tuple_count = source.Read(); - data_pointer.block_id = source.Read(); - data_pointer.offset = source.Read(); - data_pointer.statistics = BaseStatistics::Deserialize(source, type); +void ValidityRevertAppend(ColumnSegment &segment, idx_t start_row) { + idx_t start_bit = start_row - segment.start; - result.total_rows += data_pointer.tuple_count; - // create a persistent segment - auto segment = - make_unique(db, data_pointer.block_id, data_pointer.offset, type, data_pointer.row_start, - data_pointer.tuple_count, move(data_pointer.statistics)); - result.segments.push_back(move(segment)); + auto &buffer_manager = BufferManager::GetBufferManager(segment.db); + auto handle = buffer_manager.Pin(segment.block); + idx_t revert_start; + if (start_bit % 8 != 0) { + // handle sub-bit stuff (yay) + idx_t byte_pos = start_bit / 8; + idx_t bit_start = byte_pos * 8; + idx_t bit_end = (byte_pos + 1) * 8; + ValidityMask mask((validity_t *)handle->node->buffer + byte_pos); + for (idx_t i = start_bit; i < bit_end; i++) { + mask.SetValid(i - bit_start); + } + revert_start = bit_end / 8; + } else { + revert_start = start_bit / 8; } + // for the rest, we just memset + memset(handle->node->buffer + revert_start, 0xFF, Storage::BLOCK_SIZE - revert_start); } -unique_ptr ColumnData::Deserialize(DatabaseInstance &db, Deserializer &source, - const LogicalType &type) { - switch (type.id()) { - case LogicalTypeId::VALIDITY: - return ValidityColumnData::Deserialize(db, source); - default: - return StandardColumnData::Deserialize(db, source, type); - } +//===--------------------------------------------------------------------===// +// Get Function +//===--------------------------------------------------------------------===// +CompressionFunction ValidityUncompressed::GetFunction(PhysicalType data_type) { + D_ASSERT(data_type == PhysicalType::BIT); + return 
CompressionFunction( + CompressionType::COMPRESSION_UNCOMPRESSED, data_type, ValidityInitAnalyze, ValidityAnalyze, + ValidityFinalAnalyze, UncompressedFunctions::InitCompression, UncompressedFunctions::Compress, + UncompressedFunctions::FinalizeCompress, ValidityInitScan, ValidityScan, ValidityScanPartial, ValidityFetchRow, + UncompressedFunctions::EmptySkip, ValidityInitSegment, ValidityAppend, ValidityRevertAppend); } } // namespace duckdb @@ -122174,7 +133644,95 @@ unique_ptr ColumnData::Deserialize(DatabaseInstance &db, D +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/table/standard_column_data.hpp +// +// +//===----------------------------------------------------------------------===// + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/table/validity_column_data.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { + +//! Validity column data represents the validity data (i.e. which values are null) +class ValidityColumnData : public ColumnData { +public: + ValidityColumnData(DataTableInfo &info, idx_t column_index, idx_t start_row, ColumnData *parent); + +public: + bool CheckZonemap(ColumnScanState &state, TableFilter &filter) override; +}; + +} // namespace duckdb + + +namespace duckdb { + +//! Standard column data represents a regular flat column (e.g. a column of type INTEGER or STRING) +class StandardColumnData : public ColumnData { +public: + StandardColumnData(DataTableInfo &info, idx_t column_index, idx_t start_row, LogicalType type, + ColumnData *parent = nullptr); + + //! 
The validity column data + ValidityColumnData validity; + +public: + bool CheckZonemap(ColumnScanState &state, TableFilter &filter) override; + + void InitializeScan(ColumnScanState &state) override; + void InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) override; + + idx_t Scan(Transaction &transaction, idx_t vector_index, ColumnScanState &state, Vector &result) override; + idx_t ScanCommitted(idx_t vector_index, ColumnScanState &state, Vector &result, bool allow_updates) override; + idx_t ScanCount(ColumnScanState &state, Vector &result, idx_t count) override; + + void InitializeAppend(ColumnAppendState &state) override; + void AppendData(BaseStatistics &stats, ColumnAppendState &state, VectorData &vdata, idx_t count) override; + void RevertAppend(row_t start_row) override; + idx_t Fetch(ColumnScanState &state, row_t row_id, Vector &result) override; + void FetchRow(Transaction &transaction, ColumnFetchState &state, row_t row_id, Vector &result, + idx_t result_idx) override; + void Update(Transaction &transaction, idx_t column_index, Vector &update_vector, row_t *row_ids, idx_t offset, + idx_t update_count) override; + void UpdateColumn(Transaction &transaction, const vector &column_path, Vector &update_vector, + row_t *row_ids, idx_t update_count, idx_t depth) override; + unique_ptr GetUpdateStatistics() override; + + void CommitDropColumn() override; + + unique_ptr CreateCheckpointState(RowGroup &row_group, TableDataWriter &writer) override; + unique_ptr Checkpoint(RowGroup &row_group, TableDataWriter &writer) override; + void CheckpointScan(ColumnSegment *segment, ColumnScanState &state, idx_t row_group_start, idx_t count, + Vector &scan_vector) override; + + void DeserializeColumn(Deserializer &source) override; + + void GetStorageInfo(idx_t row_group_index, vector col_path, vector> &result) override; + + void Verify(RowGroup &parent) override; + +private: + template + void TemplatedScan(Transaction *transaction, ColumnScanState &state, Vector 
&result); +}; +} // namespace duckdb @@ -122183,93 +133741,122 @@ namespace duckdb { DataTable::DataTable(DatabaseInstance &db, const string &schema, const string &table, vector types_p, unique_ptr data) - : info(make_shared(schema, table)), types(move(types_p)), db(db), total_rows(0), is_root(true) { - // set up the segment trees for the column segments - for (idx_t i = 0; i < types.size(); i++) { - auto column_data = make_shared(db, *info, types[i], i); - columns.push_back(move(column_data)); + : info(make_shared(db, schema, table)), types(move(types_p)), db(db), total_rows(0), is_root(true) { + // initialize the table with the existing data from disk, if any + this->row_groups = make_shared(); + if (data && !data->row_groups.empty()) { + for (auto &row_group_pointer : data->row_groups) { + auto new_row_group = make_unique(db, *info, types, row_group_pointer); + auto row_group_count = new_row_group->start + new_row_group->count; + if (row_group_count > total_rows) { + total_rows = row_group_count; + } + row_groups->AppendSegment(move(new_row_group)); + } + column_stats = move(data->column_stats); + if (column_stats.size() != types.size()) { + throw IOException("Table statistics column count is not aligned with table column count. 
Corrupt file?"); + } } + if (column_stats.empty()) { + D_ASSERT(total_rows == 0); - // initialize the table with the existing data from disk, if any - if (data && !data->column_data.empty()) { - D_ASSERT(data->column_data.size() == types.size()); - for (idx_t i = 0; i < types.size(); i++) { - columns[i]->Initialize(*data->column_data[i]); + AppendRowGroup(0); + for (auto &type : types) { + column_stats.push_back(BaseStatistics::CreateEmpty(type)); } - total_rows = columns[0]->persistent_rows; - versions = move(data->versions); } else { - versions = make_shared(); - } - if (total_rows == 0) { - // append one (empty) morsel to the table - auto segment = make_unique(0, MorselInfo::MORSEL_SIZE); - versions->AppendSegment(move(segment)); + D_ASSERT(column_stats.size() == types.size()); + D_ASSERT(row_groups->GetRootSegment() != nullptr); } } +void DataTable::AppendRowGroup(idx_t start_row) { + auto new_row_group = make_unique(db, *info, start_row, 0); + new_row_group->InitializeEmpty(types); + row_groups->AppendSegment(move(new_row_group)); +} + DataTable::DataTable(ClientContext &context, DataTable &parent, ColumnDefinition &new_column, Expression *default_value) - : info(parent.info), types(parent.types), db(parent.db), versions(parent.versions), total_rows(parent.total_rows), - columns(parent.columns), is_root(true) { + : info(parent.info), types(parent.types), db(parent.db), total_rows(parent.total_rows.load()), is_root(true) { // prevent any new tuples from being added to the parent lock_guard parent_lock(parent.append_lock); // add the new column to this DataTable auto new_column_type = new_column.type; - idx_t new_column_idx = columns.size(); + auto new_column_idx = parent.types.size(); types.push_back(new_column_type); - auto column_data = make_shared(db, *info, new_column_type, new_column_idx); - columns.push_back(move(column_data)); + + // set up the statistics + for (idx_t i = 0; i < parent.column_stats.size(); i++) { + 
column_stats.push_back(parent.column_stats[i]->Copy()); + } + column_stats.push_back(BaseStatistics::CreateEmpty(new_column_type)); + + auto &transaction = Transaction::GetTransaction(context); + + ExpressionExecutor executor; + DataChunk dummy_chunk; + Vector result(new_column_type); + if (!default_value) { + FlatVector::Validity(result).SetAllInvalid(STANDARD_VECTOR_SIZE); + } else { + executor.AddExpression(*default_value); + } // fill the column with its DEFAULT value, or NULL if none is specified - idx_t rows_to_write = total_rows; - if (rows_to_write > 0) { - ExpressionExecutor executor; - DataChunk dummy_chunk; - Vector result(new_column_type); - if (!default_value) { - FlatVector::Validity(result).SetAllInvalid(STANDARD_VECTOR_SIZE); - } else { - executor.AddExpression(*default_value); - } + auto new_stats = make_unique(new_column.type); + this->row_groups = make_shared(); + auto current_row_group = (RowGroup *)parent.row_groups->GetRootSegment(); + while (current_row_group) { + auto new_row_group = current_row_group->AddColumn(context, new_column, executor, default_value, result); + // merge in the statistics + column_stats[new_column_idx]->Merge(*new_row_group->GetStatistics(new_column_idx)); - ColumnAppendState state; - columns[new_column_idx]->InitializeAppend(state); - for (idx_t i = 0; i < rows_to_write; i += STANDARD_VECTOR_SIZE) { - idx_t rows_in_this_vector = MinValue(rows_to_write - i, STANDARD_VECTOR_SIZE); - if (default_value) { - dummy_chunk.SetCardinality(rows_in_this_vector); - executor.ExecuteExpression(dummy_chunk, result); - } - columns[new_column_idx]->Append(state, result, rows_in_this_vector); - } + row_groups->AppendSegment(move(new_row_group)); + current_row_group = (RowGroup *)current_row_group->next.get(); } + // also add this column to client local storage - Transaction::GetTransaction(context).storage.AddColumn(&parent, this, new_column, default_value); + transaction.storage.AddColumn(&parent, this, new_column, default_value); // 
this table replaces the previous table, hence the parent is no longer the root DataTable parent.is_root = false; } DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t removed_column) - : info(parent.info), types(parent.types), db(parent.db), versions(parent.versions), total_rows(parent.total_rows), - columns(parent.columns), is_root(true) { + : info(parent.info), types(parent.types), db(parent.db), total_rows(parent.total_rows.load()), is_root(true) { // prevent any new tuples from being added to the parent lock_guard parent_lock(parent.append_lock); // first check if there are any indexes that exist that point to the removed column - for (auto &index : info->indexes) { - for (auto &column_id : index->column_ids) { + info->indexes.Scan([&](Index &index) { + for (auto &column_id : index.column_ids) { if (column_id == removed_column) { throw CatalogException("Cannot drop this column: an index depends on it!"); } else if (column_id > removed_column) { throw CatalogException("Cannot drop this column: an index depends on a column after it!"); } } - } - // erase the column from this DataTable + return false; + }); + + // erase the stats and type from this DataTable D_ASSERT(removed_column < types.size()); types.erase(types.begin() + removed_column); - columns.erase(columns.begin() + removed_column); + for (idx_t i = 0; i < parent.column_stats.size(); i++) { + if (i != removed_column) { + column_stats.push_back(parent.column_stats[i]->Copy()); + } + } + + // alter the row_groups and remove the column from each of them + this->row_groups = make_shared(); + auto current_row_group = (RowGroup *)parent.row_groups->GetRootSegment(); + while (current_row_group) { + auto new_row_group = current_row_group->RemoveColumn(removed_column); + row_groups->AppendSegment(move(new_row_group)); + current_row_group = (RowGroup *)current_row_group->next.get(); + } // this table replaces the previous table, hence the parent is no longer the root DataTable parent.is_root = 
false; @@ -122277,64 +133864,66 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t removed_co DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t changed_idx, const LogicalType &target_type, vector bound_columns, Expression &cast_expr) - : info(parent.info), types(parent.types), db(parent.db), versions(parent.versions), total_rows(parent.total_rows), - columns(parent.columns), is_root(true) { - - // prevent any new tuples from being added to the parent - CreateIndexScanState scan_state; - parent.InitializeCreateIndexScan(scan_state, bound_columns); + : info(parent.info), types(parent.types), db(parent.db), total_rows(parent.total_rows.load()), is_root(true) { + // prevent any tuples from being added to the parent + lock_guard lock(append_lock); // first check if there are any indexes that exist that point to the changed column - for (auto &index : info->indexes) { - for (auto &column_id : index->column_ids) { + info->indexes.Scan([&](Index &index) { + for (auto &column_id : index.column_ids) { if (column_id == changed_idx) { throw CatalogException("Cannot change the type of this column: an index depends on it!"); } } - } + return false; + }); + // change the type in this DataTable types[changed_idx] = target_type; - // construct a new column data for this type - auto column_data = make_shared(db, *info, target_type, changed_idx); - - ColumnAppendState append_state; - column_data->InitializeAppend(append_state); + // set up the statistics for the table + // the column that had its type changed will have the new statistics computed during conversion + for (idx_t i = 0; i < types.size(); i++) { + if (i == changed_idx) { + column_stats.push_back(BaseStatistics::CreateEmpty(types[i])); + } else { + column_stats.push_back(parent.column_stats[i]->Copy()); + } + } // scan the original table, and fill the new column with the transformed value auto &transaction = Transaction::GetTransaction(context); - vector types; + vector 
scan_types; for (idx_t i = 0; i < bound_columns.size(); i++) { if (bound_columns[i] == COLUMN_IDENTIFIER_ROW_ID) { - types.push_back(LOGICAL_ROW_TYPE); + scan_types.push_back(LOGICAL_ROW_TYPE); } else { - types.push_back(parent.types[bound_columns[i]]); + scan_types.push_back(parent.types[bound_columns[i]]); } } - DataChunk scan_chunk; - scan_chunk.Initialize(types); + scan_chunk.Initialize(scan_types); ExpressionExecutor executor; executor.AddExpression(cast_expr); - Vector append_vector(target_type); - while (true) { - // scan the table - scan_chunk.Reset(); - parent.CreateIndexScan(scan_state, bound_columns, scan_chunk); - if (scan_chunk.size() == 0) { - break; - } - // execute the expression - executor.ExecuteExpression(scan_chunk, append_vector); - column_data->Append(append_state, append_vector, scan_chunk.size()); + TableScanState scan_state; + scan_state.column_ids = bound_columns; + scan_state.max_row = total_rows; + + // now alter the type of the column within all of the row_groups individually + this->row_groups = make_shared(); + auto current_row_group = (RowGroup *)parent.row_groups->GetRootSegment(); + while (current_row_group) { + auto new_row_group = + current_row_group->AlterType(context, target_type, changed_idx, executor, scan_state, scan_chunk); + column_stats[changed_idx]->Merge(*new_row_group->GetStatistics(changed_idx)); + row_groups->AppendSegment(move(new_row_group)); + current_row_group = (RowGroup *)current_row_group->next.get(); } - // also add this column to client local storage - transaction.storage.ChangeType(&parent, this, changed_idx, target_type, bound_columns, cast_expr); - columns[changed_idx] = move(column_data); + transaction.storage.ChangeType(&parent, this, changed_idx, target_type, bound_columns, cast_expr); // this table replaces the previous table, hence the parent is no longer the root DataTable parent.is_root = false; @@ -122346,26 +133935,18 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t 
changed_id void DataTable::InitializeScan(TableScanState &state, const vector &column_ids, TableFilterSet *table_filters) { // initialize a column scan state for each column - state.column_scans = unique_ptr(new ColumnScanState[column_ids.size()]); - for (idx_t i = 0; i < column_ids.size(); i++) { - auto column = column_ids[i]; - if (column != COLUMN_IDENTIFIER_ROW_ID) { - columns[column]->InitializeScan(state.column_scans[i]); - } else { - state.column_scans[i].current = nullptr; - } - } // initialize the chunk scan state - state.column_count = column_ids.size(); - state.current_row = 0; - state.base_row = 0; + auto row_group = (RowGroup *)row_groups->GetRootSegment(); + state.column_ids = column_ids; state.max_row = total_rows; - state.version_info = (MorselInfo *)versions->GetRootSegment(); state.table_filters = table_filters; if (table_filters) { D_ASSERT(table_filters->filters.size() > 0); state.adaptive_filter = make_unique(table_filters); } + while (row_group && !row_group->InitializeScan(state.row_group_scan_state)) { + row_group = (RowGroup *)row_group->next.get(); + } } void DataTable::InitializeScan(Transaction &transaction, TableScanState &state, const vector &column_ids, @@ -122374,36 +133955,34 @@ void DataTable::InitializeScan(Transaction &transaction, TableScanState &state, transaction.storage.InitializeScan(this, state.local_state, table_filters); } -void DataTable::InitializeScanWithOffset(TableScanState &state, const vector &column_ids, - TableFilterSet *table_filters, idx_t start_row, idx_t end_row) { - D_ASSERT(start_row % STANDARD_VECTOR_SIZE == 0); - D_ASSERT(end_row > start_row); - idx_t vector_offset = start_row / STANDARD_VECTOR_SIZE; - // initialize a column scan state for each column - state.column_scans = unique_ptr(new ColumnScanState[column_ids.size()]); - for (idx_t i = 0; i < column_ids.size(); i++) { - auto column = column_ids[i]; - if (column != COLUMN_IDENTIFIER_ROW_ID) { - 
columns[column]->InitializeScanWithOffset(state.column_scans[i], vector_offset); - } else { - state.column_scans[i].current = nullptr; - } - } +void DataTable::InitializeScanWithOffset(TableScanState &state, const vector &column_ids, idx_t start_row, + idx_t end_row) { - // initialize the chunk scan state - state.column_count = column_ids.size(); - state.current_row = start_row; - state.base_row = start_row; + auto row_group = (RowGroup *)row_groups->GetSegment(start_row); + state.column_ids = column_ids; state.max_row = end_row; - state.version_info = (MorselInfo *)versions->GetSegment(state.current_row); + state.table_filters = nullptr; + idx_t start_vector = (start_row - row_group->start) / STANDARD_VECTOR_SIZE; + if (!row_group->InitializeScanWithOffset(state.row_group_scan_state, start_vector)) { + throw InternalException("Failed to initialize row group scan with offset"); + } +} + +bool DataTable::InitializeScanInRowGroup(TableScanState &state, const vector &column_ids, + TableFilterSet *table_filters, RowGroup *row_group, idx_t vector_index, + idx_t max_row) { + state.column_ids = column_ids; + state.max_row = max_row; state.table_filters = table_filters; - if (table_filters && !table_filters->filters.empty()) { + if (table_filters) { + D_ASSERT(table_filters->filters.size() > 0); state.adaptive_filter = make_unique(table_filters); } + return row_group->InitializeScanWithOffset(state.row_group_scan_state, vector_index); } idx_t DataTable::MaxThreads(ClientContext &context) { - idx_t parallel_scan_vector_count = 100; + idx_t parallel_scan_vector_count = RowGroup::ROW_GROUP_VECTOR_COUNT; if (context.force_parallelism) { parallel_scan_vector_count = 1; } @@ -122413,31 +133992,45 @@ idx_t DataTable::MaxThreads(ClientContext &context) { } void DataTable::InitializeParallelScan(ParallelTableScanState &state) { - state.current_row = 0; + state.current_row_group = (RowGroup *)row_groups->GetRootSegment(); state.transaction_local_data = false; } bool 
DataTable::NextParallelScan(ClientContext &context, ParallelTableScanState &state, TableScanState &scan_state, const vector &column_ids) { - idx_t parallel_scan_vector_count = 100; - if (context.force_parallelism) { - parallel_scan_vector_count = 1; - } - idx_t parallel_scan_tuple_count = STANDARD_VECTOR_SIZE * parallel_scan_vector_count; - - if (state.current_row < total_rows) { - idx_t next = MinValue(state.current_row + parallel_scan_tuple_count, total_rows); - - // scan a morsel from the persistent rows - InitializeScanWithOffset(scan_state, column_ids, scan_state.table_filters, state.current_row, next); - - state.current_row = next; + while (state.current_row_group) { + idx_t vector_index; + idx_t max_row; + if (context.force_parallelism) { + vector_index = state.vector_index; + max_row = state.current_row_group->start + + MinValue(state.current_row_group->count, + STANDARD_VECTOR_SIZE * state.vector_index + STANDARD_VECTOR_SIZE); + } else { + vector_index = 0; + max_row = state.current_row_group->start + state.current_row_group->count; + } + bool need_to_scan = InitializeScanInRowGroup(scan_state, column_ids, scan_state.table_filters, + state.current_row_group, vector_index, max_row); + if (context.force_parallelism) { + state.vector_index++; + if (state.vector_index * STANDARD_VECTOR_SIZE >= state.current_row_group->count) { + state.current_row_group = (RowGroup *)state.current_row_group->next.get(); + state.vector_index = 0; + } + } else { + state.current_row_group = (RowGroup *)state.current_row_group->next.get(); + } + if (!need_to_scan) { + // filters allow us to skip this row group: move to the next row group + continue; + } return true; - } else if (!state.transaction_local_data) { + } + if (!state.transaction_local_data) { auto &transaction = Transaction::GetTransaction(context); // create a task for scanning the local data - scan_state.current_row = 0; - scan_state.base_row = 0; + scan_state.row_group_scan_state.max_row = 0; scan_state.max_row = 0; 
transaction.storage.InitializeScan(this, scan_state.local_state, scan_state.table_filters); state.transaction_local_data = true; @@ -122450,187 +134043,55 @@ bool DataTable::NextParallelScan(ClientContext &context, ParallelTableScanState void DataTable::Scan(Transaction &transaction, DataChunk &result, TableScanState &state, vector &column_ids) { // scan the persistent segments - while (ScanBaseTable(transaction, result, state, column_ids, state.current_row, state.max_row)) { - if (result.size() > 0) { - return; - } - result.Reset(); + if (ScanBaseTable(transaction, result, state)) { + D_ASSERT(result.size() > 0); + return; } // scan the transaction-local segments transaction.storage.Scan(state.local_state, column_ids, result); } -bool DataTable::CheckZonemap(TableScanState &state, const vector &column_ids, TableFilterSet *table_filters, - idx_t ¤t_row) { - if (!table_filters) { - return true; - } - for (auto &table_filter : table_filters->filters) { - for (auto &predicate_constant : table_filter.second) { - D_ASSERT(predicate_constant.column_index < column_ids.size()); - auto base_column_idx = column_ids[predicate_constant.column_index]; - bool read_segment = columns[base_column_idx]->CheckZonemap( - state.column_scans[predicate_constant.column_index], predicate_constant); - if (!read_segment) { - //! 
We can skip this partition - idx_t vectors_to_skip = - ceil((double)(state.column_scans[predicate_constant.column_index].current->count + - state.column_scans[predicate_constant.column_index].current->start - current_row) / - STANDARD_VECTOR_SIZE); - for (idx_t i = 0; i < vectors_to_skip; ++i) { - state.NextVector(); - current_row += STANDARD_VECTOR_SIZE; - } - return false; - } - } - } - - return true; -} - -bool DataTable::ScanBaseTable(Transaction &transaction, DataChunk &result, TableScanState &state, - const vector &column_ids, idx_t ¤t_row, idx_t max_row) { - if (current_row >= max_row) { - // exceeded the amount of rows to scan - return false; - } - auto max_count = MinValue(STANDARD_VECTOR_SIZE, max_row - current_row); - idx_t vector_offset = (current_row - state.base_row) / STANDARD_VECTOR_SIZE; - //! first check the zonemap if we have to scan this partition - if (!CheckZonemap(state, column_ids, state.table_filters, current_row)) { - return true; - } - // second, scan the version chunk manager to figure out which tuples to load for this transaction - SelectionVector valid_sel(STANDARD_VECTOR_SIZE); - while (vector_offset >= MorselInfo::MORSEL_VECTOR_COUNT) { - state.version_info = (MorselInfo *)state.version_info->next.get(); - state.base_row += MorselInfo::MORSEL_SIZE; - vector_offset -= MorselInfo::MORSEL_VECTOR_COUNT; - } - idx_t count = state.version_info->GetSelVector(transaction, vector_offset, valid_sel, max_count); - if (count == 0) { - // nothing to scan for this vector, skip the entire vector - state.NextVector(); - current_row += STANDARD_VECTOR_SIZE; - return true; - } - idx_t approved_tuple_count = count; - if (count == max_count && !state.table_filters) { - //! 
If we don't have any deleted tuples or filters we can just run a regular scan - for (idx_t i = 0; i < column_ids.size(); i++) { - auto column = column_ids[i]; - if (column == COLUMN_IDENTIFIER_ROW_ID) { - // scan row id - D_ASSERT(result.data[i].GetType().InternalType() == ROW_TYPE); - result.data[i].Sequence(current_row, 1); - } else { - columns[column]->Scan(transaction, state.column_scans[i], result.data[i]); - } - } - } else { - SelectionVector sel; - - if (count != max_count) { - sel.Initialize(valid_sel); +bool DataTable::ScanBaseTable(Transaction &transaction, DataChunk &result, TableScanState &state) { + auto current_row_group = state.row_group_scan_state.row_group; + while (current_row_group) { + current_row_group->Scan(transaction, state.row_group_scan_state, result); + if (result.size() > 0) { + return true; } else { - sel.Initialize(FlatVector::INCREMENTAL_SELECTION_VECTOR); - } - //! First, we scan the columns with filters, fetch their data and generate a selection vector. - //! get runtime statistics - auto start_time = high_resolution_clock::now(); - if (state.table_filters) { - for (idx_t i = 0; i < state.table_filters->filters.size(); i++) { - auto tf_idx = state.adaptive_filter->permutation[i]; - auto col_idx = column_ids[tf_idx]; - columns[col_idx]->Select(transaction, state.column_scans[tf_idx], result.data[tf_idx], sel, - approved_tuple_count, state.table_filters->filters[tf_idx]); - } - for (auto &table_filter : state.table_filters->filters) { - result.data[table_filter.first].Slice(sel, approved_tuple_count); - } - } - //! Now we use the selection vector to fetch data for the other columns. 
- for (idx_t i = 0; i < column_ids.size(); i++) { - if (!state.table_filters || state.table_filters->filters.find(i) == state.table_filters->filters.end()) { - auto column = column_ids[i]; - if (column == COLUMN_IDENTIFIER_ROW_ID) { - D_ASSERT(result.data[i].GetType().InternalType() == PhysicalType::INT64); - result.data[i].SetVectorType(VectorType::FLAT_VECTOR); - auto result_data = (int64_t *)FlatVector::GetData(result.data[i]); - for (size_t sel_idx = 0; sel_idx < approved_tuple_count; sel_idx++) { - result_data[sel_idx] = current_row + sel.get_index(sel_idx); + do { + current_row_group = state.row_group_scan_state.row_group = (RowGroup *)current_row_group->next.get(); + if (current_row_group) { + bool scan_row_group = current_row_group->InitializeScan(state.row_group_scan_state); + if (scan_row_group) { + // skip this row group + break; } - } else { - columns[column]->FilterScan(transaction, state.column_scans[i], result.data[i], sel, - approved_tuple_count); } - } - } - auto end_time = high_resolution_clock::now(); - if (state.adaptive_filter && state.table_filters->filters.size() > 1) { - state.adaptive_filter->AdaptRuntimeStatistics( - duration_cast>(end_time - start_time).count()); + } while (current_row_group); } } - - result.SetCardinality(approved_tuple_count); - current_row += STANDARD_VECTOR_SIZE; - return true; + return false; } //===--------------------------------------------------------------------===// // Fetch //===--------------------------------------------------------------------===// -void DataTable::Fetch(Transaction &transaction, DataChunk &result, vector &column_ids, +void DataTable::Fetch(Transaction &transaction, DataChunk &result, const vector &column_ids, Vector &row_identifiers, idx_t fetch_count, ColumnFetchState &state) { - // first figure out which row identifiers we should use for this transaction by looking at the VersionManagers - row_t rows[STANDARD_VECTOR_SIZE]; - idx_t count = FetchRows(transaction, row_identifiers, 
fetch_count, rows); - if (count == 0) { - // no rows to use - return; - } - // for each of the remaining rows, now fetch the data - result.SetCardinality(count); - for (idx_t col_idx = 0; col_idx < column_ids.size(); col_idx++) { - auto column = column_ids[col_idx]; - if (column == COLUMN_IDENTIFIER_ROW_ID) { - // row id column: fill in the row ids - D_ASSERT(result.data[col_idx].GetType().InternalType() == PhysicalType::INT64); - result.data[col_idx].SetVectorType(VectorType::FLAT_VECTOR); - auto data = FlatVector::GetData(result.data[col_idx]); - for (idx_t i = 0; i < count; i++) { - data[i] = rows[i]; - } - } else { - // regular column: fetch data from the base column - for (idx_t i = 0; i < count; i++) { - auto row_id = rows[i]; - columns[column]->FetchRow(state, transaction, row_id, result.data[col_idx], i); - } - } - } -} - -idx_t DataTable::FetchRows(Transaction &transaction, Vector &row_identifiers, idx_t fetch_count, row_t result_rows[]) { - D_ASSERT(row_identifiers.GetType().InternalType() == ROW_TYPE); - - // now iterate over the row ids and figure out which rows to use - idx_t count = 0; - + // figure out which row_group to fetch from auto row_ids = FlatVector::GetData(row_identifiers); + idx_t count = 0; for (idx_t i = 0; i < fetch_count; i++) { auto row_id = row_ids[i]; - auto segment = (MorselInfo *)versions->GetSegment(row_id); - bool use_row = segment->Fetch(transaction, row_id - segment->start); - if (use_row) { - // row is not deleted; use the row - result_rows[count++] = row_id; + auto row_group = (RowGroup *)row_groups->GetSegment(row_id); + if (!row_group->Fetch(transaction, row_id - row_group->start)) { + continue; } + row_group->FetchRow(transaction, state, column_ids, row_id, result, count); + count++; } - return count; + result.SetCardinality(count); } //===--------------------------------------------------------------------===// @@ -122647,7 +134108,7 @@ static void VerifyCheckConstraint(TableCatalogEntry &table, Expression &expr, Da 
Vector result(LogicalType::INTEGER); try { executor.ExecuteExpression(chunk, result); - } catch (Exception &ex) { + } catch (std::exception &ex) { throw ConstraintException("CHECK constraint failed: %s (Error: %s)", table.name, ex.what()); } catch (...) { throw ConstraintException("CHECK constraint failed: %s (Unknown Error)", table.name); @@ -122680,9 +134141,10 @@ void DataTable::VerifyAppendConstraints(TableCatalogEntry &table, DataChunk &chu } case ConstraintType::UNIQUE: { //! check whether or not the chunk can be inserted into the indexes - for (auto &index : info->indexes) { - index->VerifyAppend(chunk); - } + info->indexes.Scan([&](Index &index) { + index.VerifyAppend(chunk); + return false; + }); break; } case ConstraintType::FOREIGN_KEY: @@ -122715,49 +134177,19 @@ void DataTable::Append(TableCatalogEntry &table, ClientContext &context, DataChu void DataTable::InitializeAppend(Transaction &transaction, TableAppendState &state, idx_t append_count) { // obtain the append lock for this table - state.append_lock = std::unique_lock(append_lock); + state.append_lock = unique_lock(append_lock); if (!is_root) { throw TransactionException("Transaction conflict: adding entries to a table that has been altered!"); } - // obtain locks on all indexes for the table - state.index_locks = unique_ptr(new IndexLock[info->indexes.size()]); - for (idx_t i = 0; i < info->indexes.size(); i++) { - info->indexes[i]->InitializeLock(state.index_locks[i]); - } - // for each column, initialize the append state - state.states = unique_ptr(new ColumnAppendState[types.size()]); - for (idx_t i = 0; i < types.size(); i++) { - columns[i]->InitializeAppend(state.states[i]); - } state.row_start = total_rows; state.current_row = state.row_start; + state.remaining_append_count = append_count; - // start writing to the morsels - lock_guard morsel_lock(versions->node_lock); - auto last_morsel = (MorselInfo *)versions->GetLastSegment(); - D_ASSERT(last_morsel->start <= (idx_t)state.row_start); - 
idx_t current_position = state.row_start - last_morsel->start; - idx_t remaining = append_count; - while (true) { - idx_t remaining_in_morsel = MorselInfo::MORSEL_SIZE - current_position; - idx_t to_write = MinValue(remaining, remaining_in_morsel); - remaining -= to_write; - if (to_write > 0) { - // write to the last morsel - auto morsel = (MorselInfo *)versions->GetLastSegment(); - morsel->Append(transaction, current_position, to_write, transaction.transaction_id); - } - - current_position = 0; - if (remaining > 0) { - idx_t start = last_morsel->start + MorselInfo::MORSEL_SIZE; - auto morsel = make_unique(start, MorselInfo::MORSEL_SIZE); - last_morsel = morsel.get(); - versions->AppendSegment(move(morsel)); - } else { - break; - } - } + // start writing to the row_groups + lock_guard row_group_lock(row_groups->node_lock); + auto last_row_group = (RowGroup *)row_groups->GetLastSegment(); + D_ASSERT(total_rows == last_row_group->start + last_row_group->count); + last_row_group->InitializeAppend(transaction, state.row_group_append_state, state.remaining_append_count); total_rows += append_count; } @@ -122766,11 +134198,47 @@ void DataTable::Append(Transaction &transaction, DataChunk &chunk, TableAppendSt D_ASSERT(chunk.ColumnCount() == types.size()); chunk.Verify(); - // append the physical data to each of the entries - for (idx_t i = 0; i < types.size(); i++) { - columns[i]->Append(state.states[i], chunk.data[i], chunk.size()); + idx_t append_count = chunk.size(); + idx_t remaining = chunk.size(); + while (true) { + auto current_row_group = state.row_group_append_state.row_group; + // check how much we can fit into the current row_group + idx_t append_count = + MinValue(remaining, RowGroup::ROW_GROUP_SIZE - state.row_group_append_state.offset_in_row_group); + if (append_count > 0) { + current_row_group->Append(state.row_group_append_state, chunk, append_count); + // merge the stats + lock_guard stats_guard(stats_lock); + for (idx_t i = 0; i < types.size(); i++) { + 
column_stats[i]->Merge(*current_row_group->GetStatistics(i)); + } + } + state.remaining_append_count -= append_count; + remaining -= append_count; + if (remaining > 0) { + // we expect max 1 iteration of this loop (i.e. a single chunk should never overflow more than one + // row_group) + D_ASSERT(chunk.size() == remaining + append_count); + // slice the input chunk + if (remaining < chunk.size()) { + SelectionVector sel(STANDARD_VECTOR_SIZE); + for (idx_t i = 0; i < remaining; i++) { + sel.set_index(i, append_count + i); + } + chunk.Slice(sel, remaining); + } + // append a new row_group + AppendRowGroup(current_row_group->start + current_row_group->count); + // set up the append state for this row_group + lock_guard row_group_lock(row_groups->node_lock); + auto last_row_group = (RowGroup *)row_groups->GetLastSegment(); + last_row_group->InitializeAppend(transaction, state.row_group_append_state, state.remaining_append_count); + continue; + } else { + break; + } } - state.current_row += chunk.size(); + state.current_row += append_count; } void DataTable::ScanTableSegment(idx_t row_start, idx_t count, const std::function &function) { @@ -122778,9 +134246,9 @@ void DataTable::ScanTableSegment(idx_t row_start, idx_t count, const std::functi vector column_ids; vector types; - for (idx_t i = 0; i < columns.size(); i++) { + for (idx_t i = 0; i < this->types.size(); i++) { column_ids.push_back(i); - types.push_back(columns[i]->type); + types.push_back(this->types[i]); } DataChunk chunk; chunk.Initialize(types); @@ -122788,26 +134256,30 @@ void DataTable::ScanTableSegment(idx_t row_start, idx_t count, const std::functi CreateIndexScanState state; idx_t row_start_aligned = row_start / STANDARD_VECTOR_SIZE * STANDARD_VECTOR_SIZE; - InitializeScanWithOffset(state, column_ids, nullptr, row_start_aligned, row_start + count); + InitializeScanWithOffset(state, column_ids, row_start_aligned, row_start + count); - while (true) { - idx_t current_row = state.current_row; - 
CreateIndexScan(state, column_ids, chunk, true); + idx_t current_row = row_start_aligned; + while (current_row < end) { + ScanCreateIndex(state, chunk, TableScanType::TABLE_SCAN_COMMITTED_ROWS); if (chunk.size() == 0) { break; } - idx_t end_row = state.current_row; + idx_t end_row = current_row + chunk.size(); // figure out if we need to write the entire chunk or just part of it - idx_t chunk_start = current_row < row_start ? row_start : current_row; - idx_t chunk_end = end_row > end ? end : end_row; + idx_t chunk_start = MaxValue(current_row, row_start); + idx_t chunk_end = MinValue(end_row, end); + D_ASSERT(chunk_start < chunk_end); idx_t chunk_count = chunk_end - chunk_start; if (chunk_count != chunk.size()) { // need to slice the chunk before insert - SelectionVector sel(chunk_start % STANDARD_VECTOR_SIZE, chunk_count); + auto start_in_chunk = chunk_start % STANDARD_VECTOR_SIZE; + SelectionVector sel(start_in_chunk, chunk_count); chunk.Slice(sel, chunk_count); + chunk.Verify(); } function(chunk); chunk.Reset(); + current_row = end_row; } } @@ -122819,21 +134291,21 @@ void DataTable::WriteToLog(WriteAheadLog &log, idx_t row_start, idx_t count) { void DataTable::CommitAppend(transaction_t commit_id, idx_t row_start, idx_t count) { lock_guard lock(append_lock); - auto morsel = (MorselInfo *)versions->GetSegment(row_start); + auto row_group = (RowGroup *)row_groups->GetSegment(row_start); idx_t current_row = row_start; idx_t remaining = count; while (true) { - idx_t start_in_morsel = current_row - morsel->start; - idx_t append_count = MinValue(morsel->count - start_in_morsel, remaining); + idx_t start_in_row_group = current_row - row_group->start; + idx_t append_count = MinValue(row_group->count - start_in_row_group, remaining); - morsel->CommitAppend(commit_id, start_in_morsel, append_count); + row_group->CommitAppend(commit_id, start_in_row_group, append_count); current_row += append_count; remaining -= append_count; if (remaining == 0) { break; } - morsel = 
(MorselInfo *)morsel->next.get(); + row_group = (RowGroup *)row_group->next.get(); } info->cardinality += count; } @@ -122843,7 +134315,6 @@ void DataTable::RevertAppendInternal(idx_t start_row, idx_t count) { // nothing to revert! return; } - if (total_rows != start_row + count) { // interleaved append: don't do anything // in this case the rows will stay as "inserted by transaction X", but will never be committed @@ -122856,20 +134327,16 @@ void DataTable::RevertAppendInternal(idx_t start_row, idx_t count) { info->cardinality = start_row; total_rows = start_row; D_ASSERT(is_root); - // revert changes in the base columns - for (idx_t i = 0; i < types.size(); i++) { - columns[i]->RevertAppend(start_row); - } - // revert appends made to morsels - lock_guard tree_lock(versions->node_lock); + // revert appends made to row_groups + lock_guard tree_lock(row_groups->node_lock); // find the segment index that the current row belongs to - idx_t segment_index = versions->GetSegmentIndex(start_row); - auto segment = versions->nodes[segment_index].node; - auto &info = (MorselInfo &)*segment; + idx_t segment_index = row_groups->GetSegmentIndex(start_row); + auto segment = row_groups->nodes[segment_index].node; + auto &info = (RowGroup &)*segment; // remove any segments AFTER this segment: they should be deleted entirely - if (segment_index < versions->nodes.size() - 1) { - versions->nodes.erase(versions->nodes.begin() + segment_index + 1, versions->nodes.end()); + if (segment_index < row_groups->nodes.size() - 1) { + row_groups->nodes.erase(row_groups->nodes.begin() + segment_index + 1, row_groups->nodes.end()); } info.next = nullptr; info.RevertAppend(start_row); @@ -122877,11 +134344,8 @@ void DataTable::RevertAppendInternal(idx_t start_row, idx_t count) { void DataTable::RevertAppend(idx_t start_row, idx_t count) { lock_guard lock(append_lock); - if (!info->indexes.empty()) { - auto index_locks = unique_ptr(new IndexLock[info->indexes.size()]); - for (idx_t i = 0; i < 
info->indexes.size(); i++) { - info->indexes[i]->InitializeLock(index_locks[i]); - } + + if (!info->indexes.Empty()) { idx_t current_row_base = start_row; row_t row_data[STANDARD_VECTOR_SIZE]; Vector row_identifiers(LOGICAL_ROW_TYPE, (data_ptr_t)row_data); @@ -122889,9 +134353,10 @@ void DataTable::RevertAppend(idx_t start_row, idx_t count) { for (idx_t i = 0; i < chunk.size(); i++) { row_data[i] = current_row_base + i; } - for (idx_t i = 0; i < info->indexes.size(); i++) { - info->indexes[i]->Delete(index_locks[i], chunk, row_identifiers); - } + info->indexes.Scan([&](Index &index) { + index.Delete(chunk, row_identifiers); + return false; + }); current_row_base += chunk.size(); }); } @@ -122903,27 +134368,33 @@ void DataTable::RevertAppend(idx_t start_row, idx_t count) { //===--------------------------------------------------------------------===// bool DataTable::AppendToIndexes(TableAppendState &state, DataChunk &chunk, row_t row_start) { D_ASSERT(is_root); - if (info->indexes.empty()) { + if (info->indexes.Empty()) { return true; } // first generate the vector of row identifiers Vector row_identifiers(LOGICAL_ROW_TYPE); VectorOperations::GenerateSequence(row_identifiers, chunk.size(), row_start, 1); - idx_t failed_index = INVALID_INDEX; + vector already_appended; + bool append_failed = false; // now append the entries to the indices - for (idx_t i = 0; i < info->indexes.size(); i++) { - if (!info->indexes[i]->Append(state.index_locks[i], chunk, row_identifiers)) { - failed_index = i; - break; + info->indexes.Scan([&](Index &index) { + if (!index.Append(chunk, row_identifiers)) { + append_failed = true; + return true; } - } - if (failed_index != INVALID_INDEX) { + already_appended.push_back(&index); + return false; + }); + + if (append_failed) { // constraint violation! 
// remove any appended entries from previous indexes (if any) - for (idx_t i = 0; i < failed_index; i++) { - info->indexes[i]->Delete(state.index_locks[i], chunk, row_identifiers); + + for (auto *index : already_appended) { + index->Delete(chunk, row_identifiers); } + return false; } return true; @@ -122931,7 +134402,7 @@ bool DataTable::AppendToIndexes(TableAppendState &state, DataChunk &chunk, row_t void DataTable::RemoveFromIndexes(TableAppendState &state, DataChunk &chunk, row_t row_start) { D_ASSERT(is_root); - if (info->indexes.empty()) { + if (info->indexes.Empty()) { return; } // first generate the vector of row identifiers @@ -122944,41 +134415,57 @@ void DataTable::RemoveFromIndexes(TableAppendState &state, DataChunk &chunk, row void DataTable::RemoveFromIndexes(TableAppendState &state, DataChunk &chunk, Vector &row_identifiers) { D_ASSERT(is_root); - for (idx_t i = 0; i < info->indexes.size(); i++) { - info->indexes[i]->Delete(state.index_locks[i], chunk, row_identifiers); - } + info->indexes.Scan([&](Index &index) { + index.Delete(chunk, row_identifiers); + return false; + }); } void DataTable::RemoveFromIndexes(Vector &row_identifiers, idx_t count) { D_ASSERT(is_root); auto row_ids = FlatVector::GetData(row_identifiers); + + // figure out which row_group to fetch from + auto row_group = (RowGroup *)row_groups->GetSegment(row_ids[0]); + auto row_group_vector_idx = (row_ids[0] - row_group->start) / STANDARD_VECTOR_SIZE; + auto base_row_id = row_group_vector_idx * STANDARD_VECTOR_SIZE + row_group->start; + // create a selection vector from the row_ids SelectionVector sel(STANDARD_VECTOR_SIZE); for (idx_t i = 0; i < count; i++) { - sel.set_index(i, row_ids[i] % STANDARD_VECTOR_SIZE); + auto row_in_vector = row_ids[i] - base_row_id; + D_ASSERT(row_in_vector < STANDARD_VECTOR_SIZE); + sel.set_index(i, row_in_vector); } - // fetch the data for these row identifiers - DataChunk result; - result.Initialize(types); + // now fetch the columns from that row_group 
// FIXME: we do not need to fetch all columns, only the columns required by the indices! - auto states = unique_ptr(new ColumnScanState[types.size()]); + TableScanState state; + state.max_row = total_rows; for (idx_t i = 0; i < types.size(); i++) { - columns[i]->Fetch(states[i], row_ids[0], result.data[i]); + state.column_ids.push_back(i); } + DataChunk result; + result.Initialize(types); + + row_group->InitializeScanWithOffset(state.row_group_scan_state, row_group_vector_idx); + row_group->ScanCommitted(state.row_group_scan_state, result, + TableScanType::TABLE_SCAN_COMMITTED_ROWS_DISALLOW_UPDATES); result.Slice(sel, count); - for (auto &index : info->indexes) { - index->Delete(result, row_identifiers); - } + + info->indexes.Scan([&](Index &index) { + index.Delete(result, row_identifiers); + return false; + }); } //===--------------------------------------------------------------------===// // Delete //===--------------------------------------------------------------------===// -void DataTable::Delete(TableCatalogEntry &table, ClientContext &context, Vector &row_identifiers, idx_t count) { +idx_t DataTable::Delete(TableCatalogEntry &table, ClientContext &context, Vector &row_identifiers, idx_t count) { D_ASSERT(row_identifiers.GetType().InternalType() == ROW_TYPE); if (count == 0) { - return; + return 0; } auto &transaction = Transaction::GetTransaction(context); @@ -122989,17 +134476,39 @@ void DataTable::Delete(TableCatalogEntry &table, ClientContext &context, Vector if (first_id >= MAX_ROW_ID) { // deletion is in transaction-local storage: push delete into local chunk collection - transaction.storage.Delete(this, row_identifiers, count); + return transaction.storage.Delete(this, row_identifiers, count); } else { - auto morsel = (MorselInfo *)versions->GetSegment(first_id); - morsel->Delete(transaction, this, row_identifiers, count); + idx_t delete_count = 0; + // delete is in the row groups + // we need to figure out for each id to which row group it belongs + 
// usually all (or many) ids belong to the same row group + // we iterate over the ids and check for every id if it belongs to the same row group as their predecessor + idx_t pos = 0; + do { + idx_t start = pos; + auto row_group = (RowGroup *)row_groups->GetSegment(ids[pos]); + for (pos++; pos < count; pos++) { + D_ASSERT(ids[pos] >= 0); + // check if this id still belongs to this row group + if (idx_t(ids[pos]) < row_group->start) { + // id is before row_group start -> it does not + break; + } + if (idx_t(ids[pos]) >= row_group->start + row_group->count) { + // id is after row group end -> it does not + break; + } + } + delete_count += row_group->Delete(transaction, this, ids + start, pos - start); + } while (pos < count); + return delete_count; } } //===--------------------------------------------------------------------===// // Update //===--------------------------------------------------------------------===// -static void CreateMockChunk(vector &types, vector &column_ids, DataChunk &chunk, +static void CreateMockChunk(vector &types, const vector &column_ids, DataChunk &chunk, DataChunk &mock_chunk) { // construct a mock DataChunk mock_chunk.InitializeEmpty(types); @@ -123009,7 +134518,7 @@ static void CreateMockChunk(vector &types, vector &column mock_chunk.SetCardinality(chunk.size()); } -static bool CreateMockChunk(TableCatalogEntry &table, vector &column_ids, +static bool CreateMockChunk(TableCatalogEntry &table, const vector &column_ids, unordered_set &desired_column_ids, DataChunk &chunk, DataChunk &mock_chunk) { idx_t found_columns = 0; // check whether the desired columns are present in the UPDATE clause @@ -123033,7 +134542,8 @@ static bool CreateMockChunk(TableCatalogEntry &table, vector &column_i return true; } -void DataTable::VerifyUpdateConstraints(TableCatalogEntry &table, DataChunk &chunk, vector &column_ids) { +void DataTable::VerifyUpdateConstraints(TableCatalogEntry &table, DataChunk &chunk, + const vector &column_ids) { for (auto 
&constraint : table.bound_constraints) { switch (constraint->type) { case ConstraintType::NOT_NULL: { @@ -123067,21 +134577,28 @@ void DataTable::VerifyUpdateConstraints(TableCatalogEntry &table, DataChunk &chu // update should not be called for indexed columns! // instead update should have been rewritten to delete + update on higher layer #ifdef DEBUG - for (auto &index : info->indexes) { - D_ASSERT(!index->IndexIsUpdated(column_ids)); - } + info->indexes.Scan([&](Index &index) { + D_ASSERT(!index.IndexIsUpdated(column_ids)); + return false; + }); + #endif } -void DataTable::Update(TableCatalogEntry &table, ClientContext &context, Vector &row_ids, vector &column_ids, - DataChunk &updates) { +void DataTable::Update(TableCatalogEntry &table, ClientContext &context, Vector &row_ids, + const vector &column_ids, DataChunk &updates) { D_ASSERT(row_ids.GetType().InternalType() == ROW_TYPE); + auto count = updates.size(); updates.Verify(); - if (updates.size() == 0) { + if (count == 0) { return; } + if (!is_root) { + throw TransactionException("Transaction conflict: cannot update a table that has been altered!"); + } + // first verify that no constraints are violated VerifyUpdateConstraints(table, updates, column_ids); @@ -123089,7 +134606,8 @@ void DataTable::Update(TableCatalogEntry &table, ClientContext &context, Vector auto &transaction = Transaction::GetTransaction(context); updates.Normalify(); - row_ids.Normalify(updates.size()); + row_ids.Normalify(count); + auto ids = FlatVector::GetData(row_ids); auto first_id = FlatVector::GetValue(row_ids, 0); if (first_id >= MAX_ROW_ID) { // update is in transaction-local storage: push update into local storage @@ -123097,12 +134615,67 @@ void DataTable::Update(TableCatalogEntry &table, ClientContext &context, Vector return; } - for (idx_t i = 0; i < column_ids.size(); i++) { - auto column = column_ids[i]; - D_ASSERT(column != COLUMN_IDENTIFIER_ROW_ID); + // update is in the row groups + // we need to figure out for each id 
to which row group it belongs + // usually all (or many) ids belong to the same row group + // we iterate over the ids and check for every id if it belongs to the same row group as their predecessor + idx_t pos = 0; + do { + idx_t start = pos; + auto row_group = (RowGroup *)row_groups->GetSegment(ids[pos]); + row_t base_id = + row_group->start + ((ids[pos] - row_group->start) / STANDARD_VECTOR_SIZE * STANDARD_VECTOR_SIZE); + for (pos++; pos < count; pos++) { + D_ASSERT(ids[pos] >= 0); + // check if this id still belongs to this vector + if (ids[pos] < base_id) { + // id is before vector start -> it does not + break; + } + if (ids[pos] >= base_id + STANDARD_VECTOR_SIZE) { + // id is after vector end -> it does not + break; + } + } + row_group->Update(transaction, updates, ids, start, pos - start, column_ids); + + lock_guard stats_guard(stats_lock); + for (idx_t i = 0; i < column_ids.size(); i++) { + auto column_id = column_ids[i]; + column_stats[column_id]->Merge(*row_group->GetStatistics(column_id)); + } + } while (pos < count); +} + +void DataTable::UpdateColumn(TableCatalogEntry &table, ClientContext &context, Vector &row_ids, + const vector &column_path, DataChunk &updates) { + D_ASSERT(row_ids.GetType().InternalType() == ROW_TYPE); + D_ASSERT(updates.ColumnCount() == 1); + updates.Verify(); + if (updates.size() == 0) { + return; + } + + if (!is_root) { + throw TransactionException("Transaction conflict: cannot update a table that has been altered!"); + } + + // now perform the actual update + auto &transaction = Transaction::GetTransaction(context); - columns[column]->Update(transaction, updates.data[i], row_ids, updates.size()); + updates.Normalify(); + row_ids.Normalify(updates.size()); + auto first_id = FlatVector::GetValue(row_ids, 0); + if (first_id >= MAX_ROW_ID) { + throw NotImplementedException("Cannot update a column-path on transaction local data"); } + // find the row_group this id belongs to + auto primary_column_idx = column_path[0]; + auto 
row_group = (RowGroup *)row_groups->GetSegment(first_id); + row_group->UpdateColumn(transaction, updates, row_ids, column_path); + + lock_guard stats_guard(stats_lock); + column_stats[primary_column_idx]->Merge(*row_group->GetStatistics(primary_column_idx)); } //===--------------------------------------------------------------------===// @@ -123111,47 +134684,28 @@ void DataTable::Update(TableCatalogEntry &table, ClientContext &context, Vector void DataTable::InitializeCreateIndexScan(CreateIndexScanState &state, const vector &column_ids) { // we grab the append lock to make sure nothing is appended until AFTER we finish the index scan state.append_lock = std::unique_lock(append_lock); - state.delete_lock = std::unique_lock(versions->node_lock); + state.delete_lock = std::unique_lock(row_groups->node_lock); InitializeScan(state, column_ids); } -void DataTable::CreateIndexScan(CreateIndexScanState &state, const vector &column_ids, DataChunk &result, - bool allow_pending_updates) { - // scan the persistent segments - if (ScanCreateIndex(state, column_ids, result, state.current_row, state.max_row, allow_pending_updates)) { - return; - } -} - -bool DataTable::ScanCreateIndex(CreateIndexScanState &state, const vector &column_ids, DataChunk &result, - idx_t ¤t_row, idx_t max_row, bool allow_pending_updates) { - if (current_row >= max_row) { - return false; - } - idx_t count = MinValue(STANDARD_VECTOR_SIZE, max_row - current_row); - - // scan the base columns to fetch the actual data - // note that we insert all data into the index, even if it is marked as deleted - // FIXME: tuples that are already "cleaned up" do not need to be inserted into the index! 
- for (idx_t i = 0; i < column_ids.size(); i++) { - auto column = column_ids[i]; - if (column == COLUMN_IDENTIFIER_ROW_ID) { - // scan row id - D_ASSERT(result.data[i].GetType().InternalType() == ROW_TYPE); - result.data[i].Sequence(current_row, 1); +bool DataTable::ScanCreateIndex(CreateIndexScanState &state, DataChunk &result, TableScanType type) { + auto current_row_group = state.row_group_scan_state.row_group; + while (current_row_group) { + current_row_group->ScanCommitted(state.row_group_scan_state, result, type); + if (result.size() > 0) { + return true; } else { - // scan actual base column - columns[column]->IndexScan(state.column_scans[i], result.data[i], allow_pending_updates); + current_row_group = state.row_group_scan_state.row_group = (RowGroup *)current_row_group->next.get(); + if (current_row_group) { + current_row_group->InitializeScan(state.row_group_scan_state); + } } } - result.SetCardinality(count); - - current_row += STANDARD_VECTOR_SIZE; - return count > 0; + return false; } -void DataTable::AddIndex(unique_ptr index, vector> &expressions) { +void DataTable::AddIndex(unique_ptr index, const vector> &expressions) { DataChunk result; result.Initialize(index->logical_types); @@ -123174,55 +134728,80 @@ void DataTable::AddIndex(unique_ptr index, vector> } // now start incrementally building the index - IndexLock lock; - index->InitializeLock(lock); - ExpressionExecutor executor(expressions); - while (true) { - intermediate.Reset(); - // scan a new chunk from the table to index - CreateIndexScan(state, column_ids, intermediate); - if (intermediate.size() == 0) { - // finished scanning for index creation - // release all locks - break; - } - // resolve the expressions for this chunk - executor.Execute(intermediate, result); + { + IndexLock lock; + index->InitializeLock(lock); + ExpressionExecutor executor(expressions); + while (true) { + intermediate.Reset(); + // scan a new chunk from the table to index + ScanCreateIndex(state, intermediate, 
TableScanType::TABLE_SCAN_COMMITTED_ROWS_OMIT_PERMANENTLY_DELETED); + if (intermediate.size() == 0) { + // finished scanning for index creation + // release all locks + break; + } + // resolve the expressions for this chunk + executor.Execute(intermediate, result); - // insert into the index - if (!index->Insert(lock, result, intermediate.data[intermediate.ColumnCount() - 1])) { - throw ConstraintException("Cant create unique index, table contains duplicate data on indexed column(s)"); + // insert into the index + if (!index->Insert(lock, result, intermediate.data[intermediate.ColumnCount() - 1])) { + throw ConstraintException( + "Cant create unique index, table contains duplicate data on indexed column(s)"); + } } } - info->indexes.push_back(move(index)); + info->indexes.AddIndex(move(index)); } unique_ptr DataTable::GetStatistics(ClientContext &context, column_t column_id) { if (column_id == COLUMN_IDENTIFIER_ROW_ID) { return nullptr; } - // FIXME: potentially merge with transaction local shtuff - return columns[column_id]->GetStatistics(); + lock_guard stats_guard(stats_lock); + return column_stats[column_id]->Copy(); } //===--------------------------------------------------------------------===// // Checkpoint //===--------------------------------------------------------------------===// -void DataTable::Checkpoint(TableDataWriter &writer) { - // checkpoint each individual column - for (size_t i = 0; i < columns.size(); i++) { - columns[i]->Checkpoint(writer); +BlockPointer DataTable::Checkpoint(TableDataWriter &writer) { + // checkpoint each individual row group + // FIXME: we might want to combine adjacent row groups in case they have had deletions... 
+ vector> global_stats; + for (idx_t i = 0; i < types.size(); i++) { + global_stats.push_back(BaseStatistics::CreateEmpty(types[i])); } -} -void DataTable::CheckpointDeletes(TableDataWriter &writer) { - // then we checkpoint the deleted tuples - D_ASSERT(versions); - writer.CheckpointDeletes(((MorselInfo *)versions->GetRootSegment())); + auto row_group = (RowGroup *)row_groups->GetRootSegment(); + vector row_group_pointers; + while (row_group) { + auto pointer = row_group->Checkpoint(writer, global_stats); + row_group_pointers.push_back(move(pointer)); + row_group = (RowGroup *)row_group->next.get(); + } + // store the current position in the metadata writer + // this is where the row groups for this table start + auto &meta_writer = writer.GetMetaWriter(); + auto pointer = meta_writer.GetBlockPointer(); + + for (auto &stats : global_stats) { + stats->Serialize(meta_writer); + } + // now start writing the row group pointers to disk + meta_writer.Write(row_group_pointers.size()); + for (auto &row_group_pointer : row_group_pointers) { + RowGroup::Serialize(row_group_pointer, meta_writer); + } + return pointer; } void DataTable::CommitDropColumn(idx_t index) { - columns[index]->CommitDropColumn(); + auto segment = (RowGroup *)row_groups->GetRootSegment(); + while (segment) { + segment->CommitDropColumn(index); + segment = (RowGroup *)segment->next.get(); + } } idx_t DataTable::GetTotalRows() { @@ -123231,9 +134810,29 @@ idx_t DataTable::GetTotalRows() { void DataTable::CommitDropTable() { // commit a drop of this table: mark all blocks as modified so they can be reclaimed later on - for (size_t i = 0; i < columns.size(); i++) { - CommitDropColumn(i); + auto segment = (RowGroup *)row_groups->GetRootSegment(); + while (segment) { + segment->CommitDrop(); + segment = (RowGroup *)segment->next.get(); + } +} + +//===--------------------------------------------------------------------===// +// GetStorageInfo 
+//===--------------------------------------------------------------------===// +vector> DataTable::GetStorageInfo() { + vector> result; + + auto row_group = (RowGroup *)row_groups->GetRootSegment(); + idx_t row_group_index = 0; + while (row_group) { + row_group->GetStorageInfo(row_group_index, result); + row_group_index++; + + row_group = (RowGroup *)row_group->next.get(); } + + return result; } } // namespace duckdb @@ -123246,12 +134845,15 @@ void DataTable::CommitDropTable() { namespace duckdb { -Index::Index(IndexType type, vector column_ids_p, vector> unbound_expressions) - : type(type), column_ids(move(column_ids_p)), unbound_expressions(move(unbound_expressions)) { - for (auto &expr : this->unbound_expressions) { +Index::Index(IndexType type, const vector &column_ids_p, + const vector> &unbound_expressions, bool is_unique, bool is_primary) + : type(type), column_ids(column_ids_p), is_unique(is_unique), is_primary(is_primary) { + for (auto &expr : unbound_expressions) { types.push_back(expr->return_type.InternalType()); logical_types.push_back(expr->return_type); - bound_expressions.push_back(BindExpression(expr->Copy())); + auto unbound_expression = expr->Copy(); + bound_expressions.push_back(BindExpression(unbound_expression->Copy())); + this->unbound_expressions.emplace_back(move(unbound_expression)); } for (auto &bound_expr : bound_expressions) { executor.AddExpression(*bound_expr); @@ -123262,7 +134864,7 @@ Index::Index(IndexType type, vector column_ids_p, vector(lock); + state.index_lock = unique_lock(lock); } bool Index::Append(DataChunk &entries, Vector &row_identifiers) { @@ -123291,7 +134893,7 @@ unique_ptr Index::BindExpression(unique_ptr expr) { return expr; } -bool Index::IndexIsUpdated(vector &column_ids) { +bool Index::IndexIsUpdated(const vector &column_ids) const { for (auto &column : column_ids) { if (column_id_set.find(column) != column_id_set.end()) { return true; @@ -123311,6 +134913,7 @@ bool Index::IndexIsUpdated(vector &column_ids) { 
+ namespace duckdb { LocalTableStorage::LocalTableStorage(DataTable &table) : table(table) { @@ -123365,9 +134968,9 @@ void LocalTableStorage::Clear() { deleted_entries.clear(); indexes.clear(); deleted_rows = 0; - for (auto &index : table.info->indexes) { - D_ASSERT(index->type == IndexType::ART); - auto &art = (ART &)*index; + table.info->indexes.Scan([&](Index &index) { + D_ASSERT(index.type == IndexType::ART); + auto &art = (ART &)index; if (art.is_unique) { // unique index: create a local ART index that maintains the same unique constraint vector> unbound_expressions; @@ -123376,7 +134979,8 @@ void LocalTableStorage::Clear() { } indexes.push_back(make_unique(art.column_ids, move(unbound_expressions), true)); } - } + return false; + }); } void LocalStorage::InitializeScan(DataTable *table, LocalScanState &state, TableFilterSet *table_filters) { @@ -123442,11 +135046,9 @@ void LocalStorage::Scan(LocalScanState &state, const vector &column_id auto column_filters = state.table_filters->filters.find(i); if (column_filters != state.table_filters->filters.end()) { //! We have filters to apply here - for (auto &column_filter : column_filters->second) { - auto &mask = FlatVector::Validity(result.data[i]); - UncompressedSegment::FilterSelection(sel, result.data[i], column_filter, approved_tuple_count, - mask); - } + auto &mask = FlatVector::Validity(result.data[i]); + ColumnSegment::FilterSelection(sel, result.data[i], *column_filters->second, approved_tuple_count, + mask); count = approved_tuple_count; } } @@ -123492,7 +135094,7 @@ void LocalStorage::Append(DataTable *table, DataChunk &chunk) { } //! 
Append to the chunk storage->collection.Append(chunk); - if (storage->active_scans == 0 && storage->collection.Count() >= MorselInfo::MORSEL_SIZE * 2) { + if (storage->active_scans == 0 && storage->collection.Count() >= RowGroup::ROW_GROUP_SIZE * 2) { // flush to base storage Flush(*table, *storage); } @@ -123519,7 +135121,7 @@ static idx_t GetChunk(Vector &row_ids) { return first_id / STANDARD_VECTOR_SIZE; } -void LocalStorage::Delete(DataTable *table, Vector &row_ids, idx_t count) { +idx_t LocalStorage::Delete(DataTable *table, Vector &row_ids, idx_t count) { auto storage = GetStorage(table); // figure out the chunk from which these row ids came idx_t chunk_idx = GetChunk(row_ids); @@ -123537,16 +135139,21 @@ void LocalStorage::Delete(DataTable *table, Vector &row_ids, idx_t count) { } else { deleted = entry->second.get(); } - storage->deleted_rows += count; // now actually mark the entries as deleted in the deleted vector idx_t base_index = MAX_ROW_ID + chunk_idx * STANDARD_VECTOR_SIZE; + idx_t deleted_count = 0; auto ids = FlatVector::GetData(row_ids); for (idx_t i = 0; i < count; i++) { auto id = ids[i] - base_index; + if (!deleted[id]) { + deleted_count++; + } deleted[id] = true; } + storage->deleted_rows += deleted_count; + return deleted_count; } template @@ -123597,7 +135204,7 @@ static void UpdateChunk(Vector &data, Vector &updates, Vector &row_ids, idx_t co } } -void LocalStorage::Update(DataTable *table, Vector &row_ids, vector &column_ids, DataChunk &data) { +void LocalStorage::Update(DataTable *table, Vector &row_ids, const vector &column_ids, DataChunk &data) { auto storage = GetStorage(table); // figure out the chunk from which these row ids came idx_t chunk_idx = GetChunk(row_ids); @@ -123803,6 +135410,13 @@ MetaBlockWriter::~MetaBlockWriter() { Flush(); } +BlockPointer MetaBlockWriter::GetBlockPointer() { + BlockPointer pointer; + pointer.block_id = block->id; + pointer.offset = offset; + return pointer; +} + void MetaBlockWriter::Flush() { if 
(offset > sizeof(block_id_t)) { auto &block_manager = BlockManager::GetBlockManager(db); @@ -123840,242 +135454,6 @@ void MetaBlockWriter::WriteData(const_data_ptr_t buffer, idx_t write_size) { } } // namespace duckdb - - - - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/transaction/update_info.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - -#include - -namespace duckdb { -class UpdateSegment; - -struct UpdateInfo { - //! The update segment that this update info affects - UpdateSegment *segment; - //! The version number - std::atomic version_number; - //! The vector index within the uncompressed segment - idx_t vector_index; - //! The amount of updated tuples - sel_t N; - //! The maximum amount of tuples that can fit into this UpdateInfo - sel_t max; - //! The row ids of the tuples that have been updated. This should always be kept sorted! - sel_t *tuples; - //! The data of the tuples - data_ptr_t tuple_data; - //! The previous update info (or nullptr if it is the base) - UpdateInfo *prev; - //! The next update info in the chain (or nullptr if it is the last) - UpdateInfo *next; - - //! Loop over the update chain and execute the specified callback on all UpdateInfo's that are relevant for that - //! 
transaction in-order of newest to oldest - template - static void UpdatesForTransaction(UpdateInfo *current, transaction_t start_time, transaction_t transaction_id, - T &&callback) { - while (current) { - if (current->version_number > start_time && current->version_number != transaction_id) { - // these tuples were either committed AFTER this transaction started or are not committed yet, use - // tuples stored in this version - callback(current); - } - current = current->next; - } - } - - Value GetValue(idx_t index); - string ToString(); - void Print(); - void Verify(); -}; - -} // namespace duckdb - - - - - - - - -namespace duckdb { - -static NumericSegment::append_function_t GetAppendFunction(PhysicalType type); - -NumericSegment::NumericSegment(DatabaseInstance &db, PhysicalType type, idx_t row_start, block_id_t block_id) - : UncompressedSegment(db, type, row_start) { - // set up the different functions for this type of segment - this->append_function = GetAppendFunction(type); - - // figure out how many vectors we want to store in this block - this->type_size = GetTypeIdSize(type); - this->vector_size = type_size * STANDARD_VECTOR_SIZE; - this->max_vector_count = Storage::BLOCK_SIZE / vector_size; - // FIXME: this is a fix for test/sql/storage/checkpointed_self_append_tinyint.test - // it is only required because of ToTemporary() - // this should be removed when ToTemporary() is removed - if (max_vector_count > 80) { - max_vector_count = 80; - } - - auto &buffer_manager = BufferManager::GetBufferManager(db); - if (block_id == INVALID_BLOCK) { - // no block id specified: allocate a buffer for the uncompressed segment - this->block = buffer_manager.RegisterMemory(Storage::BLOCK_ALLOC_SIZE, false); - } else { - this->block = buffer_manager.RegisterBlock(block_id); - } -} - -//===--------------------------------------------------------------------===// -// Scan -//===--------------------------------------------------------------------===// -void 
NumericSegment::InitializeScan(ColumnScanState &state) { - // pin the primary buffer - auto &buffer_manager = BufferManager::GetBufferManager(db); - state.primary_handle = buffer_manager.Pin(block); -} - -//===--------------------------------------------------------------------===// -// Fetch base data -//===--------------------------------------------------------------------===// -void NumericSegment::FetchBaseData(ColumnScanState &state, idx_t vector_index, Vector &result) { - D_ASSERT(vector_index < max_vector_count); - D_ASSERT(vector_index * STANDARD_VECTOR_SIZE <= tuple_count); - - auto data = state.primary_handle->node->buffer; - auto offset = vector_index * vector_size; - - idx_t count = GetVectorCount(vector_index); - - auto source_data = data + offset; - - // fetch the nullmask and copy the data from the base table - result.SetVectorType(VectorType::FLAT_VECTOR); - memcpy(FlatVector::GetData(result), source_data, count * type_size); -} - -//===--------------------------------------------------------------------===// -// Fetch -//===--------------------------------------------------------------------===// -void NumericSegment::FetchRow(ColumnFetchState &state, row_t row_id, Vector &result, idx_t result_idx) { - auto &buffer_manager = BufferManager::GetBufferManager(db); - auto handle = buffer_manager.Pin(block); - - // get the vector index - idx_t vector_index = row_id / STANDARD_VECTOR_SIZE; - idx_t id_in_vector = row_id - vector_index * STANDARD_VECTOR_SIZE; - D_ASSERT(vector_index < max_vector_count); - - // first fetch the data from the base table - auto vector_ptr = handle->node->buffer + vector_index * vector_size; - - memcpy(FlatVector::GetData(result) + result_idx * type_size, vector_ptr + id_in_vector * type_size, type_size); -} - -//===--------------------------------------------------------------------===// -// Append -//===--------------------------------------------------------------------===// -idx_t NumericSegment::Append(SegmentStatistics 
&stats, VectorData &data, idx_t offset, idx_t count) { - auto &buffer_manager = BufferManager::GetBufferManager(db); - auto handle = buffer_manager.Pin(block); - - idx_t initial_count = tuple_count; - while (count > 0) { - // get the vector index of the vector to append to and see how many tuples we can append to that vector - idx_t vector_index = tuple_count / STANDARD_VECTOR_SIZE; - if (vector_index == max_vector_count) { - break; - } - idx_t current_tuple_count = tuple_count - vector_index * STANDARD_VECTOR_SIZE; - idx_t append_count = MinValue(STANDARD_VECTOR_SIZE - current_tuple_count, count); - - // now perform the actual append - append_function(stats, handle->node->buffer + vector_size * vector_index, current_tuple_count, data, offset, - append_count); - - count -= append_count; - offset += append_count; - tuple_count += append_count; - } - return tuple_count - initial_count; -} - -//===--------------------------------------------------------------------===// -// Append -//===--------------------------------------------------------------------===// -template -static void AppendLoop(SegmentStatistics &stats, data_ptr_t target, idx_t target_offset, VectorData &adata, - idx_t offset, idx_t count) { - auto sdata = (T *)adata.data; - auto tdata = (T *)target; - if (!adata.validity.AllValid()) { - for (idx_t i = 0; i < count; i++) { - auto source_idx = adata.sel->get_index(offset + i); - auto target_idx = target_offset + i; - bool is_null = !adata.validity.RowIsValid(source_idx); - if (!is_null) { - NumericStatistics::Update(stats, sdata[source_idx]); - tdata[target_idx] = sdata[source_idx]; - } - } - } else { - for (idx_t i = 0; i < count; i++) { - auto source_idx = adata.sel->get_index(offset + i); - auto target_idx = target_offset + i; - NumericStatistics::Update(stats, sdata[source_idx]); - tdata[target_idx] = sdata[source_idx]; - } - } -} - -static NumericSegment::append_function_t GetAppendFunction(PhysicalType type) { - switch (type) { - case 
PhysicalType::BOOL: - case PhysicalType::INT8: - return AppendLoop; - case PhysicalType::INT16: - return AppendLoop; - case PhysicalType::INT32: - return AppendLoop; - case PhysicalType::INT64: - return AppendLoop; - case PhysicalType::UINT8: - return AppendLoop; - case PhysicalType::UINT16: - return AppendLoop; - case PhysicalType::UINT32: - return AppendLoop; - case PhysicalType::UINT64: - return AppendLoop; - case PhysicalType::INT128: - return AppendLoop; - case PhysicalType::FLOAT: - return AppendLoop; - case PhysicalType::DOUBLE: - return AppendLoop; - case PhysicalType::INTERVAL: - return AppendLoop; - default: - throw NotImplementedException("Unimplemented type for uncompressed segment"); - } -} - -} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // @@ -124091,22 +135469,7 @@ static NumericSegment::append_function_t GetAppendFunction(PhysicalType type) { -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/set.hpp -// -// -//===----------------------------------------------------------------------===// - - - -#include - -namespace duckdb { -using std::set; -} - + namespace duckdb { @@ -124188,6 +135551,7 @@ class SingleFileBlockManager : public BlockManager { + #include #include @@ -124247,9 +135611,10 @@ T DeserializeHeaderStructure(data_ptr_t ptr) { return T::Deserialize(source); } -SingleFileBlockManager::SingleFileBlockManager(DatabaseInstance &db, string path, bool read_only, bool create_new, +SingleFileBlockManager::SingleFileBlockManager(DatabaseInstance &db, string path_p, bool read_only, bool create_new, bool use_direct_io) - : db(db), path(path), header_buffer(FileBufferType::MANAGED_BUFFER, Storage::FILE_HEADER_SIZE), iteration_count(0), + : db(db), path(move(path_p)), + header_buffer(Allocator::Get(db), FileBufferType::MANAGED_BUFFER, Storage::FILE_HEADER_SIZE), iteration_count(0), read_only(read_only), 
use_direct_io(use_direct_io) { uint8_t flags; FileLockType lock; @@ -124281,7 +135646,7 @@ SingleFileBlockManager::SingleFileBlockManager(DatabaseInstance &db, string path SerializeHeaderStructure(main_header, header_buffer.buffer); // now write the header to the file - header_buffer.Write(*handle, 0); + header_buffer.ChecksumAndWrite(*handle, 0); header_buffer.Clear(); // write the database headers @@ -124294,14 +135659,14 @@ SingleFileBlockManager::SingleFileBlockManager(DatabaseInstance &db, string path h1.free_list = INVALID_BLOCK; h1.block_count = 0; SerializeHeaderStructure(h1, header_buffer.buffer); - header_buffer.Write(*handle, Storage::FILE_HEADER_SIZE); + header_buffer.ChecksumAndWrite(*handle, Storage::FILE_HEADER_SIZE); // header 2 h2.iteration = 0; h2.meta_block = INVALID_BLOCK; h2.free_list = INVALID_BLOCK; h2.block_count = 0; SerializeHeaderStructure(h2, header_buffer.buffer); - header_buffer.Write(*handle, Storage::FILE_HEADER_SIZE * 2); + header_buffer.ChecksumAndWrite(*handle, Storage::FILE_HEADER_SIZE * 2); // ensure that writing to disk is completed before returning handle->Sync(); // we start with h2 as active_header, this way our initial write will be in h1 @@ -124310,7 +135675,7 @@ SingleFileBlockManager::SingleFileBlockManager(DatabaseInstance &db, string path max_block = 0; } else { // otherwise, we check the metadata of the file - header_buffer.Read(*handle, 0); + header_buffer.ReadAndChecksum(*handle, 0); MainHeader header = DeserializeHeaderStructure(header_buffer.buffer); // check the version number if (header.version_number != VERSION_NUMBER) { @@ -124328,9 +135693,9 @@ SingleFileBlockManager::SingleFileBlockManager(DatabaseInstance &db, string path // read the database headers from disk DatabaseHeader h1, h2; - header_buffer.Read(*handle, Storage::FILE_HEADER_SIZE); + header_buffer.ReadAndChecksum(*handle, Storage::FILE_HEADER_SIZE); h1 = DeserializeHeaderStructure(header_buffer.buffer); - header_buffer.Read(*handle, 
Storage::FILE_HEADER_SIZE * 2); + header_buffer.ReadAndChecksum(*handle, Storage::FILE_HEADER_SIZE * 2); h2 = DeserializeHeaderStructure(header_buffer.buffer); // check the header with the highest iteration count if (h1.iteration > h2.iteration) { @@ -124391,6 +135756,7 @@ block_id_t SingleFileBlockManager::GetFreeBlockId() { } void SingleFileBlockManager::MarkBlockAsModified(block_id_t block_id) { + D_ASSERT(block_id >= 0); modified_blocks.insert(block_id); } @@ -124399,18 +135765,18 @@ block_id_t SingleFileBlockManager::GetMetaBlock() { } unique_ptr SingleFileBlockManager::CreateBlock() { - return make_unique(GetFreeBlockId()); + return make_unique(Allocator::Get(db), GetFreeBlockId()); } void SingleFileBlockManager::Read(Block &block) { D_ASSERT(block.id >= 0); D_ASSERT(std::find(free_list.begin(), free_list.end(), block.id) == free_list.end()); - block.Read(*handle, BLOCK_START + block.id * Storage::BLOCK_ALLOC_SIZE); + block.ReadAndChecksum(*handle, BLOCK_START + block.id * Storage::BLOCK_ALLOC_SIZE); } void SingleFileBlockManager::Write(FileBuffer &buffer, block_id_t block_id) { D_ASSERT(block_id >= 0); - buffer.Write(*handle, BLOCK_START + block_id * Storage::BLOCK_ALLOC_SIZE); + buffer.ChecksumAndWrite(*handle, BLOCK_START + block_id * Storage::BLOCK_ALLOC_SIZE); } void SingleFileBlockManager::WriteHeader(DatabaseHeader header) { @@ -124453,7 +135819,8 @@ void SingleFileBlockManager::WriteHeader(DatabaseHeader header) { Store(header, header_buffer.buffer); // now write the header to the file, active_header determines whether we write to h1 or h2 // note that if active_header is h1 we write to h2, and vice versa - header_buffer.Write(*handle, active_header == 1 ? Storage::FILE_HEADER_SIZE : Storage::FILE_HEADER_SIZE * 2); + header_buffer.ChecksumAndWrite(*handle, + active_header == 1 ? Storage::FILE_HEADER_SIZE : Storage::FILE_HEADER_SIZE * 2); // switch active header to the other header active_header = 1 - active_header; //! 
Ensure the header write ends up on disk @@ -124469,6 +135836,8 @@ void SingleFileBlockManager::WriteHeader(DatabaseHeader header) { + + namespace duckdb { BaseStatistics::BaseStatistics(LogicalType type) : type(move(type)) { @@ -124486,6 +135855,15 @@ bool BaseStatistics::CanHaveNull() { return ((ValidityStatistics &)*validity_stats).has_null; } +bool BaseStatistics::CanHaveNoNull() { + if (!validity_stats) { + // we don't know + // solid maybe + return true; + } + return ((ValidityStatistics &)*validity_stats).has_no_null; +} + unique_ptr BaseStatistics::Copy() { auto statistics = make_unique(type); if (validity_stats) { @@ -124495,6 +135873,7 @@ unique_ptr BaseStatistics::Copy() { } void BaseStatistics::Merge(const BaseStatistics &other) { + D_ASSERT(type == other.type); if (other.validity_stats) { if (validity_stats) { validity_stats->Merge(*other.validity_stats); @@ -124507,7 +135886,7 @@ void BaseStatistics::Merge(const BaseStatistics &other) { unique_ptr BaseStatistics::CreateEmpty(LogicalType type) { switch (type.InternalType()) { case PhysicalType::BIT: - return make_unique(); + return make_unique(false, false); case PhysicalType::BOOL: case PhysicalType::INT8: case PhysicalType::INT16: @@ -124523,18 +135902,26 @@ unique_ptr BaseStatistics::CreateEmpty(LogicalType type) { return make_unique(move(type)); case PhysicalType::VARCHAR: return make_unique(move(type)); + case PhysicalType::STRUCT: + return make_unique(move(type)); + case PhysicalType::LIST: + return make_unique(move(type)); case PhysicalType::INTERVAL: default: - return make_unique(move(type)); + auto base_stats = make_unique(move(type)); + base_stats->validity_stats = make_unique(false); + return base_stats; } } void BaseStatistics::Serialize(Serializer &serializer) { serializer.Write(CanHaveNull()); + serializer.Write(CanHaveNoNull()); } unique_ptr BaseStatistics::Deserialize(Deserializer &source, LogicalType type) { bool can_have_null = source.Read(); + bool can_have_no_null = source.Read(); 
unique_ptr result; switch (type.InternalType()) { case PhysicalType::BIT: @@ -124556,15 +135943,19 @@ unique_ptr BaseStatistics::Deserialize(Deserializer &source, Log case PhysicalType::VARCHAR: result = StringStatistics::Deserialize(source, move(type)); break; + case PhysicalType::STRUCT: + result = StructStatistics::Deserialize(source, move(type)); + break; + case PhysicalType::LIST: + result = ListStatistics::Deserialize(source, move(type)); + break; case PhysicalType::INTERVAL: result = make_unique(move(type)); break; default: throw InternalException("Unimplemented type for statistics deserialization"); } - if (!can_have_null) { - result->validity_stats = make_unique(can_have_null); - } + result->validity_stats = make_unique(can_have_null, can_have_no_null); return result; } @@ -124572,10 +135963,111 @@ string BaseStatistics::ToString() { return StringUtil::Format("Base Statistics %s", validity_stats ? validity_stats->ToString() : "[]"); } -void BaseStatistics::Verify(Vector &vector, idx_t count) { +void BaseStatistics::Verify(Vector &vector, const SelectionVector &sel, idx_t count) { D_ASSERT(vector.GetType() == this->type); - if (!validity_stats) { - validity_stats->Verify(vector, count); + if (validity_stats) { + validity_stats->Verify(vector, sel, count); + } +} + +void BaseStatistics::Verify(Vector &vector, idx_t count) { + Verify(vector, FlatVector::INCREMENTAL_SELECTION_VECTOR, count); +} + +} // namespace duckdb + + + +namespace duckdb { + +ListStatistics::ListStatistics(LogicalType type_p) : BaseStatistics(move(type_p)) { + D_ASSERT(type.InternalType() == PhysicalType::LIST); + + auto &child_type = ListType::GetChildType(type); + child_stats = BaseStatistics::CreateEmpty(child_type); + validity_stats = make_unique(false); +} + +void ListStatistics::Merge(const BaseStatistics &other_p) { + BaseStatistics::Merge(other_p); + + auto &other = (const ListStatistics &)other_p; + if (child_stats && other.child_stats) { + child_stats->Merge(*other.child_stats); 
+ } else { + child_stats.reset(); + } +} + +// LCOV_EXCL_START +FilterPropagateResult ListStatistics::CheckZonemap(ExpressionType comparison_type, const Value &constant) { + throw InternalException("List zonemaps are not supported yet"); +} +// LCOV_EXCL_STOP + +unique_ptr ListStatistics::Copy() { + auto copy = make_unique(type); + copy->validity_stats = validity_stats ? validity_stats->Copy() : nullptr; + copy->child_stats = child_stats ? child_stats->Copy() : nullptr; + return move(copy); +} + +void ListStatistics::Serialize(Serializer &serializer) { + BaseStatistics::Serialize(serializer); + child_stats->Serialize(serializer); +} + +unique_ptr ListStatistics::Deserialize(Deserializer &source, LogicalType type) { + D_ASSERT(type.InternalType() == PhysicalType::LIST); + auto result = make_unique(move(type)); + auto &child_type = ListType::GetChildType(result->type); + result->child_stats = BaseStatistics::Deserialize(source, child_type); + return move(result); +} + +string ListStatistics::ToString() { + string result; + result += " ["; + result += child_stats ? child_stats->ToString() : "No Stats"; + result += "]"; + result += validity_stats ? 
validity_stats->ToString() : ""; + return result; +} + +void ListStatistics::Verify(Vector &vector, const SelectionVector &sel, idx_t count) { + BaseStatistics::Verify(vector, sel, count); + + if (child_stats) { + auto &child_entry = ListVector::GetEntry(vector); + VectorData vdata; + vector.Orrify(count, vdata); + + auto list_data = (list_entry_t *)vdata.data; + idx_t total_list_count = 0; + for (idx_t i = 0; i < count; i++) { + auto idx = sel.get_index(i); + auto index = vdata.sel->get_index(idx); + auto list = list_data[index]; + if (vdata.validity.RowIsValid(index)) { + for (idx_t list_idx = 0; list_idx < list.length; list_idx++) { + total_list_count++; + } + } + } + SelectionVector list_sel(total_list_count); + idx_t list_count = 0; + for (idx_t i = 0; i < count; i++) { + auto idx = sel.get_index(i); + auto index = vdata.sel->get_index(idx); + auto list = list_data[index]; + if (vdata.validity.RowIsValid(index)) { + for (idx_t list_idx = 0; list_idx < list.length; list_idx++) { + list_sel.set_index(list_count++, list.offset + list_idx); + } + } + } + + child_stats->Verify(child_entry, list_sel, list_count); } } @@ -124656,9 +136148,14 @@ template <> void NumericStatistics::Update(SegmentStatistics &stats, interval_t new_value) { } +template <> +void NumericStatistics::Update(SegmentStatistics &stats, list_entry_t new_value) { +} + NumericStatistics::NumericStatistics(LogicalType type_p) : BaseStatistics(move(type_p)) { min = Value::MaximumValue(type); max = Value::MinimumValue(type); + validity_stats = make_unique(false); } NumericStatistics::NumericStatistics(LogicalType type_p, Value min_p, Value max_p) @@ -124668,28 +136165,77 @@ NumericStatistics::NumericStatistics(LogicalType type_p, Value min_p, Value max_ void NumericStatistics::Merge(const BaseStatistics &other_p) { BaseStatistics::Merge(other_p); auto &other = (const NumericStatistics &)other_p; - if (other.min < min) { + if (other.min.is_null || min.is_null) { + min.is_null = true; + } else if 
(other.min < min) { min = other.min; } - if (other.max > max) { + if (other.max.is_null || max.is_null) { + max.is_null = true; + } else if (other.max > max) { max = other.max; } } -bool NumericStatistics::CheckZonemap(ExpressionType comparison_type, const Value &constant) { +FilterPropagateResult NumericStatistics::CheckZonemap(ExpressionType comparison_type, const Value &constant) { + if (min.is_null || max.is_null) { + return FilterPropagateResult::NO_PRUNING_POSSIBLE; + } switch (comparison_type) { case ExpressionType::COMPARE_EQUAL: - return constant >= min && constant <= max; + if (constant == min && constant == max) { + return FilterPropagateResult::FILTER_ALWAYS_TRUE; + } else if (constant >= min && constant <= max) { + return FilterPropagateResult::NO_PRUNING_POSSIBLE; + } else { + return FilterPropagateResult::FILTER_ALWAYS_FALSE; + } case ExpressionType::COMPARE_GREATERTHANOREQUALTO: - return constant <= max; + // X >= C + // this can be true only if max(X) >= C + // if min(X) >= C, then this is always true + if (min >= constant) { + return FilterPropagateResult::FILTER_ALWAYS_TRUE; + } else if (max >= constant) { + return FilterPropagateResult::NO_PRUNING_POSSIBLE; + } else { + return FilterPropagateResult::FILTER_ALWAYS_FALSE; + } case ExpressionType::COMPARE_GREATERTHAN: - return constant < max; + // X > C + // this can be true only if max(X) > C + // if min(X) > C, then this is always true + if (min > constant) { + return FilterPropagateResult::FILTER_ALWAYS_TRUE; + } else if (max > constant) { + return FilterPropagateResult::NO_PRUNING_POSSIBLE; + } else { + return FilterPropagateResult::FILTER_ALWAYS_FALSE; + } case ExpressionType::COMPARE_LESSTHANOREQUALTO: - return constant >= min; + // X <= C + // this can be true only if min(X) <= C + // if max(X) <= C, then this is always true + if (max <= constant) { + return FilterPropagateResult::FILTER_ALWAYS_TRUE; + } else if (min <= constant) { + return FilterPropagateResult::NO_PRUNING_POSSIBLE; + } 
else { + return FilterPropagateResult::FILTER_ALWAYS_FALSE; + } case ExpressionType::COMPARE_LESSTHAN: - return constant > min; + // X < C + // this can be true only if min(X) < C + // if max(X) < C, then this is always true + if (max < constant) { + return FilterPropagateResult::FILTER_ALWAYS_TRUE; + } else if (min < constant) { + return FilterPropagateResult::NO_PRUNING_POSSIBLE; + } else { + return FilterPropagateResult::FILTER_ALWAYS_FALSE; + } default: - throw InternalException("Operation not implemented"); + throw InternalException("Expression type in zonemap check not implemented"); } } @@ -124701,6 +136247,10 @@ unique_ptr NumericStatistics::Copy() { return move(stats); } +bool NumericStatistics::IsConstant() { + return max <= min; +} + void NumericStatistics::Serialize(Serializer &serializer) { BaseStatistics::Serialize(serializer); min.Serialize(serializer); @@ -124714,25 +136264,26 @@ unique_ptr NumericStatistics::Deserialize(Deserializer &source, } string NumericStatistics::ToString() { - return StringUtil::Format("Numeric Statistics<%s> %s[Min: %s, Max: %s]", type.ToString(), - validity_stats ? validity_stats->ToString() : "", min.ToString(), max.ToString()); + return StringUtil::Format("[Min: %s, Max: %s]%s", min.ToString(), max.ToString(), + validity_stats ? 
validity_stats->ToString() : ""); } template -void NumericStatistics::TemplatedVerify(Vector &vector, idx_t count) { +void NumericStatistics::TemplatedVerify(Vector &vector, const SelectionVector &sel, idx_t count) { VectorData vdata; vector.Orrify(count, vdata); auto data = (T *)vdata.data; for (idx_t i = 0; i < count; i++) { - auto index = vdata.sel->get_index(i); + auto idx = sel.get_index(i); + auto index = vdata.sel->get_index(idx); if (!vdata.validity.RowIsValid(index)) { continue; } - if (!min.is_null && LessThan::Operation(data[index], min.GetValueUnsafe())) { + if (!min.is_null && LessThan::Operation(data[index], min.GetValueUnsafe())) { // LCOV_EXCL_START throw InternalException("Statistics mismatch: value is smaller than min.\nStatistics: %s\nVector: %s", ToString(), vector.ToString(count)); - } + } // LCOV_EXCL_STOP if (!max.is_null && GreaterThan::Operation(data[index], max.GetValueUnsafe())) { throw InternalException("Statistics mismatch: value is bigger than max.\nStatistics: %s\nVector: %s", ToString(), vector.ToString(count)); @@ -124740,32 +136291,32 @@ void NumericStatistics::TemplatedVerify(Vector &vector, idx_t count) { } } -void NumericStatistics::Verify(Vector &vector, idx_t count) { - BaseStatistics::Verify(vector, count); +void NumericStatistics::Verify(Vector &vector, const SelectionVector &sel, idx_t count) { + BaseStatistics::Verify(vector, sel, count); switch (type.InternalType()) { case PhysicalType::BOOL: break; case PhysicalType::INT8: - TemplatedVerify(vector, count); + TemplatedVerify(vector, sel, count); break; case PhysicalType::INT16: - TemplatedVerify(vector, count); + TemplatedVerify(vector, sel, count); break; case PhysicalType::INT32: - TemplatedVerify(vector, count); + TemplatedVerify(vector, sel, count); break; case PhysicalType::INT64: - TemplatedVerify(vector, count); + TemplatedVerify(vector, sel, count); break; case PhysicalType::INT128: - TemplatedVerify(vector, count); + TemplatedVerify(vector, sel, count); break; 
case PhysicalType::FLOAT: - TemplatedVerify(vector, count); + TemplatedVerify(vector, sel, count); break; case PhysicalType::DOUBLE: - TemplatedVerify(vector, count); + TemplatedVerify(vector, sel, count); break; default: throw InternalException("Unsupported type %s for numeric statistics verify", type.ToString()); @@ -124781,37 +136332,20 @@ void NumericStatistics::Verify(Vector &vector, idx_t count) { namespace duckdb { -SegmentStatistics::SegmentStatistics(LogicalType type, idx_t type_size) : type(move(type)), type_size(type_size) { +SegmentStatistics::SegmentStatistics(LogicalType type) : type(move(type)) { Reset(); } -SegmentStatistics::SegmentStatistics(LogicalType type, idx_t type_size, unique_ptr stats) - : type(move(type)), type_size(type_size), statistics(move(stats)) { +SegmentStatistics::SegmentStatistics(LogicalType type, unique_ptr stats) + : type(move(type)), statistics(move(stats)) { + if (!statistics) { + Reset(); + } } void SegmentStatistics::Reset() { statistics = BaseStatistics::CreateEmpty(type); -} - -bool SegmentStatistics::CheckZonemap(TableFilter &filter) { - switch (type.InternalType()) { - case PhysicalType::UINT8: - case PhysicalType::UINT16: - case PhysicalType::UINT32: - case PhysicalType::UINT64: - case PhysicalType::INT8: - case PhysicalType::INT16: - case PhysicalType::INT32: - case PhysicalType::INT64: - case PhysicalType::INT128: - case PhysicalType::FLOAT: - case PhysicalType::DOUBLE: - return ((NumericStatistics &)*statistics).CheckZonemap(filter.comparison_type, filter.constant); - case PhysicalType::VARCHAR: - return ((StringStatistics &)*statistics).CheckZonemap(filter.comparison_type, filter.constant.ToString()); - default: - return true; - } + statistics->validity_stats = make_unique(false); } } // namespace duckdb @@ -124831,6 +136365,7 @@ StringStatistics::StringStatistics(LogicalType type_p) : BaseStatistics(move(typ max_string_length = 0; has_unicode = false; has_overflow_strings = false; + validity_stats = 
make_unique(false); } unique_ptr StringStatistics::Copy() { @@ -124929,7 +136464,7 @@ void StringStatistics::Merge(const BaseStatistics &other_p) { has_overflow_strings = has_overflow_strings || other.has_overflow_strings; } -bool StringStatistics::CheckZonemap(ExpressionType comparison_type, const string &constant) { +FilterPropagateResult StringStatistics::CheckZonemap(ExpressionType comparison_type, const string &constant) { auto data = (const_data_ptr_t)constant.c_str(); auto size = constant.size(); @@ -124938,41 +136473,53 @@ bool StringStatistics::CheckZonemap(ExpressionType comparison_type, const string int max_comp = StringValueComparison(data, value_size, max); switch (comparison_type) { case ExpressionType::COMPARE_EQUAL: - return min_comp >= 0 && max_comp <= 0; + if (min_comp >= 0 && max_comp <= 0) { + return FilterPropagateResult::NO_PRUNING_POSSIBLE; + } else { + return FilterPropagateResult::FILTER_ALWAYS_FALSE; + } case ExpressionType::COMPARE_GREATERTHANOREQUALTO: case ExpressionType::COMPARE_GREATERTHAN: - return max_comp <= 0; + if (max_comp <= 0) { + return FilterPropagateResult::NO_PRUNING_POSSIBLE; + } else { + return FilterPropagateResult::FILTER_ALWAYS_FALSE; + } case ExpressionType::COMPARE_LESSTHAN: case ExpressionType::COMPARE_LESSTHANOREQUALTO: - return min_comp >= 0; + if (min_comp >= 0) { + return FilterPropagateResult::NO_PRUNING_POSSIBLE; + } else { + return FilterPropagateResult::FILTER_ALWAYS_FALSE; + } default: - throw InternalException("Operation not implemented"); + throw InternalException("Expression type not implemented for string statistics zone map"); } } static idx_t GetValidMinMaxSubstring(data_ptr_t data) { - idx_t len = 0; for (idx_t i = 0; i < StringStatistics::MAX_STRING_MINMAX_SIZE; i++) { if (data[i] == '\0') { return i; } - if ((data[i] & 0xC0) != 0x80) { - len = i; + if ((data[i] & 0x80) != 0) { + return i; } } - return len; + return StringStatistics::MAX_STRING_MINMAX_SIZE; } string StringStatistics::ToString() { 
idx_t min_len = GetValidMinMaxSubstring(min); idx_t max_len = GetValidMinMaxSubstring(max); - return StringUtil::Format("String Statistics %s[Min: %s, Max: %s, Has Unicode: %s, Max String Length: %lld]", - validity_stats ? validity_stats->ToString() : "", string((const char *)min, min_len), - string((const char *)max, max_len), has_unicode ? "true" : "false", max_string_length); + return StringUtil::Format("[Min: %s, Max: %s, Has Unicode: %s, Max String Length: %lld]%s", + string((const char *)min, min_len), string((const char *)max, max_len), + has_unicode ? "true" : "false", max_string_length, + validity_stats ? validity_stats->ToString() : ""); } -void StringStatistics::Verify(Vector &vector, idx_t count) { - BaseStatistics::Verify(vector, count); +void StringStatistics::Verify(Vector &vector, const SelectionVector &sel, idx_t count) { + BaseStatistics::Verify(vector, sel, count); string_t min_string((const char *)min, MAX_STRING_MINMAX_SIZE); string_t max_string((const char *)max, MAX_STRING_MINMAX_SIZE); @@ -124981,13 +136528,15 @@ void StringStatistics::Verify(Vector &vector, idx_t count) { vector.Orrify(count, vdata); auto data = (string_t *)vdata.data; for (idx_t i = 0; i < count; i++) { - auto index = vdata.sel->get_index(i); + auto idx = sel.get_index(i); + auto index = vdata.sel->get_index(idx); if (!vdata.validity.RowIsValid(index)) { continue; } auto value = data[index]; auto data = value.GetDataUnsafe(); auto len = value.GetSize(); + // LCOV_EXCL_START if (len > max_string_length) { throw InternalException( "Statistics mismatch: string value exceeds maximum string length.\nStatistics: %s\nVector: %s", @@ -125011,6 +136560,107 @@ void StringStatistics::Verify(Vector &vector, idx_t count) { throw InternalException("Statistics mismatch: value is bigger than max.\nStatistics: %s\nVector: %s", ToString(), vector.ToString(count)); } + // LCOV_EXCL_STOP + } +} + +} // namespace duckdb + + + + +namespace duckdb { + 
+StructStatistics::StructStatistics(LogicalType type_p) : BaseStatistics(move(type_p)) { + D_ASSERT(type.InternalType() == PhysicalType::STRUCT); + + auto &child_types = StructType::GetChildTypes(type); + child_stats.resize(child_types.size()); + for (idx_t i = 0; i < child_types.size(); i++) { + child_stats[i] = BaseStatistics::CreateEmpty(child_types[i].second); + } + validity_stats = make_unique(false); +} + +void StructStatistics::Merge(const BaseStatistics &other_p) { + BaseStatistics::Merge(other_p); + + auto &other = (const StructStatistics &)other_p; + D_ASSERT(other.child_stats.size() == child_stats.size()); + for (idx_t i = 0; i < child_stats.size(); i++) { + if (child_stats[i] && other.child_stats[i]) { + child_stats[i]->Merge(*other.child_stats[i]); + } else { + child_stats[i].reset(); + } + } +} + +// LCOV_EXCL_START +FilterPropagateResult StructStatistics::CheckZonemap(ExpressionType comparison_type, const Value &constant) { + throw InternalException("Struct zonemaps are not supported yet"); +} +// LCOV_EXCL_STOP + +unique_ptr StructStatistics::Copy() { + auto copy = make_unique(type); + if (validity_stats) { + copy->validity_stats = validity_stats->Copy(); + } + for (idx_t i = 0; i < child_stats.size(); i++) { + copy->child_stats[i] = child_stats[i] ? child_stats[i]->Copy() : nullptr; + } + return move(copy); +} + +void StructStatistics::Serialize(Serializer &serializer) { + BaseStatistics::Serialize(serializer); + for (idx_t i = 0; i < child_stats.size(); i++) { + serializer.Write(child_stats[i] ? 
true : false); + if (child_stats[i]) { + child_stats[i]->Serialize(serializer); + } + } +} + +unique_ptr StructStatistics::Deserialize(Deserializer &source, LogicalType type) { + D_ASSERT(type.InternalType() == PhysicalType::STRUCT); + auto result = make_unique(move(type)); + auto &child_types = StructType::GetChildTypes(result->type); + for (idx_t i = 0; i < child_types.size(); i++) { + auto has_child = source.Read(); + if (has_child) { + result->child_stats[i] = BaseStatistics::Deserialize(source, child_types[i].second); + } else { + result->child_stats[i].reset(); + } + } + return move(result); +} + +string StructStatistics::ToString() { + string result; + result += " {"; + auto &child_types = StructType::GetChildTypes(type); + for (idx_t i = 0; i < child_types.size(); i++) { + if (i > 0) { + result += ", "; + } + result += child_types[i].first + ": " + (child_stats[i] ? child_stats[i]->ToString() : "No Stats"); + } + result += "}"; + result += validity_stats ? validity_stats->ToString() : ""; + return result; +} + +void StructStatistics::Verify(Vector &vector, const SelectionVector &sel, idx_t count) { + BaseStatistics::Verify(vector, sel, count); + + auto &child_entries = StructVector::GetEntries(vector); + for (idx_t i = 0; i < child_entries.size(); i++) { + if (child_stats[i]) { + child_stats[i]->Verify(*child_entries[i], sel, count); + } } } @@ -125022,8 +136672,8 @@ void StringStatistics::Verify(Vector &vector, idx_t count) { namespace duckdb { -ValidityStatistics::ValidityStatistics(bool has_null) - : BaseStatistics(LogicalType(LogicalTypeId::VALIDITY)), has_null(has_null) { +ValidityStatistics::ValidityStatistics(bool has_null, bool has_no_null) + : BaseStatistics(LogicalType(LogicalTypeId::VALIDITY)), has_null(has_null), has_no_null(has_no_null) { } unique_ptr ValidityStatistics::Combine(const unique_ptr &lstats, @@ -125037,34 +136687,63 @@ unique_ptr ValidityStatistics::Combine(const unique_ptr(l.has_null || r.has_null); + return make_unique(l.has_null 
|| r.has_null, l.has_no_null || r.has_no_null); + } +} + +bool ValidityStatistics::IsConstant() { + if (!has_null) { + return true; + } + if (!has_no_null) { + return true; } + return false; } void ValidityStatistics::Merge(const BaseStatistics &other_p) { auto &other = (ValidityStatistics &)other_p; has_null = has_null || other.has_null; + has_no_null = has_no_null || other.has_no_null; } unique_ptr ValidityStatistics::Copy() { - return make_unique(has_null); + return make_unique(has_null, has_no_null); } void ValidityStatistics::Serialize(Serializer &serializer) { BaseStatistics::Serialize(serializer); serializer.Write(has_null); + serializer.Write(has_no_null); } unique_ptr ValidityStatistics::Deserialize(Deserializer &source) { bool has_null = source.Read(); - return make_unique(has_null); + bool has_no_null = source.Read(); + return make_unique(has_null, has_no_null); } -void ValidityStatistics::Verify(Vector &vector, idx_t count) { - if (VectorOperations::HasNull(vector, count)) { - throw InternalException( - "Statistics mismatch: vector labeled as not having NULL values, but vector contains null values: %s", - vector.ToString(count)); +void ValidityStatistics::Verify(Vector &vector, const SelectionVector &sel, idx_t count) { + if (has_null && has_no_null) { + // nothing to verify + return; + } + VectorData vdata; + vector.Orrify(count, vdata); + for (idx_t i = 0; i < count; i++) { + auto idx = sel.get_index(i); + auto index = vdata.sel->get_index(idx); + bool row_is_valid = vdata.validity.RowIsValid(index); + if (row_is_valid && !has_no_null) { + throw InternalException( + "Statistics mismatch: vector labeled as having only NULL values, but vector contains valid values: %s", + vector.ToString(count)); + } + if (!row_is_valid && !has_null) { + throw InternalException( + "Statistics mismatch: vector labeled as not having NULL values, but vector contains null values: %s", + vector.ToString(count)); + } } } @@ -125077,7 +136756,7 @@ string 
ValidityStatistics::ToString() { namespace duckdb { -const uint64_t VERSION_NUMBER = 14; +const uint64_t VERSION_NUMBER = 19; } // namespace duckdb @@ -125146,27 +136825,770 @@ namespace duckdb { class InMemoryBlockManager : public BlockManager { public: void StartCheckpoint() override { - throw Exception("Cannot perform IO in in-memory database!"); + throw InternalException("Cannot perform IO in in-memory database!"); } unique_ptr CreateBlock() override { - throw Exception("Cannot perform IO in in-memory database!"); + throw InternalException("Cannot perform IO in in-memory database!"); } block_id_t GetFreeBlockId() override { - throw Exception("Cannot perform IO in in-memory database!"); + throw InternalException("Cannot perform IO in in-memory database!"); } block_id_t GetMetaBlock() override { - throw Exception("Cannot perform IO in in-memory database!"); + throw InternalException("Cannot perform IO in in-memory database!"); } void Read(Block &block) override { - throw Exception("Cannot perform IO in in-memory database!"); + throw InternalException("Cannot perform IO in in-memory database!"); } void Write(FileBuffer &block, block_id_t block_id) override { - throw Exception("Cannot perform IO in in-memory database!"); + throw InternalException("Cannot perform IO in in-memory database!"); } void WriteHeader(DatabaseHeader header) override { - throw Exception("Cannot perform IO in in-memory database!"); + throw InternalException("Cannot perform IO in in-memory database!"); + } +}; +} // namespace duckdb + + + + + + + + + + + + + + + +namespace duckdb { + +StorageManager::StorageManager(DatabaseInstance &db, string path, bool read_only) + : db(db), path(move(path)), wal(db), read_only(read_only) { +} + +StorageManager::~StorageManager() { +} + +StorageManager &StorageManager::GetStorageManager(ClientContext &context) { + return StorageManager::GetStorageManager(*context.db); +} + +BufferManager &BufferManager::GetBufferManager(ClientContext &context) { + return 
BufferManager::GetBufferManager(*context.db); +} + +ObjectCache &ObjectCache::GetObjectCache(ClientContext &context) { + return context.db->GetObjectCache(); +} + +bool ObjectCache::ObjectCacheEnabled(ClientContext &context) { + return context.db->config.object_cache_enable; +} + +bool StorageManager::InMemory() { + return path.empty() || path == ":memory:"; +} + +void StorageManager::Initialize() { + bool in_memory = InMemory(); + if (in_memory && read_only) { + throw CatalogException("Cannot launch in-memory database in read-only mode!"); + } + + // first initialize the base system catalogs + // these are never written to the WAL + Connection con(db); + con.BeginTransaction(); + + auto &config = DBConfig::GetConfig(db); + auto &catalog = Catalog::GetCatalog(*con.context); + + // create the default schema + CreateSchemaInfo info; + info.schema = DEFAULT_SCHEMA; + info.internal = true; + catalog.CreateSchema(*con.context, &info); + + if (config.initialize_default_database) { + // initialize default functions + BuiltinFunctions builtin(*con.context, catalog); + builtin.Initialize(); + } + + // commit transactions + con.Commit(); + + if (!in_memory) { + // create or load the database from disk, if not in-memory mode + LoadDatabase(); + } else { + block_manager = make_unique(); + buffer_manager = make_unique(db, config.temporary_directory, config.maximum_memory); + } +} + +void StorageManager::LoadDatabase() { + string wal_path = path + ".wal"; + auto &fs = db.GetFileSystem(); + auto &config = db.config; + bool truncate_wal = false; + // first check if the database exists + if (!fs.FileExists(path)) { + if (read_only) { + throw CatalogException("Cannot open database \"%s\" in read-only mode: database does not exist", path); + } + // check if the WAL exists + if (fs.FileExists(wal_path)) { + // WAL file exists but database file does not + // remove the WAL + fs.RemoveFile(wal_path); + } + // initialize the block manager while creating a new db file + block_manager = 
make_unique(db, path, read_only, true, config.use_direct_io); + buffer_manager = make_unique(db, config.temporary_directory, config.maximum_memory); + } else { + // initialize the block manager while loading the current db file + auto sf_bm = make_unique(db, path, read_only, false, config.use_direct_io); + auto sf = sf_bm.get(); + block_manager = move(sf_bm); + buffer_manager = make_unique(db, config.temporary_directory, config.maximum_memory); + sf->LoadFreeList(); + + //! Load from storage + CheckpointManager checkpointer(db); + checkpointer.LoadFromStorage(); + // check if the WAL file exists + if (fs.FileExists(wal_path)) { + // replay the WAL + truncate_wal = WriteAheadLog::Replay(db, wal_path); + } + } + // initialize the WAL file + if (!read_only) { + wal.Initialize(wal_path); + if (truncate_wal) { + wal.Truncate(0); + } + } +} + +void StorageManager::CreateCheckpoint(bool delete_wal, bool force_checkpoint) { + if (InMemory() || read_only || !wal.initialized) { + return; + } + if (wal.GetWALSize() > 0 || db.config.force_checkpoint || force_checkpoint) { + // we only need to checkpoint if there is anything in the WAL + CheckpointManager checkpointer(db); + checkpointer.CreateCheckpoint(); + } + if (delete_wal) { + wal.Delete(); + } +} + +} // namespace duckdb + + + + +namespace duckdb { + +struct TransactionVersionOperator { + static bool UseInsertedVersion(transaction_t start_time, transaction_t transaction_id, transaction_t id) { + return id < start_time || id == transaction_id; + } + + static bool UseDeletedVersion(transaction_t start_time, transaction_t transaction_id, transaction_t id) { + return !UseInsertedVersion(start_time, transaction_id, id); + } +}; + +struct CommittedVersionOperator { + static bool UseInsertedVersion(transaction_t start_time, transaction_t transaction_id, transaction_t id) { + return true; + } + + static bool UseDeletedVersion(transaction_t min_start_time, transaction_t min_transaction_id, transaction_t id) { + return (id >= 
min_start_time && id < TRANSACTION_ID_START) || (id >= min_transaction_id); + } +}; + +static bool UseVersion(Transaction &transaction, transaction_t id) { + return TransactionVersionOperator::UseInsertedVersion(transaction.start_time, transaction.transaction_id, id); +} + +unique_ptr ChunkInfo::Deserialize(Deserializer &source) { + auto type = source.Read(); + switch (type) { + case ChunkInfoType::EMPTY_INFO: + return nullptr; + case ChunkInfoType::CONSTANT_INFO: + return ChunkConstantInfo::Deserialize(source); + case ChunkInfoType::VECTOR_INFO: + return ChunkVectorInfo::Deserialize(source); + default: + throw SerializationException("Could not deserialize Chunk Info Type: unrecognized type"); + } +} + +//===--------------------------------------------------------------------===// +// Constant info +//===--------------------------------------------------------------------===// +ChunkConstantInfo::ChunkConstantInfo(idx_t start) + : ChunkInfo(start, ChunkInfoType::CONSTANT_INFO), insert_id(0), delete_id(NOT_DELETED_ID) { +} + +template +idx_t ChunkConstantInfo::TemplatedGetSelVector(transaction_t start_time, transaction_t transaction_id, + SelectionVector &sel_vector, idx_t max_count) { + if (OP::UseInsertedVersion(start_time, transaction_id, insert_id) && + OP::UseDeletedVersion(start_time, transaction_id, delete_id)) { + return max_count; + } + return 0; +} + +idx_t ChunkConstantInfo::GetSelVector(Transaction &transaction, SelectionVector &sel_vector, idx_t max_count) { + return TemplatedGetSelVector(transaction.start_time, transaction.transaction_id, + sel_vector, max_count); +} + +idx_t ChunkConstantInfo::GetCommittedSelVector(transaction_t min_start_id, transaction_t min_transaction_id, + SelectionVector &sel_vector, idx_t max_count) { + return TemplatedGetSelVector(min_start_id, min_transaction_id, sel_vector, max_count); +} + +bool ChunkConstantInfo::Fetch(Transaction &transaction, row_t row) { + return UseVersion(transaction, insert_id) && 
!UseVersion(transaction, delete_id); +} + +void ChunkConstantInfo::CommitAppend(transaction_t commit_id, idx_t start, idx_t end) { + D_ASSERT(start == 0 && end == STANDARD_VECTOR_SIZE); + insert_id = commit_id; +} + +void ChunkConstantInfo::Serialize(Serializer &serializer) { + // we only need to write this node if any tuple deletions have been committed + bool is_deleted = insert_id >= TRANSACTION_ID_START || delete_id < TRANSACTION_ID_START; + if (!is_deleted) { + serializer.Write(ChunkInfoType::EMPTY_INFO); + return; + } + serializer.Write(type); + serializer.Write(start); +} + +unique_ptr ChunkConstantInfo::Deserialize(Deserializer &source) { + auto start = source.Read(); + + auto info = make_unique(start); + info->insert_id = 0; + info->delete_id = 0; + return move(info); +} + +//===--------------------------------------------------------------------===// +// Vector info +//===--------------------------------------------------------------------===// +ChunkVectorInfo::ChunkVectorInfo(idx_t start) + : ChunkInfo(start, ChunkInfoType::VECTOR_INFO), insert_id(0), same_inserted_id(true), any_deleted(false) { + for (idx_t i = 0; i < STANDARD_VECTOR_SIZE; i++) { + inserted[i] = 0; + deleted[i] = NOT_DELETED_ID; + } +} + +template +idx_t ChunkVectorInfo::TemplatedGetSelVector(transaction_t start_time, transaction_t transaction_id, + SelectionVector &sel_vector, idx_t max_count) { + idx_t count = 0; + if (same_inserted_id && !any_deleted) { + // all tuples have the same inserted id: and no tuples were deleted + if (OP::UseInsertedVersion(start_time, transaction_id, insert_id)) { + return max_count; + } else { + return 0; + } + } else if (same_inserted_id) { + if (!OP::UseInsertedVersion(start_time, transaction_id, insert_id)) { + return 0; + } + // have to check deleted flag + for (idx_t i = 0; i < max_count; i++) { + if (OP::UseDeletedVersion(start_time, transaction_id, deleted[i])) { + sel_vector.set_index(count++, i); + } + } + } else if (!any_deleted) { + // have to 
check inserted flag + for (idx_t i = 0; i < max_count; i++) { + if (OP::UseInsertedVersion(start_time, transaction_id, inserted[i])) { + sel_vector.set_index(count++, i); + } + } + } else { + // have to check both flags + for (idx_t i = 0; i < max_count; i++) { + if (OP::UseInsertedVersion(start_time, transaction_id, inserted[i]) && + OP::UseDeletedVersion(start_time, transaction_id, deleted[i])) { + sel_vector.set_index(count++, i); + } + } + } + return count; +} + +idx_t ChunkVectorInfo::GetSelVector(transaction_t start_time, transaction_t transaction_id, SelectionVector &sel_vector, + idx_t max_count) { + return TemplatedGetSelVector(start_time, transaction_id, sel_vector, max_count); +} + +idx_t ChunkVectorInfo::GetCommittedSelVector(transaction_t min_start_id, transaction_t min_transaction_id, + SelectionVector &sel_vector, idx_t max_count) { + return TemplatedGetSelVector(min_start_id, min_transaction_id, sel_vector, max_count); +} + +idx_t ChunkVectorInfo::GetSelVector(Transaction &transaction, SelectionVector &sel_vector, idx_t max_count) { + return GetSelVector(transaction.start_time, transaction.transaction_id, sel_vector, max_count); +} + +bool ChunkVectorInfo::Fetch(Transaction &transaction, row_t row) { + return UseVersion(transaction, inserted[row]) && !UseVersion(transaction, deleted[row]); +} + +idx_t ChunkVectorInfo::Delete(Transaction &transaction, row_t rows[], idx_t count) { + any_deleted = true; + + // first check the chunk for conflicts + idx_t deleted_tuples = 0; + for (idx_t i = 0; i < count; i++) { + if (deleted[rows[i]] == transaction.transaction_id) { + continue; + } + if (deleted[rows[i]] != NOT_DELETED_ID) { + // tuple was already deleted by another transaction + throw TransactionException("Conflict on tuple deletion!"); + } + if (inserted[rows[i]] >= TRANSACTION_ID_START) { + throw TransactionException("Deleting non-committed tuples is not supported (for now...)"); + } + deleted_tuples++; + } + // after verifying that there are no 
conflicts we mark the tuples as deleted + for (idx_t i = 0; i < count; i++) { + deleted[rows[i]] = transaction.transaction_id; + } + return deleted_tuples; +} + +void ChunkVectorInfo::CommitDelete(transaction_t commit_id, row_t rows[], idx_t count) { + for (idx_t i = 0; i < count; i++) { + deleted[rows[i]] = commit_id; + } +} + +void ChunkVectorInfo::Append(idx_t start, idx_t end, transaction_t commit_id) { + if (start == 0) { + insert_id = commit_id; + } else if (insert_id != commit_id) { + same_inserted_id = false; + insert_id = NOT_DELETED_ID; + } + for (idx_t i = start; i < end; i++) { + inserted[i] = commit_id; + } +} + +void ChunkVectorInfo::CommitAppend(transaction_t commit_id, idx_t start, idx_t end) { + if (same_inserted_id) { + insert_id = commit_id; } + for (idx_t i = start; i < end; i++) { + inserted[i] = commit_id; + } +} + +void ChunkVectorInfo::Serialize(Serializer &serializer) { + SelectionVector sel(STANDARD_VECTOR_SIZE); + transaction_t start_time = TRANSACTION_ID_START - 1; + transaction_t transaction_id = INVALID_INDEX; + idx_t count = GetSelVector(start_time, transaction_id, sel, STANDARD_VECTOR_SIZE); + if (count == STANDARD_VECTOR_SIZE) { + // nothing is deleted: skip writing anything + serializer.Write(ChunkInfoType::EMPTY_INFO); + return; + } + if (count == 0) { + // everything is deleted: write a constant vector + serializer.Write(ChunkInfoType::CONSTANT_INFO); + serializer.Write(start); + return; + } + // write a boolean vector + serializer.Write(ChunkInfoType::VECTOR_INFO); + serializer.Write(start); + bool deleted_tuples[STANDARD_VECTOR_SIZE]; + for (idx_t i = 0; i < STANDARD_VECTOR_SIZE; i++) { + deleted_tuples[i] = true; + } + for (idx_t i = 0; i < count; i++) { + deleted_tuples[sel.get_index(i)] = false; + } + serializer.WriteData((data_ptr_t)deleted_tuples, sizeof(bool) * STANDARD_VECTOR_SIZE); +} + +unique_ptr ChunkVectorInfo::Deserialize(Deserializer &source) { + auto start = source.Read(); + + auto result = make_unique(start); + 
result->any_deleted = true; + bool deleted_tuples[STANDARD_VECTOR_SIZE]; + source.ReadData((data_ptr_t)deleted_tuples, sizeof(bool) * STANDARD_VECTOR_SIZE); + for (idx_t i = 0; i < STANDARD_VECTOR_SIZE; i++) { + if (deleted_tuples[i]) { + result->deleted[i] = 0; + } + } + return move(result); +} + +} // namespace duckdb + + + + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/table/list_column_data.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + +namespace duckdb { + +//! List column data represents a list +class ListColumnData : public ColumnData { +public: + ListColumnData(DataTableInfo &info, idx_t column_index, idx_t start_row, LogicalType type, + ColumnData *parent = nullptr); + + //! The child-column of the list + unique_ptr child_column; + //! The validity column data of the struct + ValidityColumnData validity; + +public: + bool CheckZonemap(ColumnScanState &state, TableFilter &filter) override; + + void InitializeScan(ColumnScanState &state) override; + void InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) override; + + idx_t Scan(Transaction &transaction, idx_t vector_index, ColumnScanState &state, Vector &result) override; + idx_t ScanCommitted(idx_t vector_index, ColumnScanState &state, Vector &result, bool allow_updates) override; + idx_t ScanCount(ColumnScanState &state, Vector &result, idx_t count) override; + + void Skip(ColumnScanState &state, idx_t count = STANDARD_VECTOR_SIZE) override; + + void InitializeAppend(ColumnAppendState &state) override; + void Append(BaseStatistics &stats, ColumnAppendState &state, Vector &vector, idx_t count) override; + void RevertAppend(row_t start_row) override; + idx_t Fetch(ColumnScanState &state, row_t row_id, Vector &result) override; + void FetchRow(Transaction &transaction, ColumnFetchState &state, row_t row_id, Vector &result, + idx_t result_idx) override; + void 
Update(Transaction &transaction, idx_t column_index, Vector &update_vector, row_t *row_ids, idx_t offset, + idx_t update_count) override; + void UpdateColumn(Transaction &transaction, const vector &column_path, Vector &update_vector, + row_t *row_ids, idx_t update_count, idx_t depth) override; + unique_ptr GetUpdateStatistics() override; + + void CommitDropColumn() override; + + unique_ptr CreateCheckpointState(RowGroup &row_group, TableDataWriter &writer) override; + unique_ptr Checkpoint(RowGroup &row_group, TableDataWriter &writer) override; + + void DeserializeColumn(Deserializer &source) override; + + void GetStorageInfo(idx_t row_group_index, vector col_path, vector> &result) override; + +private: + list_entry_t FetchListEntry(idx_t row_idx); +}; + +} // namespace duckdb + + + + + + + +namespace duckdb { + +ColumnCheckpointState::ColumnCheckpointState(RowGroup &row_group, ColumnData &column_data, TableDataWriter &writer) + : row_group(row_group), column_data(column_data), writer(writer) { +} + +ColumnCheckpointState::~ColumnCheckpointState() { +} + +void ColumnCheckpointState::FlushSegment(unique_ptr segment) { + auto tuple_count = segment->count.load(); + if (tuple_count == 0) { // LCOV_EXCL_START + return; + } // LCOV_EXCL_STOP + + // merge the segment stats into the global stats + global_stats->Merge(*segment->stats.statistics); + + // get the buffer of the segment and pin it + auto &db = column_data.GetDatabase(); + auto &block_manager = BlockManager::GetBlockManager(db); + + bool block_is_constant = segment->stats.statistics->IsConstant(); + + block_id_t block_id; + uint32_t offset_in_block; + if (!block_is_constant) { + // non-constant block + // get a free block id to write to + block_id = block_manager.GetFreeBlockId(); + offset_in_block = 0; + } else { + // constant block: no need to write anything to disk besides the stats + // set up the compression function to constant + block_id = INVALID_BLOCK; + offset_in_block = 0; + + auto &config = 
DBConfig::GetConfig(db); + segment->function = + config.GetCompressionFunction(CompressionType::COMPRESSION_CONSTANT, segment->type.InternalType()); + } + + // construct the data pointer + DataPointer data_pointer; + data_pointer.block_pointer.block_id = block_id; + data_pointer.block_pointer.offset = offset_in_block; + data_pointer.row_start = row_group.start; + if (!data_pointers.empty()) { + auto &last_pointer = data_pointers.back(); + data_pointer.row_start = last_pointer.row_start + last_pointer.tuple_count; + } + data_pointer.tuple_count = tuple_count; + data_pointer.compression_type = segment->function->type; + data_pointer.statistics = segment->stats.statistics->Copy(); + + // convert the segment into a persistent segment that points to this block + segment->ConvertToPersistent(block_id, offset_in_block); + + // append the segment to the new segment tree + new_tree.AppendSegment(move(segment)); + data_pointers.push_back(move(data_pointer)); +} + +void ColumnCheckpointState::FlushToDisk() { + auto &meta_writer = writer.GetMetaWriter(); + + meta_writer.Write(data_pointers.size()); + // then write the data pointers themselves + for (idx_t k = 0; k < data_pointers.size(); k++) { + auto &data_pointer = data_pointers[k]; + meta_writer.Write(data_pointer.row_start); + meta_writer.Write(data_pointer.tuple_count); + meta_writer.Write(data_pointer.block_pointer.block_id); + meta_writer.Write(data_pointer.block_pointer.offset); + meta_writer.Write(data_pointer.compression_type); + data_pointer.statistics->Serialize(meta_writer); + } +} + +} // namespace duckdb + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/table/update_segment.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + + +namespace duckdb { +class ColumnData; +class DataTable; +class Vector; +struct UpdateInfo; +struct UpdateNode; + +class UpdateSegment { +public: + 
UpdateSegment(ColumnData &column_data); + ~UpdateSegment(); + + ColumnData &column_data; + +public: + bool HasUpdates() const; + bool HasUncommittedUpdates(idx_t vector_index); + bool HasUpdates(idx_t vector_index) const; + bool HasUpdates(idx_t start_row_idx, idx_t end_row_idx); + void ClearUpdates(); + + void FetchUpdates(Transaction &transaction, idx_t vector_index, Vector &result); + void FetchCommitted(idx_t vector_index, Vector &result); + void FetchCommittedRange(idx_t start_row, idx_t count, Vector &result); + void Update(Transaction &transaction, idx_t column_index, Vector &update, row_t *ids, idx_t offset, idx_t count, + Vector &base_data); + void FetchRow(Transaction &transaction, idx_t row_id, Vector &result, idx_t result_idx); + + void RollbackUpdate(UpdateInfo *info); + void CleanupUpdateInternal(const StorageLockKey &lock, UpdateInfo *info); + void CleanupUpdate(UpdateInfo *info); + + unique_ptr GetStatistics(); + StringHeap &GetStringHeap() { + return heap; + } + +private: + //! The lock for the update segment + StorageLock lock; + //! The root node (if any) + unique_ptr root; + //! Update statistics + SegmentStatistics stats; + //! Stats lock + mutex stats_lock; + //! Internal type size + idx_t type_size; + //! 
String heap, only used for strings + StringHeap heap; + +public: + typedef void (*initialize_update_function_t)(UpdateInfo *base_info, Vector &base_data, UpdateInfo *update_info, + Vector &update, const SelectionVector &sel); + typedef void (*merge_update_function_t)(UpdateInfo *base_info, Vector &base_data, UpdateInfo *update_info, + Vector &update, row_t *ids, idx_t count, const SelectionVector &sel); + typedef void (*fetch_update_function_t)(transaction_t start_time, transaction_t transaction_id, UpdateInfo *info, + Vector &result); + typedef void (*fetch_committed_function_t)(UpdateInfo *info, Vector &result); + typedef void (*fetch_committed_range_function_t)(UpdateInfo *info, idx_t start, idx_t end, idx_t result_offset, + Vector &result); + typedef void (*fetch_row_function_t)(transaction_t start_time, transaction_t transaction_id, UpdateInfo *info, + idx_t row_idx, Vector &result, idx_t result_idx); + typedef void (*rollback_update_function_t)(UpdateInfo *base_info, UpdateInfo *rollback_info); + typedef idx_t (*statistics_update_function_t)(UpdateSegment *segment, SegmentStatistics &stats, Vector &update, + idx_t offset, idx_t count, SelectionVector &sel); + +private: + initialize_update_function_t initialize_update_function; + merge_update_function_t merge_update_function; + fetch_update_function_t fetch_update_function; + fetch_committed_function_t fetch_committed_function; + fetch_committed_range_function_t fetch_committed_range; + fetch_row_function_t fetch_row_function; + rollback_update_function_t rollback_update_function; + statistics_update_function_t statistics_update_function; + +private: + void InitializeUpdateInfo(UpdateInfo &info, row_t *ids, const SelectionVector &sel, idx_t count, idx_t vector_index, + idx_t vector_offset); +}; + +struct UpdateNodeData { + unique_ptr info; + unique_ptr tuples; + unique_ptr tuple_data; +}; + +struct UpdateNode { + unique_ptr info[RowGroup::ROW_GROUP_VECTOR_COUNT]; +}; + +} // namespace duckdb + + + 
+//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/table/struct_column_data.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + +namespace duckdb { + +//! Struct column data represents a struct +class StructColumnData : public ColumnData { +public: + StructColumnData(DataTableInfo &info, idx_t column_index, idx_t start_row, LogicalType type, + ColumnData *parent = nullptr); + + //! The sub-columns of the struct + vector> sub_columns; + //! The validity column data of the struct + ValidityColumnData validity; + +public: + bool CheckZonemap(ColumnScanState &state, TableFilter &filter) override; + idx_t GetMaxEntry() override; + + void InitializeScan(ColumnScanState &state) override; + void InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) override; + + idx_t Scan(Transaction &transaction, idx_t vector_index, ColumnScanState &state, Vector &result) override; + idx_t ScanCommitted(idx_t vector_index, ColumnScanState &state, Vector &result, bool allow_updates) override; + idx_t ScanCount(ColumnScanState &state, Vector &result, idx_t count) override; + + void InitializeAppend(ColumnAppendState &state) override; + void Append(BaseStatistics &stats, ColumnAppendState &state, Vector &vector, idx_t count) override; + void RevertAppend(row_t start_row) override; + idx_t Fetch(ColumnScanState &state, row_t row_id, Vector &result) override; + void FetchRow(Transaction &transaction, ColumnFetchState &state, row_t row_id, Vector &result, + idx_t result_idx) override; + void Update(Transaction &transaction, idx_t column_index, Vector &update_vector, row_t *row_ids, idx_t offset, + idx_t update_count) override; + void UpdateColumn(Transaction &transaction, const vector &column_path, Vector &update_vector, + row_t *row_ids, idx_t update_count, idx_t depth) override; + unique_ptr GetUpdateStatistics() override; + + void CommitDropColumn() override; + + 
unique_ptr CreateCheckpointState(RowGroup &row_group, TableDataWriter &writer) override; + unique_ptr Checkpoint(RowGroup &row_group, TableDataWriter &writer) override; + + void DeserializeColumn(Deserializer &source) override; + + void GetStorageInfo(idx_t row_group_index, vector col_path, vector> &result) override; + + void Verify(RowGroup &parent) override; }; + } // namespace duckdb @@ -125175,135 +137597,727 @@ class InMemoryBlockManager : public BlockManager { +namespace duckdb { + +ColumnData::ColumnData(DataTableInfo &info, idx_t column_index, idx_t start_row, LogicalType type, ColumnData *parent) + : info(info), column_index(column_index), start(start_row), type(move(type)), parent(parent) { +} + +ColumnData::~ColumnData() { +} + +DatabaseInstance &ColumnData::GetDatabase() const { + return info.db; +} + +DataTableInfo &ColumnData::GetTableInfo() const { + return info; +} + +const LogicalType &ColumnData::RootType() const { + if (parent) { + return parent->RootType(); + } + return type; +} + +idx_t ColumnData::GetMaxEntry() { + auto last_segment = data.GetLastSegment(); + return last_segment ? last_segment->start + last_segment->count : start; +} + +void ColumnData::InitializeScan(ColumnScanState &state) { + state.current = (ColumnSegment *)data.GetRootSegment(); + state.row_index = state.current ? 
state.current->start : 0; + state.internal_index = state.row_index; + state.initialized = false; +} + +void ColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) { + state.current = (ColumnSegment *)data.GetSegment(row_idx); + state.row_index = row_idx; + state.internal_index = state.current->start; + state.initialized = false; +} + +idx_t ColumnData::ScanVector(ColumnScanState &state, Vector &result, idx_t remaining) { + if (!state.initialized) { + D_ASSERT(state.current); + state.current->InitializeScan(state); + state.initialized = true; + } + D_ASSERT(state.internal_index <= state.row_index); + if (state.internal_index < state.row_index) { + state.current->Skip(state); + } + D_ASSERT(state.current->type == type); + idx_t initial_remaining = remaining; + while (remaining > 0) { + D_ASSERT(state.row_index >= state.current->start && + state.row_index <= state.current->start + state.current->count); + idx_t scan_count = MinValue(remaining, state.current->start + state.current->count - state.row_index); + idx_t result_offset = initial_remaining - remaining; + state.current->Scan(state, scan_count, result, result_offset, scan_count == initial_remaining); + + state.row_index += scan_count; + remaining -= scan_count; + if (remaining > 0) { + if (!state.current->next) { + break; + } + state.current = (ColumnSegment *)state.current->next.get(); + state.current->InitializeScan(state); + state.segment_checked = false; + D_ASSERT(state.row_index >= state.current->start && + state.row_index <= state.current->start + state.current->count); + } + } + state.internal_index = state.row_index; + return initial_remaining - remaining; +} + +template +idx_t ColumnData::ScanVector(Transaction *transaction, idx_t vector_index, ColumnScanState &state, Vector &result) { + auto scan_count = ScanVector(state, result, STANDARD_VECTOR_SIZE); + + lock_guard update_guard(update_lock); + if (updates) { + if (!ALLOW_UPDATES && updates->HasUncommittedUpdates(vector_index)) { 
+ throw TransactionException("Cannot create index with outstanding updates"); + } + result.Normalify(scan_count); + if (SCAN_COMMITTED) { + updates->FetchCommitted(vector_index, result); + } else { + D_ASSERT(transaction); + updates->FetchUpdates(*transaction, vector_index, result); + } + } + return scan_count; +} + +template idx_t ColumnData::ScanVector(Transaction *transaction, idx_t vector_index, + ColumnScanState &state, Vector &result); +template idx_t ColumnData::ScanVector(Transaction *transaction, idx_t vector_index, ColumnScanState &state, + Vector &result); +template idx_t ColumnData::ScanVector(Transaction *transaction, idx_t vector_index, ColumnScanState &state, + Vector &result); +template idx_t ColumnData::ScanVector(Transaction *transaction, idx_t vector_index, ColumnScanState &state, + Vector &result); + +idx_t ColumnData::Scan(Transaction &transaction, idx_t vector_index, ColumnScanState &state, Vector &result) { + return ScanVector(&transaction, vector_index, state, result); +} + +idx_t ColumnData::ScanCommitted(idx_t vector_index, ColumnScanState &state, Vector &result, bool allow_updates) { + if (allow_updates) { + return ScanVector(nullptr, vector_index, state, result); + } else { + return ScanVector(nullptr, vector_index, state, result); + } +} + +void ColumnData::ScanCommittedRange(idx_t row_group_start, idx_t offset_in_row_group, idx_t count, Vector &result) { + ColumnScanState child_state; + InitializeScanWithOffset(child_state, row_group_start + offset_in_row_group); + auto scan_count = ScanVector(child_state, result, count); + if (updates) { + result.Normalify(scan_count); + updates->FetchCommittedRange(offset_in_row_group, count, result); + } +} + +idx_t ColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t count) { + if (count == 0) { + return 0; + } + // ScanCount can only be used if there are no updates + D_ASSERT(!updates); + return ScanVector(state, result, count); +} + +void ColumnData::Select(Transaction 
&transaction, idx_t vector_index, ColumnScanState &state, Vector &result, + SelectionVector &sel, idx_t &count, const TableFilter &filter) { + idx_t scan_count = Scan(transaction, vector_index, state, result); + result.Normalify(scan_count); + ColumnSegment::FilterSelection(sel, result, filter, count, FlatVector::Validity(result)); +} + +void ColumnData::FilterScan(Transaction &transaction, idx_t vector_index, ColumnScanState &state, Vector &result, + SelectionVector &sel, idx_t count) { + Scan(transaction, vector_index, state, result); + result.Slice(sel, count); +} + +void ColumnData::FilterScanCommitted(idx_t vector_index, ColumnScanState &state, Vector &result, SelectionVector &sel, + idx_t count, bool allow_updates) { + ScanCommitted(vector_index, state, result, allow_updates); + result.Slice(sel, count); +} + +void ColumnData::Skip(ColumnScanState &state, idx_t count) { + state.Next(count); +} + +void ColumnScanState::NextInternal(idx_t count) { + if (!current) { + //! There is no column segment + return; + } + row_index += count; + while (row_index >= current->start + current->count) { + current = (ColumnSegment *)current->next.get(); + initialized = false; + segment_checked = false; + if (!current) { + break; + } + } + D_ASSERT(!current || (row_index >= current->start && row_index < current->start + current->count)); +} + +void ColumnScanState::Next(idx_t count) { + NextInternal(count); + for (auto &child_state : child_states) { + child_state.Next(count); + } +} + +void ColumnScanState::NextVector() { + Next(STANDARD_VECTOR_SIZE); +} + +void ColumnData::Append(BaseStatistics &stats, ColumnAppendState &state, Vector &vector, idx_t count) { + VectorData vdata; + vector.Orrify(count, vdata); + AppendData(stats, state, vdata, count); +} + +void ColumnData::InitializeAppend(ColumnAppendState &state) { + lock_guard tree_lock(data.node_lock); + if (data.nodes.empty()) { + // no segments yet, append an empty segment + AppendTransientSegment(start); + } + auto 
segment = (ColumnSegment *)data.GetLastSegment(); + if (segment->segment_type == ColumnSegmentType::PERSISTENT) { + // no transient segments yet + auto total_rows = segment->start + segment->count; + AppendTransientSegment(total_rows); + state.current = (ColumnSegment *)data.GetLastSegment(); + } else { + state.current = (ColumnSegment *)segment; + } + + D_ASSERT(state.current->segment_type == ColumnSegmentType::TRANSIENT); + state.current->InitializeAppend(state); +} + +void ColumnData::AppendData(BaseStatistics &stats, ColumnAppendState &state, VectorData &vdata, idx_t count) { + idx_t offset = 0; + while (true) { + // append the data from the vector + idx_t copied_elements = state.current->Append(state, vdata, offset, count); + stats.Merge(*state.current->stats.statistics); + if (copied_elements == count) { + // finished copying everything + break; + } + + // we couldn't fit everything we wanted in the current column segment, create a new one + { + lock_guard tree_lock(data.node_lock); + AppendTransientSegment(state.current->start + state.current->count); + state.current = (ColumnSegment *)data.GetLastSegment(); + state.current->InitializeAppend(state); + } + offset += copied_elements; + count -= copied_elements; + } +} + +void ColumnData::RevertAppend(row_t start_row) { + lock_guard tree_lock(data.node_lock); + // check if this row is in the segment tree at all + if (idx_t(start_row) >= data.nodes.back().row_start + data.nodes.back().node->count) { + // the start row is equal to the final portion of the column data: nothing was ever appended here + D_ASSERT(idx_t(start_row) == data.nodes.back().row_start + data.nodes.back().node->count); + return; + } + // find the segment index that the current row belongs to + idx_t segment_index = data.GetSegmentIndex(start_row); + auto segment = data.nodes[segment_index].node; + auto &transient = (ColumnSegment &)*segment; + D_ASSERT(transient.segment_type == ColumnSegmentType::TRANSIENT); + + // remove any segments AFTER 
this segment: they should be deleted entirely + if (segment_index < data.nodes.size() - 1) { + data.nodes.erase(data.nodes.begin() + segment_index + 1, data.nodes.end()); + } + segment->next = nullptr; + transient.RevertAppend(start_row); +} + +idx_t ColumnData::Fetch(ColumnScanState &state, row_t row_id, Vector &result) { + D_ASSERT(row_id >= 0); + D_ASSERT(idx_t(row_id) >= start); + // perform the fetch within the segment + state.row_index = start + ((row_id - start) / STANDARD_VECTOR_SIZE * STANDARD_VECTOR_SIZE); + state.current = (ColumnSegment *)data.GetSegment(state.row_index); + state.internal_index = state.current->start; + return ScanVector(state, result, STANDARD_VECTOR_SIZE); +} + +void ColumnData::FetchRow(Transaction &transaction, ColumnFetchState &state, row_t row_id, Vector &result, + idx_t result_idx) { + auto segment = (ColumnSegment *)data.GetSegment(row_id); + + // now perform the fetch within the segment + segment->FetchRow(state, row_id, result, result_idx); + // merge any updates made to this row + lock_guard update_guard(update_lock); + if (updates) { + updates->FetchRow(transaction, row_id, result, result_idx); + } +} + +void ColumnData::Update(Transaction &transaction, idx_t column_index, Vector &update_vector, row_t *row_ids, + idx_t offset, idx_t update_count) { + lock_guard update_guard(update_lock); + if (!updates) { + updates = make_unique(*this); + } + Vector base_vector(type); + ColumnScanState state; + auto fetch_count = Fetch(state, row_ids[offset], base_vector); + + base_vector.Normalify(fetch_count); + updates->Update(transaction, column_index, update_vector, row_ids, offset, update_count, base_vector); +} + +void ColumnData::UpdateColumn(Transaction &transaction, const vector &column_path, Vector &update_vector, + row_t *row_ids, idx_t update_count, idx_t depth) { + // this method should only be called at the end of the path in the base column case + D_ASSERT(depth >= column_path.size()); + ColumnData::Update(transaction, 
column_path[0], update_vector, row_ids, 0, update_count); +} + +unique_ptr ColumnData::GetUpdateStatistics() { + lock_guard update_guard(update_lock); + return updates ? updates->GetStatistics() : nullptr; +} + +void ColumnData::AppendTransientSegment(idx_t start_row) { + auto new_segment = ColumnSegment::CreateTransientSegment(GetDatabase(), type, start_row); + data.AppendSegment(move(new_segment)); +} + +void ColumnData::CommitDropColumn() { + auto &block_manager = BlockManager::GetBlockManager(GetDatabase()); + auto segment = (ColumnSegment *)data.GetRootSegment(); + while (segment) { + if (segment->segment_type == ColumnSegmentType::PERSISTENT) { + auto block_id = segment->GetBlockId(); + if (block_id != INVALID_BLOCK) { + block_manager.MarkBlockAsModified(block_id); + } + } + segment = (ColumnSegment *)segment->next.get(); + } +} + +unique_ptr ColumnData::CreateCheckpointState(RowGroup &row_group, TableDataWriter &writer) { + return make_unique(row_group, *this, writer); +} + +void ColumnData::CheckpointScan(ColumnSegment *segment, ColumnScanState &state, idx_t row_group_start, idx_t count, + Vector &scan_vector) { + segment->Scan(state, count, scan_vector, 0, true); + if (updates) { + scan_vector.Normalify(count); + updates->FetchCommittedRange(state.row_index - row_group_start, count, scan_vector); + } +} + +unique_ptr ColumnData::Checkpoint(RowGroup &row_group, TableDataWriter &writer) { + // scan the segments of the column data + // set up the checkpoint state + auto checkpoint_state = CreateCheckpointState(row_group, writer); + checkpoint_state->global_stats = BaseStatistics::CreateEmpty(type); + + if (!data.root_node) { + // empty table: flush the empty list + return checkpoint_state; + } + lock_guard update_guard(update_lock); + + ColumnDataCheckpointer checkpointer(*this, row_group, *checkpoint_state); + checkpointer.Checkpoint(move(data.root_node)); + + // replace the old tree with the new one + data.Replace(checkpoint_state->new_tree); + + return 
checkpoint_state; +} + +void ColumnData::DeserializeColumn(Deserializer &source) { + // load the data pointers for the column + idx_t data_pointer_count = source.Read(); + for (idx_t data_ptr = 0; data_ptr < data_pointer_count; data_ptr++) { + // read the data pointer + DataPointer data_pointer; + data_pointer.row_start = source.Read(); + data_pointer.tuple_count = source.Read(); + data_pointer.block_pointer.block_id = source.Read(); + data_pointer.block_pointer.offset = source.Read(); + data_pointer.compression_type = source.Read(); + data_pointer.statistics = BaseStatistics::Deserialize(source, type); + + // create a persistent segment + auto segment = ColumnSegment::CreatePersistentSegment( + GetDatabase(), data_pointer.block_pointer.block_id, data_pointer.block_pointer.offset, type, + data_pointer.row_start, data_pointer.tuple_count, data_pointer.compression_type, + move(data_pointer.statistics)); + data.AppendSegment(move(segment)); + } +} + +shared_ptr ColumnData::Deserialize(DataTableInfo &info, idx_t column_index, idx_t start_row, + Deserializer &source, const LogicalType &type, ColumnData *parent) { + auto entry = ColumnData::CreateColumn(info, column_index, start_row, type, parent); + entry->DeserializeColumn(source); + return entry; +} + +void ColumnData::GetStorageInfo(idx_t row_group_index, vector col_path, vector> &result) { + D_ASSERT(!col_path.empty()); + + // convert the column path to a string + string col_path_str = "["; + for (idx_t i = 0; i < col_path.size(); i++) { + if (i > 0) { + col_path_str += ", "; + } + col_path_str += to_string(col_path[i]); + } + col_path_str += "]"; + + // iterate over the segments + idx_t segment_idx = 0; + auto segment = (ColumnSegment *)data.GetRootSegment(); + while (segment) { + vector column_info; + // row_group_id + column_info.push_back(Value::BIGINT(row_group_index)); + // column_id + column_info.push_back(Value::BIGINT(col_path[0])); + // column_path + column_info.emplace_back(col_path_str); + // segment_id 
+ column_info.push_back(Value::BIGINT(segment_idx)); + // segment_type + column_info.emplace_back(type.ToString()); + // start + column_info.push_back(Value::BIGINT(segment->start)); + // count + column_info.push_back(Value::BIGINT(segment->count)); + // compression + column_info.emplace_back(CompressionTypeToString(segment->function->type)); + // stats + column_info.emplace_back(segment->stats.statistics ? segment->stats.statistics->ToString() + : string("No Stats")); + // has_updates + column_info.push_back(Value::BOOLEAN(updates ? true : false)); + // persistent + // block_id + // block_offset + if (segment->segment_type == ColumnSegmentType::PERSISTENT) { + column_info.push_back(Value::BOOLEAN(true)); + column_info.push_back(Value::BIGINT(segment->GetBlockId())); + column_info.push_back(Value::BIGINT(segment->GetBlockOffset())); + } else { + column_info.push_back(Value::BOOLEAN(false)); + column_info.emplace_back(); + column_info.emplace_back(); + } + + result.push_back(move(column_info)); + + segment_idx++; + segment = (ColumnSegment *)segment->next.get(); + } +} + +void ColumnData::Verify(RowGroup &parent) { +#ifdef DEBUG + D_ASSERT(this->start == parent.start); + auto root = data.GetRootSegment(); + if (root) { + D_ASSERT(root != nullptr); + D_ASSERT(root->start == this->start); + idx_t prev_end = root->start; + while (root) { + D_ASSERT(prev_end == root->start); + prev_end = root->start + root->count; + if (!root->next) { + D_ASSERT(prev_end == parent.start + parent.count); + } + root = root->next.get(); + } + } else { + if (type.InternalType() != PhysicalType::STRUCT) { + D_ASSERT(parent.count == 0); + } + } +#endif +} +template +static RET CreateColumnInternal(DataTableInfo &info, idx_t column_index, idx_t start_row, const LogicalType &type, + ColumnData *parent) { + if (type.InternalType() == PhysicalType::STRUCT) { + return OP::template Create(info, column_index, start_row, type, parent); + } else if (type.InternalType() == PhysicalType::LIST) { + 
return OP::template Create(info, column_index, start_row, type, parent); + } else if (type.id() == LogicalTypeId::VALIDITY) { + return OP::template Create(info, column_index, start_row, parent); + } + return OP::template Create(info, column_index, start_row, type, parent); +} +shared_ptr ColumnData::CreateColumn(DataTableInfo &info, idx_t column_index, idx_t start_row, + const LogicalType &type, ColumnData *parent) { + return CreateColumnInternal, SharedConstructor>(info, column_index, start_row, type, parent); +} +unique_ptr ColumnData::CreateColumnUnique(DataTableInfo &info, idx_t column_index, idx_t start_row, + const LogicalType &type, ColumnData *parent) { + return CreateColumnInternal, UniqueConstructor>(info, column_index, start_row, type, parent); +} +} // namespace duckdb namespace duckdb { -StorageManager::StorageManager(DatabaseInstance &db, string path, bool read_only) - : db(db), path(move(path)), wal(db), read_only(read_only) { -} - -StorageManager::~StorageManager() { +ColumnDataCheckpointer::ColumnDataCheckpointer(ColumnData &col_data_p, RowGroup &row_group_p, + ColumnCheckpointState &state_p) + : col_data(col_data_p), row_group(row_group_p), state(state_p), + is_validity(GetType().id() == LogicalTypeId::VALIDITY), + intermediate(is_validity ? 
LogicalType::BOOLEAN : GetType(), true, is_validity) { + auto &config = DBConfig::GetConfig(GetDatabase()); + compression_functions = config.GetCompressionFunctions(GetType().InternalType()); } -StorageManager &StorageManager::GetStorageManager(ClientContext &context) { - return StorageManager::GetStorageManager(*context.db); +DatabaseInstance &ColumnDataCheckpointer::GetDatabase() { + return col_data.GetDatabase(); } -BufferManager &BufferManager::GetBufferManager(ClientContext &context) { - return BufferManager::GetBufferManager(*context.db); +const LogicalType &ColumnDataCheckpointer::GetType() const { + return col_data.type; } -ObjectCache &ObjectCache::GetObjectCache(ClientContext &context) { - return context.db->GetObjectCache(); +ColumnData &ColumnDataCheckpointer::GetColumnData() { + return col_data; } -bool ObjectCache::ObjectCacheEnabled(ClientContext &context) { - return context.db->config.object_cache_enable; +RowGroup &ColumnDataCheckpointer::GetRowGroup() { + return row_group; } -bool StorageManager::InMemory() { - return path.empty() || path == ":memory:"; +ColumnCheckpointState &ColumnDataCheckpointer::GetCheckpointState() { + return state; } -void StorageManager::Initialize() { - bool in_memory = InMemory(); - if (in_memory && read_only) { - throw CatalogException("Cannot launch in-memory database in read-only mode!"); - } - - // first initialize the base system catalogs - // these are never written to the WAL - Connection con(db); - con.BeginTransaction(); - - auto &catalog = Catalog::GetCatalog(*con.context); +void ColumnDataCheckpointer::ScanSegments(const std::function &callback) { + Vector scan_vector(intermediate.GetType(), nullptr); + for (auto segment = (ColumnSegment *)owned_segment.get(); segment; segment = (ColumnSegment *)segment->next.get()) { + ColumnScanState scan_state; + scan_state.current = segment; + segment->InitializeScan(scan_state); - // create the default schema - CreateSchemaInfo info; - info.schema = DEFAULT_SCHEMA; - 
info.internal = true; - catalog.CreateSchema(*con.context, &info); + for (idx_t base_row_index = 0; base_row_index < segment->count; base_row_index += STANDARD_VECTOR_SIZE) { + scan_vector.Reference(intermediate); - // initialize default functions - BuiltinFunctions builtin(*con.context, catalog); - builtin.Initialize(); + idx_t count = MinValue(segment->count - base_row_index, STANDARD_VECTOR_SIZE); + scan_state.row_index = segment->start + base_row_index; - // commit transactions - con.Commit(); + col_data.CheckpointScan(segment, scan_state, row_group.start, count, scan_vector); - if (!in_memory) { - // create or load the database from disk, if not in-memory mode - LoadDatabase(); - } else { - auto &config = DBConfig::GetConfig(*con.context); - block_manager = make_unique(); - buffer_manager = make_unique(db, config.temporary_directory, config.maximum_memory); + callback(scan_vector, count); + } } } -void StorageManager::LoadDatabase() { - string wal_path = path + ".wal"; - auto &fs = db.GetFileSystem(); - auto &config = db.config; - bool truncate_wal = false; - // first check if the database exists - if (!fs.FileExists(path)) { - if (read_only) { - throw CatalogException("Cannot open database \"%s\" in read-only mode: database does not exist", path); +unique_ptr ColumnDataCheckpointer::DetectBestCompressionMethod(idx_t &compression_idx) { + D_ASSERT(!compression_functions.empty()); + auto &config = DBConfig::GetConfig(GetDatabase()); + if (config.force_compression != CompressionType::COMPRESSION_INVALID) { + // force_compression flag has been set + // check if this compression method is available + bool found = false; + for (idx_t i = 0; i < compression_functions.size(); i++) { + if (compression_functions[i]->type == config.force_compression) { + found = true; + break; + } } - // check if the WAL exists - if (fs.FileExists(wal_path)) { - // WAL file exists but database file does not - // remove the WAL - fs.RemoveFile(wal_path); + if (found) { + // the 
force_compression method is available + // clear all other compression methods + for (idx_t i = 0; i < compression_functions.size(); i++) { + if (compression_functions[i]->type != config.force_compression) { + compression_functions[i] = nullptr; + } + } } - // initialize the block manager while creating a new db file - block_manager = make_unique(db, path, read_only, true, config.use_direct_io); - buffer_manager = make_unique(db, config.temporary_directory, config.maximum_memory); - } else { - // initialize the block manager while loading the current db file - auto sf_bm = make_unique(db, path, read_only, false, config.use_direct_io); - auto sf = sf_bm.get(); - block_manager = move(sf_bm); - buffer_manager = make_unique(db, config.temporary_directory, config.maximum_memory); - sf->LoadFreeList(); + } - //! Load from storage - CheckpointManager checkpointer(db); - checkpointer.LoadFromStorage(); - // check if the WAL file exists - if (fs.FileExists(wal_path)) { - // replay the WAL - truncate_wal = WriteAheadLog::Replay(db, wal_path); + // set up the analyze states for each compression method + vector> analyze_states; + analyze_states.reserve(compression_functions.size()); + for (idx_t i = 0; i < compression_functions.size(); i++) { + if (!compression_functions[i]) { + analyze_states.push_back(nullptr); + continue; } + analyze_states.push_back(compression_functions[i]->init_analyze(col_data, col_data.type.InternalType())); } - // initialize the WAL file - if (!read_only) { - wal.Initialize(wal_path); - if (truncate_wal) { - wal.Truncate(0); + + // scan over all the segments and run the analyze step + ScanSegments([&](Vector &scan_vector, idx_t count) { + for (idx_t i = 0; i < compression_functions.size(); i++) { + if (!compression_functions[i]) { + continue; + } + auto success = compression_functions[i]->analyze(*analyze_states[i], scan_vector, count); + if (!success) { + // could not use this compression function on this data set + // erase it + 
compression_functions[i] = nullptr; + analyze_states[i].reset(); + } + } + }); + + // now that we have passed over all the data, we need to figure out the best method + // we do this using the final_analyze method + unique_ptr state; + compression_idx = INVALID_INDEX; + idx_t best_score = NumericLimits::Maximum(); + for (idx_t i = 0; i < compression_functions.size(); i++) { + if (!compression_functions[i]) { + continue; + } + auto score = compression_functions[i]->final_analyze(*analyze_states[i]); + if (score < best_score) { + compression_idx = i; + best_score = score; + state = move(analyze_states[i]); } } + return state; } -void StorageManager::CreateCheckpoint(bool delete_wal, bool force_checkpoint) { - if (InMemory() || read_only || !wal.initialized) { - return; +void ColumnDataCheckpointer::WriteToDisk() { + // there were changes or transient segments + // we need to rewrite the column segments to disk + + // first we check the current segments + // if there are any persistent segments, we will mark their old block ids as modified + // since the segments will be rewritten their old on disk data is no longer required + auto &block_manager = BlockManager::GetBlockManager(GetDatabase()); + for (auto segment = (ColumnSegment *)owned_segment.get(); segment; segment = (ColumnSegment *)segment->next.get()) { + if (segment->segment_type == ColumnSegmentType::PERSISTENT) { + // persistent segment has updates: mark it as modified and rewrite the block with the merged updates + auto block_id = segment->GetBlockId(); + if (block_id != INVALID_BLOCK) { + block_manager.MarkBlockAsModified(block_id); + } + } } - if (wal.GetWALSize() > 0 || db.config.force_checkpoint || force_checkpoint) { - // we only need to checkpoint if there is anything in the WAL - CheckpointManager checkpointer(db); - checkpointer.CreateCheckpoint(); + + // now we need to write our segment + // we will first run an analyze step that determines which compression function to use + idx_t compression_idx; 
+ auto analyze_state = DetectBestCompressionMethod(compression_idx); + + if (!analyze_state) { + throw InternalException("No suitable compression/storage method found to store column"); } - if (delete_wal) { - wal.Delete(); + + // now that we have analyzed the compression functions we can start writing to disk + auto best_function = compression_functions[compression_idx]; + auto compress_state = best_function->init_compression(*this, move(analyze_state)); + ScanSegments( + [&](Vector &scan_vector, idx_t count) { best_function->compress(*compress_state, scan_vector, count); }); + best_function->compress_finalize(*compress_state); + + // now we actually write the data to disk + owned_segment.reset(); +} + +bool ColumnDataCheckpointer::HasChanges() { + for (auto segment = (ColumnSegment *)owned_segment.get(); segment; segment = (ColumnSegment *)segment->next.get()) { + if (segment->segment_type == ColumnSegmentType::TRANSIENT) { + // transient segment: always need to write to disk + return true; + } else { + // persistent segment; check if there were any updates or deletions in this segment + idx_t start_row_idx = segment->start - row_group.start; + idx_t end_row_idx = start_row_idx + segment->count; + if (col_data.updates && col_data.updates->HasUpdates(start_row_idx, end_row_idx)) { + return true; + } + } + } + return false; +} + +void ColumnDataCheckpointer::WritePersistentSegments() { + // all segments are persistent and there are no updates + // we only need to write the metadata + auto segment = (ColumnSegment *)owned_segment.get(); + while (segment) { + auto next_segment = move(segment->next); + + D_ASSERT(segment->segment_type == ColumnSegmentType::PERSISTENT); + + // set up the data pointer directly using the data from the persistent segment + DataPointer pointer; + pointer.block_pointer.block_id = segment->GetBlockId(); + pointer.block_pointer.offset = segment->GetBlockOffset(); + pointer.row_start = segment->start; + pointer.tuple_count = segment->count; + 
pointer.compression_type = segment->function->type; + pointer.statistics = segment->stats.statistics->Copy(); + + // merge the persistent stats into the global column stats + state.global_stats->Merge(*segment->stats.statistics); + + // directly append the current segment to the new tree + state.new_tree.AppendSegment(move(owned_segment)); + + state.data_pointers.push_back(move(pointer)); + + // move to the next segment in the list + owned_segment = move(next_segment); + segment = (ColumnSegment *)owned_segment.get(); + } +} + +void ColumnDataCheckpointer::Checkpoint(unique_ptr segment) { + D_ASSERT(!owned_segment); + this->owned_segment = move(segment); + // first check if any of the segments have changes + if (!HasChanges()) { + // no changes: only need to write the metadata for this column + WritePersistentSegments(); + } else { + // there are changes: rewrite the set of columns + WriteToDisk(); } } @@ -125319,673 +138333,1194 @@ void StorageManager::CreateCheckpoint(bool delete_wal, bool force_checkpoint) { + + + +#include + namespace duckdb { -StringSegment::StringSegment(DatabaseInstance &db, idx_t row_start, block_id_t block_id) - : UncompressedSegment(db, PhysicalType::VARCHAR, row_start) { - this->max_vector_count = 0; - // the vector_size is given in the size of the dictionary offsets - this->vector_size = STANDARD_VECTOR_SIZE * sizeof(int32_t); +unique_ptr ColumnSegment::CreatePersistentSegment(DatabaseInstance &db, block_id_t block_id, + idx_t offset, const LogicalType &type, idx_t start, + idx_t count, CompressionType compression_type, + unique_ptr statistics) { + D_ASSERT(offset == 0); + auto &config = DBConfig::GetConfig(db); + CompressionFunction *function; + if (block_id == INVALID_BLOCK) { + function = config.GetCompressionFunction(CompressionType::COMPRESSION_CONSTANT, type.InternalType()); + } else { + function = config.GetCompressionFunction(compression_type, type.InternalType()); + } + return make_unique(db, type, 
ColumnSegmentType::PERSISTENT, start, count, function, move(statistics), + block_id, offset); +} + +unique_ptr ColumnSegment::CreateTransientSegment(DatabaseInstance &db, const LogicalType &type, + idx_t start) { + auto &config = DBConfig::GetConfig(db); + auto function = config.GetCompressionFunction(CompressionType::COMPRESSION_UNCOMPRESSED, type.InternalType()); + return make_unique(db, type, ColumnSegmentType::TRANSIENT, start, 0, function, nullptr, + INVALID_BLOCK, idx_t(-1)); +} +ColumnSegment::ColumnSegment(DatabaseInstance &db, LogicalType type_p, ColumnSegmentType segment_type, idx_t start, + idx_t count, CompressionFunction *function_p, unique_ptr statistics, + block_id_t block_id_p, idx_t offset_p) + : SegmentBase(start, count), db(db), type(move(type_p)), type_size(GetTypeIdSize(type.InternalType())), + segment_type(segment_type), function(function_p), stats(type, move(statistics)), block_id(block_id_p), + offset(offset_p) { + D_ASSERT(function); auto &buffer_manager = BufferManager::GetBufferManager(db); if (block_id == INVALID_BLOCK) { - // start off with an empty string segment: allocate space for it - this->block = buffer_manager.RegisterMemory(Storage::BLOCK_ALLOC_SIZE, false); - auto handle = buffer_manager.Pin(block); - SetDictionaryOffset(*handle, sizeof(idx_t)); - - ExpandStringSegment(handle->node->buffer); + // no block id specified + // there are two cases here: + // transient: allocate a buffer for the uncompressed segment + // persistent: constant segment, no need to allocate anything + if (segment_type == ColumnSegmentType::TRANSIENT) { + this->block = buffer_manager.RegisterMemory(Storage::BLOCK_SIZE, false); + } } else { + D_ASSERT(segment_type == ColumnSegmentType::PERSISTENT); this->block = buffer_manager.RegisterBlock(block_id); } + if (function->init_segment) { + segment_state = function->init_segment(*this, block_id); + } } -void StringSegment::SetDictionaryOffset(BufferHandle &handle, idx_t offset) { - Store(offset, 
handle.node->buffer + Storage::BLOCK_SIZE - sizeof(idx_t)); +ColumnSegment::~ColumnSegment() { } -idx_t StringSegment::GetDictionaryOffset(BufferHandle &handle) { - return Load(handle.node->buffer + Storage::BLOCK_SIZE - sizeof(idx_t)); +//===--------------------------------------------------------------------===// +// Scan +//===--------------------------------------------------------------------===// +void ColumnSegment::InitializeScan(ColumnScanState &state) { + state.scan_state = function->init_scan(*this); } -StringSegment::~StringSegment() { - while (head) { - // prevent deep recursion here - head = move(head->next); +void ColumnSegment::Scan(ColumnScanState &state, idx_t scan_count, Vector &result, idx_t result_offset, + bool entire_vector) { + if (entire_vector) { + D_ASSERT(result_offset == 0); + Scan(state, scan_count, result); + } else { + D_ASSERT(result.GetVectorType() == VectorType::FLAT_VECTOR); + ScanPartial(state, scan_count, result, result_offset); + D_ASSERT(result.GetVectorType() == VectorType::FLAT_VECTOR); } } -void StringSegment::ExpandStringSegment(data_ptr_t baseptr) { - // clear the nullmask for this vector - max_vector_count++; +void ColumnSegment::Skip(ColumnScanState &state) { + function->skip(*this, state, state.row_index - state.internal_index); + state.internal_index = state.row_index; } -//===--------------------------------------------------------------------===// -// Scan -//===--------------------------------------------------------------------===// -void StringSegment::InitializeScan(ColumnScanState &state) { - // pin the primary buffer - auto &buffer_manager = BufferManager::GetBufferManager(db); - state.primary_handle = buffer_manager.Pin(block); +void ColumnSegment::Scan(ColumnScanState &state, idx_t scan_count, Vector &result) { + function->scan_vector(*this, state, scan_count, result); +} + +void ColumnSegment::ScanPartial(ColumnScanState &state, idx_t scan_count, Vector &result, idx_t result_offset) { + 
function->scan_partial(*this, state, scan_count, result, result_offset); } //===--------------------------------------------------------------------===// -// Filter base data +// Fetch //===--------------------------------------------------------------------===// -void StringSegment::ReadString(string_t *result_data, Vector &result, data_ptr_t baseptr, int32_t *dict_offset, - idx_t src_idx, idx_t res_idx, idx_t &update_idx, size_t vector_index) { - result_data[res_idx] = FetchStringFromDict(result, baseptr, dict_offset[src_idx]); +void ColumnSegment::FetchRow(ColumnFetchState &state, row_t row_id, Vector &result, idx_t result_idx) { + function->fetch_row(*this, state, row_id - this->start, result, result_idx); +} + +void ColumnSegment::InitializeAppend(ColumnAppendState &state) { + D_ASSERT(segment_type == ColumnSegmentType::TRANSIENT); } //===--------------------------------------------------------------------===// -// Fetch base data +// Append //===--------------------------------------------------------------------===// -void StringSegment::FetchBaseData(ColumnScanState &state, idx_t vector_index, Vector &result) { - // clear any previously locked buffers and get the primary buffer handle - auto handle = state.primary_handle.get(); +idx_t ColumnSegment::Append(ColumnAppendState &state, VectorData &append_data, idx_t offset, idx_t count) { + D_ASSERT(segment_type == ColumnSegmentType::TRANSIENT); + if (!function->append) { + throw InternalException("Attempting to append to a compressed segment without append method"); + } + return function->append(*this, stats, append_data, offset, count); +} - // fetch the data from the base segment - FetchBaseData(state, handle->node->buffer, vector_index, result, GetVectorCount(vector_index)); +void ColumnSegment::RevertAppend(idx_t start_row) { + D_ASSERT(segment_type == ColumnSegmentType::TRANSIENT); + if (function->revert_append) { + function->revert_append(*this, start_row); + } + this->count = start_row - this->start; } 
-void StringSegment::FetchBaseData(ColumnScanState &state, data_ptr_t baseptr, idx_t vector_index, Vector &result, - idx_t count) { - auto base = baseptr + vector_index * vector_size; +//===--------------------------------------------------------------------===// +// Convert To Persistent +//===--------------------------------------------------------------------===// +void ColumnSegment::ConvertToPersistent(block_id_t block_id_p, idx_t offset_p) { + D_ASSERT(segment_type == ColumnSegmentType::TRANSIENT); + segment_type = ColumnSegmentType::PERSISTENT; + block_id = block_id_p; + offset = offset_p; + + if (block_id == INVALID_BLOCK) { + // constant block: reset the block buffer + block.reset(); + } else { + // non-constant block: write the block to disk + auto &buffer_manager = BufferManager::GetBufferManager(db); + auto &block_manager = BlockManager::GetBlockManager(db); - auto base_data = (int32_t *)base; - auto result_data = FlatVector::GetData(result); + // the data for the block already exists in-memory of our block + // instead of copying the data we alter some metadata so the buffer points to an on-disk block + block = buffer_manager.ConvertToPersistent(block_manager, block_id, move(block)); + } - // no updates: fetch only from the string dictionary - for (idx_t i = 0; i < count; i++) { - result_data[i] = FetchStringFromDict(result, baseptr, base_data[i]); + segment_state.reset(); + if (function->init_segment) { + segment_state = function->init_segment(*this, block_id); } } //===--------------------------------------------------------------------===// -// Fetch strings +// Filter Selection //===--------------------------------------------------------------------===// -void StringSegment::FetchStringLocations(data_ptr_t baseptr, row_t *ids, idx_t vector_index, idx_t vector_offset, - idx_t count, string_location_t result[]) { - auto base = baseptr + vector_index * vector_size; - auto base_data = (int32_t *)base; - - // no updates: fetch strings from base vector 
- for (idx_t i = 0; i < count; i++) { - auto id = ids[i] - vector_offset; - result[i] = FetchStringLocation(baseptr, base_data[id]); +template +static idx_t TemplatedFilterSelection(T *vec, T *predicate, SelectionVector &sel, idx_t approved_tuple_count, + ValidityMask &mask, SelectionVector &result_sel) { + idx_t result_count = 0; + for (idx_t i = 0; i < approved_tuple_count; i++) { + auto idx = sel.get_index(i); + if ((!HAS_NULL || mask.RowIsValid(idx)) && OP::Operation(vec[idx], *predicate)) { + result_sel.set_index(result_count++, idx); + } } + return result_count; } -string_location_t StringSegment::FetchStringLocation(data_ptr_t baseptr, int32_t dict_offset) { - D_ASSERT(dict_offset >= 0 && dict_offset <= Storage::BLOCK_ALLOC_SIZE); - if (dict_offset == 0) { - return string_location_t(INVALID_BLOCK, 0); +template +static void FilterSelectionSwitch(T *vec, T *predicate, SelectionVector &sel, idx_t &approved_tuple_count, + ExpressionType comparison_type, ValidityMask &mask) { + SelectionVector new_sel(approved_tuple_count); + // the inplace loops take the result as the last parameter + switch (comparison_type) { + case ExpressionType::COMPARE_EQUAL: { + if (mask.AllValid()) { + approved_tuple_count = + TemplatedFilterSelection(vec, predicate, sel, approved_tuple_count, mask, new_sel); + } else { + approved_tuple_count = + TemplatedFilterSelection(vec, predicate, sel, approved_tuple_count, mask, new_sel); + } + break; } - // look up result in dictionary - auto dict_end = baseptr + Storage::BLOCK_SIZE; - auto dict_pos = dict_end - dict_offset; - auto string_length = Load(dict_pos); - string_location_t result; - if (string_length == BIG_STRING_MARKER) { - ReadStringMarker(dict_pos, result.block_id, result.offset); - } else { - result.block_id = INVALID_BLOCK; - result.offset = dict_offset; + case ExpressionType::COMPARE_LESSTHAN: { + if (mask.AllValid()) { + approved_tuple_count = + TemplatedFilterSelection(vec, predicate, sel, approved_tuple_count, mask, new_sel); 
+ } else { + approved_tuple_count = + TemplatedFilterSelection(vec, predicate, sel, approved_tuple_count, mask, new_sel); + } + break; } - return result; -} - -string_t StringSegment::FetchStringFromDict(Vector &result, data_ptr_t baseptr, int32_t dict_offset) { - // fetch base data - D_ASSERT(dict_offset <= Storage::BLOCK_SIZE); - string_location_t location = FetchStringLocation(baseptr, dict_offset); - return FetchString(result, baseptr, location); + case ExpressionType::COMPARE_GREATERTHAN: { + if (mask.AllValid()) { + approved_tuple_count = TemplatedFilterSelection(vec, predicate, sel, + approved_tuple_count, mask, new_sel); + } else { + approved_tuple_count = TemplatedFilterSelection(vec, predicate, sel, + approved_tuple_count, mask, new_sel); + } + break; + } + case ExpressionType::COMPARE_LESSTHANOREQUALTO: { + if (mask.AllValid()) { + approved_tuple_count = TemplatedFilterSelection( + vec, predicate, sel, approved_tuple_count, mask, new_sel); + } else { + approved_tuple_count = TemplatedFilterSelection( + vec, predicate, sel, approved_tuple_count, mask, new_sel); + } + break; + } + case ExpressionType::COMPARE_GREATERTHANOREQUALTO: { + if (mask.AllValid()) { + approved_tuple_count = TemplatedFilterSelection( + vec, predicate, sel, approved_tuple_count, mask, new_sel); + } else { + approved_tuple_count = TemplatedFilterSelection( + vec, predicate, sel, approved_tuple_count, mask, new_sel); + } + break; + } + default: + throw NotImplementedException("Unknown comparison type for filter pushed down to table!"); + } + sel.Initialize(new_sel); } -string_t StringSegment::FetchString(Vector &result, data_ptr_t baseptr, string_location_t location) { - if (location.block_id != INVALID_BLOCK) { - // big string marker: read from separate block - return ReadString(result, location.block_id, location.offset); +template +static idx_t TemplatedNullSelection(SelectionVector &sel, idx_t approved_tuple_count, ValidityMask &mask) { + if (mask.AllValid()) { + // no NULL values 
+ if (IS_NULL) { + return 0; + } else { + return approved_tuple_count; + } } else { - if (location.offset == 0) { - return string_t(nullptr, 0); + SelectionVector result_sel(approved_tuple_count); + idx_t result_count = 0; + for (idx_t i = 0; i < approved_tuple_count; i++) { + auto idx = sel.get_index(i); + if (mask.RowIsValid(idx) != IS_NULL) { + result_sel.set_index(result_count++, idx); + } } - // normal string: read string from this block - auto dict_end = baseptr + Storage::BLOCK_SIZE; - auto dict_pos = dict_end - location.offset; - auto string_length = Load(dict_pos); + sel.Initialize(result_sel); + return result_count; + } +} - auto str_ptr = (char *)(dict_pos + sizeof(uint16_t)); - return string_t(str_ptr, string_length); +void ColumnSegment::FilterSelection(SelectionVector &sel, Vector &result, const TableFilter &filter, + idx_t &approved_tuple_count, ValidityMask &mask) { + switch (filter.filter_type) { + case TableFilterType::CONJUNCTION_AND: { + auto &conjunction_and = (ConjunctionAndFilter &)filter; + for (auto &child_filter : conjunction_and.child_filters) { + FilterSelection(sel, result, *child_filter, approved_tuple_count, mask); + } + break; + } + case TableFilterType::CONSTANT_COMPARISON: { + auto &constant_filter = (ConstantFilter &)filter; + // the inplace loops take the result as the last parameter + switch (result.GetType().InternalType()) { + case PhysicalType::UINT8: { + auto result_flat = FlatVector::GetData(result); + Vector predicate_vector(constant_filter.constant); + auto predicate = FlatVector::GetData(predicate_vector); + FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, + constant_filter.comparison_type, mask); + break; + } + case PhysicalType::UINT16: { + auto result_flat = FlatVector::GetData(result); + Vector predicate_vector(constant_filter.constant); + auto predicate = FlatVector::GetData(predicate_vector); + FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, + 
constant_filter.comparison_type, mask); + break; + } + case PhysicalType::UINT32: { + auto result_flat = FlatVector::GetData(result); + Vector predicate_vector(constant_filter.constant); + auto predicate = FlatVector::GetData(predicate_vector); + FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, + constant_filter.comparison_type, mask); + break; + } + case PhysicalType::UINT64: { + auto result_flat = FlatVector::GetData(result); + Vector predicate_vector(constant_filter.constant); + auto predicate = FlatVector::GetData(predicate_vector); + FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, + constant_filter.comparison_type, mask); + break; + } + case PhysicalType::INT8: { + auto result_flat = FlatVector::GetData(result); + Vector predicate_vector(constant_filter.constant); + auto predicate = FlatVector::GetData(predicate_vector); + FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, + constant_filter.comparison_type, mask); + break; + } + case PhysicalType::INT16: { + auto result_flat = FlatVector::GetData(result); + Vector predicate_vector(constant_filter.constant); + auto predicate = FlatVector::GetData(predicate_vector); + FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, + constant_filter.comparison_type, mask); + break; + } + case PhysicalType::INT32: { + auto result_flat = FlatVector::GetData(result); + Vector predicate_vector(constant_filter.constant); + auto predicate = FlatVector::GetData(predicate_vector); + FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, + constant_filter.comparison_type, mask); + break; + } + case PhysicalType::INT64: { + auto result_flat = FlatVector::GetData(result); + Vector predicate_vector(constant_filter.constant); + auto predicate = FlatVector::GetData(predicate_vector); + FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, + constant_filter.comparison_type, mask); + break; + } + case 
PhysicalType::INT128: { + auto result_flat = FlatVector::GetData(result); + Vector predicate_vector(constant_filter.constant); + auto predicate = FlatVector::GetData(predicate_vector); + FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, + constant_filter.comparison_type, mask); + break; + } + case PhysicalType::FLOAT: { + auto result_flat = FlatVector::GetData(result); + Vector predicate_vector(constant_filter.constant); + auto predicate = FlatVector::GetData(predicate_vector); + FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, + constant_filter.comparison_type, mask); + break; + } + case PhysicalType::DOUBLE: { + auto result_flat = FlatVector::GetData(result); + Vector predicate_vector(constant_filter.constant); + auto predicate = FlatVector::GetData(predicate_vector); + FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, + constant_filter.comparison_type, mask); + break; + } + case PhysicalType::VARCHAR: { + auto result_flat = FlatVector::GetData(result); + Vector predicate_vector(constant_filter.constant.str_value); + auto predicate = FlatVector::GetData(predicate_vector); + FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, + constant_filter.comparison_type, mask); + break; + } + case PhysicalType::BOOL: { + auto result_flat = FlatVector::GetData(result); + Vector predicate_vector(constant_filter.constant); + auto predicate = FlatVector::GetData(predicate_vector); + FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, + constant_filter.comparison_type, mask); + break; + } + default: + throw InvalidTypeException(result.GetType(), "Invalid type for filter pushed down to table comparison"); + } + break; + } + case TableFilterType::IS_NULL: + TemplatedNullSelection(sel, approved_tuple_count, mask); + break; + case TableFilterType::IS_NOT_NULL: + TemplatedNullSelection(sel, approved_tuple_count, mask); + break; + default: + throw InternalException("FIXME: 
unsupported type for filter selection"); } } -void StringSegment::FetchRow(ColumnFetchState &state, row_t row_id, Vector &result, idx_t result_idx) { - idx_t vector_index = row_id / STANDARD_VECTOR_SIZE; - idx_t id_in_vector = row_id - vector_index * STANDARD_VECTOR_SIZE; - D_ASSERT(vector_index < max_vector_count); +} // namespace duckdb - data_ptr_t baseptr; - // fetch a single row from the string segment - // first pin the main buffer if it is not already pinned - auto primary_id = block->BlockId(); - auto entry = state.handles.find(primary_id); - if (entry == state.handles.end()) { - // not pinned yet: pin it - auto &buffer_manager = BufferManager::GetBufferManager(db); - auto handle = buffer_manager.Pin(block); - baseptr = handle->node->buffer; - state.handles[primary_id] = move(handle); - } else { - // already pinned: use the pinned handle - baseptr = entry->second->node->buffer; - } +namespace duckdb { - auto base = baseptr + vector_index * vector_size; - auto base_data = (int32_t *)base; - auto result_data = FlatVector::GetData(result); +ListColumnData::ListColumnData(DataTableInfo &info, idx_t column_index, idx_t start_row, LogicalType type_p, + ColumnData *parent) + : ColumnData(info, column_index, start_row, move(type_p), parent), validity(info, 0, start_row, this) { + D_ASSERT(type.InternalType() == PhysicalType::LIST); + auto &child_type = ListType::GetChildType(type); + // the child column, with column index 1 (0 is the validity mask) + child_column = ColumnData::CreateColumnUnique(info, 1, start_row, child_type, this); +} - result_data[result_idx] = FetchStringFromDict(result, baseptr, base_data[id_in_vector]); +bool ListColumnData::CheckZonemap(ColumnScanState &state, TableFilter &filter) { + // table filters are not supported yet for list columns + return false; } -//===--------------------------------------------------------------------===// -// Append -//===--------------------------------------------------------------------===// -idx_t 
StringSegment::Append(SegmentStatistics &stats, VectorData &data, idx_t offset, idx_t count) { - auto &buffer_manager = BufferManager::GetBufferManager(db); - auto handle = buffer_manager.Pin(block); - idx_t initial_count = tuple_count; - while (count > 0) { - // get the vector index of the vector to append to and see how many tuples we can append to that vector - idx_t vector_index = tuple_count / STANDARD_VECTOR_SIZE; - if (vector_index == max_vector_count) { - // we are at the maximum vector, check if there is space to increase the maximum vector count - // as a heuristic, we only allow another vector to be added if we have at least 32 bytes per string - // remaining (32KB out of a 256KB block, or around 12% empty) - if (RemainingSpace(*handle) >= STANDARD_VECTOR_SIZE * 32) { - // we have enough remaining space to add another vector - ExpandStringSegment(handle->node->buffer); - } else { - break; - } - } - idx_t current_tuple_count = tuple_count - vector_index * STANDARD_VECTOR_SIZE; - idx_t append_count = MinValue(STANDARD_VECTOR_SIZE - current_tuple_count, count); +void ListColumnData::InitializeScan(ColumnScanState &state) { + ColumnData::InitializeScan(state); + + // initialize the validity segment + ColumnScanState validity_state; + validity.InitializeScan(validity_state); + state.child_states.push_back(move(validity_state)); + + // initialize the child scan + ColumnScanState child_state; + child_column->InitializeScan(child_state); + state.child_states.push_back(move(child_state)); +} + +list_entry_t ListColumnData::FetchListEntry(idx_t row_idx) { + auto segment = (ColumnSegment *)data.GetSegment(row_idx); + ColumnFetchState fetch_state; + Vector result(type, 1); + segment->FetchRow(fetch_state, row_idx, result, 0); + + // initialize the child scan with the required offset + auto list_data = FlatVector::GetData(result); + return list_data[0]; +} + +void ListColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) { + if (row_idx == 0) { 
+ InitializeScan(state); + return; + } + ColumnData::InitializeScanWithOffset(state, row_idx); - // now perform the actual append - AppendData(*handle, stats, handle->node->buffer + vector_size * vector_index, - handle->node->buffer + Storage::BLOCK_SIZE, current_tuple_count, data, offset, append_count); + // initialize the validity segment + ColumnScanState validity_state; + validity.InitializeScanWithOffset(validity_state, row_idx); + state.child_states.push_back(move(validity_state)); + + // we need to read the list at position row_idx to get the correct row offset of the child + auto list_entry = FetchListEntry(row_idx); + auto child_offset = list_entry.offset; - count -= append_count; - offset += append_count; - tuple_count += append_count; + D_ASSERT(child_offset <= child_column->GetMaxEntry()); + ColumnScanState child_state; + if (child_offset < child_column->GetMaxEntry()) { + child_column->InitializeScanWithOffset(child_state, child_offset); } - return tuple_count - initial_count; + state.child_states.push_back(move(child_state)); } -idx_t StringSegment::RemainingSpace(BufferHandle &handle) { - idx_t used_space = GetDictionaryOffset(handle) + max_vector_count * vector_size; - D_ASSERT(Storage::BLOCK_SIZE >= used_space); - return Storage::BLOCK_SIZE - used_space; +idx_t ListColumnData::Scan(Transaction &transaction, idx_t vector_index, ColumnScanState &state, Vector &result) { + return ScanCount(state, result, STANDARD_VECTOR_SIZE); } -static inline void UpdateStringStats(SegmentStatistics &stats, const string_t &new_value) { - auto &sstats = (StringStatistics &)*stats.statistics; - sstats.Update(new_value); +idx_t ListColumnData::ScanCommitted(idx_t vector_index, ColumnScanState &state, Vector &result, bool allow_updates) { + return ScanCount(state, result, STANDARD_VECTOR_SIZE); } -void StringSegment::AppendData(BufferHandle &handle, SegmentStatistics &stats, data_ptr_t target, data_ptr_t end, - idx_t target_offset, VectorData &adata, idx_t offset, idx_t 
count) { - auto sdata = (string_t *)adata.data; - auto result_data = (int32_t *)target; +idx_t ListColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t count) { + if (count == 0) { + return 0; + } + // updates not supported for lists + D_ASSERT(!updates); - idx_t remaining_strings = STANDARD_VECTOR_SIZE - (this->tuple_count % STANDARD_VECTOR_SIZE); - for (idx_t i = 0; i < count; i++) { - auto source_idx = adata.sel->get_index(offset + i); - auto target_idx = target_offset + i; - if (!adata.validity.RowIsValid(source_idx)) { - // null value is stored as -1 - result_data[target_idx] = 0; - } else { - auto dictionary_offset = GetDictionaryOffset(handle); - D_ASSERT(dictionary_offset < Storage::BLOCK_SIZE); - // non-null value, check if we can fit it within the block - idx_t string_length = sdata[source_idx].GetSize(); - idx_t total_length = string_length + sizeof(uint16_t); - - UpdateStringStats(stats, sdata[source_idx]); - - // determine whether or not the string needs to be stored in an overflow block - // we never place small strings in the overflow blocks: the pointer would take more space than the - // string itself we always place big strings (>= STRING_BLOCK_LIMIT) in the overflow blocks we also have - // to always leave enough room for BIG_STRING_MARKER_SIZE for each of the remaining strings - if (total_length > BIG_STRING_MARKER_BASE_SIZE && - (total_length >= STRING_BLOCK_LIMIT || - total_length + (remaining_strings * BIG_STRING_MARKER_SIZE) > RemainingSpace(handle))) { - D_ASSERT(RemainingSpace(handle) >= BIG_STRING_MARKER_SIZE); - // string is too big for block: write to overflow blocks - block_id_t block; - int32_t offset; - // write the string into the current string block - WriteString(sdata[source_idx], block, offset); - dictionary_offset += BIG_STRING_MARKER_SIZE; - auto dict_pos = end - dictionary_offset; + idx_t scan_count = ScanVector(state, result, count); + validity.ScanCount(state.child_states[0], result, count); - // write a big 
string marker into the dictionary - WriteStringMarker(dict_pos, block, offset); - } else { - // string fits in block, append to dictionary and increment dictionary position - D_ASSERT(string_length < NumericLimits::Maximum()); - dictionary_offset += total_length; - auto dict_pos = end - dictionary_offset; // first write the length as u16 - Store(string_length, dict_pos); - // now write the actual string data into the dictionary - memcpy(dict_pos + sizeof(uint16_t), sdata[source_idx].GetDataUnsafe(), string_length); - } - D_ASSERT(RemainingSpace(handle) <= Storage::BLOCK_SIZE); - // place the dictionary offset into the set of vectors - D_ASSERT(dictionary_offset <= Storage::BLOCK_SIZE); - result_data[target_idx] = dictionary_offset; - SetDictionaryOffset(handle, dictionary_offset); - } - remaining_strings--; + auto data = FlatVector::GetData(result); + auto first_entry = data[0]; + auto last_entry = data[scan_count - 1]; + +#ifdef DEBUG + for (idx_t i = 1; i < scan_count; i++) { + D_ASSERT(data[i].offset == data[i - 1].offset + data[i - 1].length); + } +#endif + // shift all offsets so they are 0 at the first entry + for (idx_t i = 0; i < scan_count; i++) { + data[i].offset -= first_entry.offset; } -} -void StringSegment::WriteString(string_t string, block_id_t &result_block, int32_t &result_offset) { - if (overflow_writer) { - // overflow writer is set: write string there - overflow_writer->WriteString(string, result_block, result_offset); - } else { - // default overflow behavior: use in-memory buffer to store the overflow string - WriteStringMemory(string, result_block, result_offset); + D_ASSERT(last_entry.offset >= first_entry.offset); + idx_t child_scan_count = last_entry.offset + last_entry.length - first_entry.offset; + ListVector::Reserve(result, child_scan_count); + + if (child_scan_count > 0) { + auto &child_entry = ListVector::GetEntry(result); + D_ASSERT(child_entry.GetType().InternalType() == PhysicalType::STRUCT || + state.child_states[1].row_index + 
child_scan_count <= child_column->GetMaxEntry()); + child_column->ScanCount(state.child_states[1], child_entry, child_scan_count); } + + ListVector::SetListSize(result, child_scan_count); + return scan_count; } -void StringSegment::WriteStringMemory(string_t string, block_id_t &result_block, int32_t &result_offset) { - uint32_t total_length = string.GetSize() + sizeof(uint32_t); - shared_ptr block; - unique_ptr handle; +void ListColumnData::Skip(ColumnScanState &state, idx_t count) { + // skip inside the validity segment + validity.Skip(state.child_states[0], count); - auto &buffer_manager = BufferManager::GetBufferManager(db); - // check if the string fits in the current block - if (!head || head->offset + total_length >= head->size) { - // string does not fit, allocate space for it - // create a new string block - idx_t alloc_size = MaxValue(total_length, Storage::BLOCK_ALLOC_SIZE); - auto new_block = make_unique(); - new_block->offset = 0; - new_block->size = alloc_size; - // allocate an in-memory buffer for it - block = buffer_manager.RegisterMemory(alloc_size, false); - handle = buffer_manager.Pin(block); - overflow_blocks[block->BlockId()] = new_block.get(); - new_block->block = move(block); - new_block->next = move(head); - head = move(new_block); - } else { - // string fits, copy it into the current block - handle = buffer_manager.Pin(head->block); + // we need to read the list entries/offsets to figure out how much to skip + // note that we only need to read the first and last entry + // however, let's just read all "count" entries for now + auto data = unique_ptr(new list_entry_t[count]); + Vector result(type, (data_ptr_t)data.get()); + idx_t scan_count = ScanVector(state, result, count); + if (scan_count == 0) { + return; } - result_block = head->block->BlockId(); - result_offset = head->offset; + auto &first_entry = data[0]; + auto &last_entry = data[scan_count - 1]; + idx_t child_scan_count = last_entry.offset + last_entry.length - first_entry.offset; 
- // copy the string and the length there - auto ptr = handle->node->buffer + head->offset; - Store(string.GetSize(), ptr); - ptr += sizeof(uint32_t); - memcpy(ptr, string.GetDataUnsafe(), string.GetSize()); - head->offset += total_length; + // skip the child state forward by the child_scan_count + child_column->Skip(state.child_states[1], child_scan_count); } -string_t StringSegment::ReadString(Vector &result, block_id_t block, int32_t offset) { - D_ASSERT(offset < Storage::BLOCK_SIZE); - if (block == INVALID_BLOCK) { - return string_t(nullptr, 0); - } - auto &buffer_manager = BufferManager::GetBufferManager(db); - if (block < MAXIMUM_BLOCK) { - // read the overflow string from disk - // pin the initial handle and read the length - auto block_handle = buffer_manager.RegisterBlock(block); - auto handle = buffer_manager.Pin(block_handle); +void ListColumnData::InitializeAppend(ColumnAppendState &state) { + // initialize the list offset append + ColumnData::InitializeAppend(state); - uint32_t length = Load(handle->node->buffer + offset); - uint32_t remaining = length; - offset += sizeof(uint32_t); + // initialize the validity append + ColumnAppendState validity_append_state; + validity.InitializeAppend(validity_append_state); + state.child_appends.push_back(move(validity_append_state)); - // allocate a buffer to store the string - auto alloc_size = MaxValue(Storage::BLOCK_ALLOC_SIZE, length + sizeof(uint32_t)); - auto target_handle = buffer_manager.Allocate(alloc_size); - auto target_ptr = target_handle->node->buffer; - // write the length in this block as well - Store(length, target_ptr); - target_ptr += sizeof(uint32_t); - // now append the string to the single buffer - while (remaining > 0) { - idx_t to_write = MinValue(remaining, Storage::BLOCK_SIZE - sizeof(block_id_t) - offset); - memcpy(target_ptr, handle->node->buffer + offset, to_write); + // initialize the child column append + ColumnAppendState child_append_state; + 
child_column->InitializeAppend(child_append_state); + state.child_appends.push_back(move(child_append_state)); +} - remaining -= to_write; - offset += to_write; - target_ptr += to_write; - if (remaining > 0) { - // read the next block - block_id_t next_block = Load(handle->node->buffer + offset); - block_handle = buffer_manager.RegisterBlock(next_block); - handle = buffer_manager.Pin(block_handle); - offset = 0; +void ListColumnData::Append(BaseStatistics &stats_p, ColumnAppendState &state, Vector &vector, idx_t count) { + D_ASSERT(count > 0); + auto &stats = (ListStatistics &)stats_p; + + vector.Normalify(count); + auto &list_validity = FlatVector::Validity(vector); + + // construct the list_entry_t entries to append to the column data + auto input_offsets = FlatVector::GetData(vector); + auto start_offset = child_column->GetMaxEntry(); + idx_t child_count = 0; + + auto append_offsets = unique_ptr(new list_entry_t[count]); + for (idx_t i = 0; i < count; i++) { + if (list_validity.RowIsValid(i)) { + append_offsets[i].offset = start_offset + input_offsets[i].offset; + append_offsets[i].length = input_offsets[i].length; + child_count += input_offsets[i].length; + } else { + if (i > 0) { + append_offsets[i].offset = append_offsets[i - 1].offset + append_offsets[i - 1].length; + } else { + append_offsets[i].offset = start_offset; } + append_offsets[i].length = 0; } + } +#ifdef DEBUG + D_ASSERT(append_offsets[0].offset == start_offset); + for (idx_t i = 1; i < count; i++) { + D_ASSERT(append_offsets[i].offset == append_offsets[i - 1].offset + append_offsets[i - 1].length); + } + D_ASSERT(append_offsets[count - 1].offset + append_offsets[count - 1].length - append_offsets[0].offset == + child_count); +#endif - auto final_buffer = target_handle->node->buffer; - StringVector::AddHandle(result, move(target_handle)); - return ReadString(final_buffer, 0); - } else { - // read the overflow string from memory - // first pin the handle, if it is not pinned yet - auto entry = 
overflow_blocks.find(block); - D_ASSERT(entry != overflow_blocks.end()); - auto handle = buffer_manager.Pin(entry->second->block); - auto final_buffer = handle->node->buffer; - StringVector::AddHandle(result, move(handle)); - return ReadString(final_buffer, offset); + VectorData vdata; + vdata.validity = list_validity; + vdata.sel = &FlatVector::INCREMENTAL_SELECTION_VECTOR; + vdata.data = (data_ptr_t)append_offsets.get(); + + // append the list offsets + ColumnData::AppendData(stats, state, vdata, count); + // append the validity data + validity.AppendData(*stats.validity_stats, state.child_appends[0], vdata, count); + // append the child vector + if (child_count > 0) { + auto &child_vector = ListVector::GetEntry(vector); + child_column->Append(*stats.child_stats, state.child_appends[1], child_vector, child_count); } } -string_t StringSegment::ReadString(data_ptr_t target, int32_t offset) { - auto ptr = target + offset; - auto str_length = Load(ptr); - auto str_ptr = (char *)(ptr + sizeof(uint32_t)); - return string_t(str_ptr, str_length); +void ListColumnData::RevertAppend(row_t start_row) { + ColumnData::RevertAppend(start_row); + validity.RevertAppend(start_row); + auto column_count = GetMaxEntry(); + if (column_count > start) { + // revert append in the child column + auto list_entry = FetchListEntry(column_count - 1); + child_column->RevertAppend(list_entry.offset + list_entry.length); + } } -void StringSegment::WriteStringMarker(data_ptr_t target, block_id_t block_id, int32_t offset) { - uint16_t length = BIG_STRING_MARKER; - memcpy(target, &length, sizeof(uint16_t)); - target += sizeof(uint16_t); - memcpy(target, &block_id, sizeof(block_id_t)); - target += sizeof(block_id_t); - memcpy(target, &offset, sizeof(int32_t)); +idx_t ListColumnData::Fetch(ColumnScanState &state, row_t row_id, Vector &result) { + throw NotImplementedException("List Fetch"); } -void StringSegment::ReadStringMarker(data_ptr_t target, block_id_t &block_id, int32_t &offset) { - target 
+= sizeof(uint16_t); - memcpy(&block_id, target, sizeof(block_id_t)); - target += sizeof(block_id_t); - memcpy(&offset, target, sizeof(int32_t)); +void ListColumnData::Update(Transaction &transaction, idx_t column_index, Vector &update_vector, row_t *row_ids, + idx_t offset, idx_t update_count) { + throw NotImplementedException("List Update is not supported."); } -void StringSegment::ToTemporary() { - ToTemporaryInternal(); - this->max_vector_count = (this->tuple_count + (STANDARD_VECTOR_SIZE - 1)) / STANDARD_VECTOR_SIZE; +void ListColumnData::UpdateColumn(Transaction &transaction, const vector &column_path, Vector &update_vector, + row_t *row_ids, idx_t update_count, idx_t depth) { + throw NotImplementedException("List Update Column is not supported"); } -} // namespace duckdb - +unique_ptr ListColumnData::GetUpdateStatistics() { + return nullptr; +} +void ListColumnData::FetchRow(Transaction &transaction, ColumnFetchState &state, row_t row_id, Vector &result, + idx_t result_idx) { + // insert any child states that are required + // we need two (validity & list child) + // note that we need a scan state for the child vector + // this is because we will (potentially) fetch more than one tuple from the list child + if (state.child_states.empty()) { + auto child_state = make_unique(); + state.child_states.push_back(move(child_state)); + } + // fetch the list_entry_t and the validity mask for that list + auto segment = (ColumnSegment *)data.GetSegment(row_id); + // now perform the fetch within the segment + segment->FetchRow(state, row_id, result, result_idx); + validity.FetchRow(transaction, *state.child_states[0], row_id, result, result_idx); + + auto &validity = FlatVector::Validity(result); + auto list_data = FlatVector::GetData(result); + auto &list_entry = list_data[result_idx]; + auto original_offset = list_entry.offset; + // set the list entry offset to the size of the current list + list_entry.offset = ListVector::GetListSize(result); + if 
(!validity.RowIsValid(result_idx)) { + // the list is NULL! no need to fetch the child + D_ASSERT(list_entry.length == 0); + return; + } -namespace duckdb { + // now we need to read from the child all the elements between [offset...length] + auto child_scan_count = list_entry.length; + if (child_scan_count > 0) { + auto child_state = make_unique(); + auto &child_type = ListType::GetChildType(result.GetType()); + Vector child_scan(child_type, child_scan_count); + // seek the scan towards the specified position and read [length] entries + child_column->InitializeScanWithOffset(*child_state, original_offset); + D_ASSERT(child_type.InternalType() == PhysicalType::STRUCT || + child_state->row_index + child_scan_count <= child_column->GetMaxEntry()); + child_column->ScanCount(*child_state, child_scan, child_scan_count); -static bool UseVersion(transaction_t start_time, transaction_t transaction_id, transaction_t id) { - return id < start_time || id == transaction_id; + ListVector::Append(result, child_scan, child_scan_count); + } } -static bool UseVersion(Transaction &transaction, transaction_t id) { - return UseVersion(transaction.start_time, transaction.transaction_id, id); +void ListColumnData::CommitDropColumn() { + validity.CommitDropColumn(); + child_column->CommitDropColumn(); } -unique_ptr ChunkInfo::Deserialize(MorselInfo &morsel, Deserializer &source) { - auto type = source.Read(); - switch (type) { - case ChunkInfoType::EMPTY_INFO: - return nullptr; - case ChunkInfoType::CONSTANT_INFO: - return ChunkConstantInfo::Deserialize(morsel, source); - case ChunkInfoType::VECTOR_INFO: - return ChunkVectorInfo::Deserialize(morsel, source); - default: - throw SerializationException("Could not deserialize Chunk Info Type: unrecognized type"); +struct ListColumnCheckpointState : public ColumnCheckpointState { + ListColumnCheckpointState(RowGroup &row_group, ColumnData &column_data, TableDataWriter &writer) + : ColumnCheckpointState(row_group, column_data, writer) { + 
global_stats = make_unique(column_data.type); } -} -//===--------------------------------------------------------------------===// -// Constant info -//===--------------------------------------------------------------------===// -ChunkConstantInfo::ChunkConstantInfo(idx_t start, MorselInfo &morsel) - : ChunkInfo(start, morsel, ChunkInfoType::CONSTANT_INFO), insert_id(0), delete_id(NOT_DELETED_ID) { -} + unique_ptr validity_state; + unique_ptr child_state; -idx_t ChunkConstantInfo::GetSelVector(Transaction &transaction, SelectionVector &sel_vector, idx_t max_count) { - if (UseVersion(transaction, insert_id) && !UseVersion(transaction, delete_id)) { - return max_count; +public: + unique_ptr GetStatistics() override { + auto stats = global_stats->Copy(); + auto &list_stats = (ListStatistics &)*stats; + stats->validity_stats = validity_state->GetStatistics(); + list_stats.child_stats = child_state->GetStatistics(); + return stats; } - return 0; + + void FlushToDisk() override { + ColumnCheckpointState::FlushToDisk(); + validity_state->FlushToDisk(); + child_state->FlushToDisk(); + } +}; + +unique_ptr ListColumnData::CreateCheckpointState(RowGroup &row_group, TableDataWriter &writer) { + return make_unique(row_group, *this, writer); } -bool ChunkConstantInfo::Fetch(Transaction &transaction, row_t row) { - return UseVersion(transaction, insert_id) && !UseVersion(transaction, delete_id); +unique_ptr ListColumnData::Checkpoint(RowGroup &row_group, TableDataWriter &writer) { + auto validity_state = validity.Checkpoint(row_group, writer); + auto base_state = ColumnData::Checkpoint(row_group, writer); + auto child_state = child_column->Checkpoint(row_group, writer); + + auto &checkpoint_state = (ListColumnCheckpointState &)*base_state; + checkpoint_state.validity_state = move(validity_state); + checkpoint_state.child_state = move(child_state); + return base_state; } -void ChunkConstantInfo::CommitAppend(transaction_t commit_id, idx_t start, idx_t end) { - D_ASSERT(start == 0 
&& end == STANDARD_VECTOR_SIZE); - insert_id = commit_id; +void ListColumnData::DeserializeColumn(Deserializer &source) { + ColumnData::DeserializeColumn(source); + validity.DeserializeColumn(source); + child_column->DeserializeColumn(source); } -void ChunkConstantInfo::Serialize(Serializer &serializer) { - // we only need to write this node if any tuple deletions have been committed - bool is_deleted = insert_id >= TRANSACTION_ID_START || delete_id < TRANSACTION_ID_START; - if (!is_deleted) { - serializer.Write(ChunkInfoType::EMPTY_INFO); - return; - } - serializer.Write(type); - serializer.Write(start); +void ListColumnData::GetStorageInfo(idx_t row_group_index, vector col_path, vector> &result) { + col_path.push_back(0); + validity.GetStorageInfo(row_group_index, col_path, result); + col_path.back() = 1; + child_column->GetStorageInfo(row_group_index, col_path, result); } -unique_ptr ChunkConstantInfo::Deserialize(MorselInfo &morsel, Deserializer &source) { - auto start = source.Read(); +} // namespace duckdb - auto info = make_unique(start, morsel); - info->insert_id = 0; - info->delete_id = 0; - return move(info); -} -//===--------------------------------------------------------------------===// -// Vector info -//===--------------------------------------------------------------------===// -ChunkVectorInfo::ChunkVectorInfo(idx_t start, MorselInfo &morsel) - : ChunkInfo(start, morsel, ChunkInfoType::VECTOR_INFO), insert_id(0), same_inserted_id(true), any_deleted(false) { - for (idx_t i = 0; i < STANDARD_VECTOR_SIZE; i++) { - inserted[i] = 0; - deleted[i] = NOT_DELETED_ID; - } -} -idx_t ChunkVectorInfo::GetSelVector(Transaction &transaction, SelectionVector &sel_vector, idx_t max_count) { - return GetSelVector(transaction.start_time, transaction.transaction_id, sel_vector, max_count); -} +namespace duckdb { -idx_t ChunkVectorInfo::GetSelVector(transaction_t start_time, transaction_t transaction_id, SelectionVector &sel_vector, - idx_t max_count) { - idx_t count 
= 0; - if (same_inserted_id && !any_deleted) { - // all tuples have the same inserted id: and no tuples were deleted - if (UseVersion(start_time, transaction_id, insert_id)) { - return max_count; - } else { - return 0; - } - } else if (same_inserted_id) { - if (!UseVersion(start_time, transaction_id, insert_id)) { - return 0; - } - // have to check deleted flag - for (idx_t i = 0; i < max_count; i++) { - if (!UseVersion(start_time, transaction_id, deleted[i])) { - sel_vector.set_index(count++, i); - } - } - } else if (!any_deleted) { - // have to check inserted flag - for (idx_t i = 0; i < max_count; i++) { - if (UseVersion(start_time, transaction_id, inserted[i])) { - sel_vector.set_index(count++, i); - } - } - } else { - // have to check both flags - for (idx_t i = 0; i < max_count; i++) { - if (UseVersion(start_time, transaction_id, inserted[i]) && - !UseVersion(start_time, transaction_id, deleted[i])) { - sel_vector.set_index(count++, i); - } - } - } - return count; +PersistentTableData::PersistentTableData(idx_t column_count) { } -bool ChunkVectorInfo::Fetch(Transaction &transaction, row_t row) { - return UseVersion(transaction, inserted[row]) && !UseVersion(transaction, deleted[row]); +PersistentTableData::~PersistentTableData() { } -void ChunkVectorInfo::Delete(Transaction &transaction, row_t rows[], idx_t count) { - any_deleted = true; +} // namespace duckdb - // first check the chunk for conflicts - for (idx_t i = 0; i < count; i++) { - if (deleted[rows[i]] != NOT_DELETED_ID) { - // tuple was already deleted by another transaction - throw TransactionException("Conflict on tuple deletion!"); - } - if (inserted[rows[i]] >= TRANSACTION_ID_START) { - throw TransactionException("Deleting non-committed tuples is not supported (for now...)"); - } + + + + + + + + + + + + + +namespace duckdb { + +constexpr const idx_t RowGroup::ROW_GROUP_VECTOR_COUNT; +constexpr const idx_t RowGroup::ROW_GROUP_SIZE; + +RowGroup::RowGroup(DatabaseInstance &db, DataTableInfo 
&table_info, idx_t start, idx_t count) + : SegmentBase(start, count), db(db), table_info(table_info) { + + Verify(); +} + +RowGroup::RowGroup(DatabaseInstance &db, DataTableInfo &table_info, const vector &types, + RowGroupPointer &pointer) + : SegmentBase(pointer.row_start, pointer.tuple_count), db(db), table_info(table_info) { + // deserialize the columns + if (pointer.data_pointers.size() != types.size()) { + throw IOException("Row group column count is unaligned with table column count. Corrupt file?"); } - // after verifying that there are no conflicts we mark the tuples as deleted - for (idx_t i = 0; i < count; i++) { - deleted[rows[i]] = transaction.transaction_id; + for (idx_t i = 0; i < pointer.data_pointers.size(); i++) { + auto &block_pointer = pointer.data_pointers[i]; + MetaBlockReader column_data_reader(db, block_pointer.block_id); + column_data_reader.offset = block_pointer.offset; + this->columns.push_back(ColumnData::Deserialize(table_info, i, start, column_data_reader, types[i], nullptr)); } -} -void ChunkVectorInfo::CommitDelete(transaction_t commit_id, row_t rows[], idx_t count) { - for (idx_t i = 0; i < count; i++) { - deleted[rows[i]] = commit_id; + // set up the statistics + for (auto &stats : pointer.statistics) { + const auto& type = stats->type; + this->stats.push_back(make_shared(type, move(stats))); } + this->version_info = move(pointer.versions); + + Verify(); } -void ChunkVectorInfo::Append(idx_t start, idx_t end, transaction_t commit_id) { - if (start == 0) { - insert_id = commit_id; - } else if (insert_id != commit_id) { - same_inserted_id = false; - insert_id = NOT_DELETED_ID; - } - for (idx_t i = start; i < end; i++) { - inserted[i] = commit_id; +RowGroup::~RowGroup() { +} + +void RowGroup::InitializeEmpty(const vector &types) { + // set up the segment trees for the column segments + for (idx_t i = 0; i < types.size(); i++) { + auto column_data = ColumnData::CreateColumn(GetTableInfo(), i, start, types[i]); + 
stats.push_back(make_shared(types[i])); + columns.push_back(move(column_data)); } } -void ChunkVectorInfo::CommitAppend(transaction_t commit_id, idx_t start, idx_t end) { - if (same_inserted_id) { - insert_id = commit_id; +bool RowGroup::InitializeScanWithOffset(RowGroupScanState &state, idx_t vector_offset) { + auto &column_ids = state.parent.column_ids; + if (state.parent.table_filters) { + if (!CheckZonemap(*state.parent.table_filters, column_ids)) { + return false; + } } - for (idx_t i = start; i < end; i++) { - inserted[i] = commit_id; + + state.row_group = this; + state.vector_index = vector_offset; + state.max_row = + this->start > state.parent.max_row ? 0 : MinValue(this->count, state.parent.max_row - this->start); + state.column_scans = unique_ptr(new ColumnScanState[column_ids.size()]); + for (idx_t i = 0; i < column_ids.size(); i++) { + auto column = column_ids[i]; + if (column != COLUMN_IDENTIFIER_ROW_ID) { + columns[column]->InitializeScanWithOffset(state.column_scans[i], + start + vector_offset * STANDARD_VECTOR_SIZE); + } else { + state.column_scans[i].current = nullptr; + } } + return true; } -void ChunkVectorInfo::Serialize(Serializer &serializer) { - SelectionVector sel(STANDARD_VECTOR_SIZE); - transaction_t start_time = TRANSACTION_ID_START - 1; - transaction_t transaction_id = INVALID_INDEX; - idx_t count = GetSelVector(start_time, transaction_id, sel, STANDARD_VECTOR_SIZE); - if (count == STANDARD_VECTOR_SIZE) { - // nothing is deleted: skip writing anything - serializer.Write(ChunkInfoType::EMPTY_INFO); - return; +bool RowGroup::InitializeScan(RowGroupScanState &state) { + auto &column_ids = state.parent.column_ids; + if (state.parent.table_filters) { + if (!CheckZonemap(*state.parent.table_filters, column_ids)) { + return false; + } } - if (count == 0) { - // everything is deleted: write a constant vector - serializer.Write(ChunkInfoType::CONSTANT_INFO); - serializer.Write(start); - return; + state.row_group = this; + state.vector_index = 0; 
+ state.max_row = + this->start > state.parent.max_row ? 0 : MinValue(this->count, state.parent.max_row - this->start); + state.column_scans = unique_ptr(new ColumnScanState[column_ids.size()]); + for (idx_t i = 0; i < column_ids.size(); i++) { + auto column = column_ids[i]; + if (column != COLUMN_IDENTIFIER_ROW_ID) { + columns[column]->InitializeScan(state.column_scans[i]); + } else { + state.column_scans[i].current = nullptr; + } } - // write a boolean vector - serializer.Write(ChunkInfoType::VECTOR_INFO); - serializer.Write(start); - bool deleted_tuples[STANDARD_VECTOR_SIZE]; - for (idx_t i = 0; i < STANDARD_VECTOR_SIZE; i++) { - deleted_tuples[i] = true; + return true; +} + +unique_ptr RowGroup::AlterType(ClientContext &context, const LogicalType &target_type, idx_t changed_idx, + ExpressionExecutor &executor, TableScanState &scan_state, + DataChunk &scan_chunk) { + Verify(); + + // construct a new column data for this type + auto column_data = ColumnData::CreateColumn(GetTableInfo(), changed_idx, start, target_type); + + ColumnAppendState append_state; + column_data->InitializeAppend(append_state); + + // scan the original table, and fill the new column with the transformed value + InitializeScan(scan_state.row_group_scan_state); + + Vector append_vector(target_type); + auto altered_col_stats = make_shared(target_type); + while (true) { + // scan the table + scan_chunk.Reset(); + ScanCommitted(scan_state.row_group_scan_state, scan_chunk, TableScanType::TABLE_SCAN_COMMITTED_ROWS); + if (scan_chunk.size() == 0) { + break; + } + // execute the expression + executor.ExecuteExpression(scan_chunk, append_vector); + column_data->Append(*altered_col_stats->statistics, append_state, append_vector, scan_chunk.size()); } - for (idx_t i = 0; i < count; i++) { - deleted_tuples[sel.get_index(i)] = false; + + // set up the row_group based on this row_group + auto row_group = make_unique(db, table_info, this->start, this->count); + row_group->version_info = version_info; + 
for (idx_t i = 0; i < columns.size(); i++) { + if (i == changed_idx) { + // this is the altered column: use the new column + row_group->columns.push_back(move(column_data)); + row_group->stats.push_back(move(altered_col_stats)); + } else { + // this column was not altered: use the data directly + row_group->columns.push_back(columns[i]); + row_group->stats.push_back(stats[i]); + } } - serializer.WriteData((data_ptr_t)deleted_tuples, sizeof(bool) * STANDARD_VECTOR_SIZE); + row_group->Verify(); + return row_group; } -unique_ptr ChunkVectorInfo::Deserialize(MorselInfo &morsel, Deserializer &source) { - auto start = source.Read(); +unique_ptr RowGroup::AddColumn(ClientContext &context, ColumnDefinition &new_column, + ExpressionExecutor &executor, Expression *default_value, Vector &result) { + Verify(); - auto result = make_unique(start, morsel); - result->any_deleted = true; - bool deleted_tuples[STANDARD_VECTOR_SIZE]; - source.ReadData((data_ptr_t)deleted_tuples, sizeof(bool) * STANDARD_VECTOR_SIZE); - for (idx_t i = 0; i < STANDARD_VECTOR_SIZE; i++) { - if (deleted_tuples[i]) { - result->deleted[i] = 0; + // construct a new column data for the new column + auto added_column = ColumnData::CreateColumn(GetTableInfo(), columns.size(), start, new_column.type); + + auto added_col_stats = make_shared(new_column.type); + idx_t rows_to_write = this->count; + if (rows_to_write > 0) { + DataChunk dummy_chunk; + + ColumnAppendState state; + added_column->InitializeAppend(state); + for (idx_t i = 0; i < rows_to_write; i += STANDARD_VECTOR_SIZE) { + idx_t rows_in_this_vector = MinValue(rows_to_write - i, STANDARD_VECTOR_SIZE); + if (default_value) { + dummy_chunk.SetCardinality(rows_in_this_vector); + executor.ExecuteExpression(dummy_chunk, result); + } + added_column->Append(*added_col_stats->statistics, state, result, rows_in_this_vector); } } - return move(result); + + // set up the row_group based on this row_group + auto row_group = make_unique(db, table_info, this->start, 
this->count); + row_group->version_info = version_info; + row_group->columns = columns; + row_group->stats = stats; + // now add the new column + row_group->columns.push_back(move(added_column)); + row_group->stats.push_back(move(added_col_stats)); + + row_group->Verify(); + return row_group; } -} // namespace duckdb +unique_ptr RowGroup::RemoveColumn(idx_t removed_column) { + Verify(); + D_ASSERT(removed_column < columns.size()); + auto row_group = make_unique(db, table_info, this->start, this->count); + row_group->version_info = version_info; + row_group->columns = columns; + row_group->stats = stats; + // now remove the column + row_group->columns.erase(row_group->columns.begin() + removed_column); + row_group->stats.erase(row_group->stats.begin() + removed_column); -#include + row_group->Verify(); + return row_group; +} -namespace duckdb { +void RowGroup::CommitDrop() { + for (idx_t column_idx = 0; column_idx < columns.size(); column_idx++) { + CommitDropColumn(column_idx); + } +} -ColumnSegment::ColumnSegment(LogicalType type_p, ColumnSegmentType segment_type, idx_t start, idx_t count) - : SegmentBase(start, count), type(move(type_p)), type_size(GetTypeIdSize(type.InternalType())), - segment_type(segment_type), stats(type, type_size) { +void RowGroup::CommitDropColumn(idx_t column_idx) { + D_ASSERT(column_idx < columns.size()); + columns[column_idx]->CommitDropColumn(); } -ColumnSegment::ColumnSegment(LogicalType type_p, ColumnSegmentType segment_type, idx_t start, idx_t count, - unique_ptr statistics) - : SegmentBase(start, count), type(move(type_p)), type_size(GetTypeIdSize(type.InternalType())), - segment_type(segment_type), stats(type, type_size, move(statistics)) { +void RowGroup::NextVector(RowGroupScanState &state) { + state.vector_index++; + for (idx_t i = 0; i < state.parent.column_ids.size(); i++) { + auto column = state.parent.column_ids[i]; + if (column == COLUMN_IDENTIFIER_ROW_ID) { + continue; + } + D_ASSERT(column < columns.size()); + 
columns[column]->Skip(state.column_scans[i]); + } } -} // namespace duckdb +bool RowGroup::CheckZonemap(TableFilterSet &filters, const vector &column_ids) { + for (auto &entry : filters.filters) { + auto column_index = entry.first; + auto &filter = entry.second; + auto base_column_index = column_ids[column_index]; + + auto propagate_result = filter->CheckStatistics(*stats[base_column_index]->statistics); + if (propagate_result == FilterPropagateResult::FILTER_ALWAYS_FALSE || + propagate_result == FilterPropagateResult::FILTER_FALSE_OR_NULL) { + return false; + } + } + return true; +} +bool RowGroup::CheckZonemapSegments(RowGroupScanState &state) { + if (!state.parent.table_filters) { + return true; + } + auto &column_ids = state.parent.column_ids; + for (auto &entry : state.parent.table_filters->filters) { + D_ASSERT(entry.first < column_ids.size()); + auto column_idx = entry.first; + auto base_column_idx = column_ids[column_idx]; + bool read_segment = columns[base_column_idx]->CheckZonemap(state.column_scans[column_idx], *entry.second); + if (!read_segment) { + idx_t target_row = + state.column_scans[column_idx].current->start + state.column_scans[column_idx].current->count; + D_ASSERT(target_row >= this->start); + D_ASSERT(target_row <= this->start + this->count); + idx_t target_vector_index = (target_row - this->start) / STANDARD_VECTOR_SIZE; + if (state.vector_index == target_vector_index) { + // we can't skip any full vectors because this segment contains less than a full vector + // for now we just bail-out + // FIXME: we could check if we can ALSO skip the next segments, in which case skipping a full vector + // might be possible + // we don't care that much though, since a single segment that fits less than a full vector is + // exceedingly rare + return true; + } + while (state.vector_index < target_vector_index) { + NextVector(state); + } + return false; + } + } + return true; +} +template +void RowGroup::TemplatedScan(Transaction *transaction, 
RowGroupScanState &state, DataChunk &result) { + const bool ALLOW_UPDATES = TYPE != TableScanType::TABLE_SCAN_COMMITTED_ROWS_DISALLOW_UPDATES && + TYPE != TableScanType::TABLE_SCAN_COMMITTED_ROWS_OMIT_PERMANENTLY_DELETED; + auto &table_filters = state.parent.table_filters; + auto &column_ids = state.parent.column_ids; + auto &adaptive_filter = state.parent.adaptive_filter; + while (true) { + if (state.vector_index * STANDARD_VECTOR_SIZE >= state.max_row) { + // exceeded the amount of rows to scan + return; + } + idx_t current_row = state.vector_index * STANDARD_VECTOR_SIZE; + auto max_count = MinValue(STANDARD_VECTOR_SIZE, state.max_row - current_row); + //! first check the zonemap if we have to scan this partition + if (!CheckZonemapSegments(state)) { + continue; + } + // second, scan the version chunk manager to figure out which tuples to load for this transaction + idx_t count; + SelectionVector valid_sel(STANDARD_VECTOR_SIZE); + if (TYPE == TableScanType::TABLE_SCAN_REGULAR) { + D_ASSERT(transaction); + count = state.row_group->GetSelVector(*transaction, state.vector_index, valid_sel, max_count); + if (count == 0) { + // nothing to scan for this vector, skip the entire vector + NextVector(state); + continue; + } + } else if (TYPE == TableScanType::TABLE_SCAN_COMMITTED_ROWS_OMIT_PERMANENTLY_DELETED) { + auto &transaction_manager = TransactionManager::Get(db); + auto lowest_active_start = transaction_manager.LowestActiveStart(); + auto lowest_active_id = transaction_manager.LowestActiveId(); -namespace duckdb { + count = state.row_group->GetCommittedSelVector(lowest_active_start, lowest_active_id, state.vector_index, + valid_sel, max_count); + if (count == 0) { + // nothing to scan for this vector, skip the entire vector + NextVector(state); + continue; + } + } else { + count = max_count; + } + if (count == max_count && !table_filters) { + // scan all vectors completely: full scan without deletions or table filters + for (idx_t i = 0; i < column_ids.size(); i++) 
{ + auto column = column_ids[i]; + if (column == COLUMN_IDENTIFIER_ROW_ID) { + // scan row id + D_ASSERT(result.data[i].GetType().InternalType() == ROW_TYPE); + result.data[i].Sequence(this->start + current_row, 1); + } else { + if (TYPE != TableScanType::TABLE_SCAN_REGULAR) { + columns[column]->ScanCommitted(state.vector_index, state.column_scans[i], result.data[i], + ALLOW_UPDATES); + } else { + D_ASSERT(transaction); + columns[column]->Scan(*transaction, state.vector_index, state.column_scans[i], result.data[i]); + } + } + } + } else { + // partial scan: we have deletions or table filters + idx_t approved_tuple_count = count; + SelectionVector sel; + if (count != max_count) { + sel.Initialize(valid_sel); + } else { + sel.Initialize(FlatVector::INCREMENTAL_SELECTION_VECTOR); + } + //! first, we scan the columns with filters, fetch their data and generate a selection vector. + //! get runtime statistics + auto start_time = high_resolution_clock::now(); + if (table_filters) { + D_ASSERT(ALLOW_UPDATES); + for (idx_t i = 0; i < table_filters->filters.size(); i++) { + auto tf_idx = adaptive_filter->permutation[i]; + auto col_idx = column_ids[tf_idx]; + columns[col_idx]->Select(*transaction, state.vector_index, state.column_scans[tf_idx], + result.data[tf_idx], sel, approved_tuple_count, + *table_filters->filters[tf_idx]); + } + for (auto &table_filter : table_filters->filters) { + result.data[table_filter.first].Slice(sel, approved_tuple_count); + } + } + if (approved_tuple_count == 0) { + // all rows were filtered out by the table filters + // skip this vector in all the scans that were not scanned yet + D_ASSERT(table_filters); + result.Reset(); + for (idx_t i = 0; i < column_ids.size(); i++) { + auto col_idx = column_ids[i]; + if (col_idx == COLUMN_IDENTIFIER_ROW_ID) { + continue; + } + if (table_filters->filters.find(i) == table_filters->filters.end()) { + columns[col_idx]->Skip(state.column_scans[i]); + } + } + state.vector_index++; + continue; + } + //! 
Now we use the selection vector to fetch data for the other columns. + for (idx_t i = 0; i < column_ids.size(); i++) { + if (!table_filters || table_filters->filters.find(i) == table_filters->filters.end()) { + auto column = column_ids[i]; + if (column == COLUMN_IDENTIFIER_ROW_ID) { + D_ASSERT(result.data[i].GetType().InternalType() == PhysicalType::INT64); + result.data[i].SetVectorType(VectorType::FLAT_VECTOR); + auto result_data = (int64_t *)FlatVector::GetData(result.data[i]); + for (size_t sel_idx = 0; sel_idx < approved_tuple_count; sel_idx++) { + result_data[sel_idx] = this->start + current_row + sel.get_index(sel_idx); + } + } else { + if (TYPE == TableScanType::TABLE_SCAN_REGULAR) { + D_ASSERT(transaction); + columns[column]->FilterScan(*transaction, state.vector_index, state.column_scans[i], + result.data[i], sel, approved_tuple_count); + } else { + D_ASSERT(!transaction); + columns[column]->FilterScanCommitted(state.vector_index, state.column_scans[i], + result.data[i], sel, approved_tuple_count, + ALLOW_UPDATES); + } + } + } + } + auto end_time = high_resolution_clock::now(); + if (adaptive_filter && table_filters->filters.size() > 1) { + adaptive_filter->AdaptRuntimeStatistics(duration_cast>(end_time - start_time).count()); + } + D_ASSERT(approved_tuple_count > 0); + count = approved_tuple_count; + } + result.SetCardinality(count); + state.vector_index++; + break; + } +} -constexpr const idx_t MorselInfo::MORSEL_VECTOR_COUNT; -constexpr const idx_t MorselInfo::MORSEL_SIZE; -constexpr const idx_t MorselInfo::MORSEL_LAYER_COUNT; -constexpr const idx_t MorselInfo::MORSEL_LAYER_SIZE; +void RowGroup::Scan(Transaction &transaction, RowGroupScanState &state, DataChunk &result) { + TemplatedScan(&transaction, state, result); +} -ChunkInfo *MorselInfo::GetChunkInfo(idx_t vector_idx) { - if (!root) { +void RowGroup::ScanCommitted(RowGroupScanState &state, DataChunk &result, TableScanType type) { + switch (type) { + case TableScanType::TABLE_SCAN_COMMITTED_ROWS: 
+ TemplatedScan(nullptr, state, result); + break; + case TableScanType::TABLE_SCAN_COMMITTED_ROWS_DISALLOW_UPDATES: + TemplatedScan(nullptr, state, result); + break; + case TableScanType::TABLE_SCAN_COMMITTED_ROWS_OMIT_PERMANENTLY_DELETED: + TemplatedScan(nullptr, state, result); + break; + default: + throw InternalException("Unrecognized table scan type"); + } +} + +ChunkInfo *RowGroup::GetChunkInfo(idx_t vector_idx) { + if (!version_info) { return nullptr; } - return root->info[vector_idx].get(); + return version_info->info[vector_idx].get(); } -idx_t MorselInfo::GetSelVector(Transaction &transaction, idx_t vector_idx, SelectionVector &sel_vector, - idx_t max_count) { - lock_guard lock(morsel_lock); +idx_t RowGroup::GetSelVector(Transaction &transaction, idx_t vector_idx, SelectionVector &sel_vector, idx_t max_count) { + lock_guard lock(row_group_lock); auto info = GetChunkInfo(vector_idx); if (!info) { @@ -125994,9 +139529,20 @@ idx_t MorselInfo::GetSelVector(Transaction &transaction, idx_t vector_idx, Selec return info->GetSelVector(transaction, sel_vector, max_count); } -bool MorselInfo::Fetch(Transaction &transaction, idx_t row) { - D_ASSERT(row < MorselInfo::MORSEL_SIZE); - lock_guard lock(morsel_lock); +idx_t RowGroup::GetCommittedSelVector(transaction_t start_time, transaction_t transaction_id, idx_t vector_idx, + SelectionVector &sel_vector, idx_t max_count) { + lock_guard lock(row_group_lock); + + auto info = GetChunkInfo(vector_idx); + if (!info) { + return max_count; + } + return info->GetCommittedSelVector(start_time, transaction_id, sel_vector, max_count); +} + +bool RowGroup::Fetch(Transaction &transaction, idx_t row) { + D_ASSERT(row < this->count); + lock_guard lock(row_group_lock); idx_t vector_index = row / STANDARD_VECTOR_SIZE; auto info = GetChunkInfo(vector_index); @@ -126006,80 +139552,300 @@ bool MorselInfo::Fetch(Transaction &transaction, idx_t row) { return info->Fetch(transaction, row - vector_index * STANDARD_VECTOR_SIZE); } -void 
MorselInfo::Append(Transaction &transaction, idx_t morsel_start, idx_t count, transaction_t commit_id) { - idx_t morsel_end = morsel_start + count; - lock_guard lock(morsel_lock); +void RowGroup::FetchRow(Transaction &transaction, ColumnFetchState &state, const vector &column_ids, + row_t row_id, DataChunk &result, idx_t result_idx) { + for (idx_t col_idx = 0; col_idx < column_ids.size(); col_idx++) { + auto column = column_ids[col_idx]; + if (column == COLUMN_IDENTIFIER_ROW_ID) { + // row id column: fill in the row ids + D_ASSERT(result.data[col_idx].GetType().InternalType() == PhysicalType::INT64); + result.data[col_idx].SetVectorType(VectorType::FLAT_VECTOR); + auto data = FlatVector::GetData(result.data[col_idx]); + data[result_idx] = row_id; + } else { + // regular column: fetch data from the base column + columns[column]->FetchRow(transaction, state, row_id, result.data[col_idx], result_idx); + } + } +} + +void RowGroup::AppendVersionInfo(Transaction &transaction, idx_t row_group_start, idx_t count, + transaction_t commit_id) { + idx_t row_group_end = row_group_start + count; + lock_guard lock(row_group_lock); - // create the root if it doesn't exist yet - if (!root) { - root = make_unique(); + this->count += count; + D_ASSERT(this->count <= RowGroup::ROW_GROUP_SIZE); + + // create the version_info if it doesn't exist yet + if (!version_info) { + version_info = make_unique(); } - idx_t start_vector_idx = morsel_start / STANDARD_VECTOR_SIZE; - idx_t end_vector_idx = (morsel_end - 1) / STANDARD_VECTOR_SIZE; + idx_t start_vector_idx = row_group_start / STANDARD_VECTOR_SIZE; + idx_t end_vector_idx = (row_group_end - 1) / STANDARD_VECTOR_SIZE; for (idx_t vector_idx = start_vector_idx; vector_idx <= end_vector_idx; vector_idx++) { - idx_t start = vector_idx == start_vector_idx ? morsel_start - start_vector_idx * STANDARD_VECTOR_SIZE : 0; + idx_t start = vector_idx == start_vector_idx ? 
row_group_start - start_vector_idx * STANDARD_VECTOR_SIZE : 0; idx_t end = - vector_idx == end_vector_idx ? morsel_end - end_vector_idx * STANDARD_VECTOR_SIZE : STANDARD_VECTOR_SIZE; + vector_idx == end_vector_idx ? row_group_end - end_vector_idx * STANDARD_VECTOR_SIZE : STANDARD_VECTOR_SIZE; if (start == 0 && end == STANDARD_VECTOR_SIZE) { // entire vector is encapsulated by append: append a single constant - auto constant_info = make_unique(this->start + vector_idx * STANDARD_VECTOR_SIZE, *this); + auto constant_info = make_unique(this->start + vector_idx * STANDARD_VECTOR_SIZE); constant_info->insert_id = commit_id; constant_info->delete_id = NOT_DELETED_ID; - root->info[vector_idx] = move(constant_info); + version_info->info[vector_idx] = move(constant_info); } else { // part of a vector is encapsulated: append to that part ChunkVectorInfo *info; - if (!root->info[vector_idx]) { + if (!version_info->info[vector_idx]) { // first time appending to this vector: create new info - auto insert_info = make_unique(this->start + vector_idx * STANDARD_VECTOR_SIZE, *this); + auto insert_info = make_unique(this->start + vector_idx * STANDARD_VECTOR_SIZE); info = insert_info.get(); - root->info[vector_idx] = move(insert_info); + version_info->info[vector_idx] = move(insert_info); } else { - D_ASSERT(root->info[vector_idx]->type == ChunkInfoType::VECTOR_INFO); + D_ASSERT(version_info->info[vector_idx]->type == ChunkInfoType::VECTOR_INFO); // use existing vector - info = (ChunkVectorInfo *)root->info[vector_idx].get(); + info = (ChunkVectorInfo *)version_info->info[vector_idx].get(); } info->Append(start, end, commit_id); } } } -void MorselInfo::CommitAppend(transaction_t commit_id, idx_t morsel_start, idx_t count) { - D_ASSERT(root.get()); - idx_t morsel_end = morsel_start + count; - lock_guard lock(morsel_lock); +void RowGroup::CommitAppend(transaction_t commit_id, idx_t row_group_start, idx_t count) { + D_ASSERT(version_info.get()); + idx_t row_group_end = row_group_start 
+ count; + lock_guard lock(row_group_lock); - idx_t start_vector_idx = morsel_start / STANDARD_VECTOR_SIZE; - idx_t end_vector_idx = (morsel_end - 1) / STANDARD_VECTOR_SIZE; + idx_t start_vector_idx = row_group_start / STANDARD_VECTOR_SIZE; + idx_t end_vector_idx = (row_group_end - 1) / STANDARD_VECTOR_SIZE; for (idx_t vector_idx = start_vector_idx; vector_idx <= end_vector_idx; vector_idx++) { - idx_t start = vector_idx == start_vector_idx ? morsel_start - start_vector_idx * STANDARD_VECTOR_SIZE : 0; + idx_t start = vector_idx == start_vector_idx ? row_group_start - start_vector_idx * STANDARD_VECTOR_SIZE : 0; idx_t end = - vector_idx == end_vector_idx ? morsel_end - end_vector_idx * STANDARD_VECTOR_SIZE : STANDARD_VECTOR_SIZE; + vector_idx == end_vector_idx ? row_group_end - end_vector_idx * STANDARD_VECTOR_SIZE : STANDARD_VECTOR_SIZE; - auto info = root->info[vector_idx].get(); + auto info = version_info->info[vector_idx].get(); info->CommitAppend(commit_id, start, end); } } -void MorselInfo::RevertAppend(idx_t morsel_start) { - if (!root) { +void RowGroup::RevertAppend(idx_t row_group_start) { + if (!version_info) { return; } - idx_t start_row = morsel_start - this->start; + idx_t start_row = row_group_start - this->start; idx_t start_vector_idx = (start_row + (STANDARD_VECTOR_SIZE - 1)) / STANDARD_VECTOR_SIZE; - for (idx_t vector_idx = start_vector_idx; vector_idx < MorselInfo::MORSEL_VECTOR_COUNT; vector_idx++) { - root->info[vector_idx].reset(); + for (idx_t vector_idx = start_vector_idx; vector_idx < RowGroup::ROW_GROUP_VECTOR_COUNT; vector_idx++) { + version_info->info[vector_idx].reset(); + } + for (auto &column : columns) { + column->RevertAppend(row_group_start); + } + this->count = MinValue(row_group_start - this->start, this->count); + Verify(); +} + +void RowGroup::InitializeAppend(Transaction &transaction, RowGroupAppendState &append_state, + idx_t remaining_append_count) { + append_state.row_group = this; + append_state.offset_in_row_group = 
this->count; + // for each column, initialize the append state + append_state.states = unique_ptr(new ColumnAppendState[columns.size()]); + for (idx_t i = 0; i < columns.size(); i++) { + columns[i]->InitializeAppend(append_state.states[i]); + } + // append the version info for this row_group + idx_t append_count = MinValue(remaining_append_count, RowGroup::ROW_GROUP_SIZE - this->count); + AppendVersionInfo(transaction, this->count, append_count, transaction.transaction_id); +} + +void RowGroup::Append(RowGroupAppendState &state, DataChunk &chunk, idx_t append_count) { + // append to the current row_group + for (idx_t i = 0; i < columns.size(); i++) { + columns[i]->Append(*stats[i]->statistics, state.states[i], chunk.data[i], append_count); + } + state.offset_in_row_group += append_count; +} + +void RowGroup::Update(Transaction &transaction, DataChunk &update_chunk, row_t *ids, idx_t offset, idx_t count, + const vector &column_ids) { +#ifdef DEBUG + for (size_t i = offset; i < offset + count; i++) { + D_ASSERT(ids[i] >= row_t(this->start) && ids[i] < row_t(this->start + this->count)); + } +#endif + for (idx_t i = 0; i < column_ids.size(); i++) { + auto column = column_ids[i]; + D_ASSERT(column != COLUMN_IDENTIFIER_ROW_ID); + D_ASSERT(columns[column]->type.id() == update_chunk.data[i].GetType().id()); + columns[column]->Update(transaction, column, update_chunk.data[i], ids, offset, count); + MergeStatistics(column, *columns[column]->GetUpdateStatistics()); + } +} + +void RowGroup::UpdateColumn(Transaction &transaction, DataChunk &updates, Vector &row_ids, + const vector &column_path) { + D_ASSERT(updates.ColumnCount() == 1); + auto ids = FlatVector::GetData(row_ids); + + auto primary_column_idx = column_path[0]; + D_ASSERT(primary_column_idx != COLUMN_IDENTIFIER_ROW_ID); + D_ASSERT(primary_column_idx < columns.size()); + columns[primary_column_idx]->UpdateColumn(transaction, column_path, updates.data[0], ids, updates.size(), 1); + MergeStatistics(primary_column_idx, 
*columns[primary_column_idx]->GetUpdateStatistics()); +} + +unique_ptr RowGroup::GetStatistics(idx_t column_idx) { + D_ASSERT(column_idx < stats.size()); + + lock_guard slock(stats_lock); + return stats[column_idx]->statistics->Copy(); +} + +void RowGroup::MergeStatistics(idx_t column_idx, BaseStatistics &other) { + D_ASSERT(column_idx < stats.size()); + + lock_guard slock(stats_lock); + stats[column_idx]->statistics->Merge(other); +} + +RowGroupPointer RowGroup::Checkpoint(TableDataWriter &writer, vector> &global_stats) { + vector> states; + states.reserve(columns.size()); + + // checkpoint the individual columns of the row group + for (idx_t column_idx = 0; column_idx < columns.size(); column_idx++) { + auto &column = columns[column_idx]; + auto checkpoint_state = column->Checkpoint(*this, writer); + D_ASSERT(checkpoint_state); + + auto stats = checkpoint_state->GetStatistics(); + D_ASSERT(stats); + + global_stats[column_idx]->Merge(*stats); + states.push_back(move(checkpoint_state)); + } + + // construct the row group pointer and write the column meta data to disk + D_ASSERT(states.size() == columns.size()); + RowGroupPointer row_group_pointer; + row_group_pointer.row_start = start; + row_group_pointer.tuple_count = count; + for (auto &state : states) { + // get the current position of the meta data writer + auto &meta_writer = writer.GetMetaWriter(); + auto pointer = meta_writer.GetBlockPointer(); + + // store the stats and the data pointers in the row group pointers + row_group_pointer.data_pointers.push_back(pointer); + row_group_pointer.statistics.push_back(state->GetStatistics()); + + // now flush the actual column data to disk + state->FlushToDisk(); + } + row_group_pointer.versions = version_info; + Verify(); + return row_group_pointer; +} + +void RowGroup::CheckpointDeletes(VersionNode *versions, Serializer &serializer) { + if (!versions) { + // no version information: write nothing + serializer.Write(0); + return; + } + // first count how many 
ChunkInfo's we need to deserialize + idx_t chunk_info_count = 0; + for (idx_t vector_idx = 0; vector_idx < RowGroup::ROW_GROUP_VECTOR_COUNT; vector_idx++) { + auto chunk_info = versions->info[vector_idx].get(); + if (!chunk_info) { + continue; + } + chunk_info_count++; + } + // now serialize the actual version information + serializer.Write(chunk_info_count); + for (idx_t vector_idx = 0; vector_idx < RowGroup::ROW_GROUP_VECTOR_COUNT; vector_idx++) { + auto chunk_info = versions->info[vector_idx].get(); + if (!chunk_info) { + continue; + } + serializer.Write(vector_idx); + chunk_info->Serialize(serializer); + } +} + +shared_ptr RowGroup::DeserializeDeletes(Deserializer &source) { + auto chunk_count = source.Read(); + if (chunk_count == 0) { + // no deletes + return nullptr; + } + auto version_info = make_shared(); + for (idx_t i = 0; i < chunk_count; i++) { + idx_t vector_index = source.Read(); + if (vector_index >= RowGroup::ROW_GROUP_VECTOR_COUNT) { + throw Exception("In DeserializeDeletes, vector_index is out of range for the row group. 
Corrupted file?"); + } + version_info->info[vector_index] = ChunkInfo::Deserialize(source); + } + return version_info; +} + +void RowGroup::Serialize(RowGroupPointer &pointer, Serializer &serializer) { + serializer.Write(pointer.row_start); + serializer.Write(pointer.tuple_count); + for (auto &stats : pointer.statistics) { + stats->Serialize(serializer); + } + for (auto &data_pointer : pointer.data_pointers) { + serializer.Write(data_pointer.block_id); + serializer.Write(data_pointer.offset); + } + CheckpointDeletes(pointer.versions.get(), serializer); +} + +RowGroupPointer RowGroup::Deserialize(Deserializer &source, const vector &columns) { + RowGroupPointer result; + result.row_start = source.Read(); + result.tuple_count = source.Read(); + + result.data_pointers.reserve(columns.size()); + result.statistics.reserve(columns.size()); + + for (idx_t i = 0; i < columns.size(); i++) { + auto stats = BaseStatistics::Deserialize(source, columns[i].type); + result.statistics.push_back(move(stats)); + } + for (idx_t i = 0; i < columns.size(); i++) { + BlockPointer pointer; + pointer.block_id = source.Read(); + pointer.offset = source.Read(); + result.data_pointers.push_back(pointer); + } + result.versions = DeserializeDeletes(source); + return result; +} + +//===--------------------------------------------------------------------===// +// GetStorageInfo +//===--------------------------------------------------------------------===// +void RowGroup::GetStorageInfo(idx_t row_group_index, vector> &result) { + for (idx_t col_idx = 0; col_idx < columns.size(); col_idx++) { + columns[col_idx]->GetStorageInfo(row_group_index, {col_idx}, result); } } +//===--------------------------------------------------------------------===// +// Version Delete Information +//===--------------------------------------------------------------------===// class VersionDeleteState { public: - VersionDeleteState(MorselInfo &info, Transaction &transaction, DataTable *table, idx_t base_row) + 
VersionDeleteState(RowGroup &info, Transaction &transaction, DataTable *table, idx_t base_row) : info(info), transaction(transaction), table(table), current_info(nullptr), current_chunk(INVALID_INDEX), - count(0), base_row(base_row) { + count(0), base_row(base_row), delete_count(0) { } - MorselInfo &info; + RowGroup &info; Transaction &transaction; DataTable *table; ChunkVectorInfo *current_info; @@ -126088,53 +139854,62 @@ class VersionDeleteState { idx_t count; idx_t base_row; idx_t chunk_row; + idx_t delete_count; public: void Delete(row_t row_id); void Flush(); }; -void MorselInfo::Delete(Transaction &transaction, DataTable *table, Vector &row_ids, idx_t count) { - lock_guard lock(morsel_lock); +idx_t RowGroup::Delete(Transaction &transaction, DataTable *table, row_t *ids, idx_t count) { + lock_guard lock(row_group_lock); VersionDeleteState del_state(*this, transaction, table, this->start); - VectorData rdata; - row_ids.Orrify(count, rdata); // obtain a write lock - auto ids = (row_t *)rdata.data; for (idx_t i = 0; i < count; i++) { - auto ridx = rdata.sel->get_index(i); - del_state.Delete(ids[ridx] - this->start); + D_ASSERT(ids[i] >= 0); + D_ASSERT(idx_t(ids[i]) >= this->start && idx_t(ids[i]) < this->start + this->count); + del_state.Delete(ids[i] - this->start); } del_state.Flush(); + return del_state.delete_count; +} + +void RowGroup::Verify() { +#ifdef DEBUG + for (auto &column : columns) { + column->Verify(*this); + } +#endif } void VersionDeleteState::Delete(row_t row_id) { + D_ASSERT(row_id >= 0); idx_t vector_idx = row_id / STANDARD_VECTOR_SIZE; idx_t idx_in_vector = row_id - vector_idx * STANDARD_VECTOR_SIZE; if (current_chunk != vector_idx) { Flush(); - if (!info.root) { - info.root = make_unique(); + if (!info.version_info) { + info.version_info = make_unique(); } - if (!info.root->info[vector_idx]) { + if (!info.version_info->info[vector_idx]) { // no info yet: create it - info.root->info[vector_idx] = - make_unique(info.start + vector_idx * 
STANDARD_VECTOR_SIZE, info); - } else if (info.root->info[vector_idx]->type == ChunkInfoType::CONSTANT_INFO) { - auto &constant = (ChunkConstantInfo &)*info.root->info[vector_idx]; + info.version_info->info[vector_idx] = + make_unique(info.start + vector_idx * STANDARD_VECTOR_SIZE); + } else if (info.version_info->info[vector_idx]->type == ChunkInfoType::CONSTANT_INFO) { + auto &constant = (ChunkConstantInfo &)*info.version_info->info[vector_idx]; // info exists but it's a constant info: convert to a vector info - auto new_info = make_unique(info.start + vector_idx * STANDARD_VECTOR_SIZE, info); - new_info->insert_id = constant.insert_id; + auto new_info = make_unique(info.start + vector_idx * STANDARD_VECTOR_SIZE); + new_info->insert_id = constant.insert_id.load(); for (idx_t i = 0; i < STANDARD_VECTOR_SIZE; i++) { - new_info->inserted[i] = constant.insert_id; + new_info->inserted[i] = constant.insert_id.load(); } - info.root->info[vector_idx] = move(new_info); + info.version_info->info[vector_idx] = move(new_info); } - D_ASSERT(info.root->info[vector_idx]->type == ChunkInfoType::VECTOR_INFO); - current_info = (ChunkVectorInfo *)info.root->info[vector_idx].get(); + D_ASSERT(info.version_info->info[vector_idx]->type == ChunkInfoType::VECTOR_INFO); + current_info = (ChunkVectorInfo *)info.version_info->info[vector_idx].get(); current_chunk = vector_idx; chunk_row = vector_idx * STANDARD_VECTOR_SIZE; } @@ -126146,7 +139921,7 @@ void VersionDeleteState::Flush() { return; } // delete in the current info - current_info->Delete(transaction, rows, count); + delete_count += current_info->Delete(transaction, rows, count); // now push the delete into the undo buffer transaction.PushDelete(table, current_info, rows, count, base_row + chunk_row); count = 0; @@ -126156,71 +139931,6 @@ void VersionDeleteState::Flush() { - - - - - - - - - - -namespace duckdb { - -PersistentSegment::PersistentSegment(DatabaseInstance &db, block_id_t id, idx_t offset, const LogicalType &type_p, - 
idx_t start, idx_t count, unique_ptr statistics) - : ColumnSegment(type_p, ColumnSegmentType::PERSISTENT, start, count, move(statistics)), db(db), block_id(id), - offset(offset) { - D_ASSERT(offset == 0); - if (type.InternalType() == PhysicalType::VARCHAR) { - data = make_unique(db, start, id); - data->max_vector_count = count / STANDARD_VECTOR_SIZE + (count % STANDARD_VECTOR_SIZE == 0 ? 0 : 1); - } else if (type.InternalType() == PhysicalType::BIT) { - data = make_unique(db, start, id); - } else { - data = make_unique(db, type.InternalType(), start, id); - } - data->tuple_count = count; -} - -void PersistentSegment::InitializeScan(ColumnScanState &state) { - data->InitializeScan(state); -} - -void PersistentSegment::Scan(ColumnScanState &state, idx_t vector_index, Vector &result) { - data->Scan(state, vector_index, result); -} - -void PersistentSegment::Fetch(ColumnScanState &state, idx_t vector_index, Vector &result) { - data->Fetch(state, vector_index, result); -} - -void PersistentSegment::FetchRow(ColumnFetchState &state, row_t row_id, Vector &result, idx_t result_idx) { - data->FetchRow(state, row_id - this->start, result, result_idx); -} - -} // namespace duckdb - - - - -namespace duckdb { - -PersistentTableData::PersistentTableData(idx_t column_count) { - column_data.resize(column_count); -} - -PersistentTableData::~PersistentTableData() { -} - -PersistentColumnData::~PersistentColumnData() { -} - -} // namespace duckdb - - - namespace duckdb { SegmentBase *SegmentTree::GetRootSegment() { @@ -126228,7 +139938,7 @@ SegmentBase *SegmentTree::GetRootSegment() { } SegmentBase *SegmentTree::GetLastSegment() { - return nodes.back().node; + return nodes.empty() ? 
nullptr : nodes.back().node; } SegmentBase *SegmentTree::GetSegment(idx_t row_number) { @@ -126237,12 +139947,17 @@ SegmentBase *SegmentTree::GetSegment(idx_t row_number) { } idx_t SegmentTree::GetSegmentIndex(idx_t row_number) { + D_ASSERT(!nodes.empty()); + D_ASSERT(row_number >= nodes[0].row_start); + D_ASSERT(row_number < nodes.back().row_start + nodes.back().node->count); idx_t lower = 0; idx_t upper = nodes.size() - 1; // binary search to find the node while (lower <= upper) { idx_t index = (lower + upper) / 2; + D_ASSERT(index < nodes.size()); auto &entry = nodes[index]; + D_ASSERT(entry.row_start == entry.node->start); if (row_number < entry.row_start) { upper = index - 1; } else if (row_number >= entry.row_start + entry.node->count) { @@ -126286,22 +140001,25 @@ void SegmentTree::Replace(SegmentTree &other) { namespace duckdb { -StandardColumnData::StandardColumnData(DatabaseInstance &db, DataTableInfo &table_info, LogicalType type, - idx_t column_idx) - : ColumnData(db, table_info, move(type), column_idx), validity(db, table_info, column_idx) { +StandardColumnData::StandardColumnData(DataTableInfo &info, idx_t column_index, idx_t start_row, LogicalType type, + ColumnData *parent) + : ColumnData(info, column_index, start_row, move(type), parent), validity(info, 0, start_row, this) { } bool StandardColumnData::CheckZonemap(ColumnScanState &state, TableFilter &filter) { if (!state.segment_checked) { - state.segment_checked = true; if (!state.current) { return true; } - if (state.current->stats.CheckZonemap(filter)) { + state.segment_checked = true; + auto prune_result = filter.CheckStatistics(*state.current->stats.statistics); + if (prune_result != FilterPropagateResult::FILTER_ALWAYS_FALSE) { return true; } - if (state.updates) { - return state.updates->GetStatistics().CheckZonemap(filter); + if (updates) { + auto update_stats = updates->GetStatistics(); + prune_result = filter.CheckStatistics(*update_stats); + return prune_result != 
FilterPropagateResult::FILTER_ALWAYS_FALSE; } else { return false; } @@ -126311,12 +140029,7 @@ bool StandardColumnData::CheckZonemap(ColumnScanState &state, TableFilter &filte } void StandardColumnData::InitializeScan(ColumnScanState &state) { - // initialize the current segment - state.current = (ColumnSegment *)data.GetRootSegment(); - state.updates = (UpdateSegment *)updates.GetRootSegment(); - state.vector_index = 0; - state.vector_index_updates = 0; - state.initialized = false; + ColumnData::InitializeScan(state); // initialize the validity segment ColumnScanState child_state; @@ -126324,53 +140037,34 @@ void StandardColumnData::InitializeScan(ColumnScanState &state) { state.child_states.push_back(move(child_state)); } -void StandardColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t vector_idx) { - idx_t row_idx = vector_idx * STANDARD_VECTOR_SIZE; - state.current = (ColumnSegment *)data.GetSegment(row_idx); - state.updates = (UpdateSegment *)updates.GetSegment(row_idx); - state.vector_index = (row_idx - state.current->start) / STANDARD_VECTOR_SIZE; - state.vector_index_updates = (row_idx - state.updates->start) / STANDARD_VECTOR_SIZE; - state.initialized = false; +void StandardColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) { + ColumnData::InitializeScanWithOffset(state, row_idx); // initialize the validity segment ColumnScanState child_state; - validity.InitializeScanWithOffset(child_state, vector_idx); + validity.InitializeScanWithOffset(child_state, row_idx); state.child_states.push_back(move(child_state)); } -void StandardColumnData::Scan(Transaction &transaction, ColumnScanState &state, Vector &result) { - if (!state.initialized) { - state.current->InitializeScan(state); - state.initialized = true; - } - // fetch validity data - validity.Scan(transaction, state.child_states[0], result); - - // perform a scan of this segment - state.current->Scan(state, state.vector_index, result); - - // merge the updates into 
the result - state.updates->FetchUpdates(transaction, state.vector_index_updates, result); - - // move over to the next vector - state.Next(); +idx_t StandardColumnData::Scan(Transaction &transaction, idx_t vector_index, ColumnScanState &state, Vector &result) { + D_ASSERT(state.row_index == state.child_states[0].row_index); + auto scan_count = ColumnData::Scan(transaction, vector_index, state, result); + validity.Scan(transaction, vector_index, state.child_states[0], result); + return scan_count; } -void StandardColumnData::IndexScan(ColumnScanState &state, Vector &result, bool allow_pending_updates) { - if (!state.initialized) { - state.current->InitializeScan(state); - state.initialized = true; - } - // // perform a scan of this segment - validity.IndexScan(state.child_states[0], result, allow_pending_updates); +idx_t StandardColumnData::ScanCommitted(idx_t vector_index, ColumnScanState &state, Vector &result, + bool allow_updates) { + D_ASSERT(state.row_index == state.child_states[0].row_index); + auto scan_count = ColumnData::ScanCommitted(vector_index, state, result, allow_updates); + validity.ScanCommitted(vector_index, state.child_states[0], result, allow_updates); + return scan_count; +} - state.current->Scan(state, state.vector_index, result); - if (!allow_pending_updates && state.updates->HasUncommittedUpdates(state.vector_index)) { - throw TransactionException("Cannot create index with outstanding updates"); - } - state.updates->FetchCommitted(state.vector_index_updates, result); - // move over to the next vector - state.Next(); +idx_t StandardColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t count) { + auto scan_count = ColumnData::ScanCount(state, result, count); + validity.ScanCount(state.child_states[0], result, count); + return scan_count; } void StandardColumnData::InitializeAppend(ColumnAppendState &state) { @@ -126381,10 +140075,10 @@ void StandardColumnData::InitializeAppend(ColumnAppendState &state) { 
state.child_appends.push_back(move(child_append)); } -void StandardColumnData::AppendData(ColumnAppendState &state, VectorData &vdata, idx_t count) { - ColumnData::AppendData(state, vdata, count); +void StandardColumnData::AppendData(BaseStatistics &stats, ColumnAppendState &state, VectorData &vdata, idx_t count) { + ColumnData::AppendData(stats, state, vdata, count); - validity.AppendData(state.child_appends[0], vdata, count); + validity.AppendData(*stats.validity_stats, state.child_appends[0], vdata, count); } void StandardColumnData::RevertAppend(row_t start_row) { @@ -126393,50 +140087,56 @@ void StandardColumnData::RevertAppend(row_t start_row) { validity.RevertAppend(start_row); } -void StandardColumnData::Update(Transaction &transaction, Vector &update_vector, Vector &row_ids, idx_t count) { - idx_t first_id = FlatVector::GetValue(row_ids, 0); - - // fetch the validity data for this segment - Vector base_data(type); - auto column_segment = (ColumnSegment *)data.GetSegment(first_id); - auto vector_index = (first_id - column_segment->start) / STANDARD_VECTOR_SIZE; - // now perform the fetch within the segment - ColumnScanState state; - column_segment->Fetch(state, vector_index, base_data); - - // first find the segment that the update belongs to - auto segment = (UpdateSegment *)updates.GetSegment(first_id); - // now perform the update within the segment - segment->Update(transaction, update_vector, FlatVector::GetData(row_ids), count, base_data); - - validity.Update(transaction, update_vector, row_ids, count); -} - -void StandardColumnData::Fetch(ColumnScanState &state, row_t row_id, Vector &result) { +idx_t StandardColumnData::Fetch(ColumnScanState &state, row_t row_id, Vector &result) { // fetch validity mask if (state.child_states.empty()) { ColumnScanState child_state; state.child_states.push_back(move(child_state)); } + auto scan_count = ColumnData::Fetch(state, row_id, result); validity.Fetch(state.child_states[0], row_id, result); - 
ColumnData::Fetch(state, row_id, result); + return scan_count; +} + +void StandardColumnData::Update(Transaction &transaction, idx_t column_index, Vector &update_vector, row_t *row_ids, + idx_t offset, idx_t update_count) { + ColumnData::Update(transaction, column_index, update_vector, row_ids, offset, update_count); + validity.Update(transaction, column_index, update_vector, row_ids, offset, update_count); +} + +void StandardColumnData::UpdateColumn(Transaction &transaction, const vector &column_path, + Vector &update_vector, row_t *row_ids, idx_t update_count, idx_t depth) { + if (depth >= column_path.size()) { + // update this column + ColumnData::Update(transaction, column_path[0], update_vector, row_ids, 0, update_count); + } else { + // update the child column (i.e. the validity column) + validity.UpdateColumn(transaction, column_path, update_vector, row_ids, update_count, depth + 1); + } +} + +unique_ptr StandardColumnData::GetUpdateStatistics() { + auto stats = updates ? updates->GetStatistics() : nullptr; + auto validity_stats = validity.GetUpdateStatistics(); + if (!stats && !validity_stats) { + return nullptr; + } + if (!stats) { + stats = BaseStatistics::CreateEmpty(type); + } + stats->validity_stats = move(validity_stats); + return stats; } -void StandardColumnData::FetchRow(ColumnFetchState &state, Transaction &transaction, row_t row_id, Vector &result, +void StandardColumnData::FetchRow(Transaction &transaction, ColumnFetchState &state, row_t row_id, Vector &result, idx_t result_idx) { // find the segment the row belongs to if (state.child_states.empty()) { auto child_state = make_unique(); state.child_states.push_back(move(child_state)); } - validity.FetchRow(*state.child_states[0], transaction, row_id, result, result_idx); - ColumnData::FetchRow(state, transaction, row_id, result, result_idx); -} - -unique_ptr StandardColumnData::GetStatistics() { - auto base_stats = ColumnData::GetStatistics(); - base_stats->validity_stats = 
validity.GetStatistics(); - return base_stats; + validity.FetchRow(transaction, *state.child_states[0], row_id, result, result_idx); + ColumnData::FetchRow(transaction, state, row_id, result, result_idx); } void StandardColumnData::CommitDropColumn() { @@ -126444,93 +140144,365 @@ void StandardColumnData::CommitDropColumn() { validity.CommitDropColumn(); } -void StandardColumnData::Checkpoint(TableDataWriter &writer) { - ColumnData::Checkpoint(writer); - validity.Checkpoint(writer); +struct StandardColumnCheckpointState : public ColumnCheckpointState { + StandardColumnCheckpointState(RowGroup &row_group, ColumnData &column_data, TableDataWriter &writer) + : ColumnCheckpointState(row_group, column_data, writer) { + } + + unique_ptr validity_state; + +public: + unique_ptr GetStatistics() override { + auto stats = global_stats->Copy(); + stats->validity_stats = validity_state->GetStatistics(); + return stats; + } + + void FlushToDisk() override { + ColumnCheckpointState::FlushToDisk(); + validity_state->FlushToDisk(); + } +}; + +unique_ptr StandardColumnData::CreateCheckpointState(RowGroup &row_group, + TableDataWriter &writer) { + return make_unique(row_group, *this, writer); +} + +unique_ptr StandardColumnData::Checkpoint(RowGroup &row_group, TableDataWriter &writer) { + auto validity_state = validity.Checkpoint(row_group, writer); + auto base_state = ColumnData::Checkpoint(row_group, writer); + auto &checkpoint_state = (StandardColumnCheckpointState &)*base_state; + checkpoint_state.validity_state = move(validity_state); + return base_state; +} + +void StandardColumnData::CheckpointScan(ColumnSegment *segment, ColumnScanState &state, idx_t row_group_start, + idx_t count, Vector &scan_vector) { + ColumnData::CheckpointScan(segment, state, row_group_start, count, scan_vector); + + idx_t offset_in_row_group = state.row_index - row_group_start; + validity.ScanCommittedRange(row_group_start, offset_in_row_group, count, scan_vector); } -void 
StandardColumnData::Initialize(PersistentColumnData &column_data) { - auto &persistent = (StandardPersistentColumnData &)column_data; - ColumnData::Initialize(column_data); - validity.Initialize(*persistent.validity); +void StandardColumnData::DeserializeColumn(Deserializer &source) { + ColumnData::DeserializeColumn(source); + validity.DeserializeColumn(source); } -unique_ptr StandardColumnData::Deserialize(DatabaseInstance &db, Deserializer &source, - const LogicalType &type) { - auto result = make_unique(); - BaseDeserialize(db, source, type, *result); - result->validity = ValidityColumnData::Deserialize(db, source); - return move(result); +void StandardColumnData::GetStorageInfo(idx_t row_group_index, vector col_path, vector> &result) { + ColumnData::GetStorageInfo(row_group_index, col_path, result); + col_path.push_back(0); + validity.GetStorageInfo(row_group_index, move(col_path), result); +} + +void StandardColumnData::Verify(RowGroup &parent) { +#ifdef DEBUG + ColumnData::Verify(parent); + validity.Verify(parent); +#endif } } // namespace duckdb +namespace duckdb { +StructColumnData::StructColumnData(DataTableInfo &info, idx_t column_index, idx_t start_row, LogicalType type_p, + ColumnData *parent) + : ColumnData(info, column_index, start_row, move(type_p), parent), validity(info, 0, start_row, this) { + D_ASSERT(type.InternalType() == PhysicalType::STRUCT); + auto &child_types = StructType::GetChildTypes(type); + D_ASSERT(child_types.size() > 0); + // the sub column index, starting at 1 (0 is the validity mask) + idx_t sub_column_index = 1; + for (auto &child_type : child_types) { + sub_columns.push_back( + ColumnData::CreateColumnUnique(info, sub_column_index, start_row, child_type.second, this)); + sub_column_index++; + } +} +bool StructColumnData::CheckZonemap(ColumnScanState &state, TableFilter &filter) { + // table filters are not supported yet for struct columns + return false; +} +idx_t StructColumnData::GetMaxEntry() { + return 
sub_columns[0]->GetMaxEntry(); +} +void StructColumnData::InitializeScan(ColumnScanState &state) { + D_ASSERT(state.child_states.empty()); + state.row_index = 0; + state.current = nullptr; + // initialize the validity segment + ColumnScanState validity_state; + validity.InitializeScan(validity_state); + state.child_states.push_back(move(validity_state)); + // initialize the sub-columns + for (auto &sub_column : sub_columns) { + ColumnScanState child_state; + sub_column->InitializeScan(child_state); + state.child_states.push_back(move(child_state)); + } +} -namespace duckdb { +void StructColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) { + D_ASSERT(state.child_states.empty()); -TransientSegment::TransientSegment(DatabaseInstance &db, const LogicalType &type_p, idx_t start) - : ColumnSegment(type_p, ColumnSegmentType::TRANSIENT, start), db(db) { - if (type.InternalType() == PhysicalType::VARCHAR) { - data = make_unique(db, start); - } else if (type.InternalType() == PhysicalType::BIT) { - data = make_unique(db, start); + state.row_index = row_idx; + state.current = nullptr; + + // initialize the validity segment + ColumnScanState validity_state; + validity.InitializeScanWithOffset(validity_state, row_idx); + state.child_states.push_back(move(validity_state)); + + // initialize the sub-columns + for (auto &sub_column : sub_columns) { + ColumnScanState child_state; + sub_column->InitializeScanWithOffset(child_state, row_idx); + state.child_states.push_back(move(child_state)); + } +} + +idx_t StructColumnData::Scan(Transaction &transaction, idx_t vector_index, ColumnScanState &state, Vector &result) { + auto scan_count = validity.Scan(transaction, vector_index, state.child_states[0], result); + auto &child_entries = StructVector::GetEntries(result); + for (idx_t i = 0; i < sub_columns.size(); i++) { + sub_columns[i]->Scan(transaction, vector_index, state.child_states[i + 1], *child_entries[i]); + } + return scan_count; +} + +idx_t 
StructColumnData::ScanCommitted(idx_t vector_index, ColumnScanState &state, Vector &result, bool allow_updates) { + auto scan_count = validity.ScanCommitted(vector_index, state.child_states[0], result, allow_updates); + auto &child_entries = StructVector::GetEntries(result); + for (idx_t i = 0; i < sub_columns.size(); i++) { + sub_columns[i]->ScanCommitted(vector_index, state.child_states[i + 1], *child_entries[i], allow_updates); + } + return scan_count; +} + +idx_t StructColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t count) { + auto scan_count = validity.ScanCount(state.child_states[0], result, count); + auto &child_entries = StructVector::GetEntries(result); + for (idx_t i = 0; i < sub_columns.size(); i++) { + sub_columns[i]->ScanCount(state.child_states[i + 1], *child_entries[i], count); + } + return scan_count; +} + +void StructColumnData::InitializeAppend(ColumnAppendState &state) { + ColumnAppendState validity_append; + validity.InitializeAppend(validity_append); + state.child_appends.push_back(move(validity_append)); + + for (auto &sub_column : sub_columns) { + ColumnAppendState child_append; + sub_column->InitializeAppend(child_append); + state.child_appends.push_back(move(child_append)); + } +} + +void StructColumnData::Append(BaseStatistics &stats, ColumnAppendState &state, Vector &vector, idx_t count) { + vector.Normalify(count); + + // append the null values + validity.Append(*stats.validity_stats, state.child_appends[0], vector, count); + + auto &struct_validity = FlatVector::Validity(vector); + + auto &struct_stats = (StructStatistics &)stats; + auto &child_entries = StructVector::GetEntries(vector); + for (idx_t i = 0; i < child_entries.size(); i++) { + if (!struct_validity.AllValid()) { + // we set the child entries of the struct to NULL + // for any values in which the struct itself is NULL + child_entries[i]->Normalify(count); + + auto &child_validity = FlatVector::Validity(*child_entries[i]); + 
child_validity.Combine(struct_validity, count); + } + sub_columns[i]->Append(*struct_stats.child_stats[i], state.child_appends[i + 1], *child_entries[i], count); + } +} + +void StructColumnData::RevertAppend(row_t start_row) { + validity.RevertAppend(start_row); + for (auto &sub_column : sub_columns) { + sub_column->RevertAppend(start_row); + } +} + +idx_t StructColumnData::Fetch(ColumnScanState &state, row_t row_id, Vector &result) { + // fetch validity mask + auto &child_entries = StructVector::GetEntries(result); + // insert any child states that are required + for (idx_t i = state.child_states.size(); i < child_entries.size() + 1; i++) { + ColumnScanState child_state; + state.child_states.push_back(move(child_state)); + } + // fetch the validity state + idx_t scan_count = validity.Fetch(state.child_states[0], row_id, result); + // fetch the sub-column states + for (idx_t i = 0; i < child_entries.size(); i++) { + sub_columns[i]->Fetch(state.child_states[i + 1], row_id, *child_entries[i]); + } + return scan_count; +} + +void StructColumnData::Update(Transaction &transaction, idx_t column_index, Vector &update_vector, row_t *row_ids, + idx_t offset, idx_t update_count) { + validity.Update(transaction, column_index, update_vector, row_ids, offset, update_count); + auto &child_entries = StructVector::GetEntries(update_vector); + for (idx_t i = 0; i < child_entries.size(); i++) { + sub_columns[i]->Update(transaction, column_index, *child_entries[i], row_ids, offset, update_count); + } +} + +void StructColumnData::UpdateColumn(Transaction &transaction, const vector &column_path, + Vector &update_vector, row_t *row_ids, idx_t update_count, idx_t depth) { + // we can never DIRECTLY update a struct column + if (depth >= column_path.size()) { + throw InternalException("Attempting to directly update a struct column - this should not be possible"); + } + auto update_column = column_path[depth]; + if (update_column == 0) { + // update the validity column + 
validity.UpdateColumn(transaction, column_path, update_vector, row_ids, update_count, depth + 1); } else { - data = make_unique(db, type.InternalType(), start); + if (update_column > sub_columns.size()) { + throw InternalException("Update column_path out of range"); + } + sub_columns[update_column - 1]->UpdateColumn(transaction, column_path, update_vector, row_ids, update_count, + depth + 1); } } -TransientSegment::TransientSegment(PersistentSegment &segment) - : ColumnSegment(segment.type, ColumnSegmentType::TRANSIENT, segment.start), db(segment.db) { - if (segment.block_id == segment.data->block->BlockId()) { - segment.data->ToTemporary(); +unique_ptr StructColumnData::GetUpdateStatistics() { + // check if any child column has updates + auto stats = BaseStatistics::CreateEmpty(type); + auto &struct_stats = (StructStatistics &)*stats; + stats->validity_stats = validity.GetUpdateStatistics(); + for (idx_t i = 0; i < sub_columns.size(); i++) { + auto child_stats = sub_columns[i]->GetUpdateStatistics(); + if (child_stats) { + struct_stats.child_stats[i] = move(child_stats); + } } - data = move(segment.data); - stats = move(segment.stats); - count = segment.count; - D_ASSERT(!segment.next); + return stats; } -void TransientSegment::InitializeScan(ColumnScanState &state) { - data->InitializeScan(state); +void StructColumnData::FetchRow(Transaction &transaction, ColumnFetchState &state, row_t row_id, Vector &result, + idx_t result_idx) { + // fetch validity mask + auto &child_entries = StructVector::GetEntries(result); + // insert any child states that are required + for (idx_t i = state.child_states.size(); i < child_entries.size() + 1; i++) { + auto child_state = make_unique(); + state.child_states.push_back(move(child_state)); + } + // fetch the validity state + validity.FetchRow(transaction, *state.child_states[0], row_id, result, result_idx); + // fetch the sub-column states + for (idx_t i = 0; i < child_entries.size(); i++) { + 
sub_columns[i]->FetchRow(transaction, *state.child_states[i + 1], row_id, *child_entries[i], result_idx); + } } -void TransientSegment::Scan(ColumnScanState &state, idx_t vector_index, Vector &result) { - data->Scan(state, vector_index, result); +void StructColumnData::CommitDropColumn() { + validity.CommitDropColumn(); + for (auto &sub_column : sub_columns) { + sub_column->CommitDropColumn(); + } } -void TransientSegment::Fetch(ColumnScanState &state, idx_t vector_index, Vector &result) { - data->Fetch(state, vector_index, result); +struct StructColumnCheckpointState : public ColumnCheckpointState { + StructColumnCheckpointState(RowGroup &row_group, ColumnData &column_data, TableDataWriter &writer) + : ColumnCheckpointState(row_group, column_data, writer) { + global_stats = make_unique(column_data.type); + } + + unique_ptr validity_state; + vector> child_states; + +public: + unique_ptr GetStatistics() override { + auto stats = make_unique(column_data.type); + D_ASSERT(stats->child_stats.size() == child_states.size()); + stats->validity_stats = validity_state->GetStatistics(); + for (idx_t i = 0; i < child_states.size(); i++) { + stats->child_stats[i] = child_states[i]->GetStatistics(); + D_ASSERT(stats->child_stats[i]); + } + return move(stats); + } + + void FlushToDisk() override { + validity_state->FlushToDisk(); + for (auto &state : child_states) { + state->FlushToDisk(); + } + } +}; + +unique_ptr StructColumnData::CreateCheckpointState(RowGroup &row_group, + TableDataWriter &writer) { + return make_unique(row_group, *this, writer); } -void TransientSegment::FetchRow(ColumnFetchState &state, row_t row_id, Vector &result, idx_t result_idx) { - data->FetchRow(state, row_id - this->start, result, result_idx); +unique_ptr StructColumnData::Checkpoint(RowGroup &row_group, TableDataWriter &writer) { + auto checkpoint_state = make_unique(row_group, *this, writer); + checkpoint_state->validity_state = validity.Checkpoint(row_group, writer); + for (auto &sub_column : 
sub_columns) { + checkpoint_state->child_states.push_back(sub_column->Checkpoint(row_group, writer)); + } + return move(checkpoint_state); } -void TransientSegment::InitializeAppend(ColumnAppendState &state) { +void StructColumnData::DeserializeColumn(Deserializer &source) { + validity.DeserializeColumn(source); + for (auto &sub_column : sub_columns) { + sub_column->DeserializeColumn(source); + } } -idx_t TransientSegment::Append(ColumnAppendState &state, VectorData &append_data, idx_t offset, idx_t count) { - idx_t appended = data->Append(stats, append_data, offset, count); - this->count += appended; - return appended; +void StructColumnData::GetStorageInfo(idx_t row_group_index, vector col_path, vector> &result) { + col_path.push_back(0); + validity.GetStorageInfo(row_group_index, col_path, result); + for (idx_t i = 0; i < sub_columns.size(); i++) { + col_path.back() = i + 1; + sub_columns[i]->GetStorageInfo(row_group_index, col_path, result); + } } -void TransientSegment::RevertAppend(idx_t start_row) { - data->RevertAppend(start_row); - this->count = start_row - this->start; +void StructColumnData::Verify(RowGroup &parent) { +#ifdef DEBUG + ColumnData::Verify(parent); + validity.Verify(parent); + for (auto &sub_column : sub_columns) { + sub_column->Verify(parent); + } +#endif } } // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/transaction/update_info.hpp +// +// +//===----------------------------------------------------------------------===// + + @@ -126538,24 +140510,72 @@ void TransientSegment::RevertAppend(idx_t start_row) { namespace duckdb { +class UpdateSegment; +struct DataTableInfo; -constexpr const idx_t UpdateSegment::MORSEL_VECTOR_COUNT; -constexpr const idx_t UpdateSegment::MORSEL_SIZE; -constexpr const idx_t UpdateSegment::MORSEL_LAYER_COUNT; -constexpr const idx_t UpdateSegment::MORSEL_LAYER_SIZE; +struct UpdateInfo { + //! 
The update segment that this update info affects + UpdateSegment *segment; + //! The column index of which column we are updating + idx_t column_index; + //! The version number + atomic version_number; + //! The vector index within the uncompressed segment + idx_t vector_index; + //! The amount of updated tuples + sel_t N; + //! The maximum amount of tuples that can fit into this UpdateInfo + sel_t max; + //! The row ids of the tuples that have been updated. This should always be kept sorted! + sel_t *tuples; + //! The data of the tuples + data_ptr_t tuple_data; + //! The previous update info (or nullptr if it is the base) + UpdateInfo *prev; + //! The next update info in the chain (or nullptr if it is the last) + UpdateInfo *next; + + //! Loop over the update chain and execute the specified callback on all UpdateInfo's that are relevant for that + //! transaction in-order of newest to oldest + template + static void UpdatesForTransaction(UpdateInfo *current, transaction_t start_time, transaction_t transaction_id, + T &&callback) { + while (current) { + if (current->version_number > start_time && current->version_number != transaction_id) { + // these tuples were either committed AFTER this transaction started or are not committed yet, use + // tuples stored in this version + callback(current); + } + current = current->next; + } + } + + Value GetValue(idx_t index); + string ToString(); + void Print(); + void Verify(); +}; + +} // namespace duckdb + + + + + + +namespace duckdb { static UpdateSegment::initialize_update_function_t GetInitializeUpdateFunction(PhysicalType type); static UpdateSegment::fetch_update_function_t GetFetchUpdateFunction(PhysicalType type); static UpdateSegment::fetch_committed_function_t GetFetchCommittedFunction(PhysicalType type); +static UpdateSegment::fetch_committed_range_function_t GetFetchCommittedRangeFunction(PhysicalType type); static UpdateSegment::merge_update_function_t GetMergeUpdateFunction(PhysicalType type); static 
UpdateSegment::rollback_update_function_t GetRollbackUpdateFunction(PhysicalType type); static UpdateSegment::statistics_update_function_t GetStatisticsUpdateFunction(PhysicalType type); static UpdateSegment::fetch_row_function_t GetFetchRowFunction(PhysicalType type); -UpdateSegment::UpdateSegment(ColumnData &column_data, idx_t start, idx_t count) - : SegmentBase(start, count), column_data(column_data), - stats(column_data.type, GetTypeIdSize(column_data.type.InternalType())) { +UpdateSegment::UpdateSegment(ColumnData &column_data) : column_data(column_data), stats(column_data.type) { auto physical_type = column_data.type.InternalType(); this->type_size = GetTypeIdSize(physical_type); @@ -126563,6 +140583,7 @@ UpdateSegment::UpdateSegment(ColumnData &column_data, idx_t start, idx_t count) this->initialize_update_function = GetInitializeUpdateFunction(physical_type); this->fetch_update_function = GetFetchUpdateFunction(physical_type); this->fetch_committed_function = GetFetchCommittedFunction(physical_type); + this->fetch_committed_range = GetFetchCommittedRangeFunction(physical_type); this->fetch_row_function = GetFetchRowFunction(physical_type); this->merge_update_function = GetMergeUpdateFunction(physical_type); this->rollback_update_function = GetRollbackUpdateFunction(physical_type); @@ -126586,7 +140607,6 @@ Value UpdateInfo::GetValue(idx_t index) { switch (type.id()) { case LogicalTypeId::VALIDITY: - case LogicalTypeId::BOOLEAN: return Value::BOOLEAN(((bool *)tuple_data)[index]); case LogicalTypeId::INTEGER: return Value::INTEGER(((int32_t *)tuple_data)[index]); @@ -126623,7 +140643,7 @@ void UpdateInfo::Verify() { //===--------------------------------------------------------------------===// // Update Fetch //===--------------------------------------------------------------------===// -static void MergeValidity(UpdateInfo *current, ValidityMask &result_mask) { +static void MergeValidityInfo(UpdateInfo *current, ValidityMask &result_mask) { auto info_data = 
(bool *)current->tuple_data; for (idx_t i = 0; i < current->N; i++) { result_mask.Set(current->tuples[i], info_data[i]); @@ -126634,7 +140654,7 @@ static void UpdateMergeValidity(transaction_t start_time, transaction_t transact Vector &result) { auto &result_mask = FlatVector::Validity(result); UpdateInfo::UpdatesForTransaction(info, start_time, transaction_id, - [&](UpdateInfo *current) { MergeValidity(current, result_mask); }); + [&](UpdateInfo *current) { MergeValidityInfo(current, result_mask); }); } template @@ -126696,10 +140716,10 @@ static UpdateSegment::fetch_update_function_t GetFetchUpdateFunction(PhysicalTyp } void UpdateSegment::FetchUpdates(Transaction &transaction, idx_t vector_index, Vector &result) { + auto lock_handle = lock.GetSharedLock(); if (!root) { return; } - auto lock_handle = lock.GetSharedLock(); if (!root->info[vector_index]) { return; } @@ -126713,9 +140733,9 @@ void UpdateSegment::FetchUpdates(Transaction &transaction, idx_t vector_index, V //===--------------------------------------------------------------------===// // Fetch Committed //===--------------------------------------------------------------------===// -static void FetchValidity(UpdateInfo *info, Vector &result) { +static void FetchCommittedValidity(UpdateInfo *info, Vector &result) { auto &result_mask = FlatVector::Validity(result); - MergeValidity(info, result_mask); + MergeValidityInfo(info, result_mask); } template @@ -126727,7 +140747,7 @@ static void TemplatedFetchCommitted(UpdateInfo *info, Vector &result) { static UpdateSegment::fetch_committed_function_t GetFetchCommittedFunction(PhysicalType type) { switch (type) { case PhysicalType::BIT: - return FetchValidity; + return FetchCommittedValidity; case PhysicalType::BOOL: case PhysicalType::INT8: return TemplatedFetchCommitted; @@ -126761,6 +140781,8 @@ static UpdateSegment::fetch_committed_function_t GetFetchCommittedFunction(Physi } void UpdateSegment::FetchCommitted(idx_t vector_index, Vector &result) { + auto 
lock_handle = lock.GetSharedLock(); + if (!root) { return; } @@ -126773,6 +140795,115 @@ void UpdateSegment::FetchCommitted(idx_t vector_index, Vector &result) { fetch_committed_function(root->info[vector_index]->info.get(), result); } +//===--------------------------------------------------------------------===// +// Fetch Range +//===--------------------------------------------------------------------===// +static void MergeUpdateInfoRangeValidity(UpdateInfo *current, idx_t start, idx_t end, idx_t result_offset, + ValidityMask &result_mask) { + auto info_data = (bool *)current->tuple_data; + for (idx_t i = 0; i < current->N; i++) { + auto tuple_idx = current->tuples[i]; + if (tuple_idx < start) { + continue; + } else if (tuple_idx >= end) { + break; + } + auto result_idx = result_offset + tuple_idx - start; + result_mask.Set(result_idx, info_data[i]); + } +} + +static void FetchCommittedRangeValidity(UpdateInfo *info, idx_t start, idx_t end, idx_t result_offset, Vector &result) { + auto &result_mask = FlatVector::Validity(result); + MergeUpdateInfoRangeValidity(info, start, end, result_offset, result_mask); +} + +template +static void MergeUpdateInfoRange(UpdateInfo *current, idx_t start, idx_t end, idx_t result_offset, T *result_data) { + auto info_data = (T *)current->tuple_data; + for (idx_t i = 0; i < current->N; i++) { + auto tuple_idx = current->tuples[i]; + if (tuple_idx < start) { + continue; + } else if (tuple_idx >= end) { + break; + } + auto result_idx = result_offset + tuple_idx - start; + result_data[result_idx] = info_data[i]; + } +} + +template +static void TemplatedFetchCommittedRange(UpdateInfo *info, idx_t start, idx_t end, idx_t result_offset, + Vector &result) { + auto result_data = FlatVector::GetData(result); + MergeUpdateInfoRange(info, start, end, result_offset, result_data); +} + +static UpdateSegment::fetch_committed_range_function_t GetFetchCommittedRangeFunction(PhysicalType type) { + switch (type) { + case PhysicalType::BIT: + return 
FetchCommittedRangeValidity; + case PhysicalType::BOOL: + case PhysicalType::INT8: + return TemplatedFetchCommittedRange; + case PhysicalType::INT16: + return TemplatedFetchCommittedRange; + case PhysicalType::INT32: + return TemplatedFetchCommittedRange; + case PhysicalType::INT64: + return TemplatedFetchCommittedRange; + case PhysicalType::UINT8: + return TemplatedFetchCommittedRange; + case PhysicalType::UINT16: + return TemplatedFetchCommittedRange; + case PhysicalType::UINT32: + return TemplatedFetchCommittedRange; + case PhysicalType::UINT64: + return TemplatedFetchCommittedRange; + case PhysicalType::INT128: + return TemplatedFetchCommittedRange; + case PhysicalType::FLOAT: + return TemplatedFetchCommittedRange; + case PhysicalType::DOUBLE: + return TemplatedFetchCommittedRange; + case PhysicalType::INTERVAL: + return TemplatedFetchCommittedRange; + case PhysicalType::VARCHAR: + return TemplatedFetchCommittedRange; + default: + throw NotImplementedException("Unimplemented type for update segment"); + } +} + +void UpdateSegment::FetchCommittedRange(idx_t start_row, idx_t count, Vector &result) { + D_ASSERT(count > 0); + if (!root) { + return; + } + D_ASSERT(result.GetVectorType() == VectorType::FLAT_VECTOR); + + idx_t end_row = start_row + count; + idx_t start_vector = start_row / STANDARD_VECTOR_SIZE; + idx_t end_vector = (end_row - 1) / STANDARD_VECTOR_SIZE; + D_ASSERT(start_vector <= end_vector); + D_ASSERT(end_vector < RowGroup::ROW_GROUP_VECTOR_COUNT); + + for (idx_t vector_idx = start_vector; vector_idx <= end_vector; vector_idx++) { + if (!root->info[vector_idx]) { + continue; + } + idx_t start_in_vector = vector_idx == start_vector ? start_row - start_vector * STANDARD_VECTOR_SIZE : 0; + idx_t end_in_vector = + vector_idx == end_vector ? 
end_row - end_vector * STANDARD_VECTOR_SIZE : STANDARD_VECTOR_SIZE; + D_ASSERT(start_in_vector < end_in_vector); + D_ASSERT(end_in_vector > 0 && end_in_vector <= STANDARD_VECTOR_SIZE); + idx_t result_offset = ((vector_idx * STANDARD_VECTOR_SIZE) + start_in_vector) - start_row; + fetch_committed_range(root->info[vector_idx]->info.get(), start_in_vector, end_in_vector, result_offset, + result); + } +} + //===--------------------------------------------------------------------===// // Fetch Row //===--------------------------------------------------------------------===// @@ -126851,7 +140982,7 @@ void UpdateSegment::FetchRow(Transaction &transaction, idx_t row_id, Vector &res if (!root) { return; } - idx_t vector_index = (row_id - start) / STANDARD_VECTOR_SIZE; + idx_t vector_index = (row_id - column_data.start) / STANDARD_VECTOR_SIZE; if (!root->info[vector_index]) { return; } @@ -126867,7 +140998,6 @@ template static void RollbackUpdate(UpdateInfo *base_info, UpdateInfo *rollback_info) { auto base_data = (T *)base_info->tuple_data; auto rollback_data = (T *)rollback_info->tuple_data; - idx_t base_offset = 0; for (idx_t i = 0; i < rollback_info->N; i++) { auto id = rollback_info->tuples[i]; @@ -126948,8 +141078,8 @@ void UpdateSegment::CleanupUpdate(UpdateInfo *info) { //===--------------------------------------------------------------------===// // Check for conflicts in update //===--------------------------------------------------------------------===// -static void CheckForConflicts(UpdateInfo *info, Transaction &transaction, row_t *ids, const SelectionVector &sel, - idx_t count, row_t offset, UpdateInfo *&node) { +static void CheckForConflicts(UpdateInfo *info, Transaction &transaction, row_t *ids, idx_t count, row_t offset, + UpdateInfo *&node) { if (!info) { return; } @@ -126961,7 +141091,7 @@ static void CheckForConflicts(UpdateInfo *info, Transaction &transaction, row_t // as both ids and info->tuples are sorted, this is similar to a merge join idx_t i = 0, 
j = 0; while (true) { - auto id = ids[sel.get_index(i)] - offset; + auto id = ids[i] - offset; if (id == info->tuples[j]) { throw TransactionException("Conflict on update!"); } else if (id < info->tuples[j]) { @@ -126979,7 +141109,7 @@ static void CheckForConflicts(UpdateInfo *info, Transaction &transaction, row_t } } } - CheckForConflicts(info->next, transaction, ids, sel, count, offset, node); + CheckForConflicts(info->next, transaction, ids, count, offset, node); } //===--------------------------------------------------------------------===// @@ -127002,8 +141132,8 @@ void UpdateSegment::InitializeUpdateInfo(UpdateInfo &info, row_t *ids, const Sel }; } -static void InitializeUpdateValidity(SegmentStatistics &stats, UpdateInfo *base_info, Vector &base_data, - UpdateInfo *update_info, Vector &update, const SelectionVector &sel) { +static void InitializeUpdateValidity(UpdateInfo *base_info, Vector &base_data, UpdateInfo *update_info, Vector &update, + const SelectionVector &sel) { auto &update_mask = FlatVector::Validity(update); auto tuple_data = (bool *)update_info->tuple_data; @@ -127032,8 +141162,8 @@ static void InitializeUpdateValidity(SegmentStatistics &stats, UpdateInfo *base_ } template -static void InitializeUpdateData(SegmentStatistics &stats, UpdateInfo *base_info, Vector &base_data, - UpdateInfo *update_info, Vector &update, const SelectionVector &sel) { +static void InitializeUpdateData(UpdateInfo *base_info, Vector &base_data, UpdateInfo *update_info, Vector &update, + const SelectionVector &sel) { auto update_data = FlatVector::GetData(update); auto tuple_data = (T *)update_info->tuple_data; @@ -127139,10 +141269,9 @@ struct ExtractValidityEntry { }; template -static void MergeUpdateLoopInternal(SegmentStatistics &stats, UpdateInfo *base_info, V *base_table_data, - UpdateInfo *update_info, V *update_vector_data, row_t *ids, idx_t count, - const SelectionVector &sel) { - auto base_id = base_info->segment->start + base_info->vector_index * 
STANDARD_VECTOR_SIZE; +static void MergeUpdateLoopInternal(UpdateInfo *base_info, V *base_table_data, UpdateInfo *update_info, + V *update_vector_data, row_t *ids, idx_t count, const SelectionVector &sel) { + auto base_id = base_info->segment->column_data.start + base_info->vector_index * STANDARD_VECTOR_SIZE; #ifdef DEBUG // all of these should be sorted, otherwise the below algorithm does not work for (idx_t i = 1; i < count; i++) { @@ -127240,21 +141369,20 @@ static void MergeUpdateLoopInternal(SegmentStatistics &stats, UpdateInfo *base_i memcpy(base_info->tuples, result_ids, result_offset * sizeof(sel_t)); } -static void MergeValidityLoop(SegmentStatistics &stats, UpdateInfo *base_info, Vector &base_data, - UpdateInfo *update_info, Vector &update, row_t *ids, idx_t count, - const SelectionVector &sel) { +static void MergeValidityLoop(UpdateInfo *base_info, Vector &base_data, UpdateInfo *update_info, Vector &update, + row_t *ids, idx_t count, const SelectionVector &sel) { auto &base_validity = FlatVector::Validity(base_data); auto &update_validity = FlatVector::Validity(update); - MergeUpdateLoopInternal(stats, base_info, &base_validity, update_info, + MergeUpdateLoopInternal(base_info, &base_validity, update_info, &update_validity, ids, count, sel); } template -static void MergeUpdateLoop(SegmentStatistics &stats, UpdateInfo *base_info, Vector &base_data, UpdateInfo *update_info, - Vector &update, row_t *ids, idx_t count, const SelectionVector &sel) { +static void MergeUpdateLoop(UpdateInfo *base_info, Vector &base_data, UpdateInfo *update_info, Vector &update, + row_t *ids, idx_t count, const SelectionVector &sel) { auto base_table_data = FlatVector::GetData(base_data); auto update_vector_data = FlatVector::GetData(update); - MergeUpdateLoopInternal(stats, base_info, base_table_data, update_info, update_vector_data, ids, count, sel); + MergeUpdateLoopInternal(base_info, base_table_data, update_info, update_vector_data, ids, count, sel); } static 
UpdateSegment::merge_update_function_t GetMergeUpdateFunction(PhysicalType type) { @@ -127296,56 +141424,65 @@ static UpdateSegment::merge_update_function_t GetMergeUpdateFunction(PhysicalTyp //===--------------------------------------------------------------------===// // Update statistics //===--------------------------------------------------------------------===// -idx_t UpdateValidityStatistics(UpdateSegment *segment, SegmentStatistics &stats, Vector &update, idx_t count, - SelectionVector &sel) { +unique_ptr UpdateSegment::GetStatistics() { + lock_guard stats_guard(stats_lock); + return stats.statistics->Copy(); +} + +idx_t UpdateValidityStatistics(UpdateSegment *segment, SegmentStatistics &stats, Vector &update, idx_t offset, + idx_t count, SelectionVector &sel) { auto &mask = FlatVector::Validity(update); auto &validity = (ValidityStatistics &)*stats.statistics; if (!mask.AllValid() && !validity.has_null) { for (idx_t i = 0; i < count; i++) { - if (!mask.RowIsValid(i)) { + auto idx = offset + i; + if (!mask.RowIsValid(idx)) { validity.has_null = true; break; } } } - sel.Initialize(FlatVector::INCREMENTAL_SELECTION_VECTOR); + sel.Initialize((sel_t *)(FlatVector::INCREMENTAL_VECTOR + offset)); return count; } template -idx_t TemplatedUpdateNumericStatistics(UpdateSegment *segment, SegmentStatistics &stats, Vector &update, idx_t count, - SelectionVector &sel) { +idx_t TemplatedUpdateNumericStatistics(UpdateSegment *segment, SegmentStatistics &stats, Vector &update, idx_t offset, + idx_t count, SelectionVector &sel) { auto update_data = FlatVector::GetData(update); auto &mask = FlatVector::Validity(update); if (mask.AllValid()) { for (idx_t i = 0; i < count; i++) { - NumericStatistics::Update(stats, update_data[i]); + auto idx = offset + i; + NumericStatistics::Update(stats, update_data[idx]); } - sel.Initialize(FlatVector::INCREMENTAL_SELECTION_VECTOR); + sel.Initialize((sel_t *)(FlatVector::INCREMENTAL_VECTOR + offset)); return count; } else { idx_t 
not_null_count = 0; sel.Initialize(STANDARD_VECTOR_SIZE); for (idx_t i = 0; i < count; i++) { - if (mask.RowIsValid(i)) { - sel.set_index(not_null_count++, i); - NumericStatistics::Update(stats, update_data[i]); + auto idx = offset + i; + if (mask.RowIsValid(idx)) { + sel.set_index(not_null_count++, idx); + NumericStatistics::Update(stats, update_data[idx]); } } return not_null_count; } } -idx_t UpdateStringStatistics(UpdateSegment *segment, SegmentStatistics &stats, Vector &update, idx_t count, - SelectionVector &sel) { +idx_t UpdateStringStatistics(UpdateSegment *segment, SegmentStatistics &stats, Vector &update, idx_t offset, + idx_t count, SelectionVector &sel) { auto update_data = FlatVector::GetData(update); auto &mask = FlatVector::Validity(update); if (mask.AllValid()) { for (idx_t i = 0; i < count; i++) { - ((StringStatistics &)*stats.statistics).Update(update_data[i]); - if (!update_data[i].IsInlined()) { - update_data[i] = segment->GetStringHeap().AddString(update_data[i]); + auto idx = offset + i; + ((StringStatistics &)*stats.statistics).Update(update_data[idx]); + if (!update_data[idx].IsInlined()) { + update_data[idx] = segment->GetStringHeap().AddString(update_data[idx]); } } sel.Initialize(FlatVector::INCREMENTAL_SELECTION_VECTOR); @@ -127354,11 +141491,12 @@ idx_t UpdateStringStatistics(UpdateSegment *segment, SegmentStatistics &stats, V idx_t not_null_count = 0; sel.Initialize(STANDARD_VECTOR_SIZE); for (idx_t i = 0; i < count; i++) { - if (mask.RowIsValid(i)) { - sel.set_index(not_null_count++, i); - ((StringStatistics &)*stats.statistics).Update(update_data[i]); - if (!update_data[i].IsInlined()) { - update_data[i] = segment->GetStringHeap().AddString(update_data[i]); + auto idx = offset + i; + if (mask.RowIsValid(idx)) { + sel.set_index(not_null_count++, idx); + ((StringStatistics &)*stats.statistics).Update(update_data[idx]); + if (!update_data[idx].IsInlined()) { + update_data[idx] = segment->GetStringHeap().AddString(update_data[idx]); } } 
} @@ -127405,47 +141543,89 @@ UpdateSegment::statistics_update_function_t GetStatisticsUpdateFunction(Physical //===--------------------------------------------------------------------===// // Update //===--------------------------------------------------------------------===// -void UpdateSegment::Update(Transaction &transaction, Vector &update, row_t *ids, idx_t count, Vector &base_data) { - // get the vector index based on the first id - // we assert that all updates must be part of the same vector - auto first_id = ids[0]; - idx_t vector_index = (first_id - this->start) / STANDARD_VECTOR_SIZE; - idx_t vector_offset = this->start + vector_index * STANDARD_VECTOR_SIZE; - - D_ASSERT(idx_t(first_id) >= this->start); - D_ASSERT(vector_index < MORSEL_VECTOR_COUNT); +static idx_t SortSelectionVector(SelectionVector &sel, idx_t count, row_t *ids) { + D_ASSERT(count > 0); - if (column_data.type.id() == LogicalTypeId::VALIDITY) { - if ((!root || !root->info[vector_index]) && FlatVector::Validity(update).AllValid() && - FlatVector::Validity(base_data).AllValid()) { - // fast path: updating a validity segment, and both the base data and the update data have no null values - // in this case we can skip the entire update (null-ness will not change) - // this happens when we do an update that does not affect null-ness (e.g. 
i = i + 1) - return; + bool is_sorted = true; + for (idx_t i = 1; i < count; i++) { + auto prev_idx = sel.get_index(i - 1); + auto idx = sel.get_index(i); + if (ids[idx] <= ids[prev_idx]) { + is_sorted = false; + break; + } + } + if (is_sorted) { + // already sorted: bailout + return count; + } + // not sorted: need to sort the selection vector + SelectionVector sorted_sel(count); + for (idx_t i = 0; i < count; i++) { + sorted_sel.set_index(i, sel.get_index(i)); + } + std::sort(sorted_sel.data(), sorted_sel.data() + count, [&](sel_t l, sel_t r) { return ids[l] < ids[r]; }); + // eliminate any duplicates + idx_t pos = 1; + for (idx_t i = 1; i < count; i++) { + auto prev_idx = sorted_sel.get_index(i - 1); + auto idx = sorted_sel.get_index(i); + D_ASSERT(ids[idx] >= ids[prev_idx]); + if (ids[prev_idx] != ids[idx]) { + sorted_sel.set_index(pos++, idx); } } +#ifdef DEBUG + for (idx_t i = 1; i < pos; i++) { + auto prev_idx = sorted_sel.get_index(i - 1); + auto idx = sorted_sel.get_index(i); + D_ASSERT(ids[idx] > ids[prev_idx]); + } +#endif + + sel.Initialize(sorted_sel); + D_ASSERT(pos > 0); + return pos; +} + +void UpdateSegment::Update(Transaction &transaction, idx_t column_index, Vector &update, row_t *ids, idx_t offset, + idx_t count, Vector &base_data) { // obtain an exclusive lock auto write_lock = lock.GetExclusiveLock(); + update.Normalify(count); + // update statistics SelectionVector sel; - count = statistics_update_function(this, stats, update, count, sel); + { + lock_guard stats_guard(stats_lock); + count = statistics_update_function(this, stats, update, offset, count, sel); + } if (count == 0) { return; } -#ifdef DEBUG - // verify that the ids are sorted and there are no duplicates - for (idx_t i = 1; i < count; i++) { - D_ASSERT(ids[i] > ids[i - 1]); - } -#endif + // subsequent algorithms used by the update require row ids to be (1) sorted, and (2) unique + // this is usually the case for "standard" queries (e.g. 
UPDATE tbl SET x=bla WHERE cond) + // however, for more exotic queries involving e.g. cross products/joins this might not be the case + // hence we explicitly check here if the ids are sorted and, if not, sort + duplicate eliminate them + count = SortSelectionVector(sel, count, ids); + D_ASSERT(count > 0); // create the versions for this segment, if there are none yet if (!root) { root = make_unique(); } + // get the vector index based on the first id + // we assert that all updates must be part of the same vector + auto first_id = ids[sel.get_index(0)]; + idx_t vector_index = (first_id - column_data.start) / STANDARD_VECTOR_SIZE; + idx_t vector_offset = column_data.start + vector_index * STANDARD_VECTOR_SIZE; + + D_ASSERT(idx_t(first_id) >= column_data.start); + D_ASSERT(vector_index < RowGroup::ROW_GROUP_VECTOR_COUNT); + // first check the version chain UpdateInfo *node = nullptr; @@ -127453,7 +141633,7 @@ void UpdateSegment::Update(Transaction &transaction, Vector &update, row_t *ids, // there is already a version here, check if there are any conflicts and search for the node that belongs to // this transaction in the version chain auto base_info = root->info[vector_index]->info.get(); - CheckForConflicts(base_info->next, transaction, ids, sel, count, vector_offset, node); + CheckForConflicts(base_info->next, transaction, ids, count, vector_offset, node); // there are no conflicts // first, check if this thread has already done any updates @@ -127471,6 +141651,7 @@ void UpdateSegment::Update(Transaction &transaction, Vector &update, row_t *ids, node->segment = this; node->vector_index = vector_index; node->N = 0; + node->column_index = column_index; // insert the new node into the chain node->next = base_info->next; @@ -127484,7 +141665,7 @@ void UpdateSegment::Update(Transaction &transaction, Vector &update, row_t *ids, node->Verify(); // now we are going to perform the merge - merge_update_function(stats, base_info, base_data, node, update, ids, count, sel); + 
merge_update_function(base_info, base_data, node, update, ids, count, sel); base_info->Verify(); node->Verify(); @@ -127498,6 +141679,7 @@ void UpdateSegment::Update(Transaction &transaction, Vector &update, row_t *ids, result->info->tuples = result->tuples.get(); result->info->tuple_data = result->tuple_data.get(); result->info->version_number = TRANSACTION_ID_START - 1; + result->info->column_index = column_index; InitializeUpdateInfo(*result->info, ids, sel, count, vector_index, vector_offset); // now create the transaction level update info in the undo log @@ -127505,19 +141687,19 @@ void UpdateSegment::Update(Transaction &transaction, Vector &update, row_t *ids, InitializeUpdateInfo(*transaction_node, ids, sel, count, vector_index, vector_offset); // we write the updates in the - initialize_update_function(stats, transaction_node, base_data, result->info.get(), update, sel); + initialize_update_function(transaction_node, base_data, result->info.get(), update, sel); result->info->next = transaction_node; result->info->prev = nullptr; transaction_node->next = nullptr; transaction_node->prev = result->info.get(); + transaction_node->column_index = column_index; transaction_node->Verify(); result->info->Verify(); root->info[vector_index] = move(result); } - column_data.MergeStatistics(*GetStatistics().statistics); } bool UpdateSegment::HasUpdates() const { @@ -127543,37 +141725,21 @@ bool UpdateSegment::HasUncommittedUpdates(idx_t vector_index) { return false; } -bool UpdateSegment::HasUpdates(idx_t start_vector_index, idx_t end_vector_index) const { - idx_t base_vector_index = start / STANDARD_VECTOR_SIZE; - D_ASSERT(start_vector_index >= base_vector_index); - auto segment = this; - for (idx_t i = start_vector_index; i <= end_vector_index; i++) { - idx_t vector_index = i - base_vector_index; - while (vector_index >= UpdateSegment::MORSEL_VECTOR_COUNT) { - segment = (UpdateSegment *)segment->next.get(); - D_ASSERT(segment); - base_vector_index = segment->start / 
STANDARD_VECTOR_SIZE; - vector_index -= UpdateSegment::MORSEL_VECTOR_COUNT; - } - if (segment->HasUpdates(vector_index)) { +bool UpdateSegment::HasUpdates(idx_t start_row_index, idx_t end_row_index) { + if (!HasUpdates()) { + return false; + } + auto read_lock = lock.GetSharedLock(); + idx_t base_vector_index = start_row_index / STANDARD_VECTOR_SIZE; + idx_t end_vector_index = end_row_index / STANDARD_VECTOR_SIZE; + for (idx_t i = base_vector_index; i <= end_vector_index; i++) { + if (root->info[i]) { return true; } } return false; } -UpdateSegment *UpdateSegment::FindSegment(idx_t end_vector_index) const { - idx_t base_vector_index = start / STANDARD_VECTOR_SIZE; - D_ASSERT(end_vector_index >= base_vector_index); - auto segment = this; - while (end_vector_index >= base_vector_index + UpdateSegment::MORSEL_VECTOR_COUNT) { - segment = (UpdateSegment *)segment->next.get(); - D_ASSERT(segment); - base_vector_index += UpdateSegment::MORSEL_VECTOR_COUNT; - } - return (UpdateSegment *)segment; -} - } // namespace duckdb @@ -127581,455 +141747,14 @@ UpdateSegment *UpdateSegment::FindSegment(idx_t end_vector_index) const { namespace duckdb { -ValidityColumnData::ValidityColumnData(DatabaseInstance &db, DataTableInfo &table_info, idx_t column_idx) - : ColumnData(db, table_info, LogicalType(LogicalTypeId::VALIDITY), column_idx) { +ValidityColumnData::ValidityColumnData(DataTableInfo &info, idx_t column_index, idx_t start_row, ColumnData *parent) + : ColumnData(info, column_index, start_row, LogicalType(LogicalTypeId::VALIDITY), parent) { } bool ValidityColumnData::CheckZonemap(ColumnScanState &state, TableFilter &filter) { return true; } -void ValidityColumnData::InitializeScan(ColumnScanState &state) { - state.current = (ColumnSegment *)data.GetRootSegment(); - state.updates = (UpdateSegment *)updates.GetRootSegment(); - state.vector_index = 0; - state.vector_index_updates = 0; - state.initialized = false; -} - -void 
ValidityColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t vector_idx) { - idx_t row_idx = vector_idx * STANDARD_VECTOR_SIZE; - state.current = (ColumnSegment *)data.GetSegment(row_idx); - state.updates = (UpdateSegment *)updates.GetSegment(row_idx); - state.vector_index = (row_idx - state.current->start) / STANDARD_VECTOR_SIZE; - state.vector_index_updates = (row_idx - state.updates->start) / STANDARD_VECTOR_SIZE; - state.initialized = false; -} - -void ValidityColumnData::Scan(Transaction &transaction, ColumnScanState &state, Vector &result) { - if (!state.initialized) { - state.current->InitializeScan(state); - state.initialized = true; - } - // perform a scan of this segment - state.current->Scan(state, state.vector_index, result); - - // merge the updates into the result - state.updates->FetchUpdates(transaction, state.vector_index_updates, result); -} - -void ValidityColumnData::IndexScan(ColumnScanState &state, Vector &result, bool allow_pending_updates) { - if (!state.initialized) { - state.current->InitializeScan(state); - state.initialized = true; - } - state.current->Scan(state, state.vector_index, result); - if (!allow_pending_updates && state.updates->HasUncommittedUpdates(state.vector_index)) { - throw TransactionException("Cannot create index with outstanding updates"); - } - state.updates->FetchCommitted(state.vector_index_updates, result); -} - -void ValidityColumnData::Update(Transaction &transaction, Vector &update_vector, Vector &row_ids, idx_t count) { - idx_t first_id = FlatVector::GetValue(row_ids, 0); - - // fetch the validity data for this segment - Vector base_data(LogicalType::BOOLEAN, nullptr); - auto column_segment = (ColumnSegment *)data.GetSegment(first_id); - auto vector_index = (first_id - column_segment->start) / STANDARD_VECTOR_SIZE; - // now perform the fetch within the segment - ColumnScanState state; - column_segment->Fetch(state, vector_index, base_data); - - // first find the segment that the update belongs to 
- auto segment = (UpdateSegment *)updates.GetSegment(first_id); - // now perform the update within the segment - segment->Update(transaction, update_vector, FlatVector::GetData(row_ids), count, base_data); -} - -unique_ptr ValidityColumnData::Deserialize(DatabaseInstance &db, Deserializer &source) { - auto result = make_unique(); - BaseDeserialize(db, source, LogicalType(LogicalTypeId::VALIDITY), *result); - return result; -} - -} // namespace duckdb - - - - - -namespace duckdb { - -ValiditySegment::ValiditySegment(DatabaseInstance &db, idx_t row_start, block_id_t block_id) - : UncompressedSegment(db, PhysicalType::BIT, row_start) { - // figure out how many vectors we want to store in this block - - this->vector_size = ValidityMask::STANDARD_MASK_SIZE; - this->max_vector_count = Storage::BLOCK_SIZE / vector_size; - // FIXME: this is a fix for test/sql/storage/checkpointed_self_append_tinyint.test - // it is only required because of ToTemporary() - // this should be removed when ToTemporary() is removed - if (max_vector_count > 80) { - max_vector_count = 80; - } - auto &buffer_manager = BufferManager::GetBufferManager(db); - if (block_id == INVALID_BLOCK) { - // no block id specified: allocate a buffer for the uncompressed segment - this->block = buffer_manager.RegisterMemory(Storage::BLOCK_ALLOC_SIZE, false); - // pin the block and initialize - auto handle = buffer_manager.Pin(block); - memset(handle->node->buffer, 0xFF, Storage::BLOCK_SIZE); - } else { - this->block = buffer_manager.RegisterBlock(block_id); - } -} - -ValiditySegment::~ValiditySegment() { -} - -void ValiditySegment::InitializeScan(ColumnScanState &state) { - auto &buffer_manager = BufferManager::GetBufferManager(db); - state.primary_handle = buffer_manager.Pin(block); -} - -void ValiditySegment::FetchRow(ColumnFetchState &state, row_t row_id, Vector &result, idx_t result_idx) { - D_ASSERT(row_id >= 0 && row_id < row_t(this->tuple_count)); - auto &buffer_manager = 
BufferManager::GetBufferManager(db); - auto handle = buffer_manager.Pin(block); - ValidityMask mask((validity_t *)handle->node->buffer); - if (!mask.RowIsValidUnsafe(row_id)) { - FlatVector::SetNull(result, result_idx, true); - } -} - -idx_t ValiditySegment::Append(SegmentStatistics &stats, VectorData &data, idx_t offset, idx_t vcount) { - idx_t append_count = MinValue(vcount, max_vector_count * STANDARD_VECTOR_SIZE - tuple_count); - if (data.validity.AllValid()) { - // no null values: skip append - tuple_count += append_count; - return append_count; - } - auto &buffer_manager = BufferManager::GetBufferManager(db); - auto handle = buffer_manager.Pin(block); - - auto &validity_stats = (ValidityStatistics &)*stats.statistics; - ValidityMask mask((validity_t *)handle->node->buffer); - for (idx_t i = 0; i < append_count; i++) { - auto idx = data.sel->get_index(i); - if (!data.validity.RowIsValidUnsafe(idx)) { - mask.SetInvalidUnsafe(tuple_count + i); - validity_stats.has_null = true; - } - } - tuple_count += append_count; - return append_count; -} - -void ValiditySegment::FetchBaseData(ColumnScanState &state, idx_t vector_index, Vector &result) { -#if STANDARD_VECTOR_SIZE >= 64 - auto vector_ptr = state.primary_handle->node->buffer + vector_index * ValidityMask::STANDARD_MASK_SIZE; - ValidityMask vector_mask(vector_ptr); - if (!vector_mask.CheckAllValid(STANDARD_VECTOR_SIZE)) { - FlatVector::Validity(result).Copy(vector_mask, STANDARD_VECTOR_SIZE); - } -#else - idx_t base_tuple = vector_index * STANDARD_VECTOR_SIZE; - ValidityMask source_mask(state.primary_handle->node->buffer); - auto &target = FlatVector::Validity(result); - for (idx_t i = 0; i < STANDARD_VECTOR_SIZE; i++) { - target.Set(i, source_mask.RowIsValid(base_tuple + i)); - } -#endif -} - -void ValiditySegment::RevertAppend(idx_t start_row) { - idx_t start_bit = start_row - this->row_start; - UncompressedSegment::RevertAppend(start_row); - - auto &buffer_manager = BufferManager::GetBufferManager(db); - auto 
handle = buffer_manager.Pin(block); - idx_t revert_start; - if (start_bit % 8 != 0) { - // handle sub-bit stuff (yay) - idx_t byte_pos = start_bit / 8; - idx_t bit_start = byte_pos * 8; - idx_t bit_end = (byte_pos + 1) * 8; - ValidityMask mask(handle->node->buffer + byte_pos); - for (idx_t i = start_bit; i < bit_end; i++) { - mask.SetValid(i - bit_start); - } - revert_start = bit_end / 8; - } else { - revert_start = start_bit / 8; - } - // for the rest, we just memset - memset(handle->node->buffer + revert_start, 0xFF, Storage::BLOCK_SIZE - revert_start); -} - -} // namespace duckdb - - - - - - - - - - - - -namespace duckdb { - -UncompressedSegment::UncompressedSegment(DatabaseInstance &db, PhysicalType type, idx_t row_start) - : db(db), type(type), max_vector_count(0), tuple_count(0), row_start(row_start) { -} - -UncompressedSegment::~UncompressedSegment() { -} - -void UncompressedSegment::Verify() { -#ifdef DEBUG - // ColumnScanState state; - // InitializeScan(state); - - // Vector result(this->type); - // for (idx_t i = 0; i < this->tuple_count; i += STANDARD_VECTOR_SIZE) { - // idx_t vector_idx = i / STANDARD_VECTOR_SIZE; - // idx_t count = MinValue((idx_t)STANDARD_VECTOR_SIZE, tuple_count - i); - // Scan(transaction, state, vector_idx, result); - // result.Verify(count); - // } -#endif -} - -void UncompressedSegment::Fetch(ColumnScanState &state, idx_t vector_index, Vector &result) { - InitializeScan(state); - FetchBaseData(state, vector_index, result); -} - -//===--------------------------------------------------------------------===// -// Scan -//===--------------------------------------------------------------------===// -void UncompressedSegment::Scan(ColumnScanState &state, idx_t vector_index, Vector &result) { - FetchBaseData(state, vector_index, result); -} - -//===--------------------------------------------------------------------===// -// Filter -//===--------------------------------------------------------------------===// -template -static idx_t 
TemplatedFilterSelection(T *vec, T *predicate, SelectionVector &sel, idx_t approved_tuple_count, - ValidityMask &mask, SelectionVector &result_sel) { - idx_t result_count = 0; - for (idx_t i = 0; i < approved_tuple_count; i++) { - auto idx = sel.get_index(i); - if ((!HAS_NULL || mask.RowIsValid(idx)) && OP::Operation(vec[idx], *predicate)) { - result_sel.set_index(result_count++, idx); - } - } - return result_count; -} - -template -static void FilterSelectionSwitch(T *vec, T *predicate, SelectionVector &sel, idx_t &approved_tuple_count, - ExpressionType comparison_type, ValidityMask &mask) { - SelectionVector new_sel(approved_tuple_count); - // the inplace loops take the result as the last parameter - switch (comparison_type) { - case ExpressionType::COMPARE_EQUAL: { - if (mask.AllValid()) { - approved_tuple_count = - TemplatedFilterSelection(vec, predicate, sel, approved_tuple_count, mask, new_sel); - } else { - approved_tuple_count = - TemplatedFilterSelection(vec, predicate, sel, approved_tuple_count, mask, new_sel); - } - break; - } - case ExpressionType::COMPARE_LESSTHAN: { - if (mask.AllValid()) { - approved_tuple_count = - TemplatedFilterSelection(vec, predicate, sel, approved_tuple_count, mask, new_sel); - } else { - approved_tuple_count = - TemplatedFilterSelection(vec, predicate, sel, approved_tuple_count, mask, new_sel); - } - break; - } - case ExpressionType::COMPARE_GREATERTHAN: { - if (mask.AllValid()) { - approved_tuple_count = TemplatedFilterSelection(vec, predicate, sel, - approved_tuple_count, mask, new_sel); - } else { - approved_tuple_count = TemplatedFilterSelection(vec, predicate, sel, - approved_tuple_count, mask, new_sel); - } - break; - } - case ExpressionType::COMPARE_LESSTHANOREQUALTO: { - if (mask.AllValid()) { - approved_tuple_count = TemplatedFilterSelection( - vec, predicate, sel, approved_tuple_count, mask, new_sel); - } else { - approved_tuple_count = TemplatedFilterSelection( - vec, predicate, sel, approved_tuple_count, mask, 
new_sel); - } - break; - } - case ExpressionType::COMPARE_GREATERTHANOREQUALTO: { - if (mask.AllValid()) { - approved_tuple_count = TemplatedFilterSelection( - vec, predicate, sel, approved_tuple_count, mask, new_sel); - } else { - approved_tuple_count = TemplatedFilterSelection( - vec, predicate, sel, approved_tuple_count, mask, new_sel); - } - break; - } - default: - throw NotImplementedException("Unknown comparison type for filter pushed down to table!"); - } - sel.Initialize(new_sel); -} - -void UncompressedSegment::FilterSelection(SelectionVector &sel, Vector &result, const TableFilter &filter, - idx_t &approved_tuple_count, ValidityMask &mask) { - // the inplace loops take the result as the last parameter - switch (result.GetType().InternalType()) { - case PhysicalType::UINT8: { - auto result_flat = FlatVector::GetData(result); - Vector predicate_vector(filter.constant); - auto predicate = FlatVector::GetData(predicate_vector); - FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, filter.comparison_type, mask); - break; - } - case PhysicalType::UINT16: { - auto result_flat = FlatVector::GetData(result); - Vector predicate_vector(filter.constant); - auto predicate = FlatVector::GetData(predicate_vector); - FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, filter.comparison_type, - mask); - break; - } - case PhysicalType::UINT32: { - auto result_flat = FlatVector::GetData(result); - Vector predicate_vector(filter.constant); - auto predicate = FlatVector::GetData(predicate_vector); - FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, filter.comparison_type, - mask); - break; - } - case PhysicalType::UINT64: { - auto result_flat = FlatVector::GetData(result); - Vector predicate_vector(filter.constant); - auto predicate = FlatVector::GetData(predicate_vector); - FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, filter.comparison_type, - mask); - break; - } - case 
PhysicalType::INT8: { - auto result_flat = FlatVector::GetData(result); - Vector predicate_vector(filter.constant); - auto predicate = FlatVector::GetData(predicate_vector); - FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, filter.comparison_type, mask); - break; - } - case PhysicalType::INT16: { - auto result_flat = FlatVector::GetData(result); - Vector predicate_vector(filter.constant); - auto predicate = FlatVector::GetData(predicate_vector); - FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, filter.comparison_type, mask); - break; - } - case PhysicalType::INT32: { - auto result_flat = FlatVector::GetData(result); - Vector predicate_vector(filter.constant); - auto predicate = FlatVector::GetData(predicate_vector); - FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, filter.comparison_type, mask); - break; - } - case PhysicalType::INT64: { - auto result_flat = FlatVector::GetData(result); - Vector predicate_vector(filter.constant); - auto predicate = FlatVector::GetData(predicate_vector); - FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, filter.comparison_type, mask); - break; - } - case PhysicalType::INT128: { - auto result_flat = FlatVector::GetData(result); - Vector predicate_vector(filter.constant); - auto predicate = FlatVector::GetData(predicate_vector); - FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, filter.comparison_type, - mask); - break; - } - case PhysicalType::FLOAT: { - auto result_flat = FlatVector::GetData(result); - Vector predicate_vector(filter.constant); - auto predicate = FlatVector::GetData(predicate_vector); - FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, filter.comparison_type, mask); - break; - } - case PhysicalType::DOUBLE: { - auto result_flat = FlatVector::GetData(result); - Vector predicate_vector(filter.constant); - auto predicate = FlatVector::GetData(predicate_vector); - 
FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, filter.comparison_type, mask); - break; - } - case PhysicalType::VARCHAR: { - auto result_flat = FlatVector::GetData(result); - Vector predicate_vector(filter.constant.str_value); - auto predicate = FlatVector::GetData(predicate_vector); - FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, filter.comparison_type, - mask); - break; - } - case PhysicalType::BOOL: { - auto result_flat = FlatVector::GetData(result); - Vector predicate_vector(filter.constant); - auto predicate = FlatVector::GetData(predicate_vector); - FilterSelectionSwitch(result_flat, predicate, sel, approved_tuple_count, filter.comparison_type, mask); - break; - } - default: - throw InvalidTypeException(result.GetType(), "Invalid type for filter pushed down to table comparison"); - } -} - -void UncompressedSegment::RevertAppend(idx_t start_row) { - tuple_count = start_row - this->row_start; -} - -//===--------------------------------------------------------------------===// -// ToTemporary -//===--------------------------------------------------------------------===// -void UncompressedSegment::ToTemporary() { - ToTemporaryInternal(); -} - -void UncompressedSegment::ToTemporaryInternal() { - if (block->BlockId() >= MAXIMUM_BLOCK) { - // conversion has already been performed by a different thread - return; - } - auto &block_manager = BlockManager::GetBlockManager(db); - block_manager.MarkBlockAsModified(block->BlockId()); - - // pin the current block - auto &buffer_manager = BufferManager::GetBufferManager(db); - auto current = buffer_manager.Pin(block); - - // now allocate a new block from the buffer manager - auto new_block = buffer_manager.RegisterMemory(Storage::BLOCK_ALLOC_SIZE, false); - auto handle = buffer_manager.Pin(new_block); - // now copy the data over and switch to using the new block id - memcpy(handle->node->buffer, current->node->buffer, Storage::BLOCK_SIZE); - this->block = move(new_block); 
-} - } // namespace duckdb @@ -128121,10 +141846,13 @@ bool WriteAheadLog::Replay(DatabaseInstance &database, string &path) { checkpoint_state.ReplayEntry(entry_type); } } - } catch (std::exception &ex) { + } catch (std::exception &ex) { // LCOV_EXCL_START Printer::Print(StringUtil::Format("Exception in WAL playback during initial read: %s\n", ex.what())); return false; - } + } catch (...) { + Printer::Print("Unknown Exception in WAL playback during initial read"); + return false; + } // LCOV_EXCL_STOP initial_reader.reset(); if (checkpoint_state.checkpoint_id != INVALID_BLOCK) { // there is a checkpoint flag: check if we need to deserialize the WAL @@ -128163,12 +141891,16 @@ bool WriteAheadLog::Replay(DatabaseInstance &database, string &path) { state.ReplayEntry(entry_type); } } - } catch (std::exception &ex) { + } catch (std::exception &ex) { // LCOV_EXCL_START // FIXME: this should report a proper warning in the connection Printer::Print(StringUtil::Format("Exception in WAL playback: %s\n", ex.what())); // exception thrown in WAL replay: rollback con.Rollback(); - } + } catch (...) 
{ + Printer::Print("Unknown Exception in WAL playback: %s\n"); + // exception thrown in WAL replay: rollback + con.Rollback(); + } // LCOV_EXCL_STOP return false; } @@ -128229,7 +141961,7 @@ void ReplayState::ReplayEntry(WALType entry_type) { ReplayCheckpoint(); break; default: - throw Exception("Invalid WAL entry type!"); + throw InternalException("Invalid WAL entry type!"); } } @@ -128429,7 +142161,7 @@ void ReplayState::ReplayDelete() { return; } if (!current_table) { - throw Exception("Corrupt WAL: delete without table"); + throw InternalException("Corrupt WAL: delete without table"); } D_ASSERT(chunk.ColumnCount() == 1 && chunk.data[0].GetType() == LOGICAL_ROW_TYPE); @@ -128445,20 +142177,23 @@ void ReplayState::ReplayDelete() { } void ReplayState::ReplayUpdate() { - idx_t column_index = source.Read(); - + vector column_path; + auto column_index_count = source.Read(); + column_path.reserve(column_index_count); + for (idx_t i = 0; i < column_index_count; i++) { + column_path.push_back(source.Read()); + } DataChunk chunk; chunk.Deserialize(source); if (deserialize_only) { return; } if (!current_table) { - throw Exception("Corrupt WAL: update without table"); + throw InternalException("Corrupt WAL: update without table"); } - vector column_ids {column_index}; - if (column_index >= current_table->columns.size()) { - throw Exception("Corrupt WAL: column index for update out of bounds"); + if (column_path[0] >= current_table->columns.size()) { + throw InternalException("Corrupt WAL: column index for update out of bounds"); } // remove the row id vector from the chunk @@ -128466,7 +142201,7 @@ void ReplayState::ReplayUpdate() { chunk.data.pop_back(); // now perform the update - current_table->storage->Update(*current_table, context, row_ids, column_ids, chunk); + current_table->storage->UpdateColumn(*current_table, context, row_ids, column_path, chunk); } void ReplayState::ReplayCheckpoint() { @@ -128682,15 +142417,20 @@ void WriteAheadLog::WriteDelete(DataChunk 
&chunk) { chunk.Serialize(*writer); } -void WriteAheadLog::WriteUpdate(DataChunk &chunk, column_t col_idx) { +void WriteAheadLog::WriteUpdate(DataChunk &chunk, const vector &column_indexes) { if (skip_writing) { return; } D_ASSERT(chunk.size() > 0); + D_ASSERT(chunk.ColumnCount() == 2); + D_ASSERT(chunk.data[1].GetType().id() == LOGICAL_ROW_TYPE.id()); chunk.Verify(); writer->Write(WALType::UPDATE_TUPLE); - writer->Write(col_idx); + writer->Write(column_indexes.size()); + for (auto &col_idx : column_indexes) { + writer->Write(col_idx); + } chunk.Serialize(*writer); } @@ -128797,7 +142537,6 @@ struct DeleteInfo { - namespace duckdb { CleanupState::CleanupState() : current_table(nullptr), count(0) { @@ -128811,15 +142550,9 @@ void CleanupState::CleanupEntry(UndoFlags type, data_ptr_t data) { switch (type) { case UndoFlags::CATALOG_ENTRY: { auto catalog_entry = Load(data); - // destroy the backed up entry: it is no longer required - D_ASSERT(catalog_entry->parent); - if (catalog_entry->parent->type != CatalogType::UPDATED_ENTRY) { - if (!catalog_entry->deleted) { - // delete the entry from the dependency manager, if it is not deleted yet - catalog_entry->catalog->dependency_manager->EraseObject(catalog_entry); - } - catalog_entry->parent->child = move(catalog_entry->child); - } + D_ASSERT(catalog_entry); + D_ASSERT(catalog_entry->set); + catalog_entry->set->CleanupEntry(catalog_entry); break; } case UndoFlags::DELETE_TUPLE: { @@ -128846,7 +142579,7 @@ void CleanupState::CleanupUpdate(UpdateInfo *info) { void CleanupState::CleanupDelete(DeleteInfo *info) { auto version_table = info->table; version_table->info->cardinality -= info->count; - if (version_table->info->indexes.empty()) { + if (version_table->info->indexes.Empty()) { // this table has no indexes: no cleanup to be done return; } @@ -128855,12 +142588,11 @@ void CleanupState::CleanupDelete(DeleteInfo *info) { Flush(); current_table = version_table; } + count = 0; for (idx_t i = 0; i < info->count; i++) { - if 
(count == STANDARD_VECTOR_SIZE) { - Flush(); - } row_numbers[count++] = info->vinfo->start + info->rows[i]; } + Flush(); } void CleanupState::Flush() { @@ -128967,6 +142699,7 @@ struct AppendInfo { + namespace duckdb { CommitState::CommitState(transaction_t commit_id, WriteAheadLog *log) @@ -129034,25 +142767,34 @@ void CommitState::WriteCatalogEntry(CatalogEntry *entry, data_ptr_t dataptr) { log->WriteCreateMacro((MacroCatalogEntry *)parent); break; case CatalogType::DELETED_ENTRY: - if (entry->type == CatalogType::TABLE_ENTRY) { + switch (entry->type) { + case CatalogType::TABLE_ENTRY: { auto table_entry = (TableCatalogEntry *)entry; table_entry->CommitDrop(); log->WriteDropTable(table_entry); - } else if (entry->type == CatalogType::SCHEMA_ENTRY) { + break; + } + case CatalogType::SCHEMA_ENTRY: log->WriteDropSchema((SchemaCatalogEntry *)entry); - } else if (entry->type == CatalogType::VIEW_ENTRY) { + break; + case CatalogType::VIEW_ENTRY: log->WriteDropView((ViewCatalogEntry *)entry); - } else if (entry->type == CatalogType::SEQUENCE_ENTRY) { + break; + case CatalogType::SEQUENCE_ENTRY: log->WriteDropSequence((SequenceCatalogEntry *)entry); - } else if (entry->type == CatalogType::MACRO_ENTRY) { + break; + case CatalogType::MACRO_ENTRY: log->WriteDropMacro((MacroCatalogEntry *)entry); - } else if (entry->type == CatalogType::PREPARED_STATEMENT) { - // do nothing, prepared statements aren't persisted to disk - } else { - throw NotImplementedException("Don't know how to drop this type!"); + break; + case CatalogType::INDEX_ENTRY: + case CatalogType::PREPARED_STATEMENT: + case CatalogType::SCALAR_FUNCTION_ENTRY: + // do nothing, indexes/prepared statements/functions aren't persisted to disk + break; + default: + throw InternalException("Don't know how to drop this type!"); } break; - case CatalogType::INDEX_ENTRY: case CatalogType::PREPARED_STATEMENT: case CatalogType::AGGREGATE_FUNCTION_ENTRY: @@ -129064,7 +142806,7 @@ void 
CommitState::WriteCatalogEntry(CatalogEntry *entry, data_ptr_t dataptr) { // do nothing, these entries are not persisted to disk break; default: - throw NotImplementedException("UndoBuffer - don't know how to write this entry to the WAL"); + throw InternalException("UndoBuffer - don't know how to write this entry to the WAL"); } } @@ -129090,8 +142832,11 @@ void CommitState::WriteUpdate(UpdateInfo *info) { D_ASSERT(log); // switch to the current table, if necessary auto &column_data = info->segment->column_data; - SwitchTable(&column_data.table_info, UndoFlags::UPDATE_TUPLE); + auto &table_info = column_data.GetTableInfo(); + + SwitchTable(&table_info, UndoFlags::UPDATE_TUPLE); + // initialize the update chunk vector update_types; if (column_data.type.id() == LogicalTypeId::VALIDITY) { update_types.push_back(LogicalType::BOOLEAN); @@ -129108,14 +142853,33 @@ void CommitState::WriteUpdate(UpdateInfo *info) { // write the row ids into the chunk auto row_ids = FlatVector::GetData(update_chunk->data[1]); - idx_t start = info->segment->start + info->vector_index * STANDARD_VECTOR_SIZE; + idx_t start = column_data.start + info->vector_index * STANDARD_VECTOR_SIZE; for (idx_t i = 0; i < info->N; i++) { row_ids[info->tuples[i]] = start + info->tuples[i]; } + if (column_data.type.id() == LogicalTypeId::VALIDITY) { + // zero-initialize the booleans + // FIXME: this is only required because of NullValue in Vector::Serialize... 
+ auto booleans = FlatVector::GetData(update_chunk->data[0]); + for (idx_t i = 0; i < info->N; i++) { + auto idx = info->tuples[i]; + booleans[idx] = false; + } + } SelectionVector sel(info->tuples); update_chunk->Slice(sel, info->N); - log->WriteUpdate(*update_chunk, column_data.column_idx); + // construct the column index path + vector column_indexes; + auto column_data_ptr = &column_data; + while (column_data_ptr->parent) { + column_indexes.push_back(column_data_ptr->column_index); + column_data_ptr = column_data_ptr->parent; + } + column_indexes.push_back(info->column_index); + std::reverse(column_indexes.begin(), column_indexes.end()); + + log->WriteUpdate(*update_chunk, column_indexes); } template @@ -129158,14 +142922,14 @@ void CommitState::CommitEntry(UndoFlags type, data_ptr_t data) { case UndoFlags::UPDATE_TUPLE: { // update: auto info = (UpdateInfo *)data; - if (HAS_LOG && !info->segment->column_data.table_info.IsTemporary()) { + if (HAS_LOG && !info->segment->column_data.GetTableInfo().IsTemporary()) { WriteUpdate(info); } info->version_number = commit_id; break; } default: - throw NotImplementedException("UndoBuffer - don't know how to commit this type!"); + throw InternalException("UndoBuffer - don't know how to commit this type!"); } } @@ -129203,7 +142967,7 @@ void CommitState::RevertCommit(UndoFlags type, data_ptr_t data) { break; } default: - throw NotImplementedException("UndoBuffer - don't know how to revert commit of this type!"); + throw InternalException("UndoBuffer - don't know how to revert commit of this type!"); } } @@ -129250,7 +143014,6 @@ class RollbackState { - namespace duckdb { void RollbackState::RollbackEntry(UndoFlags type, data_ptr_t data) { @@ -129279,10 +143042,10 @@ void RollbackState::RollbackEntry(UndoFlags type, data_ptr_t data) { info->segment->RollbackUpdate(info); break; } - default: + default: // LCOV_EXCL_START D_ASSERT(type == UndoFlags::EMPTY_ENTRY); break; - } + } // LCOV_EXCL_STOP } } // namespace duckdb @@ 
-129301,6 +143064,8 @@ void RollbackState::RollbackEntry(UndoFlags type, data_ptr_t data) { + + #include namespace duckdb { @@ -129444,12 +143209,16 @@ TransactionContext::~TransactionContext() { } void TransactionContext::BeginTransaction() { - D_ASSERT(!current_transaction); // cannot start a transaction within a transaction + if (current_transaction) { + throw TransactionException("cannot start a transaction within a transaction"); + } current_transaction = transaction_manager.StartTransaction(context); } void TransactionContext::Commit() { - D_ASSERT(current_transaction); // cannot commit if there is no active transaction + if (!current_transaction) { + throw TransactionException("failed to commit: no transaction active"); + } auto transaction = current_transaction; SetAutoCommit(true); current_transaction = nullptr; @@ -129467,7 +143236,9 @@ void TransactionContext::SetAutoCommit(bool value) { } void TransactionContext::Rollback() { - D_ASSERT(current_transaction); // cannot rollback if there is no active transaction + if (!current_transaction) { + throw TransactionException("failed to rollback: no transaction active"); + } auto transaction = current_transaction; ClearTransaction(); transaction_manager.RollbackTransaction(transaction); @@ -129529,6 +143300,8 @@ TransactionManager::TransactionManager(DatabaseInstance &db) : db(db), thread_is current_transaction_id = TRANSACTION_ID_START; // the current active query id current_query_number = 1; + lowest_active_id = TRANSACTION_ID_START; + lowest_active_start = MAX_TRANSACTION_ID; } TransactionManager::~TransactionManager() { @@ -129537,15 +143310,19 @@ TransactionManager::~TransactionManager() { Transaction *TransactionManager::StartTransaction(ClientContext &context) { // obtain the transaction lock during this function lock_guard lock(transaction_lock); - if (current_start_timestamp >= TRANSACTION_ID_START) { - throw Exception("Cannot start more transactions, ran out of " - "transaction identifiers!"); - } + 
if (current_start_timestamp >= TRANSACTION_ID_START) { // LCOV_EXCL_START + throw InternalException("Cannot start more transactions, ran out of " + "transaction identifiers!"); + } // LCOV_EXCL_STOP // obtain the start time and transaction ID of this transaction transaction_t start_time = current_start_timestamp++; transaction_t transaction_id = current_transaction_id++; timestamp_t start_timestamp = Timestamp::GetCurrentTimestamp(); + if (active_transactions.empty()) { + lowest_active_start = start_time; + lowest_active_id = transaction_id; + } // create the actual transaction auto &catalog = Catalog::GetCatalog(db); @@ -129721,6 +143498,7 @@ void TransactionManager::RemoveTransaction(Transaction *transaction) noexcept { idx_t t_index = active_transactions.size(); // check for the lowest and highest start time in the list of transactions transaction_t lowest_start_time = TRANSACTION_ID_START; + transaction_t lowest_transaction_id = MAX_TRANSACTION_ID; transaction_t lowest_active_query = MAXIMUM_QUERY_ID; for (idx_t i = 0; i < active_transactions.size(); i++) { if (active_transactions[i].get() == transaction) { @@ -129729,8 +143507,12 @@ void TransactionManager::RemoveTransaction(Transaction *transaction) noexcept { transaction_t active_query = active_transactions[i]->active_query; lowest_start_time = MinValue(lowest_start_time, active_transactions[i]->start_time); lowest_active_query = MinValue(lowest_active_query, active_query); + lowest_transaction_id = MinValue(lowest_transaction_id, active_transactions[i]->transaction_id); } } + lowest_active_start = lowest_start_time; + lowest_active_id = lowest_transaction_id; + transaction_t lowest_stored_query = lowest_start_time; D_ASSERT(t_index != active_transactions.size()); auto current_transaction = move(active_transactions[t_index]); @@ -129811,21 +143593,6 @@ void TransactionManager::RemoveTransaction(Transaction *transaction) noexcept { } } -void TransactionManager::AddCatalogSet(ClientContext &context, unique_ptr 
catalog_set) { - // remove the dependencies from all entries of the CatalogSet - Catalog::GetCatalog(context).dependency_manager->ClearDependencies(*catalog_set); - - lock_guard lock(transaction_lock); - if (!active_transactions.empty()) { - // if there are active transactions we wait with deleting the objects - StoredCatalogSet set; - set.stored_set = move(catalog_set); - set.highest_active_query = current_start_timestamp; - - old_catalog_sets.push_back(move(set)); - } -} - } // namespace duckdb @@ -131428,7 +145195,7 @@ FMT_END_NAMESPACE // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 // See the end of this file for a list /************************************************************************** @@ -138980,7 +152747,7 @@ mz_bool mz_zip_end(mz_zip_archive *pZip) // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2008 The RE2 Authors. All Rights Reserved. @@ -139011,7 +152778,7 @@ mz_bool mz_zip_end(mz_zip_archive *pZip) // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2009 The RE2 Authors. All Rights Reserved. @@ -139032,7 +152799,7 @@ mz_bool mz_zip_end(mz_zip_archive *pZip) // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2009 The RE2 Authors. All Rights Reserved. 
@@ -139179,7 +152946,7 @@ class LogMessageFatal : public LogMessage { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2018 The RE2 Authors. All Rights Reserved. @@ -139244,7 +153011,7 @@ class PODArray { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2007 The RE2 Authors. All Rights Reserved. @@ -139271,7 +153038,7 @@ class PODArray { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2006 The RE2 Authors. All Rights Reserved. @@ -139673,7 +153440,7 @@ template bool SparseArray::less(const IndexValue& a, // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2006 The RE2 Authors. All Rights Reserved. @@ -140362,7 +154129,7 @@ class Prog { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2006 The RE2 Authors. All Rights Reserved. 
@@ -140463,7 +154230,7 @@ class Prog { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list /* @@ -141437,7 +155204,7 @@ bool Prog::SearchBitState(const StringPiece& text, // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2007 The RE2 Authors. All Rights Reserved. @@ -141464,7 +155231,7 @@ bool Prog::SearchBitState(const StringPiece& text, // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2006 The RE2 Authors. All Rights Reserved. @@ -142980,7 +156747,7 @@ Prog* Prog::CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2008 The RE2 Authors. All Rights Reserved. @@ -143025,7 +156792,7 @@ Prog* Prog::CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2016 The RE2 Authors. All Rights Reserved. 
@@ -143076,7 +156843,7 @@ class HashMix { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2007 The RE2 Authors. All Rights Reserved. @@ -143220,7 +156987,7 @@ class WriterMutexLock { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2016 The RE2 Authors. All Rights Reserved. @@ -143786,7 +157553,7 @@ std::string DFA::DumpState(State* state) { // but while a state is being analyzed, these instruction ids are represented // as a Workq, which is an array that allows iteration in insertion order. -// NOTE: The choice of State construction determines whether the DFA +// NOTE(rsc): The choice of State construction determines whether the DFA // mimics backtracking implementations (so-called leftmost first matching) or // traditional DFA implementations (so-called leftmost longest matching as // prescribed by POSIX). This implementation chooses to mimic the @@ -143865,7 +157632,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) { if (needflags == 0) flag &= kFlagMatch; - // NOTE: The code above cannot do flag &= needflags, + // NOTE(rsc): The code above cannot do flag &= needflags, // because if the right flags were present to pass the current // kInstEmptyWidth instructions, new kInstEmptyWidth instructions // might be reached that in turn need different flags. 
@@ -145296,7 +159063,7 @@ bool Prog::PossibleMatchRange(std::string* min, std::string* max, int maxlen) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2009 The RE2 Authors. All Rights Reserved. @@ -145306,7 +159073,7 @@ bool Prog::PossibleMatchRange(std::string* min, std::string* max, int maxlen) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2009 The RE2 Authors. All Rights Reserved. @@ -145432,7 +159199,7 @@ class FilteredRE2 { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2009 The RE2 Authors. All Rights Reserved. @@ -145550,7 +159317,7 @@ class Prefilter { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2009 The RE2 Authors. All Rights Reserved. @@ -145810,7 +159577,7 @@ void FilteredRE2::PrintPrefilter(int regexpid) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2008 The RE2 Authors. All Rights Reserved. 
@@ -146006,7 +159773,7 @@ static bool CanBeEmptyString(Regexp* re) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2006-2007 The RE2 Authors. All Rights Reserved. @@ -146729,7 +160496,7 @@ void Prog::Fanout(SparseArray* fanout) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2008 The RE2 Authors. All Rights Reserved. @@ -147180,7 +160947,7 @@ bool Prog::IsOnePass() { break; case kInstAltMatch: - // TODO: Ignoring kInstAltMatch optimization. + // TODO(rsc): Ignoring kInstAltMatch optimization. // Should implement it in this engine, but it's subtle. DCHECK(!ip->last()); // If already on work queue, (1) is violated: bail out. @@ -147313,7 +161080,7 @@ bool Prog::IsOnePass() { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2006 The RE2 Authors. All Rights Reserved. @@ -147353,7 +161120,7 @@ bool Prog::IsOnePass() { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2008 The RE2 Authors. All Rights Reserved. 
@@ -147441,7 +161208,7 @@ extern Rune ApplyFold(const CaseFold *f, Rune r); // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2008 The RE2 Authors. All Rights Reserved. @@ -149949,7 +163716,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags, // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // GENERATED BY make_perl_groups.pl; DO NOT EDIT. @@ -150077,7 +163844,7 @@ const int num_posix_groups = 28; // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2009 The RE2 Authors. All Rights Reserved. @@ -150778,7 +164545,7 @@ Prefilter* Prefilter::FromRE2(const RE2* re2) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2009 The RE2 Authors. All Rights Reserved. @@ -150848,7 +164615,7 @@ void PrefilterTree::Compile(std::vector* atom_vec) { compiled_ = true; - // TODO: Use std::unordered_set instead? + // TODO(junyer): Use std::unordered_set instead? NodeMap nodes; AssignUniqueIds(&nodes, atom_vec); @@ -150863,7 +164630,7 @@ void PrefilterTree::Compile(std::vector* atom_vec) { if (parents->size() > 8) { // This one triggers too many things. If all the parents are AND // nodes and have other things guarding them, then get rid of - // this trigger. 
TODO: Adjust the threshold appropriately, + // this trigger. TODO(vsri): Adjust the threshold appropriately, // make it a function of total number of nodes? bool have_other_guard = true; for (StdIntMap::iterator it = parents->begin(); @@ -151189,7 +164956,7 @@ std::string PrefilterTree::DebugNodeString(Prefilter* node) const { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2007 The RE2 Authors. All Rights Reserved. @@ -151213,7 +164980,7 @@ std::string PrefilterTree::DebugNodeString(Prefilter* node) const { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2016 The RE2 Authors. All Rights Reserved. @@ -152239,7 +166006,7 @@ void Prog::ComputeHints(std::vector* flat, int begin, int end) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2003-2009 The RE2 Authors. All Rights Reserved. @@ -152526,7 +166293,7 @@ static int Fanout(Prog* prog, std::map* histogram) { prog->Fanout(&fanout); histogram->clear(); for (SparseArray::iterator i = fanout.begin(); i != fanout.end(); ++i) { - // TODO: Optimise this? + // TODO(junyer): Optimise this? 
int bucket = 0; while (1 << bucket < i->value()) { bucket++; @@ -153483,7 +167250,7 @@ DEFINE_INTEGER_PARSER(ulonglong) // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2006 The RE2 Authors. All Rights Reserved. @@ -154463,7 +168230,7 @@ CharClass* CharClassBuilder::GetCharClass() { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2010 The RE2 Authors. All Rights Reserved. @@ -154473,7 +168240,7 @@ CharClass* CharClassBuilder::GetCharClass() { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2010 The RE2 Authors. All Rights Reserved. @@ -154714,7 +168481,7 @@ bool RE2::Set::Match(const StringPiece& text, std::vector* v, // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2006 The RE2 Authors. All Rights Reserved. @@ -155379,7 +169146,7 @@ Regexp* SimplifyWalker::SimplifyCharClass(Regexp* re) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2004 The RE2 Authors. All Rights Reserved. 
@@ -155453,7 +169220,7 @@ std::ostream& operator<<(std::ostream& o, const StringPiece& p) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 2006 The RE2 Authors. All Rights Reserved. @@ -155811,7 +169578,7 @@ static void AppendCCRange(std::string* t, Rune lo, Rune hi) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list @@ -156387,7 +170154,7 @@ const int num_unicode_tolower = 194; // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list @@ -162455,7 +176222,7 @@ const int num_unicode_groups = 184; // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list /* @@ -162486,7 +176253,7 @@ enum Bit2 = 5, Bit3 = 4, Bit4 = 3, - Bit5 = 2, + Bit5 = 2, T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ @@ -162724,7 +176491,7 @@ utfrune(const char *s, Rune c) // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #7 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 // See the end of this file for a list // Copyright 1999-2005 The RE2 Authors. All Rights Reserved. 
@@ -165562,7 +179329,7 @@ void sdfree(void *ptr) { free(ptr); } // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 // See the end of this file for a list /* -*- mode: c; c-basic-offset: 2; tab-width: 2; indent-tabs-mode: nil -*- */ @@ -165622,7013 +179389,7013 @@ namespace duckdb { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 // See the end of this file for a list static const utf8proc_uint16_t utf8proc_sequences[] = { - 97, 98, 99, 100, 101, 102, 103, - 104, 105, 106, 107, 108, 109, 110, 111, - 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 32, 32, 776, 32, 772, - 50, 51, 32, 769, 956, 32, 807, 49, - 49, 8260, 52, 49, 8260, 50, 51, 8260, - 52, 65, 768, 224, 65, 769, 225, 65, - 770, 226, 65, 771, 227, 65, 776, 228, - 65, 778, 229, 230, 67, 807, 231, 69, - 768, 232, 69, 769, 233, 69, 770, 234, - 69, 776, 235, 73, 768, 236, 73, 769, - 237, 73, 770, 238, 73, 776, 239, 240, - 78, 771, 241, 79, 768, 242, 79, 769, - 243, 79, 770, 244, 79, 771, 245, 79, - 776, 246, 248, 85, 768, 249, 85, 769, - 250, 85, 770, 251, 85, 776, 252, 89, - 769, 253, 254, 115, 115, 97, 768, 97, - 769, 97, 770, 97, 771, 97, 776, 97, - 778, 99, 807, 101, 768, 101, 769, 101, - 770, 101, 776, 105, 768, 105, 769, 105, - 770, 105, 776, 110, 771, 111, 768, 111, - 769, 111, 770, 111, 771, 111, 776, 117, - 768, 117, 769, 117, 770, 117, 776, 121, - 769, 121, 776, 65, 772, 257, 97, 772, - 65, 774, 259, 97, 774, 65, 808, 261, - 97, 808, 67, 769, 263, 99, 769, 67, - 770, 265, 99, 770, 67, 775, 267, 99, - 775, 67, 780, 269, 99, 780, 68, 780, - 271, 100, 780, 273, 69, 772, 275, 101, - 772, 69, 774, 277, 101, 774, 69, 775, - 279, 101, 775, 69, 808, 281, 101, 808, - 69, 780, 283, 101, 780, 71, 770, 285, - 103, 770, 
71, 774, 287, 103, 774, 71, - 775, 289, 103, 775, 71, 807, 291, 103, - 807, 72, 770, 293, 104, 770, 295, 73, - 771, 297, 105, 771, 73, 772, 299, 105, - 772, 73, 774, 301, 105, 774, 73, 808, - 303, 105, 808, 73, 775, 105, 775, 73, - 74, 307, 105, 106, 74, 770, 309, 106, - 770, 75, 807, 311, 107, 807, 76, 769, - 314, 108, 769, 76, 807, 316, 108, 807, - 76, 780, 318, 108, 780, 76, 183, 320, - 108, 183, 322, 78, 769, 324, 110, 769, - 78, 807, 326, 110, 807, 78, 780, 328, - 110, 780, 700, 110, 331, 79, 772, 333, - 111, 772, 79, 774, 335, 111, 774, 79, - 779, 337, 111, 779, 339, 82, 769, 341, - 114, 769, 82, 807, 343, 114, 807, 82, - 780, 345, 114, 780, 83, 769, 347, 115, - 769, 83, 770, 349, 115, 770, 83, 807, - 351, 115, 807, 83, 780, 353, 115, 780, - 84, 807, 355, 116, 807, 84, 780, 357, - 116, 780, 359, 85, 771, 361, 117, 771, - 85, 772, 363, 117, 772, 85, 774, 365, - 117, 774, 85, 778, 367, 117, 778, 85, - 779, 369, 117, 779, 85, 808, 371, 117, - 808, 87, 770, 373, 119, 770, 89, 770, - 375, 121, 770, 89, 776, 255, 90, 769, - 378, 122, 769, 90, 775, 380, 122, 775, - 90, 780, 382, 122, 780, 595, 387, 389, - 596, 392, 598, 599, 396, 477, 601, 603, - 402, 608, 611, 617, 616, 409, 623, 626, - 629, 79, 795, 417, 111, 795, 419, 421, - 640, 424, 643, 429, 648, 85, 795, 432, - 117, 795, 650, 651, 436, 438, 658, 441, - 445, 68, 381, 454, 68, 382, 100, 382, - 76, 74, 457, 76, 106, 108, 106, 78, - 74, 460, 78, 106, 110, 106, 65, 780, - 462, 97, 780, 73, 780, 464, 105, 780, - 79, 780, 466, 111, 780, 85, 780, 468, - 117, 780, 220, 772, 470, 252, 772, 220, - 769, 472, 252, 769, 220, 780, 474, 252, - 780, 220, 768, 476, 252, 768, 196, 772, - 479, 228, 772, 550, 772, 481, 551, 772, - 198, 772, 483, 230, 772, 485, 71, 780, - 487, 103, 780, 75, 780, 489, 107, 780, - 79, 808, 491, 111, 808, 490, 772, 493, - 491, 772, 439, 780, 495, 658, 780, 106, - 780, 68, 90, 499, 68, 122, 100, 122, - 71, 769, 501, 103, 769, 405, 447, 78, - 768, 505, 110, 768, 197, 769, 507, 229, - 769, 198, 769, 
509, 230, 769, 216, 769, - 511, 248, 769, 65, 783, 513, 97, 783, - 65, 785, 515, 97, 785, 69, 783, 517, - 101, 783, 69, 785, 519, 101, 785, 73, - 783, 521, 105, 783, 73, 785, 523, 105, - 785, 79, 783, 525, 111, 783, 79, 785, - 527, 111, 785, 82, 783, 529, 114, 783, - 82, 785, 531, 114, 785, 85, 783, 533, - 117, 783, 85, 785, 535, 117, 785, 83, - 806, 537, 115, 806, 84, 806, 539, 116, - 806, 541, 72, 780, 543, 104, 780, 414, - 547, 549, 65, 775, 551, 97, 775, 69, - 807, 553, 101, 807, 214, 772, 555, 246, - 772, 213, 772, 557, 245, 772, 79, 775, - 559, 111, 775, 558, 772, 561, 559, 772, - 89, 772, 563, 121, 772, 11365, 572, 410, - 11366, 578, 384, 649, 652, 583, 585, 587, - 589, 591, 614, 633, 635, 641, 32, 774, - 32, 775, 32, 778, 32, 808, 32, 771, - 32, 779, 661, 768, 769, 787, 776, 769, - 953, 881, 883, 697, 887, 32, 837, 59, - 1011, 168, 769, 913, 769, 940, 183, 917, - 769, 941, 919, 769, 942, 921, 769, 943, - 927, 769, 972, 933, 769, 973, 937, 769, - 974, 970, 769, 953, 776, 769, 945, 946, - 947, 948, 949, 950, 951, 952, 954, 955, - 957, 958, 959, 960, 961, 963, 964, 965, - 966, 967, 968, 969, 921, 776, 970, 933, - 776, 971, 945, 769, 949, 769, 951, 769, - 953, 769, 971, 769, 965, 776, 769, 953, - 776, 965, 776, 959, 769, 965, 769, 969, - 769, 983, 933, 978, 769, 978, 776, 985, - 987, 989, 991, 993, 995, 997, 999, 1001, - 1003, 1005, 1007, 962, 920, 1016, 931, 1010, - 1019, 891, 892, 893, 1045, 768, 1104, 1045, - 776, 1105, 1106, 1043, 769, 1107, 1108, 1109, - 1110, 1030, 776, 1111, 1112, 1113, 1114, 1115, - 1050, 769, 1116, 1048, 768, 1117, 1059, 774, - 1118, 1119, 1072, 1073, 1074, 1075, 1076, 1077, - 1078, 1079, 1080, 1048, 774, 1081, 1082, 1083, - 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, - 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, - 1100, 1101, 1102, 1103, 1080, 774, 1077, 768, - 1077, 776, 1075, 769, 1110, 776, 1082, 769, - 1080, 768, 1091, 774, 1121, 1123, 1125, 1127, - 1129, 1131, 1133, 1135, 1137, 1139, 1141, 1140, - 783, 1143, 1141, 783, 
1145, 1147, 1149, 1151, - 1153, 1163, 1165, 1167, 1169, 1171, 1173, 1175, - 1177, 1179, 1181, 1183, 1185, 1187, 1189, 1191, - 1193, 1195, 1197, 1199, 1201, 1203, 1205, 1207, - 1209, 1211, 1213, 1215, 1231, 1046, 774, 1218, - 1078, 774, 1220, 1222, 1224, 1226, 1228, 1230, - 1040, 774, 1233, 1072, 774, 1040, 776, 1235, - 1072, 776, 1237, 1045, 774, 1239, 1077, 774, - 1241, 1240, 776, 1243, 1241, 776, 1046, 776, - 1245, 1078, 776, 1047, 776, 1247, 1079, 776, - 1249, 1048, 772, 1251, 1080, 772, 1048, 776, - 1253, 1080, 776, 1054, 776, 1255, 1086, 776, - 1257, 1256, 776, 1259, 1257, 776, 1069, 776, - 1261, 1101, 776, 1059, 772, 1263, 1091, 772, - 1059, 776, 1265, 1091, 776, 1059, 779, 1267, - 1091, 779, 1063, 776, 1269, 1095, 776, 1271, - 1067, 776, 1273, 1099, 776, 1275, 1277, 1279, - 1281, 1283, 1285, 1287, 1289, 1291, 1293, 1295, - 1297, 1299, 1301, 1303, 1305, 1307, 1309, 1311, - 1313, 1315, 1317, 1319, 1321, 1323, 1325, 1327, - 1377, 1378, 1379, 1380, 1381, 1382, 1383, 1384, - 1385, 1386, 1387, 1388, 1389, 1390, 1391, 1392, - 1393, 1394, 1395, 1396, 1397, 1398, 1399, 1400, - 1401, 1402, 1403, 1404, 1405, 1406, 1407, 1408, - 1409, 1410, 1411, 1412, 1413, 1414, 1381, 1410, - 1575, 1619, 1575, 1620, 1608, 1620, 1575, 1621, - 1610, 1620, 1575, 1652, 1608, 1652, 1735, 1652, - 1610, 1652, 1749, 1620, 1729, 1620, 1746, 1620, - 2344, 2364, 2352, 2364, 2355, 2364, 2325, 2364, - 2326, 2364, 2327, 2364, 2332, 2364, 2337, 2364, - 2338, 2364, 2347, 2364, 2351, 2364, 2503, 2494, - 2503, 2519, 2465, 2492, 2466, 2492, 2479, 2492, - 2610, 2620, 2616, 2620, 2582, 2620, 2583, 2620, - 2588, 2620, 2603, 2620, 2887, 2902, 2887, 2878, - 2887, 2903, 2849, 2876, 2850, 2876, 2962, 3031, - 3014, 3006, 3015, 3006, 3014, 3031, 3142, 3158, - 3263, 3285, 3270, 3285, 3270, 3286, 3270, 3266, - 3274, 3285, 3398, 3390, 3399, 3390, 3398, 3415, - 3545, 3530, 3545, 3535, 3548, 3530, 3545, 3551, - 3661, 3634, 3789, 3762, 3755, 3737, 3755, 3745, - 3851, 3906, 4023, 3916, 4023, 3921, 4023, 3926, - 4023, 
3931, 4023, 3904, 4021, 3953, 3954, 3953, - 3956, 4018, 3968, 4018, 3969, 4019, 3968, 4019, - 3969, 3953, 3968, 3986, 4023, 3996, 4023, 4001, - 4023, 4006, 4023, 4011, 4023, 3984, 4021, 4133, - 4142, 11520, 11521, 11522, 11523, 11524, 11525, 11526, - 11527, 11528, 11529, 11530, 11531, 11532, 11533, 11534, - 11535, 11536, 11537, 11538, 11539, 11540, 11541, 11542, - 11543, 11544, 11545, 11546, 11547, 11548, 11549, 11550, - 11551, 11552, 11553, 11554, 11555, 11556, 11557, 11559, - 11565, 4316, 5104, 5105, 5106, 5107, 5108, 5109, - 6917, 6965, 6919, 6965, 6921, 6965, 6923, 6965, - 6925, 6965, 6929, 6965, 6970, 6965, 6972, 6965, - 6974, 6965, 6975, 6965, 6978, 6965, 42571, 4304, - 4305, 4306, 4307, 4308, 4309, 4310, 4311, 4312, - 4313, 4314, 4315, 4317, 4318, 4319, 4320, 4321, - 4322, 4323, 4324, 4325, 4326, 4327, 4328, 4329, - 4330, 4331, 4332, 4333, 4334, 4335, 4336, 4337, - 4338, 4339, 4340, 4341, 4342, 4343, 4344, 4345, - 4346, 4349, 4350, 4351, 65, 198, 66, 68, - 69, 398, 71, 72, 73, 74, 75, 76, - 77, 78, 79, 546, 80, 82, 84, 85, - 87, 592, 593, 7426, 604, 7446, 7447, 7453, - 7461, 594, 597, 607, 609, 613, 618, 7547, - 669, 621, 7557, 671, 625, 624, 627, 628, - 632, 642, 427, 7452, 656, 657, 65, 805, - 7681, 97, 805, 66, 775, 7683, 98, 775, - 66, 803, 7685, 98, 803, 66, 817, 7687, - 98, 817, 199, 769, 7689, 231, 769, 68, - 775, 7691, 100, 775, 68, 803, 7693, 100, - 803, 68, 817, 7695, 100, 817, 68, 807, - 7697, 100, 807, 68, 813, 7699, 100, 813, - 274, 768, 7701, 275, 768, 274, 769, 7703, - 275, 769, 69, 813, 7705, 101, 813, 69, - 816, 7707, 101, 816, 552, 774, 7709, 553, - 774, 70, 775, 7711, 102, 775, 71, 772, - 7713, 103, 772, 72, 775, 7715, 104, 775, - 72, 803, 7717, 104, 803, 72, 776, 7719, - 104, 776, 72, 807, 7721, 104, 807, 72, - 814, 7723, 104, 814, 73, 816, 7725, 105, - 816, 207, 769, 7727, 239, 769, 75, 769, - 7729, 107, 769, 75, 803, 7731, 107, 803, - 75, 817, 7733, 107, 817, 76, 803, 7735, - 108, 803, 7734, 772, 7737, 7735, 772, 76, - 817, 7739, 108, 
817, 76, 813, 7741, 108, - 813, 77, 769, 7743, 109, 769, 77, 775, - 7745, 109, 775, 77, 803, 7747, 109, 803, - 78, 775, 7749, 110, 775, 78, 803, 7751, - 110, 803, 78, 817, 7753, 110, 817, 78, - 813, 7755, 110, 813, 213, 769, 7757, 245, - 769, 213, 776, 7759, 245, 776, 332, 768, - 7761, 333, 768, 332, 769, 7763, 333, 769, - 80, 769, 7765, 112, 769, 80, 775, 7767, - 112, 775, 82, 775, 7769, 114, 775, 82, - 803, 7771, 114, 803, 7770, 772, 7773, 7771, - 772, 82, 817, 7775, 114, 817, 83, 775, - 7777, 115, 775, 83, 803, 7779, 115, 803, - 346, 775, 7781, 347, 775, 352, 775, 7783, - 353, 775, 7778, 775, 7785, 7779, 775, 84, - 775, 7787, 116, 775, 84, 803, 7789, 116, - 803, 84, 817, 7791, 116, 817, 84, 813, - 7793, 116, 813, 85, 804, 7795, 117, 804, - 85, 816, 7797, 117, 816, 85, 813, 7799, - 117, 813, 360, 769, 7801, 361, 769, 362, - 776, 7803, 363, 776, 86, 771, 7805, 118, - 771, 86, 803, 7807, 118, 803, 87, 768, - 7809, 119, 768, 87, 769, 7811, 119, 769, - 87, 776, 7813, 119, 776, 87, 775, 7815, - 119, 775, 87, 803, 7817, 119, 803, 88, - 775, 7819, 120, 775, 88, 776, 7821, 120, - 776, 89, 775, 7823, 121, 775, 90, 770, - 7825, 122, 770, 90, 803, 7827, 122, 803, - 90, 817, 7829, 122, 817, 104, 817, 116, - 776, 119, 778, 121, 778, 97, 702, 383, - 775, 65, 803, 7841, 97, 803, 65, 777, - 7843, 97, 777, 194, 769, 7845, 226, 769, - 194, 768, 7847, 226, 768, 194, 777, 7849, - 226, 777, 194, 771, 7851, 226, 771, 7840, - 770, 7853, 7841, 770, 258, 769, 7855, 259, - 769, 258, 768, 7857, 259, 768, 258, 777, - 7859, 259, 777, 258, 771, 7861, 259, 771, - 7840, 774, 7863, 7841, 774, 69, 803, 7865, - 101, 803, 69, 777, 7867, 101, 777, 69, - 771, 7869, 101, 771, 202, 769, 7871, 234, - 769, 202, 768, 7873, 234, 768, 202, 777, - 7875, 234, 777, 202, 771, 7877, 234, 771, - 7864, 770, 7879, 7865, 770, 73, 777, 7881, - 105, 777, 73, 803, 7883, 105, 803, 79, - 803, 7885, 111, 803, 79, 777, 7887, 111, - 777, 212, 769, 7889, 244, 769, 212, 768, - 7891, 244, 768, 212, 777, 7893, 244, 777, - 212, 
771, 7895, 244, 771, 7884, 770, 7897, - 7885, 770, 416, 769, 7899, 417, 769, 416, - 768, 7901, 417, 768, 416, 777, 7903, 417, - 777, 416, 771, 7905, 417, 771, 416, 803, - 7907, 417, 803, 85, 803, 7909, 117, 803, - 85, 777, 7911, 117, 777, 431, 769, 7913, - 432, 769, 431, 768, 7915, 432, 768, 431, - 777, 7917, 432, 777, 431, 771, 7919, 432, - 771, 431, 803, 7921, 432, 803, 89, 768, - 7923, 121, 768, 89, 803, 7925, 121, 803, - 89, 777, 7927, 121, 777, 89, 771, 7929, - 121, 771, 7931, 7933, 7935, 945, 787, 945, - 788, 7936, 768, 7937, 768, 7936, 769, 7937, - 769, 7936, 834, 7937, 834, 913, 787, 7936, - 913, 788, 7937, 7944, 768, 7938, 7945, 768, - 7939, 7944, 769, 7940, 7945, 769, 7941, 7944, - 834, 7942, 7945, 834, 7943, 949, 787, 949, - 788, 7952, 768, 7953, 768, 7952, 769, 7953, - 769, 917, 787, 7952, 917, 788, 7953, 7960, - 768, 7954, 7961, 768, 7955, 7960, 769, 7956, - 7961, 769, 7957, 951, 787, 951, 788, 7968, - 768, 7969, 768, 7968, 769, 7969, 769, 7968, - 834, 7969, 834, 919, 787, 7968, 919, 788, - 7969, 7976, 768, 7970, 7977, 768, 7971, 7976, - 769, 7972, 7977, 769, 7973, 7976, 834, 7974, - 7977, 834, 7975, 953, 787, 953, 788, 7984, - 768, 7985, 768, 7984, 769, 7985, 769, 7984, - 834, 7985, 834, 921, 787, 7984, 921, 788, - 7985, 7992, 768, 7986, 7993, 768, 7987, 7992, - 769, 7988, 7993, 769, 7989, 7992, 834, 7990, - 7993, 834, 7991, 959, 787, 959, 788, 8000, - 768, 8001, 768, 8000, 769, 8001, 769, 927, - 787, 8000, 927, 788, 8001, 8008, 768, 8002, - 8009, 768, 8003, 8008, 769, 8004, 8009, 769, - 8005, 965, 787, 965, 788, 8016, 768, 965, - 787, 768, 8017, 768, 8016, 769, 965, 787, - 769, 8017, 769, 8016, 834, 965, 787, 834, - 8017, 834, 933, 788, 8017, 8025, 768, 8019, - 8025, 769, 8021, 8025, 834, 8023, 969, 787, - 969, 788, 8032, 768, 8033, 768, 8032, 769, - 8033, 769, 8032, 834, 8033, 834, 937, 787, - 8032, 937, 788, 8033, 8040, 768, 8034, 8041, - 768, 8035, 8040, 769, 8036, 8041, 769, 8037, - 8040, 834, 8038, 8041, 834, 8039, 945, 768, - 949, 768, 951, 
768, 953, 768, 959, 768, - 965, 768, 969, 768, 7936, 837, 7936, 953, - 7937, 837, 7937, 953, 7938, 837, 7938, 953, - 7939, 837, 7939, 953, 7940, 837, 7940, 953, - 7941, 837, 7941, 953, 7942, 837, 7942, 953, - 7943, 837, 7943, 953, 7944, 837, 7945, 837, - 7946, 837, 7947, 837, 7948, 837, 7949, 837, - 7950, 837, 7951, 837, 7968, 837, 7968, 953, - 7969, 837, 7969, 953, 7970, 837, 7970, 953, - 7971, 837, 7971, 953, 7972, 837, 7972, 953, - 7973, 837, 7973, 953, 7974, 837, 7974, 953, - 7975, 837, 7975, 953, 7976, 837, 7977, 837, - 7978, 837, 7979, 837, 7980, 837, 7981, 837, - 7982, 837, 7983, 837, 8032, 837, 8032, 953, - 8033, 837, 8033, 953, 8034, 837, 8034, 953, - 8035, 837, 8035, 953, 8036, 837, 8036, 953, - 8037, 837, 8037, 953, 8038, 837, 8038, 953, - 8039, 837, 8039, 953, 8040, 837, 8041, 837, - 8042, 837, 8043, 837, 8044, 837, 8045, 837, - 8046, 837, 8047, 837, 945, 774, 945, 772, - 8048, 837, 8048, 953, 945, 837, 945, 953, - 940, 837, 940, 953, 945, 834, 8118, 837, - 945, 834, 953, 913, 774, 8112, 913, 772, - 8113, 913, 768, 8048, 902, 8049, 913, 837, - 32, 787, 32, 834, 168, 834, 8052, 837, - 8052, 953, 951, 837, 951, 953, 942, 837, - 942, 953, 951, 834, 8134, 837, 951, 834, - 953, 917, 768, 8050, 904, 8051, 919, 768, - 8052, 905, 8053, 919, 837, 8127, 768, 8127, - 769, 8127, 834, 953, 774, 953, 772, 970, - 768, 953, 776, 768, 912, 953, 834, 970, - 834, 953, 776, 834, 921, 774, 8144, 921, - 772, 8145, 921, 768, 8054, 906, 8055, 8190, - 768, 8190, 769, 8190, 834, 965, 774, 965, - 772, 971, 768, 965, 776, 768, 944, 961, - 787, 961, 788, 965, 834, 971, 834, 965, - 776, 834, 933, 774, 8160, 933, 772, 8161, - 933, 768, 8058, 910, 8059, 929, 788, 8165, - 168, 768, 901, 96, 8060, 837, 8060, 953, - 969, 837, 969, 953, 974, 837, 974, 953, - 969, 834, 8182, 837, 969, 834, 953, 927, - 768, 8056, 908, 8057, 937, 768, 8060, 911, - 8061, 937, 837, 180, 32, 788, 8194, 8195, - 8208, 32, 819, 46, 46, 46, 46, 46, - 46, 8242, 8242, 8242, 8242, 8242, 8245, 8245, - 8245, 8245, 8245, 
33, 33, 32, 773, 63, - 63, 63, 33, 33, 63, 8242, 8242, 8242, - 8242, 48, 52, 53, 54, 55, 56, 57, - 43, 8722, 61, 40, 41, 82, 115, 97, - 47, 99, 97, 47, 115, 67, 176, 67, - 99, 47, 111, 99, 47, 117, 400, 176, - 70, 78, 111, 81, 83, 77, 84, 69, - 76, 84, 77, 90, 937, 197, 70, 8526, - 1488, 1489, 1490, 1491, 70, 65, 88, 915, - 928, 8721, 49, 8260, 55, 49, 8260, 57, - 49, 8260, 49, 48, 49, 8260, 51, 50, - 8260, 51, 49, 8260, 53, 50, 8260, 53, - 51, 8260, 53, 52, 8260, 53, 49, 8260, - 54, 53, 8260, 54, 49, 8260, 56, 51, - 8260, 56, 53, 8260, 56, 55, 8260, 56, - 49, 8260, 8560, 73, 73, 8561, 73, 73, - 73, 8562, 73, 86, 8563, 86, 8564, 86, - 73, 8565, 86, 73, 73, 8566, 86, 73, - 73, 73, 8567, 73, 88, 8568, 88, 8569, - 88, 73, 8570, 88, 73, 73, 8571, 8572, - 8573, 8574, 8575, 105, 105, 105, 105, 105, - 105, 118, 118, 105, 118, 105, 105, 118, - 105, 105, 105, 105, 120, 120, 105, 120, - 105, 105, 8580, 48, 8260, 51, 8592, 824, - 8594, 824, 8596, 824, 8656, 824, 8660, 824, - 8658, 824, 8707, 824, 8712, 824, 8715, 824, - 8739, 824, 8741, 824, 8747, 8747, 8747, 8747, - 8747, 8750, 8750, 8750, 8750, 8750, 8764, 824, - 8771, 824, 8773, 824, 8776, 824, 61, 824, - 8801, 824, 8781, 824, 60, 824, 62, 824, - 8804, 824, 8805, 824, 8818, 824, 8819, 824, - 8822, 824, 8823, 824, 8826, 824, 8827, 824, - 8834, 824, 8835, 824, 8838, 824, 8839, 824, - 8866, 824, 8872, 824, 8873, 824, 8875, 824, - 8828, 824, 8829, 824, 8849, 824, 8850, 824, - 8882, 824, 8883, 824, 8884, 824, 8885, 824, - 12296, 12297, 49, 48, 49, 49, 49, 50, - 49, 51, 49, 52, 49, 53, 49, 54, - 49, 55, 49, 56, 49, 57, 50, 48, - 40, 49, 41, 40, 50, 41, 40, 51, - 41, 40, 52, 41, 40, 53, 41, 40, - 54, 41, 40, 55, 41, 40, 56, 41, - 40, 57, 41, 40, 49, 48, 41, 40, - 49, 49, 41, 40, 49, 50, 41, 40, - 49, 51, 41, 40, 49, 52, 41, 40, - 49, 53, 41, 40, 49, 54, 41, 40, - 49, 55, 41, 40, 49, 56, 41, 40, - 49, 57, 41, 40, 50, 48, 41, 49, - 46, 50, 46, 51, 46, 52, 46, 53, - 46, 54, 46, 55, 46, 56, 46, 57, - 46, 49, 48, 46, 49, 49, 46, 49, - 
50, 46, 49, 51, 46, 49, 52, 46, - 49, 53, 46, 49, 54, 46, 49, 55, - 46, 49, 56, 46, 49, 57, 46, 50, - 48, 46, 40, 97, 41, 40, 98, 41, - 40, 99, 41, 40, 100, 41, 40, 101, - 41, 40, 102, 41, 40, 103, 41, 40, - 104, 41, 40, 105, 41, 40, 106, 41, - 40, 107, 41, 40, 108, 41, 40, 109, - 41, 40, 110, 41, 40, 111, 41, 40, - 112, 41, 40, 113, 41, 40, 114, 41, - 40, 115, 41, 40, 116, 41, 40, 117, - 41, 40, 118, 41, 40, 119, 41, 40, - 120, 41, 40, 121, 41, 40, 122, 41, - 9424, 9425, 9426, 9427, 9428, 9429, 9430, 9431, - 9432, 9433, 9434, 9435, 9436, 9437, 9438, 9439, - 9440, 9441, 83, 9442, 9443, 9444, 9445, 9446, - 9447, 89, 9448, 9449, 8747, 8747, 8747, 8747, - 58, 58, 61, 61, 61, 61, 61, 61, - 10973, 824, 11312, 11313, 11314, 11315, 11316, 11317, - 11318, 11319, 11320, 11321, 11322, 11323, 11324, 11325, - 11326, 11327, 11328, 11329, 11330, 11331, 11332, 11333, - 11334, 11335, 11336, 11337, 11338, 11339, 11340, 11341, - 11342, 11343, 11344, 11345, 11346, 11347, 11348, 11349, - 11350, 11351, 11352, 11353, 11354, 11355, 11356, 11357, - 11358, 11361, 619, 7549, 637, 11368, 11370, 11372, - 11379, 11382, 575, 576, 11393, 11395, 11397, 11399, - 11401, 11403, 11405, 11407, 11409, 11411, 11413, 11415, - 11417, 11419, 11421, 11423, 11425, 11427, 11429, 11431, - 11433, 11435, 11437, 11439, 11441, 11443, 11445, 11447, - 11449, 11451, 11453, 11455, 11457, 11459, 11461, 11463, - 11465, 11467, 11469, 11471, 11473, 11475, 11477, 11479, - 11481, 11483, 11485, 11487, 11489, 11491, 11500, 11502, - 11507, 11617, 27597, 40863, 19968, 20008, 20022, 20031, - 20057, 20101, 20108, 20128, 20154, 20799, 20837, 20843, - 20866, 20886, 20907, 20960, 20981, 20992, 21147, 21241, - 21269, 21274, 21304, 21313, 21340, 21353, 21378, 21430, - 21448, 21475, 22231, 22303, 22763, 22786, 22794, 22805, - 22823, 22899, 23376, 23424, 23544, 23567, 23586, 23608, - 23662, 23665, 24027, 24037, 24049, 24062, 24178, 24186, - 24191, 24308, 24318, 24331, 24339, 24400, 24417, 24435, - 24515, 25096, 25142, 25163, 25903, 
25908, 25991, 26007, - 26020, 26041, 26080, 26085, 26352, 26376, 26408, 27424, - 27490, 27513, 27571, 27595, 27604, 27611, 27663, 27668, - 27700, 28779, 29226, 29238, 29243, 29247, 29255, 29273, - 29275, 29356, 29572, 29577, 29916, 29926, 29976, 29983, - 29992, 30000, 30091, 30098, 30326, 30333, 30382, 30399, - 30446, 30683, 30690, 30707, 31034, 31160, 31166, 31348, - 31435, 31481, 31859, 31992, 32566, 32593, 32650, 32701, - 32769, 32780, 32786, 32819, 32895, 32905, 33251, 33258, - 33267, 33276, 33292, 33307, 33311, 33390, 33394, 33400, - 34381, 34411, 34880, 34892, 34915, 35198, 35211, 35282, - 35328, 35895, 35910, 35925, 35960, 35997, 36196, 36208, - 36275, 36523, 36554, 36763, 36784, 36789, 37009, 37193, - 37318, 37324, 37329, 38263, 38272, 38428, 38582, 38585, - 38632, 38737, 38750, 38754, 38761, 38859, 38893, 38899, - 38913, 39080, 39131, 39135, 39318, 39321, 39340, 39592, - 39640, 39647, 39717, 39727, 39730, 39740, 39770, 40165, - 40565, 40575, 40613, 40635, 40643, 40653, 40657, 40697, - 40701, 40718, 40723, 40736, 40763, 40778, 40786, 40845, - 40860, 40864, 12306, 21316, 21317, 12363, 12441, 12365, - 12441, 12367, 12441, 12369, 12441, 12371, 12441, 12373, - 12441, 12375, 12441, 12377, 12441, 12379, 12441, 12381, - 12441, 12383, 12441, 12385, 12441, 12388, 12441, 12390, - 12441, 12392, 12441, 12399, 12441, 12399, 12442, 12402, - 12441, 12402, 12442, 12405, 12441, 12405, 12442, 12408, - 12441, 12408, 12442, 12411, 12441, 12411, 12442, 12358, - 12441, 32, 12441, 32, 12442, 12445, 12441, 12424, - 12426, 12459, 12441, 12461, 12441, 12463, 12441, 12465, - 12441, 12467, 12441, 12469, 12441, 12471, 12441, 12473, - 12441, 12475, 12441, 12477, 12441, 12479, 12441, 12481, - 12441, 12484, 12441, 12486, 12441, 12488, 12441, 12495, - 12441, 12495, 12442, 12498, 12441, 12498, 12442, 12501, - 12441, 12501, 12442, 12504, 12441, 12504, 12442, 12507, - 12441, 12507, 12442, 12454, 12441, 12527, 12441, 12528, - 12441, 12529, 12441, 12530, 12441, 12541, 12441, 12467, - 12488, 
4352, 4353, 4522, 4354, 4524, 4525, 4355, - 4356, 4357, 4528, 4529, 4530, 4531, 4532, 4533, - 4378, 4358, 4359, 4360, 4385, 4361, 4362, 4363, - 4364, 4365, 4366, 4367, 4368, 4369, 4370, 4449, - 4450, 4451, 4452, 4453, 4454, 4455, 4456, 4457, - 4458, 4459, 4460, 4461, 4462, 4463, 4464, 4465, - 4466, 4467, 4468, 4469, 4448, 4372, 4373, 4551, - 4552, 4556, 4558, 4563, 4567, 4569, 4380, 4573, - 4575, 4381, 4382, 4384, 4386, 4387, 4391, 4393, - 4395, 4396, 4397, 4398, 4399, 4402, 4406, 4416, - 4423, 4428, 4593, 4594, 4439, 4440, 4441, 4484, - 4485, 4488, 4497, 4498, 4500, 4510, 4513, 19977, - 22235, 19978, 20013, 19979, 30002, 19993, 19969, 22825, - 22320, 40, 4352, 41, 40, 4354, 41, 40, - 4355, 41, 40, 4357, 41, 40, 4358, 41, - 40, 4359, 41, 40, 4361, 41, 40, 4363, - 41, 40, 4364, 41, 40, 4366, 41, 40, - 4367, 41, 40, 4368, 41, 40, 4369, 41, - 40, 4370, 41, 40, 4352, 4449, 41, 40, - 4354, 4449, 41, 40, 4355, 4449, 41, 40, - 4357, 4449, 41, 40, 4358, 4449, 41, 40, - 4359, 4449, 41, 40, 4361, 4449, 41, 40, - 4363, 4449, 41, 40, 4364, 4449, 41, 40, - 4366, 4449, 41, 40, 4367, 4449, 41, 40, - 4368, 4449, 41, 40, 4369, 4449, 41, 40, - 4370, 4449, 41, 40, 4364, 4462, 41, 40, - 4363, 4457, 4364, 4453, 4523, 41, 40, 4363, - 4457, 4370, 4462, 41, 40, 19968, 41, 40, - 20108, 41, 40, 19977, 41, 40, 22235, 41, - 40, 20116, 41, 40, 20845, 41, 40, 19971, - 41, 40, 20843, 41, 40, 20061, 41, 40, - 21313, 41, 40, 26376, 41, 40, 28779, 41, - 40, 27700, 41, 40, 26408, 41, 40, 37329, - 41, 40, 22303, 41, 40, 26085, 41, 40, - 26666, 41, 40, 26377, 41, 40, 31038, 41, - 40, 21517, 41, 40, 29305, 41, 40, 36001, - 41, 40, 31069, 41, 40, 21172, 41, 40, - 20195, 41, 40, 21628, 41, 40, 23398, 41, - 40, 30435, 41, 40, 20225, 41, 40, 36039, - 41, 40, 21332, 41, 40, 31085, 41, 40, - 20241, 41, 40, 33258, 41, 40, 33267, 41, - 21839, 24188, 31631, 80, 84, 69, 50, 49, - 50, 50, 50, 51, 50, 52, 50, 53, - 50, 54, 50, 55, 50, 56, 50, 57, - 51, 48, 51, 49, 51, 50, 51, 51, - 51, 52, 51, 53, 4352, 4449, 
4354, 4449, - 4355, 4449, 4357, 4449, 4358, 4449, 4359, 4449, - 4361, 4449, 4363, 4449, 4364, 4449, 4366, 4449, - 4367, 4449, 4368, 4449, 4369, 4449, 4370, 4449, - 4366, 4449, 4535, 4352, 4457, 4364, 4462, 4363, - 4468, 4363, 4462, 20116, 20845, 19971, 20061, 26666, - 26377, 31038, 21517, 29305, 36001, 31069, 21172, 31192, - 30007, 36969, 20778, 21360, 27880, 38917, 20241, 20889, - 27491, 24038, 21491, 21307, 23447, 23398, 30435, 20225, - 36039, 21332, 22812, 51, 54, 51, 55, 51, - 56, 51, 57, 52, 48, 52, 49, 52, - 50, 52, 51, 52, 52, 52, 53, 52, - 54, 52, 55, 52, 56, 52, 57, 53, - 48, 49, 26376, 50, 26376, 51, 26376, 52, - 26376, 53, 26376, 54, 26376, 55, 26376, 56, - 26376, 57, 26376, 49, 48, 26376, 49, 49, - 26376, 49, 50, 26376, 72, 103, 101, 114, - 103, 101, 86, 76, 84, 68, 12450, 12452, - 12454, 12456, 12458, 12459, 12461, 12463, 12465, 12467, - 12469, 12471, 12473, 12475, 12477, 12479, 12481, 12484, - 12486, 12488, 12490, 12491, 12492, 12493, 12494, 12495, - 12498, 12501, 12504, 12507, 12510, 12511, 12512, 12513, - 12514, 12516, 12518, 12520, 12521, 12522, 12523, 12524, - 12525, 12527, 12528, 12529, 12530, 20196, 21644, 12450, - 12497, 12540, 12488, 12450, 12523, 12501, 12449, 12450, - 12531, 12506, 12450, 12450, 12540, 12523, 12452, 12491, - 12531, 12464, 12452, 12531, 12481, 12454, 12457, 12531, - 12456, 12473, 12463, 12540, 12489, 12456, 12540, 12459, - 12540, 12458, 12531, 12473, 12458, 12540, 12512, 12459, - 12452, 12522, 12459, 12521, 12483, 12488, 12459, 12525, - 12522, 12540, 12460, 12525, 12531, 12460, 12531, 12510, - 12462, 12460, 12462, 12491, 12540, 12461, 12517, 12522, - 12540, 12462, 12523, 12480, 12540, 12461, 12525, 12461, - 12525, 12464, 12521, 12512, 12461, 12525, 12513, 12540, - 12488, 12523, 12461, 12525, 12527, 12483, 12488, 12464, - 12521, 12512, 12464, 12521, 12512, 12488, 12531, 12463, - 12523, 12476, 12452, 12525, 12463, 12525, 12540, 12493, - 12465, 12540, 12473, 12467, 12523, 12490, 12467, 12540, - 12509, 12469, 12452, 12463, 12523, 
12469, 12531, 12481, - 12540, 12512, 12471, 12522, 12531, 12464, 12475, 12531, - 12481, 12475, 12531, 12488, 12480, 12540, 12473, 12487, - 12471, 12489, 12523, 12488, 12531, 12490, 12494, 12494, - 12483, 12488, 12495, 12452, 12484, 12497, 12540, 12475, - 12531, 12488, 12497, 12540, 12484, 12496, 12540, 12524, - 12523, 12500, 12450, 12473, 12488, 12523, 12500, 12463, - 12523, 12500, 12467, 12499, 12523, 12501, 12449, 12521, - 12483, 12489, 12501, 12451, 12540, 12488, 12502, 12483, - 12471, 12455, 12523, 12501, 12521, 12531, 12504, 12463, - 12479, 12540, 12523, 12506, 12477, 12506, 12491, 12498, - 12504, 12523, 12484, 12506, 12531, 12473, 12506, 12540, - 12472, 12505, 12540, 12479, 12509, 12452, 12531, 12488, - 12508, 12523, 12488, 12507, 12531, 12509, 12531, 12489, - 12507, 12540, 12523, 12507, 12540, 12531, 12510, 12452, - 12463, 12525, 12510, 12452, 12523, 12510, 12483, 12495, - 12510, 12523, 12463, 12510, 12531, 12471, 12519, 12531, - 12511, 12463, 12525, 12531, 12511, 12522, 12511, 12522, - 12496, 12540, 12523, 12513, 12460, 12513, 12460, 12488, - 12531, 12513, 12540, 12488, 12523, 12516, 12540, 12489, - 12516, 12540, 12523, 12518, 12450, 12531, 12522, 12483, - 12488, 12523, 12522, 12521, 12523, 12500, 12540, 12523, - 12540, 12502, 12523, 12524, 12512, 12524, 12531, 12488, - 12466, 12531, 12527, 12483, 12488, 48, 28857, 49, - 28857, 50, 28857, 51, 28857, 52, 28857, 53, - 28857, 54, 28857, 55, 28857, 56, 28857, 57, - 28857, 49, 48, 28857, 49, 49, 28857, 49, - 50, 28857, 49, 51, 28857, 49, 52, 28857, - 49, 53, 28857, 49, 54, 28857, 49, 55, - 28857, 49, 56, 28857, 49, 57, 28857, 50, - 48, 28857, 50, 49, 28857, 50, 50, 28857, - 50, 51, 28857, 50, 52, 28857, 104, 80, - 97, 100, 97, 65, 85, 98, 97, 114, - 111, 86, 112, 99, 100, 109, 100, 109, - 178, 100, 109, 179, 73, 85, 24179, 25104, - 26157, 21644, 22823, 27491, 26126, 27835, 26666, 24335, - 20250, 31038, 112, 65, 110, 65, 956, 65, - 109, 65, 107, 65, 75, 66, 77, 66, - 71, 66, 99, 97, 108, 107, 99, 97, - 108, 112, 
70, 110, 70, 956, 70, 956, - 103, 109, 103, 107, 103, 72, 122, 107, - 72, 122, 77, 72, 122, 71, 72, 122, - 84, 72, 122, 956, 8467, 109, 8467, 100, - 8467, 107, 8467, 102, 109, 110, 109, 956, - 109, 109, 109, 99, 109, 107, 109, 109, - 109, 178, 99, 109, 178, 109, 178, 107, - 109, 178, 109, 109, 179, 99, 109, 179, - 109, 179, 107, 109, 179, 109, 8725, 115, - 109, 8725, 115, 178, 80, 97, 107, 80, - 97, 77, 80, 97, 71, 80, 97, 114, - 97, 100, 114, 97, 100, 8725, 115, 114, - 97, 100, 8725, 115, 178, 112, 115, 110, - 115, 956, 115, 109, 115, 112, 86, 110, - 86, 956, 86, 109, 86, 107, 86, 77, - 86, 112, 87, 110, 87, 956, 87, 109, - 87, 107, 87, 77, 87, 107, 937, 77, - 937, 97, 46, 109, 46, 66, 113, 99, - 99, 99, 100, 67, 8725, 107, 103, 67, - 111, 46, 100, 66, 71, 121, 104, 97, - 72, 80, 105, 110, 75, 75, 75, 77, - 107, 116, 108, 109, 108, 110, 108, 111, - 103, 108, 120, 109, 98, 109, 105, 108, - 109, 111, 108, 80, 72, 112, 46, 109, - 46, 80, 80, 77, 80, 82, 115, 114, - 83, 118, 87, 98, 86, 8725, 109, 65, - 8725, 109, 49, 26085, 50, 26085, 51, 26085, - 52, 26085, 53, 26085, 54, 26085, 55, 26085, - 56, 26085, 57, 26085, 49, 48, 26085, 49, - 49, 26085, 49, 50, 26085, 49, 51, 26085, - 49, 52, 26085, 49, 53, 26085, 49, 54, - 26085, 49, 55, 26085, 49, 56, 26085, 49, - 57, 26085, 50, 48, 26085, 50, 49, 26085, - 50, 50, 26085, 50, 51, 26085, 50, 52, - 26085, 50, 53, 26085, 50, 54, 26085, 50, - 55, 26085, 50, 56, 26085, 50, 57, 26085, - 51, 48, 26085, 51, 49, 26085, 103, 97, - 108, 42561, 42563, 42565, 42567, 42569, 42573, 42575, - 42577, 42579, 42581, 42583, 42585, 42587, 42589, 42591, - 42593, 42595, 42597, 42599, 42601, 42603, 42605, 42625, - 42627, 42629, 42631, 42633, 42635, 42637, 42639, 42641, - 42643, 42645, 42647, 42649, 42651, 42787, 42789, 42791, - 42793, 42795, 42797, 42799, 42803, 42805, 42807, 42809, - 42811, 42813, 42815, 42817, 42819, 42821, 42823, 42825, - 42827, 42829, 42831, 42833, 42835, 42837, 42839, 42841, - 42843, 42845, 42847, 42849, 42851, 42853, 42855, 
42857, - 42859, 42861, 42863, 42874, 42876, 7545, 42879, 42881, - 42883, 42885, 42887, 42892, 42897, 42899, 42903, 42905, - 42907, 42909, 42911, 42913, 42915, 42917, 42919, 42921, - 620, 670, 647, 43859, 42933, 42935, 42937, 42939, - 42941, 42943, 42947, 42900, 7566, 294, 43831, 43858, - 5024, 5025, 5026, 5027, 5028, 5029, 5030, 5031, - 5032, 5033, 5034, 5035, 5036, 5037, 5038, 5039, - 5040, 5041, 5042, 5043, 5044, 5045, 5046, 5047, - 5048, 5049, 5050, 5051, 5052, 5053, 5054, 5055, - 5056, 5057, 5058, 5059, 5060, 5061, 5062, 5063, - 5064, 5065, 5066, 5067, 5068, 5069, 5070, 5071, - 5072, 5073, 5074, 5075, 5076, 5077, 5078, 5079, - 5080, 5081, 5082, 5083, 5084, 5085, 5086, 5087, - 5088, 5089, 5090, 5091, 5092, 5093, 5094, 5095, - 5096, 5097, 5098, 5099, 5100, 5101, 5102, 5103, - 35912, 26356, 36040, 28369, 20018, 21477, 22865, 21895, - 22856, 25078, 30313, 32645, 34367, 34746, 35064, 37007, - 27138, 27931, 28889, 29662, 33853, 37226, 39409, 20098, - 21365, 27396, 29211, 34349, 40478, 23888, 28651, 34253, - 35172, 25289, 33240, 34847, 24266, 26391, 28010, 29436, - 37070, 20358, 20919, 21214, 25796, 27347, 29200, 30439, - 34310, 34396, 36335, 38706, 39791, 40442, 30860, 31103, - 32160, 33737, 37636, 35542, 22751, 24324, 31840, 32894, - 29282, 30922, 36034, 38647, 22744, 23650, 27155, 28122, - 28431, 32047, 32311, 38475, 21202, 32907, 20956, 20940, - 31260, 32190, 33777, 38517, 35712, 25295, 35582, 20025, - 23527, 24594, 29575, 30064, 21271, 30971, 20415, 24489, - 19981, 27852, 25976, 32034, 21443, 22622, 30465, 33865, - 35498, 27578, 27784, 25342, 33509, 25504, 30053, 20142, - 20841, 20937, 26753, 31975, 33391, 35538, 37327, 21237, - 21570, 24300, 26053, 28670, 31018, 38317, 39530, 40599, - 40654, 26310, 27511, 36706, 24180, 24976, 25088, 25754, - 28451, 29001, 29833, 31178, 32244, 32879, 36646, 34030, - 36899, 37706, 21015, 21155, 21693, 28872, 35010, 24265, - 24565, 25467, 27566, 31806, 29557, 20196, 22265, 23994, - 24604, 29618, 29801, 32666, 32838, 37428, 38646, 
38728, - 38936, 20363, 31150, 37300, 38584, 24801, 20102, 20698, - 23534, 23615, 26009, 29134, 30274, 34044, 36988, 26248, - 38446, 21129, 26491, 26611, 27969, 28316, 29705, 30041, - 30827, 32016, 39006, 25134, 38520, 20523, 23833, 28138, - 36650, 24459, 24900, 26647, 38534, 21033, 21519, 23653, - 26131, 26446, 26792, 27877, 29702, 30178, 32633, 35023, - 35041, 38626, 21311, 28346, 21533, 29136, 29848, 34298, - 38563, 40023, 40607, 26519, 28107, 33256, 31520, 31890, - 29376, 28825, 35672, 20160, 33590, 21050, 20999, 24230, - 25299, 31958, 23429, 27934, 26292, 36667, 38477, 24275, - 20800, 21952, 22618, 26228, 20958, 29482, 30410, 31036, - 31070, 31077, 31119, 38742, 31934, 34322, 35576, 36920, - 37117, 39151, 39164, 39208, 40372, 37086, 38583, 20398, - 20711, 20813, 21193, 21220, 21329, 21917, 22022, 22120, - 22592, 22696, 23652, 24724, 24936, 24974, 25074, 25935, - 26082, 26257, 26757, 28023, 28186, 28450, 29038, 29227, - 29730, 30865, 31049, 31048, 31056, 31062, 31117, 31118, - 31296, 31361, 31680, 32265, 32321, 32626, 32773, 33261, - 33401, 33879, 35088, 35222, 35585, 35641, 36051, 36104, - 36790, 38627, 38911, 38971, 24693, 55376, 57070, 33304, - 20006, 20917, 20840, 20352, 20805, 20864, 21191, 21242, - 21845, 21913, 21986, 22707, 22852, 22868, 23138, 23336, - 24274, 24281, 24425, 24493, 24792, 24910, 24840, 24928, - 25140, 25540, 25628, 25682, 25942, 26395, 26454, 28379, - 28363, 28702, 30631, 29237, 29359, 29809, 29958, 30011, - 30237, 30239, 30427, 30452, 30538, 30528, 30924, 31409, - 31867, 32091, 32574, 33618, 33775, 34681, 35137, 35206, - 35519, 35531, 35565, 35722, 36664, 36978, 37273, 37494, - 38524, 38875, 38923, 39698, 55370, 56394, 55370, 56388, - 55372, 57301, 15261, 16408, 16441, 55380, 56905, 55383, - 56528, 55391, 57043, 40771, 40846, 102, 102, 102, - 105, 102, 108, 102, 102, 105, 102, 102, - 108, 383, 116, 115, 116, 1396, 1398, 1396, - 1381, 1396, 1387, 1406, 1398, 1396, 1389, 1497, - 1460, 1522, 1463, 1506, 1492, 1499, 1500, 1501, - 1512, 1514, 
1513, 1473, 1513, 1474, 64329, 1473, - 64329, 1474, 1488, 1463, 1488, 1464, 1488, 1468, - 1489, 1468, 1490, 1468, 1491, 1468, 1492, 1468, - 1493, 1468, 1494, 1468, 1496, 1468, 1497, 1468, - 1498, 1468, 1499, 1468, 1500, 1468, 1502, 1468, - 1504, 1468, 1505, 1468, 1507, 1468, 1508, 1468, - 1510, 1468, 1511, 1468, 1512, 1468, 1513, 1468, - 1514, 1468, 1493, 1465, 1489, 1471, 1499, 1471, - 1508, 1471, 1488, 1500, 1649, 1659, 1662, 1664, - 1658, 1663, 1657, 1700, 1702, 1668, 1667, 1670, - 1671, 1677, 1676, 1678, 1672, 1688, 1681, 1705, - 1711, 1715, 1713, 1722, 1723, 1728, 1729, 1726, - 1746, 1747, 1709, 1735, 1734, 1736, 1655, 1739, - 1733, 1737, 1744, 1609, 1574, 1575, 1574, 1749, - 1574, 1608, 1574, 1735, 1574, 1734, 1574, 1736, - 1574, 1744, 1574, 1609, 1740, 1574, 1580, 1574, - 1581, 1574, 1605, 1574, 1610, 1576, 1580, 1576, - 1581, 1576, 1582, 1576, 1605, 1576, 1609, 1576, - 1610, 1578, 1580, 1578, 1581, 1578, 1582, 1578, - 1605, 1578, 1609, 1578, 1610, 1579, 1580, 1579, - 1605, 1579, 1609, 1579, 1610, 1580, 1581, 1580, - 1605, 1581, 1580, 1581, 1605, 1582, 1580, 1582, - 1581, 1582, 1605, 1587, 1580, 1587, 1581, 1587, - 1582, 1587, 1605, 1589, 1581, 1589, 1605, 1590, - 1580, 1590, 1581, 1590, 1582, 1590, 1605, 1591, - 1581, 1591, 1605, 1592, 1605, 1593, 1580, 1593, - 1605, 1594, 1580, 1594, 1605, 1601, 1580, 1601, - 1581, 1601, 1582, 1601, 1605, 1601, 1609, 1601, - 1610, 1602, 1581, 1602, 1605, 1602, 1609, 1602, - 1610, 1603, 1575, 1603, 1580, 1603, 1581, 1603, - 1582, 1603, 1604, 1603, 1605, 1603, 1609, 1603, - 1610, 1604, 1580, 1604, 1581, 1604, 1582, 1604, - 1605, 1604, 1609, 1604, 1610, 1605, 1580, 1605, - 1581, 1605, 1582, 1605, 1605, 1605, 1609, 1605, - 1610, 1606, 1580, 1606, 1581, 1606, 1582, 1606, - 1605, 1606, 1609, 1606, 1610, 1607, 1580, 1607, - 1605, 1607, 1609, 1607, 1610, 1610, 1580, 1610, - 1581, 1610, 1582, 1610, 1605, 1610, 1609, 1610, - 1610, 1584, 1648, 1585, 1648, 1609, 1648, 32, - 1612, 1617, 32, 1613, 1617, 32, 1614, 1617, - 32, 1615, 1617, 
32, 1616, 1617, 32, 1617, - 1648, 1574, 1585, 1574, 1586, 1574, 1606, 1576, - 1585, 1576, 1586, 1576, 1606, 1578, 1585, 1578, - 1586, 1578, 1606, 1579, 1585, 1579, 1586, 1579, - 1606, 1605, 1575, 1606, 1585, 1606, 1586, 1606, - 1606, 1610, 1585, 1610, 1586, 1610, 1606, 1574, - 1582, 1574, 1607, 1576, 1607, 1578, 1607, 1589, - 1582, 1604, 1607, 1606, 1607, 1607, 1648, 1610, - 1607, 1579, 1607, 1587, 1607, 1588, 1605, 1588, - 1607, 1600, 1614, 1617, 1600, 1615, 1617, 1600, - 1616, 1617, 1591, 1609, 1591, 1610, 1593, 1609, - 1593, 1610, 1594, 1609, 1594, 1610, 1587, 1609, - 1587, 1610, 1588, 1609, 1588, 1610, 1581, 1609, - 1581, 1610, 1580, 1609, 1580, 1610, 1582, 1609, - 1582, 1610, 1589, 1609, 1589, 1610, 1590, 1609, - 1590, 1610, 1588, 1580, 1588, 1581, 1588, 1582, - 1588, 1585, 1587, 1585, 1589, 1585, 1590, 1585, - 1575, 1611, 1578, 1580, 1605, 1578, 1581, 1580, - 1578, 1581, 1605, 1578, 1582, 1605, 1578, 1605, - 1580, 1578, 1605, 1581, 1578, 1605, 1582, 1580, - 1605, 1581, 1581, 1605, 1610, 1581, 1605, 1609, - 1587, 1581, 1580, 1587, 1580, 1581, 1587, 1580, - 1609, 1587, 1605, 1581, 1587, 1605, 1580, 1587, - 1605, 1605, 1589, 1581, 1581, 1589, 1605, 1605, - 1588, 1581, 1605, 1588, 1580, 1610, 1588, 1605, - 1582, 1588, 1605, 1605, 1590, 1581, 1609, 1590, - 1582, 1605, 1591, 1605, 1581, 1591, 1605, 1605, - 1591, 1605, 1610, 1593, 1580, 1605, 1593, 1605, - 1605, 1593, 1605, 1609, 1594, 1605, 1605, 1594, - 1605, 1610, 1594, 1605, 1609, 1601, 1582, 1605, - 1602, 1605, 1581, 1602, 1605, 1605, 1604, 1581, - 1605, 1604, 1581, 1610, 1604, 1581, 1609, 1604, - 1580, 1580, 1604, 1582, 1605, 1604, 1605, 1581, - 1605, 1581, 1580, 1605, 1581, 1605, 1605, 1581, - 1610, 1605, 1580, 1581, 1605, 1580, 1605, 1605, - 1582, 1580, 1605, 1582, 1605, 1605, 1580, 1582, - 1607, 1605, 1580, 1607, 1605, 1605, 1606, 1581, - 1605, 1606, 1581, 1609, 1606, 1580, 1605, 1606, - 1580, 1609, 1606, 1605, 1610, 1606, 1605, 1609, - 1610, 1605, 1605, 1576, 1582, 1610, 1578, 1580, - 1610, 1578, 1580, 
1609, 1578, 1582, 1610, 1578, - 1582, 1609, 1578, 1605, 1610, 1578, 1605, 1609, - 1580, 1605, 1610, 1580, 1581, 1609, 1580, 1605, - 1609, 1587, 1582, 1609, 1589, 1581, 1610, 1588, - 1581, 1610, 1590, 1581, 1610, 1604, 1580, 1610, - 1604, 1605, 1610, 1610, 1581, 1610, 1610, 1580, - 1610, 1610, 1605, 1610, 1605, 1605, 1610, 1602, - 1605, 1610, 1606, 1581, 1610, 1593, 1605, 1610, - 1603, 1605, 1610, 1606, 1580, 1581, 1605, 1582, - 1610, 1604, 1580, 1605, 1603, 1605, 1605, 1580, - 1581, 1610, 1581, 1580, 1610, 1605, 1580, 1610, - 1601, 1605, 1610, 1576, 1581, 1610, 1587, 1582, - 1610, 1606, 1580, 1610, 1589, 1604, 1746, 1602, - 1604, 1746, 1575, 1604, 1604, 1607, 1575, 1603, - 1576, 1585, 1605, 1581, 1605, 1583, 1589, 1604, - 1593, 1605, 1585, 1587, 1608, 1604, 1593, 1604, - 1610, 1607, 1608, 1587, 1604, 1605, 1589, 1604, - 1609, 17, 1589, 1604, 1609, 32, 1575, 1604, - 1604, 1607, 32, 1593, 1604, 1610, 1607, 32, - 1608, 1587, 1604, 1605, 7, 1580, 1604, 32, - 1580, 1604, 1575, 1604, 1607, 1585, 1740, 1575, - 1604, 44, 12289, 12290, 58, 33, 63, 12310, - 12311, 8230, 8229, 8212, 8211, 95, 123, 125, - 12308, 12309, 12304, 12305, 12298, 12299, 12300, 12301, - 12302, 12303, 91, 93, 8254, 35, 38, 42, - 45, 60, 62, 92, 36, 37, 64, 32, - 1611, 1600, 1611, 32, 1612, 32, 1613, 32, - 1614, 1600, 1614, 32, 1615, 1600, 1615, 32, - 1616, 1600, 1616, 32, 1617, 1600, 1617, 32, - 1618, 1600, 1618, 1569, 1570, 1571, 1572, 1573, - 1574, 1575, 1576, 1577, 1578, 1579, 1580, 1581, - 1582, 1583, 1584, 1585, 1586, 1587, 1588, 1589, - 1590, 1591, 1592, 1593, 1594, 1601, 1602, 1603, - 1604, 1605, 1606, 1607, 1608, 1610, 1604, 1570, - 1604, 1571, 1604, 1573, 1604, 1575, 34, 39, - 47, 65345, 65346, 65347, 65348, 65349, 65350, 65351, - 65352, 65353, 65354, 65355, 65356, 65357, 65358, 65359, - 65360, 65361, 65362, 65363, 65364, 65365, 65366, 65367, - 65368, 65369, 65370, 94, 124, 126, 10629, 10630, - 12539, 12449, 12451, 12453, 12455, 12457, 12515, 12517, - 12519, 12483, 12540, 12531, 12441, 12442, 
12644, 12593, - 12594, 12595, 12596, 12597, 12598, 12599, 12600, 12601, - 12602, 12603, 12604, 12605, 12606, 12607, 12608, 12609, - 12610, 12611, 12612, 12613, 12614, 12615, 12616, 12617, - 12618, 12619, 12620, 12621, 12622, 12623, 12624, 12625, - 12626, 12627, 12628, 12629, 12630, 12631, 12632, 12633, - 12634, 12635, 12636, 12637, 12638, 12639, 12640, 12641, - 12642, 12643, 162, 163, 172, 175, 166, 165, - 8361, 9474, 8592, 8593, 8594, 8595, 9632, 9675, - 55297, 56360, 55297, 56361, 55297, 56362, 55297, 56363, - 55297, 56364, 55297, 56365, 55297, 56366, 55297, 56367, - 55297, 56368, 55297, 56369, 55297, 56370, 55297, 56371, - 55297, 56372, 55297, 56373, 55297, 56374, 55297, 56375, - 55297, 56376, 55297, 56377, 55297, 56378, 55297, 56379, - 55297, 56380, 55297, 56381, 55297, 56382, 55297, 56383, - 55297, 56384, 55297, 56385, 55297, 56386, 55297, 56387, - 55297, 56388, 55297, 56389, 55297, 56390, 55297, 56391, - 55297, 56392, 55297, 56393, 55297, 56394, 55297, 56395, - 55297, 56396, 55297, 56397, 55297, 56398, 55297, 56399, - 55297, 56536, 55297, 56537, 55297, 56538, 55297, 56539, - 55297, 56540, 55297, 56541, 55297, 56542, 55297, 56543, - 55297, 56544, 55297, 56545, 55297, 56546, 55297, 56547, - 55297, 56548, 55297, 56549, 55297, 56550, 55297, 56551, - 55297, 56552, 55297, 56553, 55297, 56554, 55297, 56555, - 55297, 56556, 55297, 56557, 55297, 56558, 55297, 56559, - 55297, 56560, 55297, 56561, 55297, 56562, 55297, 56563, - 55297, 56564, 55297, 56565, 55297, 56566, 55297, 56567, - 55297, 56568, 55297, 56569, 55297, 56570, 55297, 56571, - 55299, 56512, 55299, 56513, 55299, 56514, 55299, 56515, - 55299, 56516, 55299, 56517, 55299, 56518, 55299, 56519, - 55299, 56520, 55299, 56521, 55299, 56522, 55299, 56523, - 55299, 56524, 55299, 56525, 55299, 56526, 55299, 56527, - 55299, 56528, 55299, 56529, 55299, 56530, 55299, 56531, - 55299, 56532, 55299, 56533, 55299, 56534, 55299, 56535, - 55299, 56536, 55299, 56537, 55299, 56538, 55299, 56539, - 55299, 56540, 55299, 56541, 
55299, 56542, 55299, 56543, - 55299, 56544, 55299, 56545, 55299, 56546, 55299, 56547, - 55299, 56548, 55299, 56549, 55299, 56550, 55299, 56551, - 55299, 56552, 55299, 56553, 55299, 56554, 55299, 56555, - 55299, 56556, 55299, 56557, 55299, 56558, 55299, 56559, - 55299, 56560, 55299, 56561, 55299, 56562, 55300, 56473, - 55300, 56506, 55300, 56475, 55300, 56506, 55300, 56485, - 55300, 56506, 55300, 56625, 55300, 56615, 55300, 56626, - 55300, 56615, 55300, 57159, 55300, 57150, 55300, 57159, - 55300, 57175, 55301, 56505, 55301, 56506, 55301, 56505, - 55301, 56496, 55301, 56505, 55301, 56509, 55301, 56760, - 55301, 56751, 55301, 56761, 55301, 56751, 55302, 56512, - 55302, 56513, 55302, 56514, 55302, 56515, 55302, 56516, - 55302, 56517, 55302, 56518, 55302, 56519, 55302, 56520, - 55302, 56521, 55302, 56522, 55302, 56523, 55302, 56524, - 55302, 56525, 55302, 56526, 55302, 56527, 55302, 56528, - 55302, 56529, 55302, 56530, 55302, 56531, 55302, 56532, - 55302, 56533, 55302, 56534, 55302, 56535, 55302, 56536, - 55302, 56537, 55302, 56538, 55302, 56539, 55302, 56540, - 55302, 56541, 55302, 56542, 55302, 56543, 55323, 56928, - 55323, 56929, 55323, 56930, 55323, 56931, 55323, 56932, - 55323, 56933, 55323, 56934, 55323, 56935, 55323, 56936, - 55323, 56937, 55323, 56938, 55323, 56939, 55323, 56940, - 55323, 56941, 55323, 56942, 55323, 56943, 55323, 56944, - 55323, 56945, 55323, 56946, 55323, 56947, 55323, 56948, - 55323, 56949, 55323, 56950, 55323, 56951, 55323, 56952, - 55323, 56953, 55323, 56954, 55323, 56955, 55323, 56956, - 55323, 56957, 55323, 56958, 55323, 56959, 55348, 56663, - 55348, 56677, 55348, 56664, 55348, 56677, 55348, 56671, - 55348, 56686, 55348, 56671, 55348, 56687, 55348, 56671, - 55348, 56688, 55348, 56671, 55348, 56689, 55348, 56671, - 55348, 56690, 55348, 56761, 55348, 56677, 55348, 56762, - 55348, 56677, 55348, 56763, 55348, 56686, 55348, 56764, - 55348, 56686, 55348, 56763, 55348, 56687, 55348, 56764, - 55348, 56687, 305, 567, 913, 914, 916, 917, - 918, 919, 
921, 922, 923, 924, 925, 926, - 927, 929, 1012, 932, 934, 935, 936, 8711, - 8706, 1013, 977, 1008, 981, 1009, 982, 988, - 55354, 56610, 55354, 56611, 55354, 56612, 55354, 56613, - 55354, 56614, 55354, 56615, 55354, 56616, 55354, 56617, - 55354, 56618, 55354, 56619, 55354, 56620, 55354, 56621, - 55354, 56622, 55354, 56623, 55354, 56624, 55354, 56625, - 55354, 56626, 55354, 56627, 55354, 56628, 55354, 56629, - 55354, 56630, 55354, 56631, 55354, 56632, 55354, 56633, - 55354, 56634, 55354, 56635, 55354, 56636, 55354, 56637, - 55354, 56638, 55354, 56639, 55354, 56640, 55354, 56641, - 55354, 56642, 55354, 56643, 1646, 1697, 1647, 48, - 46, 48, 44, 49, 44, 50, 44, 51, - 44, 52, 44, 53, 44, 54, 44, 55, - 44, 56, 44, 57, 44, 40, 65, 41, - 40, 66, 41, 40, 67, 41, 40, 68, - 41, 40, 69, 41, 40, 70, 41, 40, - 71, 41, 40, 72, 41, 40, 73, 41, - 40, 74, 41, 40, 75, 41, 40, 76, - 41, 40, 77, 41, 40, 78, 41, 40, - 79, 41, 40, 80, 41, 40, 81, 41, - 40, 82, 41, 40, 83, 41, 40, 84, - 41, 40, 85, 41, 40, 86, 41, 40, - 87, 41, 40, 88, 41, 40, 89, 41, - 40, 90, 41, 12308, 83, 12309, 67, 68, - 87, 90, 72, 86, 83, 68, 83, 83, - 80, 80, 86, 87, 67, 77, 67, 77, - 68, 77, 82, 68, 74, 12411, 12363, 12467, - 12467, 23383, 21452, 12487, 22810, 35299, 20132, 26144, - 28961, 21069, 24460, 20877, 26032, 21021, 32066, 36009, - 22768, 21561, 28436, 25237, 25429, 36938, 25351, 25171, - 31105, 31354, 21512, 28288, 30003, 21106, 21942, 37197, - 12308, 26412, 12309, 12308, 19977, 12309, 12308, 20108, - 12309, 12308, 23433, 12309, 12308, 28857, 12309, 12308, - 25171, 12309, 12308, 30423, 12309, 12308, 21213, 12309, - 12308, 25943, 12309, 24471, 21487, 20029, 20024, 20033, - 55360, 56610, 20320, 20411, 20482, 20602, 20633, 20687, - 13470, 55361, 56890, 20820, 20836, 20855, 55361, 56604, - 13497, 20839, 55361, 56651, 20887, 20900, 20172, 20908, - 55396, 56799, 20995, 13535, 21051, 21062, 21111, 13589, - 21253, 21254, 21321, 21338, 21363, 21373, 21375, 55362, - 56876, 28784, 21450, 21471, 55362, 57187, 21483, 
21489, - 21510, 21662, 21560, 21576, 21608, 21666, 21750, 21776, - 21843, 21859, 21892, 21931, 21939, 21954, 22294, 22295, - 22097, 22132, 22766, 22478, 22516, 22541, 22411, 22578, - 22577, 22700, 55365, 56548, 22770, 22775, 22790, 22818, - 22882, 55365, 57000, 55365, 57066, 23020, 23067, 23079, - 23000, 23142, 14062, 14076, 23304, 23358, 55366, 56776, - 23491, 23512, 23539, 55366, 57112, 23551, 23558, 24403, - 14209, 23648, 23744, 23693, 55367, 56804, 23875, 55367, - 56806, 23918, 23915, 23932, 24033, 24034, 14383, 24061, - 24104, 24125, 24169, 14434, 55368, 56707, 14460, 24240, - 24243, 24246, 55400, 57234, 55368, 57137, 33281, 24354, - 14535, 55372, 57016, 55384, 56794, 24418, 24427, 14563, - 24474, 24525, 24535, 24569, 24705, 14650, 14620, 55369, - 57044, 24775, 24904, 24908, 24954, 25010, 24996, 25007, - 25054, 25104, 25115, 25181, 25265, 25300, 25424, 55370, - 57100, 25405, 25340, 25448, 25475, 25572, 55370, 57329, - 25634, 25541, 25513, 14894, 25705, 25726, 25757, 25719, - 14956, 25964, 55372, 56330, 26083, 26360, 26185, 15129, - 15112, 15076, 20882, 20885, 26368, 26268, 32941, 17369, - 26401, 26462, 26451, 55372, 57283, 15177, 26618, 26501, - 26706, 55373, 56429, 26766, 26655, 26900, 26946, 27043, - 27114, 27304, 55373, 56995, 27355, 15384, 27425, 55374, - 56487, 27476, 15438, 27506, 27551, 27579, 55374, 56973, - 55367, 56587, 55374, 57082, 27726, 55375, 56508, 27839, - 27853, 27751, 27926, 27966, 28009, 28024, 28037, 55375, - 56606, 27956, 28207, 28270, 15667, 28359, 55375, 57041, - 28153, 28526, 55375, 57182, 55375, 57230, 28614, 28729, - 28699, 15766, 28746, 28797, 28791, 28845, 55361, 56613, - 28997, 55376, 56931, 29084, 55376, 57259, 29224, 29264, - 55377, 56840, 29312, 29333, 55377, 57141, 55378, 56340, - 29562, 29579, 16044, 29605, 16056, 29767, 29788, 29829, - 29898, 16155, 29988, 55379, 56374, 30014, 55379, 56466, - 55368, 56735, 30224, 55379, 57249, 55379, 57272, 55380, - 56388, 16380, 16392, 55380, 56563, 55380, 56562, 55380, - 56601, 55380, 
56627, 30494, 30495, 30603, 16454, 16534, - 55381, 56349, 30798, 16611, 55381, 56870, 55381, 56986, - 55381, 57029, 31211, 16687, 31306, 31311, 55382, 56700, - 55382, 56999, 31470, 16898, 55382, 57259, 31686, 31689, - 16935, 55383, 56448, 31954, 17056, 31976, 31971, 32000, - 55383, 57222, 32099, 17153, 32199, 32258, 32325, 17204, - 55384, 56872, 55384, 56903, 17241, 55384, 57049, 32634, - 55384, 57150, 32661, 32762, 55385, 56538, 55385, 56611, - 32864, 55385, 56744, 32880, 55372, 57183, 17365, 32946, - 33027, 17419, 33086, 23221, 55385, 57255, 55385, 57269, - 55372, 57235, 55372, 57244, 33284, 36766, 17515, 33425, - 33419, 33437, 21171, 33457, 33459, 33469, 33510, 55386, - 57148, 33565, 33635, 33709, 33571, 33725, 33767, 33619, - 33738, 33740, 33756, 55387, 56374, 55387, 56683, 55387, - 56533, 17707, 34033, 34035, 34070, 55388, 57290, 34148, - 55387, 57132, 17757, 17761, 55387, 57265, 55388, 56530, - 17771, 34384, 34407, 34409, 34473, 34440, 34574, 34530, - 34600, 34667, 34694, 17879, 34785, 34817, 17913, 34912, - 55389, 56935, 35031, 35038, 17973, 35066, 13499, 55390, - 56494, 55390, 56678, 18110, 18119, 35488, 55391, 56488, - 36011, 36033, 36123, 36215, 55391, 57135, 55362, 56324, - 36299, 36284, 36336, 55362, 56542, 36564, 55393, 56786, - 55393, 56813, 37012, 37105, 37137, 55393, 57134, 37147, - 37432, 37591, 37592, 37500, 37881, 37909, 55394, 57338, - 38283, 18837, 38327, 55395, 56695, 18918, 38595, 23986, - 38691, 55396, 56645, 55396, 56858, 19054, 19062, 38880, - 55397, 56330, 19122, 55397, 56470, 38953, 55397, 56758, - 39138, 19251, 39209, 39335, 39362, 39422, 19406, 55398, - 57136, 40000, 40189, 19662, 19693, 40295, 55400, 56526, - 19704, 55400, 56581, 55400, 56846, 55400, 56977, 19798, - 40702, 40709, 40719, 40726, 55401, 56832, 7838, 192, - 193, 194, 195, 196, 199, 200, 201, 202, - 203, 204, 205, 206, 207, 208, 209, 210, - 211, 212, 213, 214, 216, 217, 218, 219, - 220, 221, 222, 376, 256, 258, 260, 262, - 264, 266, 268, 270, 272, 274, 276, 278, - 280, 
282, 284, 286, 288, 290, 292, 296, - 298, 300, 302, 306, 308, 310, 313, 315, - 317, 319, 321, 323, 325, 327, 330, 332, - 334, 336, 338, 340, 342, 344, 346, 348, - 350, 352, 354, 356, 358, 360, 362, 364, - 366, 368, 370, 372, 374, 377, 379, 381, - 579, 386, 388, 391, 395, 401, 502, 408, - 573, 544, 416, 418, 420, 423, 428, 431, - 435, 437, 440, 444, 503, 453, 452, 456, - 455, 459, 458, 461, 463, 465, 467, 469, - 471, 473, 475, 478, 480, 482, 484, 486, - 488, 490, 492, 494, 498, 497, 500, 504, - 506, 508, 510, 512, 514, 516, 518, 520, - 522, 524, 526, 528, 530, 532, 534, 536, - 538, 540, 542, 548, 550, 552, 554, 556, - 558, 560, 562, 571, 11390, 11391, 577, 582, - 584, 586, 588, 590, 11375, 11373, 11376, 385, - 390, 393, 394, 399, 42923, 403, 42924, 404, - 42893, 42922, 407, 406, 42926, 11362, 42925, 412, - 11374, 413, 415, 11364, 422, 42949, 425, 42929, - 430, 580, 433, 434, 581, 439, 42930, 42928, - 880, 882, 886, 1021, 1022, 1023, 938, 939, - 975, 984, 986, 990, 992, 994, 996, 998, - 1000, 1002, 1004, 1006, 1017, 895, 1015, 1018, - 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, - 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, - 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, - 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, - 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, - 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, - 1120, 1122, 1124, 1126, 1128, 1130, 1132, 1134, - 1136, 1138, 1140, 1142, 1144, 1146, 1148, 1150, - 1152, 1162, 1164, 1166, 1168, 1170, 1172, 1174, - 1176, 1178, 1180, 1182, 1184, 1186, 1188, 1190, - 1192, 1194, 1196, 1198, 1200, 1202, 1204, 1206, - 1208, 1210, 1212, 1214, 1217, 1219, 1221, 1223, - 1225, 1227, 1229, 1216, 1232, 1234, 1236, 1238, - 1240, 1242, 1244, 1246, 1248, 1250, 1252, 1254, - 1256, 1258, 1260, 1262, 1264, 1266, 1268, 1270, - 1272, 1274, 1276, 1278, 1280, 1282, 1284, 1286, - 1288, 1290, 1292, 1294, 1296, 1298, 1300, 1302, - 1304, 1306, 1308, 1310, 1312, 1314, 1316, 1318, - 1320, 1322, 1324, 1326, 1329, 1330, 1331, 1332, - 
1333, 1334, 1335, 1336, 1337, 1338, 1339, 1340, - 1341, 1342, 1343, 1344, 1345, 1346, 1347, 1348, - 1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, - 1357, 1358, 1359, 1360, 1361, 1362, 1363, 1364, - 1365, 1366, 7312, 7313, 7314, 7315, 7316, 7317, - 7318, 7319, 7320, 7321, 7322, 7323, 7324, 7325, - 7326, 7327, 7328, 7329, 7330, 7331, 7332, 7333, - 7334, 7335, 7336, 7337, 7338, 7339, 7340, 7341, - 7342, 7343, 7344, 7345, 7346, 7347, 7348, 7349, - 7350, 7351, 7352, 7353, 7354, 7357, 7358, 7359, - 43888, 43889, 43890, 43891, 43892, 43893, 43894, 43895, - 43896, 43897, 43898, 43899, 43900, 43901, 43902, 43903, - 43904, 43905, 43906, 43907, 43908, 43909, 43910, 43911, - 43912, 43913, 43914, 43915, 43916, 43917, 43918, 43919, - 43920, 43921, 43922, 43923, 43924, 43925, 43926, 43927, - 43928, 43929, 43930, 43931, 43932, 43933, 43934, 43935, - 43936, 43937, 43938, 43939, 43940, 43941, 43942, 43943, - 43944, 43945, 43946, 43947, 43948, 43949, 43950, 43951, - 43952, 43953, 43954, 43955, 43956, 43957, 43958, 43959, - 43960, 43961, 43962, 43963, 43964, 43965, 43966, 43967, - 5112, 5113, 5114, 5115, 5116, 5117, 42570, 42877, - 11363, 42950, 7680, 7682, 7684, 7686, 7688, 7690, - 7692, 7694, 7696, 7698, 7700, 7702, 7704, 7706, - 7708, 7710, 7712, 7714, 7716, 7718, 7720, 7722, - 7724, 7726, 7728, 7730, 7732, 7734, 7736, 7738, - 7740, 7742, 7744, 7746, 7748, 7750, 7752, 7754, - 7756, 7758, 7760, 7762, 7764, 7766, 7768, 7770, - 7772, 7774, 7776, 7778, 7780, 7782, 7784, 7786, - 7788, 7790, 7792, 7794, 7796, 7798, 7800, 7802, - 7804, 7806, 7808, 7810, 7812, 7814, 7816, 7818, - 7820, 7822, 7824, 7826, 7828, 223, 7840, 7842, - 7844, 7846, 7848, 7850, 7852, 7854, 7856, 7858, - 7860, 7862, 7864, 7866, 7868, 7870, 7872, 7874, - 7876, 7878, 7880, 7882, 7884, 7886, 7888, 7890, - 7892, 7894, 7896, 7898, 7900, 7902, 7904, 7906, - 7908, 7910, 7912, 7914, 7916, 7918, 7920, 7922, - 7924, 7926, 7928, 7930, 7932, 7934, 7944, 7945, - 7946, 7947, 7948, 7949, 7950, 7951, 7960, 7961, - 7962, 7963, 
7964, 7965, 7976, 7977, 7978, 7979, - 7980, 7981, 7982, 7983, 7992, 7993, 7994, 7995, - 7996, 7997, 7998, 7999, 8008, 8009, 8010, 8011, - 8012, 8013, 8025, 8027, 8029, 8031, 8040, 8041, - 8042, 8043, 8044, 8045, 8046, 8047, 8122, 8123, - 8136, 8137, 8138, 8139, 8154, 8155, 8184, 8185, - 8170, 8171, 8186, 8187, 8072, 8073, 8074, 8075, - 8076, 8077, 8078, 8079, 8064, 8065, 8066, 8067, - 8068, 8069, 8070, 8071, 8088, 8089, 8090, 8091, - 8092, 8093, 8094, 8095, 8080, 8081, 8082, 8083, - 8084, 8085, 8086, 8087, 8104, 8105, 8106, 8107, - 8108, 8109, 8110, 8111, 8096, 8097, 8098, 8099, - 8100, 8101, 8102, 8103, 8120, 8121, 8124, 8115, - 8140, 8131, 8152, 8153, 8168, 8169, 8172, 8188, - 8179, 8498, 8544, 8545, 8546, 8547, 8548, 8549, - 8550, 8551, 8552, 8553, 8554, 8555, 8556, 8557, - 8558, 8559, 8579, 9398, 9399, 9400, 9401, 9402, - 9403, 9404, 9405, 9406, 9407, 9408, 9409, 9410, - 9411, 9412, 9413, 9414, 9415, 9416, 9417, 9418, - 9419, 9420, 9421, 9422, 9423, 11264, 11265, 11266, - 11267, 11268, 11269, 11270, 11271, 11272, 11273, 11274, - 11275, 11276, 11277, 11278, 11279, 11280, 11281, 11282, - 11283, 11284, 11285, 11286, 11287, 11288, 11289, 11290, - 11291, 11292, 11293, 11294, 11295, 11296, 11297, 11298, - 11299, 11300, 11301, 11302, 11303, 11304, 11305, 11306, - 11307, 11308, 11309, 11310, 11360, 570, 574, 11367, - 11369, 11371, 11378, 11381, 11392, 11394, 11396, 11398, - 11400, 11402, 11404, 11406, 11408, 11410, 11412, 11414, - 11416, 11418, 11420, 11422, 11424, 11426, 11428, 11430, - 11432, 11434, 11436, 11438, 11440, 11442, 11444, 11446, - 11448, 11450, 11452, 11454, 11456, 11458, 11460, 11462, - 11464, 11466, 11468, 11470, 11472, 11474, 11476, 11478, - 11480, 11482, 11484, 11486, 11488, 11490, 11499, 11501, - 11506, 4256, 4257, 4258, 4259, 4260, 4261, 4262, - 4263, 4264, 4265, 4266, 4267, 4268, 4269, 4270, - 4271, 4272, 4273, 4274, 4275, 4276, 4277, 4278, - 4279, 4280, 4281, 4282, 4283, 4284, 4285, 4286, - 4287, 4288, 4289, 4290, 4291, 4292, 4293, 4295, - 4301, 
42560, 42562, 42564, 42566, 42568, 42572, 42574, - 42576, 42578, 42580, 42582, 42584, 42586, 42588, 42590, - 42592, 42594, 42596, 42598, 42600, 42602, 42604, 42624, - 42626, 42628, 42630, 42632, 42634, 42636, 42638, 42640, - 42642, 42644, 42646, 42648, 42650, 42786, 42788, 42790, - 42792, 42794, 42796, 42798, 42802, 42804, 42806, 42808, - 42810, 42812, 42814, 42816, 42818, 42820, 42822, 42824, - 42826, 42828, 42830, 42832, 42834, 42836, 42838, 42840, - 42842, 42844, 42846, 42848, 42850, 42852, 42854, 42856, - 42858, 42860, 42862, 42873, 42875, 42878, 42880, 42882, - 42884, 42886, 42891, 42896, 42898, 42948, 42902, 42904, - 42906, 42908, 42910, 42912, 42914, 42916, 42918, 42920, - 42932, 42934, 42936, 42938, 42940, 42942, 42946, 42931, - 65313, 65314, 65315, 65316, 65317, 65318, 65319, 65320, - 65321, 65322, 65323, 65324, 65325, 65326, 65327, 65328, - 65329, 65330, 65331, 65332, 65333, 65334, 65335, 65336, - 65337, 65338, 55297, 56320, 55297, 56321, 55297, 56322, - 55297, 56323, 55297, 56324, 55297, 56325, 55297, 56326, - 55297, 56327, 55297, 56328, 55297, 56329, 55297, 56330, - 55297, 56331, 55297, 56332, 55297, 56333, 55297, 56334, - 55297, 56335, 55297, 56336, 55297, 56337, 55297, 56338, - 55297, 56339, 55297, 56340, 55297, 56341, 55297, 56342, - 55297, 56343, 55297, 56344, 55297, 56345, 55297, 56346, - 55297, 56347, 55297, 56348, 55297, 56349, 55297, 56350, - 55297, 56351, 55297, 56352, 55297, 56353, 55297, 56354, - 55297, 56355, 55297, 56356, 55297, 56357, 55297, 56358, - 55297, 56359, 55297, 56496, 55297, 56497, 55297, 56498, - 55297, 56499, 55297, 56500, 55297, 56501, 55297, 56502, - 55297, 56503, 55297, 56504, 55297, 56505, 55297, 56506, - 55297, 56507, 55297, 56508, 55297, 56509, 55297, 56510, - 55297, 56511, 55297, 56512, 55297, 56513, 55297, 56514, - 55297, 56515, 55297, 56516, 55297, 56517, 55297, 56518, - 55297, 56519, 55297, 56520, 55297, 56521, 55297, 56522, - 55297, 56523, 55297, 56524, 55297, 56525, 55297, 56526, - 55297, 56527, 55297, 56528, 55297, 
56529, 55297, 56530, - 55297, 56531, 55299, 56448, 55299, 56449, 55299, 56450, - 55299, 56451, 55299, 56452, 55299, 56453, 55299, 56454, - 55299, 56455, 55299, 56456, 55299, 56457, 55299, 56458, - 55299, 56459, 55299, 56460, 55299, 56461, 55299, 56462, - 55299, 56463, 55299, 56464, 55299, 56465, 55299, 56466, - 55299, 56467, 55299, 56468, 55299, 56469, 55299, 56470, - 55299, 56471, 55299, 56472, 55299, 56473, 55299, 56474, - 55299, 56475, 55299, 56476, 55299, 56477, 55299, 56478, - 55299, 56479, 55299, 56480, 55299, 56481, 55299, 56482, - 55299, 56483, 55299, 56484, 55299, 56485, 55299, 56486, - 55299, 56487, 55299, 56488, 55299, 56489, 55299, 56490, - 55299, 56491, 55299, 56492, 55299, 56493, 55299, 56494, - 55299, 56495, 55299, 56496, 55299, 56497, 55299, 56498, - 55302, 56480, 55302, 56481, 55302, 56482, 55302, 56483, - 55302, 56484, 55302, 56485, 55302, 56486, 55302, 56487, - 55302, 56488, 55302, 56489, 55302, 56490, 55302, 56491, - 55302, 56492, 55302, 56493, 55302, 56494, 55302, 56495, - 55302, 56496, 55302, 56497, 55302, 56498, 55302, 56499, - 55302, 56500, 55302, 56501, 55302, 56502, 55302, 56503, - 55302, 56504, 55302, 56505, 55302, 56506, 55302, 56507, - 55302, 56508, 55302, 56509, 55302, 56510, 55302, 56511, - 55323, 56896, 55323, 56897, 55323, 56898, 55323, 56899, - 55323, 56900, 55323, 56901, 55323, 56902, 55323, 56903, - 55323, 56904, 55323, 56905, 55323, 56906, 55323, 56907, - 55323, 56908, 55323, 56909, 55323, 56910, 55323, 56911, - 55323, 56912, 55323, 56913, 55323, 56914, 55323, 56915, - 55323, 56916, 55323, 56917, 55323, 56918, 55323, 56919, - 55323, 56920, 55323, 56921, 55323, 56922, 55323, 56923, - 55323, 56924, 55323, 56925, 55323, 56926, 55323, 56927, - 55354, 56576, 55354, 56577, 55354, 56578, 55354, 56579, - 55354, 56580, 55354, 56581, 55354, 56582, 55354, 56583, - 55354, 56584, 55354, 56585, 55354, 56586, 55354, 56587, - 55354, 56588, 55354, 56589, 55354, 56590, 55354, 56591, - 55354, 56592, 55354, 56593, 55354, 56594, 55354, 56595, - 
55354, 56596, 55354, 56597, 55354, 56598, 55354, 56599, - 55354, 56600, 55354, 56601, 55354, 56602, 55354, 56603, - 55354, 56604, 55354, 56605, 55354, 56606, 55354, 56607, + 97, 98, 99, 100, 101, 102, 103, + 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 32, 32, 776, 32, 772, + 50, 51, 32, 769, 956, 32, 807, 49, + 49, 8260, 52, 49, 8260, 50, 51, 8260, + 52, 65, 768, 224, 65, 769, 225, 65, + 770, 226, 65, 771, 227, 65, 776, 228, + 65, 778, 229, 230, 67, 807, 231, 69, + 768, 232, 69, 769, 233, 69, 770, 234, + 69, 776, 235, 73, 768, 236, 73, 769, + 237, 73, 770, 238, 73, 776, 239, 240, + 78, 771, 241, 79, 768, 242, 79, 769, + 243, 79, 770, 244, 79, 771, 245, 79, + 776, 246, 248, 85, 768, 249, 85, 769, + 250, 85, 770, 251, 85, 776, 252, 89, + 769, 253, 254, 115, 115, 97, 768, 97, + 769, 97, 770, 97, 771, 97, 776, 97, + 778, 99, 807, 101, 768, 101, 769, 101, + 770, 101, 776, 105, 768, 105, 769, 105, + 770, 105, 776, 110, 771, 111, 768, 111, + 769, 111, 770, 111, 771, 111, 776, 117, + 768, 117, 769, 117, 770, 117, 776, 121, + 769, 121, 776, 65, 772, 257, 97, 772, + 65, 774, 259, 97, 774, 65, 808, 261, + 97, 808, 67, 769, 263, 99, 769, 67, + 770, 265, 99, 770, 67, 775, 267, 99, + 775, 67, 780, 269, 99, 780, 68, 780, + 271, 100, 780, 273, 69, 772, 275, 101, + 772, 69, 774, 277, 101, 774, 69, 775, + 279, 101, 775, 69, 808, 281, 101, 808, + 69, 780, 283, 101, 780, 71, 770, 285, + 103, 770, 71, 774, 287, 103, 774, 71, + 775, 289, 103, 775, 71, 807, 291, 103, + 807, 72, 770, 293, 104, 770, 295, 73, + 771, 297, 105, 771, 73, 772, 299, 105, + 772, 73, 774, 301, 105, 774, 73, 808, + 303, 105, 808, 73, 775, 105, 775, 73, + 74, 307, 105, 106, 74, 770, 309, 106, + 770, 75, 807, 311, 107, 807, 76, 769, + 314, 108, 769, 76, 807, 316, 108, 807, + 76, 780, 318, 108, 780, 76, 183, 320, + 108, 183, 322, 78, 769, 324, 110, 769, + 78, 807, 326, 110, 807, 78, 780, 328, + 110, 780, 700, 110, 331, 79, 772, 333, + 111, 772, 79, 774, 335, 111, 
774, 79, + 779, 337, 111, 779, 339, 82, 769, 341, + 114, 769, 82, 807, 343, 114, 807, 82, + 780, 345, 114, 780, 83, 769, 347, 115, + 769, 83, 770, 349, 115, 770, 83, 807, + 351, 115, 807, 83, 780, 353, 115, 780, + 84, 807, 355, 116, 807, 84, 780, 357, + 116, 780, 359, 85, 771, 361, 117, 771, + 85, 772, 363, 117, 772, 85, 774, 365, + 117, 774, 85, 778, 367, 117, 778, 85, + 779, 369, 117, 779, 85, 808, 371, 117, + 808, 87, 770, 373, 119, 770, 89, 770, + 375, 121, 770, 89, 776, 255, 90, 769, + 378, 122, 769, 90, 775, 380, 122, 775, + 90, 780, 382, 122, 780, 595, 387, 389, + 596, 392, 598, 599, 396, 477, 601, 603, + 402, 608, 611, 617, 616, 409, 623, 626, + 629, 79, 795, 417, 111, 795, 419, 421, + 640, 424, 643, 429, 648, 85, 795, 432, + 117, 795, 650, 651, 436, 438, 658, 441, + 445, 68, 381, 454, 68, 382, 100, 382, + 76, 74, 457, 76, 106, 108, 106, 78, + 74, 460, 78, 106, 110, 106, 65, 780, + 462, 97, 780, 73, 780, 464, 105, 780, + 79, 780, 466, 111, 780, 85, 780, 468, + 117, 780, 220, 772, 470, 252, 772, 220, + 769, 472, 252, 769, 220, 780, 474, 252, + 780, 220, 768, 476, 252, 768, 196, 772, + 479, 228, 772, 550, 772, 481, 551, 772, + 198, 772, 483, 230, 772, 485, 71, 780, + 487, 103, 780, 75, 780, 489, 107, 780, + 79, 808, 491, 111, 808, 490, 772, 493, + 491, 772, 439, 780, 495, 658, 780, 106, + 780, 68, 90, 499, 68, 122, 100, 122, + 71, 769, 501, 103, 769, 405, 447, 78, + 768, 505, 110, 768, 197, 769, 507, 229, + 769, 198, 769, 509, 230, 769, 216, 769, + 511, 248, 769, 65, 783, 513, 97, 783, + 65, 785, 515, 97, 785, 69, 783, 517, + 101, 783, 69, 785, 519, 101, 785, 73, + 783, 521, 105, 783, 73, 785, 523, 105, + 785, 79, 783, 525, 111, 783, 79, 785, + 527, 111, 785, 82, 783, 529, 114, 783, + 82, 785, 531, 114, 785, 85, 783, 533, + 117, 783, 85, 785, 535, 117, 785, 83, + 806, 537, 115, 806, 84, 806, 539, 116, + 806, 541, 72, 780, 543, 104, 780, 414, + 547, 549, 65, 775, 551, 97, 775, 69, + 807, 553, 101, 807, 214, 772, 555, 246, + 772, 213, 772, 557, 245, 772, 79, 
775, + 559, 111, 775, 558, 772, 561, 559, 772, + 89, 772, 563, 121, 772, 11365, 572, 410, + 11366, 578, 384, 649, 652, 583, 585, 587, + 589, 591, 614, 633, 635, 641, 32, 774, + 32, 775, 32, 778, 32, 808, 32, 771, + 32, 779, 661, 768, 769, 787, 776, 769, + 953, 881, 883, 697, 887, 32, 837, 59, + 1011, 168, 769, 913, 769, 940, 183, 917, + 769, 941, 919, 769, 942, 921, 769, 943, + 927, 769, 972, 933, 769, 973, 937, 769, + 974, 970, 769, 953, 776, 769, 945, 946, + 947, 948, 949, 950, 951, 952, 954, 955, + 957, 958, 959, 960, 961, 963, 964, 965, + 966, 967, 968, 969, 921, 776, 970, 933, + 776, 971, 945, 769, 949, 769, 951, 769, + 953, 769, 971, 769, 965, 776, 769, 953, + 776, 965, 776, 959, 769, 965, 769, 969, + 769, 983, 933, 978, 769, 978, 776, 985, + 987, 989, 991, 993, 995, 997, 999, 1001, + 1003, 1005, 1007, 962, 920, 1016, 931, 1010, + 1019, 891, 892, 893, 1045, 768, 1104, 1045, + 776, 1105, 1106, 1043, 769, 1107, 1108, 1109, + 1110, 1030, 776, 1111, 1112, 1113, 1114, 1115, + 1050, 769, 1116, 1048, 768, 1117, 1059, 774, + 1118, 1119, 1072, 1073, 1074, 1075, 1076, 1077, + 1078, 1079, 1080, 1048, 774, 1081, 1082, 1083, + 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, + 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, + 1100, 1101, 1102, 1103, 1080, 774, 1077, 768, + 1077, 776, 1075, 769, 1110, 776, 1082, 769, + 1080, 768, 1091, 774, 1121, 1123, 1125, 1127, + 1129, 1131, 1133, 1135, 1137, 1139, 1141, 1140, + 783, 1143, 1141, 783, 1145, 1147, 1149, 1151, + 1153, 1163, 1165, 1167, 1169, 1171, 1173, 1175, + 1177, 1179, 1181, 1183, 1185, 1187, 1189, 1191, + 1193, 1195, 1197, 1199, 1201, 1203, 1205, 1207, + 1209, 1211, 1213, 1215, 1231, 1046, 774, 1218, + 1078, 774, 1220, 1222, 1224, 1226, 1228, 1230, + 1040, 774, 1233, 1072, 774, 1040, 776, 1235, + 1072, 776, 1237, 1045, 774, 1239, 1077, 774, + 1241, 1240, 776, 1243, 1241, 776, 1046, 776, + 1245, 1078, 776, 1047, 776, 1247, 1079, 776, + 1249, 1048, 772, 1251, 1080, 772, 1048, 776, + 1253, 1080, 776, 1054, 776, 1255, 
1086, 776, + 1257, 1256, 776, 1259, 1257, 776, 1069, 776, + 1261, 1101, 776, 1059, 772, 1263, 1091, 772, + 1059, 776, 1265, 1091, 776, 1059, 779, 1267, + 1091, 779, 1063, 776, 1269, 1095, 776, 1271, + 1067, 776, 1273, 1099, 776, 1275, 1277, 1279, + 1281, 1283, 1285, 1287, 1289, 1291, 1293, 1295, + 1297, 1299, 1301, 1303, 1305, 1307, 1309, 1311, + 1313, 1315, 1317, 1319, 1321, 1323, 1325, 1327, + 1377, 1378, 1379, 1380, 1381, 1382, 1383, 1384, + 1385, 1386, 1387, 1388, 1389, 1390, 1391, 1392, + 1393, 1394, 1395, 1396, 1397, 1398, 1399, 1400, + 1401, 1402, 1403, 1404, 1405, 1406, 1407, 1408, + 1409, 1410, 1411, 1412, 1413, 1414, 1381, 1410, + 1575, 1619, 1575, 1620, 1608, 1620, 1575, 1621, + 1610, 1620, 1575, 1652, 1608, 1652, 1735, 1652, + 1610, 1652, 1749, 1620, 1729, 1620, 1746, 1620, + 2344, 2364, 2352, 2364, 2355, 2364, 2325, 2364, + 2326, 2364, 2327, 2364, 2332, 2364, 2337, 2364, + 2338, 2364, 2347, 2364, 2351, 2364, 2503, 2494, + 2503, 2519, 2465, 2492, 2466, 2492, 2479, 2492, + 2610, 2620, 2616, 2620, 2582, 2620, 2583, 2620, + 2588, 2620, 2603, 2620, 2887, 2902, 2887, 2878, + 2887, 2903, 2849, 2876, 2850, 2876, 2962, 3031, + 3014, 3006, 3015, 3006, 3014, 3031, 3142, 3158, + 3263, 3285, 3270, 3285, 3270, 3286, 3270, 3266, + 3274, 3285, 3398, 3390, 3399, 3390, 3398, 3415, + 3545, 3530, 3545, 3535, 3548, 3530, 3545, 3551, + 3661, 3634, 3789, 3762, 3755, 3737, 3755, 3745, + 3851, 3906, 4023, 3916, 4023, 3921, 4023, 3926, + 4023, 3931, 4023, 3904, 4021, 3953, 3954, 3953, + 3956, 4018, 3968, 4018, 3969, 4019, 3968, 4019, + 3969, 3953, 3968, 3986, 4023, 3996, 4023, 4001, + 4023, 4006, 4023, 4011, 4023, 3984, 4021, 4133, + 4142, 11520, 11521, 11522, 11523, 11524, 11525, 11526, + 11527, 11528, 11529, 11530, 11531, 11532, 11533, 11534, + 11535, 11536, 11537, 11538, 11539, 11540, 11541, 11542, + 11543, 11544, 11545, 11546, 11547, 11548, 11549, 11550, + 11551, 11552, 11553, 11554, 11555, 11556, 11557, 11559, + 11565, 4316, 5104, 5105, 5106, 5107, 5108, 5109, + 6917, 
6965, 6919, 6965, 6921, 6965, 6923, 6965, + 6925, 6965, 6929, 6965, 6970, 6965, 6972, 6965, + 6974, 6965, 6975, 6965, 6978, 6965, 42571, 4304, + 4305, 4306, 4307, 4308, 4309, 4310, 4311, 4312, + 4313, 4314, 4315, 4317, 4318, 4319, 4320, 4321, + 4322, 4323, 4324, 4325, 4326, 4327, 4328, 4329, + 4330, 4331, 4332, 4333, 4334, 4335, 4336, 4337, + 4338, 4339, 4340, 4341, 4342, 4343, 4344, 4345, + 4346, 4349, 4350, 4351, 65, 198, 66, 68, + 69, 398, 71, 72, 73, 74, 75, 76, + 77, 78, 79, 546, 80, 82, 84, 85, + 87, 592, 593, 7426, 604, 7446, 7447, 7453, + 7461, 594, 597, 607, 609, 613, 618, 7547, + 669, 621, 7557, 671, 625, 624, 627, 628, + 632, 642, 427, 7452, 656, 657, 65, 805, + 7681, 97, 805, 66, 775, 7683, 98, 775, + 66, 803, 7685, 98, 803, 66, 817, 7687, + 98, 817, 199, 769, 7689, 231, 769, 68, + 775, 7691, 100, 775, 68, 803, 7693, 100, + 803, 68, 817, 7695, 100, 817, 68, 807, + 7697, 100, 807, 68, 813, 7699, 100, 813, + 274, 768, 7701, 275, 768, 274, 769, 7703, + 275, 769, 69, 813, 7705, 101, 813, 69, + 816, 7707, 101, 816, 552, 774, 7709, 553, + 774, 70, 775, 7711, 102, 775, 71, 772, + 7713, 103, 772, 72, 775, 7715, 104, 775, + 72, 803, 7717, 104, 803, 72, 776, 7719, + 104, 776, 72, 807, 7721, 104, 807, 72, + 814, 7723, 104, 814, 73, 816, 7725, 105, + 816, 207, 769, 7727, 239, 769, 75, 769, + 7729, 107, 769, 75, 803, 7731, 107, 803, + 75, 817, 7733, 107, 817, 76, 803, 7735, + 108, 803, 7734, 772, 7737, 7735, 772, 76, + 817, 7739, 108, 817, 76, 813, 7741, 108, + 813, 77, 769, 7743, 109, 769, 77, 775, + 7745, 109, 775, 77, 803, 7747, 109, 803, + 78, 775, 7749, 110, 775, 78, 803, 7751, + 110, 803, 78, 817, 7753, 110, 817, 78, + 813, 7755, 110, 813, 213, 769, 7757, 245, + 769, 213, 776, 7759, 245, 776, 332, 768, + 7761, 333, 768, 332, 769, 7763, 333, 769, + 80, 769, 7765, 112, 769, 80, 775, 7767, + 112, 775, 82, 775, 7769, 114, 775, 82, + 803, 7771, 114, 803, 7770, 772, 7773, 7771, + 772, 82, 817, 7775, 114, 817, 83, 775, + 7777, 115, 775, 83, 803, 7779, 115, 803, + 
346, 775, 7781, 347, 775, 352, 775, 7783, + 353, 775, 7778, 775, 7785, 7779, 775, 84, + 775, 7787, 116, 775, 84, 803, 7789, 116, + 803, 84, 817, 7791, 116, 817, 84, 813, + 7793, 116, 813, 85, 804, 7795, 117, 804, + 85, 816, 7797, 117, 816, 85, 813, 7799, + 117, 813, 360, 769, 7801, 361, 769, 362, + 776, 7803, 363, 776, 86, 771, 7805, 118, + 771, 86, 803, 7807, 118, 803, 87, 768, + 7809, 119, 768, 87, 769, 7811, 119, 769, + 87, 776, 7813, 119, 776, 87, 775, 7815, + 119, 775, 87, 803, 7817, 119, 803, 88, + 775, 7819, 120, 775, 88, 776, 7821, 120, + 776, 89, 775, 7823, 121, 775, 90, 770, + 7825, 122, 770, 90, 803, 7827, 122, 803, + 90, 817, 7829, 122, 817, 104, 817, 116, + 776, 119, 778, 121, 778, 97, 702, 383, + 775, 65, 803, 7841, 97, 803, 65, 777, + 7843, 97, 777, 194, 769, 7845, 226, 769, + 194, 768, 7847, 226, 768, 194, 777, 7849, + 226, 777, 194, 771, 7851, 226, 771, 7840, + 770, 7853, 7841, 770, 258, 769, 7855, 259, + 769, 258, 768, 7857, 259, 768, 258, 777, + 7859, 259, 777, 258, 771, 7861, 259, 771, + 7840, 774, 7863, 7841, 774, 69, 803, 7865, + 101, 803, 69, 777, 7867, 101, 777, 69, + 771, 7869, 101, 771, 202, 769, 7871, 234, + 769, 202, 768, 7873, 234, 768, 202, 777, + 7875, 234, 777, 202, 771, 7877, 234, 771, + 7864, 770, 7879, 7865, 770, 73, 777, 7881, + 105, 777, 73, 803, 7883, 105, 803, 79, + 803, 7885, 111, 803, 79, 777, 7887, 111, + 777, 212, 769, 7889, 244, 769, 212, 768, + 7891, 244, 768, 212, 777, 7893, 244, 777, + 212, 771, 7895, 244, 771, 7884, 770, 7897, + 7885, 770, 416, 769, 7899, 417, 769, 416, + 768, 7901, 417, 768, 416, 777, 7903, 417, + 777, 416, 771, 7905, 417, 771, 416, 803, + 7907, 417, 803, 85, 803, 7909, 117, 803, + 85, 777, 7911, 117, 777, 431, 769, 7913, + 432, 769, 431, 768, 7915, 432, 768, 431, + 777, 7917, 432, 777, 431, 771, 7919, 432, + 771, 431, 803, 7921, 432, 803, 89, 768, + 7923, 121, 768, 89, 803, 7925, 121, 803, + 89, 777, 7927, 121, 777, 89, 771, 7929, + 121, 771, 7931, 7933, 7935, 945, 787, 945, + 788, 7936, 768, 7937, 
768, 7936, 769, 7937, + 769, 7936, 834, 7937, 834, 913, 787, 7936, + 913, 788, 7937, 7944, 768, 7938, 7945, 768, + 7939, 7944, 769, 7940, 7945, 769, 7941, 7944, + 834, 7942, 7945, 834, 7943, 949, 787, 949, + 788, 7952, 768, 7953, 768, 7952, 769, 7953, + 769, 917, 787, 7952, 917, 788, 7953, 7960, + 768, 7954, 7961, 768, 7955, 7960, 769, 7956, + 7961, 769, 7957, 951, 787, 951, 788, 7968, + 768, 7969, 768, 7968, 769, 7969, 769, 7968, + 834, 7969, 834, 919, 787, 7968, 919, 788, + 7969, 7976, 768, 7970, 7977, 768, 7971, 7976, + 769, 7972, 7977, 769, 7973, 7976, 834, 7974, + 7977, 834, 7975, 953, 787, 953, 788, 7984, + 768, 7985, 768, 7984, 769, 7985, 769, 7984, + 834, 7985, 834, 921, 787, 7984, 921, 788, + 7985, 7992, 768, 7986, 7993, 768, 7987, 7992, + 769, 7988, 7993, 769, 7989, 7992, 834, 7990, + 7993, 834, 7991, 959, 787, 959, 788, 8000, + 768, 8001, 768, 8000, 769, 8001, 769, 927, + 787, 8000, 927, 788, 8001, 8008, 768, 8002, + 8009, 768, 8003, 8008, 769, 8004, 8009, 769, + 8005, 965, 787, 965, 788, 8016, 768, 965, + 787, 768, 8017, 768, 8016, 769, 965, 787, + 769, 8017, 769, 8016, 834, 965, 787, 834, + 8017, 834, 933, 788, 8017, 8025, 768, 8019, + 8025, 769, 8021, 8025, 834, 8023, 969, 787, + 969, 788, 8032, 768, 8033, 768, 8032, 769, + 8033, 769, 8032, 834, 8033, 834, 937, 787, + 8032, 937, 788, 8033, 8040, 768, 8034, 8041, + 768, 8035, 8040, 769, 8036, 8041, 769, 8037, + 8040, 834, 8038, 8041, 834, 8039, 945, 768, + 949, 768, 951, 768, 953, 768, 959, 768, + 965, 768, 969, 768, 7936, 837, 7936, 953, + 7937, 837, 7937, 953, 7938, 837, 7938, 953, + 7939, 837, 7939, 953, 7940, 837, 7940, 953, + 7941, 837, 7941, 953, 7942, 837, 7942, 953, + 7943, 837, 7943, 953, 7944, 837, 7945, 837, + 7946, 837, 7947, 837, 7948, 837, 7949, 837, + 7950, 837, 7951, 837, 7968, 837, 7968, 953, + 7969, 837, 7969, 953, 7970, 837, 7970, 953, + 7971, 837, 7971, 953, 7972, 837, 7972, 953, + 7973, 837, 7973, 953, 7974, 837, 7974, 953, + 7975, 837, 7975, 953, 7976, 837, 7977, 837, + 7978, 837, 
7979, 837, 7980, 837, 7981, 837, + 7982, 837, 7983, 837, 8032, 837, 8032, 953, + 8033, 837, 8033, 953, 8034, 837, 8034, 953, + 8035, 837, 8035, 953, 8036, 837, 8036, 953, + 8037, 837, 8037, 953, 8038, 837, 8038, 953, + 8039, 837, 8039, 953, 8040, 837, 8041, 837, + 8042, 837, 8043, 837, 8044, 837, 8045, 837, + 8046, 837, 8047, 837, 945, 774, 945, 772, + 8048, 837, 8048, 953, 945, 837, 945, 953, + 940, 837, 940, 953, 945, 834, 8118, 837, + 945, 834, 953, 913, 774, 8112, 913, 772, + 8113, 913, 768, 8048, 902, 8049, 913, 837, + 32, 787, 32, 834, 168, 834, 8052, 837, + 8052, 953, 951, 837, 951, 953, 942, 837, + 942, 953, 951, 834, 8134, 837, 951, 834, + 953, 917, 768, 8050, 904, 8051, 919, 768, + 8052, 905, 8053, 919, 837, 8127, 768, 8127, + 769, 8127, 834, 953, 774, 953, 772, 970, + 768, 953, 776, 768, 912, 953, 834, 970, + 834, 953, 776, 834, 921, 774, 8144, 921, + 772, 8145, 921, 768, 8054, 906, 8055, 8190, + 768, 8190, 769, 8190, 834, 965, 774, 965, + 772, 971, 768, 965, 776, 768, 944, 961, + 787, 961, 788, 965, 834, 971, 834, 965, + 776, 834, 933, 774, 8160, 933, 772, 8161, + 933, 768, 8058, 910, 8059, 929, 788, 8165, + 168, 768, 901, 96, 8060, 837, 8060, 953, + 969, 837, 969, 953, 974, 837, 974, 953, + 969, 834, 8182, 837, 969, 834, 953, 927, + 768, 8056, 908, 8057, 937, 768, 8060, 911, + 8061, 937, 837, 180, 32, 788, 8194, 8195, + 8208, 32, 819, 46, 46, 46, 46, 46, + 46, 8242, 8242, 8242, 8242, 8242, 8245, 8245, + 8245, 8245, 8245, 33, 33, 32, 773, 63, + 63, 63, 33, 33, 63, 8242, 8242, 8242, + 8242, 48, 52, 53, 54, 55, 56, 57, + 43, 8722, 61, 40, 41, 82, 115, 97, + 47, 99, 97, 47, 115, 67, 176, 67, + 99, 47, 111, 99, 47, 117, 400, 176, + 70, 78, 111, 81, 83, 77, 84, 69, + 76, 84, 77, 90, 937, 197, 70, 8526, + 1488, 1489, 1490, 1491, 70, 65, 88, 915, + 928, 8721, 49, 8260, 55, 49, 8260, 57, + 49, 8260, 49, 48, 49, 8260, 51, 50, + 8260, 51, 49, 8260, 53, 50, 8260, 53, + 51, 8260, 53, 52, 8260, 53, 49, 8260, + 54, 53, 8260, 54, 49, 8260, 56, 51, + 8260, 56, 53, 
8260, 56, 55, 8260, 56, + 49, 8260, 8560, 73, 73, 8561, 73, 73, + 73, 8562, 73, 86, 8563, 86, 8564, 86, + 73, 8565, 86, 73, 73, 8566, 86, 73, + 73, 73, 8567, 73, 88, 8568, 88, 8569, + 88, 73, 8570, 88, 73, 73, 8571, 8572, + 8573, 8574, 8575, 105, 105, 105, 105, 105, + 105, 118, 118, 105, 118, 105, 105, 118, + 105, 105, 105, 105, 120, 120, 105, 120, + 105, 105, 8580, 48, 8260, 51, 8592, 824, + 8594, 824, 8596, 824, 8656, 824, 8660, 824, + 8658, 824, 8707, 824, 8712, 824, 8715, 824, + 8739, 824, 8741, 824, 8747, 8747, 8747, 8747, + 8747, 8750, 8750, 8750, 8750, 8750, 8764, 824, + 8771, 824, 8773, 824, 8776, 824, 61, 824, + 8801, 824, 8781, 824, 60, 824, 62, 824, + 8804, 824, 8805, 824, 8818, 824, 8819, 824, + 8822, 824, 8823, 824, 8826, 824, 8827, 824, + 8834, 824, 8835, 824, 8838, 824, 8839, 824, + 8866, 824, 8872, 824, 8873, 824, 8875, 824, + 8828, 824, 8829, 824, 8849, 824, 8850, 824, + 8882, 824, 8883, 824, 8884, 824, 8885, 824, + 12296, 12297, 49, 48, 49, 49, 49, 50, + 49, 51, 49, 52, 49, 53, 49, 54, + 49, 55, 49, 56, 49, 57, 50, 48, + 40, 49, 41, 40, 50, 41, 40, 51, + 41, 40, 52, 41, 40, 53, 41, 40, + 54, 41, 40, 55, 41, 40, 56, 41, + 40, 57, 41, 40, 49, 48, 41, 40, + 49, 49, 41, 40, 49, 50, 41, 40, + 49, 51, 41, 40, 49, 52, 41, 40, + 49, 53, 41, 40, 49, 54, 41, 40, + 49, 55, 41, 40, 49, 56, 41, 40, + 49, 57, 41, 40, 50, 48, 41, 49, + 46, 50, 46, 51, 46, 52, 46, 53, + 46, 54, 46, 55, 46, 56, 46, 57, + 46, 49, 48, 46, 49, 49, 46, 49, + 50, 46, 49, 51, 46, 49, 52, 46, + 49, 53, 46, 49, 54, 46, 49, 55, + 46, 49, 56, 46, 49, 57, 46, 50, + 48, 46, 40, 97, 41, 40, 98, 41, + 40, 99, 41, 40, 100, 41, 40, 101, + 41, 40, 102, 41, 40, 103, 41, 40, + 104, 41, 40, 105, 41, 40, 106, 41, + 40, 107, 41, 40, 108, 41, 40, 109, + 41, 40, 110, 41, 40, 111, 41, 40, + 112, 41, 40, 113, 41, 40, 114, 41, + 40, 115, 41, 40, 116, 41, 40, 117, + 41, 40, 118, 41, 40, 119, 41, 40, + 120, 41, 40, 121, 41, 40, 122, 41, + 9424, 9425, 9426, 9427, 9428, 9429, 9430, 9431, + 9432, 9433, 9434, 
9435, 9436, 9437, 9438, 9439, + 9440, 9441, 83, 9442, 9443, 9444, 9445, 9446, + 9447, 89, 9448, 9449, 8747, 8747, 8747, 8747, + 58, 58, 61, 61, 61, 61, 61, 61, + 10973, 824, 11312, 11313, 11314, 11315, 11316, 11317, + 11318, 11319, 11320, 11321, 11322, 11323, 11324, 11325, + 11326, 11327, 11328, 11329, 11330, 11331, 11332, 11333, + 11334, 11335, 11336, 11337, 11338, 11339, 11340, 11341, + 11342, 11343, 11344, 11345, 11346, 11347, 11348, 11349, + 11350, 11351, 11352, 11353, 11354, 11355, 11356, 11357, + 11358, 11361, 619, 7549, 637, 11368, 11370, 11372, + 11379, 11382, 575, 576, 11393, 11395, 11397, 11399, + 11401, 11403, 11405, 11407, 11409, 11411, 11413, 11415, + 11417, 11419, 11421, 11423, 11425, 11427, 11429, 11431, + 11433, 11435, 11437, 11439, 11441, 11443, 11445, 11447, + 11449, 11451, 11453, 11455, 11457, 11459, 11461, 11463, + 11465, 11467, 11469, 11471, 11473, 11475, 11477, 11479, + 11481, 11483, 11485, 11487, 11489, 11491, 11500, 11502, + 11507, 11617, 27597, 40863, 19968, 20008, 20022, 20031, + 20057, 20101, 20108, 20128, 20154, 20799, 20837, 20843, + 20866, 20886, 20907, 20960, 20981, 20992, 21147, 21241, + 21269, 21274, 21304, 21313, 21340, 21353, 21378, 21430, + 21448, 21475, 22231, 22303, 22763, 22786, 22794, 22805, + 22823, 22899, 23376, 23424, 23544, 23567, 23586, 23608, + 23662, 23665, 24027, 24037, 24049, 24062, 24178, 24186, + 24191, 24308, 24318, 24331, 24339, 24400, 24417, 24435, + 24515, 25096, 25142, 25163, 25903, 25908, 25991, 26007, + 26020, 26041, 26080, 26085, 26352, 26376, 26408, 27424, + 27490, 27513, 27571, 27595, 27604, 27611, 27663, 27668, + 27700, 28779, 29226, 29238, 29243, 29247, 29255, 29273, + 29275, 29356, 29572, 29577, 29916, 29926, 29976, 29983, + 29992, 30000, 30091, 30098, 30326, 30333, 30382, 30399, + 30446, 30683, 30690, 30707, 31034, 31160, 31166, 31348, + 31435, 31481, 31859, 31992, 32566, 32593, 32650, 32701, + 32769, 32780, 32786, 32819, 32895, 32905, 33251, 33258, + 33267, 33276, 33292, 33307, 33311, 33390, 33394, 
33400, + 34381, 34411, 34880, 34892, 34915, 35198, 35211, 35282, + 35328, 35895, 35910, 35925, 35960, 35997, 36196, 36208, + 36275, 36523, 36554, 36763, 36784, 36789, 37009, 37193, + 37318, 37324, 37329, 38263, 38272, 38428, 38582, 38585, + 38632, 38737, 38750, 38754, 38761, 38859, 38893, 38899, + 38913, 39080, 39131, 39135, 39318, 39321, 39340, 39592, + 39640, 39647, 39717, 39727, 39730, 39740, 39770, 40165, + 40565, 40575, 40613, 40635, 40643, 40653, 40657, 40697, + 40701, 40718, 40723, 40736, 40763, 40778, 40786, 40845, + 40860, 40864, 12306, 21316, 21317, 12363, 12441, 12365, + 12441, 12367, 12441, 12369, 12441, 12371, 12441, 12373, + 12441, 12375, 12441, 12377, 12441, 12379, 12441, 12381, + 12441, 12383, 12441, 12385, 12441, 12388, 12441, 12390, + 12441, 12392, 12441, 12399, 12441, 12399, 12442, 12402, + 12441, 12402, 12442, 12405, 12441, 12405, 12442, 12408, + 12441, 12408, 12442, 12411, 12441, 12411, 12442, 12358, + 12441, 32, 12441, 32, 12442, 12445, 12441, 12424, + 12426, 12459, 12441, 12461, 12441, 12463, 12441, 12465, + 12441, 12467, 12441, 12469, 12441, 12471, 12441, 12473, + 12441, 12475, 12441, 12477, 12441, 12479, 12441, 12481, + 12441, 12484, 12441, 12486, 12441, 12488, 12441, 12495, + 12441, 12495, 12442, 12498, 12441, 12498, 12442, 12501, + 12441, 12501, 12442, 12504, 12441, 12504, 12442, 12507, + 12441, 12507, 12442, 12454, 12441, 12527, 12441, 12528, + 12441, 12529, 12441, 12530, 12441, 12541, 12441, 12467, + 12488, 4352, 4353, 4522, 4354, 4524, 4525, 4355, + 4356, 4357, 4528, 4529, 4530, 4531, 4532, 4533, + 4378, 4358, 4359, 4360, 4385, 4361, 4362, 4363, + 4364, 4365, 4366, 4367, 4368, 4369, 4370, 4449, + 4450, 4451, 4452, 4453, 4454, 4455, 4456, 4457, + 4458, 4459, 4460, 4461, 4462, 4463, 4464, 4465, + 4466, 4467, 4468, 4469, 4448, 4372, 4373, 4551, + 4552, 4556, 4558, 4563, 4567, 4569, 4380, 4573, + 4575, 4381, 4382, 4384, 4386, 4387, 4391, 4393, + 4395, 4396, 4397, 4398, 4399, 4402, 4406, 4416, + 4423, 4428, 4593, 4594, 4439, 4440, 4441, 
4484, + 4485, 4488, 4497, 4498, 4500, 4510, 4513, 19977, + 22235, 19978, 20013, 19979, 30002, 19993, 19969, 22825, + 22320, 40, 4352, 41, 40, 4354, 41, 40, + 4355, 41, 40, 4357, 41, 40, 4358, 41, + 40, 4359, 41, 40, 4361, 41, 40, 4363, + 41, 40, 4364, 41, 40, 4366, 41, 40, + 4367, 41, 40, 4368, 41, 40, 4369, 41, + 40, 4370, 41, 40, 4352, 4449, 41, 40, + 4354, 4449, 41, 40, 4355, 4449, 41, 40, + 4357, 4449, 41, 40, 4358, 4449, 41, 40, + 4359, 4449, 41, 40, 4361, 4449, 41, 40, + 4363, 4449, 41, 40, 4364, 4449, 41, 40, + 4366, 4449, 41, 40, 4367, 4449, 41, 40, + 4368, 4449, 41, 40, 4369, 4449, 41, 40, + 4370, 4449, 41, 40, 4364, 4462, 41, 40, + 4363, 4457, 4364, 4453, 4523, 41, 40, 4363, + 4457, 4370, 4462, 41, 40, 19968, 41, 40, + 20108, 41, 40, 19977, 41, 40, 22235, 41, + 40, 20116, 41, 40, 20845, 41, 40, 19971, + 41, 40, 20843, 41, 40, 20061, 41, 40, + 21313, 41, 40, 26376, 41, 40, 28779, 41, + 40, 27700, 41, 40, 26408, 41, 40, 37329, + 41, 40, 22303, 41, 40, 26085, 41, 40, + 26666, 41, 40, 26377, 41, 40, 31038, 41, + 40, 21517, 41, 40, 29305, 41, 40, 36001, + 41, 40, 31069, 41, 40, 21172, 41, 40, + 20195, 41, 40, 21628, 41, 40, 23398, 41, + 40, 30435, 41, 40, 20225, 41, 40, 36039, + 41, 40, 21332, 41, 40, 31085, 41, 40, + 20241, 41, 40, 33258, 41, 40, 33267, 41, + 21839, 24188, 31631, 80, 84, 69, 50, 49, + 50, 50, 50, 51, 50, 52, 50, 53, + 50, 54, 50, 55, 50, 56, 50, 57, + 51, 48, 51, 49, 51, 50, 51, 51, + 51, 52, 51, 53, 4352, 4449, 4354, 4449, + 4355, 4449, 4357, 4449, 4358, 4449, 4359, 4449, + 4361, 4449, 4363, 4449, 4364, 4449, 4366, 4449, + 4367, 4449, 4368, 4449, 4369, 4449, 4370, 4449, + 4366, 4449, 4535, 4352, 4457, 4364, 4462, 4363, + 4468, 4363, 4462, 20116, 20845, 19971, 20061, 26666, + 26377, 31038, 21517, 29305, 36001, 31069, 21172, 31192, + 30007, 36969, 20778, 21360, 27880, 38917, 20241, 20889, + 27491, 24038, 21491, 21307, 23447, 23398, 30435, 20225, + 36039, 21332, 22812, 51, 54, 51, 55, 51, + 56, 51, 57, 52, 48, 52, 49, 52, + 50, 52, 51, 52, 52, 
52, 53, 52, + 54, 52, 55, 52, 56, 52, 57, 53, + 48, 49, 26376, 50, 26376, 51, 26376, 52, + 26376, 53, 26376, 54, 26376, 55, 26376, 56, + 26376, 57, 26376, 49, 48, 26376, 49, 49, + 26376, 49, 50, 26376, 72, 103, 101, 114, + 103, 101, 86, 76, 84, 68, 12450, 12452, + 12454, 12456, 12458, 12459, 12461, 12463, 12465, 12467, + 12469, 12471, 12473, 12475, 12477, 12479, 12481, 12484, + 12486, 12488, 12490, 12491, 12492, 12493, 12494, 12495, + 12498, 12501, 12504, 12507, 12510, 12511, 12512, 12513, + 12514, 12516, 12518, 12520, 12521, 12522, 12523, 12524, + 12525, 12527, 12528, 12529, 12530, 20196, 21644, 12450, + 12497, 12540, 12488, 12450, 12523, 12501, 12449, 12450, + 12531, 12506, 12450, 12450, 12540, 12523, 12452, 12491, + 12531, 12464, 12452, 12531, 12481, 12454, 12457, 12531, + 12456, 12473, 12463, 12540, 12489, 12456, 12540, 12459, + 12540, 12458, 12531, 12473, 12458, 12540, 12512, 12459, + 12452, 12522, 12459, 12521, 12483, 12488, 12459, 12525, + 12522, 12540, 12460, 12525, 12531, 12460, 12531, 12510, + 12462, 12460, 12462, 12491, 12540, 12461, 12517, 12522, + 12540, 12462, 12523, 12480, 12540, 12461, 12525, 12461, + 12525, 12464, 12521, 12512, 12461, 12525, 12513, 12540, + 12488, 12523, 12461, 12525, 12527, 12483, 12488, 12464, + 12521, 12512, 12464, 12521, 12512, 12488, 12531, 12463, + 12523, 12476, 12452, 12525, 12463, 12525, 12540, 12493, + 12465, 12540, 12473, 12467, 12523, 12490, 12467, 12540, + 12509, 12469, 12452, 12463, 12523, 12469, 12531, 12481, + 12540, 12512, 12471, 12522, 12531, 12464, 12475, 12531, + 12481, 12475, 12531, 12488, 12480, 12540, 12473, 12487, + 12471, 12489, 12523, 12488, 12531, 12490, 12494, 12494, + 12483, 12488, 12495, 12452, 12484, 12497, 12540, 12475, + 12531, 12488, 12497, 12540, 12484, 12496, 12540, 12524, + 12523, 12500, 12450, 12473, 12488, 12523, 12500, 12463, + 12523, 12500, 12467, 12499, 12523, 12501, 12449, 12521, + 12483, 12489, 12501, 12451, 12540, 12488, 12502, 12483, + 12471, 12455, 12523, 12501, 12521, 12531, 12504, 
12463, + 12479, 12540, 12523, 12506, 12477, 12506, 12491, 12498, + 12504, 12523, 12484, 12506, 12531, 12473, 12506, 12540, + 12472, 12505, 12540, 12479, 12509, 12452, 12531, 12488, + 12508, 12523, 12488, 12507, 12531, 12509, 12531, 12489, + 12507, 12540, 12523, 12507, 12540, 12531, 12510, 12452, + 12463, 12525, 12510, 12452, 12523, 12510, 12483, 12495, + 12510, 12523, 12463, 12510, 12531, 12471, 12519, 12531, + 12511, 12463, 12525, 12531, 12511, 12522, 12511, 12522, + 12496, 12540, 12523, 12513, 12460, 12513, 12460, 12488, + 12531, 12513, 12540, 12488, 12523, 12516, 12540, 12489, + 12516, 12540, 12523, 12518, 12450, 12531, 12522, 12483, + 12488, 12523, 12522, 12521, 12523, 12500, 12540, 12523, + 12540, 12502, 12523, 12524, 12512, 12524, 12531, 12488, + 12466, 12531, 12527, 12483, 12488, 48, 28857, 49, + 28857, 50, 28857, 51, 28857, 52, 28857, 53, + 28857, 54, 28857, 55, 28857, 56, 28857, 57, + 28857, 49, 48, 28857, 49, 49, 28857, 49, + 50, 28857, 49, 51, 28857, 49, 52, 28857, + 49, 53, 28857, 49, 54, 28857, 49, 55, + 28857, 49, 56, 28857, 49, 57, 28857, 50, + 48, 28857, 50, 49, 28857, 50, 50, 28857, + 50, 51, 28857, 50, 52, 28857, 104, 80, + 97, 100, 97, 65, 85, 98, 97, 114, + 111, 86, 112, 99, 100, 109, 100, 109, + 178, 100, 109, 179, 73, 85, 24179, 25104, + 26157, 21644, 22823, 27491, 26126, 27835, 26666, 24335, + 20250, 31038, 112, 65, 110, 65, 956, 65, + 109, 65, 107, 65, 75, 66, 77, 66, + 71, 66, 99, 97, 108, 107, 99, 97, + 108, 112, 70, 110, 70, 956, 70, 956, + 103, 109, 103, 107, 103, 72, 122, 107, + 72, 122, 77, 72, 122, 71, 72, 122, + 84, 72, 122, 956, 8467, 109, 8467, 100, + 8467, 107, 8467, 102, 109, 110, 109, 956, + 109, 109, 109, 99, 109, 107, 109, 109, + 109, 178, 99, 109, 178, 109, 178, 107, + 109, 178, 109, 109, 179, 99, 109, 179, + 109, 179, 107, 109, 179, 109, 8725, 115, + 109, 8725, 115, 178, 80, 97, 107, 80, + 97, 77, 80, 97, 71, 80, 97, 114, + 97, 100, 114, 97, 100, 8725, 115, 114, + 97, 100, 8725, 115, 178, 112, 115, 110, + 115, 956, 115, 109, 
115, 112, 86, 110, + 86, 956, 86, 109, 86, 107, 86, 77, + 86, 112, 87, 110, 87, 956, 87, 109, + 87, 107, 87, 77, 87, 107, 937, 77, + 937, 97, 46, 109, 46, 66, 113, 99, + 99, 99, 100, 67, 8725, 107, 103, 67, + 111, 46, 100, 66, 71, 121, 104, 97, + 72, 80, 105, 110, 75, 75, 75, 77, + 107, 116, 108, 109, 108, 110, 108, 111, + 103, 108, 120, 109, 98, 109, 105, 108, + 109, 111, 108, 80, 72, 112, 46, 109, + 46, 80, 80, 77, 80, 82, 115, 114, + 83, 118, 87, 98, 86, 8725, 109, 65, + 8725, 109, 49, 26085, 50, 26085, 51, 26085, + 52, 26085, 53, 26085, 54, 26085, 55, 26085, + 56, 26085, 57, 26085, 49, 48, 26085, 49, + 49, 26085, 49, 50, 26085, 49, 51, 26085, + 49, 52, 26085, 49, 53, 26085, 49, 54, + 26085, 49, 55, 26085, 49, 56, 26085, 49, + 57, 26085, 50, 48, 26085, 50, 49, 26085, + 50, 50, 26085, 50, 51, 26085, 50, 52, + 26085, 50, 53, 26085, 50, 54, 26085, 50, + 55, 26085, 50, 56, 26085, 50, 57, 26085, + 51, 48, 26085, 51, 49, 26085, 103, 97, + 108, 42561, 42563, 42565, 42567, 42569, 42573, 42575, + 42577, 42579, 42581, 42583, 42585, 42587, 42589, 42591, + 42593, 42595, 42597, 42599, 42601, 42603, 42605, 42625, + 42627, 42629, 42631, 42633, 42635, 42637, 42639, 42641, + 42643, 42645, 42647, 42649, 42651, 42787, 42789, 42791, + 42793, 42795, 42797, 42799, 42803, 42805, 42807, 42809, + 42811, 42813, 42815, 42817, 42819, 42821, 42823, 42825, + 42827, 42829, 42831, 42833, 42835, 42837, 42839, 42841, + 42843, 42845, 42847, 42849, 42851, 42853, 42855, 42857, + 42859, 42861, 42863, 42874, 42876, 7545, 42879, 42881, + 42883, 42885, 42887, 42892, 42897, 42899, 42903, 42905, + 42907, 42909, 42911, 42913, 42915, 42917, 42919, 42921, + 620, 670, 647, 43859, 42933, 42935, 42937, 42939, + 42941, 42943, 42947, 42900, 7566, 294, 43831, 43858, + 5024, 5025, 5026, 5027, 5028, 5029, 5030, 5031, + 5032, 5033, 5034, 5035, 5036, 5037, 5038, 5039, + 5040, 5041, 5042, 5043, 5044, 5045, 5046, 5047, + 5048, 5049, 5050, 5051, 5052, 5053, 5054, 5055, + 5056, 5057, 5058, 5059, 5060, 5061, 5062, 5063, + 
5064, 5065, 5066, 5067, 5068, 5069, 5070, 5071, + 5072, 5073, 5074, 5075, 5076, 5077, 5078, 5079, + 5080, 5081, 5082, 5083, 5084, 5085, 5086, 5087, + 5088, 5089, 5090, 5091, 5092, 5093, 5094, 5095, + 5096, 5097, 5098, 5099, 5100, 5101, 5102, 5103, + 35912, 26356, 36040, 28369, 20018, 21477, 22865, 21895, + 22856, 25078, 30313, 32645, 34367, 34746, 35064, 37007, + 27138, 27931, 28889, 29662, 33853, 37226, 39409, 20098, + 21365, 27396, 29211, 34349, 40478, 23888, 28651, 34253, + 35172, 25289, 33240, 34847, 24266, 26391, 28010, 29436, + 37070, 20358, 20919, 21214, 25796, 27347, 29200, 30439, + 34310, 34396, 36335, 38706, 39791, 40442, 30860, 31103, + 32160, 33737, 37636, 35542, 22751, 24324, 31840, 32894, + 29282, 30922, 36034, 38647, 22744, 23650, 27155, 28122, + 28431, 32047, 32311, 38475, 21202, 32907, 20956, 20940, + 31260, 32190, 33777, 38517, 35712, 25295, 35582, 20025, + 23527, 24594, 29575, 30064, 21271, 30971, 20415, 24489, + 19981, 27852, 25976, 32034, 21443, 22622, 30465, 33865, + 35498, 27578, 27784, 25342, 33509, 25504, 30053, 20142, + 20841, 20937, 26753, 31975, 33391, 35538, 37327, 21237, + 21570, 24300, 26053, 28670, 31018, 38317, 39530, 40599, + 40654, 26310, 27511, 36706, 24180, 24976, 25088, 25754, + 28451, 29001, 29833, 31178, 32244, 32879, 36646, 34030, + 36899, 37706, 21015, 21155, 21693, 28872, 35010, 24265, + 24565, 25467, 27566, 31806, 29557, 20196, 22265, 23994, + 24604, 29618, 29801, 32666, 32838, 37428, 38646, 38728, + 38936, 20363, 31150, 37300, 38584, 24801, 20102, 20698, + 23534, 23615, 26009, 29134, 30274, 34044, 36988, 26248, + 38446, 21129, 26491, 26611, 27969, 28316, 29705, 30041, + 30827, 32016, 39006, 25134, 38520, 20523, 23833, 28138, + 36650, 24459, 24900, 26647, 38534, 21033, 21519, 23653, + 26131, 26446, 26792, 27877, 29702, 30178, 32633, 35023, + 35041, 38626, 21311, 28346, 21533, 29136, 29848, 34298, + 38563, 40023, 40607, 26519, 28107, 33256, 31520, 31890, + 29376, 28825, 35672, 20160, 33590, 21050, 20999, 24230, + 25299, 
31958, 23429, 27934, 26292, 36667, 38477, 24275, + 20800, 21952, 22618, 26228, 20958, 29482, 30410, 31036, + 31070, 31077, 31119, 38742, 31934, 34322, 35576, 36920, + 37117, 39151, 39164, 39208, 40372, 37086, 38583, 20398, + 20711, 20813, 21193, 21220, 21329, 21917, 22022, 22120, + 22592, 22696, 23652, 24724, 24936, 24974, 25074, 25935, + 26082, 26257, 26757, 28023, 28186, 28450, 29038, 29227, + 29730, 30865, 31049, 31048, 31056, 31062, 31117, 31118, + 31296, 31361, 31680, 32265, 32321, 32626, 32773, 33261, + 33401, 33879, 35088, 35222, 35585, 35641, 36051, 36104, + 36790, 38627, 38911, 38971, 24693, 55376, 57070, 33304, + 20006, 20917, 20840, 20352, 20805, 20864, 21191, 21242, + 21845, 21913, 21986, 22707, 22852, 22868, 23138, 23336, + 24274, 24281, 24425, 24493, 24792, 24910, 24840, 24928, + 25140, 25540, 25628, 25682, 25942, 26395, 26454, 28379, + 28363, 28702, 30631, 29237, 29359, 29809, 29958, 30011, + 30237, 30239, 30427, 30452, 30538, 30528, 30924, 31409, + 31867, 32091, 32574, 33618, 33775, 34681, 35137, 35206, + 35519, 35531, 35565, 35722, 36664, 36978, 37273, 37494, + 38524, 38875, 38923, 39698, 55370, 56394, 55370, 56388, + 55372, 57301, 15261, 16408, 16441, 55380, 56905, 55383, + 56528, 55391, 57043, 40771, 40846, 102, 102, 102, + 105, 102, 108, 102, 102, 105, 102, 102, + 108, 383, 116, 115, 116, 1396, 1398, 1396, + 1381, 1396, 1387, 1406, 1398, 1396, 1389, 1497, + 1460, 1522, 1463, 1506, 1492, 1499, 1500, 1501, + 1512, 1514, 1513, 1473, 1513, 1474, 64329, 1473, + 64329, 1474, 1488, 1463, 1488, 1464, 1488, 1468, + 1489, 1468, 1490, 1468, 1491, 1468, 1492, 1468, + 1493, 1468, 1494, 1468, 1496, 1468, 1497, 1468, + 1498, 1468, 1499, 1468, 1500, 1468, 1502, 1468, + 1504, 1468, 1505, 1468, 1507, 1468, 1508, 1468, + 1510, 1468, 1511, 1468, 1512, 1468, 1513, 1468, + 1514, 1468, 1493, 1465, 1489, 1471, 1499, 1471, + 1508, 1471, 1488, 1500, 1649, 1659, 1662, 1664, + 1658, 1663, 1657, 1700, 1702, 1668, 1667, 1670, + 1671, 1677, 1676, 1678, 1672, 1688, 1681, 1705, 
+ 1711, 1715, 1713, 1722, 1723, 1728, 1729, 1726, + 1746, 1747, 1709, 1735, 1734, 1736, 1655, 1739, + 1733, 1737, 1744, 1609, 1574, 1575, 1574, 1749, + 1574, 1608, 1574, 1735, 1574, 1734, 1574, 1736, + 1574, 1744, 1574, 1609, 1740, 1574, 1580, 1574, + 1581, 1574, 1605, 1574, 1610, 1576, 1580, 1576, + 1581, 1576, 1582, 1576, 1605, 1576, 1609, 1576, + 1610, 1578, 1580, 1578, 1581, 1578, 1582, 1578, + 1605, 1578, 1609, 1578, 1610, 1579, 1580, 1579, + 1605, 1579, 1609, 1579, 1610, 1580, 1581, 1580, + 1605, 1581, 1580, 1581, 1605, 1582, 1580, 1582, + 1581, 1582, 1605, 1587, 1580, 1587, 1581, 1587, + 1582, 1587, 1605, 1589, 1581, 1589, 1605, 1590, + 1580, 1590, 1581, 1590, 1582, 1590, 1605, 1591, + 1581, 1591, 1605, 1592, 1605, 1593, 1580, 1593, + 1605, 1594, 1580, 1594, 1605, 1601, 1580, 1601, + 1581, 1601, 1582, 1601, 1605, 1601, 1609, 1601, + 1610, 1602, 1581, 1602, 1605, 1602, 1609, 1602, + 1610, 1603, 1575, 1603, 1580, 1603, 1581, 1603, + 1582, 1603, 1604, 1603, 1605, 1603, 1609, 1603, + 1610, 1604, 1580, 1604, 1581, 1604, 1582, 1604, + 1605, 1604, 1609, 1604, 1610, 1605, 1580, 1605, + 1581, 1605, 1582, 1605, 1605, 1605, 1609, 1605, + 1610, 1606, 1580, 1606, 1581, 1606, 1582, 1606, + 1605, 1606, 1609, 1606, 1610, 1607, 1580, 1607, + 1605, 1607, 1609, 1607, 1610, 1610, 1580, 1610, + 1581, 1610, 1582, 1610, 1605, 1610, 1609, 1610, + 1610, 1584, 1648, 1585, 1648, 1609, 1648, 32, + 1612, 1617, 32, 1613, 1617, 32, 1614, 1617, + 32, 1615, 1617, 32, 1616, 1617, 32, 1617, + 1648, 1574, 1585, 1574, 1586, 1574, 1606, 1576, + 1585, 1576, 1586, 1576, 1606, 1578, 1585, 1578, + 1586, 1578, 1606, 1579, 1585, 1579, 1586, 1579, + 1606, 1605, 1575, 1606, 1585, 1606, 1586, 1606, + 1606, 1610, 1585, 1610, 1586, 1610, 1606, 1574, + 1582, 1574, 1607, 1576, 1607, 1578, 1607, 1589, + 1582, 1604, 1607, 1606, 1607, 1607, 1648, 1610, + 1607, 1579, 1607, 1587, 1607, 1588, 1605, 1588, + 1607, 1600, 1614, 1617, 1600, 1615, 1617, 1600, + 1616, 1617, 1591, 1609, 1591, 1610, 1593, 1609, + 1593, 
1610, 1594, 1609, 1594, 1610, 1587, 1609, + 1587, 1610, 1588, 1609, 1588, 1610, 1581, 1609, + 1581, 1610, 1580, 1609, 1580, 1610, 1582, 1609, + 1582, 1610, 1589, 1609, 1589, 1610, 1590, 1609, + 1590, 1610, 1588, 1580, 1588, 1581, 1588, 1582, + 1588, 1585, 1587, 1585, 1589, 1585, 1590, 1585, + 1575, 1611, 1578, 1580, 1605, 1578, 1581, 1580, + 1578, 1581, 1605, 1578, 1582, 1605, 1578, 1605, + 1580, 1578, 1605, 1581, 1578, 1605, 1582, 1580, + 1605, 1581, 1581, 1605, 1610, 1581, 1605, 1609, + 1587, 1581, 1580, 1587, 1580, 1581, 1587, 1580, + 1609, 1587, 1605, 1581, 1587, 1605, 1580, 1587, + 1605, 1605, 1589, 1581, 1581, 1589, 1605, 1605, + 1588, 1581, 1605, 1588, 1580, 1610, 1588, 1605, + 1582, 1588, 1605, 1605, 1590, 1581, 1609, 1590, + 1582, 1605, 1591, 1605, 1581, 1591, 1605, 1605, + 1591, 1605, 1610, 1593, 1580, 1605, 1593, 1605, + 1605, 1593, 1605, 1609, 1594, 1605, 1605, 1594, + 1605, 1610, 1594, 1605, 1609, 1601, 1582, 1605, + 1602, 1605, 1581, 1602, 1605, 1605, 1604, 1581, + 1605, 1604, 1581, 1610, 1604, 1581, 1609, 1604, + 1580, 1580, 1604, 1582, 1605, 1604, 1605, 1581, + 1605, 1581, 1580, 1605, 1581, 1605, 1605, 1581, + 1610, 1605, 1580, 1581, 1605, 1580, 1605, 1605, + 1582, 1580, 1605, 1582, 1605, 1605, 1580, 1582, + 1607, 1605, 1580, 1607, 1605, 1605, 1606, 1581, + 1605, 1606, 1581, 1609, 1606, 1580, 1605, 1606, + 1580, 1609, 1606, 1605, 1610, 1606, 1605, 1609, + 1610, 1605, 1605, 1576, 1582, 1610, 1578, 1580, + 1610, 1578, 1580, 1609, 1578, 1582, 1610, 1578, + 1582, 1609, 1578, 1605, 1610, 1578, 1605, 1609, + 1580, 1605, 1610, 1580, 1581, 1609, 1580, 1605, + 1609, 1587, 1582, 1609, 1589, 1581, 1610, 1588, + 1581, 1610, 1590, 1581, 1610, 1604, 1580, 1610, + 1604, 1605, 1610, 1610, 1581, 1610, 1610, 1580, + 1610, 1610, 1605, 1610, 1605, 1605, 1610, 1602, + 1605, 1610, 1606, 1581, 1610, 1593, 1605, 1610, + 1603, 1605, 1610, 1606, 1580, 1581, 1605, 1582, + 1610, 1604, 1580, 1605, 1603, 1605, 1605, 1580, + 1581, 1610, 1581, 1580, 1610, 1605, 1580, 1610, + 1601, 
1605, 1610, 1576, 1581, 1610, 1587, 1582, + 1610, 1606, 1580, 1610, 1589, 1604, 1746, 1602, + 1604, 1746, 1575, 1604, 1604, 1607, 1575, 1603, + 1576, 1585, 1605, 1581, 1605, 1583, 1589, 1604, + 1593, 1605, 1585, 1587, 1608, 1604, 1593, 1604, + 1610, 1607, 1608, 1587, 1604, 1605, 1589, 1604, + 1609, 17, 1589, 1604, 1609, 32, 1575, 1604, + 1604, 1607, 32, 1593, 1604, 1610, 1607, 32, + 1608, 1587, 1604, 1605, 7, 1580, 1604, 32, + 1580, 1604, 1575, 1604, 1607, 1585, 1740, 1575, + 1604, 44, 12289, 12290, 58, 33, 63, 12310, + 12311, 8230, 8229, 8212, 8211, 95, 123, 125, + 12308, 12309, 12304, 12305, 12298, 12299, 12300, 12301, + 12302, 12303, 91, 93, 8254, 35, 38, 42, + 45, 60, 62, 92, 36, 37, 64, 32, + 1611, 1600, 1611, 32, 1612, 32, 1613, 32, + 1614, 1600, 1614, 32, 1615, 1600, 1615, 32, + 1616, 1600, 1616, 32, 1617, 1600, 1617, 32, + 1618, 1600, 1618, 1569, 1570, 1571, 1572, 1573, + 1574, 1575, 1576, 1577, 1578, 1579, 1580, 1581, + 1582, 1583, 1584, 1585, 1586, 1587, 1588, 1589, + 1590, 1591, 1592, 1593, 1594, 1601, 1602, 1603, + 1604, 1605, 1606, 1607, 1608, 1610, 1604, 1570, + 1604, 1571, 1604, 1573, 1604, 1575, 34, 39, + 47, 65345, 65346, 65347, 65348, 65349, 65350, 65351, + 65352, 65353, 65354, 65355, 65356, 65357, 65358, 65359, + 65360, 65361, 65362, 65363, 65364, 65365, 65366, 65367, + 65368, 65369, 65370, 94, 124, 126, 10629, 10630, + 12539, 12449, 12451, 12453, 12455, 12457, 12515, 12517, + 12519, 12483, 12540, 12531, 12441, 12442, 12644, 12593, + 12594, 12595, 12596, 12597, 12598, 12599, 12600, 12601, + 12602, 12603, 12604, 12605, 12606, 12607, 12608, 12609, + 12610, 12611, 12612, 12613, 12614, 12615, 12616, 12617, + 12618, 12619, 12620, 12621, 12622, 12623, 12624, 12625, + 12626, 12627, 12628, 12629, 12630, 12631, 12632, 12633, + 12634, 12635, 12636, 12637, 12638, 12639, 12640, 12641, + 12642, 12643, 162, 163, 172, 175, 166, 165, + 8361, 9474, 8592, 8593, 8594, 8595, 9632, 9675, + 55297, 56360, 55297, 56361, 55297, 56362, 55297, 56363, + 55297, 56364, 55297, 
56365, 55297, 56366, 55297, 56367, + 55297, 56368, 55297, 56369, 55297, 56370, 55297, 56371, + 55297, 56372, 55297, 56373, 55297, 56374, 55297, 56375, + 55297, 56376, 55297, 56377, 55297, 56378, 55297, 56379, + 55297, 56380, 55297, 56381, 55297, 56382, 55297, 56383, + 55297, 56384, 55297, 56385, 55297, 56386, 55297, 56387, + 55297, 56388, 55297, 56389, 55297, 56390, 55297, 56391, + 55297, 56392, 55297, 56393, 55297, 56394, 55297, 56395, + 55297, 56396, 55297, 56397, 55297, 56398, 55297, 56399, + 55297, 56536, 55297, 56537, 55297, 56538, 55297, 56539, + 55297, 56540, 55297, 56541, 55297, 56542, 55297, 56543, + 55297, 56544, 55297, 56545, 55297, 56546, 55297, 56547, + 55297, 56548, 55297, 56549, 55297, 56550, 55297, 56551, + 55297, 56552, 55297, 56553, 55297, 56554, 55297, 56555, + 55297, 56556, 55297, 56557, 55297, 56558, 55297, 56559, + 55297, 56560, 55297, 56561, 55297, 56562, 55297, 56563, + 55297, 56564, 55297, 56565, 55297, 56566, 55297, 56567, + 55297, 56568, 55297, 56569, 55297, 56570, 55297, 56571, + 55299, 56512, 55299, 56513, 55299, 56514, 55299, 56515, + 55299, 56516, 55299, 56517, 55299, 56518, 55299, 56519, + 55299, 56520, 55299, 56521, 55299, 56522, 55299, 56523, + 55299, 56524, 55299, 56525, 55299, 56526, 55299, 56527, + 55299, 56528, 55299, 56529, 55299, 56530, 55299, 56531, + 55299, 56532, 55299, 56533, 55299, 56534, 55299, 56535, + 55299, 56536, 55299, 56537, 55299, 56538, 55299, 56539, + 55299, 56540, 55299, 56541, 55299, 56542, 55299, 56543, + 55299, 56544, 55299, 56545, 55299, 56546, 55299, 56547, + 55299, 56548, 55299, 56549, 55299, 56550, 55299, 56551, + 55299, 56552, 55299, 56553, 55299, 56554, 55299, 56555, + 55299, 56556, 55299, 56557, 55299, 56558, 55299, 56559, + 55299, 56560, 55299, 56561, 55299, 56562, 55300, 56473, + 55300, 56506, 55300, 56475, 55300, 56506, 55300, 56485, + 55300, 56506, 55300, 56625, 55300, 56615, 55300, 56626, + 55300, 56615, 55300, 57159, 55300, 57150, 55300, 57159, + 55300, 57175, 55301, 56505, 55301, 56506, 55301, 
56505, + 55301, 56496, 55301, 56505, 55301, 56509, 55301, 56760, + 55301, 56751, 55301, 56761, 55301, 56751, 55302, 56512, + 55302, 56513, 55302, 56514, 55302, 56515, 55302, 56516, + 55302, 56517, 55302, 56518, 55302, 56519, 55302, 56520, + 55302, 56521, 55302, 56522, 55302, 56523, 55302, 56524, + 55302, 56525, 55302, 56526, 55302, 56527, 55302, 56528, + 55302, 56529, 55302, 56530, 55302, 56531, 55302, 56532, + 55302, 56533, 55302, 56534, 55302, 56535, 55302, 56536, + 55302, 56537, 55302, 56538, 55302, 56539, 55302, 56540, + 55302, 56541, 55302, 56542, 55302, 56543, 55323, 56928, + 55323, 56929, 55323, 56930, 55323, 56931, 55323, 56932, + 55323, 56933, 55323, 56934, 55323, 56935, 55323, 56936, + 55323, 56937, 55323, 56938, 55323, 56939, 55323, 56940, + 55323, 56941, 55323, 56942, 55323, 56943, 55323, 56944, + 55323, 56945, 55323, 56946, 55323, 56947, 55323, 56948, + 55323, 56949, 55323, 56950, 55323, 56951, 55323, 56952, + 55323, 56953, 55323, 56954, 55323, 56955, 55323, 56956, + 55323, 56957, 55323, 56958, 55323, 56959, 55348, 56663, + 55348, 56677, 55348, 56664, 55348, 56677, 55348, 56671, + 55348, 56686, 55348, 56671, 55348, 56687, 55348, 56671, + 55348, 56688, 55348, 56671, 55348, 56689, 55348, 56671, + 55348, 56690, 55348, 56761, 55348, 56677, 55348, 56762, + 55348, 56677, 55348, 56763, 55348, 56686, 55348, 56764, + 55348, 56686, 55348, 56763, 55348, 56687, 55348, 56764, + 55348, 56687, 305, 567, 913, 914, 916, 917, + 918, 919, 921, 922, 923, 924, 925, 926, + 927, 929, 1012, 932, 934, 935, 936, 8711, + 8706, 1013, 977, 1008, 981, 1009, 982, 988, + 55354, 56610, 55354, 56611, 55354, 56612, 55354, 56613, + 55354, 56614, 55354, 56615, 55354, 56616, 55354, 56617, + 55354, 56618, 55354, 56619, 55354, 56620, 55354, 56621, + 55354, 56622, 55354, 56623, 55354, 56624, 55354, 56625, + 55354, 56626, 55354, 56627, 55354, 56628, 55354, 56629, + 55354, 56630, 55354, 56631, 55354, 56632, 55354, 56633, + 55354, 56634, 55354, 56635, 55354, 56636, 55354, 56637, + 55354, 56638, 
55354, 56639, 55354, 56640, 55354, 56641, + 55354, 56642, 55354, 56643, 1646, 1697, 1647, 48, + 46, 48, 44, 49, 44, 50, 44, 51, + 44, 52, 44, 53, 44, 54, 44, 55, + 44, 56, 44, 57, 44, 40, 65, 41, + 40, 66, 41, 40, 67, 41, 40, 68, + 41, 40, 69, 41, 40, 70, 41, 40, + 71, 41, 40, 72, 41, 40, 73, 41, + 40, 74, 41, 40, 75, 41, 40, 76, + 41, 40, 77, 41, 40, 78, 41, 40, + 79, 41, 40, 80, 41, 40, 81, 41, + 40, 82, 41, 40, 83, 41, 40, 84, + 41, 40, 85, 41, 40, 86, 41, 40, + 87, 41, 40, 88, 41, 40, 89, 41, + 40, 90, 41, 12308, 83, 12309, 67, 68, + 87, 90, 72, 86, 83, 68, 83, 83, + 80, 80, 86, 87, 67, 77, 67, 77, + 68, 77, 82, 68, 74, 12411, 12363, 12467, + 12467, 23383, 21452, 12487, 22810, 35299, 20132, 26144, + 28961, 21069, 24460, 20877, 26032, 21021, 32066, 36009, + 22768, 21561, 28436, 25237, 25429, 36938, 25351, 25171, + 31105, 31354, 21512, 28288, 30003, 21106, 21942, 37197, + 12308, 26412, 12309, 12308, 19977, 12309, 12308, 20108, + 12309, 12308, 23433, 12309, 12308, 28857, 12309, 12308, + 25171, 12309, 12308, 30423, 12309, 12308, 21213, 12309, + 12308, 25943, 12309, 24471, 21487, 20029, 20024, 20033, + 55360, 56610, 20320, 20411, 20482, 20602, 20633, 20687, + 13470, 55361, 56890, 20820, 20836, 20855, 55361, 56604, + 13497, 20839, 55361, 56651, 20887, 20900, 20172, 20908, + 55396, 56799, 20995, 13535, 21051, 21062, 21111, 13589, + 21253, 21254, 21321, 21338, 21363, 21373, 21375, 55362, + 56876, 28784, 21450, 21471, 55362, 57187, 21483, 21489, + 21510, 21662, 21560, 21576, 21608, 21666, 21750, 21776, + 21843, 21859, 21892, 21931, 21939, 21954, 22294, 22295, + 22097, 22132, 22766, 22478, 22516, 22541, 22411, 22578, + 22577, 22700, 55365, 56548, 22770, 22775, 22790, 22818, + 22882, 55365, 57000, 55365, 57066, 23020, 23067, 23079, + 23000, 23142, 14062, 14076, 23304, 23358, 55366, 56776, + 23491, 23512, 23539, 55366, 57112, 23551, 23558, 24403, + 14209, 23648, 23744, 23693, 55367, 56804, 23875, 55367, + 56806, 23918, 23915, 23932, 24033, 24034, 14383, 24061, + 24104, 
24125, 24169, 14434, 55368, 56707, 14460, 24240, + 24243, 24246, 55400, 57234, 55368, 57137, 33281, 24354, + 14535, 55372, 57016, 55384, 56794, 24418, 24427, 14563, + 24474, 24525, 24535, 24569, 24705, 14650, 14620, 55369, + 57044, 24775, 24904, 24908, 24954, 25010, 24996, 25007, + 25054, 25104, 25115, 25181, 25265, 25300, 25424, 55370, + 57100, 25405, 25340, 25448, 25475, 25572, 55370, 57329, + 25634, 25541, 25513, 14894, 25705, 25726, 25757, 25719, + 14956, 25964, 55372, 56330, 26083, 26360, 26185, 15129, + 15112, 15076, 20882, 20885, 26368, 26268, 32941, 17369, + 26401, 26462, 26451, 55372, 57283, 15177, 26618, 26501, + 26706, 55373, 56429, 26766, 26655, 26900, 26946, 27043, + 27114, 27304, 55373, 56995, 27355, 15384, 27425, 55374, + 56487, 27476, 15438, 27506, 27551, 27579, 55374, 56973, + 55367, 56587, 55374, 57082, 27726, 55375, 56508, 27839, + 27853, 27751, 27926, 27966, 28009, 28024, 28037, 55375, + 56606, 27956, 28207, 28270, 15667, 28359, 55375, 57041, + 28153, 28526, 55375, 57182, 55375, 57230, 28614, 28729, + 28699, 15766, 28746, 28797, 28791, 28845, 55361, 56613, + 28997, 55376, 56931, 29084, 55376, 57259, 29224, 29264, + 55377, 56840, 29312, 29333, 55377, 57141, 55378, 56340, + 29562, 29579, 16044, 29605, 16056, 29767, 29788, 29829, + 29898, 16155, 29988, 55379, 56374, 30014, 55379, 56466, + 55368, 56735, 30224, 55379, 57249, 55379, 57272, 55380, + 56388, 16380, 16392, 55380, 56563, 55380, 56562, 55380, + 56601, 55380, 56627, 30494, 30495, 30603, 16454, 16534, + 55381, 56349, 30798, 16611, 55381, 56870, 55381, 56986, + 55381, 57029, 31211, 16687, 31306, 31311, 55382, 56700, + 55382, 56999, 31470, 16898, 55382, 57259, 31686, 31689, + 16935, 55383, 56448, 31954, 17056, 31976, 31971, 32000, + 55383, 57222, 32099, 17153, 32199, 32258, 32325, 17204, + 55384, 56872, 55384, 56903, 17241, 55384, 57049, 32634, + 55384, 57150, 32661, 32762, 55385, 56538, 55385, 56611, + 32864, 55385, 56744, 32880, 55372, 57183, 17365, 32946, + 33027, 17419, 33086, 23221, 55385, 
57255, 55385, 57269, + 55372, 57235, 55372, 57244, 33284, 36766, 17515, 33425, + 33419, 33437, 21171, 33457, 33459, 33469, 33510, 55386, + 57148, 33565, 33635, 33709, 33571, 33725, 33767, 33619, + 33738, 33740, 33756, 55387, 56374, 55387, 56683, 55387, + 56533, 17707, 34033, 34035, 34070, 55388, 57290, 34148, + 55387, 57132, 17757, 17761, 55387, 57265, 55388, 56530, + 17771, 34384, 34407, 34409, 34473, 34440, 34574, 34530, + 34600, 34667, 34694, 17879, 34785, 34817, 17913, 34912, + 55389, 56935, 35031, 35038, 17973, 35066, 13499, 55390, + 56494, 55390, 56678, 18110, 18119, 35488, 55391, 56488, + 36011, 36033, 36123, 36215, 55391, 57135, 55362, 56324, + 36299, 36284, 36336, 55362, 56542, 36564, 55393, 56786, + 55393, 56813, 37012, 37105, 37137, 55393, 57134, 37147, + 37432, 37591, 37592, 37500, 37881, 37909, 55394, 57338, + 38283, 18837, 38327, 55395, 56695, 18918, 38595, 23986, + 38691, 55396, 56645, 55396, 56858, 19054, 19062, 38880, + 55397, 56330, 19122, 55397, 56470, 38953, 55397, 56758, + 39138, 19251, 39209, 39335, 39362, 39422, 19406, 55398, + 57136, 40000, 40189, 19662, 19693, 40295, 55400, 56526, + 19704, 55400, 56581, 55400, 56846, 55400, 56977, 19798, + 40702, 40709, 40719, 40726, 55401, 56832, 7838, 192, + 193, 194, 195, 196, 199, 200, 201, 202, + 203, 204, 205, 206, 207, 208, 209, 210, + 211, 212, 213, 214, 216, 217, 218, 219, + 220, 221, 222, 376, 256, 258, 260, 262, + 264, 266, 268, 270, 272, 274, 276, 278, + 280, 282, 284, 286, 288, 290, 292, 296, + 298, 300, 302, 306, 308, 310, 313, 315, + 317, 319, 321, 323, 325, 327, 330, 332, + 334, 336, 338, 340, 342, 344, 346, 348, + 350, 352, 354, 356, 358, 360, 362, 364, + 366, 368, 370, 372, 374, 377, 379, 381, + 579, 386, 388, 391, 395, 401, 502, 408, + 573, 544, 416, 418, 420, 423, 428, 431, + 435, 437, 440, 444, 503, 453, 452, 456, + 455, 459, 458, 461, 463, 465, 467, 469, + 471, 473, 475, 478, 480, 482, 484, 486, + 488, 490, 492, 494, 498, 497, 500, 504, + 506, 508, 510, 512, 514, 516, 518, 520, + 522, 
524, 526, 528, 530, 532, 534, 536, + 538, 540, 542, 548, 550, 552, 554, 556, + 558, 560, 562, 571, 11390, 11391, 577, 582, + 584, 586, 588, 590, 11375, 11373, 11376, 385, + 390, 393, 394, 399, 42923, 403, 42924, 404, + 42893, 42922, 407, 406, 42926, 11362, 42925, 412, + 11374, 413, 415, 11364, 422, 42949, 425, 42929, + 430, 580, 433, 434, 581, 439, 42930, 42928, + 880, 882, 886, 1021, 1022, 1023, 938, 939, + 975, 984, 986, 990, 992, 994, 996, 998, + 1000, 1002, 1004, 1006, 1017, 895, 1015, 1018, + 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, + 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, + 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, + 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, + 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, + 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, + 1120, 1122, 1124, 1126, 1128, 1130, 1132, 1134, + 1136, 1138, 1140, 1142, 1144, 1146, 1148, 1150, + 1152, 1162, 1164, 1166, 1168, 1170, 1172, 1174, + 1176, 1178, 1180, 1182, 1184, 1186, 1188, 1190, + 1192, 1194, 1196, 1198, 1200, 1202, 1204, 1206, + 1208, 1210, 1212, 1214, 1217, 1219, 1221, 1223, + 1225, 1227, 1229, 1216, 1232, 1234, 1236, 1238, + 1240, 1242, 1244, 1246, 1248, 1250, 1252, 1254, + 1256, 1258, 1260, 1262, 1264, 1266, 1268, 1270, + 1272, 1274, 1276, 1278, 1280, 1282, 1284, 1286, + 1288, 1290, 1292, 1294, 1296, 1298, 1300, 1302, + 1304, 1306, 1308, 1310, 1312, 1314, 1316, 1318, + 1320, 1322, 1324, 1326, 1329, 1330, 1331, 1332, + 1333, 1334, 1335, 1336, 1337, 1338, 1339, 1340, + 1341, 1342, 1343, 1344, 1345, 1346, 1347, 1348, + 1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, + 1357, 1358, 1359, 1360, 1361, 1362, 1363, 1364, + 1365, 1366, 7312, 7313, 7314, 7315, 7316, 7317, + 7318, 7319, 7320, 7321, 7322, 7323, 7324, 7325, + 7326, 7327, 7328, 7329, 7330, 7331, 7332, 7333, + 7334, 7335, 7336, 7337, 7338, 7339, 7340, 7341, + 7342, 7343, 7344, 7345, 7346, 7347, 7348, 7349, + 7350, 7351, 7352, 7353, 7354, 7357, 7358, 7359, + 43888, 43889, 43890, 43891, 43892, 43893, 
43894, 43895, + 43896, 43897, 43898, 43899, 43900, 43901, 43902, 43903, + 43904, 43905, 43906, 43907, 43908, 43909, 43910, 43911, + 43912, 43913, 43914, 43915, 43916, 43917, 43918, 43919, + 43920, 43921, 43922, 43923, 43924, 43925, 43926, 43927, + 43928, 43929, 43930, 43931, 43932, 43933, 43934, 43935, + 43936, 43937, 43938, 43939, 43940, 43941, 43942, 43943, + 43944, 43945, 43946, 43947, 43948, 43949, 43950, 43951, + 43952, 43953, 43954, 43955, 43956, 43957, 43958, 43959, + 43960, 43961, 43962, 43963, 43964, 43965, 43966, 43967, + 5112, 5113, 5114, 5115, 5116, 5117, 42570, 42877, + 11363, 42950, 7680, 7682, 7684, 7686, 7688, 7690, + 7692, 7694, 7696, 7698, 7700, 7702, 7704, 7706, + 7708, 7710, 7712, 7714, 7716, 7718, 7720, 7722, + 7724, 7726, 7728, 7730, 7732, 7734, 7736, 7738, + 7740, 7742, 7744, 7746, 7748, 7750, 7752, 7754, + 7756, 7758, 7760, 7762, 7764, 7766, 7768, 7770, + 7772, 7774, 7776, 7778, 7780, 7782, 7784, 7786, + 7788, 7790, 7792, 7794, 7796, 7798, 7800, 7802, + 7804, 7806, 7808, 7810, 7812, 7814, 7816, 7818, + 7820, 7822, 7824, 7826, 7828, 223, 7840, 7842, + 7844, 7846, 7848, 7850, 7852, 7854, 7856, 7858, + 7860, 7862, 7864, 7866, 7868, 7870, 7872, 7874, + 7876, 7878, 7880, 7882, 7884, 7886, 7888, 7890, + 7892, 7894, 7896, 7898, 7900, 7902, 7904, 7906, + 7908, 7910, 7912, 7914, 7916, 7918, 7920, 7922, + 7924, 7926, 7928, 7930, 7932, 7934, 7944, 7945, + 7946, 7947, 7948, 7949, 7950, 7951, 7960, 7961, + 7962, 7963, 7964, 7965, 7976, 7977, 7978, 7979, + 7980, 7981, 7982, 7983, 7992, 7993, 7994, 7995, + 7996, 7997, 7998, 7999, 8008, 8009, 8010, 8011, + 8012, 8013, 8025, 8027, 8029, 8031, 8040, 8041, + 8042, 8043, 8044, 8045, 8046, 8047, 8122, 8123, + 8136, 8137, 8138, 8139, 8154, 8155, 8184, 8185, + 8170, 8171, 8186, 8187, 8072, 8073, 8074, 8075, + 8076, 8077, 8078, 8079, 8064, 8065, 8066, 8067, + 8068, 8069, 8070, 8071, 8088, 8089, 8090, 8091, + 8092, 8093, 8094, 8095, 8080, 8081, 8082, 8083, + 8084, 8085, 8086, 8087, 8104, 8105, 8106, 8107, + 8108, 
8109, 8110, 8111, 8096, 8097, 8098, 8099, + 8100, 8101, 8102, 8103, 8120, 8121, 8124, 8115, + 8140, 8131, 8152, 8153, 8168, 8169, 8172, 8188, + 8179, 8498, 8544, 8545, 8546, 8547, 8548, 8549, + 8550, 8551, 8552, 8553, 8554, 8555, 8556, 8557, + 8558, 8559, 8579, 9398, 9399, 9400, 9401, 9402, + 9403, 9404, 9405, 9406, 9407, 9408, 9409, 9410, + 9411, 9412, 9413, 9414, 9415, 9416, 9417, 9418, + 9419, 9420, 9421, 9422, 9423, 11264, 11265, 11266, + 11267, 11268, 11269, 11270, 11271, 11272, 11273, 11274, + 11275, 11276, 11277, 11278, 11279, 11280, 11281, 11282, + 11283, 11284, 11285, 11286, 11287, 11288, 11289, 11290, + 11291, 11292, 11293, 11294, 11295, 11296, 11297, 11298, + 11299, 11300, 11301, 11302, 11303, 11304, 11305, 11306, + 11307, 11308, 11309, 11310, 11360, 570, 574, 11367, + 11369, 11371, 11378, 11381, 11392, 11394, 11396, 11398, + 11400, 11402, 11404, 11406, 11408, 11410, 11412, 11414, + 11416, 11418, 11420, 11422, 11424, 11426, 11428, 11430, + 11432, 11434, 11436, 11438, 11440, 11442, 11444, 11446, + 11448, 11450, 11452, 11454, 11456, 11458, 11460, 11462, + 11464, 11466, 11468, 11470, 11472, 11474, 11476, 11478, + 11480, 11482, 11484, 11486, 11488, 11490, 11499, 11501, + 11506, 4256, 4257, 4258, 4259, 4260, 4261, 4262, + 4263, 4264, 4265, 4266, 4267, 4268, 4269, 4270, + 4271, 4272, 4273, 4274, 4275, 4276, 4277, 4278, + 4279, 4280, 4281, 4282, 4283, 4284, 4285, 4286, + 4287, 4288, 4289, 4290, 4291, 4292, 4293, 4295, + 4301, 42560, 42562, 42564, 42566, 42568, 42572, 42574, + 42576, 42578, 42580, 42582, 42584, 42586, 42588, 42590, + 42592, 42594, 42596, 42598, 42600, 42602, 42604, 42624, + 42626, 42628, 42630, 42632, 42634, 42636, 42638, 42640, + 42642, 42644, 42646, 42648, 42650, 42786, 42788, 42790, + 42792, 42794, 42796, 42798, 42802, 42804, 42806, 42808, + 42810, 42812, 42814, 42816, 42818, 42820, 42822, 42824, + 42826, 42828, 42830, 42832, 42834, 42836, 42838, 42840, + 42842, 42844, 42846, 42848, 42850, 42852, 42854, 42856, + 42858, 42860, 42862, 42873, 
42875, 42878, 42880, 42882, + 42884, 42886, 42891, 42896, 42898, 42948, 42902, 42904, + 42906, 42908, 42910, 42912, 42914, 42916, 42918, 42920, + 42932, 42934, 42936, 42938, 42940, 42942, 42946, 42931, + 65313, 65314, 65315, 65316, 65317, 65318, 65319, 65320, + 65321, 65322, 65323, 65324, 65325, 65326, 65327, 65328, + 65329, 65330, 65331, 65332, 65333, 65334, 65335, 65336, + 65337, 65338, 55297, 56320, 55297, 56321, 55297, 56322, + 55297, 56323, 55297, 56324, 55297, 56325, 55297, 56326, + 55297, 56327, 55297, 56328, 55297, 56329, 55297, 56330, + 55297, 56331, 55297, 56332, 55297, 56333, 55297, 56334, + 55297, 56335, 55297, 56336, 55297, 56337, 55297, 56338, + 55297, 56339, 55297, 56340, 55297, 56341, 55297, 56342, + 55297, 56343, 55297, 56344, 55297, 56345, 55297, 56346, + 55297, 56347, 55297, 56348, 55297, 56349, 55297, 56350, + 55297, 56351, 55297, 56352, 55297, 56353, 55297, 56354, + 55297, 56355, 55297, 56356, 55297, 56357, 55297, 56358, + 55297, 56359, 55297, 56496, 55297, 56497, 55297, 56498, + 55297, 56499, 55297, 56500, 55297, 56501, 55297, 56502, + 55297, 56503, 55297, 56504, 55297, 56505, 55297, 56506, + 55297, 56507, 55297, 56508, 55297, 56509, 55297, 56510, + 55297, 56511, 55297, 56512, 55297, 56513, 55297, 56514, + 55297, 56515, 55297, 56516, 55297, 56517, 55297, 56518, + 55297, 56519, 55297, 56520, 55297, 56521, 55297, 56522, + 55297, 56523, 55297, 56524, 55297, 56525, 55297, 56526, + 55297, 56527, 55297, 56528, 55297, 56529, 55297, 56530, + 55297, 56531, 55299, 56448, 55299, 56449, 55299, 56450, + 55299, 56451, 55299, 56452, 55299, 56453, 55299, 56454, + 55299, 56455, 55299, 56456, 55299, 56457, 55299, 56458, + 55299, 56459, 55299, 56460, 55299, 56461, 55299, 56462, + 55299, 56463, 55299, 56464, 55299, 56465, 55299, 56466, + 55299, 56467, 55299, 56468, 55299, 56469, 55299, 56470, + 55299, 56471, 55299, 56472, 55299, 56473, 55299, 56474, + 55299, 56475, 55299, 56476, 55299, 56477, 55299, 56478, + 55299, 56479, 55299, 56480, 55299, 56481, 55299, 56482, 
+ 55299, 56483, 55299, 56484, 55299, 56485, 55299, 56486, + 55299, 56487, 55299, 56488, 55299, 56489, 55299, 56490, + 55299, 56491, 55299, 56492, 55299, 56493, 55299, 56494, + 55299, 56495, 55299, 56496, 55299, 56497, 55299, 56498, + 55302, 56480, 55302, 56481, 55302, 56482, 55302, 56483, + 55302, 56484, 55302, 56485, 55302, 56486, 55302, 56487, + 55302, 56488, 55302, 56489, 55302, 56490, 55302, 56491, + 55302, 56492, 55302, 56493, 55302, 56494, 55302, 56495, + 55302, 56496, 55302, 56497, 55302, 56498, 55302, 56499, + 55302, 56500, 55302, 56501, 55302, 56502, 55302, 56503, + 55302, 56504, 55302, 56505, 55302, 56506, 55302, 56507, + 55302, 56508, 55302, 56509, 55302, 56510, 55302, 56511, + 55323, 56896, 55323, 56897, 55323, 56898, 55323, 56899, + 55323, 56900, 55323, 56901, 55323, 56902, 55323, 56903, + 55323, 56904, 55323, 56905, 55323, 56906, 55323, 56907, + 55323, 56908, 55323, 56909, 55323, 56910, 55323, 56911, + 55323, 56912, 55323, 56913, 55323, 56914, 55323, 56915, + 55323, 56916, 55323, 56917, 55323, 56918, 55323, 56919, + 55323, 56920, 55323, 56921, 55323, 56922, 55323, 56923, + 55323, 56924, 55323, 56925, 55323, 56926, 55323, 56927, + 55354, 56576, 55354, 56577, 55354, 56578, 55354, 56579, + 55354, 56580, 55354, 56581, 55354, 56582, 55354, 56583, + 55354, 56584, 55354, 56585, 55354, 56586, 55354, 56587, + 55354, 56588, 55354, 56589, 55354, 56590, 55354, 56591, + 55354, 56592, 55354, 56593, 55354, 56594, 55354, 56595, + 55354, 56596, 55354, 56597, 55354, 56598, 55354, 56599, + 55354, 56600, 55354, 56601, 55354, 56602, 55354, 56603, + 55354, 56604, 55354, 56605, 55354, 56606, 55354, 56607, 55354, 56608, 55354, 56609, }; static const utf8proc_uint16_t utf8proc_stage1table[] = { - 0, 256, 512, 768, 1024, 1280, 1536, - 1792, 2048, 2304, 2560, 2816, 3072, 3328, 3584, - 3840, 4096, 4352, 4608, 4864, 5120, 5376, 5632, - 5888, 6144, 6400, 6656, 6912, 7168, 7424, 7680, - 7936, 8192, 8448, 8704, 8960, 9216, 9472, 9728, - 9984, 10240, 10496, 10752, 11008, 11264, 
11520, 11776, - 12032, 12288, 12544, 12800, 13056, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13568, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13824, 14080, 13312, 13312, 13312, 14336, 5376, 14592, - 14848, 15104, 15360, 15616, 15872, 16128, 16384, 16640, - 16896, 17152, 17408, 17664, 16128, 16384, 16640, 16896, - 17152, 17408, 17664, 16128, 16384, 16640, 16896, 17152, - 17408, 17664, 16128, 16384, 16640, 16896, 17152, 17408, - 17664, 16128, 16384, 16640, 16896, 17152, 17408, 17664, - 16128, 16384, 16640, 16896, 17152, 17408, 17664, 16128, - 17920, 18176, 18176, 18176, 18176, 18176, 18176, 18176, - 18176, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18688, 18944, 19200, 19456, 19712, 19968, - 20224, 20480, 20736, 20992, 21248, 21504, 21760, 5376, - 22016, 22272, 22528, 22784, 23040, 23296, 23552, 23808, - 24064, 24320, 24576, 24832, 25088, 25344, 25600, 25856, - 26112, 26368, 26624, 26880, 27136, 27392, 27648, 27904, - 28160, 5376, 5376, 5376, 28416, 28672, 28928, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 5376, 5376, 5376, 5376, 29184, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 
27136, 27136, 5376, 5376, 29440, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 5376, 5376, 29696, 29952, 27136, 27136, 30208, - 30464, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 30720, 13312, 13312, 30976, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 13312, 31232, 31488, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 31744, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 32000, 32256, 32512, 32768, 33024, 33280, 33536, - 33792, 10240, 10240, 34048, 27136, 27136, 27136, 27136, - 27136, 34304, 34560, 34816, 27136, 27136, 27136, 27136, - 27136, 35072, 35328, 27136, 27136, 35584, 35840, 36096, - 27136, 36352, 36608, 36864, 37120, 37376, 37632, 37888, - 38144, 38400, 38656, 38912, 27136, 27136, 27136, 27136, - 27136, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 
13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 39168, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 39424, 39680, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 39936, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, - 13312, 13312, 13312, 13312, 40192, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 40448, 40704, 40960, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 
27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 
27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 
27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 
27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 
27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 
27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, 
- 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 
27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 
27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 
27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 41216, 41472, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 
27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, - 27136, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 
18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 41728, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 
18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, - 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 0, 256, 512, 768, 1024, 1280, 1536, + 1792, 2048, 2304, 2560, 2816, 3072, 3328, 3584, + 3840, 4096, 4352, 4608, 4864, 5120, 5376, 5632, + 5888, 6144, 6400, 6656, 6912, 7168, 7424, 7680, + 7936, 8192, 8448, 8704, 8960, 9216, 9472, 9728, + 9984, 10240, 10496, 10752, 11008, 11264, 11520, 11776, + 12032, 12288, 12544, 12800, 13056, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13568, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 
13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13824, 14080, 13312, 13312, 13312, 14336, 5376, 14592, + 14848, 15104, 15360, 15616, 15872, 16128, 16384, 16640, + 16896, 17152, 17408, 17664, 16128, 16384, 16640, 16896, + 17152, 17408, 17664, 16128, 16384, 16640, 16896, 17152, + 17408, 17664, 16128, 16384, 16640, 16896, 17152, 17408, + 17664, 16128, 16384, 16640, 16896, 17152, 17408, 17664, + 16128, 16384, 16640, 16896, 17152, 17408, 17664, 16128, + 17920, 18176, 18176, 18176, 18176, 18176, 18176, 18176, + 18176, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18688, 18944, 19200, 19456, 19712, 19968, + 20224, 20480, 20736, 20992, 21248, 21504, 21760, 5376, + 22016, 22272, 22528, 22784, 23040, 23296, 23552, 23808, + 24064, 24320, 24576, 24832, 25088, 25344, 25600, 25856, + 26112, 26368, 26624, 26880, 27136, 27392, 27648, 27904, + 28160, 5376, 5376, 5376, 28416, 28672, 28928, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 5376, 5376, 5376, 5376, 29184, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 5376, 5376, 29440, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 5376, 5376, 29696, 29952, 27136, 27136, 30208, + 30464, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 
13312, 13312, 13312, + 30720, 13312, 13312, 30976, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 13312, 31232, 31488, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 31744, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 32000, 32256, 32512, 32768, 33024, 33280, 33536, + 33792, 10240, 10240, 34048, 27136, 27136, 27136, 27136, + 27136, 34304, 34560, 34816, 27136, 27136, 27136, 27136, + 27136, 35072, 35328, 27136, 27136, 35584, 35840, 36096, + 27136, 36352, 36608, 36864, 37120, 37376, 37632, 37888, + 38144, 38400, 38656, 38912, 27136, 27136, 27136, 27136, + 27136, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 
13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 39168, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 39424, 39680, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 39936, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 13312, 13312, 13312, 13312, + 13312, 13312, 13312, 13312, 40192, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 40448, 40704, 40960, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 
27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, 
+ 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 
27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 
27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 
27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 
27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 
27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 
27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 
27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 
27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 41216, 41472, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, 
+ 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 27136, 27136, 27136, 27136, 27136, 27136, 27136, + 27136, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 
18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 41728, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 
18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, + 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, 41728, }; static const utf8proc_uint16_t utf8proc_stage2table[] = { - 1, 2, 2, 2, 2, 2, 2, - 2, 2, 3, 4, 3, 5, 6, 2, - 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 7, 7, 7, - 3, 8, 9, 9, 10, 11, 10, 9, - 9, 12, 13, 9, 14, 15, 16, 15, - 15, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 15, 9, 18, 19, 20, - 9, 9, 21, 22, 23, 24, 25, 26, - 27, 28, 29, 30, 31, 32, 33, 34, - 35, 36, 37, 38, 39, 40, 41, 42, - 43, 44, 45, 46, 12, 9, 13, 47, - 48, 47, 49, 50, 51, 52, 53, 54, - 55, 56, 57, 58, 59, 60, 61, 62, - 63, 64, 65, 66, 67, 68, 69, 70, - 71, 72, 73, 74, 12, 75, 13, 75, - 2, 2, 2, 2, 2, 2, 7, 2, - 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, - 2, 76, 9, 11, 11, 11, 11, 77, - 9, 78, 79, 80, 81, 75, 82, 79, - 83, 84, 85, 86, 87, 88, 89, 9, - 9, 90, 91, 92, 93, 94, 95, 96, - 9, 97, 98, 99, 100, 101, 102, 103, - 104, 105, 106, 107, 108, 109, 110, 111, - 112, 113, 114, 115, 116, 117, 118, 119, - 75, 120, 121, 122, 123, 124, 125, 126, - 127, 128, 129, 130, 131, 132, 133, 134, - 135, 136, 137, 138, 139, 140, 141, 142, - 143, 144, 145, 146, 147, 148, 149, 150, - 75, 151, 152, 153, 154, 155, 156, 157, - 158, 159, 160, 161, 162, 163, 164, 165, - 166, 167, 168, 169, 170, 171, 172, 173, - 174, 175, 176, 177, 
178, 179, 180, 181, - 182, 183, 184, 185, 186, 187, 188, 189, - 190, 191, 192, 193, 194, 195, 196, 197, - 198, 199, 200, 201, 202, 203, 204, 205, - 206, 207, 208, 209, 210, 211, 212, 213, - 214, 215, 216, 217, 218, 219, 220, 221, - 222, 223, 224, 225, 226, 227, 228, 229, - 230, 231, 232, 233, 234, 235, 236, 237, - 238, 239, 240, 241, 242, 243, 244, 245, - 246, 247, 248, 249, 250, 251, 252, 253, - 254, 255, 256, 257, 258, 259, 260, 261, - 262, 263, 264, 265, 266, 267, 268, 269, - 270, 271, 272, 273, 274, 275, 276, 277, - 278, 279, 280, 281, 282, 283, 284, 285, - 286, 287, 288, 289, 290, 291, 292, 293, - 294, 295, 296, 297, 298, 299, 215, 300, - 301, 302, 303, 304, 305, 306, 307, 308, - 309, 310, 311, 312, 215, 313, 314, 315, - 316, 317, 318, 319, 320, 321, 322, 323, - 324, 325, 326, 215, 215, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 336, 337, - 338, 339, 340, 215, 341, 342, 343, 215, - 344, 341, 341, 341, 341, 345, 346, 347, - 348, 349, 350, 351, 352, 353, 354, 355, - 356, 357, 358, 359, 360, 361, 362, 363, - 364, 365, 366, 367, 368, 369, 370, 371, - 372, 373, 374, 375, 376, 377, 378, 379, - 380, 381, 382, 383, 384, 385, 386, 387, - 388, 389, 390, 391, 392, 393, 394, 395, - 396, 397, 398, 399, 400, 401, 402, 403, - 404, 405, 406, 407, 408, 409, 410, 411, - 412, 413, 414, 415, 416, 417, 418, 419, - 420, 421, 422, 423, 424, 425, 426, 427, - 428, 429, 430, 431, 432, 433, 434, 435, - 436, 437, 215, 438, 439, 440, 441, 442, - 443, 444, 445, 446, 447, 448, 449, 450, - 451, 452, 453, 454, 455, 215, 215, 215, - 215, 215, 215, 456, 457, 458, 459, 460, - 461, 462, 463, 464, 465, 466, 467, 468, - 469, 470, 471, 472, 473, 474, 475, 476, - 477, 478, 479, 480, 481, 482, 215, 483, - 484, 215, 485, 215, 486, 487, 215, 215, - 215, 488, 489, 215, 490, 215, 491, 492, - 215, 493, 494, 495, 496, 497, 215, 215, - 498, 215, 499, 500, 215, 215, 501, 215, - 215, 215, 215, 215, 215, 215, 502, 215, - 215, 503, 215, 504, 505, 215, 215, 215, - 506, 507, 508, 509, 510, 511, 215, 215, - 
215, 215, 215, 512, 215, 341, 215, 215, - 215, 215, 215, 215, 215, 215, 513, 514, - 215, 215, 215, 215, 215, 215, 215, 215, - 215, 215, 215, 215, 215, 215, 215, 215, - 215, 515, 516, 517, 518, 519, 520, 521, - 522, 523, 524, 524, 525, 525, 525, 525, - 525, 525, 525, 526, 526, 526, 526, 524, - 524, 524, 524, 524, 524, 524, 524, 524, - 524, 525, 525, 526, 526, 526, 526, 526, - 526, 527, 528, 529, 530, 531, 532, 526, - 526, 533, 534, 535, 536, 537, 526, 526, - 526, 526, 526, 526, 526, 524, 526, 525, - 526, 526, 526, 526, 526, 526, 526, 526, - 526, 526, 526, 526, 526, 526, 526, 526, - 526, 538, 539, 540, 541, 542, 543, 544, - 545, 546, 547, 548, 549, 550, 543, 543, - 551, 543, 552, 543, 553, 554, 555, 556, - 556, 556, 556, 555, 557, 556, 556, 556, - 556, 556, 558, 558, 559, 560, 561, 562, - 563, 564, 556, 556, 556, 556, 565, 566, - 556, 567, 568, 556, 556, 569, 569, 569, - 569, 570, 556, 556, 556, 556, 543, 543, - 543, 571, 572, 573, 574, 575, 576, 543, - 556, 556, 556, 543, 543, 543, 556, 556, - 577, 543, 543, 543, 556, 556, 556, 556, - 543, 555, 556, 556, 543, 578, 579, 579, - 578, 579, 579, 578, 543, 543, 543, 543, - 543, 543, 543, 543, 543, 543, 543, 543, - 543, 580, 581, 582, 583, 584, 526, 585, - 586, 0, 0, 587, 588, 589, 590, 591, - 592, 0, 0, 0, 0, 88, 593, 594, - 595, 596, 597, 598, 0, 599, 0, 600, - 601, 602, 603, 604, 605, 606, 607, 608, - 609, 610, 611, 612, 613, 614, 615, 616, - 617, 618, 619, 0, 620, 621, 622, 623, - 624, 625, 626, 627, 628, 629, 630, 631, - 632, 633, 634, 635, 636, 637, 638, 639, - 640, 641, 642, 643, 644, 645, 646, 647, - 648, 649, 650, 651, 652, 653, 654, 655, - 656, 657, 658, 659, 660, 661, 662, 663, - 664, 665, 666, 667, 668, 669, 670, 671, - 672, 673, 674, 675, 676, 677, 678, 679, - 680, 681, 682, 683, 684, 685, 686, 687, - 688, 689, 690, 691, 692, 693, 694, 695, - 696, 697, 698, 699, 700, 701, 702, 75, - 703, 704, 705, 706, 707, 215, 708, 709, - 710, 711, 712, 713, 714, 715, 716, 717, - 718, 719, 720, 721, 722, 723, 724, 725, - 
726, 727, 728, 729, 730, 731, 732, 733, - 734, 735, 736, 737, 738, 739, 740, 741, - 742, 743, 744, 745, 746, 747, 748, 749, - 750, 751, 752, 753, 754, 755, 756, 757, - 758, 759, 760, 761, 762, 763, 764, 765, - 766, 767, 768, 769, 770, 771, 772, 773, - 774, 775, 776, 777, 778, 779, 780, 781, - 782, 783, 784, 785, 786, 787, 788, 789, - 790, 791, 792, 793, 794, 795, 796, 797, - 798, 799, 800, 801, 802, 803, 804, 805, - 806, 807, 808, 809, 810, 811, 812, 813, - 814, 815, 816, 817, 818, 819, 820, 821, - 822, 823, 824, 825, 826, 827, 828, 829, - 830, 831, 832, 833, 834, 835, 836, 837, - 838, 839, 840, 841, 543, 543, 543, 543, - 543, 842, 842, 843, 844, 845, 846, 847, - 848, 849, 850, 851, 852, 853, 854, 855, - 856, 857, 858, 859, 860, 861, 862, 863, - 864, 865, 866, 867, 868, 869, 870, 871, - 872, 873, 874, 875, 876, 877, 878, 879, - 880, 881, 882, 883, 884, 885, 886, 887, - 888, 889, 890, 891, 892, 893, 894, 895, - 896, 897, 898, 899, 900, 901, 902, 903, - 904, 905, 906, 907, 908, 909, 910, 911, - 912, 913, 914, 915, 916, 917, 918, 919, - 920, 921, 922, 923, 924, 925, 926, 927, - 928, 929, 930, 931, 932, 933, 934, 935, - 936, 937, 938, 939, 940, 941, 942, 943, - 944, 945, 946, 947, 948, 949, 950, 951, - 952, 953, 954, 955, 956, 957, 958, 959, - 960, 961, 962, 963, 964, 965, 966, 967, - 968, 969, 970, 971, 972, 973, 974, 975, - 976, 977, 978, 979, 980, 981, 982, 983, - 984, 985, 986, 987, 988, 989, 990, 991, - 992, 993, 994, 995, 996, 997, 998, 999, - 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, - 1008, 0, 1009, 1010, 1011, 1012, 1013, 1014, - 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, - 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, - 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, - 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, - 0, 0, 525, 1047, 1047, 1047, 1047, 1047, - 1047, 215, 1048, 1049, 1050, 1051, 1052, 1053, - 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, - 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, - 1070, 1071, 1072, 1073, 1074, 1075, 
1076, 1077, - 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, - 1086, 215, 1047, 1087, 0, 0, 77, 77, - 11, 0, 556, 543, 543, 543, 543, 556, - 543, 543, 543, 1088, 556, 543, 543, 543, - 543, 543, 543, 556, 556, 556, 556, 556, - 556, 543, 543, 556, 543, 543, 1088, 1089, - 543, 1090, 1091, 1092, 1093, 1094, 1095, 1096, - 1097, 1098, 1099, 1099, 1100, 1101, 1102, 1103, - 1104, 1105, 1106, 1107, 1105, 543, 556, 1105, - 1098, 0, 0, 0, 0, 0, 0, 0, - 0, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 0, 0, 0, 0, - 1108, 1108, 1108, 1108, 1105, 1105, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1109, 1109, 1109, 1109, 1109, 1109, 75, - 75, 1110, 10, 10, 1111, 15, 1112, 77, - 77, 543, 543, 543, 543, 543, 543, 543, - 543, 1113, 1114, 1115, 1112, 1116, 0, 1112, - 1112, 1117, 1117, 1118, 1119, 1120, 1121, 1122, - 1123, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1124, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1125, 1117, 1126, 1127, 1128, 1129, 1113, - 1114, 1115, 1130, 1131, 1132, 1133, 1134, 556, - 543, 543, 543, 543, 543, 556, 543, 543, - 556, 1135, 1135, 1135, 1135, 1135, 1135, 1135, - 1135, 1135, 1135, 10, 1136, 1136, 1112, 1117, - 1117, 1137, 1117, 1117, 1117, 1117, 1138, 1139, - 1140, 1141, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1142, 1143, 1144, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1145, 1146, 1112, 
1147, 543, - 543, 543, 543, 543, 543, 543, 1109, 77, - 543, 543, 543, 543, 556, 543, 1124, 1124, - 543, 543, 77, 556, 543, 543, 556, 1117, - 1117, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 1117, 1117, 1117, 1148, 1148, - 1117, 1112, 1112, 1112, 1112, 1112, 1112, 1112, - 1112, 1112, 1112, 1112, 1112, 1112, 1112, 0, - 1149, 1117, 1150, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 543, 556, 543, 543, 556, 543, 543, - 556, 556, 556, 543, 556, 556, 543, 556, - 543, 543, 543, 556, 543, 556, 543, 556, - 543, 556, 543, 543, 0, 0, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1151, - 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 1151, 1151, 1117, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1152, 1152, 1152, 1152, 1152, 1152, 1152, - 1152, 1152, 1152, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 543, 543, 543, 543, - 543, 543, 543, 556, 543, 1153, 1153, 77, - 9, 9, 9, 1153, 0, 0, 556, 1154, - 1154, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 543, - 543, 543, 543, 1153, 543, 543, 543, 543, - 543, 543, 543, 543, 543, 1153, 543, 543, - 543, 1153, 543, 543, 543, 543, 543, 0, - 0, 
1105, 1105, 1105, 1105, 1105, 1105, 1105, - 1105, 1105, 1105, 1105, 1105, 1105, 1105, 1105, - 0, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 556, 556, 556, 0, 0, 1105, - 0, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 0, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 556, 543, 543, 543, - 543, 543, 543, 543, 543, 543, 543, 543, - 543, 543, 543, 1109, 556, 543, 543, 556, - 543, 543, 556, 543, 543, 543, 556, 556, - 556, 1127, 1128, 1129, 543, 543, 543, 556, - 543, 543, 556, 556, 543, 543, 543, 543, - 543, 1151, 1151, 1151, 1155, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 1156, 1157, 341, 341, 341, 341, 341, - 341, 1158, 1159, 341, 1160, 1161, 341, 341, - 341, 341, 341, 1151, 1155, 1162, 341, 1155, - 1155, 1155, 1151, 1151, 1151, 1151, 1151, 1151, - 1151, 1151, 1155, 1155, 1155, 1155, 1163, 1155, - 1155, 341, 543, 556, 543, 543, 1151, 1151, - 1151, 1164, 1165, 1166, 1167, 1168, 1169, 1170, - 1171, 341, 341, 1151, 1151, 1047, 1047, 1172, - 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1047, 525, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 1151, 1155, 1155, 0, 341, 341, - 341, 341, 341, 341, 341, 341, 0, 0, - 341, 341, 0, 0, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 0, 341, 341, 341, 341, 341, - 341, 341, 0, 
341, 0, 0, 0, 341, - 341, 341, 341, 0, 0, 1173, 341, 1174, - 1155, 1155, 1151, 1151, 1151, 1151, 0, 0, - 1175, 1155, 0, 0, 1176, 1177, 1163, 341, - 0, 0, 0, 0, 0, 0, 0, 0, - 1178, 0, 0, 0, 0, 1179, 1180, 0, - 1181, 341, 341, 1151, 1151, 0, 0, 1172, - 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 341, 341, 11, 11, 1182, 1182, 1182, - 1182, 1182, 1182, 841, 11, 341, 1047, 543, - 0, 0, 1151, 1151, 1155, 0, 341, 341, - 341, 341, 341, 341, 0, 0, 0, 0, - 341, 341, 0, 0, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 0, 341, 341, 341, 341, 341, - 341, 341, 0, 341, 1183, 0, 341, 1184, - 0, 341, 341, 0, 0, 1173, 0, 1155, - 1155, 1155, 1151, 1151, 0, 0, 0, 0, - 1151, 1151, 0, 0, 1151, 1151, 1163, 0, - 0, 0, 1151, 0, 0, 0, 0, 0, - 0, 0, 1185, 1186, 1187, 341, 0, 1188, - 0, 0, 0, 0, 0, 0, 0, 1172, - 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1151, 1151, 341, 341, 341, 1151, 1047, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1151, 1151, 1155, 0, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 0, - 341, 341, 341, 0, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 0, 341, 341, 341, 341, 341, - 341, 341, 0, 341, 341, 0, 341, 341, - 341, 341, 341, 0, 0, 1173, 341, 1155, - 1155, 1155, 1151, 1151, 1151, 1151, 1151, 0, - 1151, 1151, 1155, 0, 1155, 1155, 1163, 0, - 0, 341, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 1151, 1151, 0, 0, 1172, - 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1047, 11, 0, 0, 0, 0, 0, - 0, 0, 341, 1151, 1151, 1151, 1151, 1151, - 1151, 0, 1151, 1155, 1155, 0, 341, 341, - 341, 341, 341, 341, 341, 341, 0, 0, - 341, 341, 0, 0, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 0, 341, 341, 341, 341, 341, - 341, 341, 0, 341, 341, 0, 341, 341, - 341, 341, 341, 0, 0, 1173, 341, 1189, - 1151, 1155, 1151, 1151, 1151, 1151, 0, 0, - 
1190, 1191, 0, 0, 1192, 1193, 1163, 0, - 0, 0, 0, 0, 0, 0, 0, 1194, - 1195, 0, 0, 0, 0, 1196, 1197, 0, - 341, 341, 341, 1151, 1151, 0, 0, 1172, - 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 841, 341, 1182, 1182, 1182, 1182, 1182, - 1182, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1151, 341, 0, 341, 341, - 341, 341, 341, 341, 0, 0, 0, 341, - 341, 341, 0, 1198, 341, 1199, 341, 0, - 0, 0, 341, 341, 0, 341, 0, 341, - 341, 0, 0, 0, 341, 341, 0, 0, - 0, 341, 341, 341, 0, 0, 0, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 0, 0, 0, 0, 1200, - 1155, 1151, 1155, 1155, 0, 0, 0, 1201, - 1202, 1155, 0, 1203, 1204, 1205, 1163, 0, - 0, 341, 0, 0, 0, 0, 0, 0, - 1206, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 1172, - 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1182, 1182, 1182, 77, 77, 77, 77, - 77, 77, 11, 77, 0, 0, 0, 0, - 0, 1151, 1155, 1155, 1155, 1151, 341, 341, - 341, 341, 341, 341, 341, 341, 0, 341, - 341, 341, 0, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 0, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 0, 0, 0, 341, 1151, - 1151, 1151, 1155, 1155, 1155, 1155, 0, 1207, - 1151, 1208, 0, 1151, 1151, 1151, 1163, 0, - 0, 0, 0, 0, 0, 0, 1209, 1210, - 0, 341, 341, 341, 0, 0, 0, 0, - 0, 341, 341, 1151, 1151, 0, 0, 1172, - 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 0, 0, 0, 0, 0, 0, 0, - 1047, 1211, 1211, 1211, 1211, 1211, 1211, 1211, - 841, 341, 1151, 1155, 1155, 1047, 341, 341, - 341, 341, 341, 341, 341, 341, 0, 341, - 341, 341, 0, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 0, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 0, 341, 341, - 341, 341, 341, 0, 0, 1173, 341, 1155, - 1212, 1213, 1155, 1214, 1155, 1155, 0, 1215, - 1216, 1217, 0, 1218, 1219, 1151, 1163, 0, - 0, 0, 0, 0, 0, 0, 1220, 1221, - 0, 0, 0, 0, 0, 0, 0, 341, - 0, 341, 341, 
1151, 1151, 0, 0, 1172, - 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 0, 341, 341, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1151, 1151, 1155, 1155, 0, 341, 341, - 341, 341, 341, 341, 341, 341, 0, 341, - 341, 341, 0, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 1163, 1163, 341, 1222, - 1155, 1155, 1151, 1151, 1151, 1151, 0, 1223, - 1224, 1155, 0, 1225, 1226, 1227, 1163, 1228, - 841, 0, 0, 0, 0, 341, 341, 341, - 1229, 1182, 1182, 1182, 1182, 1182, 1182, 1182, - 341, 341, 341, 1151, 1151, 0, 0, 1172, - 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1182, 1182, 1182, 1182, 1182, 1182, 1182, - 1182, 1182, 841, 341, 341, 341, 341, 341, - 341, 0, 0, 1155, 1155, 0, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 0, 0, 0, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 0, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 0, 341, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 0, 0, 0, 1230, 0, 0, 0, 0, - 1231, 1155, 1155, 1151, 1151, 1151, 0, 1151, - 0, 1155, 1232, 1233, 1155, 1234, 1235, 1236, - 1237, 0, 0, 0, 0, 0, 0, 1172, - 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 0, 0, 1155, 1155, 1047, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 1151, 341, 1238, 1151, 1151, 1151, - 1151, 1239, 1239, 1163, 0, 0, 0, 0, - 11, 341, 341, 341, 341, 341, 341, 525, - 1151, 1240, 1240, 1240, 1240, 1151, 1151, 1151, - 1047, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 1047, 1047, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 
0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 341, 341, 0, 341, 0, 341, - 341, 341, 341, 341, 0, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 0, 341, 0, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 1151, 341, 1241, 1151, 1151, 1151, - 1151, 1242, 1242, 1163, 1151, 1151, 341, 0, - 0, 341, 341, 341, 341, 341, 0, 525, - 0, 1243, 1243, 1243, 1243, 1151, 1151, 0, - 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 0, 0, 1244, 1245, 341, - 341, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 841, 841, 841, 1047, 1047, 1047, - 1047, 1047, 1047, 1047, 1047, 1246, 1047, 1047, - 1047, 1047, 1047, 1047, 841, 1047, 841, 841, - 841, 556, 556, 841, 841, 841, 841, 841, - 841, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 1182, 1182, 1182, 1182, 1182, - 1182, 1182, 1182, 1182, 1182, 841, 556, 841, - 556, 841, 1247, 12, 13, 12, 13, 1155, - 1155, 341, 341, 341, 1248, 341, 341, 341, - 341, 0, 341, 341, 341, 341, 1249, 341, - 341, 341, 341, 1250, 341, 341, 341, 341, - 1251, 341, 341, 341, 341, 1252, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 1253, 341, 341, 341, 0, 0, - 0, 0, 1254, 1255, 1256, 1257, 1258, 1259, - 1260, 1261, 1262, 1255, 1255, 1255, 1255, 1151, - 1155, 1255, 1263, 543, 543, 1163, 1047, 543, - 543, 341, 341, 341, 341, 341, 1151, 1151, - 1151, 1151, 1151, 1151, 1264, 1151, 1151, 1151, - 1151, 0, 1151, 1151, 1151, 1151, 1265, 1151, - 1151, 1151, 1151, 1266, 1151, 1151, 1151, 1151, - 1267, 1151, 1151, 1151, 1151, 1268, 1151, 1151, - 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 1151, 1151, 1269, 1151, 1151, 1151, 0, 841, - 841, 841, 841, 841, 841, 841, 841, 556, - 841, 841, 841, 841, 841, 841, 0, 841, - 841, 1047, 1047, 1047, 1047, 1047, 841, 841, - 841, 841, 1047, 1047, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 
0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 1270, 1271, - 341, 341, 341, 341, 1272, 1272, 1151, 1273, - 1151, 1151, 1155, 1151, 1151, 1151, 1151, 1151, - 1173, 1272, 1163, 1163, 1155, 1155, 1151, 1151, - 341, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 1047, 1047, 1047, 1047, 1047, - 1047, 341, 341, 341, 341, 341, 341, 1155, - 1155, 1151, 1151, 341, 341, 341, 341, 1151, - 1151, 1151, 341, 1272, 1272, 1272, 341, 341, - 1272, 1272, 1272, 1272, 1272, 1272, 1272, 341, - 341, 341, 1151, 1151, 1151, 1151, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 1151, 1272, 1155, 1151, 1151, - 1272, 1272, 1272, 1272, 1272, 1272, 556, 341, - 1272, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 1272, 1272, 1272, 1151, 841, - 841, 1274, 1275, 1276, 1277, 1278, 1279, 1280, - 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, - 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, - 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, - 1305, 1306, 1307, 1308, 1309, 1310, 1311, 0, - 1312, 0, 0, 0, 0, 0, 1313, 0, - 0, 1314, 1315, 1316, 1317, 1318, 1319, 1320, - 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1328, - 1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336, - 1337, 1338, 1339, 1340, 1341, 1342, 1343, 1344, - 1345, 1346, 1347, 1348, 1349, 1350, 1351, 1352, - 1353, 1354, 1355, 1356, 1047, 1357, 1358, 1359, - 1360, 1361, 1361, 1361, 1361, 1361, 1361, 1361, - 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, - 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, - 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, - 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, - 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, - 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, - 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, - 1361, 1361, 1361, 1361, 1361, 1361, 1361, 
1361, - 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, - 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, - 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, - 1362, 1363, 1364, 1364, 1364, 1364, 1364, 1364, - 1364, 1364, 1364, 1364, 1364, 1364, 1364, 1364, - 1364, 1364, 1364, 1364, 1364, 1364, 1364, 1364, - 1364, 1364, 1364, 1364, 1364, 1364, 1364, 1364, - 1364, 1364, 1364, 1364, 1364, 1364, 1364, 1364, - 1364, 1364, 1364, 1364, 1364, 1364, 1364, 1364, - 1364, 1364, 1364, 1364, 1364, 1364, 1364, 1364, - 1364, 1364, 1364, 1364, 1364, 1364, 1364, 1364, - 1364, 1364, 1364, 1364, 1364, 1364, 1364, 1364, - 1364, 1365, 1365, 1365, 1365, 1365, 1365, 1365, - 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, - 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, - 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, - 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, - 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, - 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, - 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, - 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, - 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, - 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, - 1365, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 0, 341, 341, 341, 341, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 0, 341, 0, 341, 341, 341, 341, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 0, 341, 341, 341, 341, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 
341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 0, 341, 341, 341, 341, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 0, 341, 0, 341, 341, 341, 341, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 0, 341, 341, 341, 341, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 0, 0, 543, 543, - 543, 1047, 1047, 1047, 1047, 1047, 1047, 1047, - 1047, 1047, 1182, 1182, 1182, 1182, 1182, 1182, - 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, - 1182, 1182, 1182, 1182, 1182, 1182, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 0, 0, 0, 0, 0, - 0, 1366, 1367, 1368, 1369, 1370, 1371, 1372, - 1373, 1374, 1375, 1376, 1377, 1378, 1379, 1380, - 1381, 1382, 1383, 1384, 1385, 1386, 1387, 1388, - 1389, 1390, 1391, 1392, 1393, 1394, 1395, 1396, - 1397, 1398, 1399, 1400, 1401, 1402, 1403, 1404, - 1405, 1406, 1407, 1408, 1409, 1410, 1411, 1412, - 1413, 1414, 1415, 1416, 1417, 1418, 1419, 1420, - 1421, 1422, 1423, 1424, 1425, 1426, 1427, 1428, - 1429, 1430, 1431, 1432, 1433, 1434, 1435, 1436, - 1437, 1438, 1439, 1440, 1441, 1442, 1443, 1444, - 1445, 1446, 1447, 1448, 1449, 1450, 1451, 0, - 0, 1452, 1453, 1454, 1455, 1456, 1457, 0, - 0, 1087, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 
341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 
341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 841, 1047, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 8, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 12, 13, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 1047, 1047, 1047, 1458, - 1458, 1458, 341, 341, 341, 341, 341, 341, - 341, 341, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 0, 341, - 341, 
341, 341, 1151, 1151, 1163, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 1151, 1151, 1163, 1047, 1047, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 1151, 1151, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 0, 341, - 341, 341, 0, 1151, 1151, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 577, 577, 1155, - 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1155, - 1155, 1155, 1155, 1155, 1155, 1155, 1155, 1151, - 1155, 1155, 1151, 1151, 1151, 1151, 1151, 1151, - 1151, 1151, 1151, 1163, 1151, 1047, 1047, 1047, - 525, 1047, 1047, 1047, 11, 341, 543, 0, - 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 0, 0, 0, 0, 0, - 0, 1211, 1211, 1211, 1211, 1211, 1211, 1211, - 1211, 1211, 1211, 0, 0, 0, 0, 0, - 0, 9, 9, 9, 9, 9, 9, 1087, - 9, 9, 9, 9, 577, 577, 577, 1459, - 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 525, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 1151, 1151, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 
341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 1089, 341, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 0, 1151, 1151, 1151, 1155, 1155, 1155, 1155, - 1151, 1151, 1155, 1155, 1155, 0, 0, 0, - 0, 1155, 1155, 1151, 1155, 1155, 1155, 1155, - 1155, 1155, 1088, 543, 556, 0, 0, 0, - 0, 77, 0, 0, 0, 9, 9, 1172, - 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 0, - 0, 341, 341, 341, 341, 341, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 0, 0, 0, 0, 0, - 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 1182, 0, 0, 0, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 543, 556, 1155, 1155, 1151, 0, 0, 1047, - 1047, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 
341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 1155, 1151, - 1155, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 0, 1163, 1272, 1151, 1272, 1272, 1151, 1151, - 1151, 1151, 1151, 1151, 1151, 1151, 1155, 1155, - 1155, 1155, 1155, 1155, 1151, 1151, 543, 543, - 543, 543, 543, 543, 543, 543, 0, 0, - 556, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 0, 0, 0, 0, 0, - 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 0, 0, 0, 0, 0, - 0, 1047, 1047, 1047, 1047, 1047, 1047, 1047, - 525, 1047, 1047, 1047, 1047, 1047, 1047, 0, - 0, 543, 543, 543, 543, 543, 556, 556, - 556, 556, 556, 556, 543, 543, 556, 842, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1151, 1151, 1151, 1151, 1155, 1460, 1461, - 1462, 1463, 1464, 1465, 1466, 1467, 1468, 1469, - 341, 341, 1470, 1471, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 1173, 1472, 1151, - 1151, 1151, 1151, 1473, 1474, 1475, 1476, 1477, - 1478, 1479, 1480, 1481, 1482, 1483, 341, 341, - 341, 341, 341, 341, 341, 0, 0, 0, - 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 1047, 1047, 1047, 1047, 1047, - 1047, 1047, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 543, 556, 543, 543, - 543, 543, 543, 543, 543, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 0, 0, - 0, 1151, 1151, 1155, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 1155, 1151, 1151, 1151, 1151, 1155, - 1155, 1151, 1151, 1483, 1163, 1151, 1151, 341, - 341, 1172, 1172, 
1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 1173, - 1155, 1151, 1151, 1155, 1155, 1155, 1151, 1155, - 1151, 1151, 1151, 1483, 1483, 0, 0, 0, - 0, 0, 0, 0, 0, 1047, 1047, 1047, - 1047, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 1155, 1155, 1155, - 1155, 1155, 1155, 1155, 1155, 1151, 1151, 1151, - 1151, 1151, 1151, 1151, 1151, 1155, 1155, 1151, - 1173, 0, 0, 0, 1047, 1047, 1047, 1047, - 1047, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 0, 0, 0, 341, 341, - 341, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 525, 525, 525, 525, 525, 525, 1047, - 1047, 1484, 1485, 1486, 1487, 1488, 1488, 1489, - 1490, 1491, 0, 0, 0, 0, 0, 0, - 0, 1492, 1493, 1494, 1495, 1496, 1497, 1498, - 1499, 1500, 1501, 1502, 1503, 1504, 1505, 1506, - 1507, 1508, 1509, 1510, 1511, 1512, 1513, 1514, - 1515, 1516, 1517, 1518, 1519, 1520, 1521, 1522, - 1523, 1524, 1525, 1526, 1527, 1528, 1529, 1530, - 1531, 1532, 1533, 1534, 0, 0, 1535, 1536, - 1537, 1047, 1047, 1047, 1047, 1047, 1047, 1047, - 1047, 0, 0, 0, 0, 0, 0, 0, - 0, 543, 543, 543, 1047, 569, 556, 556, - 556, 556, 556, 543, 543, 556, 556, 556, - 556, 543, 1155, 569, 569, 569, 569, 569, - 569, 569, 341, 341, 341, 341, 556, 341, - 341, 341, 341, 341, 341, 543, 341, 341, - 1155, 543, 543, 341, 0, 0, 0, 0, - 0, 215, 215, 215, 215, 215, 215, 215, - 215, 215, 215, 215, 215, 215, 215, 215, - 215, 215, 215, 215, 215, 215, 215, 215, - 215, 215, 215, 215, 215, 215, 215, 215, - 215, 215, 215, 
215, 215, 215, 215, 215, - 215, 215, 215, 215, 215, 1538, 1539, 1540, - 525, 1541, 1542, 1543, 1544, 1545, 1546, 1547, - 1548, 1549, 1550, 1551, 525, 1552, 1553, 1554, - 1555, 1556, 1557, 1558, 1559, 1560, 1561, 1562, - 1563, 1564, 1565, 1566, 1567, 1568, 1569, 525, - 1570, 1571, 1572, 1573, 1574, 1575, 1576, 1577, - 1578, 1579, 1580, 1581, 1582, 1583, 1584, 1585, - 1586, 1587, 1588, 1589, 1590, 1591, 1592, 1593, - 1594, 1595, 1596, 1597, 215, 215, 215, 215, - 215, 215, 215, 215, 215, 215, 215, 215, - 215, 1598, 1599, 215, 215, 215, 1600, 215, - 215, 215, 215, 215, 215, 215, 215, 215, - 215, 215, 215, 215, 215, 215, 215, 1601, - 215, 215, 215, 215, 215, 215, 215, 215, - 215, 215, 215, 215, 1602, 1603, 1604, 1605, - 1568, 1606, 1607, 1608, 1609, 1610, 1611, 1612, - 1613, 1614, 1615, 1616, 1617, 1618, 1619, 1620, - 1621, 1622, 1623, 1624, 1625, 1626, 1627, 1628, - 1629, 1630, 1631, 1632, 1633, 1634, 1635, 1636, - 1637, 543, 543, 556, 543, 543, 543, 543, - 543, 543, 543, 556, 543, 543, 579, 1638, - 556, 558, 543, 543, 543, 543, 543, 543, - 543, 543, 543, 543, 543, 543, 543, 543, - 543, 543, 543, 543, 543, 543, 543, 543, - 543, 543, 543, 543, 543, 543, 543, 543, - 543, 543, 543, 543, 543, 543, 543, 555, - 1089, 1089, 556, 0, 543, 578, 556, 543, - 556, 1639, 1640, 1641, 1642, 1643, 1644, 1645, - 1646, 1647, 1648, 1649, 1650, 1651, 1652, 1653, - 1654, 1655, 1656, 1657, 1658, 1659, 1660, 1661, - 1662, 1663, 1664, 1665, 1666, 1667, 1668, 1669, - 1670, 1671, 1672, 1673, 1674, 1675, 1676, 1677, - 1678, 1679, 1680, 1681, 1682, 1683, 1684, 1685, - 1686, 1687, 1688, 1689, 1690, 1691, 1692, 1693, - 1694, 1695, 1696, 1697, 1698, 1699, 1700, 1701, - 1702, 1703, 1704, 1705, 1706, 1707, 1708, 1709, - 1710, 1711, 1712, 1713, 1714, 1715, 1716, 1717, - 1718, 1719, 1720, 1721, 1722, 1723, 1724, 1725, - 1726, 1727, 1728, 1729, 1730, 1731, 1732, 1733, - 1734, 1735, 1736, 1737, 1738, 1739, 1740, 1741, - 1742, 1743, 1744, 1745, 1746, 1747, 1748, 1749, - 1750, 1751, 1752, 1753, 1754, 1755, 
1756, 1757, - 1758, 1759, 1760, 1761, 1762, 1763, 1764, 1765, - 1766, 1767, 1768, 1769, 1770, 1771, 1772, 1773, - 1774, 1775, 1776, 1777, 1778, 1779, 1780, 1781, - 1782, 1783, 1784, 1785, 1786, 1787, 1788, 1789, - 1790, 1791, 1792, 1793, 1794, 215, 215, 1795, - 215, 1796, 1797, 1798, 1799, 1800, 1801, 1802, - 1803, 1804, 1805, 1806, 1807, 1808, 1809, 1810, - 1811, 1812, 1813, 1814, 1815, 1816, 1817, 1818, - 1819, 1820, 1821, 1822, 1823, 1824, 1825, 1826, - 1827, 1828, 1829, 1830, 1831, 1832, 1833, 1834, - 1835, 1836, 1837, 1838, 1839, 1840, 1841, 1842, - 1843, 1844, 1845, 1846, 1847, 1848, 1849, 1850, - 1851, 1852, 1853, 1854, 1855, 1856, 1857, 1858, - 1859, 1860, 1861, 1862, 1863, 1864, 1865, 1866, - 1867, 1868, 1869, 1870, 1871, 1872, 1873, 1874, - 1875, 1876, 1877, 1878, 1879, 1880, 1881, 1882, - 1883, 1884, 1885, 1886, 1887, 1888, 1889, 1890, - 1891, 1892, 1893, 1894, 1895, 1896, 1897, 1898, - 1899, 1900, 1901, 1902, 1903, 1904, 1905, 1906, - 1907, 1908, 1909, 1910, 1911, 1912, 1913, 0, - 0, 1914, 1915, 1916, 1917, 1918, 1919, 0, - 0, 1920, 1921, 1922, 1923, 1924, 1925, 1926, - 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, - 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, - 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, - 1951, 1952, 1953, 1954, 1955, 1956, 1957, 0, - 0, 1958, 1959, 1960, 1961, 1962, 1963, 0, - 0, 1964, 1965, 1966, 1967, 1968, 1969, 1970, - 1971, 0, 1972, 0, 1973, 0, 1974, 0, - 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, - 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, - 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, - 1999, 2000, 2001, 2002, 2003, 2004, 2005, 0, - 0, 2006, 2007, 2008, 2009, 2010, 2011, 2012, - 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, - 2021, 2022, 2023, 2024, 2025, 2026, 2027, 2028, - 2029, 2030, 2031, 2032, 2033, 2034, 2035, 2036, - 2037, 2038, 2039, 2040, 2041, 2042, 2043, 2044, - 2045, 2046, 2047, 2048, 2049, 2050, 2051, 2052, - 2053, 2054, 2055, 2056, 2057, 2058, 0, 2059, - 2060, 2061, 2062, 2063, 2064, 
2065, 2066, 2067, - 2068, 2069, 2070, 2071, 2072, 2073, 0, 2074, - 2075, 2076, 2077, 2078, 2079, 2080, 2081, 2082, - 2083, 2084, 2085, 2086, 2087, 0, 0, 2088, - 2089, 2090, 2091, 2092, 2093, 0, 2094, 2095, - 2096, 2097, 2098, 2099, 2100, 2101, 2102, 2103, - 2104, 2105, 2106, 2107, 2108, 2109, 2110, 2111, - 2112, 0, 0, 2113, 2114, 2115, 0, 2116, - 2117, 2118, 2119, 2120, 2121, 2122, 2123, 2124, - 0, 2125, 2126, 2127, 2127, 2127, 2127, 2127, - 2128, 2127, 2127, 2127, 1459, 2129, 2130, 2131, - 2132, 1087, 2133, 1087, 1087, 1087, 1087, 9, - 2134, 2135, 2136, 2137, 2135, 2135, 2136, 2137, - 2135, 9, 9, 9, 9, 2138, 2139, 2140, - 9, 2141, 2142, 2143, 2144, 2145, 2146, 2147, - 76, 10, 10, 10, 2148, 2149, 9, 2150, - 2151, 9, 81, 93, 9, 2152, 9, 2153, - 48, 48, 9, 9, 9, 2154, 12, 13, - 2155, 2156, 2157, 9, 9, 9, 9, 9, - 9, 9, 9, 75, 9, 48, 9, 9, - 2158, 9, 9, 9, 9, 9, 9, 9, - 2127, 1459, 1459, 1459, 1459, 1459, 0, 2159, - 2160, 2161, 2162, 1459, 1459, 1459, 1459, 1459, - 1459, 2163, 2164, 0, 0, 2165, 2166, 2167, - 2168, 2169, 2170, 2171, 2172, 2173, 2174, 2175, - 2176, 2177, 2178, 2179, 2180, 2181, 2182, 2183, - 2184, 2185, 2186, 2187, 2188, 2189, 2190, 2191, - 0, 2192, 2193, 2194, 2195, 2196, 2197, 2198, - 2199, 2200, 2201, 2202, 2203, 2204, 0, 0, - 0, 11, 11, 11, 11, 11, 11, 11, - 11, 2205, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, - 11, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 543, 543, 569, 569, 543, 543, 543, - 543, 569, 569, 569, 543, 543, 842, 842, - 842, 842, 543, 842, 842, 842, 569, 569, - 543, 556, 543, 569, 569, 556, 556, 556, - 556, 543, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 2206, 2207, 2208, 2209, 77, 2210, 2211, - 2212, 77, 2213, 2214, 2215, 2215, 2215, 2216, - 2217, 2218, 2218, 2219, 2220, 77, 2221, 2222, - 77, 75, 2223, 2224, 2225, 2225, 2225, 77, - 77, 2226, 2227, 2228, 77, 2229, 77, 2230, - 77, 2229, 77, 2231, 2232, 2233, 2208, 84, - 2234, 2235, 2236, 2237, 2238, 2239, 2240, 2241, - 
2242, 2243, 2244, 77, 2245, 2246, 2247, 2248, - 2249, 2250, 75, 75, 75, 75, 2251, 2252, - 2234, 2253, 2254, 77, 75, 77, 77, 2255, - 841, 2256, 2257, 2258, 2259, 2260, 2261, 2262, - 2263, 2264, 2265, 2266, 2267, 2268, 2269, 2270, - 2271, 2272, 2273, 2274, 2275, 2276, 2277, 2278, - 2279, 2280, 2281, 2282, 2283, 2284, 2285, 2286, - 2287, 2288, 2289, 2290, 2291, 2292, 2293, 2294, - 2295, 2296, 2297, 2298, 2299, 2300, 2301, 2302, - 2303, 1458, 1458, 1458, 2304, 2305, 1458, 1458, - 1458, 1458, 2306, 77, 77, 0, 0, 0, - 0, 2307, 75, 2308, 75, 2309, 79, 79, - 79, 79, 79, 2310, 2311, 77, 77, 77, - 77, 75, 77, 77, 75, 77, 77, 75, - 77, 77, 79, 79, 77, 77, 77, 2312, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 2313, 2314, - 2315, 2316, 77, 2317, 77, 2318, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 2319, 2319, 2320, 2321, 75, 75, - 75, 2322, 2323, 2319, 2324, 2325, 2319, 75, - 75, 75, 2319, 14, 85, 75, 2319, 2319, - 75, 75, 75, 2319, 2319, 2319, 2319, 75, - 2319, 2319, 2319, 2319, 2326, 2327, 2328, 2329, - 75, 75, 75, 75, 2319, 2330, 2331, 2319, - 2332, 2333, 2319, 2319, 2319, 75, 75, 75, - 75, 75, 2319, 75, 2319, 2334, 2319, 2319, - 2319, 2319, 2335, 2319, 2336, 2337, 2338, 2319, - 2339, 2340, 2341, 2319, 2319, 2319, 2342, 75, - 75, 75, 75, 2319, 2319, 2319, 2319, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 2319, 2343, 2344, 2345, 75, 2346, 2347, 2319, - 2319, 2319, 2319, 2319, 2319, 75, 2348, 2349, - 2350, 2351, 2352, 2353, 2354, 2355, 2356, 2357, - 2358, 2359, 2360, 2361, 2362, 2363, 2364, 2319, - 2319, 2365, 2366, 2367, 2368, 2369, 2370, 2371, - 2372, 2373, 2374, 2319, 2319, 2319, 75, 75, - 2319, 2319, 2375, 2376, 75, 75, 75, 75, - 75, 2319, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 2377, 2319, 75, 75, 2319, - 2319, 2378, 2379, 2319, 2380, 2381, 2382, 2383, 
- 2384, 2319, 2319, 2385, 2386, 2387, 2388, 2319, - 2319, 2319, 75, 75, 75, 75, 75, 2319, - 2319, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 2319, 2319, 2319, 2319, 2319, 75, - 75, 2319, 2319, 75, 75, 75, 75, 2319, - 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, - 2319, 2389, 2390, 2391, 2392, 2319, 2319, 2319, - 2319, 2319, 2319, 2393, 2394, 2395, 2396, 75, - 75, 2319, 2319, 2319, 2319, 2319, 2319, 2319, - 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, - 2319, 77, 77, 77, 77, 77, 77, 77, - 77, 12, 13, 12, 13, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 2397, 2397, 77, 77, 77, - 77, 2319, 2319, 77, 77, 77, 77, 77, - 77, 79, 2398, 2399, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 77, 75, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 79, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 841, 77, - 77, 77, 77, 77, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 79, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 75, 75, 75, - 75, 75, 75, 77, 77, 77, 77, 77, - 77, 77, 2397, 2397, 2397, 2397, 79, 79, - 79, 2397, 79, 79, 2397, 77, 77, 77, - 77, 79, 79, 79, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 
0, - 0, 2400, 2401, 2402, 2403, 2404, 2405, 2406, - 2407, 2408, 2409, 2410, 2411, 2412, 2413, 2414, - 2415, 2416, 2417, 2418, 2419, 2420, 2421, 2422, - 2423, 2424, 2425, 2426, 2427, 2428, 2429, 2430, - 2431, 2432, 2433, 2434, 2435, 2436, 2437, 2438, - 2439, 2440, 2441, 2442, 2443, 2444, 2445, 2446, - 2447, 2448, 2449, 2450, 2451, 2452, 2453, 2454, - 2455, 2456, 2457, 2458, 2459, 2460, 2461, 2462, - 2463, 2464, 2465, 2466, 2467, 2468, 2469, 2470, - 2471, 2472, 2473, 2474, 2475, 2476, 2477, 2478, - 2479, 2480, 2481, 2482, 2483, 2484, 2485, 2486, - 2487, 2488, 2489, 2490, 2491, 2492, 2493, 2494, - 2495, 2496, 2497, 2498, 2499, 2500, 2501, 2502, - 2503, 2504, 2505, 2506, 2507, 2508, 2509, 2510, - 2511, 2512, 2513, 2514, 2515, 2516, 2517, 2518, - 2519, 2520, 2521, 2522, 2523, 2524, 2525, 2526, - 2527, 2528, 2529, 2530, 2531, 2532, 2533, 2534, - 2535, 2536, 2537, 2538, 1211, 1211, 1211, 1211, - 1211, 1211, 1211, 1211, 1211, 1211, 1211, 1211, - 1211, 1211, 1211, 1211, 1211, 1211, 1211, 1211, - 1211, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 79, 79, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 79, - 75, 77, 77, 77, 77, 77, 77, 77, - 77, 79, 75, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 
77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 75, 75, 75, 2539, 2539, 2540, 2540, - 75, 79, 79, 79, 79, 79, 79, 77, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 77, 2397, 2397, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 2539, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 2397, 79, 79, 79, 79, 79, 79, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 79, 79, 79, 2397, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 2397, 79, 79, 79, 79, 79, - 79, 79, 79, 2397, 2397, 2541, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 2397, 2397, - 79, 79, 79, 79, 79, 2397, 2397, 79, - 79, 79, 79, 79, 79, 79, 79, 2397, - 79, 79, 79, 79, 79, 2397, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 2397, 79, 79, 79, 79, - 79, 79, 79, 2397, 2397, 79, 2397, 79, - 79, 79, 79, 2397, 79, 79, 2397, 79, - 79, 79, 79, 79, 79, 79, 2397, 77, - 77, 79, 79, 2397, 2397, 79, 79, 79, - 79, 79, 79, 79, 77, 79, 77, 79, - 77, 77, 77, 77, 77, 77, 79, 77, - 77, 77, 79, 77, 77, 77, 77, 77, - 77, 2397, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 79, 79, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 79, 77, 77, - 79, 77, 77, 77, 77, 2397, 77, 2397, - 77, 77, 77, 77, 2397, 2397, 2397, 77, - 2397, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 79, 79, 79, 79, - 79, 12, 13, 12, 13, 12, 13, 12, - 13, 12, 13, 12, 13, 12, 13, 1211, - 1211, 1211, 1211, 1211, 1211, 1211, 1211, 1211, - 1211, 1211, 1211, 1211, 1211, 1211, 1211, 1211, - 1211, 1211, 1211, 1211, 1211, 1211, 1211, 1211, - 1211, 1211, 1211, 1211, 1211, 77, 2397, 2397, - 2397, 77, 77, 
77, 77, 77, 77, 77, - 77, 77, 79, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 2397, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 2397, 2319, 75, 75, 2319, 2319, 12, 13, - 75, 2319, 2319, 75, 2319, 2319, 2319, 75, - 75, 75, 75, 75, 2319, 2319, 2319, 2319, - 75, 75, 75, 75, 75, 2319, 2319, 2319, - 75, 75, 75, 2319, 2319, 2319, 2319, 12, - 13, 12, 13, 12, 13, 12, 13, 12, - 13, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 
75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 2539, 2539, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 12, 13, 12, 13, - 12, 13, 12, 13, 12, 13, 12, 13, - 12, 13, 12, 13, 12, 13, 12, 13, - 12, 13, 75, 75, 2319, 2319, 2319, 2319, - 2319, 2319, 75, 2319, 2319, 2319, 2319, 2319, - 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, - 2319, 75, 75, 75, 75, 75, 75, 75, - 75, 2319, 75, 75, 75, 75, 75, 75, - 75, 2319, 2319, 2319, 2319, 2319, 2319, 75, - 75, 75, 2319, 75, 75, 75, 75, 2319, - 2319, 2319, 2319, 2319, 75, 2319, 2319, 75, - 75, 12, 13, 12, 13, 2319, 75, 75, - 75, 75, 2319, 75, 2319, 2319, 2319, 75, - 75, 2319, 2319, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 2319, 2319, 2319, - 2319, 2319, 2319, 75, 75, 12, 13, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 2319, 2319, 2542, 2319, 2319, - 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, - 2319, 2319, 2319, 2319, 2319, 2319, 75, 2319, - 2319, 2319, 2319, 75, 75, 2319, 75, 2319, - 75, 75, 2319, 75, 2319, 2319, 2319, 2319, - 75, 75, 75, 75, 75, 2319, 2319, 75, - 75, 75, 75, 75, 75, 2319, 2319, 2319, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 2319, 2319, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 2319, 2319, 75, - 75, 75, 75, 2319, 2319, 2319, 2319, 75, - 2319, 2319, 75, 75, 2319, 2543, 2544, 2545, - 75, 75, 2319, 2319, 2319, 2319, 2319, 2319, - 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, - 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, - 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, - 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, - 2319, 2319, 2319, 2319, 2319, 75, 75, 2319, - 2319, 2319, 2319, 2319, 2319, 2319, 2319, 75, - 2319, 2319, 2319, 2319, 2319, 2319, 2319, 
2319, - 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, - 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, - 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, - 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, - 75, 75, 75, 75, 75, 2546, 2547, 2319, - 75, 75, 75, 2319, 2319, 2319, 2319, 2319, - 75, 75, 75, 75, 75, 2319, 2319, 2319, - 75, 75, 75, 75, 2319, 75, 75, 75, - 2319, 2319, 2319, 2319, 2319, 75, 2319, 75, - 75, 77, 77, 77, 77, 77, 79, 79, - 79, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 2397, 2397, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 77, 77, - 75, 75, 75, 75, 75, 75, 77, 77, - 77, 2397, 77, 77, 77, 77, 2397, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 0, 0, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 0, - 0, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 2548, - 77, 2549, 2550, 2551, 2552, 2553, 2554, 2555, - 2556, 2557, 2558, 2559, 2560, 2561, 2562, 2563, - 2564, 2565, 2566, 2567, 2568, 2569, 2570, 2571, - 2572, 2573, 2574, 2575, 2576, 2577, 2578, 2579, - 2580, 2581, 2582, 2583, 2584, 2585, 2586, 2587, - 2588, 2589, 2590, 2591, 2592, 2593, 2594, 2595, - 0, 2596, 2597, 2598, 2599, 2600, 2601, 2602, - 2603, 2604, 2605, 2606, 2607, 2608, 2609, 2610, - 2611, 2612, 2613, 2614, 2615, 2616, 2617, 2618, - 2619, 2620, 2621, 2622, 2623, 2624, 2625, 
2626, - 2627, 2628, 2629, 2630, 2631, 2632, 2633, 2634, - 2635, 2636, 2637, 2638, 2639, 2640, 2641, 2642, - 0, 2643, 2644, 2645, 2646, 2647, 2648, 2649, - 2650, 2651, 2652, 2653, 2654, 2655, 2656, 2657, - 2658, 2659, 215, 2660, 2661, 215, 2662, 2663, - 215, 215, 215, 215, 215, 2664, 2665, 2666, - 2667, 2668, 2669, 2670, 2671, 2672, 2673, 2674, - 2675, 2676, 2677, 2678, 2679, 2680, 2681, 2682, - 2683, 2684, 2685, 2686, 2687, 2688, 2689, 2690, - 2691, 2692, 2693, 2694, 2695, 2696, 2697, 2698, - 2699, 2700, 2701, 2702, 2703, 2704, 2705, 2706, - 2707, 2708, 2709, 2710, 2711, 2712, 2713, 2714, - 2715, 2716, 2717, 2718, 2719, 2720, 2721, 2722, - 2723, 2724, 2725, 2726, 2727, 2728, 2729, 2730, - 2731, 2732, 2733, 2734, 2735, 2736, 2737, 2738, - 2739, 2740, 2741, 2742, 2743, 2744, 2745, 2746, - 2747, 2748, 2749, 2750, 2751, 2752, 2753, 2754, - 2755, 2756, 2757, 2758, 2759, 2760, 2761, 2762, - 2763, 2764, 2765, 2766, 2767, 215, 77, 77, - 77, 77, 77, 77, 2768, 2769, 2770, 2771, - 543, 543, 543, 2772, 2773, 0, 0, 0, - 0, 0, 9, 9, 9, 9, 1211, 9, - 9, 2774, 2775, 2776, 2777, 2778, 2779, 2780, - 2781, 2782, 2783, 2784, 2785, 2786, 2787, 2788, - 2789, 2790, 2791, 2792, 2793, 2794, 2795, 2796, - 2797, 2798, 2799, 2800, 2801, 2802, 2803, 2804, - 2805, 2806, 2807, 2808, 2809, 2810, 2811, 0, - 2812, 0, 0, 0, 0, 0, 2813, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 0, 0, 0, 0, 0, 0, 0, - 2814, 1047, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 1163, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 0, 341, 341, 341, 341, 341, 341, 341, - 0, 341, 341, 341, 341, 341, 341, 341, - 0, 341, 341, 341, 
341, 341, 341, 341, - 0, 341, 341, 341, 341, 341, 341, 341, - 0, 341, 341, 341, 341, 341, 341, 341, - 0, 341, 341, 341, 341, 341, 341, 341, - 0, 341, 341, 341, 341, 341, 341, 341, - 0, 543, 543, 543, 543, 543, 543, 543, - 543, 543, 543, 543, 543, 543, 543, 543, - 543, 543, 543, 543, 543, 543, 543, 543, - 543, 543, 543, 543, 543, 543, 543, 543, - 543, 9, 9, 81, 93, 81, 93, 9, - 9, 9, 81, 93, 9, 81, 93, 9, - 9, 9, 9, 9, 9, 9, 9, 9, - 1087, 9, 9, 1087, 9, 81, 93, 9, - 9, 81, 93, 12, 13, 12, 13, 12, - 13, 12, 13, 9, 9, 9, 9, 9, - 524, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 1087, 1087, 9, 9, 9, - 9, 1087, 9, 2137, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, - 9, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 0, 2815, 2815, 2815, 2815, - 2816, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 2815, 2817, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 2818, 2819, 2820, 2821, 2822, 2823, 2824, - 2825, 2826, 2827, 2828, 2829, 2830, 2831, 2832, - 2833, 2834, 2835, 2836, 2837, 2838, 2839, 2840, - 2841, 2842, 2843, 2844, 2845, 2846, 2847, 2848, - 2849, 2850, 2851, 2852, 2853, 2854, 2855, 2856, - 2857, 2858, 2859, 2860, 2861, 2862, 2863, 2864, - 2865, 2866, 2867, 2868, 2869, 2870, 2871, 2872, - 2873, 2874, 2875, 2876, 2877, 2878, 2879, 2880, - 2881, 2882, 2883, 2884, 2885, 2886, 
2887, 2888, - 2889, 2890, 2891, 2892, 2893, 2894, 2895, 2896, - 2897, 2898, 2899, 2900, 2901, 2902, 2903, 2904, - 2905, 2906, 2907, 2908, 2909, 2910, 2911, 2912, - 2913, 2914, 2915, 2916, 2917, 2918, 2919, 2920, - 2921, 2922, 2923, 2924, 2925, 2926, 2927, 2928, - 2929, 2930, 2931, 2932, 2933, 2934, 2935, 2936, - 2937, 2938, 2939, 2940, 2941, 2942, 2943, 2944, - 2945, 2946, 2947, 2948, 2949, 2950, 2951, 2952, - 2953, 2954, 2955, 2956, 2957, 2958, 2959, 2960, - 2961, 2962, 2963, 2964, 2965, 2966, 2967, 2968, - 2969, 2970, 2971, 2972, 2973, 2974, 2975, 2976, - 2977, 2978, 2979, 2980, 2981, 2982, 2983, 2984, - 2985, 2986, 2987, 2988, 2989, 2990, 2991, 2992, - 2993, 2994, 2995, 2996, 2997, 2998, 2999, 3000, - 3001, 3002, 3003, 3004, 3005, 3006, 3007, 3008, - 3009, 3010, 3011, 3012, 3013, 3014, 3015, 3016, - 3017, 3018, 3019, 3020, 3021, 3022, 3023, 3024, - 3025, 3026, 3027, 3028, 3029, 3030, 3031, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 2815, 2815, 0, 0, 0, - 0, 3032, 3033, 3033, 3033, 2815, 3034, 3035, - 3036, 3037, 3038, 3037, 3038, 3037, 3038, 3037, - 3038, 3037, 3038, 2815, 2815, 3037, 3038, 3037, - 3038, 3037, 3038, 3037, 3038, 3039, 3040, 3041, - 3041, 2815, 3036, 3036, 3036, 3036, 3036, 3036, - 3036, 3036, 3036, 3042, 1089, 555, 1088, 3043, - 3043, 3044, 3034, 3034, 3034, 3034, 3034, 3045, - 2815, 3046, 3047, 3048, 3034, 3035, 3049, 2815, - 77, 0, 3035, 3035, 3035, 3035, 3035, 3050, - 3035, 3035, 3035, 3035, 3051, 3052, 3053, 3054, - 3055, 3056, 3057, 3058, 3059, 3060, 3061, 3062, - 3063, 3064, 3065, 3066, 3067, 3068, 3069, 3070, - 3071, 3072, 3073, 3074, 3035, 3075, 3076, 3077, - 3078, 3079, 3080, 3035, 3035, 3035, 3035, 3035, - 3081, 3082, 3083, 3084, 3085, 3086, 3087, 3088, - 3089, 3090, 3091, 3092, 3093, 3094, 3095, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 
3096, 3035, 3035, - 0, 0, 3097, 3098, 3099, 3100, 3101, 3102, - 3103, 3039, 3035, 3035, 3035, 3035, 3035, 3104, - 3035, 3035, 3035, 3035, 3105, 3106, 3107, 3108, - 3109, 3110, 3111, 3112, 3113, 3114, 3115, 3116, - 3117, 3118, 3119, 3120, 3121, 3122, 3123, 3124, - 3125, 3126, 3127, 3128, 3035, 3129, 3130, 3131, - 3132, 3133, 3134, 3035, 3035, 3035, 3035, 3035, - 3135, 3136, 3137, 3138, 3139, 3140, 3141, 3142, - 3143, 3144, 3145, 3146, 3147, 3148, 3149, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3150, 3151, 3152, 3153, 3035, 3154, 3035, 3035, - 3155, 3156, 3157, 3158, 3033, 3034, 3159, 3160, - 3161, 0, 0, 0, 0, 0, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 0, 3162, 3163, 3164, 3165, 3166, 3167, - 3168, 3169, 3170, 3171, 3172, 3173, 3174, 3175, - 3176, 3177, 3178, 3179, 3180, 3181, 3182, 3183, - 3184, 3185, 3186, 3187, 3188, 3189, 3190, 3191, - 3192, 3193, 3194, 3195, 3196, 3197, 3198, 3199, - 3200, 3201, 3202, 3203, 3204, 3205, 3206, 3207, - 3208, 3209, 3210, 3211, 3212, 3213, 3214, 3215, - 3216, 3217, 3218, 3219, 3220, 3221, 3222, 3223, - 3224, 3225, 3226, 3227, 3228, 3229, 3230, 3231, - 3232, 3233, 3234, 3235, 3236, 3237, 3238, 3239, - 3240, 3241, 3242, 3243, 3244, 3245, 3246, 3247, - 3248, 3249, 3250, 3251, 3252, 3253, 3254, 3255, - 0, 3256, 3256, 3257, 3258, 3259, 3260, 3261, - 3262, 3263, 3264, 3265, 3266, 3267, 3268, 3269, - 3270, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 0, 0, 0, 0, - 0, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 
2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 2815, 2815, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3271, 3272, 3273, 3274, 3275, 3276, 3277, - 3278, 3279, 3280, 3281, 3282, 3283, 3284, 3285, - 3286, 3287, 3288, 3289, 3290, 3291, 3292, 3293, - 3294, 3295, 3296, 3297, 3298, 3299, 3300, 3301, - 0, 3302, 3303, 3304, 3305, 3306, 3307, 3308, - 3309, 3310, 3311, 3312, 3313, 3314, 3315, 3316, - 3317, 3318, 3319, 3320, 3321, 3322, 3323, 3324, - 3325, 3326, 3327, 3328, 3329, 3330, 3331, 3332, - 3333, 3334, 3335, 3336, 3337, 3338, 3339, 3340, - 3341, 1182, 1182, 1182, 1182, 1182, 1182, 1182, - 1182, 3342, 3343, 3344, 3345, 3346, 3347, 3348, - 3349, 3350, 3351, 3352, 3353, 3354, 3355, 3356, - 3357, 3358, 3359, 3360, 3361, 3362, 3363, 3364, - 3365, 3366, 3367, 3368, 3369, 3370, 3371, 3372, - 3373, 3374, 3375, 3376, 3377, 3378, 3379, 3380, - 3381, 3382, 3383, 3384, 3385, 3386, 3387, 3388, - 3256, 3389, 3390, 3391, 3392, 3393, 3394, 3395, - 3396, 3397, 3398, 3399, 3400, 3401, 3402, 3403, - 3404, 3405, 3406, 3407, 3408, 3409, 3410, 3411, - 3412, 3413, 3414, 3415, 3416, 3417, 3418, 3419, - 3420, 3421, 3422, 3423, 3424, 3425, 3426, 3427, - 3428, 3429, 3430, 3431, 3432, 3433, 3434, 3435, - 3436, 3437, 3438, 3439, 3440, 3441, 3442, 3443, - 3444, 3445, 3446, 3447, 3448, 3449, 3450, 3451, - 3452, 3453, 3454, 3455, 3456, 3457, 3458, 3459, - 3460, 3461, 3462, 3463, 3464, 3465, 3466, 3467, - 3468, 3469, 3470, 3471, 3472, 3473, 3474, 3475, - 3476, 3477, 3478, 3479, 3480, 3481, 3482, 3483, - 3484, 3485, 3486, 3487, 3488, 3489, 3490, 3491, - 3492, 3493, 3494, 3495, 3496, 3497, 3498, 3499, - 3500, 3501, 3502, 3503, 3504, 3505, 3506, 3507, - 3508, 3509, 3510, 3511, 3512, 3513, 3514, 3515, - 3516, 3517, 3518, 3519, 3520, 3521, 3522, 3523, - 3524, 3525, 3526, 3527, 3528, 3529, 3530, 3531, - 3532, 3533, 3534, 3535, 3536, 3537, 3538, 3539, - 3540, 3541, 3542, 3543, 3544, 3545, 3546, 3547, - 3548, 
3549, 3550, 3551, 3552, 3553, 3554, 3555, - 3556, 3557, 3558, 3559, 3560, 3561, 3562, 3563, - 3564, 3565, 3566, 3567, 3568, 3569, 3570, 3571, - 3572, 3573, 3574, 3575, 3576, 3577, 3578, 3579, - 3580, 3581, 3582, 3583, 3584, 3585, 3586, 3587, - 3588, 3589, 3590, 3591, 3592, 3593, 3594, 3595, - 3596, 3597, 3598, 3599, 3600, 3601, 3602, 3603, - 3604, 3605, 3606, 3607, 3608, 3609, 3610, 3611, - 3612, 3613, 3614, 3615, 3616, 3617, 3618, 3619, - 3620, 3621, 3622, 3623, 3624, 3625, 3626, 3627, - 3628, 3629, 3630, 3631, 3632, 3633, 3634, 3635, - 3636, 3637, 3638, 3639, 3640, 3641, 3642, 3643, - 3644, 3645, 3646, 3647, 3648, 3649, 3650, 3651, - 3652, 3653, 3654, 3655, 3656, 3657, 3658, 3659, - 3660, 3661, 3662, 3663, 3664, 3665, 3666, 3667, - 3668, 3669, 3670, 3671, 3672, 3673, 3674, 3675, - 3676, 3677, 3678, 3679, 3680, 3681, 3682, 3683, - 3684, 3685, 3686, 3687, 3688, 3689, 3690, 3691, - 3692, 3693, 3694, 3695, 3696, 3697, 3698, 3699, - 3700, 3701, 3702, 3703, 3704, 3705, 3706, 3707, - 3708, 3709, 3710, 3711, 3712, 3713, 3714, 3715, - 3716, 3717, 3718, 3719, 3720, 3721, 3722, 3723, - 3724, 3725, 3726, 3727, 3728, 3729, 3730, 3731, - 3732, 3733, 3734, 3735, 3736, 3737, 3738, 3739, - 3740, 3741, 3742, 3743, 3744, 3745, 3746, 3747, - 3748, 3749, 3750, 3751, 3752, 3753, 3754, 3755, - 3756, 3757, 3758, 3759, 3760, 3761, 3762, 3763, - 3764, 3765, 3766, 3767, 3768, 3769, 3770, 3771, - 3772, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 
3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 
3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 3035, 
3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3034, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 
3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 0, 0, - 0, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 525, 525, 525, 525, 525, 525, 1047, - 1047, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 525, 9, 9, - 9, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 341, 341, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 3773, 3774, 3775, 3776, 3777, 3778, 3779, - 3780, 3781, 3782, 3783, 3784, 3785, 3786, 3787, - 3788, 3789, 3790, 3791, 3792, 3793, 3794, 3795, - 3796, 3797, 3798, 3799, 3800, 3801, 3802, 3803, - 3804, 3805, 3806, 3807, 3808, 3809, 3810, 3811, - 3812, 3813, 3814, 3815, 3816, 3817, 3818, 341, - 543, 842, 842, 842, 9, 543, 543, 543, - 543, 543, 543, 543, 543, 543, 543, 9, - 524, 3819, 3820, 3821, 3822, 3823, 3824, 3825, - 3826, 3827, 3828, 3829, 3830, 3831, 3832, 3833, - 3834, 3835, 3836, 3837, 3838, 3839, 3840, 3841, - 3842, 3843, 3844, 3845, 3846, 3847, 3848, 543, - 543, 341, 341, 
341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 1458, - 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, - 1458, 543, 543, 1047, 1047, 1047, 1047, 1047, - 1047, 0, 0, 0, 0, 0, 0, 0, - 0, 526, 526, 526, 526, 526, 526, 526, - 526, 526, 526, 526, 526, 526, 526, 526, - 526, 526, 526, 526, 526, 526, 526, 526, - 524, 524, 524, 524, 524, 524, 524, 524, - 524, 526, 526, 3849, 3850, 3851, 3852, 3853, - 3854, 3855, 3856, 3857, 3858, 3859, 3860, 3861, - 3862, 215, 215, 3863, 3864, 3865, 3866, 3867, - 3868, 3869, 3870, 3871, 3872, 3873, 3874, 3875, - 3876, 3877, 3878, 3879, 3880, 3881, 3882, 3883, - 3884, 3885, 3886, 3887, 3888, 3889, 3890, 3891, - 3892, 3893, 3894, 3895, 3896, 3897, 3898, 3899, - 3900, 3901, 3902, 3903, 3904, 3905, 3906, 3907, - 3908, 3909, 3910, 3911, 3912, 3913, 3914, 3915, - 3916, 3917, 3918, 3919, 3920, 3921, 3922, 3923, - 3924, 3925, 215, 215, 215, 215, 215, 215, - 215, 215, 3926, 3927, 3928, 3929, 3930, 3931, - 3932, 3933, 3934, 3935, 3936, 3937, 3938, 3939, - 3940, 524, 3941, 3941, 3942, 3943, 3944, 215, - 341, 3945, 3946, 3947, 3948, 3949, 215, 3950, - 3951, 3952, 3953, 3954, 3955, 3956, 3957, 3958, - 3959, 3960, 3961, 3962, 3963, 3964, 3965, 3966, - 3967, 3968, 3969, 3970, 3971, 3972, 3973, 3974, - 215, 3975, 3976, 3977, 3978, 3979, 3980, 3981, - 3982, 3983, 3984, 3985, 3986, 3987, 3988, 3989, - 3990, 0, 0, 3991, 3992, 3993, 3994, 3995, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 341, 3996, 3997, 215, 341, 341, 341, 341, - 341, 341, 341, 1151, 341, 341, 341, 1163, - 341, 341, 341, 341, 1151, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 
341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 1155, 1155, 1151, 1151, - 1155, 77, 77, 77, 77, 0, 0, 0, - 0, 1182, 1182, 1182, 1182, 1182, 1182, 841, - 841, 11, 84, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 9, 9, 9, - 9, 0, 0, 0, 0, 0, 0, 0, - 0, 1155, 1155, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 1155, 1155, 1155, - 1155, 1155, 1155, 1155, 1155, 1155, 1155, 1155, - 1155, 1155, 1155, 1155, 1155, 1163, 1151, 0, - 0, 0, 0, 0, 0, 0, 0, 1047, - 1047, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 0, 0, 0, 0, 0, - 0, 543, 543, 543, 543, 543, 543, 543, - 543, 543, 543, 543, 543, 543, 543, 543, - 543, 543, 543, 341, 341, 341, 341, 341, - 341, 1047, 1047, 1047, 341, 1047, 341, 341, - 1151, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 1151, - 1151, 1151, 1151, 1151, 556, 556, 556, 1047, - 1047, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 1151, 1151, 1151, 1155, 1483, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 1047, 1361, 1361, 1361, 1361, 1361, 1361, 1361, - 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, - 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, - 1361, 1361, 1361, 1361, 1361, 1361, 0, 0, - 0, 1151, 1151, 1151, 1155, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 
341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 1173, 1155, 1155, 1151, - 1151, 1151, 1151, 1155, 1155, 1151, 1151, 1155, - 1155, 1483, 1047, 1047, 1047, 1047, 1047, 1047, - 1047, 1047, 1047, 1047, 1047, 1047, 1047, 0, - 525, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 0, 0, 0, 0, 1047, - 1047, 341, 341, 341, 341, 341, 1151, 525, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 341, 341, 341, 341, 341, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 1151, 1151, 1151, 1151, 1151, 1151, - 1155, 1155, 1151, 1151, 1155, 1155, 1151, 1151, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 1151, 341, 341, 341, - 341, 341, 341, 341, 341, 1151, 1155, 0, - 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 0, 0, 1047, 1047, 1047, - 1047, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 525, 341, 341, 341, 341, 341, 341, - 841, 841, 841, 341, 1272, 1151, 1272, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 543, 341, 543, 543, 556, 341, 341, - 543, 543, 341, 341, 341, 341, 341, 543, - 543, 341, 543, 341, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 341, 341, 525, 1047, - 1047, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 1155, 1151, 1151, 1155, - 1155, 1047, 1047, 341, 525, 525, 1155, 1163, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 341, 341, 341, 341, 341, 341, - 0, 0, 341, 341, 341, 341, 341, 341, - 0, 
0, 341, 341, 341, 341, 341, 341, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 0, 341, 341, 341, 341, 341, 341, 341, - 0, 215, 215, 215, 215, 215, 215, 215, - 215, 215, 215, 215, 215, 215, 215, 215, - 215, 215, 215, 215, 215, 215, 215, 215, - 215, 215, 215, 215, 215, 215, 215, 215, - 215, 215, 215, 215, 3998, 215, 215, 215, - 215, 215, 215, 215, 3941, 3999, 4000, 4001, - 4002, 215, 215, 215, 215, 215, 215, 215, - 215, 0, 0, 0, 0, 0, 0, 0, - 0, 4003, 4004, 4005, 4006, 4007, 4008, 4009, - 4010, 4011, 4012, 4013, 4014, 4015, 4016, 4017, - 4018, 4019, 4020, 4021, 4022, 4023, 4024, 4025, - 4026, 4027, 4028, 4029, 4030, 4031, 4032, 4033, - 4034, 4035, 4036, 4037, 4038, 4039, 4040, 4041, - 4042, 4043, 4044, 4045, 4046, 4047, 4048, 4049, - 4050, 4051, 4052, 4053, 4054, 4055, 4056, 4057, - 4058, 4059, 4060, 4061, 4062, 4063, 4064, 4065, - 4066, 4067, 4068, 4069, 4070, 4071, 4072, 4073, - 4074, 4075, 4076, 4077, 4078, 4079, 4080, 4081, - 4082, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 1155, 1155, 1151, 1155, - 1155, 1151, 1155, 1155, 1047, 1155, 1163, 0, - 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 0, 0, 0, 0, 0, - 0, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 
4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 
4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 
4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 
4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 
4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 
4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, - 4084, 4084, 4084, 4084, 4084, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1364, 1364, 1364, 1364, 1364, 1364, 1364, - 1364, 1364, 1364, 1364, 1364, 1364, 1364, 1364, - 1364, 1364, 1364, 1364, 1364, 1364, 1364, 1364, - 0, 0, 0, 0, 1365, 1365, 1365, 1365, - 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, - 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, - 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, - 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, - 1365, 1365, 1365, 1365, 1365, 1365, 
1365, 1365, - 1365, 1365, 1365, 1365, 1365, 0, 0, 0, - 0, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, - 4085, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, 
- 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4087, 4088, 4089, 4090, 4091, 4092, 4093, - 4094, 4094, 4095, 4096, 4097, 4098, 4099, 4100, - 4101, 4102, 4103, 4104, 4105, 4106, 4107, 4108, - 4109, 4110, 4111, 4112, 4113, 4114, 4115, 4116, - 4117, 4118, 4119, 4120, 4121, 4122, 4123, 4124, - 4125, 4126, 4127, 4128, 4129, 4130, 4131, 4132, - 4133, 4134, 4135, 4136, 4137, 4138, 4139, 4140, - 4141, 4142, 4143, 4144, 4145, 4146, 4147, 4148, - 4149, 4150, 4151, 4152, 4153, 4154, 4155, 4156, - 4157, 4158, 4159, 4160, 4161, 4162, 4163, 4164, - 4165, 4166, 4167, 4168, 4169, 4170, 4171, 4172, - 4173, 4174, 4175, 4176, 4177, 4106, 4178, 4179, - 4180, 4181, 4182, 4183, 4184, 4185, 4186, 4187, - 4188, 4189, 4190, 4191, 4192, 4193, 4194, 4195, - 4196, 4197, 4198, 4199, 4200, 4201, 4202, 4203, 
- 4204, 4205, 4206, 4207, 4208, 4209, 4210, 4211, - 4212, 4213, 4214, 4215, 4216, 4217, 4218, 4219, - 4220, 4221, 4222, 4223, 4224, 4225, 4226, 4227, - 4228, 4229, 4230, 4231, 4232, 4233, 4234, 4235, - 4236, 4237, 4238, 4239, 4240, 4241, 4242, 4243, - 4244, 4245, 4196, 4246, 4247, 4248, 4249, 4250, - 4251, 4252, 4253, 4180, 4254, 4255, 4256, 4257, - 4258, 4259, 4260, 4261, 4262, 4263, 4264, 4265, - 4266, 4267, 4268, 4269, 4270, 4271, 4272, 4273, - 4106, 4274, 4275, 4276, 4277, 4278, 4279, 4280, - 4281, 4282, 4283, 4284, 4285, 4286, 4287, 4288, - 4289, 4290, 4291, 4292, 4293, 4294, 4295, 4296, - 4297, 4298, 4299, 4300, 4182, 4301, 4302, 4303, - 4304, 4305, 4306, 4307, 4308, 4309, 4310, 4311, - 4312, 4313, 4314, 4315, 4316, 4317, 4318, 4319, - 4320, 4321, 4322, 4323, 4324, 4325, 4326, 4327, - 4328, 4329, 4330, 4331, 4332, 4333, 4334, 4335, - 4336, 4337, 4338, 4339, 4340, 4341, 4342, 4343, - 4344, 4345, 4346, 4347, 4348, 4349, 4350, 3035, - 3035, 4351, 3035, 4352, 3035, 3035, 4353, 4354, - 4355, 4356, 4357, 4358, 4359, 4360, 4361, 4362, - 3035, 4363, 3035, 4364, 3035, 3035, 4365, 4366, - 3035, 3035, 3035, 4367, 4368, 4369, 4370, 4371, - 4372, 4373, 4374, 4375, 4376, 4377, 4378, 4379, - 4380, 4381, 4382, 4383, 4384, 4385, 4386, 4387, - 4388, 4389, 4390, 4391, 4392, 4393, 4394, 4395, - 4396, 4397, 4398, 4399, 4400, 4401, 4402, 4403, - 4404, 4405, 4406, 4407, 4408, 4409, 4410, 4411, - 4235, 4412, 4413, 4414, 4415, 4416, 4417, 4417, - 4418, 4419, 4420, 4421, 4422, 4423, 4424, 4425, - 4365, 4426, 4427, 4428, 4429, 4430, 4431, 0, - 0, 4432, 4433, 4434, 4435, 4436, 4437, 4438, - 4439, 4379, 4440, 4441, 4442, 4351, 4443, 4444, - 4445, 4446, 4447, 4448, 4449, 4450, 4451, 4452, - 4453, 4454, 4388, 4455, 4389, 4456, 4457, 4458, - 4459, 4460, 4352, 4127, 4461, 4462, 4463, 4197, - 4284, 4464, 4465, 4396, 4466, 4397, 4467, 4468, - 4469, 4354, 4470, 4471, 4472, 4473, 4474, 4355, - 4475, 4476, 4477, 4478, 4479, 4480, 4411, 4481, - 4482, 4235, 4483, 4415, 4484, 4485, 4486, 4487, - 
4488, 4420, 4489, 4364, 4490, 4421, 4178, 4491, - 4422, 4492, 4424, 4493, 4494, 4495, 4496, 4497, - 4426, 4360, 4498, 4427, 4499, 4428, 4500, 4094, - 4501, 4502, 4503, 4504, 4505, 4506, 4507, 4508, - 4509, 4510, 4511, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 4512, 4513, 4514, 4515, 4516, 4517, 4518, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 4519, 4520, 4521, 4522, - 4523, 0, 0, 0, 0, 0, 4524, 4525, - 4526, 4527, 4528, 4529, 4530, 4531, 4532, 4533, - 4534, 4535, 4536, 4537, 4538, 4539, 4540, 4541, - 4542, 4543, 4544, 4545, 4546, 4547, 4548, 4549, - 0, 4550, 4551, 4552, 4553, 4554, 0, 4555, - 0, 4556, 4557, 0, 4558, 4559, 0, 4560, - 4561, 4562, 4563, 4564, 4565, 4566, 4567, 4568, - 4569, 4570, 4571, 4572, 4573, 4574, 4575, 4576, - 4577, 4578, 4579, 4580, 4581, 4582, 4583, 4584, - 4585, 4586, 4587, 4588, 4589, 4590, 4591, 4592, - 4593, 4594, 4595, 4596, 4597, 4598, 4599, 4600, - 4601, 4602, 4603, 4604, 4605, 4606, 4607, 4608, - 4609, 4610, 4611, 4612, 4613, 4614, 4615, 4616, - 4617, 4618, 4619, 4620, 4621, 4622, 4623, 4624, - 4625, 4626, 4627, 4628, 4629, 4630, 4631, 4632, - 4633, 4634, 4635, 4636, 4637, 4638, 4639, 4640, - 4641, 4642, 4643, 4644, 4645, 4646, 4647, 4648, - 4649, 4650, 4651, 4652, 4653, 4654, 4655, 4656, - 4657, 4658, 4659, 4660, 4661, 4662, 4663, 4664, - 4665, 4666, 4667, 4668, 4668, 4668, 4668, 4668, - 4668, 4668, 4668, 4668, 4668, 4668, 4668, 4668, - 4668, 4668, 4668, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 4669, 4670, 4671, 4672, - 4673, 4674, 4675, 4676, 4677, 4678, 4679, 4680, - 4681, 4682, 4683, 4684, 4685, 4686, 4687, 4688, - 4689, 4690, 4691, 4692, 4693, 4694, 4695, 4696, - 4697, 4698, 4699, 4700, 4701, 4702, 4703, 4704, - 4705, 4706, 4707, 4708, 4709, 4710, 4711, 4712, - 4713, 4714, 4715, 4716, 4707, 4717, 4718, 4719, - 4720, 4721, 4722, 4723, 4724, 4725, 4726, 4727, - 4728, 4729, 4730, 4731, 4732, 4733, 4734, 4735, - 4736, 4737, 4738, 4739, 4740, 
4741, 4742, 4743, - 4744, 4745, 4746, 4747, 4748, 4749, 4750, 4751, - 4752, 4753, 4754, 4755, 4756, 4757, 4758, 4759, - 4760, 4761, 4762, 4763, 4764, 4765, 4766, 4767, - 4768, 4769, 4770, 4771, 4772, 4773, 4774, 4775, - 4776, 4777, 4778, 4779, 4780, 4781, 4782, 4783, - 4784, 4785, 4786, 4787, 4788, 4789, 4790, 4791, - 4792, 4793, 4794, 4795, 4796, 4797, 4798, 4799, - 4800, 4801, 4802, 4803, 4804, 4805, 4806, 4807, - 4808, 4809, 4810, 4811, 4812, 4813, 4814, 4815, - 4816, 4708, 4817, 4818, 4819, 4820, 4821, 4822, - 4823, 4824, 4825, 4826, 4827, 4828, 4829, 4830, - 4831, 4832, 4833, 4834, 4835, 4836, 4837, 4838, - 4839, 4840, 4841, 4842, 4843, 4844, 4845, 4846, - 4847, 4848, 4849, 4850, 4851, 4852, 4853, 4854, - 4855, 4856, 4857, 4858, 4859, 4860, 4861, 4862, - 4863, 4864, 4865, 4866, 4867, 4868, 4869, 4870, - 4871, 4872, 4873, 4874, 4875, 4876, 4877, 4878, - 4879, 4880, 4881, 4882, 4883, 4884, 4885, 4886, - 4887, 4888, 4889, 4890, 4891, 4892, 4893, 4894, - 4895, 4896, 4897, 4898, 4899, 4900, 4901, 4902, - 4903, 4904, 4905, 4906, 4907, 4908, 4909, 4910, - 4911, 4912, 4913, 4914, 4915, 4916, 4917, 4918, - 4919, 4920, 4921, 4922, 4923, 4924, 4925, 4926, - 4927, 4928, 4929, 4930, 4931, 4932, 4933, 4934, - 4935, 4936, 4937, 4938, 4939, 4940, 4941, 4942, - 4943, 4944, 4945, 4946, 4947, 4948, 4949, 4950, - 4951, 4952, 4953, 4954, 4955, 4956, 4957, 4958, - 4959, 4960, 4961, 4962, 4963, 4964, 4965, 4966, - 4967, 4968, 4969, 4970, 4971, 4972, 4973, 4974, - 4975, 4976, 4977, 4978, 4979, 4980, 4981, 4982, - 4983, 4984, 4985, 4986, 4987, 4988, 4989, 4990, - 4991, 4992, 4993, 4994, 4995, 4996, 4997, 4998, - 4999, 5000, 5001, 5002, 5003, 5004, 5005, 5006, - 5007, 5008, 5009, 5010, 5011, 5012, 5013, 5014, - 5015, 5016, 5017, 5018, 5019, 5020, 5021, 5022, - 5023, 5024, 5025, 5026, 5027, 5028, 5029, 5030, - 2137, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 5031, 5032, 5033, 5034, 5035, 5036, 5037, - 5038, 5039, 5040, 5041, 5042, 5043, 5044, 5045, - 5046, 5047, 5048, 5049, 
5050, 5051, 5052, 5053, - 5054, 5055, 5056, 5057, 5058, 5059, 5060, 5061, - 5062, 5063, 5064, 5065, 5066, 5067, 5068, 5069, - 5070, 5071, 5072, 5073, 5074, 5075, 5076, 5077, - 5078, 5079, 5080, 5081, 5082, 5083, 5084, 5085, - 5086, 5087, 5088, 5089, 5090, 5091, 5092, 5093, - 5094, 0, 0, 5095, 5096, 5097, 5098, 5099, - 5100, 5101, 5102, 5103, 5104, 5105, 5106, 5107, - 5108, 5109, 5110, 5111, 5112, 5113, 5114, 5115, - 5116, 5117, 5118, 5119, 5120, 5121, 5122, 5123, - 5124, 5125, 5126, 5127, 5128, 5129, 5130, 5131, - 5132, 5133, 5134, 5135, 5136, 5137, 5138, 5139, - 5140, 5141, 5142, 5143, 5144, 5145, 5146, 5147, - 5148, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 5149, 5150, 5151, 5152, 5153, 5154, 5155, - 5156, 5157, 5158, 5159, 5160, 5161, 77, 0, - 0, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 5162, 5163, 5164, 5165, 5166, 5167, 5168, - 5169, 5170, 5171, 0, 0, 0, 0, 0, - 0, 543, 543, 543, 543, 543, 543, 543, - 556, 556, 556, 556, 556, 556, 556, 543, - 543, 5172, 5173, 5174, 5175, 5175, 5176, 5177, - 5178, 5179, 5180, 5181, 5182, 5183, 5184, 5185, - 5186, 5187, 5188, 5189, 5190, 5191, 3033, 3033, - 5192, 5193, 5194, 5194, 5194, 5194, 5195, 5195, - 5195, 5196, 5197, 5198, 0, 5199, 5200, 5201, - 5202, 5203, 5204, 5205, 5206, 5207, 5208, 5209, - 5210, 5211, 5212, 5213, 5214, 5215, 5216, 5217, - 0, 5218, 5219, 5220, 5221, 0, 0, 0, - 0, 5222, 5223, 5224, 1117, 5225, 0, 5226, - 5227, 5228, 5229, 5230, 5231, 5232, 5233, 5234, - 5235, 5236, 5237, 5238, 5239, 5240, 5241, 5242, - 5243, 5244, 5245, 5246, 5247, 5248, 5249, 5250, - 5251, 5252, 5253, 5254, 5255, 5256, 5257, 5258, - 5259, 5260, 5261, 5262, 5263, 5264, 5265, 5266, - 5267, 5268, 5269, 5270, 5271, 5272, 5273, 5274, - 5275, 5276, 5277, 5278, 5279, 5280, 5281, 5282, - 5283, 5284, 5285, 5286, 5287, 5288, 5289, 5290, - 5291, 5292, 5293, 5294, 5295, 5296, 5297, 5298, - 5299, 5300, 5301, 5302, 
5303, 5304, 5305, 5306, - 5307, 5308, 5309, 5310, 5311, 5312, 5313, 5314, - 5315, 5316, 5317, 5318, 5319, 5320, 5321, 5322, - 5323, 5324, 5325, 5326, 5327, 5328, 5329, 5330, - 5331, 5332, 5333, 5334, 5335, 5336, 5337, 5338, - 5339, 5340, 5341, 5342, 5343, 5344, 5345, 5346, - 5347, 5348, 5349, 5350, 5351, 5352, 5353, 5354, - 5355, 5356, 5357, 5358, 5359, 5360, 0, 0, - 1459, 0, 5361, 5362, 5363, 5364, 5365, 5366, - 5367, 5368, 5369, 5370, 5371, 5372, 5373, 5374, - 5375, 5376, 5377, 5378, 5379, 5380, 5381, 5382, - 5383, 5384, 5385, 5386, 5387, 5388, 5389, 5390, - 5391, 5392, 5393, 5394, 5395, 5396, 5397, 5398, - 5399, 5400, 5401, 5402, 5403, 5404, 5405, 5406, - 5407, 5408, 5409, 5410, 5411, 5412, 5413, 5414, - 5415, 5416, 5417, 5418, 5419, 5420, 5421, 5422, - 5423, 5424, 5425, 5426, 5427, 5428, 5429, 5430, - 5431, 5432, 5433, 5434, 5435, 5436, 5437, 5438, - 5439, 5440, 5441, 5442, 5443, 5444, 5445, 5446, - 5447, 5448, 5449, 5450, 5451, 5452, 5453, 5454, - 5455, 5456, 5457, 5458, 5459, 5460, 5461, 5462, - 5463, 5464, 5465, 5466, 5467, 5468, 5469, 5470, - 5471, 5472, 5473, 5474, 5475, 5476, 5477, 5478, - 5479, 5480, 5481, 5482, 5483, 5484, 5485, 5486, - 5487, 5488, 5489, 5490, 5491, 5492, 5493, 5494, - 5495, 5496, 5497, 5498, 5499, 5500, 5501, 5502, - 5503, 5504, 5505, 5506, 5507, 5508, 5509, 5510, - 5511, 5512, 5513, 5514, 5515, 5516, 5517, 5518, - 5519, 5520, 5521, 5522, 5523, 5524, 5525, 5526, - 5527, 5528, 5529, 5530, 5531, 5532, 5533, 5534, - 5535, 5536, 5537, 5538, 5539, 5540, 5541, 5542, - 5543, 5544, 5545, 5546, 5547, 5548, 5549, 5550, - 0, 0, 0, 5551, 5552, 5553, 5554, 5555, - 5556, 0, 0, 5557, 5558, 5559, 5560, 5561, - 5562, 0, 0, 5563, 5564, 5565, 5566, 5567, - 5568, 0, 0, 5569, 5570, 5571, 0, 0, - 0, 5572, 5573, 5574, 5575, 5576, 5577, 5578, - 0, 5579, 5580, 5581, 5582, 5583, 5584, 5585, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 5586, 5586, 5586, 77, 77, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 0, 341, 341, - 341, 341, 341, 341, 341, 
341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 0, 341, 341, 0, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 0, 0, 0, 0, - 0, 1047, 9, 1047, 0, 0, 0, 0, - 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, - 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, - 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, - 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, - 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, - 1182, 1182, 1182, 1182, 1182, 0, 0, 0, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 5587, 5587, 5587, 5587, 5587, 5587, 5587, - 5587, 5587, 5587, 5587, 5587, 5587, 5587, 5587, - 5587, 5587, 5587, 5587, 5587, 5587, 5587, 5587, - 5587, 5587, 5587, 5587, 5587, 5587, 5587, 5587, - 5587, 5587, 5587, 5587, 5587, 5587, 5587, 5587, - 5587, 5587, 5587, 5587, 5587, 5587, 5587, 5587, - 5587, 5587, 5587, 5587, 5587, 5587, 1211, 1211, - 1211, 1211, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 1211, 1211, 77, 841, 841, - 0, 77, 77, 77, 77, 77, 
77, 77, - 77, 77, 77, 77, 77, 0, 0, 0, - 0, 77, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 556, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 556, 5588, 5588, 5588, 5588, 5588, 5588, - 5588, 5588, 5588, 5588, 5588, 5588, 5588, 5588, - 5588, 5588, 5588, 5588, 5588, 5588, 5588, 5588, - 5588, 5588, 5588, 5588, 5588, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 1182, 1182, 1182, 1182, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 1458, 341, 341, 341, 341, 341, - 341, 341, 341, 1458, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 
341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 543, - 543, 543, 543, 543, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 0, - 1047, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 1047, 1458, 1458, 1458, 1458, 1458, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 5589, 5590, 5591, 5592, 5593, 5594, 5595, - 5596, 5597, 5598, 5599, 5600, 5601, 5602, 5603, - 5604, 5605, 5606, 5607, 5608, 5609, 5610, 5611, - 5612, 5613, 5614, 5615, 5616, 5617, 5618, 5619, - 5620, 5621, 5622, 5623, 5624, 5625, 5626, 5627, - 5628, 5629, 5630, 5631, 5632, 5633, 5634, 5635, - 5636, 5637, 5638, 5639, 5640, 5641, 5642, 5643, - 5644, 5645, 5646, 5647, 5648, 5649, 5650, 5651, - 5652, 5653, 5654, 5655, 5656, 5657, 5658, 5659, - 5660, 5661, 5662, 5663, 5664, 5665, 5666, 5667, - 5668, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 0, - 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 0, 0, 0, 0, 0, - 0, 5669, 5670, 5671, 5672, 5673, 5674, 5675, - 5676, 5677, 5678, 5679, 5680, 5681, 5682, 5683, - 5684, 5685, 5686, 5687, 5688, 5689, 5690, 5691, - 5692, 5693, 5694, 5695, 5696, 5697, 5698, 5699, - 5700, 5701, 5702, 5703, 5704, 0, 0, 0, - 0, 5705, 5706, 
5707, 5708, 5709, 5710, 5711, - 5712, 5713, 5714, 5715, 5716, 5717, 5718, 5719, - 5720, 5721, 5722, 5723, 5724, 5725, 5726, 5727, - 5728, 5729, 5730, 5731, 5732, 5733, 5734, 5735, - 5736, 5737, 5738, 5739, 5740, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 1047, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 
0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1108, 1108, 1108, 1108, 1108, 1108, 0, - 0, 1108, 0, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 0, - 1108, 1108, 0, 0, 0, 1108, 0, 0, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 0, - 1105, 5741, 5741, 5741, 5741, 5741, 5741, 5741, - 5741, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 5742, 5742, 5741, 5741, 5741, 5741, 5741, 5741, - 5741, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 0, 0, 0, 0, 0, 0, 0, 0, - 5741, 5741, 5741, 5741, 5741, 5741, 5741, 5741, - 5741, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 0, 1108, 1108, 0, - 0, 0, 0, 0, 5741, 5741, 5741, 5741, - 5741, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 5741, - 5741, 5741, 5741, 5741, 5741, 0, 0, 0, - 9, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 0, 0, 0, 0, 0, - 
1105, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 0, 0, 0, 0, 5741, 5741, 1108, - 1108, 5741, 5741, 5741, 5741, 5741, 5741, 5741, - 5741, 5741, 5741, 5741, 5741, 5741, 5741, 5741, - 5741, 0, 0, 5741, 5741, 5741, 5741, 5741, - 5741, 5741, 5741, 5741, 5741, 5741, 5741, 5741, - 5741, 5741, 5741, 5741, 5741, 5741, 5741, 5741, - 5741, 5741, 5741, 5741, 5741, 5741, 5741, 5741, - 5741, 5741, 5741, 5741, 5741, 5741, 5741, 5741, - 5741, 5741, 5741, 5741, 5741, 5741, 5741, 5741, - 5741, 1108, 1151, 1151, 1151, 0, 1151, 1151, - 0, 0, 0, 0, 0, 1151, 556, 1151, - 543, 1108, 1108, 1108, 1108, 0, 1108, 1108, - 1108, 0, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 0, - 0, 543, 569, 556, 0, 0, 0, 0, - 1163, 5741, 5741, 5741, 5741, 5741, 5741, 5741, - 5741, 5741, 0, 0, 0, 0, 0, 0, - 0, 1105, 1105, 1105, 1105, 1105, 1105, 1105, - 1105, 1105, 0, 0, 0, 0, 0, 0, - 0, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 5741, 5741, - 1105, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 5741, 5741, - 5741, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 
0, 0, - 0, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 5742, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 543, 556, - 0, 0, 0, 0, 5741, 5741, 5741, 5741, - 5741, 1105, 1105, 1105, 1105, 1105, 1105, 1105, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 0, - 0, 0, 9, 9, 9, 9, 9, 9, - 9, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 0, - 0, 5741, 5741, 5741, 5741, 5741, 5741, 5741, - 5741, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 0, 0, 0, 0, - 0, 5741, 5741, 5741, 5741, 5741, 5741, 5741, - 5741, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 0, 0, 0, 0, 0, - 0, 0, 1105, 1105, 1105, 1105, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 5741, 5741, 5741, 5741, 5741, 5741, - 5741, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 
1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 5743, 5744, 5745, 5746, 5747, 5748, 5749, - 5750, 5751, 5752, 5753, 5754, 5755, 5756, 5757, - 5758, 5759, 5760, 5761, 5762, 5763, 5764, 5765, - 5766, 5767, 5768, 5769, 5770, 5771, 5772, 5773, - 5774, 5775, 5776, 5777, 5778, 5779, 5780, 5781, - 5782, 5783, 5784, 5785, 5786, 5787, 5788, 5789, - 5790, 5791, 5792, 5793, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 5794, 5795, 5796, 5797, 5798, 5799, 5800, - 5801, 5802, 5803, 5804, 5805, 5806, 5807, 5808, - 5809, 5810, 5811, 5812, 5813, 5814, 5815, 5816, - 5817, 5818, 5819, 5820, 5821, 5822, 5823, 5824, - 5825, 5826, 5827, 5828, 5829, 5830, 5831, 5832, - 5833, 5834, 5835, 5836, 5837, 5838, 5839, 5840, - 5841, 5842, 5843, 5844, 0, 0, 0, 0, - 0, 0, 0, 5741, 5741, 5741, 5741, 5741, - 5741, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 543, 543, 543, - 543, 0, 0, 0, 0, 0, 0, 0, - 0, 1135, 1135, 1135, 1135, 1135, 1135, 1135, - 1135, 1135, 1135, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 
- 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 5845, 5845, 5845, 5845, 5845, 5845, 5845, - 5845, 5845, 5845, 5845, 5845, 5845, 5845, 5845, - 5845, 5845, 5845, 5845, 5845, 5845, 5845, 5845, - 5845, 5845, 5845, 5845, 5845, 5845, 5845, 5845, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 5741, 5741, - 5741, 5741, 5741, 5741, 5741, 5741, 5741, 5741, - 1108, 0, 0, 0, 0, 0, 0, 0, - 0, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, - 1117, 1117, 1117, 1117, 1117, 1117, 1117, 556, - 556, 543, 543, 543, 556, 543, 556, 556, - 556, 556, 5846, 5846, 5846, 5846, 1112, 1112, - 1112, 1112, 1112, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 
1108, 1108, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1155, 1151, 1155, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1163, - 1047, 1047, 1047, 1047, 1047, 1047, 1047, 0, - 0, 0, 0, 1211, 1211, 1211, 1211, 1211, - 1211, 1211, 1211, 1211, 1211, 1211, 1211, 1211, - 1211, 1211, 1211, 1211, 1211, 1211, 1211, 1172, - 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 1163, 1151, 1151, 1155, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 5847, 5848, 5849, 5850, 341, 341, - 341, 341, 341, 341, 341, 341, 5851, 341, - 341, 341, 341, 341, 5852, 341, 341, 341, - 341, 1155, 1155, 1155, 1151, 1151, 1151, 1151, - 1155, 1155, 1163, 5853, 1047, 1047, 5854, 1047, - 1047, 1047, 1047, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 5854, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 0, 0, 0, 0, 0, 0, - 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 0, 0, 0, 0, 0, - 0, 543, 543, 543, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 5855, 1151, 1151, 1151, 1151, 1155, 1151, 5856, - 5857, 1151, 5858, 5859, 1163, 1163, 0, 1172, - 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1047, 1047, 1047, 1047, 341, 1155, 1155, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 
341, 1173, 1047, 1047, 341, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1151, 1151, 1155, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 1155, 1155, 1155, 1151, - 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 1155, 1483, 341, 1228, 1228, 341, 1047, 1047, - 1047, 1047, 1151, 1173, 1151, 1151, 1047, 0, - 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 341, 1047, 341, 1047, 1047, - 1047, 0, 1182, 1182, 1182, 1182, 1182, 1182, - 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, - 1182, 1182, 1182, 1182, 1182, 1182, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 0, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 1155, 1155, 1155, - 1151, 1151, 1151, 1155, 1155, 1151, 1483, 1173, - 1151, 1047, 1047, 1047, 1047, 1047, 1047, 1151, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 0, 341, 0, 341, 341, 341, 341, 0, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 0, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 1047, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 1151, 1155, 1155, 1155, 1151, 1151, 1151, 1151, - 1151, 1151, 1173, 1163, 0, 0, 0, 0, - 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 0, 0, 0, 0, 0, - 0, 1151, 1151, 1155, 
1155, 0, 341, 341, - 341, 341, 341, 341, 341, 341, 0, 0, - 341, 341, 0, 0, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 0, 341, 341, 341, 341, 341, - 341, 341, 0, 341, 341, 0, 341, 341, - 341, 341, 341, 0, 1173, 1173, 341, 5860, - 1155, 1151, 1155, 1155, 1155, 1155, 0, 0, - 5861, 1155, 0, 0, 5862, 5863, 1483, 0, - 0, 341, 0, 0, 0, 0, 0, 0, - 5864, 0, 0, 0, 0, 0, 341, 341, - 341, 341, 341, 1155, 1155, 0, 0, 543, - 543, 543, 543, 543, 543, 543, 0, 0, - 0, 543, 543, 543, 543, 543, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 1155, 1155, - 1155, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 1151, 1155, 1155, 1163, 1151, 1151, 1155, 1173, - 341, 341, 341, 341, 1047, 1047, 1047, 1047, - 1047, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 0, 1047, 0, 1047, 543, - 341, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 5865, 1155, 1155, 1151, 1151, 1151, 1151, - 1151, 1151, 5866, 5867, 5868, 5869, 5870, 5871, - 1151, 
1151, 1155, 1163, 1173, 341, 341, 1047, - 341, 0, 0, 0, 0, 0, 0, 0, - 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 5872, 1155, 1155, 1151, 1151, 1151, 1151, 0, - 0, 5873, 5874, 5875, 5876, 1151, 1151, 1155, - 1163, 1173, 1047, 1047, 1047, 1047, 1047, 1047, - 1047, 1047, 1047, 1047, 1047, 1047, 1047, 1047, - 1047, 1047, 1047, 1047, 1047, 1047, 1047, 1047, - 1047, 341, 341, 341, 341, 1151, 1151, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 1155, 1155, 1155, 1151, 1151, 1151, 1151, - 1151, 1151, 1151, 1151, 1155, 1155, 1151, 1155, - 1163, 1151, 1047, 1047, 1047, 341, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 0, 0, 0, 0, 0, - 0, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 
341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 1151, 1155, 1151, 1155, - 1155, 1151, 1151, 1151, 1151, 1151, 1151, 1483, - 1173, 341, 0, 0, 0, 0, 0, 0, - 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 0, 0, 1151, 1151, - 1151, 1155, 1155, 1151, 1151, 1151, 1151, 1155, - 1151, 1151, 1151, 1151, 1163, 0, 0, 0, - 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 1182, 1182, 1047, 1047, 1047, - 841, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 1155, 1155, 1155, - 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 1151, 1155, 1163, 1173, 1047, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 
0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 5877, 5878, 5879, 5880, 5881, 5882, 5883, - 5884, 5885, 5886, 5887, 5888, 5889, 5890, 5891, - 5892, 5893, 5894, 5895, 5896, 5897, 5898, 5899, - 5900, 5901, 5902, 5903, 5904, 5905, 5906, 5907, - 5908, 5909, 5910, 5911, 5912, 5913, 5914, 5915, - 5916, 5917, 5918, 5919, 5920, 5921, 5922, 5923, - 5924, 5925, 5926, 5927, 5928, 5929, 5930, 5931, - 5932, 5933, 5934, 5935, 5936, 5937, 5938, 5939, - 5940, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 1182, 1182, 1182, 1182, 1182, - 1182, 1182, 1182, 1182, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 341, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 0, 0, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 1155, 1155, 1155, 1151, 1151, 1151, - 1151, 0, 0, 1151, 1151, 1155, 1155, 1155, - 1155, 1163, 341, 1047, 341, 1155, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 1151, 1151, 1151, 1151, 1151, 1151, - 5941, 5941, 1151, 1151, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 1151, 1163, 1151, 1151, - 1151, 1151, 1155, 1228, 
1151, 1151, 1151, 1151, - 1047, 1047, 1047, 1047, 1047, 1047, 1047, 1047, - 1163, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 1151, 1151, 1151, 1151, 1151, 1151, - 1155, 1155, 1151, 1151, 1151, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 1228, 1228, 1228, - 1228, 1228, 1228, 1151, 1151, 1151, 1151, 1151, - 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 1155, 1151, 1163, 1047, 1047, 1047, 341, 1047, - 1047, 1047, 1047, 1047, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 0, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 
341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 1155, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 0, 1151, 1151, 1151, 1151, 1151, 1151, 1155, - 5942, 341, 1047, 1047, 1047, 1047, 1047, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 1182, 1182, 1182, 1182, 1182, - 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, - 1182, 1182, 1182, 1182, 1182, 1182, 0, 0, - 0, 1047, 1047, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 0, 0, 1151, 1151, 1151, 1151, 1151, - 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 1151, 0, 1155, 1151, 1151, 1151, 1151, 1151, - 1151, 1151, 1155, 1151, 1151, 1155, 1151, 1151, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 0, 341, 341, 0, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 1151, 1151, 1151, 1151, 1151, 1151, - 0, 0, 0, 1151, 0, 1151, 1151, 0, - 1151, 1151, 1151, 1173, 1151, 1163, 1163, 1228, - 1151, 0, 0, 0, 0, 0, 0, 0, - 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 0, - 341, 341, 0, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 1155, 1155, 1155, 1155, 1155, - 0, 1151, 1151, 0, 1155, 1155, 1151, 1155, - 1163, 341, 0, 0, 0, 0, 0, 0, - 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 
0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 1151, 1151, 1155, 1155, - 1047, 1047, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1182, 1182, 1182, 1182, 1182, 1182, 1182, - 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, - 1182, 1182, 1182, 1182, 1182, 1182, 77, 77, - 77, 77, 77, 77, 77, 77, 11, 11, - 11, 11, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 
77, 77, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 1047, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1458, 1458, 1458, 1458, 1458, 1458, 1458, - 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, - 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, - 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, - 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, - 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, - 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, - 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, - 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, - 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, - 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, - 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, - 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, - 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, - 0, 1047, 1047, 1047, 1047, 1047, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 
341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 0, 5943, 5943, 5943, 5943, 5943, 5943, 5943, - 5943, 5943, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 
0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 
341, 341, 341, - 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 0, 0, 0, 0, 1047, - 1047, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 0, - 0, 569, 569, 569, 569, 569, 1047, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 543, 543, 543, 543, 543, 543, 543, - 1047, 1047, 1047, 1047, 1047, 841, 841, 841, - 841, 525, 525, 525, 525, 1047, 841, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 0, 1182, 1182, 1182, 1182, - 1182, 1182, 1182, 0, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 0, 0, 0, 0, 0, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 5944, 5945, 5946, 5947, 5948, 
5949, 5950, - 5951, 5952, 5953, 5954, 5955, 5956, 5957, 5958, - 5959, 5960, 5961, 5962, 5963, 5964, 5965, 5966, - 5967, 5968, 5969, 5970, 5971, 5972, 5973, 5974, - 5975, 5976, 5977, 5978, 5979, 5980, 5981, 5982, - 5983, 5984, 5985, 5986, 5987, 5988, 5989, 5990, - 5991, 5992, 5993, 5994, 5995, 5996, 5997, 5998, - 5999, 6000, 6001, 6002, 6003, 6004, 6005, 6006, - 6007, 1182, 1182, 1182, 1182, 1182, 1182, 1182, - 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, - 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, - 1047, 1047, 1047, 1047, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 0, 0, 0, 0, - 1151, 341, 1155, 1155, 1155, 1155, 1155, 1155, - 1155, 1155, 1155, 1155, 1155, 1155, 1155, 1155, - 1155, 1155, 1155, 1155, 1155, 1155, 1155, 1155, - 1155, 1155, 1155, 1155, 1155, 1155, 1155, 1155, - 1155, 1155, 1155, 1155, 1155, 1155, 1155, 1155, - 1155, 1155, 1155, 1155, 1155, 1155, 1155, 1155, - 1155, 1155, 1155, 1155, 1155, 1155, 1155, 1155, - 1155, 0, 0, 0, 0, 0, 0, 0, - 1151, 1151, 1151, 1151, 525, 525, 525, 525, - 525, 525, 525, 525, 525, 525, 525, 525, - 525, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 3034, 3034, 3033, 3034, 0, 0, 0, - 0, 0, 0, 0, 
0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 0, 0, 0, 0, 0, 0, 0, - 0, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 
3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 3035, 3035, 3035, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 3035, 3035, 3035, - 3035, 0, 0, 0, 0, 0, 0, 0, - 0, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 
3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 
3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 0, 0, 841, 1151, 569, - 1047, 1459, 1459, 1459, 1459, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 
841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 0, 0, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 6008, 6009, 841, 841, 841, 841, 841, 6010, - 6011, 6012, 6013, 6014, 6015, 6016, 6017, 6018, - 569, 569, 569, 841, 841, 841, 6019, 6020, - 6021, 6022, 6023, 6024, 1459, 1459, 1459, 1459, - 1459, 1459, 1459, 1459, 556, 556, 556, 556, - 556, 556, 556, 556, 841, 841, 543, 543, - 543, 543, 543, 556, 556, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 543, 543, 543, 543, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 6025, 6026, 6027, 6028, 6029, 6030, - 6031, 6032, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 77, 77, 77, 77, 77, 77, 77, - 
77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 543, 543, 543, 77, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1182, 1182, 1182, 1182, 1182, 1182, 1182, - 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, - 1182, 1182, 1182, 1182, 1182, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1182, 1182, 1182, 1182, 1182, 1182, 1182, - 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, - 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, - 1182, 1182, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 6033, 2233, 2208, 2251, 2235, 2236, 6034, - 2215, 2218, 6035, 6036, 
2219, 2238, 2221, 6037, - 2223, 2224, 2225, 6038, 6039, 6040, 6041, 6042, - 6043, 6044, 2229, 6045, 6046, 6047, 2252, 2234, - 6048, 2214, 2216, 2253, 2254, 6049, 2220, 6050, - 6051, 2239, 6052, 6053, 6054, 6055, 6056, 6057, - 6058, 6059, 6060, 6061, 6062, 6033, 2233, 2208, - 2251, 2235, 2236, 6034, 2215, 2218, 6035, 6036, - 2219, 2238, 2221, 6037, 2223, 2224, 2225, 6038, - 6039, 6040, 6041, 6042, 6043, 6044, 2229, 6045, - 6046, 6047, 2252, 2234, 6048, 2214, 0, 2253, - 2254, 6049, 2220, 6050, 6051, 2239, 6052, 6053, - 6054, 6055, 6056, 6057, 6058, 6059, 6060, 6061, - 6062, 6033, 2233, 2208, 2251, 2235, 2236, 6034, - 2215, 2218, 6035, 6036, 2219, 2238, 2221, 6037, - 2223, 2224, 2225, 6038, 6039, 6040, 6041, 6042, - 6043, 6044, 2229, 6045, 6046, 6047, 2252, 2234, - 6048, 2214, 2216, 2253, 2254, 6049, 2220, 6050, - 6051, 2239, 6052, 6053, 6054, 6055, 6056, 6057, - 6058, 6059, 6060, 6061, 6062, 6033, 0, 2208, - 2251, 0, 0, 6034, 0, 0, 6035, 6036, - 0, 0, 2221, 6037, 2223, 2224, 0, 6038, - 6039, 6040, 6041, 6042, 6043, 6044, 2229, 6045, - 6046, 6047, 2252, 0, 6048, 0, 2216, 2253, - 2254, 6049, 2220, 6050, 6051, 0, 6052, 6053, - 6054, 6055, 6056, 6057, 6058, 6059, 6060, 6061, - 6062, 6033, 2233, 2208, 2251, 2235, 2236, 6034, - 2215, 2218, 6035, 6036, 2219, 2238, 2221, 6037, - 2223, 2224, 2225, 6038, 6039, 6040, 6041, 6042, - 6043, 6044, 2229, 6045, 6046, 6047, 2252, 2234, - 6048, 2214, 2216, 2253, 2254, 6049, 2220, 6050, - 6051, 2239, 6052, 6053, 6054, 6055, 6056, 6057, - 6058, 6059, 6060, 6061, 6062, 6033, 2233, 0, - 2251, 2235, 2236, 6034, 0, 0, 6035, 6036, - 2219, 2238, 2221, 6037, 2223, 2224, 0, 6038, - 6039, 6040, 6041, 6042, 6043, 6044, 0, 6045, - 6046, 6047, 2252, 2234, 6048, 2214, 2216, 2253, - 2254, 6049, 2220, 6050, 6051, 2239, 6052, 6053, - 6054, 6055, 6056, 6057, 6058, 6059, 6060, 6061, - 6062, 6033, 2233, 0, 2251, 2235, 2236, 6034, - 0, 2218, 6035, 6036, 2219, 2238, 0, 6037, - 0, 0, 0, 6038, 6039, 6040, 6041, 6042, - 6043, 6044, 0, 6045, 6046, 6047, 2252, 
2234, - 6048, 2214, 2216, 2253, 2254, 6049, 2220, 6050, - 6051, 2239, 6052, 6053, 6054, 6055, 6056, 6057, - 6058, 6059, 6060, 6061, 6062, 6033, 2233, 2208, - 2251, 2235, 2236, 6034, 2215, 2218, 6035, 6036, - 2219, 2238, 2221, 6037, 2223, 2224, 2225, 6038, - 6039, 6040, 6041, 6042, 6043, 6044, 2229, 6045, - 6046, 6047, 2252, 2234, 6048, 2214, 2216, 2253, - 2254, 6049, 2220, 6050, 6051, 2239, 6052, 6053, - 6054, 6055, 6056, 6057, 6058, 6059, 6060, 6061, - 6062, 6033, 2233, 2208, 2251, 2235, 2236, 6034, - 2215, 2218, 6035, 6036, 2219, 2238, 2221, 6037, - 2223, 2224, 2225, 6038, 6039, 6040, 6041, 6042, - 6043, 6044, 2229, 6045, 6046, 6047, 2252, 2234, - 6048, 2214, 2216, 2253, 2254, 6049, 2220, 6050, - 6051, 2239, 6052, 6053, 6054, 6055, 6056, 6057, - 6058, 6059, 6060, 6061, 6062, 6033, 2233, 2208, - 2251, 2235, 2236, 6034, 2215, 2218, 6035, 6036, - 2219, 2238, 2221, 6037, 2223, 2224, 2225, 6038, - 6039, 6040, 6041, 6042, 6043, 6044, 2229, 6045, - 6046, 6047, 2252, 2234, 6048, 2214, 2216, 2253, - 2254, 6049, 2220, 6050, 6051, 2239, 6052, 6053, - 6054, 6055, 6056, 6057, 6058, 6059, 6060, 6061, - 6062, 6033, 2233, 2208, 2251, 2235, 2236, 6034, - 2215, 2218, 6035, 6036, 2219, 2238, 2221, 6037, - 2223, 2224, 2225, 6038, 6039, 6040, 6041, 6042, - 6043, 6044, 2229, 6045, 6046, 6047, 2252, 2234, - 6048, 2214, 2216, 2253, 2254, 6049, 2220, 6050, - 6051, 2239, 6052, 6053, 6054, 6055, 6056, 6057, - 6058, 6059, 6060, 6061, 6062, 6033, 2233, 2208, - 2251, 2235, 2236, 6034, 2215, 2218, 6035, 6036, - 2219, 2238, 2221, 6037, 2223, 2224, 2225, 6038, - 6039, 6040, 6041, 6042, 6043, 6044, 2229, 6045, - 6046, 6047, 2252, 2234, 6048, 2214, 2216, 2253, - 2254, 6049, 2220, 6050, 6051, 2239, 6052, 6053, - 6054, 6055, 6056, 6057, 6058, 6059, 6060, 6061, - 6062, 6033, 2233, 2208, 2251, 2235, 2236, 6034, - 2215, 2218, 6035, 6036, 2219, 2238, 2221, 6037, - 2223, 2224, 2225, 6038, 6039, 6040, 6041, 6042, - 6043, 6044, 2229, 6045, 6046, 6047, 2252, 2234, - 6048, 2214, 2216, 2253, 2254, 6049, 2220, 
6050, - 6051, 2239, 6052, 6053, 6054, 6055, 6056, 6057, - 6058, 6059, 6060, 6061, 6062, 6063, 6064, 0, - 0, 6065, 6066, 2248, 6067, 6068, 6069, 6070, - 6071, 6072, 6073, 6074, 6075, 6076, 6077, 6078, - 2249, 6079, 6080, 6081, 6082, 6083, 6084, 6085, - 6086, 6087, 6088, 6089, 6090, 2247, 6091, 6092, - 6093, 6094, 6095, 6096, 6097, 6098, 6099, 6100, - 6101, 6102, 2246, 6103, 6104, 6105, 6106, 6107, - 6108, 6109, 6110, 6111, 6112, 6113, 6114, 6115, - 6116, 6117, 6118, 6065, 6066, 2248, 6067, 6068, - 6069, 6070, 6071, 6072, 6073, 6074, 6075, 6076, - 6077, 6078, 2249, 6079, 6080, 6081, 6082, 6083, - 6084, 6085, 6086, 6087, 6088, 6089, 6090, 2247, - 6091, 6092, 6093, 6094, 6095, 6096, 6097, 6098, - 6099, 6100, 6101, 6102, 2246, 6103, 6104, 6105, - 6106, 6107, 6108, 6109, 6110, 6111, 6112, 6113, - 6114, 6115, 6116, 6117, 6118, 6065, 6066, 2248, - 6067, 6068, 6069, 6070, 6071, 6072, 6073, 6074, - 6075, 6076, 6077, 6078, 2249, 6079, 6080, 6081, - 6082, 6083, 6084, 6085, 6086, 6087, 6088, 6089, - 6090, 2247, 6091, 6092, 6093, 6094, 6095, 6096, - 6097, 6098, 6099, 6100, 6101, 6102, 2246, 6103, - 6104, 6105, 6106, 6107, 6108, 6109, 6110, 6111, - 6112, 6113, 6114, 6115, 6116, 6117, 6118, 6065, - 6066, 2248, 6067, 6068, 6069, 6070, 6071, 6072, - 6073, 6074, 6075, 6076, 6077, 6078, 2249, 6079, - 6080, 6081, 6082, 6083, 6084, 6085, 6086, 6087, - 6088, 6089, 6090, 2247, 6091, 6092, 6093, 6094, - 6095, 6096, 6097, 6098, 6099, 6100, 6101, 6102, - 2246, 6103, 6104, 6105, 6106, 6107, 6108, 6109, - 6110, 6111, 6112, 6113, 6114, 6115, 6116, 6117, - 6118, 6065, 6066, 2248, 6067, 6068, 6069, 6070, - 6071, 6072, 6073, 6074, 6075, 6076, 6077, 6078, - 2249, 6079, 6080, 6081, 6082, 6083, 6084, 6085, - 6086, 6087, 6088, 6089, 6090, 2247, 6091, 6092, - 6093, 6094, 6095, 6096, 6097, 6098, 6099, 6100, - 6101, 6102, 2246, 6103, 6104, 6105, 6106, 6107, - 6108, 6109, 6110, 6111, 6112, 6113, 6114, 6115, - 6116, 6117, 6118, 6119, 6120, 0, 0, 6121, - 6122, 6123, 6124, 6125, 6126, 6127, 6128, 6129, - 
6130, 6121, 6122, 6123, 6124, 6125, 6126, 6127, - 6128, 6129, 6130, 6121, 6122, 6123, 6124, 6125, - 6126, 6127, 6128, 6129, 6130, 6121, 6122, 6123, - 6124, 6125, 6126, 6127, 6128, 6129, 6130, 6121, - 6122, 6123, 6124, 6125, 6126, 6127, 6128, 6129, - 6130, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 841, 841, 841, 841, 1151, 1151, 1151, 1151, - 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 1151, 1151, 1151, 1151, 1151, 1151, 841, 841, - 841, 841, 841, 841, 841, 841, 1151, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 1151, 841, 841, - 1047, 1047, 1047, 1047, 1047, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1151, 1151, 1151, 1151, - 1151, 0, 1151, 1151, 1151, 1151, 1151, 1151, - 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, - 1151, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 543, 543, 543, 543, 543, 543, 543, - 0, 543, 543, 543, 543, 543, 543, 543, - 543, 543, 543, 543, 543, 543, 543, 543, - 543, 543, 0, 0, 543, 543, 543, 543, - 543, 543, 543, 0, 543, 543, 0, 543, - 543, 543, 543, 543, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 
0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 0, 0, - 0, 543, 543, 543, 543, 543, 543, 543, - 525, 525, 525, 525, 525, 525, 525, 0, - 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 0, 0, 0, 0, 341, - 841, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 
0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 341, 341, 341, - 341, 341, 341, 341, 341, 543, 543, 543, - 543, 1172, 1172, 1172, 1172, 1172, 1172, 1172, - 1172, 1172, 1172, 0, 0, 0, 0, 0, - 11, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, - 1108, 1108, 1108, 1108, 1108, 1108, 0, 0, - 5741, 5741, 5741, 5741, 5741, 5741, 5741, 5741, - 5741, 556, 556, 556, 556, 556, 556, 556, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 6131, 6132, 6133, 6134, 6135, 6136, 6137, - 6138, 6139, 6140, 6141, 6142, 6143, 6144, 6145, - 6146, 6147, 6148, 6149, 6150, 6151, 6152, 
6153, - 6154, 6155, 6156, 6157, 6158, 6159, 6160, 6161, - 6162, 6163, 6164, 6165, 6166, 6167, 6168, 6169, - 6170, 6171, 6172, 6173, 6174, 6175, 6176, 6177, - 6178, 6179, 6180, 6181, 6182, 6183, 6184, 6185, - 6186, 6187, 6188, 6189, 6190, 6191, 6192, 6193, - 6194, 6195, 6196, 6197, 6198, 543, 543, 543, - 543, 543, 543, 1173, 1153, 0, 0, 0, - 0, 1152, 1152, 1152, 1152, 1152, 1152, 1152, - 1152, 1152, 1152, 0, 0, 0, 0, 1105, - 1105, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 5846, 5846, 5846, 5846, 5846, 5846, - 5846, 5846, 5846, 5846, 5846, 5846, 5846, 5846, - 5846, 5846, 5846, 5846, 5846, 5846, 5846, 5846, - 5846, 5846, 5846, 5846, 5846, 5846, 5846, 5846, - 5846, 5846, 5846, 5846, 5846, 5846, 5846, 5846, - 5846, 5846, 5846, 5846, 5846, 5846, 5846, 5846, - 5846, 5846, 5846, 5846, 5846, 5846, 5846, 5846, - 5846, 5846, 5846, 5846, 5846, 1148, 5846, 5846, - 5846, 1111, 5846, 5846, 5846, 5846, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 5846, 
5846, 5846, 5846, 5846, 5846, - 5846, 5846, 5846, 5846, 5846, 5846, 5846, 5846, - 5846, 5846, 5846, 5846, 5846, 5846, 5846, 5846, - 5846, 5846, 5846, 5846, 5846, 5846, 5846, 5846, - 5846, 5846, 5846, 5846, 5846, 5846, 5846, 5846, - 5846, 5846, 5846, 5846, 5846, 5846, 5846, 1148, - 5846, 5846, 5846, 5846, 5846, 5846, 5846, 5846, - 5846, 5846, 5846, 5846, 5846, 5846, 5846, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 6199, 6200, 6201, 6202, 0, 6203, 6204, - 6205, 6206, 6207, 6208, 6209, 6210, 6211, 6212, - 6213, 6214, 6215, 6216, 6217, 6218, 6219, 6220, - 6221, 6222, 6223, 6224, 6225, 6226, 6227, 6228, - 6229, 0, 6200, 6201, 0, 6230, 0, 0, - 6205, 0, 6207, 6208, 6209, 6210, 6211, 6212, - 6213, 6214, 6215, 6216, 0, 6218, 6219, 6220, - 6221, 0, 6223, 0, 6225, 0, 0, 0, - 0, 0, 0, 6201, 0, 0, 0, 0, - 6205, 0, 6207, 0, 6209, 0, 6211, 6212, - 6213, 0, 6215, 6216, 0, 6218, 0, 0, - 6221, 0, 6223, 0, 6225, 0, 6227, 0, - 6229, 0, 6200, 6201, 0, 6230, 0, 0, - 6205, 6206, 6207, 6208, 0, 6210, 6211, 6212, - 6213, 6214, 6215, 6216, 0, 6218, 6219, 6220, - 6221, 0, 6223, 6224, 6225, 6226, 0, 6228, - 0, 6199, 6200, 6201, 6202, 6230, 6203, 6204, - 6205, 6206, 6207, 0, 6209, 6210, 6211, 6212, - 6213, 6214, 6215, 6216, 6217, 6218, 6219, 6220, - 6221, 6222, 6223, 6224, 6225, 0, 0, 0, - 0, 0, 6200, 6201, 6202, 0, 6203, 6204, - 6205, 6206, 6207, 0, 6209, 6210, 6211, 6212, - 6213, 6214, 6215, 6216, 6217, 6218, 
6219, 6220, - 6221, 6222, 6223, 6224, 6225, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 75, 75, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 79, 79, 79, 79, 2397, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 0, 0, 0, - 0, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 0, 0, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 0, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 2397, 0, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 6231, 6232, 6233, 6234, 6235, 6236, 6237, - 6238, 6239, 6240, 6241, 1211, 1211, 0, 0, - 0, 6242, 6243, 6244, 6245, 6246, 6247, 6248, - 6249, 6250, 6251, 6252, 6253, 6254, 6255, 6256, - 6257, 6258, 6259, 6260, 6261, 6262, 6263, 6264, - 6265, 6266, 6267, 6268, 6269, 6270, 6271, 6272, - 79, 6273, 6274, 6275, 6276, 6277, 6278, 6279, - 6280, 6281, 6282, 6283, 6284, 6285, 6286, 6287, - 6288, 6289, 6290, 6291, 6292, 6293, 6294, 6295, - 6296, 6297, 6298, 6299, 6300, 6301, 6302, 6303, - 6304, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 6305, 6306, 6307, 0, 0, - 0, 2541, 2541, 
841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 2541, - 2541, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 6308, - 841, 6309, 6308, 6308, 6308, 6308, 6308, 6308, - 6308, 6308, 6308, 6308, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 841, 841, - 841, 841, 841, 841, 841, 841, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 6310, - 6310, 6310, 6310, 6310, 6310, 6310, 6310, 6310, - 6310, 6310, 6310, 6310, 6310, 6310, 6310, 6310, - 6310, 6310, 6310, 6310, 6310, 6310, 6310, 6310, - 6310, 6311, 6312, 6313, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 6314, 6315, 6316, 6317, 6318, 6319, 6320, - 6321, 6322, 6323, 6324, 6325, 6326, 6327, 6328, - 6329, 6330, 6331, 6332, 6333, 6334, 6335, 6336, - 6337, 6338, 6339, 6340, 6341, 6342, 6343, 6344, - 6345, 6346, 6347, 6348, 6349, 6350, 6351, 6352, - 6353, 6354, 6355, 6356, 6357, 0, 0, 0, - 0, 6358, 6359, 6360, 6361, 6362, 6363, 6364, - 6365, 6366, 0, 0, 0, 0, 0, 0, - 0, 6367, 6368, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 2397, 2397, 2397, 2397, 2397, 2397, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 
2397, 2397, 79, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 79, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 79, 79, 79, 79, - 2397, 2397, 2397, 2397, 2397, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 79, 79, 79, 2397, 79, 79, - 79, 2397, 2397, 2397, 6369, 6369, 6369, 6369, - 6369, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 79, 2397, 79, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 
2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 79, 79, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 77, - 77, 77, 77, 77, 77, 77, 77, 79, - 79, 79, 79, 79, 2397, 2397, 2397, 2397, - 79, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 2397, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 2397, 2397, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 2397, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 
79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 79, - 79, 79, 79, 79, 79, 2397, 79, 79, - 79, 2397, 2397, 2397, 79, 79, 2397, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 2397, 2397, 0, 0, - 0, 79, 79, 79, 79, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 0, 0, 0, 0, - 0, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 0, 0, 0, - 0, 0, 0, 0, 0, 
0, 0, 0, - 0, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 79, 79, - 79, 79, 0, 0, 0, 0, 0, 0, - 0, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 0, 0, 0, - 0, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 0, 0, 0, 0, 0, 0, 0, - 0, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 0, 0, 0, 0, 0, - 0, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 0, 0, 0, 0, 0, 0, 0, - 0, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 0, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2815, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2815, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, 
- 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 0, 2397, 2397, 2397, 2397, - 0, 0, 0, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 0, 0, 2397, 2397, - 2397, 2397, 2397, 2397, 0, 0, 0, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 0, 0, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, - 2397, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 0, - 0, 2397, 2397, 2397, 2397, 0, 0, 0, - 0, 2397, 2397, 2397, 0, 0, 0, 0, - 0, 2397, 2397, 2397, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 2397, 2397, 2397, 2397, 2397, 2397, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 
0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 
3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 0, - 0, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 
3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 
3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, - 3035, 3035, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 6370, 6371, 6372, 6373, 6374, 4373, 6375, - 6376, 6377, 6378, 4374, 6379, 6380, 6381, 4375, - 6382, 6383, 6384, 6385, 6386, 6387, 6388, 6389, - 6390, 6391, 6392, 6393, 4433, 6394, 6395, 6396, - 6397, 6398, 6399, 6400, 6401, 6402, 4438, 
4376, - 4377, 4439, 6403, 6404, 4184, 6405, 4378, 6406, - 6407, 6408, 6409, 6409, 6409, 6410, 6411, 6412, - 6413, 6414, 6415, 6416, 6417, 6418, 6419, 6420, - 6421, 6422, 6423, 6424, 6425, 6426, 6427, 6427, - 4441, 6428, 6429, 6430, 6431, 4380, 6432, 6433, - 6434, 4337, 6435, 6436, 6437, 6438, 6439, 6440, - 6441, 6442, 6443, 6444, 6445, 6446, 6447, 6448, - 6449, 6450, 6451, 6452, 6453, 6454, 6455, 6456, - 6457, 6458, 6459, 6460, 6460, 6461, 6462, 6463, - 4180, 6464, 6465, 6466, 6467, 6468, 6469, 6470, - 6471, 4385, 6472, 6473, 6474, 6475, 6476, 6477, - 6478, 6479, 6480, 6481, 6482, 6483, 6484, 6485, - 6486, 6487, 6488, 6489, 6490, 6491, 6492, 4126, - 6493, 6494, 6495, 6495, 6496, 6497, 6497, 6498, - 6499, 6500, 6501, 6502, 6503, 6504, 6505, 6506, - 6507, 6508, 6509, 6510, 4386, 6511, 6512, 6513, - 6514, 4453, 6514, 6515, 4388, 6516, 6517, 6518, - 6519, 4389, 4099, 6520, 6521, 6522, 6523, 6524, - 6525, 6526, 6527, 6528, 6529, 6530, 6531, 6532, - 6533, 6534, 6535, 6536, 6537, 6538, 6539, 6540, - 6541, 4390, 6542, 6543, 6544, 6545, 6546, 6547, - 4392, 6548, 6549, 6550, 6551, 6552, 6553, 6554, - 6555, 4127, 4461, 6556, 6557, 6558, 6559, 6560, - 6561, 6562, 6563, 4393, 6564, 6565, 6566, 6567, - 4504, 6568, 6569, 6570, 6571, 6572, 6573, 6574, - 6575, 6576, 6577, 6578, 6579, 6580, 4197, 6581, - 6582, 6583, 6584, 6585, 6586, 6587, 6588, 6589, - 6590, 6591, 4394, 4284, 6592, 6593, 6594, 6595, - 6596, 6597, 6598, 6599, 4465, 6600, 6601, 6602, - 6603, 6604, 6605, 6606, 6607, 4466, 6608, 6609, - 6610, 6611, 6612, 6613, 6614, 6615, 6616, 6617, - 6618, 6619, 4468, 6620, 6621, 6622, 6623, 6624, - 6625, 6626, 6627, 6628, 6629, 6630, 6630, 6631, - 6632, 4470, 6633, 6634, 6635, 6636, 6637, 6638, - 6639, 4183, 6640, 6641, 6642, 6643, 6644, 6645, - 6646, 4476, 6647, 6648, 6649, 6650, 6651, 6652, - 6652, 4477, 4506, 6653, 6654, 6655, 6656, 6657, - 4145, 4479, 6658, 6659, 4405, 6660, 6661, 4359, - 6662, 6663, 4409, 6664, 6665, 6666, 6667, 6667, - 6668, 6669, 6670, 6671, 6672, 6673, 6674, 
6675, - 6676, 6677, 6678, 6679, 6680, 6681, 6682, 6683, - 6684, 6685, 6686, 6687, 6688, 6689, 6690, 6691, - 6692, 6693, 6694, 4415, 6695, 6696, 6697, 6698, - 6699, 6700, 6701, 6702, 6703, 6704, 6705, 6706, - 6707, 6708, 6709, 6710, 6496, 6711, 6712, 6713, - 6714, 6715, 6716, 6717, 6718, 6719, 6720, 6721, - 6722, 4201, 6723, 6724, 6725, 6726, 6727, 6728, - 4418, 6729, 6730, 6731, 6732, 6733, 6734, 6735, - 6736, 6737, 6738, 6739, 6740, 6741, 6742, 6743, - 6744, 6745, 6746, 6747, 6748, 4140, 6749, 6750, - 6751, 6752, 6753, 6754, 4486, 6755, 6756, 6757, - 6758, 6759, 6760, 6761, 6762, 6763, 6764, 6765, - 6766, 6767, 6768, 6769, 6770, 6771, 6772, 6773, - 6774, 4491, 4492, 6775, 6776, 6777, 6778, 6779, - 6780, 6781, 6782, 6783, 6784, 6785, 6786, 6787, - 4493, 6788, 6789, 6790, 6791, 6792, 6793, 6794, - 6795, 6796, 6797, 6798, 6799, 6800, 6801, 6802, - 6803, 6804, 6805, 6806, 6807, 6808, 6809, 6810, - 6811, 6812, 6813, 6814, 6815, 6816, 6817, 4499, - 4499, 6818, 6819, 6820, 6821, 6822, 6823, 6824, - 6825, 6826, 6827, 4500, 6828, 6829, 6830, 6831, - 6832, 6833, 6834, 6835, 6836, 6837, 6838, 6839, - 6840, 6841, 6842, 6843, 6844, 6845, 6846, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1459, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 2129, 
2129, 2129, 2129, 2129, 2129, 2129, - 2129, 2129, 2129, 2129, 2129, 2129, 2129, 2129, - 2129, 2129, 2129, 2129, 2129, 2129, 2129, 2129, - 2129, 2129, 2129, 2129, 2129, 2129, 2129, 2129, - 2129, 2129, 2129, 2129, 2129, 2129, 2129, 2129, - 2129, 2129, 2129, 2129, 2129, 2129, 2129, 2129, - 2129, 2129, 2129, 2129, 2129, 2129, 2129, 2129, - 2129, 2129, 2129, 2129, 2129, 2129, 2129, 2129, - 2129, 2129, 2129, 2129, 2129, 2129, 2129, 2129, - 2129, 2129, 2129, 2129, 2129, 2129, 2129, 2129, - 2129, 2129, 2129, 2129, 2129, 2129, 2129, 2129, - 2129, 2129, 2129, 2129, 2129, 2129, 2129, 2129, - 2129, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 
577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 577, 577, 577, 577, 577, 577, 577, - 577, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, - 4086, 4086, 4086, 4086, 4086, 4086, 4086, 0, + 1, 2, 2, 2, 2, 2, 2, + 2, 2, 3, 4, 3, 5, 6, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 
2, 2, 2, 7, 7, 7, + 3, 8, 9, 9, 10, 11, 10, 9, + 9, 12, 13, 9, 14, 15, 16, 15, + 15, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 15, 9, 18, 19, 20, + 9, 9, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, + 43, 44, 45, 46, 12, 9, 13, 47, + 48, 47, 49, 50, 51, 52, 53, 54, + 55, 56, 57, 58, 59, 60, 61, 62, + 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 74, 12, 75, 13, 75, + 2, 2, 2, 2, 2, 2, 7, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 76, 9, 11, 11, 11, 11, 77, + 9, 78, 79, 80, 81, 75, 82, 79, + 83, 84, 85, 86, 87, 88, 89, 9, + 9, 90, 91, 92, 93, 94, 95, 96, + 9, 97, 98, 99, 100, 101, 102, 103, + 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, 116, 117, 118, 119, + 75, 120, 121, 122, 123, 124, 125, 126, + 127, 128, 129, 130, 131, 132, 133, 134, + 135, 136, 137, 138, 139, 140, 141, 142, + 143, 144, 145, 146, 147, 148, 149, 150, + 75, 151, 152, 153, 154, 155, 156, 157, + 158, 159, 160, 161, 162, 163, 164, 165, + 166, 167, 168, 169, 170, 171, 172, 173, + 174, 175, 176, 177, 178, 179, 180, 181, + 182, 183, 184, 185, 186, 187, 188, 189, + 190, 191, 192, 193, 194, 195, 196, 197, + 198, 199, 200, 201, 202, 203, 204, 205, + 206, 207, 208, 209, 210, 211, 212, 213, + 214, 215, 216, 217, 218, 219, 220, 221, + 222, 223, 224, 225, 226, 227, 228, 229, + 230, 231, 232, 233, 234, 235, 236, 237, + 238, 239, 240, 241, 242, 243, 244, 245, + 246, 247, 248, 249, 250, 251, 252, 253, + 254, 255, 256, 257, 258, 259, 260, 261, + 262, 263, 264, 265, 266, 267, 268, 269, + 270, 271, 272, 273, 274, 275, 276, 277, + 278, 279, 280, 281, 282, 283, 284, 285, + 286, 287, 288, 289, 290, 291, 292, 293, + 294, 295, 296, 297, 298, 299, 215, 300, + 301, 302, 303, 304, 305, 306, 307, 308, + 309, 310, 311, 312, 215, 313, 314, 315, + 316, 317, 318, 319, 320, 321, 322, 323, + 324, 325, 326, 215, 215, 327, 328, 329, + 330, 331, 332, 333, 334, 335, 336, 337, + 338, 339, 340, 215, 341, 342, 343, 215, + 344, 341, 
341, 341, 341, 345, 346, 347, + 348, 349, 350, 351, 352, 353, 354, 355, + 356, 357, 358, 359, 360, 361, 362, 363, + 364, 365, 366, 367, 368, 369, 370, 371, + 372, 373, 374, 375, 376, 377, 378, 379, + 380, 381, 382, 383, 384, 385, 386, 387, + 388, 389, 390, 391, 392, 393, 394, 395, + 396, 397, 398, 399, 400, 401, 402, 403, + 404, 405, 406, 407, 408, 409, 410, 411, + 412, 413, 414, 415, 416, 417, 418, 419, + 420, 421, 422, 423, 424, 425, 426, 427, + 428, 429, 430, 431, 432, 433, 434, 435, + 436, 437, 215, 438, 439, 440, 441, 442, + 443, 444, 445, 446, 447, 448, 449, 450, + 451, 452, 453, 454, 455, 215, 215, 215, + 215, 215, 215, 456, 457, 458, 459, 460, + 461, 462, 463, 464, 465, 466, 467, 468, + 469, 470, 471, 472, 473, 474, 475, 476, + 477, 478, 479, 480, 481, 482, 215, 483, + 484, 215, 485, 215, 486, 487, 215, 215, + 215, 488, 489, 215, 490, 215, 491, 492, + 215, 493, 494, 495, 496, 497, 215, 215, + 498, 215, 499, 500, 215, 215, 501, 215, + 215, 215, 215, 215, 215, 215, 502, 215, + 215, 503, 215, 504, 505, 215, 215, 215, + 506, 507, 508, 509, 510, 511, 215, 215, + 215, 215, 215, 512, 215, 341, 215, 215, + 215, 215, 215, 215, 215, 215, 513, 514, + 215, 215, 215, 215, 215, 215, 215, 215, + 215, 215, 215, 215, 215, 215, 215, 215, + 215, 515, 516, 517, 518, 519, 520, 521, + 522, 523, 524, 524, 525, 525, 525, 525, + 525, 525, 525, 526, 526, 526, 526, 524, + 524, 524, 524, 524, 524, 524, 524, 524, + 524, 525, 525, 526, 526, 526, 526, 526, + 526, 527, 528, 529, 530, 531, 532, 526, + 526, 533, 534, 535, 536, 537, 526, 526, + 526, 526, 526, 526, 526, 524, 526, 525, + 526, 526, 526, 526, 526, 526, 526, 526, + 526, 526, 526, 526, 526, 526, 526, 526, + 526, 538, 539, 540, 541, 542, 543, 544, + 545, 546, 547, 548, 549, 550, 543, 543, + 551, 543, 552, 543, 553, 554, 555, 556, + 556, 556, 556, 555, 557, 556, 556, 556, + 556, 556, 558, 558, 559, 560, 561, 562, + 563, 564, 556, 556, 556, 556, 565, 566, + 556, 567, 568, 556, 556, 569, 569, 569, + 569, 570, 556, 556, 556, 556, 543, 
543, + 543, 571, 572, 573, 574, 575, 576, 543, + 556, 556, 556, 543, 543, 543, 556, 556, + 577, 543, 543, 543, 556, 556, 556, 556, + 543, 555, 556, 556, 543, 578, 579, 579, + 578, 579, 579, 578, 543, 543, 543, 543, + 543, 543, 543, 543, 543, 543, 543, 543, + 543, 580, 581, 582, 583, 584, 526, 585, + 586, 0, 0, 587, 588, 589, 590, 591, + 592, 0, 0, 0, 0, 88, 593, 594, + 595, 596, 597, 598, 0, 599, 0, 600, + 601, 602, 603, 604, 605, 606, 607, 608, + 609, 610, 611, 612, 613, 614, 615, 616, + 617, 618, 619, 0, 620, 621, 622, 623, + 624, 625, 626, 627, 628, 629, 630, 631, + 632, 633, 634, 635, 636, 637, 638, 639, + 640, 641, 642, 643, 644, 645, 646, 647, + 648, 649, 650, 651, 652, 653, 654, 655, + 656, 657, 658, 659, 660, 661, 662, 663, + 664, 665, 666, 667, 668, 669, 670, 671, + 672, 673, 674, 675, 676, 677, 678, 679, + 680, 681, 682, 683, 684, 685, 686, 687, + 688, 689, 690, 691, 692, 693, 694, 695, + 696, 697, 698, 699, 700, 701, 702, 75, + 703, 704, 705, 706, 707, 215, 708, 709, + 710, 711, 712, 713, 714, 715, 716, 717, + 718, 719, 720, 721, 722, 723, 724, 725, + 726, 727, 728, 729, 730, 731, 732, 733, + 734, 735, 736, 737, 738, 739, 740, 741, + 742, 743, 744, 745, 746, 747, 748, 749, + 750, 751, 752, 753, 754, 755, 756, 757, + 758, 759, 760, 761, 762, 763, 764, 765, + 766, 767, 768, 769, 770, 771, 772, 773, + 774, 775, 776, 777, 778, 779, 780, 781, + 782, 783, 784, 785, 786, 787, 788, 789, + 790, 791, 792, 793, 794, 795, 796, 797, + 798, 799, 800, 801, 802, 803, 804, 805, + 806, 807, 808, 809, 810, 811, 812, 813, + 814, 815, 816, 817, 818, 819, 820, 821, + 822, 823, 824, 825, 826, 827, 828, 829, + 830, 831, 832, 833, 834, 835, 836, 837, + 838, 839, 840, 841, 543, 543, 543, 543, + 543, 842, 842, 843, 844, 845, 846, 847, + 848, 849, 850, 851, 852, 853, 854, 855, + 856, 857, 858, 859, 860, 861, 862, 863, + 864, 865, 866, 867, 868, 869, 870, 871, + 872, 873, 874, 875, 876, 877, 878, 879, + 880, 881, 882, 883, 884, 885, 886, 887, + 888, 889, 890, 891, 892, 893, 894, 
895, + 896, 897, 898, 899, 900, 901, 902, 903, + 904, 905, 906, 907, 908, 909, 910, 911, + 912, 913, 914, 915, 916, 917, 918, 919, + 920, 921, 922, 923, 924, 925, 926, 927, + 928, 929, 930, 931, 932, 933, 934, 935, + 936, 937, 938, 939, 940, 941, 942, 943, + 944, 945, 946, 947, 948, 949, 950, 951, + 952, 953, 954, 955, 956, 957, 958, 959, + 960, 961, 962, 963, 964, 965, 966, 967, + 968, 969, 970, 971, 972, 973, 974, 975, + 976, 977, 978, 979, 980, 981, 982, 983, + 984, 985, 986, 987, 988, 989, 990, 991, + 992, 993, 994, 995, 996, 997, 998, 999, + 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, + 1008, 0, 1009, 1010, 1011, 1012, 1013, 1014, + 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, + 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, + 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, + 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, + 0, 0, 525, 1047, 1047, 1047, 1047, 1047, + 1047, 215, 1048, 1049, 1050, 1051, 1052, 1053, + 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, + 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, + 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, + 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, + 1086, 215, 1047, 1087, 0, 0, 77, 77, + 11, 0, 556, 543, 543, 543, 543, 556, + 543, 543, 543, 1088, 556, 543, 543, 543, + 543, 543, 543, 556, 556, 556, 556, 556, + 556, 543, 543, 556, 543, 543, 1088, 1089, + 543, 1090, 1091, 1092, 1093, 1094, 1095, 1096, + 1097, 1098, 1099, 1099, 1100, 1101, 1102, 1103, + 1104, 1105, 1106, 1107, 1105, 543, 556, 1105, + 1098, 0, 0, 0, 0, 0, 0, 0, + 0, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 0, 0, 0, 0, + 1108, 1108, 1108, 1108, 1105, 1105, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1109, 1109, 1109, 1109, 1109, 1109, 75, + 75, 1110, 10, 10, 1111, 15, 1112, 77, + 77, 543, 543, 543, 543, 543, 543, 543, + 543, 1113, 1114, 1115, 1112, 1116, 0, 1112, + 1112, 1117, 1117, 1118, 1119, 1120, 1121, 1122, 
+ 1123, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1124, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1125, 1117, 1126, 1127, 1128, 1129, 1113, + 1114, 1115, 1130, 1131, 1132, 1133, 1134, 556, + 543, 543, 543, 543, 543, 556, 543, 543, + 556, 1135, 1135, 1135, 1135, 1135, 1135, 1135, + 1135, 1135, 1135, 10, 1136, 1136, 1112, 1117, + 1117, 1137, 1117, 1117, 1117, 1117, 1138, 1139, + 1140, 1141, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1142, 1143, 1144, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1145, 1146, 1112, 1147, 543, + 543, 543, 543, 543, 543, 543, 1109, 77, + 543, 543, 543, 543, 556, 543, 1124, 1124, + 543, 543, 77, 556, 543, 543, 556, 1117, + 1117, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 1117, 1117, 1117, 1148, 1148, + 1117, 1112, 1112, 1112, 1112, 1112, 1112, 1112, + 1112, 1112, 1112, 1112, 1112, 1112, 1112, 0, + 1149, 1117, 1150, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 543, 556, 543, 543, 556, 543, 543, + 556, 556, 556, 543, 556, 556, 543, 556, + 543, 543, 543, 556, 543, 556, 543, 556, + 543, 556, 543, 543, 0, 0, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 
1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1151, + 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 1151, 1151, 1117, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1152, 1152, 1152, 1152, 1152, 1152, 1152, + 1152, 1152, 1152, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 543, 543, 543, 543, + 543, 543, 543, 556, 543, 1153, 1153, 77, + 9, 9, 9, 1153, 0, 0, 556, 1154, + 1154, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 543, + 543, 543, 543, 1153, 543, 543, 543, 543, + 543, 543, 543, 543, 543, 1153, 543, 543, + 543, 1153, 543, 543, 543, 543, 543, 0, + 0, 1105, 1105, 1105, 1105, 1105, 1105, 1105, + 1105, 1105, 1105, 1105, 1105, 1105, 1105, 1105, + 0, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 556, 556, 556, 0, 0, 1105, + 0, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 0, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 556, 543, 543, 543, + 543, 543, 543, 543, 543, 543, 543, 543, + 543, 543, 543, 1109, 556, 543, 543, 556, + 543, 543, 556, 543, 543, 543, 556, 556, 
+ 556, 1127, 1128, 1129, 543, 543, 543, 556, + 543, 543, 556, 556, 543, 543, 543, 543, + 543, 1151, 1151, 1151, 1155, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 1156, 1157, 341, 341, 341, 341, 341, + 341, 1158, 1159, 341, 1160, 1161, 341, 341, + 341, 341, 341, 1151, 1155, 1162, 341, 1155, + 1155, 1155, 1151, 1151, 1151, 1151, 1151, 1151, + 1151, 1151, 1155, 1155, 1155, 1155, 1163, 1155, + 1155, 341, 543, 556, 543, 543, 1151, 1151, + 1151, 1164, 1165, 1166, 1167, 1168, 1169, 1170, + 1171, 341, 341, 1151, 1151, 1047, 1047, 1172, + 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1047, 525, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 1151, 1155, 1155, 0, 341, 341, + 341, 341, 341, 341, 341, 341, 0, 0, + 341, 341, 0, 0, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 0, 341, 341, 341, 341, 341, + 341, 341, 0, 341, 0, 0, 0, 341, + 341, 341, 341, 0, 0, 1173, 341, 1174, + 1155, 1155, 1151, 1151, 1151, 1151, 0, 0, + 1175, 1155, 0, 0, 1176, 1177, 1163, 341, + 0, 0, 0, 0, 0, 0, 0, 0, + 1178, 0, 0, 0, 0, 1179, 1180, 0, + 1181, 341, 341, 1151, 1151, 0, 0, 1172, + 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 341, 341, 11, 11, 1182, 1182, 1182, + 1182, 1182, 1182, 841, 11, 341, 1047, 543, + 0, 0, 1151, 1151, 1155, 0, 341, 341, + 341, 341, 341, 341, 0, 0, 0, 0, + 341, 341, 0, 0, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 0, 341, 341, 341, 341, 341, + 341, 341, 0, 341, 1183, 0, 341, 1184, + 0, 341, 341, 0, 0, 1173, 0, 1155, + 1155, 1155, 1151, 1151, 0, 0, 0, 0, + 1151, 1151, 0, 0, 1151, 1151, 1163, 0, + 0, 0, 1151, 0, 0, 0, 0, 0, + 0, 0, 1185, 1186, 1187, 341, 0, 1188, + 0, 0, 0, 0, 0, 0, 0, 1172, + 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, 
+ 1172, 1151, 1151, 341, 341, 341, 1151, 1047, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1151, 1151, 1155, 0, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 0, + 341, 341, 341, 0, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 0, 341, 341, 341, 341, 341, + 341, 341, 0, 341, 341, 0, 341, 341, + 341, 341, 341, 0, 0, 1173, 341, 1155, + 1155, 1155, 1151, 1151, 1151, 1151, 1151, 0, + 1151, 1151, 1155, 0, 1155, 1155, 1163, 0, + 0, 341, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 1151, 1151, 0, 0, 1172, + 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1047, 11, 0, 0, 0, 0, 0, + 0, 0, 341, 1151, 1151, 1151, 1151, 1151, + 1151, 0, 1151, 1155, 1155, 0, 341, 341, + 341, 341, 341, 341, 341, 341, 0, 0, + 341, 341, 0, 0, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 0, 341, 341, 341, 341, 341, + 341, 341, 0, 341, 341, 0, 341, 341, + 341, 341, 341, 0, 0, 1173, 341, 1189, + 1151, 1155, 1151, 1151, 1151, 1151, 0, 0, + 1190, 1191, 0, 0, 1192, 1193, 1163, 0, + 0, 0, 0, 0, 0, 0, 0, 1194, + 1195, 0, 0, 0, 0, 1196, 1197, 0, + 341, 341, 341, 1151, 1151, 0, 0, 1172, + 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 841, 341, 1182, 1182, 1182, 1182, 1182, + 1182, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1151, 341, 0, 341, 341, + 341, 341, 341, 341, 0, 0, 0, 341, + 341, 341, 0, 1198, 341, 1199, 341, 0, + 0, 0, 341, 341, 0, 341, 0, 341, + 341, 0, 0, 0, 341, 341, 0, 0, + 0, 341, 341, 341, 0, 0, 0, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 0, 0, 0, 0, 1200, + 1155, 1151, 1155, 1155, 0, 0, 0, 1201, + 1202, 1155, 0, 1203, 1204, 1205, 1163, 0, + 0, 341, 0, 0, 0, 0, 0, 0, + 1206, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1172, + 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1182, 1182, 1182, 77, 77, 77, 77, + 77, 77, 11, 77, 0, 0, 0, 0, + 0, 1151, 1155, 1155, 1155, 1151, 341, 341, + 341, 341, 341, 341, 341, 341, 
0, 341, + 341, 341, 0, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 0, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 0, 0, 0, 341, 1151, + 1151, 1151, 1155, 1155, 1155, 1155, 0, 1207, + 1151, 1208, 0, 1151, 1151, 1151, 1163, 0, + 0, 0, 0, 0, 0, 0, 1209, 1210, + 0, 341, 341, 341, 0, 0, 0, 0, + 0, 341, 341, 1151, 1151, 0, 0, 1172, + 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 0, 0, 0, 0, 0, 0, 0, + 1047, 1211, 1211, 1211, 1211, 1211, 1211, 1211, + 841, 341, 1151, 1155, 1155, 1047, 341, 341, + 341, 341, 341, 341, 341, 341, 0, 341, + 341, 341, 0, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 0, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 0, 341, 341, + 341, 341, 341, 0, 0, 1173, 341, 1155, + 1212, 1213, 1155, 1214, 1155, 1155, 0, 1215, + 1216, 1217, 0, 1218, 1219, 1151, 1163, 0, + 0, 0, 0, 0, 0, 0, 1220, 1221, + 0, 0, 0, 0, 0, 0, 0, 341, + 0, 341, 341, 1151, 1151, 0, 0, 1172, + 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 0, 341, 341, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1151, 1151, 1155, 1155, 0, 341, 341, + 341, 341, 341, 341, 341, 341, 0, 341, + 341, 341, 0, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 1163, 1163, 341, 1222, + 1155, 1155, 1151, 1151, 1151, 1151, 0, 1223, + 1224, 1155, 0, 1225, 1226, 1227, 1163, 1228, + 841, 0, 0, 0, 0, 341, 341, 341, + 1229, 1182, 1182, 1182, 1182, 1182, 1182, 1182, + 341, 341, 341, 1151, 1151, 0, 0, 1172, + 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1182, 1182, 1182, 1182, 1182, 1182, 1182, + 1182, 1182, 841, 341, 341, 341, 341, 341, + 341, 0, 0, 1155, 1155, 0, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 
341, 341, 341, 341, 341, + 0, 0, 0, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 0, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 0, 341, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 0, 0, 0, 1230, 0, 0, 0, 0, + 1231, 1155, 1155, 1151, 1151, 1151, 0, 1151, + 0, 1155, 1232, 1233, 1155, 1234, 1235, 1236, + 1237, 0, 0, 0, 0, 0, 0, 1172, + 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 0, 0, 1155, 1155, 1047, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 1151, 341, 1238, 1151, 1151, 1151, + 1151, 1239, 1239, 1163, 0, 0, 0, 0, + 11, 341, 341, 341, 341, 341, 341, 525, + 1151, 1240, 1240, 1240, 1240, 1151, 1151, 1151, + 1047, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 1047, 1047, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 341, 341, 0, 341, 0, 341, + 341, 341, 341, 341, 0, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 0, 341, 0, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 1151, 341, 1241, 1151, 1151, 1151, + 1151, 1242, 1242, 1163, 1151, 1151, 341, 0, + 0, 341, 341, 341, 341, 341, 0, 525, + 0, 1243, 1243, 1243, 1243, 1151, 1151, 0, + 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 0, 0, 1244, 1245, 341, + 341, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 841, 841, 841, 1047, 1047, 1047, + 1047, 1047, 1047, 1047, 1047, 1246, 1047, 1047, + 1047, 1047, 1047, 1047, 841, 1047, 841, 841, + 841, 556, 556, 841, 841, 841, 841, 841, + 841, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 
1172, 1172, 1182, 1182, 1182, 1182, 1182, + 1182, 1182, 1182, 1182, 1182, 841, 556, 841, + 556, 841, 1247, 12, 13, 12, 13, 1155, + 1155, 341, 341, 341, 1248, 341, 341, 341, + 341, 0, 341, 341, 341, 341, 1249, 341, + 341, 341, 341, 1250, 341, 341, 341, 341, + 1251, 341, 341, 341, 341, 1252, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 1253, 341, 341, 341, 0, 0, + 0, 0, 1254, 1255, 1256, 1257, 1258, 1259, + 1260, 1261, 1262, 1255, 1255, 1255, 1255, 1151, + 1155, 1255, 1263, 543, 543, 1163, 1047, 543, + 543, 341, 341, 341, 341, 341, 1151, 1151, + 1151, 1151, 1151, 1151, 1264, 1151, 1151, 1151, + 1151, 0, 1151, 1151, 1151, 1151, 1265, 1151, + 1151, 1151, 1151, 1266, 1151, 1151, 1151, 1151, + 1267, 1151, 1151, 1151, 1151, 1268, 1151, 1151, + 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 1151, 1151, 1269, 1151, 1151, 1151, 0, 841, + 841, 841, 841, 841, 841, 841, 841, 556, + 841, 841, 841, 841, 841, 841, 0, 841, + 841, 1047, 1047, 1047, 1047, 1047, 841, 841, + 841, 841, 1047, 1047, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 1270, 1271, + 341, 341, 341, 341, 1272, 1272, 1151, 1273, + 1151, 1151, 1155, 1151, 1151, 1151, 1151, 1151, + 1173, 1272, 1163, 1163, 1155, 1155, 1151, 1151, + 341, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 1047, 1047, 1047, 1047, 1047, + 1047, 341, 341, 341, 341, 341, 341, 1155, + 1155, 1151, 1151, 341, 341, 341, 341, 1151, + 1151, 1151, 341, 1272, 1272, 1272, 341, 341, + 1272, 1272, 1272, 1272, 1272, 1272, 1272, 341, + 341, 341, 1151, 1151, 1151, 1151, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 1151, 1272, 1155, 1151, 1151, + 1272, 1272, 1272, 1272, 1272, 1272, 556, 341, + 1272, 1172, 1172, 1172, 1172, 1172, 1172, 
1172, + 1172, 1172, 1172, 1272, 1272, 1272, 1151, 841, + 841, 1274, 1275, 1276, 1277, 1278, 1279, 1280, + 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, + 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, + 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, + 1305, 1306, 1307, 1308, 1309, 1310, 1311, 0, + 1312, 0, 0, 0, 0, 0, 1313, 0, + 0, 1314, 1315, 1316, 1317, 1318, 1319, 1320, + 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1328, + 1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336, + 1337, 1338, 1339, 1340, 1341, 1342, 1343, 1344, + 1345, 1346, 1347, 1348, 1349, 1350, 1351, 1352, + 1353, 1354, 1355, 1356, 1047, 1357, 1358, 1359, + 1360, 1361, 1361, 1361, 1361, 1361, 1361, 1361, + 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, + 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, + 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, + 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, + 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, + 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, + 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, + 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, + 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, + 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, + 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, + 1362, 1363, 1364, 1364, 1364, 1364, 1364, 1364, + 1364, 1364, 1364, 1364, 1364, 1364, 1364, 1364, + 1364, 1364, 1364, 1364, 1364, 1364, 1364, 1364, + 1364, 1364, 1364, 1364, 1364, 1364, 1364, 1364, + 1364, 1364, 1364, 1364, 1364, 1364, 1364, 1364, + 1364, 1364, 1364, 1364, 1364, 1364, 1364, 1364, + 1364, 1364, 1364, 1364, 1364, 1364, 1364, 1364, + 1364, 1364, 1364, 1364, 1364, 1364, 1364, 1364, + 1364, 1364, 1364, 1364, 1364, 1364, 1364, 1364, + 1364, 1365, 1365, 1365, 1365, 1365, 1365, 1365, + 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, + 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, + 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, + 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, + 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, + 1365, 1365, 1365, 
1365, 1365, 1365, 1365, 1365, + 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, + 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, + 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, + 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, + 1365, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 0, 341, 341, 341, 341, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 0, 341, 0, 341, 341, 341, 341, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 0, 341, 341, 341, 341, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 0, 341, 341, 341, 341, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 0, 341, 0, 341, 341, 341, 341, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 0, 341, 341, 341, 341, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 
341, 341, 341, 341, 0, 0, 543, 543, + 543, 1047, 1047, 1047, 1047, 1047, 1047, 1047, + 1047, 1047, 1182, 1182, 1182, 1182, 1182, 1182, + 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, + 1182, 1182, 1182, 1182, 1182, 1182, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 0, 0, 0, 0, 0, + 0, 1366, 1367, 1368, 1369, 1370, 1371, 1372, + 1373, 1374, 1375, 1376, 1377, 1378, 1379, 1380, + 1381, 1382, 1383, 1384, 1385, 1386, 1387, 1388, + 1389, 1390, 1391, 1392, 1393, 1394, 1395, 1396, + 1397, 1398, 1399, 1400, 1401, 1402, 1403, 1404, + 1405, 1406, 1407, 1408, 1409, 1410, 1411, 1412, + 1413, 1414, 1415, 1416, 1417, 1418, 1419, 1420, + 1421, 1422, 1423, 1424, 1425, 1426, 1427, 1428, + 1429, 1430, 1431, 1432, 1433, 1434, 1435, 1436, + 1437, 1438, 1439, 1440, 1441, 1442, 1443, 1444, + 1445, 1446, 1447, 1448, 1449, 1450, 1451, 0, + 0, 1452, 1453, 1454, 1455, 1456, 1457, 0, + 0, 1087, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 
341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 
341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 841, 1047, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 8, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 12, 13, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 1047, 1047, 1047, 1458, + 1458, 1458, 341, 341, 341, 341, 341, 341, + 341, 341, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 0, 341, + 341, 341, 341, 1151, 1151, 1163, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 1151, 1151, 1163, 1047, 1047, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 1151, 1151, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 0, 341, + 341, 341, 0, 1151, 1151, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 577, 577, 1155, + 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1155, + 1155, 1155, 1155, 1155, 1155, 1155, 1155, 1151, + 1155, 1155, 1151, 1151, 
1151, 1151, 1151, 1151, + 1151, 1151, 1151, 1163, 1151, 1047, 1047, 1047, + 525, 1047, 1047, 1047, 11, 341, 543, 0, + 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 0, 0, 0, 0, 0, + 0, 1211, 1211, 1211, 1211, 1211, 1211, 1211, + 1211, 1211, 1211, 0, 0, 0, 0, 0, + 0, 9, 9, 9, 9, 9, 9, 1087, + 9, 9, 9, 9, 577, 577, 577, 1459, + 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 525, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 1151, 1151, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 1089, 341, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 0, 1151, 1151, 1151, 1155, 1155, 1155, 1155, + 1151, 1151, 1155, 1155, 1155, 0, 0, 0, + 0, 1155, 1155, 1151, 1155, 1155, 1155, 1155, + 1155, 1155, 1088, 543, 556, 0, 0, 0, + 0, 77, 0, 0, 0, 9, 9, 1172, + 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 341, 341, 
341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 0, + 0, 341, 341, 341, 341, 341, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 0, 0, 0, 0, 0, + 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 1182, 0, 0, 0, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 543, 556, 1155, 1155, 1151, 0, 0, 1047, + 1047, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 1155, 1151, + 1155, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 0, 1163, 1272, 1151, 1272, 1272, 1151, 1151, + 1151, 1151, 1151, 1151, 1151, 1151, 1155, 1155, + 1155, 1155, 1155, 1155, 1151, 1151, 543, 543, + 543, 543, 543, 543, 543, 543, 0, 0, + 556, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 0, 0, 0, 0, 0, + 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 0, 0, 0, 0, 0, + 0, 1047, 1047, 1047, 1047, 1047, 1047, 1047, + 525, 1047, 1047, 1047, 1047, 1047, 1047, 0, + 0, 543, 543, 543, 543, 543, 556, 556, + 556, 556, 556, 556, 543, 543, 556, 842, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 
0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1151, 1151, 1151, 1151, 1155, 1460, 1461, + 1462, 1463, 1464, 1465, 1466, 1467, 1468, 1469, + 341, 341, 1470, 1471, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 1173, 1472, 1151, + 1151, 1151, 1151, 1473, 1474, 1475, 1476, 1477, + 1478, 1479, 1480, 1481, 1482, 1483, 341, 341, + 341, 341, 341, 341, 341, 0, 0, 0, + 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 1047, 1047, 1047, 1047, 1047, + 1047, 1047, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 543, 556, 543, 543, + 543, 543, 543, 543, 543, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 0, 0, + 0, 1151, 1151, 1155, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 1155, 1151, 1151, 1151, 1151, 1155, + 1155, 1151, 1151, 1483, 1163, 1151, 1151, 341, + 341, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 1173, + 1155, 1151, 1151, 1155, 1155, 1155, 1151, 1155, + 1151, 1151, 1151, 1483, 1483, 0, 0, 0, + 0, 0, 0, 0, 0, 1047, 1047, 1047, + 1047, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 1155, 1155, 1155, + 1155, 1155, 1155, 1155, 1155, 1151, 1151, 1151, + 1151, 1151, 1151, 1151, 1151, 1155, 1155, 1151, + 1173, 0, 0, 0, 1047, 1047, 1047, 1047, + 1047, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 0, 0, 0, 341, 341, + 341, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 
341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 525, 525, 525, 525, 525, 525, 1047, + 1047, 1484, 1485, 1486, 1487, 1488, 1488, 1489, + 1490, 1491, 0, 0, 0, 0, 0, 0, + 0, 1492, 1493, 1494, 1495, 1496, 1497, 1498, + 1499, 1500, 1501, 1502, 1503, 1504, 1505, 1506, + 1507, 1508, 1509, 1510, 1511, 1512, 1513, 1514, + 1515, 1516, 1517, 1518, 1519, 1520, 1521, 1522, + 1523, 1524, 1525, 1526, 1527, 1528, 1529, 1530, + 1531, 1532, 1533, 1534, 0, 0, 1535, 1536, + 1537, 1047, 1047, 1047, 1047, 1047, 1047, 1047, + 1047, 0, 0, 0, 0, 0, 0, 0, + 0, 543, 543, 543, 1047, 569, 556, 556, + 556, 556, 556, 543, 543, 556, 556, 556, + 556, 543, 1155, 569, 569, 569, 569, 569, + 569, 569, 341, 341, 341, 341, 556, 341, + 341, 341, 341, 341, 341, 543, 341, 341, + 1155, 543, 543, 341, 0, 0, 0, 0, + 0, 215, 215, 215, 215, 215, 215, 215, + 215, 215, 215, 215, 215, 215, 215, 215, + 215, 215, 215, 215, 215, 215, 215, 215, + 215, 215, 215, 215, 215, 215, 215, 215, + 215, 215, 215, 215, 215, 215, 215, 215, + 215, 215, 215, 215, 215, 1538, 1539, 1540, + 525, 1541, 1542, 1543, 1544, 1545, 1546, 1547, + 1548, 1549, 1550, 1551, 525, 1552, 1553, 1554, + 1555, 1556, 1557, 1558, 1559, 1560, 1561, 1562, + 1563, 1564, 1565, 1566, 1567, 1568, 1569, 525, + 1570, 1571, 1572, 1573, 1574, 1575, 1576, 1577, + 1578, 1579, 1580, 1581, 1582, 1583, 1584, 1585, + 1586, 1587, 1588, 1589, 1590, 1591, 1592, 1593, + 1594, 1595, 1596, 1597, 215, 215, 215, 215, + 215, 215, 215, 215, 215, 215, 215, 215, + 215, 1598, 1599, 215, 215, 215, 1600, 215, + 215, 215, 215, 215, 215, 215, 215, 215, + 215, 215, 215, 215, 215, 215, 215, 1601, + 215, 215, 215, 215, 215, 215, 215, 215, + 215, 215, 215, 215, 1602, 1603, 1604, 1605, + 1568, 1606, 1607, 1608, 1609, 1610, 1611, 1612, + 1613, 1614, 1615, 1616, 1617, 1618, 1619, 1620, + 1621, 1622, 1623, 1624, 1625, 1626, 1627, 1628, + 1629, 1630, 1631, 1632, 1633, 1634, 1635, 1636, + 
1637, 543, 543, 556, 543, 543, 543, 543, + 543, 543, 543, 556, 543, 543, 579, 1638, + 556, 558, 543, 543, 543, 543, 543, 543, + 543, 543, 543, 543, 543, 543, 543, 543, + 543, 543, 543, 543, 543, 543, 543, 543, + 543, 543, 543, 543, 543, 543, 543, 543, + 543, 543, 543, 543, 543, 543, 543, 555, + 1089, 1089, 556, 0, 543, 578, 556, 543, + 556, 1639, 1640, 1641, 1642, 1643, 1644, 1645, + 1646, 1647, 1648, 1649, 1650, 1651, 1652, 1653, + 1654, 1655, 1656, 1657, 1658, 1659, 1660, 1661, + 1662, 1663, 1664, 1665, 1666, 1667, 1668, 1669, + 1670, 1671, 1672, 1673, 1674, 1675, 1676, 1677, + 1678, 1679, 1680, 1681, 1682, 1683, 1684, 1685, + 1686, 1687, 1688, 1689, 1690, 1691, 1692, 1693, + 1694, 1695, 1696, 1697, 1698, 1699, 1700, 1701, + 1702, 1703, 1704, 1705, 1706, 1707, 1708, 1709, + 1710, 1711, 1712, 1713, 1714, 1715, 1716, 1717, + 1718, 1719, 1720, 1721, 1722, 1723, 1724, 1725, + 1726, 1727, 1728, 1729, 1730, 1731, 1732, 1733, + 1734, 1735, 1736, 1737, 1738, 1739, 1740, 1741, + 1742, 1743, 1744, 1745, 1746, 1747, 1748, 1749, + 1750, 1751, 1752, 1753, 1754, 1755, 1756, 1757, + 1758, 1759, 1760, 1761, 1762, 1763, 1764, 1765, + 1766, 1767, 1768, 1769, 1770, 1771, 1772, 1773, + 1774, 1775, 1776, 1777, 1778, 1779, 1780, 1781, + 1782, 1783, 1784, 1785, 1786, 1787, 1788, 1789, + 1790, 1791, 1792, 1793, 1794, 215, 215, 1795, + 215, 1796, 1797, 1798, 1799, 1800, 1801, 1802, + 1803, 1804, 1805, 1806, 1807, 1808, 1809, 1810, + 1811, 1812, 1813, 1814, 1815, 1816, 1817, 1818, + 1819, 1820, 1821, 1822, 1823, 1824, 1825, 1826, + 1827, 1828, 1829, 1830, 1831, 1832, 1833, 1834, + 1835, 1836, 1837, 1838, 1839, 1840, 1841, 1842, + 1843, 1844, 1845, 1846, 1847, 1848, 1849, 1850, + 1851, 1852, 1853, 1854, 1855, 1856, 1857, 1858, + 1859, 1860, 1861, 1862, 1863, 1864, 1865, 1866, + 1867, 1868, 1869, 1870, 1871, 1872, 1873, 1874, + 1875, 1876, 1877, 1878, 1879, 1880, 1881, 1882, + 1883, 1884, 1885, 1886, 1887, 1888, 1889, 1890, + 1891, 1892, 1893, 1894, 1895, 1896, 1897, 1898, + 1899, 1900, 
1901, 1902, 1903, 1904, 1905, 1906, + 1907, 1908, 1909, 1910, 1911, 1912, 1913, 0, + 0, 1914, 1915, 1916, 1917, 1918, 1919, 0, + 0, 1920, 1921, 1922, 1923, 1924, 1925, 1926, + 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, + 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, + 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, + 1951, 1952, 1953, 1954, 1955, 1956, 1957, 0, + 0, 1958, 1959, 1960, 1961, 1962, 1963, 0, + 0, 1964, 1965, 1966, 1967, 1968, 1969, 1970, + 1971, 0, 1972, 0, 1973, 0, 1974, 0, + 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, + 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, + 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, + 1999, 2000, 2001, 2002, 2003, 2004, 2005, 0, + 0, 2006, 2007, 2008, 2009, 2010, 2011, 2012, + 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, + 2021, 2022, 2023, 2024, 2025, 2026, 2027, 2028, + 2029, 2030, 2031, 2032, 2033, 2034, 2035, 2036, + 2037, 2038, 2039, 2040, 2041, 2042, 2043, 2044, + 2045, 2046, 2047, 2048, 2049, 2050, 2051, 2052, + 2053, 2054, 2055, 2056, 2057, 2058, 0, 2059, + 2060, 2061, 2062, 2063, 2064, 2065, 2066, 2067, + 2068, 2069, 2070, 2071, 2072, 2073, 0, 2074, + 2075, 2076, 2077, 2078, 2079, 2080, 2081, 2082, + 2083, 2084, 2085, 2086, 2087, 0, 0, 2088, + 2089, 2090, 2091, 2092, 2093, 0, 2094, 2095, + 2096, 2097, 2098, 2099, 2100, 2101, 2102, 2103, + 2104, 2105, 2106, 2107, 2108, 2109, 2110, 2111, + 2112, 0, 0, 2113, 2114, 2115, 0, 2116, + 2117, 2118, 2119, 2120, 2121, 2122, 2123, 2124, + 0, 2125, 2126, 2127, 2127, 2127, 2127, 2127, + 2128, 2127, 2127, 2127, 1459, 2129, 2130, 2131, + 2132, 1087, 2133, 1087, 1087, 1087, 1087, 9, + 2134, 2135, 2136, 2137, 2135, 2135, 2136, 2137, + 2135, 9, 9, 9, 9, 2138, 2139, 2140, + 9, 2141, 2142, 2143, 2144, 2145, 2146, 2147, + 76, 10, 10, 10, 2148, 2149, 9, 2150, + 2151, 9, 81, 93, 9, 2152, 9, 2153, + 48, 48, 9, 9, 9, 2154, 12, 13, + 2155, 2156, 2157, 9, 9, 9, 9, 9, + 9, 9, 9, 75, 9, 48, 9, 9, + 2158, 9, 9, 9, 9, 9, 9, 9, + 2127, 1459, 1459, 1459, 1459, 1459, 0, 2159, + 
2160, 2161, 2162, 1459, 1459, 1459, 1459, 1459, + 1459, 2163, 2164, 0, 0, 2165, 2166, 2167, + 2168, 2169, 2170, 2171, 2172, 2173, 2174, 2175, + 2176, 2177, 2178, 2179, 2180, 2181, 2182, 2183, + 2184, 2185, 2186, 2187, 2188, 2189, 2190, 2191, + 0, 2192, 2193, 2194, 2195, 2196, 2197, 2198, + 2199, 2200, 2201, 2202, 2203, 2204, 0, 0, + 0, 11, 11, 11, 11, 11, 11, 11, + 11, 2205, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 543, 543, 569, 569, 543, 543, 543, + 543, 569, 569, 569, 543, 543, 842, 842, + 842, 842, 543, 842, 842, 842, 569, 569, + 543, 556, 543, 569, 569, 556, 556, 556, + 556, 543, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2206, 2207, 2208, 2209, 77, 2210, 2211, + 2212, 77, 2213, 2214, 2215, 2215, 2215, 2216, + 2217, 2218, 2218, 2219, 2220, 77, 2221, 2222, + 77, 75, 2223, 2224, 2225, 2225, 2225, 77, + 77, 2226, 2227, 2228, 77, 2229, 77, 2230, + 77, 2229, 77, 2231, 2232, 2233, 2208, 84, + 2234, 2235, 2236, 2237, 2238, 2239, 2240, 2241, + 2242, 2243, 2244, 77, 2245, 2246, 2247, 2248, + 2249, 2250, 75, 75, 75, 75, 2251, 2252, + 2234, 2253, 2254, 77, 75, 77, 77, 2255, + 841, 2256, 2257, 2258, 2259, 2260, 2261, 2262, + 2263, 2264, 2265, 2266, 2267, 2268, 2269, 2270, + 2271, 2272, 2273, 2274, 2275, 2276, 2277, 2278, + 2279, 2280, 2281, 2282, 2283, 2284, 2285, 2286, + 2287, 2288, 2289, 2290, 2291, 2292, 2293, 2294, + 2295, 2296, 2297, 2298, 2299, 2300, 2301, 2302, + 2303, 1458, 1458, 1458, 2304, 2305, 1458, 1458, + 1458, 1458, 2306, 77, 77, 0, 0, 0, + 0, 2307, 75, 2308, 75, 2309, 79, 79, + 79, 79, 79, 2310, 2311, 77, 77, 77, + 77, 75, 77, 77, 75, 77, 77, 75, + 77, 77, 79, 79, 77, 77, 77, 2312, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 2313, 2314, + 2315, 2316, 77, 2317, 77, 2318, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 
77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 2319, 2319, 2320, 2321, 75, 75, + 75, 2322, 2323, 2319, 2324, 2325, 2319, 75, + 75, 75, 2319, 14, 85, 75, 2319, 2319, + 75, 75, 75, 2319, 2319, 2319, 2319, 75, + 2319, 2319, 2319, 2319, 2326, 2327, 2328, 2329, + 75, 75, 75, 75, 2319, 2330, 2331, 2319, + 2332, 2333, 2319, 2319, 2319, 75, 75, 75, + 75, 75, 2319, 75, 2319, 2334, 2319, 2319, + 2319, 2319, 2335, 2319, 2336, 2337, 2338, 2319, + 2339, 2340, 2341, 2319, 2319, 2319, 2342, 75, + 75, 75, 75, 2319, 2319, 2319, 2319, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 2319, 2343, 2344, 2345, 75, 2346, 2347, 2319, + 2319, 2319, 2319, 2319, 2319, 75, 2348, 2349, + 2350, 2351, 2352, 2353, 2354, 2355, 2356, 2357, + 2358, 2359, 2360, 2361, 2362, 2363, 2364, 2319, + 2319, 2365, 2366, 2367, 2368, 2369, 2370, 2371, + 2372, 2373, 2374, 2319, 2319, 2319, 75, 75, + 2319, 2319, 2375, 2376, 75, 75, 75, 75, + 75, 2319, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 2377, 2319, 75, 75, 2319, + 2319, 2378, 2379, 2319, 2380, 2381, 2382, 2383, + 2384, 2319, 2319, 2385, 2386, 2387, 2388, 2319, + 2319, 2319, 75, 75, 75, 75, 75, 2319, + 2319, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 2319, 2319, 2319, 2319, 2319, 75, + 75, 2319, 2319, 75, 75, 75, 75, 2319, + 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, + 2319, 2389, 2390, 2391, 2392, 2319, 2319, 2319, + 2319, 2319, 2319, 2393, 2394, 2395, 2396, 75, + 75, 2319, 2319, 2319, 2319, 2319, 2319, 2319, + 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, + 2319, 77, 77, 77, 77, 77, 77, 77, + 77, 12, 13, 12, 13, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 2397, 2397, 77, 77, 77, + 77, 2319, 2319, 77, 77, 77, 77, 77, + 77, 79, 2398, 2399, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 
841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 77, 75, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 79, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 841, 77, + 77, 77, 77, 77, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 79, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 75, 75, 75, + 75, 75, 75, 77, 77, 77, 77, 77, + 77, 77, 2397, 2397, 2397, 2397, 79, 79, + 79, 2397, 79, 79, 2397, 77, 77, 77, + 77, 79, 79, 79, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2400, 2401, 2402, 2403, 2404, 2405, 2406, + 2407, 2408, 2409, 2410, 2411, 2412, 2413, 2414, + 2415, 2416, 2417, 2418, 2419, 2420, 2421, 2422, + 2423, 2424, 2425, 2426, 2427, 2428, 2429, 2430, + 2431, 2432, 2433, 2434, 2435, 2436, 2437, 2438, + 2439, 2440, 2441, 2442, 2443, 2444, 2445, 2446, + 2447, 2448, 2449, 2450, 2451, 2452, 2453, 2454, + 2455, 2456, 2457, 2458, 2459, 2460, 2461, 2462, + 2463, 2464, 2465, 2466, 2467, 2468, 2469, 2470, + 2471, 2472, 2473, 2474, 2475, 2476, 2477, 2478, + 2479, 2480, 2481, 2482, 2483, 2484, 2485, 2486, + 2487, 2488, 2489, 2490, 2491, 2492, 2493, 2494, + 2495, 2496, 2497, 2498, 2499, 2500, 2501, 2502, + 2503, 2504, 2505, 2506, 2507, 2508, 2509, 2510, + 2511, 2512, 2513, 2514, 2515, 2516, 2517, 2518, + 2519, 2520, 2521, 2522, 2523, 2524, 2525, 2526, + 2527, 2528, 2529, 2530, 2531, 2532, 2533, 2534, + 2535, 2536, 2537, 2538, 1211, 1211, 1211, 1211, + 1211, 1211, 1211, 1211, 
1211, 1211, 1211, 1211, + 1211, 1211, 1211, 1211, 1211, 1211, 1211, 1211, + 1211, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 79, 79, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 79, + 75, 77, 77, 77, 77, 77, 77, 77, + 77, 79, 75, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 75, 75, 75, 2539, 2539, 2540, 2540, + 75, 79, 79, 79, 79, 79, 79, 77, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 77, 2397, 2397, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 2539, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 2397, 79, 79, 79, 79, 79, 79, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 79, 79, 79, 2397, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 2397, 79, 79, 79, 79, 79, + 79, 79, 79, 2397, 2397, 2541, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 
79, 79, 79, 79, 79, 79, 2397, 2397, + 79, 79, 79, 79, 79, 2397, 2397, 79, + 79, 79, 79, 79, 79, 79, 79, 2397, + 79, 79, 79, 79, 79, 2397, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 2397, 79, 79, 79, 79, + 79, 79, 79, 2397, 2397, 79, 2397, 79, + 79, 79, 79, 2397, 79, 79, 2397, 79, + 79, 79, 79, 79, 79, 79, 2397, 77, + 77, 79, 79, 2397, 2397, 79, 79, 79, + 79, 79, 79, 79, 77, 79, 77, 79, + 77, 77, 77, 77, 77, 77, 79, 77, + 77, 77, 79, 77, 77, 77, 77, 77, + 77, 2397, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 79, 79, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 79, 77, 77, + 79, 77, 77, 77, 77, 2397, 77, 2397, + 77, 77, 77, 77, 2397, 2397, 2397, 77, + 2397, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 79, 79, 79, 79, + 79, 12, 13, 12, 13, 12, 13, 12, + 13, 12, 13, 12, 13, 12, 13, 1211, + 1211, 1211, 1211, 1211, 1211, 1211, 1211, 1211, + 1211, 1211, 1211, 1211, 1211, 1211, 1211, 1211, + 1211, 1211, 1211, 1211, 1211, 1211, 1211, 1211, + 1211, 1211, 1211, 1211, 1211, 77, 2397, 2397, + 2397, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 79, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 2397, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 2397, 2319, 75, 75, 2319, 2319, 12, 13, + 75, 2319, 2319, 75, 2319, 2319, 2319, 75, + 75, 75, 75, 75, 2319, 2319, 2319, 2319, + 75, 75, 75, 75, 75, 2319, 2319, 2319, + 75, 75, 75, 2319, 2319, 2319, 2319, 12, + 13, 12, 13, 12, 13, 12, 13, 12, + 13, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 
841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 2539, 2539, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 12, 13, 12, 13, 12, 13, + 12, 13, 75, 75, 2319, 2319, 2319, 2319, + 2319, 2319, 75, 2319, 2319, 2319, 2319, 2319, + 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, + 2319, 75, 75, 75, 75, 75, 75, 75, + 75, 2319, 75, 75, 75, 75, 75, 75, + 75, 2319, 2319, 2319, 2319, 2319, 2319, 75, + 75, 75, 2319, 75, 75, 75, 75, 2319, + 2319, 2319, 2319, 2319, 75, 2319, 2319, 75, + 75, 12, 13, 12, 13, 2319, 75, 75, + 75, 75, 2319, 75, 2319, 2319, 2319, 75, + 75, 2319, 2319, 75, 75, 75, 75, 75, + 
75, 75, 75, 75, 75, 2319, 2319, 2319, + 2319, 2319, 2319, 75, 75, 12, 13, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 2319, 2319, 2542, 2319, 2319, + 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, + 2319, 2319, 2319, 2319, 2319, 2319, 75, 2319, + 2319, 2319, 2319, 75, 75, 2319, 75, 2319, + 75, 75, 2319, 75, 2319, 2319, 2319, 2319, + 75, 75, 75, 75, 75, 2319, 2319, 75, + 75, 75, 75, 75, 75, 2319, 2319, 2319, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 2319, 2319, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 2319, 2319, 75, + 75, 75, 75, 2319, 2319, 2319, 2319, 75, + 2319, 2319, 75, 75, 2319, 2543, 2544, 2545, + 75, 75, 2319, 2319, 2319, 2319, 2319, 2319, + 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, + 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, + 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, + 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, + 2319, 2319, 2319, 2319, 2319, 75, 75, 2319, + 2319, 2319, 2319, 2319, 2319, 2319, 2319, 75, + 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, + 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, + 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, + 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, + 2319, 2319, 2319, 2319, 2319, 2319, 2319, 2319, + 75, 75, 75, 75, 75, 2546, 2547, 2319, + 75, 75, 75, 2319, 2319, 2319, 2319, 2319, + 75, 75, 75, 75, 75, 2319, 2319, 2319, + 75, 75, 75, 75, 2319, 75, 75, 75, + 2319, 2319, 2319, 2319, 2319, 75, 2319, 75, + 75, 77, 77, 77, 77, 77, 79, 79, + 79, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 2397, 2397, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 77, 77, + 75, 75, 75, 75, 75, 75, 77, 77, + 77, 2397, 77, 77, 77, 77, 2397, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 0, 0, 77, + 
77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 0, + 0, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 2548, + 77, 2549, 2550, 2551, 2552, 2553, 2554, 2555, + 2556, 2557, 2558, 2559, 2560, 2561, 2562, 2563, + 2564, 2565, 2566, 2567, 2568, 2569, 2570, 2571, + 2572, 2573, 2574, 2575, 2576, 2577, 2578, 2579, + 2580, 2581, 2582, 2583, 2584, 2585, 2586, 2587, + 2588, 2589, 2590, 2591, 2592, 2593, 2594, 2595, + 0, 2596, 2597, 2598, 2599, 2600, 2601, 2602, + 2603, 2604, 2605, 2606, 2607, 2608, 2609, 2610, + 2611, 2612, 2613, 2614, 2615, 2616, 2617, 2618, + 2619, 2620, 2621, 2622, 2623, 2624, 2625, 2626, + 2627, 2628, 2629, 2630, 2631, 2632, 2633, 2634, + 2635, 2636, 2637, 2638, 2639, 2640, 2641, 2642, + 0, 2643, 2644, 2645, 2646, 2647, 2648, 2649, + 2650, 2651, 2652, 2653, 2654, 2655, 2656, 2657, + 2658, 2659, 215, 2660, 2661, 215, 2662, 2663, + 215, 215, 215, 215, 215, 2664, 2665, 2666, + 2667, 2668, 2669, 2670, 2671, 2672, 2673, 2674, + 2675, 2676, 2677, 2678, 2679, 2680, 2681, 2682, + 2683, 2684, 2685, 2686, 2687, 2688, 2689, 2690, + 2691, 2692, 2693, 2694, 2695, 2696, 2697, 2698, + 2699, 2700, 2701, 2702, 2703, 2704, 2705, 2706, + 2707, 2708, 2709, 2710, 2711, 2712, 2713, 2714, + 2715, 2716, 2717, 2718, 2719, 2720, 2721, 2722, + 2723, 2724, 2725, 2726, 2727, 2728, 2729, 2730, + 2731, 2732, 2733, 2734, 2735, 2736, 2737, 2738, + 2739, 2740, 2741, 2742, 2743, 2744, 2745, 2746, + 2747, 2748, 2749, 2750, 2751, 2752, 2753, 2754, + 2755, 2756, 2757, 2758, 2759, 2760, 2761, 2762, + 2763, 2764, 2765, 2766, 2767, 215, 
77, 77, + 77, 77, 77, 77, 2768, 2769, 2770, 2771, + 543, 543, 543, 2772, 2773, 0, 0, 0, + 0, 0, 9, 9, 9, 9, 1211, 9, + 9, 2774, 2775, 2776, 2777, 2778, 2779, 2780, + 2781, 2782, 2783, 2784, 2785, 2786, 2787, 2788, + 2789, 2790, 2791, 2792, 2793, 2794, 2795, 2796, + 2797, 2798, 2799, 2800, 2801, 2802, 2803, 2804, + 2805, 2806, 2807, 2808, 2809, 2810, 2811, 0, + 2812, 0, 0, 0, 0, 0, 2813, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 0, 0, 0, 0, 0, 0, 0, + 2814, 1047, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1163, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 0, 341, 341, 341, 341, 341, 341, 341, + 0, 341, 341, 341, 341, 341, 341, 341, + 0, 341, 341, 341, 341, 341, 341, 341, + 0, 341, 341, 341, 341, 341, 341, 341, + 0, 341, 341, 341, 341, 341, 341, 341, + 0, 341, 341, 341, 341, 341, 341, 341, + 0, 341, 341, 341, 341, 341, 341, 341, + 0, 543, 543, 543, 543, 543, 543, 543, + 543, 543, 543, 543, 543, 543, 543, 543, + 543, 543, 543, 543, 543, 543, 543, 543, + 543, 543, 543, 543, 543, 543, 543, 543, + 543, 9, 9, 81, 93, 81, 93, 9, + 9, 9, 81, 93, 9, 81, 93, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 1087, 9, 9, 1087, 9, 81, 93, 9, + 9, 81, 93, 12, 13, 12, 13, 12, + 13, 12, 13, 9, 9, 9, 9, 9, + 524, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 1087, 1087, 9, 9, 9, + 9, 1087, 9, 2137, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 2815, 2815, 2815, 2815, 2815, 
2815, 2815, 2815, + 2815, 2815, 2815, 0, 2815, 2815, 2815, 2815, + 2816, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 2815, 2815, 2815, 2815, 2817, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2818, 2819, 2820, 2821, 2822, 2823, 2824, + 2825, 2826, 2827, 2828, 2829, 2830, 2831, 2832, + 2833, 2834, 2835, 2836, 2837, 2838, 2839, 2840, + 2841, 2842, 2843, 2844, 2845, 2846, 2847, 2848, + 2849, 2850, 2851, 2852, 2853, 2854, 2855, 2856, + 2857, 2858, 2859, 2860, 2861, 2862, 2863, 2864, + 2865, 2866, 2867, 2868, 2869, 2870, 2871, 2872, + 2873, 2874, 2875, 2876, 2877, 2878, 2879, 2880, + 2881, 2882, 2883, 2884, 2885, 2886, 2887, 2888, + 2889, 2890, 2891, 2892, 2893, 2894, 2895, 2896, + 2897, 2898, 2899, 2900, 2901, 2902, 2903, 2904, + 2905, 2906, 2907, 2908, 2909, 2910, 2911, 2912, + 2913, 2914, 2915, 2916, 2917, 2918, 2919, 2920, + 2921, 2922, 2923, 2924, 2925, 2926, 2927, 2928, + 2929, 2930, 2931, 2932, 2933, 2934, 2935, 2936, + 2937, 2938, 2939, 2940, 2941, 2942, 2943, 2944, + 2945, 2946, 2947, 2948, 2949, 2950, 2951, 2952, + 2953, 2954, 2955, 2956, 2957, 2958, 2959, 2960, + 2961, 2962, 2963, 2964, 2965, 2966, 2967, 2968, + 2969, 2970, 2971, 2972, 2973, 2974, 2975, 2976, + 2977, 2978, 2979, 2980, 2981, 2982, 2983, 2984, + 2985, 2986, 2987, 2988, 2989, 2990, 2991, 2992, + 2993, 2994, 2995, 2996, 2997, 2998, 2999, 3000, + 3001, 3002, 3003, 3004, 3005, 3006, 3007, 3008, + 3009, 3010, 3011, 3012, 3013, 3014, 3015, 3016, + 3017, 3018, 3019, 3020, 3021, 3022, 3023, 3024, + 3025, 3026, 3027, 3028, 3029, 3030, 3031, 0, + 0, 0, 0, 0, 0, 0, 0, 
0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 2815, 2815, 2815, 2815, 2815, 0, 0, 0, + 0, 3032, 3033, 3033, 3033, 2815, 3034, 3035, + 3036, 3037, 3038, 3037, 3038, 3037, 3038, 3037, + 3038, 3037, 3038, 2815, 2815, 3037, 3038, 3037, + 3038, 3037, 3038, 3037, 3038, 3039, 3040, 3041, + 3041, 2815, 3036, 3036, 3036, 3036, 3036, 3036, + 3036, 3036, 3036, 3042, 1089, 555, 1088, 3043, + 3043, 3044, 3034, 3034, 3034, 3034, 3034, 3045, + 2815, 3046, 3047, 3048, 3034, 3035, 3049, 2815, + 77, 0, 3035, 3035, 3035, 3035, 3035, 3050, + 3035, 3035, 3035, 3035, 3051, 3052, 3053, 3054, + 3055, 3056, 3057, 3058, 3059, 3060, 3061, 3062, + 3063, 3064, 3065, 3066, 3067, 3068, 3069, 3070, + 3071, 3072, 3073, 3074, 3035, 3075, 3076, 3077, + 3078, 3079, 3080, 3035, 3035, 3035, 3035, 3035, + 3081, 3082, 3083, 3084, 3085, 3086, 3087, 3088, + 3089, 3090, 3091, 3092, 3093, 3094, 3095, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3096, 3035, 3035, + 0, 0, 3097, 3098, 3099, 3100, 3101, 3102, + 3103, 3039, 3035, 3035, 3035, 3035, 3035, 3104, + 3035, 3035, 3035, 3035, 3105, 3106, 3107, 3108, + 3109, 3110, 3111, 3112, 3113, 3114, 3115, 3116, + 3117, 3118, 3119, 3120, 3121, 3122, 3123, 3124, + 3125, 3126, 3127, 3128, 3035, 3129, 3130, 3131, + 3132, 3133, 3134, 3035, 3035, 3035, 3035, 3035, + 3135, 3136, 3137, 3138, 3139, 3140, 3141, 3142, + 3143, 3144, 3145, 3146, 3147, 3148, 3149, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3150, 3151, 3152, 3153, 3035, 3154, 3035, 3035, + 3155, 3156, 3157, 3158, 3033, 3034, 3159, 3160, + 3161, 0, 0, 0, 0, 0, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 
3035, 3035, 3035, + 3035, 0, 3162, 3163, 3164, 3165, 3166, 3167, + 3168, 3169, 3170, 3171, 3172, 3173, 3174, 3175, + 3176, 3177, 3178, 3179, 3180, 3181, 3182, 3183, + 3184, 3185, 3186, 3187, 3188, 3189, 3190, 3191, + 3192, 3193, 3194, 3195, 3196, 3197, 3198, 3199, + 3200, 3201, 3202, 3203, 3204, 3205, 3206, 3207, + 3208, 3209, 3210, 3211, 3212, 3213, 3214, 3215, + 3216, 3217, 3218, 3219, 3220, 3221, 3222, 3223, + 3224, 3225, 3226, 3227, 3228, 3229, 3230, 3231, + 3232, 3233, 3234, 3235, 3236, 3237, 3238, 3239, + 3240, 3241, 3242, 3243, 3244, 3245, 3246, 3247, + 3248, 3249, 3250, 3251, 3252, 3253, 3254, 3255, + 0, 3256, 3256, 3257, 3258, 3259, 3260, 3261, + 3262, 3263, 3264, 3265, 3266, 3267, 3268, 3269, + 3270, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 0, 0, 0, 0, + 0, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 2815, 2815, 2815, 2815, 2815, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3271, 3272, 3273, 3274, 3275, 3276, 3277, + 3278, 3279, 3280, 3281, 3282, 3283, 3284, 3285, + 3286, 3287, 3288, 3289, 3290, 3291, 3292, 3293, + 3294, 3295, 3296, 3297, 3298, 3299, 3300, 3301, + 0, 3302, 3303, 3304, 3305, 3306, 3307, 3308, + 3309, 3310, 3311, 3312, 3313, 3314, 3315, 3316, + 3317, 3318, 3319, 3320, 3321, 3322, 3323, 3324, + 3325, 3326, 3327, 3328, 3329, 3330, 3331, 3332, + 3333, 3334, 3335, 3336, 3337, 3338, 3339, 3340, + 3341, 1182, 1182, 1182, 1182, 1182, 1182, 1182, + 1182, 3342, 3343, 3344, 3345, 3346, 3347, 3348, + 3349, 3350, 3351, 3352, 3353, 3354, 3355, 3356, + 3357, 3358, 3359, 3360, 3361, 3362, 3363, 3364, + 3365, 3366, 3367, 3368, 3369, 3370, 3371, 3372, + 3373, 3374, 3375, 3376, 3377, 3378, 
3379, 3380, + 3381, 3382, 3383, 3384, 3385, 3386, 3387, 3388, + 3256, 3389, 3390, 3391, 3392, 3393, 3394, 3395, + 3396, 3397, 3398, 3399, 3400, 3401, 3402, 3403, + 3404, 3405, 3406, 3407, 3408, 3409, 3410, 3411, + 3412, 3413, 3414, 3415, 3416, 3417, 3418, 3419, + 3420, 3421, 3422, 3423, 3424, 3425, 3426, 3427, + 3428, 3429, 3430, 3431, 3432, 3433, 3434, 3435, + 3436, 3437, 3438, 3439, 3440, 3441, 3442, 3443, + 3444, 3445, 3446, 3447, 3448, 3449, 3450, 3451, + 3452, 3453, 3454, 3455, 3456, 3457, 3458, 3459, + 3460, 3461, 3462, 3463, 3464, 3465, 3466, 3467, + 3468, 3469, 3470, 3471, 3472, 3473, 3474, 3475, + 3476, 3477, 3478, 3479, 3480, 3481, 3482, 3483, + 3484, 3485, 3486, 3487, 3488, 3489, 3490, 3491, + 3492, 3493, 3494, 3495, 3496, 3497, 3498, 3499, + 3500, 3501, 3502, 3503, 3504, 3505, 3506, 3507, + 3508, 3509, 3510, 3511, 3512, 3513, 3514, 3515, + 3516, 3517, 3518, 3519, 3520, 3521, 3522, 3523, + 3524, 3525, 3526, 3527, 3528, 3529, 3530, 3531, + 3532, 3533, 3534, 3535, 3536, 3537, 3538, 3539, + 3540, 3541, 3542, 3543, 3544, 3545, 3546, 3547, + 3548, 3549, 3550, 3551, 3552, 3553, 3554, 3555, + 3556, 3557, 3558, 3559, 3560, 3561, 3562, 3563, + 3564, 3565, 3566, 3567, 3568, 3569, 3570, 3571, + 3572, 3573, 3574, 3575, 3576, 3577, 3578, 3579, + 3580, 3581, 3582, 3583, 3584, 3585, 3586, 3587, + 3588, 3589, 3590, 3591, 3592, 3593, 3594, 3595, + 3596, 3597, 3598, 3599, 3600, 3601, 3602, 3603, + 3604, 3605, 3606, 3607, 3608, 3609, 3610, 3611, + 3612, 3613, 3614, 3615, 3616, 3617, 3618, 3619, + 3620, 3621, 3622, 3623, 3624, 3625, 3626, 3627, + 3628, 3629, 3630, 3631, 3632, 3633, 3634, 3635, + 3636, 3637, 3638, 3639, 3640, 3641, 3642, 3643, + 3644, 3645, 3646, 3647, 3648, 3649, 3650, 3651, + 3652, 3653, 3654, 3655, 3656, 3657, 3658, 3659, + 3660, 3661, 3662, 3663, 3664, 3665, 3666, 3667, + 3668, 3669, 3670, 3671, 3672, 3673, 3674, 3675, + 3676, 3677, 3678, 3679, 3680, 3681, 3682, 3683, + 3684, 3685, 3686, 3687, 3688, 3689, 3690, 3691, + 3692, 3693, 3694, 3695, 3696, 3697, 
3698, 3699, + 3700, 3701, 3702, 3703, 3704, 3705, 3706, 3707, + 3708, 3709, 3710, 3711, 3712, 3713, 3714, 3715, + 3716, 3717, 3718, 3719, 3720, 3721, 3722, 3723, + 3724, 3725, 3726, 3727, 3728, 3729, 3730, 3731, + 3732, 3733, 3734, 3735, 3736, 3737, 3738, 3739, + 3740, 3741, 3742, 3743, 3744, 3745, 3746, 3747, + 3748, 3749, 3750, 3751, 3752, 3753, 3754, 3755, + 3756, 3757, 3758, 3759, 3760, 3761, 3762, 3763, + 3764, 3765, 3766, 3767, 3768, 3769, 3770, 3771, + 3772, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 
3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 
3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3034, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 
3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 0, 0, + 0, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 2815, 2815, 2815, 2815, 2815, 2815, 2815, 2815, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 
341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 525, 525, 525, 525, 525, 525, 1047, + 1047, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 525, 9, 9, + 9, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 341, 341, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3773, 3774, 3775, 3776, 3777, 3778, 3779, + 3780, 3781, 3782, 3783, 3784, 3785, 3786, 3787, + 3788, 3789, 3790, 3791, 3792, 3793, 3794, 3795, + 3796, 3797, 3798, 3799, 3800, 3801, 3802, 3803, + 3804, 3805, 3806, 3807, 3808, 3809, 3810, 3811, + 3812, 3813, 3814, 3815, 3816, 3817, 3818, 341, + 543, 842, 842, 842, 9, 543, 543, 543, + 543, 543, 543, 543, 543, 543, 543, 9, + 524, 3819, 3820, 3821, 3822, 3823, 3824, 3825, + 3826, 3827, 3828, 3829, 3830, 3831, 3832, 3833, + 3834, 3835, 3836, 3837, 3838, 3839, 3840, 3841, + 3842, 3843, 3844, 3845, 3846, 3847, 3848, 543, + 543, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 1458, + 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, + 1458, 543, 543, 1047, 1047, 1047, 1047, 1047, + 1047, 0, 0, 0, 0, 0, 0, 0, + 0, 526, 526, 526, 526, 526, 526, 526, + 526, 526, 526, 526, 526, 526, 526, 526, + 526, 526, 526, 526, 526, 526, 526, 526, + 524, 524, 524, 524, 524, 524, 524, 524, + 524, 526, 526, 3849, 3850, 3851, 3852, 3853, + 3854, 3855, 3856, 3857, 3858, 3859, 3860, 3861, + 3862, 215, 215, 3863, 3864, 3865, 3866, 3867, + 3868, 3869, 3870, 3871, 3872, 3873, 3874, 3875, + 3876, 3877, 3878, 3879, 3880, 3881, 3882, 3883, + 3884, 3885, 3886, 
3887, 3888, 3889, 3890, 3891, + 3892, 3893, 3894, 3895, 3896, 3897, 3898, 3899, + 3900, 3901, 3902, 3903, 3904, 3905, 3906, 3907, + 3908, 3909, 3910, 3911, 3912, 3913, 3914, 3915, + 3916, 3917, 3918, 3919, 3920, 3921, 3922, 3923, + 3924, 3925, 215, 215, 215, 215, 215, 215, + 215, 215, 3926, 3927, 3928, 3929, 3930, 3931, + 3932, 3933, 3934, 3935, 3936, 3937, 3938, 3939, + 3940, 524, 3941, 3941, 3942, 3943, 3944, 215, + 341, 3945, 3946, 3947, 3948, 3949, 215, 3950, + 3951, 3952, 3953, 3954, 3955, 3956, 3957, 3958, + 3959, 3960, 3961, 3962, 3963, 3964, 3965, 3966, + 3967, 3968, 3969, 3970, 3971, 3972, 3973, 3974, + 215, 3975, 3976, 3977, 3978, 3979, 3980, 3981, + 3982, 3983, 3984, 3985, 3986, 3987, 3988, 3989, + 3990, 0, 0, 3991, 3992, 3993, 3994, 3995, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 341, 3996, 3997, 215, 341, 341, 341, 341, + 341, 341, 341, 1151, 341, 341, 341, 1163, + 341, 341, 341, 341, 1151, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 1155, 1155, 1151, 1151, + 1155, 77, 77, 77, 77, 0, 0, 0, + 0, 1182, 1182, 1182, 1182, 1182, 1182, 841, + 841, 11, 84, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 9, 9, 9, + 9, 0, 0, 0, 0, 0, 0, 0, + 0, 1155, 1155, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 1155, 1155, 1155, + 1155, 1155, 1155, 1155, 1155, 1155, 1155, 1155, + 1155, 1155, 1155, 1155, 1155, 1163, 1151, 0, + 0, 
0, 0, 0, 0, 0, 0, 1047, + 1047, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 0, 0, 0, 0, 0, + 0, 543, 543, 543, 543, 543, 543, 543, + 543, 543, 543, 543, 543, 543, 543, 543, + 543, 543, 543, 341, 341, 341, 341, 341, + 341, 1047, 1047, 1047, 341, 1047, 341, 341, + 1151, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 1151, + 1151, 1151, 1151, 1151, 556, 556, 556, 1047, + 1047, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 1151, 1151, 1151, 1155, 1483, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1047, 1361, 1361, 1361, 1361, 1361, 1361, 1361, + 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, + 1361, 1361, 1361, 1361, 1361, 1361, 1361, 1361, + 1361, 1361, 1361, 1361, 1361, 1361, 0, 0, + 0, 1151, 1151, 1151, 1155, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 1173, 1155, 1155, 1151, + 1151, 1151, 1151, 1155, 1155, 1151, 1151, 1155, + 1155, 1483, 1047, 1047, 1047, 1047, 1047, 1047, + 1047, 1047, 1047, 1047, 1047, 1047, 1047, 0, + 525, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 0, 0, 0, 0, 1047, + 1047, 341, 341, 341, 341, 341, 1151, 525, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 341, 341, 341, 341, 341, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 1151, 1151, 1151, 1151, 1151, 1151, + 1155, 1155, 1151, 1151, 1155, 1155, 1151, 
1151, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 1151, 341, 341, 341, + 341, 341, 341, 341, 341, 1151, 1155, 0, + 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 0, 0, 1047, 1047, 1047, + 1047, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 525, 341, 341, 341, 341, 341, 341, + 841, 841, 841, 341, 1272, 1151, 1272, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 543, 341, 543, 543, 556, 341, 341, + 543, 543, 341, 341, 341, 341, 341, 543, + 543, 341, 543, 341, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 341, 341, 525, 1047, + 1047, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 1155, 1151, 1151, 1155, + 1155, 1047, 1047, 341, 525, 525, 1155, 1163, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 341, 341, 341, 341, 341, 341, + 0, 0, 341, 341, 341, 341, 341, 341, + 0, 0, 341, 341, 341, 341, 341, 341, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 0, 341, 341, 341, 341, 341, 341, 341, + 0, 215, 215, 215, 215, 215, 215, 215, + 215, 215, 215, 215, 215, 215, 215, 215, + 215, 215, 215, 215, 215, 215, 215, 215, + 215, 215, 215, 215, 215, 215, 215, 215, + 215, 215, 215, 215, 3998, 215, 215, 215, + 215, 215, 215, 215, 3941, 3999, 4000, 4001, + 4002, 215, 215, 215, 215, 215, 215, 215, + 215, 0, 0, 0, 0, 0, 0, 0, + 0, 4003, 4004, 4005, 4006, 4007, 4008, 4009, + 4010, 4011, 4012, 4013, 4014, 4015, 4016, 4017, + 4018, 4019, 4020, 4021, 4022, 4023, 4024, 4025, + 4026, 4027, 4028, 4029, 4030, 4031, 4032, 4033, + 4034, 4035, 4036, 4037, 4038, 4039, 4040, 4041, + 4042, 4043, 4044, 4045, 4046, 4047, 4048, 4049, + 4050, 4051, 4052, 4053, 4054, 4055, 4056, 4057, + 4058, 4059, 4060, 4061, 4062, 4063, 4064, 4065, + 4066, 4067, 4068, 4069, 4070, 4071, 4072, 4073, + 4074, 
4075, 4076, 4077, 4078, 4079, 4080, 4081, + 4082, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 1155, 1155, 1151, 1155, + 1155, 1151, 1155, 1155, 1047, 1155, 1163, 0, + 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 0, 0, 0, 0, 0, + 0, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 
4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 
4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 
4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 
4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 
4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 
4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4083, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4083, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1364, 1364, 1364, 1364, 1364, 1364, 1364, + 1364, 1364, 1364, 1364, 1364, 1364, 1364, 1364, + 1364, 1364, 1364, 1364, 1364, 1364, 1364, 1364, + 0, 0, 0, 0, 1365, 1365, 1365, 1365, + 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, + 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, + 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, + 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, + 1365, 1365, 1365, 1365, 1365, 1365, 1365, 1365, + 1365, 1365, 1365, 1365, 1365, 0, 0, 0, + 0, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 
4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 
4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4087, 4088, 4089, 4090, 4091, 4092, 4093, + 4094, 4094, 4095, 4096, 4097, 4098, 4099, 4100, + 4101, 4102, 4103, 4104, 4105, 4106, 4107, 4108, + 4109, 4110, 4111, 4112, 4113, 4114, 4115, 4116, + 4117, 4118, 4119, 4120, 4121, 4122, 4123, 4124, + 4125, 4126, 4127, 4128, 4129, 4130, 4131, 4132, + 4133, 4134, 4135, 4136, 4137, 4138, 4139, 4140, + 4141, 4142, 4143, 4144, 4145, 4146, 4147, 4148, + 4149, 4150, 4151, 4152, 4153, 4154, 4155, 4156, + 4157, 4158, 4159, 4160, 4161, 4162, 4163, 4164, + 4165, 4166, 4167, 4168, 4169, 4170, 4171, 4172, + 4173, 4174, 4175, 4176, 4177, 4106, 4178, 4179, + 4180, 4181, 4182, 4183, 4184, 4185, 4186, 4187, + 4188, 4189, 4190, 4191, 4192, 4193, 4194, 4195, + 4196, 4197, 4198, 4199, 4200, 4201, 4202, 4203, + 4204, 4205, 4206, 4207, 4208, 4209, 4210, 4211, + 4212, 4213, 4214, 4215, 4216, 4217, 4218, 4219, + 4220, 4221, 4222, 4223, 4224, 4225, 4226, 4227, + 4228, 4229, 4230, 4231, 4232, 4233, 4234, 4235, + 4236, 4237, 4238, 4239, 4240, 4241, 4242, 4243, + 4244, 4245, 4196, 4246, 4247, 4248, 4249, 4250, + 4251, 4252, 4253, 4180, 4254, 4255, 4256, 4257, + 4258, 4259, 4260, 4261, 4262, 4263, 4264, 4265, + 4266, 4267, 4268, 4269, 4270, 4271, 4272, 4273, + 4106, 4274, 4275, 4276, 4277, 4278, 4279, 4280, + 4281, 4282, 4283, 4284, 4285, 4286, 4287, 4288, + 4289, 4290, 4291, 4292, 4293, 4294, 4295, 4296, + 4297, 4298, 4299, 4300, 4182, 4301, 4302, 4303, + 4304, 4305, 4306, 4307, 4308, 4309, 4310, 4311, + 4312, 4313, 4314, 4315, 4316, 4317, 4318, 4319, + 4320, 4321, 4322, 4323, 4324, 4325, 4326, 4327, + 4328, 4329, 4330, 4331, 4332, 4333, 4334, 4335, + 4336, 4337, 4338, 4339, 4340, 4341, 4342, 4343, + 4344, 4345, 4346, 
4347, 4348, 4349, 4350, 3035, + 3035, 4351, 3035, 4352, 3035, 3035, 4353, 4354, + 4355, 4356, 4357, 4358, 4359, 4360, 4361, 4362, + 3035, 4363, 3035, 4364, 3035, 3035, 4365, 4366, + 3035, 3035, 3035, 4367, 4368, 4369, 4370, 4371, + 4372, 4373, 4374, 4375, 4376, 4377, 4378, 4379, + 4380, 4381, 4382, 4383, 4384, 4385, 4386, 4387, + 4388, 4389, 4390, 4391, 4392, 4393, 4394, 4395, + 4396, 4397, 4398, 4399, 4400, 4401, 4402, 4403, + 4404, 4405, 4406, 4407, 4408, 4409, 4410, 4411, + 4235, 4412, 4413, 4414, 4415, 4416, 4417, 4417, + 4418, 4419, 4420, 4421, 4422, 4423, 4424, 4425, + 4365, 4426, 4427, 4428, 4429, 4430, 4431, 0, + 0, 4432, 4433, 4434, 4435, 4436, 4437, 4438, + 4439, 4379, 4440, 4441, 4442, 4351, 4443, 4444, + 4445, 4446, 4447, 4448, 4449, 4450, 4451, 4452, + 4453, 4454, 4388, 4455, 4389, 4456, 4457, 4458, + 4459, 4460, 4352, 4127, 4461, 4462, 4463, 4197, + 4284, 4464, 4465, 4396, 4466, 4397, 4467, 4468, + 4469, 4354, 4470, 4471, 4472, 4473, 4474, 4355, + 4475, 4476, 4477, 4478, 4479, 4480, 4411, 4481, + 4482, 4235, 4483, 4415, 4484, 4485, 4486, 4487, + 4488, 4420, 4489, 4364, 4490, 4421, 4178, 4491, + 4422, 4492, 4424, 4493, 4494, 4495, 4496, 4497, + 4426, 4360, 4498, 4427, 4499, 4428, 4500, 4094, + 4501, 4502, 4503, 4504, 4505, 4506, 4507, 4508, + 4509, 4510, 4511, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4512, 4513, 4514, 4515, 4516, 4517, 4518, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 4519, 4520, 4521, 4522, + 4523, 0, 0, 0, 0, 0, 4524, 4525, + 4526, 4527, 4528, 4529, 4530, 4531, 4532, 4533, + 4534, 4535, 4536, 4537, 4538, 4539, 4540, 4541, + 4542, 4543, 4544, 4545, 4546, 4547, 4548, 4549, + 0, 4550, 4551, 4552, 4553, 4554, 0, 4555, + 0, 4556, 4557, 0, 4558, 4559, 0, 4560, + 4561, 4562, 4563, 4564, 4565, 4566, 4567, 4568, + 4569, 4570, 4571, 4572, 4573, 4574, 4575, 4576, + 4577, 4578, 4579, 4580, 4581, 4582, 4583, 4584, + 4585, 4586, 4587, 4588, 4589, 4590, 4591, 4592, + 
4593, 4594, 4595, 4596, 4597, 4598, 4599, 4600, + 4601, 4602, 4603, 4604, 4605, 4606, 4607, 4608, + 4609, 4610, 4611, 4612, 4613, 4614, 4615, 4616, + 4617, 4618, 4619, 4620, 4621, 4622, 4623, 4624, + 4625, 4626, 4627, 4628, 4629, 4630, 4631, 4632, + 4633, 4634, 4635, 4636, 4637, 4638, 4639, 4640, + 4641, 4642, 4643, 4644, 4645, 4646, 4647, 4648, + 4649, 4650, 4651, 4652, 4653, 4654, 4655, 4656, + 4657, 4658, 4659, 4660, 4661, 4662, 4663, 4664, + 4665, 4666, 4667, 4668, 4668, 4668, 4668, 4668, + 4668, 4668, 4668, 4668, 4668, 4668, 4668, 4668, + 4668, 4668, 4668, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 4669, 4670, 4671, 4672, + 4673, 4674, 4675, 4676, 4677, 4678, 4679, 4680, + 4681, 4682, 4683, 4684, 4685, 4686, 4687, 4688, + 4689, 4690, 4691, 4692, 4693, 4694, 4695, 4696, + 4697, 4698, 4699, 4700, 4701, 4702, 4703, 4704, + 4705, 4706, 4707, 4708, 4709, 4710, 4711, 4712, + 4713, 4714, 4715, 4716, 4707, 4717, 4718, 4719, + 4720, 4721, 4722, 4723, 4724, 4725, 4726, 4727, + 4728, 4729, 4730, 4731, 4732, 4733, 4734, 4735, + 4736, 4737, 4738, 4739, 4740, 4741, 4742, 4743, + 4744, 4745, 4746, 4747, 4748, 4749, 4750, 4751, + 4752, 4753, 4754, 4755, 4756, 4757, 4758, 4759, + 4760, 4761, 4762, 4763, 4764, 4765, 4766, 4767, + 4768, 4769, 4770, 4771, 4772, 4773, 4774, 4775, + 4776, 4777, 4778, 4779, 4780, 4781, 4782, 4783, + 4784, 4785, 4786, 4787, 4788, 4789, 4790, 4791, + 4792, 4793, 4794, 4795, 4796, 4797, 4798, 4799, + 4800, 4801, 4802, 4803, 4804, 4805, 4806, 4807, + 4808, 4809, 4810, 4811, 4812, 4813, 4814, 4815, + 4816, 4708, 4817, 4818, 4819, 4820, 4821, 4822, + 4823, 4824, 4825, 4826, 4827, 4828, 4829, 4830, + 4831, 4832, 4833, 4834, 4835, 4836, 4837, 4838, + 4839, 4840, 4841, 4842, 4843, 4844, 4845, 4846, + 4847, 4848, 4849, 4850, 4851, 4852, 4853, 4854, + 4855, 4856, 4857, 4858, 4859, 4860, 4861, 4862, + 4863, 4864, 4865, 4866, 4867, 4868, 4869, 4870, + 4871, 4872, 4873, 4874, 4875, 4876, 4877, 4878, + 4879, 4880, 4881, 4882, 4883, 4884, 4885, 4886, + 
4887, 4888, 4889, 4890, 4891, 4892, 4893, 4894, + 4895, 4896, 4897, 4898, 4899, 4900, 4901, 4902, + 4903, 4904, 4905, 4906, 4907, 4908, 4909, 4910, + 4911, 4912, 4913, 4914, 4915, 4916, 4917, 4918, + 4919, 4920, 4921, 4922, 4923, 4924, 4925, 4926, + 4927, 4928, 4929, 4930, 4931, 4932, 4933, 4934, + 4935, 4936, 4937, 4938, 4939, 4940, 4941, 4942, + 4943, 4944, 4945, 4946, 4947, 4948, 4949, 4950, + 4951, 4952, 4953, 4954, 4955, 4956, 4957, 4958, + 4959, 4960, 4961, 4962, 4963, 4964, 4965, 4966, + 4967, 4968, 4969, 4970, 4971, 4972, 4973, 4974, + 4975, 4976, 4977, 4978, 4979, 4980, 4981, 4982, + 4983, 4984, 4985, 4986, 4987, 4988, 4989, 4990, + 4991, 4992, 4993, 4994, 4995, 4996, 4997, 4998, + 4999, 5000, 5001, 5002, 5003, 5004, 5005, 5006, + 5007, 5008, 5009, 5010, 5011, 5012, 5013, 5014, + 5015, 5016, 5017, 5018, 5019, 5020, 5021, 5022, + 5023, 5024, 5025, 5026, 5027, 5028, 5029, 5030, + 2137, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5031, 5032, 5033, 5034, 5035, 5036, 5037, + 5038, 5039, 5040, 5041, 5042, 5043, 5044, 5045, + 5046, 5047, 5048, 5049, 5050, 5051, 5052, 5053, + 5054, 5055, 5056, 5057, 5058, 5059, 5060, 5061, + 5062, 5063, 5064, 5065, 5066, 5067, 5068, 5069, + 5070, 5071, 5072, 5073, 5074, 5075, 5076, 5077, + 5078, 5079, 5080, 5081, 5082, 5083, 5084, 5085, + 5086, 5087, 5088, 5089, 5090, 5091, 5092, 5093, + 5094, 0, 0, 5095, 5096, 5097, 5098, 5099, + 5100, 5101, 5102, 5103, 5104, 5105, 5106, 5107, + 5108, 5109, 5110, 5111, 5112, 5113, 5114, 5115, + 5116, 5117, 5118, 5119, 5120, 5121, 5122, 5123, + 5124, 5125, 5126, 5127, 5128, 5129, 5130, 5131, + 5132, 5133, 5134, 5135, 5136, 5137, 5138, 5139, + 5140, 5141, 5142, 5143, 5144, 5145, 5146, 5147, + 5148, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5149, 5150, 5151, 5152, 5153, 5154, 5155, + 5156, 5157, 5158, 5159, 5160, 5161, 77, 0, + 0, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 
577, + 577, 5162, 5163, 5164, 5165, 5166, 5167, 5168, + 5169, 5170, 5171, 0, 0, 0, 0, 0, + 0, 543, 543, 543, 543, 543, 543, 543, + 556, 556, 556, 556, 556, 556, 556, 543, + 543, 5172, 5173, 5174, 5175, 5175, 5176, 5177, + 5178, 5179, 5180, 5181, 5182, 5183, 5184, 5185, + 5186, 5187, 5188, 5189, 5190, 5191, 3033, 3033, + 5192, 5193, 5194, 5194, 5194, 5194, 5195, 5195, + 5195, 5196, 5197, 5198, 0, 5199, 5200, 5201, + 5202, 5203, 5204, 5205, 5206, 5207, 5208, 5209, + 5210, 5211, 5212, 5213, 5214, 5215, 5216, 5217, + 0, 5218, 5219, 5220, 5221, 0, 0, 0, + 0, 5222, 5223, 5224, 1117, 5225, 0, 5226, + 5227, 5228, 5229, 5230, 5231, 5232, 5233, 5234, + 5235, 5236, 5237, 5238, 5239, 5240, 5241, 5242, + 5243, 5244, 5245, 5246, 5247, 5248, 5249, 5250, + 5251, 5252, 5253, 5254, 5255, 5256, 5257, 5258, + 5259, 5260, 5261, 5262, 5263, 5264, 5265, 5266, + 5267, 5268, 5269, 5270, 5271, 5272, 5273, 5274, + 5275, 5276, 5277, 5278, 5279, 5280, 5281, 5282, + 5283, 5284, 5285, 5286, 5287, 5288, 5289, 5290, + 5291, 5292, 5293, 5294, 5295, 5296, 5297, 5298, + 5299, 5300, 5301, 5302, 5303, 5304, 5305, 5306, + 5307, 5308, 5309, 5310, 5311, 5312, 5313, 5314, + 5315, 5316, 5317, 5318, 5319, 5320, 5321, 5322, + 5323, 5324, 5325, 5326, 5327, 5328, 5329, 5330, + 5331, 5332, 5333, 5334, 5335, 5336, 5337, 5338, + 5339, 5340, 5341, 5342, 5343, 5344, 5345, 5346, + 5347, 5348, 5349, 5350, 5351, 5352, 5353, 5354, + 5355, 5356, 5357, 5358, 5359, 5360, 0, 0, + 1459, 0, 5361, 5362, 5363, 5364, 5365, 5366, + 5367, 5368, 5369, 5370, 5371, 5372, 5373, 5374, + 5375, 5376, 5377, 5378, 5379, 5380, 5381, 5382, + 5383, 5384, 5385, 5386, 5387, 5388, 5389, 5390, + 5391, 5392, 5393, 5394, 5395, 5396, 5397, 5398, + 5399, 5400, 5401, 5402, 5403, 5404, 5405, 5406, + 5407, 5408, 5409, 5410, 5411, 5412, 5413, 5414, + 5415, 5416, 5417, 5418, 5419, 5420, 5421, 5422, + 5423, 5424, 5425, 5426, 5427, 5428, 5429, 5430, + 5431, 5432, 5433, 5434, 5435, 5436, 5437, 5438, + 5439, 5440, 5441, 5442, 5443, 5444, 5445, 5446, + 5447, 
5448, 5449, 5450, 5451, 5452, 5453, 5454, + 5455, 5456, 5457, 5458, 5459, 5460, 5461, 5462, + 5463, 5464, 5465, 5466, 5467, 5468, 5469, 5470, + 5471, 5472, 5473, 5474, 5475, 5476, 5477, 5478, + 5479, 5480, 5481, 5482, 5483, 5484, 5485, 5486, + 5487, 5488, 5489, 5490, 5491, 5492, 5493, 5494, + 5495, 5496, 5497, 5498, 5499, 5500, 5501, 5502, + 5503, 5504, 5505, 5506, 5507, 5508, 5509, 5510, + 5511, 5512, 5513, 5514, 5515, 5516, 5517, 5518, + 5519, 5520, 5521, 5522, 5523, 5524, 5525, 5526, + 5527, 5528, 5529, 5530, 5531, 5532, 5533, 5534, + 5535, 5536, 5537, 5538, 5539, 5540, 5541, 5542, + 5543, 5544, 5545, 5546, 5547, 5548, 5549, 5550, + 0, 0, 0, 5551, 5552, 5553, 5554, 5555, + 5556, 0, 0, 5557, 5558, 5559, 5560, 5561, + 5562, 0, 0, 5563, 5564, 5565, 5566, 5567, + 5568, 0, 0, 5569, 5570, 5571, 0, 0, + 0, 5572, 5573, 5574, 5575, 5576, 5577, 5578, + 0, 5579, 5580, 5581, 5582, 5583, 5584, 5585, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 5586, 5586, 5586, 77, 77, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 0, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 0, 341, 341, 0, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 
341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 0, 0, 0, 0, + 0, 1047, 9, 1047, 0, 0, 0, 0, + 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, + 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, + 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, + 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, + 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, + 1182, 1182, 1182, 1182, 1182, 0, 0, 0, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 5587, 5587, 5587, 5587, 5587, 5587, 5587, + 5587, 5587, 5587, 5587, 5587, 5587, 5587, 5587, + 5587, 5587, 5587, 5587, 5587, 5587, 5587, 5587, + 5587, 5587, 5587, 5587, 5587, 5587, 5587, 5587, + 5587, 5587, 5587, 5587, 5587, 5587, 5587, 5587, + 5587, 5587, 5587, 5587, 5587, 5587, 5587, 5587, + 5587, 5587, 5587, 5587, 5587, 5587, 1211, 1211, + 1211, 1211, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 1211, 1211, 77, 841, 841, + 0, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 0, 0, 0, + 0, 77, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 556, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 
341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 556, 5588, 5588, 5588, 5588, 5588, 5588, + 5588, 5588, 5588, 5588, 5588, 5588, 5588, 5588, + 5588, 5588, 5588, 5588, 5588, 5588, 5588, 5588, + 5588, 5588, 5588, 5588, 5588, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 1182, 1182, 1182, 1182, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 1458, 341, 341, 341, 341, 341, + 341, 341, 341, 1458, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 543, + 543, 543, 543, 543, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 0, + 1047, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 1047, 1458, 1458, 1458, 1458, 1458, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5589, 5590, 5591, 5592, 5593, 5594, 5595, + 5596, 5597, 5598, 5599, 5600, 5601, 5602, 5603, + 5604, 5605, 5606, 5607, 5608, 5609, 5610, 5611, + 5612, 5613, 5614, 5615, 5616, 5617, 
5618, 5619, + 5620, 5621, 5622, 5623, 5624, 5625, 5626, 5627, + 5628, 5629, 5630, 5631, 5632, 5633, 5634, 5635, + 5636, 5637, 5638, 5639, 5640, 5641, 5642, 5643, + 5644, 5645, 5646, 5647, 5648, 5649, 5650, 5651, + 5652, 5653, 5654, 5655, 5656, 5657, 5658, 5659, + 5660, 5661, 5662, 5663, 5664, 5665, 5666, 5667, + 5668, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 0, + 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 0, 0, 0, 0, 0, + 0, 5669, 5670, 5671, 5672, 5673, 5674, 5675, + 5676, 5677, 5678, 5679, 5680, 5681, 5682, 5683, + 5684, 5685, 5686, 5687, 5688, 5689, 5690, 5691, + 5692, 5693, 5694, 5695, 5696, 5697, 5698, 5699, + 5700, 5701, 5702, 5703, 5704, 0, 0, 0, + 0, 5705, 5706, 5707, 5708, 5709, 5710, 5711, + 5712, 5713, 5714, 5715, 5716, 5717, 5718, 5719, + 5720, 5721, 5722, 5723, 5724, 5725, 5726, 5727, + 5728, 5729, 5730, 5731, 5732, 5733, 5734, 5735, + 5736, 5737, 5738, 5739, 5740, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1047, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 
0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1108, 1108, 1108, 1108, 1108, 1108, 0, + 0, 1108, 0, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 0, + 1108, 1108, 0, 0, 0, 1108, 0, 0, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 0, + 1105, 5741, 5741, 5741, 5741, 5741, 5741, 5741, + 5741, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 
1108, 1108, 1108, 1108, 1108, + 5742, 5742, 5741, 5741, 5741, 5741, 5741, 5741, + 5741, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 0, 0, 0, 0, 0, 0, 0, 0, + 5741, 5741, 5741, 5741, 5741, 5741, 5741, 5741, + 5741, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 0, 1108, 1108, 0, + 0, 0, 0, 0, 5741, 5741, 5741, 5741, + 5741, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 5741, + 5741, 5741, 5741, 5741, 5741, 0, 0, 0, + 9, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 0, 0, 0, 0, 0, + 1105, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 0, 0, 0, 0, 5741, 5741, 1108, + 1108, 5741, 5741, 5741, 5741, 5741, 5741, 5741, + 5741, 5741, 5741, 5741, 5741, 5741, 5741, 5741, + 5741, 0, 0, 5741, 5741, 5741, 5741, 5741, + 5741, 5741, 5741, 5741, 5741, 5741, 5741, 5741, + 5741, 5741, 5741, 5741, 5741, 5741, 5741, 5741, + 5741, 5741, 5741, 5741, 5741, 5741, 5741, 5741, + 5741, 5741, 5741, 5741, 5741, 5741, 
5741, 5741, + 5741, 5741, 5741, 5741, 5741, 5741, 5741, 5741, + 5741, 1108, 1151, 1151, 1151, 0, 1151, 1151, + 0, 0, 0, 0, 0, 1151, 556, 1151, + 543, 1108, 1108, 1108, 1108, 0, 1108, 1108, + 1108, 0, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 0, + 0, 543, 569, 556, 0, 0, 0, 0, + 1163, 5741, 5741, 5741, 5741, 5741, 5741, 5741, + 5741, 5741, 0, 0, 0, 0, 0, 0, + 0, 1105, 1105, 1105, 1105, 1105, 1105, 1105, + 1105, 1105, 0, 0, 0, 0, 0, 0, + 0, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 5741, 5741, + 1105, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 5741, 5741, + 5741, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 5742, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 543, 556, + 0, 0, 0, 0, 5741, 5741, 5741, 5741, + 5741, 1105, 1105, 1105, 1105, 1105, 1105, 1105, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 0, + 0, 0, 9, 9, 9, 9, 9, 9, + 9, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 0, + 0, 5741, 5741, 5741, 5741, 5741, 5741, 5741, 
+ 5741, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 0, 0, 0, 0, + 0, 5741, 5741, 5741, 5741, 5741, 5741, 5741, + 5741, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 0, 0, 0, 0, 0, + 0, 0, 1105, 1105, 1105, 1105, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 5741, 5741, 5741, 5741, 5741, 5741, + 5741, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5743, 5744, 5745, 5746, 5747, 5748, 5749, + 5750, 5751, 5752, 5753, 5754, 5755, 5756, 5757, + 5758, 5759, 5760, 5761, 5762, 5763, 5764, 5765, + 5766, 5767, 5768, 5769, 5770, 5771, 5772, 5773, + 5774, 5775, 5776, 5777, 5778, 5779, 5780, 5781, + 5782, 5783, 5784, 5785, 5786, 5787, 5788, 5789, + 5790, 5791, 5792, 5793, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5794, 5795, 5796, 5797, 5798, 5799, 5800, + 5801, 5802, 5803, 5804, 5805, 5806, 5807, 5808, + 5809, 5810, 5811, 5812, 5813, 5814, 5815, 5816, + 5817, 5818, 5819, 5820, 5821, 5822, 5823, 5824, + 5825, 5826, 5827, 5828, 5829, 5830, 5831, 5832, + 5833, 5834, 5835, 5836, 5837, 5838, 5839, 5840, + 5841, 5842, 
5843, 5844, 0, 0, 0, 0, + 0, 0, 0, 5741, 5741, 5741, 5741, 5741, + 5741, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 543, 543, 543, + 543, 0, 0, 0, 0, 0, 0, 0, + 0, 1135, 1135, 1135, 1135, 1135, 1135, 1135, + 1135, 1135, 1135, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5845, 5845, 5845, 5845, 5845, 5845, 5845, + 5845, 5845, 5845, 5845, 5845, 5845, 5845, 5845, + 5845, 5845, 5845, 5845, 5845, 5845, 5845, 5845, + 5845, 5845, 5845, 5845, 5845, 5845, 5845, 5845, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1108, 1108, 1108, 1108, 
1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 5741, 5741, + 5741, 5741, 5741, 5741, 5741, 5741, 5741, 5741, + 1108, 0, 0, 0, 0, 0, 0, 0, + 0, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 1117, + 1117, 1117, 1117, 1117, 1117, 1117, 1117, 556, + 556, 543, 543, 543, 556, 543, 556, 556, + 556, 556, 5846, 5846, 5846, 5846, 1112, 1112, + 1112, 1112, 1112, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1155, 1151, 1155, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1163, + 1047, 1047, 1047, 1047, 1047, 1047, 1047, 0, + 0, 0, 0, 1211, 1211, 1211, 1211, 1211, + 1211, 1211, 1211, 1211, 1211, 1211, 1211, 1211, + 1211, 1211, 1211, 1211, 1211, 1211, 1211, 1172, + 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1163, 1151, 1151, 1155, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 5847, 5848, 5849, 5850, 341, 341, + 341, 341, 341, 341, 341, 
341, 5851, 341, + 341, 341, 341, 341, 5852, 341, 341, 341, + 341, 1155, 1155, 1155, 1151, 1151, 1151, 1151, + 1155, 1155, 1163, 5853, 1047, 1047, 5854, 1047, + 1047, 1047, 1047, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 5854, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 0, 0, 0, 0, 0, 0, + 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 0, 0, 0, 0, 0, + 0, 543, 543, 543, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 5855, 1151, 1151, 1151, 1151, 1155, 1151, 5856, + 5857, 1151, 5858, 5859, 1163, 1163, 0, 1172, + 1172, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1047, 1047, 1047, 1047, 341, 1155, 1155, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 1173, 1047, 1047, 341, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1151, 1151, 1155, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 1155, 1155, 1155, 1151, + 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 1155, 1483, 341, 1228, 1228, 341, 1047, 1047, + 1047, 1047, 1151, 1173, 1151, 1151, 1047, 0, + 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 341, 1047, 341, 1047, 1047, + 1047, 0, 1182, 1182, 1182, 1182, 1182, 1182, + 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, + 1182, 1182, 1182, 1182, 1182, 1182, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 0, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 
341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 1155, 1155, 1155, + 1151, 1151, 1151, 1155, 1155, 1151, 1483, 1173, + 1151, 1047, 1047, 1047, 1047, 1047, 1047, 1151, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 0, 341, 0, 341, 341, 341, 341, 0, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 0, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 1047, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 1151, 1155, 1155, 1155, 1151, 1151, 1151, 1151, + 1151, 1151, 1173, 1163, 0, 0, 0, 0, + 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 0, 0, 0, 0, 0, + 0, 1151, 1151, 1155, 1155, 0, 341, 341, + 341, 341, 341, 341, 341, 341, 0, 0, + 341, 341, 0, 0, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 0, 341, 341, 341, 341, 341, + 341, 341, 0, 341, 341, 0, 341, 341, + 341, 341, 341, 0, 1173, 1173, 341, 5860, + 1155, 1151, 1155, 1155, 1155, 1155, 0, 0, + 5861, 1155, 0, 0, 5862, 5863, 1483, 0, + 0, 341, 0, 0, 0, 0, 0, 0, + 5864, 0, 0, 0, 0, 0, 341, 341, + 341, 341, 341, 1155, 1155, 0, 0, 543, + 543, 543, 543, 543, 543, 543, 0, 0, + 0, 543, 543, 543, 543, 543, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 
0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 1155, 1155, + 1155, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 1151, 1155, 1155, 1163, 1151, 1151, 1155, 1173, + 341, 341, 341, 341, 1047, 1047, 1047, 1047, + 1047, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 0, 1047, 0, 1047, 543, + 341, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 5865, 1155, 1155, 1151, 1151, 1151, 1151, + 1151, 1151, 5866, 5867, 5868, 5869, 5870, 5871, + 1151, 1151, 1155, 1163, 1173, 341, 341, 1047, + 341, 0, 0, 0, 0, 0, 0, 0, + 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 5872, 
1155, 1155, 1151, 1151, 1151, 1151, 0, + 0, 5873, 5874, 5875, 5876, 1151, 1151, 1155, + 1163, 1173, 1047, 1047, 1047, 1047, 1047, 1047, + 1047, 1047, 1047, 1047, 1047, 1047, 1047, 1047, + 1047, 1047, 1047, 1047, 1047, 1047, 1047, 1047, + 1047, 341, 341, 341, 341, 1151, 1151, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 1155, 1155, 1155, 1151, 1151, 1151, 1151, + 1151, 1151, 1151, 1151, 1155, 1155, 1151, 1155, + 1163, 1151, 1047, 1047, 1047, 341, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 0, 0, 0, 0, 0, + 0, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 1151, 1155, 1151, 1155, + 1155, 1151, 1151, 1151, 1151, 1151, 1151, 1483, + 1173, 341, 0, 0, 0, 0, 0, 0, + 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 0, 0, 1151, 1151, + 1151, 1155, 1155, 1151, 1151, 1151, 1151, 1155, + 1151, 1151, 1151, 1151, 1163, 0, 0, 0, + 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 1182, 1182, 1047, 1047, 1047, + 841, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 
0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 1155, 1155, 1155, + 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 1151, 1155, 1163, 1173, 1047, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5877, 5878, 5879, 5880, 5881, 5882, 5883, + 5884, 5885, 5886, 5887, 5888, 5889, 5890, 5891, + 5892, 5893, 5894, 5895, 5896, 5897, 5898, 5899, + 5900, 5901, 5902, 5903, 5904, 5905, 5906, 5907, + 5908, 5909, 5910, 5911, 5912, 5913, 5914, 5915, + 5916, 5917, 5918, 5919, 5920, 5921, 5922, 5923, + 5924, 5925, 5926, 5927, 5928, 5929, 5930, 5931, + 5932, 5933, 5934, 5935, 5936, 5937, 5938, 5939, + 5940, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 1182, 1182, 1182, 1182, 1182, + 1182, 1182, 1182, 1182, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 341, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 
0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 0, 0, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 1155, 1155, 1155, 1151, 1151, 1151, + 1151, 0, 0, 1151, 1151, 1155, 1155, 1155, + 1155, 1163, 341, 1047, 341, 1155, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 1151, 1151, 1151, 1151, 1151, 1151, + 5941, 5941, 1151, 1151, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 1151, 1163, 1151, 1151, + 1151, 1151, 1155, 1228, 1151, 1151, 1151, 1151, + 1047, 1047, 1047, 1047, 1047, 1047, 1047, 1047, + 1163, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 1151, 1151, 1151, 1151, 1151, 1151, + 1155, 1155, 1151, 1151, 1151, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 1228, 1228, 1228, + 1228, 1228, 1228, 1151, 1151, 1151, 1151, 1151, + 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 1155, 1151, 1163, 1047, 1047, 1047, 341, 1047, + 1047, 1047, 1047, 1047, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 
341, 341, 341, 341, 341, 341, 341, + 341, 341, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 0, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 1155, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 0, 1151, 1151, 1151, 1151, 1151, 1151, 1155, + 5942, 341, 1047, 1047, 1047, 1047, 1047, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 1182, 1182, 1182, 1182, 1182, + 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, + 1182, 1182, 1182, 1182, 1182, 1182, 0, 0, + 0, 1047, 1047, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 0, 0, 1151, 1151, 1151, 1151, 1151, + 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 1151, 0, 1155, 1151, 1151, 1151, 1151, 1151, + 1151, 1151, 1155, 1151, 1151, 1155, 1151, 1151, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 
0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 0, 341, 341, 0, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 1151, 1151, 1151, 1151, 1151, 1151, + 0, 0, 0, 1151, 0, 1151, 1151, 0, + 1151, 1151, 1151, 1173, 1151, 1163, 1163, 1228, + 1151, 0, 0, 0, 0, 0, 0, 0, + 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 0, + 341, 341, 0, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 1155, 1155, 1155, 1155, 1155, + 0, 1151, 1151, 0, 1155, 1155, 1151, 1155, + 1163, 341, 0, 0, 0, 0, 0, 0, + 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 
0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 1151, 1151, 1155, 1155, + 1047, 1047, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1182, 1182, 1182, 1182, 1182, 1182, 1182, + 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, + 1182, 1182, 1182, 1182, 1182, 1182, 77, 77, + 77, 77, 77, 77, 77, 77, 11, 11, + 11, 11, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1047, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 
0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1458, 1458, 1458, 1458, 1458, 1458, 1458, + 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, + 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, + 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, + 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, + 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, + 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, + 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, + 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, + 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, + 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, + 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, + 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, + 1458, 1458, 1458, 1458, 1458, 1458, 1458, 1458, + 0, 1047, 1047, 1047, 1047, 1047, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 
341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 0, 5943, 5943, 5943, 5943, 5943, 5943, 5943, + 5943, 5943, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 
0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 0, 0, 0, 0, 1047, + 1047, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 0, + 0, 569, 569, 569, 569, 569, 1047, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 543, 543, 543, 543, 543, 543, 
543, + 1047, 1047, 1047, 1047, 1047, 841, 841, 841, + 841, 525, 525, 525, 525, 1047, 841, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 0, 1182, 1182, 1182, 1182, + 1182, 1182, 1182, 0, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 0, 0, 0, 0, 0, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5944, 5945, 5946, 5947, 5948, 5949, 5950, + 5951, 5952, 5953, 5954, 5955, 5956, 5957, 5958, + 5959, 5960, 5961, 5962, 5963, 5964, 5965, 5966, + 5967, 5968, 5969, 5970, 5971, 5972, 5973, 5974, + 5975, 5976, 5977, 5978, 5979, 5980, 5981, 5982, + 5983, 5984, 5985, 5986, 5987, 5988, 5989, 5990, + 5991, 5992, 5993, 5994, 5995, 5996, 5997, 5998, + 5999, 6000, 6001, 6002, 6003, 6004, 6005, 6006, + 6007, 1182, 1182, 1182, 1182, 1182, 1182, 1182, + 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, + 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, + 1047, 1047, 1047, 1047, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 
341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 0, 0, 0, 0, + 1151, 341, 1155, 1155, 1155, 1155, 1155, 1155, + 1155, 1155, 1155, 1155, 1155, 1155, 1155, 1155, + 1155, 1155, 1155, 1155, 1155, 1155, 1155, 1155, + 1155, 1155, 1155, 1155, 1155, 1155, 1155, 1155, + 1155, 1155, 1155, 1155, 1155, 1155, 1155, 1155, + 1155, 1155, 1155, 1155, 1155, 1155, 1155, 1155, + 1155, 1155, 1155, 1155, 1155, 1155, 1155, 1155, + 1155, 0, 0, 0, 0, 0, 0, 0, + 1151, 1151, 1151, 1151, 525, 525, 525, 525, + 525, 525, 525, 525, 525, 525, 525, 525, + 525, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3034, 3034, 3033, 3034, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 
3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 0, 0, 0, 0, 0, 0, 0, + 0, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 
3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3035, 3035, 3035, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 3035, 3035, 3035, + 3035, 0, 0, 0, 0, 0, 0, 0, + 0, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 
3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 
341, + 341, 341, 341, 0, 0, 841, 1151, 569, + 1047, 1459, 1459, 1459, 1459, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 0, 0, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 
841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 6008, 6009, 841, 841, 841, 841, 841, 6010, + 6011, 6012, 6013, 6014, 6015, 6016, 6017, 6018, + 569, 569, 569, 841, 841, 841, 6019, 6020, + 6021, 6022, 6023, 6024, 1459, 1459, 1459, 1459, + 1459, 1459, 1459, 1459, 556, 556, 556, 556, + 556, 556, 556, 556, 841, 841, 543, 543, + 543, 543, 543, 556, 556, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 543, 543, 543, 543, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 6025, 6026, 6027, 6028, 6029, 6030, + 6031, 6032, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 543, 543, 543, 77, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1182, 1182, 1182, 1182, 1182, 1182, 1182, + 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, + 1182, 1182, 1182, 1182, 1182, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 77, 77, 
77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1182, 1182, 1182, 1182, 1182, 1182, 1182, + 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, + 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1182, + 1182, 1182, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6033, 2233, 2208, 2251, 2235, 2236, 6034, + 2215, 2218, 6035, 6036, 2219, 2238, 2221, 6037, + 2223, 2224, 2225, 6038, 6039, 6040, 6041, 6042, + 6043, 6044, 2229, 6045, 6046, 6047, 2252, 2234, + 6048, 2214, 2216, 2253, 2254, 6049, 2220, 6050, + 6051, 2239, 6052, 6053, 6054, 6055, 6056, 6057, + 6058, 6059, 6060, 6061, 6062, 6033, 2233, 2208, + 2251, 2235, 2236, 6034, 2215, 2218, 6035, 6036, + 2219, 2238, 2221, 6037, 2223, 2224, 2225, 6038, + 6039, 6040, 6041, 6042, 6043, 6044, 2229, 6045, + 6046, 6047, 2252, 2234, 6048, 2214, 0, 2253, + 2254, 6049, 2220, 6050, 6051, 2239, 6052, 6053, + 6054, 6055, 6056, 6057, 6058, 6059, 6060, 6061, + 6062, 6033, 2233, 2208, 2251, 2235, 2236, 6034, + 2215, 2218, 6035, 6036, 2219, 2238, 2221, 6037, + 2223, 2224, 2225, 6038, 6039, 6040, 6041, 6042, + 6043, 6044, 2229, 6045, 6046, 6047, 2252, 2234, + 6048, 2214, 2216, 2253, 2254, 6049, 2220, 6050, + 6051, 2239, 6052, 6053, 6054, 6055, 6056, 6057, + 6058, 6059, 6060, 6061, 6062, 6033, 0, 2208, + 2251, 0, 0, 6034, 0, 0, 
6035, 6036, + 0, 0, 2221, 6037, 2223, 2224, 0, 6038, + 6039, 6040, 6041, 6042, 6043, 6044, 2229, 6045, + 6046, 6047, 2252, 0, 6048, 0, 2216, 2253, + 2254, 6049, 2220, 6050, 6051, 0, 6052, 6053, + 6054, 6055, 6056, 6057, 6058, 6059, 6060, 6061, + 6062, 6033, 2233, 2208, 2251, 2235, 2236, 6034, + 2215, 2218, 6035, 6036, 2219, 2238, 2221, 6037, + 2223, 2224, 2225, 6038, 6039, 6040, 6041, 6042, + 6043, 6044, 2229, 6045, 6046, 6047, 2252, 2234, + 6048, 2214, 2216, 2253, 2254, 6049, 2220, 6050, + 6051, 2239, 6052, 6053, 6054, 6055, 6056, 6057, + 6058, 6059, 6060, 6061, 6062, 6033, 2233, 0, + 2251, 2235, 2236, 6034, 0, 0, 6035, 6036, + 2219, 2238, 2221, 6037, 2223, 2224, 0, 6038, + 6039, 6040, 6041, 6042, 6043, 6044, 0, 6045, + 6046, 6047, 2252, 2234, 6048, 2214, 2216, 2253, + 2254, 6049, 2220, 6050, 6051, 2239, 6052, 6053, + 6054, 6055, 6056, 6057, 6058, 6059, 6060, 6061, + 6062, 6033, 2233, 0, 2251, 2235, 2236, 6034, + 0, 2218, 6035, 6036, 2219, 2238, 0, 6037, + 0, 0, 0, 6038, 6039, 6040, 6041, 6042, + 6043, 6044, 0, 6045, 6046, 6047, 2252, 2234, + 6048, 2214, 2216, 2253, 2254, 6049, 2220, 6050, + 6051, 2239, 6052, 6053, 6054, 6055, 6056, 6057, + 6058, 6059, 6060, 6061, 6062, 6033, 2233, 2208, + 2251, 2235, 2236, 6034, 2215, 2218, 6035, 6036, + 2219, 2238, 2221, 6037, 2223, 2224, 2225, 6038, + 6039, 6040, 6041, 6042, 6043, 6044, 2229, 6045, + 6046, 6047, 2252, 2234, 6048, 2214, 2216, 2253, + 2254, 6049, 2220, 6050, 6051, 2239, 6052, 6053, + 6054, 6055, 6056, 6057, 6058, 6059, 6060, 6061, + 6062, 6033, 2233, 2208, 2251, 2235, 2236, 6034, + 2215, 2218, 6035, 6036, 2219, 2238, 2221, 6037, + 2223, 2224, 2225, 6038, 6039, 6040, 6041, 6042, + 6043, 6044, 2229, 6045, 6046, 6047, 2252, 2234, + 6048, 2214, 2216, 2253, 2254, 6049, 2220, 6050, + 6051, 2239, 6052, 6053, 6054, 6055, 6056, 6057, + 6058, 6059, 6060, 6061, 6062, 6033, 2233, 2208, + 2251, 2235, 2236, 6034, 2215, 2218, 6035, 6036, + 2219, 2238, 2221, 6037, 2223, 2224, 2225, 6038, + 6039, 6040, 6041, 6042, 6043, 6044, 
2229, 6045, + 6046, 6047, 2252, 2234, 6048, 2214, 2216, 2253, + 2254, 6049, 2220, 6050, 6051, 2239, 6052, 6053, + 6054, 6055, 6056, 6057, 6058, 6059, 6060, 6061, + 6062, 6033, 2233, 2208, 2251, 2235, 2236, 6034, + 2215, 2218, 6035, 6036, 2219, 2238, 2221, 6037, + 2223, 2224, 2225, 6038, 6039, 6040, 6041, 6042, + 6043, 6044, 2229, 6045, 6046, 6047, 2252, 2234, + 6048, 2214, 2216, 2253, 2254, 6049, 2220, 6050, + 6051, 2239, 6052, 6053, 6054, 6055, 6056, 6057, + 6058, 6059, 6060, 6061, 6062, 6033, 2233, 2208, + 2251, 2235, 2236, 6034, 2215, 2218, 6035, 6036, + 2219, 2238, 2221, 6037, 2223, 2224, 2225, 6038, + 6039, 6040, 6041, 6042, 6043, 6044, 2229, 6045, + 6046, 6047, 2252, 2234, 6048, 2214, 2216, 2253, + 2254, 6049, 2220, 6050, 6051, 2239, 6052, 6053, + 6054, 6055, 6056, 6057, 6058, 6059, 6060, 6061, + 6062, 6033, 2233, 2208, 2251, 2235, 2236, 6034, + 2215, 2218, 6035, 6036, 2219, 2238, 2221, 6037, + 2223, 2224, 2225, 6038, 6039, 6040, 6041, 6042, + 6043, 6044, 2229, 6045, 6046, 6047, 2252, 2234, + 6048, 2214, 2216, 2253, 2254, 6049, 2220, 6050, + 6051, 2239, 6052, 6053, 6054, 6055, 6056, 6057, + 6058, 6059, 6060, 6061, 6062, 6063, 6064, 0, + 0, 6065, 6066, 2248, 6067, 6068, 6069, 6070, + 6071, 6072, 6073, 6074, 6075, 6076, 6077, 6078, + 2249, 6079, 6080, 6081, 6082, 6083, 6084, 6085, + 6086, 6087, 6088, 6089, 6090, 2247, 6091, 6092, + 6093, 6094, 6095, 6096, 6097, 6098, 6099, 6100, + 6101, 6102, 2246, 6103, 6104, 6105, 6106, 6107, + 6108, 6109, 6110, 6111, 6112, 6113, 6114, 6115, + 6116, 6117, 6118, 6065, 6066, 2248, 6067, 6068, + 6069, 6070, 6071, 6072, 6073, 6074, 6075, 6076, + 6077, 6078, 2249, 6079, 6080, 6081, 6082, 6083, + 6084, 6085, 6086, 6087, 6088, 6089, 6090, 2247, + 6091, 6092, 6093, 6094, 6095, 6096, 6097, 6098, + 6099, 6100, 6101, 6102, 2246, 6103, 6104, 6105, + 6106, 6107, 6108, 6109, 6110, 6111, 6112, 6113, + 6114, 6115, 6116, 6117, 6118, 6065, 6066, 2248, + 6067, 6068, 6069, 6070, 6071, 6072, 6073, 6074, + 6075, 6076, 6077, 6078, 2249, 6079, 6080, 
6081, + 6082, 6083, 6084, 6085, 6086, 6087, 6088, 6089, + 6090, 2247, 6091, 6092, 6093, 6094, 6095, 6096, + 6097, 6098, 6099, 6100, 6101, 6102, 2246, 6103, + 6104, 6105, 6106, 6107, 6108, 6109, 6110, 6111, + 6112, 6113, 6114, 6115, 6116, 6117, 6118, 6065, + 6066, 2248, 6067, 6068, 6069, 6070, 6071, 6072, + 6073, 6074, 6075, 6076, 6077, 6078, 2249, 6079, + 6080, 6081, 6082, 6083, 6084, 6085, 6086, 6087, + 6088, 6089, 6090, 2247, 6091, 6092, 6093, 6094, + 6095, 6096, 6097, 6098, 6099, 6100, 6101, 6102, + 2246, 6103, 6104, 6105, 6106, 6107, 6108, 6109, + 6110, 6111, 6112, 6113, 6114, 6115, 6116, 6117, + 6118, 6065, 6066, 2248, 6067, 6068, 6069, 6070, + 6071, 6072, 6073, 6074, 6075, 6076, 6077, 6078, + 2249, 6079, 6080, 6081, 6082, 6083, 6084, 6085, + 6086, 6087, 6088, 6089, 6090, 2247, 6091, 6092, + 6093, 6094, 6095, 6096, 6097, 6098, 6099, 6100, + 6101, 6102, 2246, 6103, 6104, 6105, 6106, 6107, + 6108, 6109, 6110, 6111, 6112, 6113, 6114, 6115, + 6116, 6117, 6118, 6119, 6120, 0, 0, 6121, + 6122, 6123, 6124, 6125, 6126, 6127, 6128, 6129, + 6130, 6121, 6122, 6123, 6124, 6125, 6126, 6127, + 6128, 6129, 6130, 6121, 6122, 6123, 6124, 6125, + 6126, 6127, 6128, 6129, 6130, 6121, 6122, 6123, + 6124, 6125, 6126, 6127, 6128, 6129, 6130, 6121, + 6122, 6123, 6124, 6125, 6126, 6127, 6128, 6129, + 6130, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 841, 841, 841, 841, 1151, 1151, 1151, 1151, + 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 1151, 1151, 1151, 1151, 1151, 1151, 841, 841, + 
841, 841, 841, 841, 841, 841, 1151, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 1151, 841, 841, + 1047, 1047, 1047, 1047, 1047, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1151, 1151, 1151, 1151, + 1151, 0, 1151, 1151, 1151, 1151, 1151, 1151, + 1151, 1151, 1151, 1151, 1151, 1151, 1151, 1151, + 1151, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 543, 543, 543, 543, 543, 543, 543, + 0, 543, 543, 543, 543, 543, 543, 543, + 543, 543, 543, 543, 543, 543, 543, 543, + 543, 543, 0, 0, 543, 543, 543, 543, + 543, 543, 543, 0, 543, 543, 0, 543, + 543, 543, 543, 543, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 0, 0, + 0, 543, 543, 543, 543, 543, 543, 543, + 525, 525, 525, 525, 525, 525, 525, 0, + 0, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 0, 0, 0, 0, 341, + 841, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 
0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 543, 543, 543, + 543, 1172, 1172, 1172, 1172, 1172, 1172, 1172, + 1172, 1172, 1172, 0, 0, 0, 0, 0, + 11, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 
1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 1108, 1108, + 1108, 1108, 1108, 1108, 1108, 1108, 0, 0, + 5741, 5741, 5741, 5741, 5741, 5741, 5741, 5741, + 5741, 556, 556, 556, 556, 556, 556, 556, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6131, 6132, 6133, 6134, 6135, 6136, 6137, + 6138, 6139, 6140, 6141, 6142, 6143, 6144, 6145, + 6146, 6147, 6148, 6149, 6150, 6151, 6152, 6153, + 6154, 6155, 6156, 6157, 6158, 6159, 6160, 6161, + 6162, 6163, 6164, 6165, 6166, 6167, 6168, 6169, + 6170, 6171, 6172, 6173, 6174, 6175, 6176, 6177, + 6178, 6179, 6180, 6181, 6182, 6183, 6184, 6185, + 6186, 6187, 6188, 6189, 6190, 6191, 6192, 6193, + 6194, 6195, 6196, 6197, 6198, 543, 543, 543, + 543, 543, 543, 1173, 1153, 0, 0, 0, + 0, 1152, 1152, 1152, 1152, 1152, 1152, 1152, + 1152, 1152, 1152, 0, 0, 0, 0, 1105, + 1105, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 
+ 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 5846, 5846, 5846, 5846, 5846, 5846, + 5846, 5846, 5846, 5846, 5846, 5846, 5846, 5846, + 5846, 5846, 5846, 5846, 5846, 5846, 5846, 5846, + 5846, 5846, 5846, 5846, 5846, 5846, 5846, 5846, + 5846, 5846, 5846, 5846, 5846, 5846, 5846, 5846, + 5846, 5846, 5846, 5846, 5846, 5846, 5846, 5846, + 5846, 5846, 5846, 5846, 5846, 5846, 5846, 5846, + 5846, 5846, 5846, 5846, 5846, 1148, 5846, 5846, + 5846, 1111, 5846, 5846, 5846, 5846, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 5846, 5846, 5846, 5846, 5846, 5846, + 5846, 5846, 5846, 5846, 5846, 5846, 5846, 5846, + 5846, 5846, 5846, 5846, 5846, 5846, 5846, 5846, + 5846, 5846, 5846, 5846, 5846, 5846, 5846, 5846, + 5846, 5846, 5846, 5846, 5846, 5846, 5846, 5846, + 5846, 5846, 5846, 5846, 5846, 5846, 5846, 1148, + 5846, 5846, 5846, 5846, 5846, 5846, 5846, 5846, + 5846, 5846, 5846, 5846, 5846, 5846, 5846, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 
+ 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6199, 6200, 6201, 6202, 0, 6203, 6204, + 6205, 6206, 6207, 6208, 6209, 6210, 6211, 6212, + 6213, 6214, 6215, 6216, 6217, 6218, 6219, 6220, + 6221, 6222, 6223, 6224, 6225, 6226, 6227, 6228, + 6229, 0, 6200, 6201, 0, 6230, 0, 0, + 6205, 0, 6207, 6208, 6209, 6210, 6211, 6212, + 6213, 6214, 6215, 6216, 0, 6218, 6219, 6220, + 6221, 0, 6223, 0, 6225, 0, 0, 0, + 0, 0, 0, 6201, 0, 0, 0, 0, + 6205, 0, 6207, 0, 6209, 0, 6211, 6212, + 6213, 0, 6215, 6216, 0, 6218, 0, 0, + 6221, 0, 6223, 0, 6225, 0, 6227, 0, + 6229, 0, 6200, 6201, 0, 6230, 0, 0, + 6205, 6206, 6207, 6208, 0, 6210, 6211, 6212, + 6213, 6214, 6215, 6216, 0, 6218, 6219, 6220, + 6221, 0, 6223, 6224, 6225, 6226, 0, 6228, + 0, 6199, 6200, 6201, 6202, 6230, 6203, 6204, + 6205, 6206, 6207, 0, 6209, 6210, 6211, 6212, + 6213, 6214, 6215, 6216, 6217, 6218, 6219, 6220, + 6221, 6222, 6223, 6224, 6225, 0, 0, 0, + 0, 0, 6200, 6201, 6202, 0, 6203, 6204, + 6205, 6206, 6207, 0, 6209, 6210, 6211, 6212, + 6213, 6214, 6215, 6216, 6217, 6218, 6219, 6220, + 6221, 6222, 6223, 6224, 6225, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 75, 75, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 79, 79, 79, 79, 2397, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 0, 0, 0, + 0, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 79, 79, 79, 79, 79, 
79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 0, 0, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 0, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 2397, 0, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6231, 6232, 6233, 6234, 6235, 6236, 6237, + 6238, 6239, 6240, 6241, 1211, 1211, 0, 0, + 0, 6242, 6243, 6244, 6245, 6246, 6247, 6248, + 6249, 6250, 6251, 6252, 6253, 6254, 6255, 6256, + 6257, 6258, 6259, 6260, 6261, 6262, 6263, 6264, + 6265, 6266, 6267, 6268, 6269, 6270, 6271, 6272, + 79, 6273, 6274, 6275, 6276, 6277, 6278, 6279, + 6280, 6281, 6282, 6283, 6284, 6285, 6286, 6287, + 6288, 6289, 6290, 6291, 6292, 6293, 6294, 6295, + 6296, 6297, 6298, 6299, 6300, 6301, 6302, 6303, + 6304, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 6305, 6306, 6307, 0, 0, + 0, 2541, 2541, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 2541, + 2541, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 6308, + 841, 6309, 6308, 6308, 6308, 6308, 6308, 6308, + 6308, 6308, 6308, 6308, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 841, 841, + 841, 841, 841, 841, 841, 841, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 6310, + 6310, 6310, 6310, 6310, 6310, 6310, 6310, 6310, + 6310, 6310, 6310, 6310, 6310, 6310, 6310, 6310, + 6310, 6310, 6310, 6310, 6310, 6310, 6310, 6310, + 6310, 6311, 6312, 6313, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6314, 6315, 6316, 6317, 6318, 6319, 6320, + 6321, 6322, 6323, 6324, 6325, 6326, 6327, 6328, + 6329, 6330, 6331, 6332, 6333, 6334, 6335, 6336, + 6337, 6338, 6339, 6340, 6341, 6342, 6343, 6344, + 6345, 6346, 6347, 6348, 6349, 
6350, 6351, 6352, + 6353, 6354, 6355, 6356, 6357, 0, 0, 0, + 0, 6358, 6359, 6360, 6361, 6362, 6363, 6364, + 6365, 6366, 0, 0, 0, 0, 0, 0, + 0, 6367, 6368, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2397, 2397, 2397, 2397, 2397, 2397, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 79, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 79, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 79, 79, 79, 79, + 2397, 2397, 2397, 2397, 
2397, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 79, 79, 79, 2397, 79, 79, + 79, 2397, 2397, 2397, 6369, 6369, 6369, 6369, + 6369, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 79, 2397, 79, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 79, 79, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 
2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 77, + 77, 77, 77, 77, 77, 77, 77, 79, + 79, 79, 79, 79, 2397, 2397, 2397, 2397, + 79, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 2397, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 2397, 2397, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 2397, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 
2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 79, + 79, 79, 79, 79, 79, 2397, 79, 79, + 79, 2397, 2397, 2397, 79, 79, 2397, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 2397, 2397, 0, 0, + 0, 79, 79, 79, 79, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 0, 0, 0, 0, + 0, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 79, 79, + 79, 79, 0, 0, 0, 0, 0, 0, + 0, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 0, 0, 0, + 0, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 0, 0, 0, 0, 0, 0, 0, + 0, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 0, 0, 0, 0, 0, + 0, 77, 77, 
77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 0, 0, 0, 0, 0, 0, 0, + 0, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 0, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2815, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2815, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 0, 2397, 2397, 2397, 2397, + 0, 0, 0, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 0, 0, 2397, 2397, + 2397, 2397, 2397, 2397, 0, 0, 0, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 0, 0, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 
2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 2397, 2397, 2397, 2397, 2397, 2397, 2397, + 2397, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 0, + 0, 2397, 2397, 2397, 2397, 0, 0, 0, + 0, 2397, 2397, 2397, 0, 0, 0, 0, + 0, 2397, 2397, 2397, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2397, 2397, 2397, 2397, 2397, 2397, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 
3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 
3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 0, + 0, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 
3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 
3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 3035, 3035, 3035, 3035, 3035, 3035, + 3035, 3035, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6370, 6371, 6372, 6373, 6374, 4373, 6375, + 6376, 6377, 6378, 4374, 6379, 6380, 6381, 4375, + 6382, 6383, 6384, 6385, 6386, 6387, 6388, 6389, + 6390, 6391, 6392, 6393, 4433, 6394, 6395, 6396, + 6397, 6398, 6399, 6400, 6401, 6402, 4438, 4376, + 4377, 4439, 6403, 6404, 4184, 6405, 4378, 6406, + 6407, 6408, 6409, 6409, 6409, 6410, 6411, 6412, + 6413, 6414, 6415, 6416, 6417, 6418, 6419, 6420, + 6421, 6422, 6423, 6424, 6425, 6426, 6427, 6427, + 4441, 6428, 6429, 6430, 6431, 4380, 6432, 6433, + 6434, 4337, 6435, 6436, 6437, 6438, 6439, 6440, + 6441, 6442, 6443, 6444, 6445, 6446, 6447, 6448, + 6449, 6450, 6451, 6452, 6453, 6454, 6455, 6456, + 6457, 6458, 6459, 6460, 6460, 6461, 6462, 6463, + 4180, 6464, 6465, 6466, 6467, 6468, 6469, 6470, + 6471, 4385, 6472, 6473, 6474, 6475, 6476, 6477, + 6478, 6479, 6480, 6481, 6482, 6483, 6484, 6485, + 6486, 6487, 6488, 6489, 6490, 6491, 6492, 4126, + 6493, 6494, 6495, 6495, 6496, 6497, 6497, 6498, + 6499, 6500, 6501, 6502, 6503, 6504, 6505, 6506, + 6507, 6508, 6509, 6510, 4386, 6511, 6512, 6513, + 6514, 4453, 6514, 6515, 4388, 6516, 6517, 6518, + 6519, 4389, 4099, 6520, 6521, 6522, 6523, 6524, + 6525, 6526, 6527, 6528, 6529, 6530, 6531, 
6532, + 6533, 6534, 6535, 6536, 6537, 6538, 6539, 6540, + 6541, 4390, 6542, 6543, 6544, 6545, 6546, 6547, + 4392, 6548, 6549, 6550, 6551, 6552, 6553, 6554, + 6555, 4127, 4461, 6556, 6557, 6558, 6559, 6560, + 6561, 6562, 6563, 4393, 6564, 6565, 6566, 6567, + 4504, 6568, 6569, 6570, 6571, 6572, 6573, 6574, + 6575, 6576, 6577, 6578, 6579, 6580, 4197, 6581, + 6582, 6583, 6584, 6585, 6586, 6587, 6588, 6589, + 6590, 6591, 4394, 4284, 6592, 6593, 6594, 6595, + 6596, 6597, 6598, 6599, 4465, 6600, 6601, 6602, + 6603, 6604, 6605, 6606, 6607, 4466, 6608, 6609, + 6610, 6611, 6612, 6613, 6614, 6615, 6616, 6617, + 6618, 6619, 4468, 6620, 6621, 6622, 6623, 6624, + 6625, 6626, 6627, 6628, 6629, 6630, 6630, 6631, + 6632, 4470, 6633, 6634, 6635, 6636, 6637, 6638, + 6639, 4183, 6640, 6641, 6642, 6643, 6644, 6645, + 6646, 4476, 6647, 6648, 6649, 6650, 6651, 6652, + 6652, 4477, 4506, 6653, 6654, 6655, 6656, 6657, + 4145, 4479, 6658, 6659, 4405, 6660, 6661, 4359, + 6662, 6663, 4409, 6664, 6665, 6666, 6667, 6667, + 6668, 6669, 6670, 6671, 6672, 6673, 6674, 6675, + 6676, 6677, 6678, 6679, 6680, 6681, 6682, 6683, + 6684, 6685, 6686, 6687, 6688, 6689, 6690, 6691, + 6692, 6693, 6694, 4415, 6695, 6696, 6697, 6698, + 6699, 6700, 6701, 6702, 6703, 6704, 6705, 6706, + 6707, 6708, 6709, 6710, 6496, 6711, 6712, 6713, + 6714, 6715, 6716, 6717, 6718, 6719, 6720, 6721, + 6722, 4201, 6723, 6724, 6725, 6726, 6727, 6728, + 4418, 6729, 6730, 6731, 6732, 6733, 6734, 6735, + 6736, 6737, 6738, 6739, 6740, 6741, 6742, 6743, + 6744, 6745, 6746, 6747, 6748, 4140, 6749, 6750, + 6751, 6752, 6753, 6754, 4486, 6755, 6756, 6757, + 6758, 6759, 6760, 6761, 6762, 6763, 6764, 6765, + 6766, 6767, 6768, 6769, 6770, 6771, 6772, 6773, + 6774, 4491, 4492, 6775, 6776, 6777, 6778, 6779, + 6780, 6781, 6782, 6783, 6784, 6785, 6786, 6787, + 4493, 6788, 6789, 6790, 6791, 6792, 6793, 6794, + 6795, 6796, 6797, 6798, 6799, 6800, 6801, 6802, + 6803, 6804, 6805, 6806, 6807, 6808, 6809, 6810, + 6811, 6812, 6813, 6814, 6815, 6816, 6817, 
4499, + 4499, 6818, 6819, 6820, 6821, 6822, 6823, 6824, + 6825, 6826, 6827, 4500, 6828, 6829, 6830, 6831, + 6832, 6833, 6834, 6835, 6836, 6837, 6838, 6839, + 6840, 6841, 6842, 6843, 6844, 6845, 6846, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1459, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2129, 2129, 2129, 2129, 2129, 2129, 2129, + 2129, 2129, 2129, 2129, 2129, 2129, 2129, 2129, + 2129, 2129, 2129, 2129, 2129, 2129, 2129, 2129, + 2129, 2129, 2129, 2129, 2129, 2129, 2129, 2129, + 2129, 2129, 2129, 2129, 2129, 2129, 2129, 2129, + 2129, 2129, 2129, 2129, 2129, 2129, 2129, 2129, + 2129, 2129, 2129, 2129, 2129, 2129, 2129, 2129, + 2129, 2129, 2129, 2129, 2129, 2129, 2129, 2129, + 2129, 2129, 2129, 2129, 2129, 2129, 2129, 2129, + 2129, 2129, 2129, 2129, 2129, 2129, 2129, 2129, + 2129, 2129, 2129, 2129, 2129, 2129, 2129, 2129, + 2129, 2129, 2129, 2129, 2129, 2129, 2129, 2129, + 2129, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 
0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 577, 577, 577, 577, 577, 577, 577, + 577, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 
4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 0, 0, }; static const utf8proc_property_t utf8proc_properties[] = { @@ -179482,967 +193249,967 @@ static const utf8proc_property_t utf8proc_properties[] = { }; static const utf8proc_uint16_t utf8proc_combinations[] = { - 0, 46, 192, 193, 194, 195, 196, 197, 0, - 256, 258, 260, 550, 461, 0, 0, 512, - 514, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 7680, 7840, 0, 0, 0, 0, 0, 7842, -1, 11, - 262, 264, 0, 0, 0, 199, 0, 0, - 0, 266, 268, -0, 46, 200, 201, 202, 7868, 203, - 0, 552, 274, 276, 280, 278, 282, 0, - 0, 516, 518, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 7864, 0, 7704, 7706, 0, - 0, 7866, -0, 46, 204, 205, 206, 296, 207, 0, - 0, 298, 300, 302, 304, 463, 0, 0, - 520, 522, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 7882, 0, 0, 7724, 0, 0, - 7880, -0, 42, 504, 323, 0, 209, 0, 0, 325, - 0, 0, 0, 7748, 327, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 7750, 7752, 7754, -0, 
46, 210, 211, 212, 213, - 214, 0, 0, 332, 334, 490, 558, 465, - 336, 416, 524, 526, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 7884, 0, 0, 0, - 0, 0, 7886, -0, 46, 217, 218, 219, 360, 220, - 366, 0, 362, 364, 370, 0, 467, 368, - 431, 532, 534, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 7908, 0, 7798, 7796, 0, - 7794, 7910, -0, 46, 7922, 221, 374, 7928, 376, 0, - 0, 562, 0, 0, 7822, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 7924, 0, 0, 0, 0, 0, - 7926, -0, 46, 224, 225, 226, 227, 228, 229, 0, - 257, 259, 261, 551, 462, 0, 0, 513, - 515, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 7681, 7841, 0, 0, 0, 0, 0, 7843, -1, 11, - 263, 265, 0, 0, 0, 231, 0, 0, - 0, 267, 269, -0, 46, 232, 233, 234, 7869, 235, - 0, 553, 275, 277, 281, 279, 283, 0, - 0, 517, 519, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 7865, 0, 7705, 7707, 0, - 0, 7867, -0, 46, 236, 237, 238, 297, 239, 0, - 0, 299, 301, 303, 0, 464, 0, 0, - 521, 523, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 7883, 0, 0, 7725, 0, 0, - 7881, -0, 42, 505, 324, 0, 241, 0, 0, 326, - 0, 0, 0, 7749, 328, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 7751, 7753, 7755, -0, 46, 242, 243, 244, 245, - 246, 0, 0, 333, 335, 491, 559, 466, - 337, 417, 525, 527, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 7885, 0, 0, 0, - 0, 0, 7887, -0, 46, 249, 250, 251, 361, 252, - 367, 0, 363, 365, 371, 0, 468, 369, - 432, 533, 535, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 7909, 0, 7799, 7797, 0, - 7795, 7911, -0, 46, 7923, 253, 375, 7929, 255, 7833, - 0, 563, 0, 0, 7823, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 7925, 0, 0, 0, 0, 0, - 7927, -6, 42, 7696, 0, 0, 0, 
7690, 270, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 7692, 7694, 7698, -6, 42, 7697, 0, - 0, 0, 7691, 271, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 7693, 7695, 7699, -1, 11, 500, 284, 0, 0, 0, - 290, 7712, 286, 0, 288, 486, -1, 11, 501, 285, - 0, 0, 0, 291, 7713, 287, 0, 289, - 487, -2, 44, 292, 0, 7718, 0, 7720, 0, 0, - 0, 7714, 542, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 7716, - 0, 0, 0, 7722, -2, 44, 293, 0, 7719, 0, - 7721, 0, 0, 0, 7715, 543, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 7717, 7830, 0, 0, 7723, -2, 2, 308, -2, 11, - 309, 0, 0, 0, 0, 0, 0, 0, - 0, 496, -1, 41, 7728, 0, 0, 0, 0, 310, - 0, 0, 0, 0, 488, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 7730, 7732, -1, 41, 7729, 0, 0, 0, 0, - 311, 0, 0, 0, 0, 489, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 7731, 7733, -1, 42, 313, 0, 0, 0, - 0, 315, 0, 0, 0, 0, 317, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 7734, 7738, 7740, -1, 42, 314, 0, - 0, 0, 0, 316, 0, 0, 0, 0, - 318, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 7735, 7739, 7741, -1, 41, - 340, 0, 0, 0, 0, 342, 0, 0, - 0, 7768, 344, 0, 0, 528, 530, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 7770, - 7774, -1, 41, 341, 0, 0, 0, 0, 343, 0, - 0, 0, 7769, 345, 0, 0, 529, 531, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 7771, 7775, -1, 40, 346, 348, 0, 0, 0, 350, - 0, 0, 0, 7776, 352, 0, 0, 0, - 0, 536, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 7778, -1, 40, 347, 349, 0, 0, 0, 351, - 0, 0, 0, 7777, 353, 0, 0, 0, - 0, 537, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 
0, 0, 0, 0, 0, 0, 0, 0, - 0, 7779, -6, 42, 354, 0, 0, 0, 7786, 356, - 0, 0, 0, 0, 538, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 7788, 7790, 7792, -4, 42, 7831, - 0, 355, 0, 0, 0, 7787, 357, 0, - 0, 0, 0, 539, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 7789, 7791, 7793, -0, 40, 7808, 7810, - 372, 0, 7812, 0, 0, 0, 0, 0, - 7814, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 7816, -0, 40, 7809, - 7811, 373, 0, 7813, 7832, 0, 0, 0, - 0, 7815, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 7817, -1, 41, - 377, 7824, 0, 0, 0, 0, 0, 0, - 0, 379, 381, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 7826, - 7828, -1, 41, 378, 7825, 0, 0, 0, 0, 0, - 0, 0, 380, 382, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 7827, 7829, -0, 11, 475, 471, 0, 0, 0, 0, - 0, 469, 0, 0, 0, 473, -0, 11, 476, 472, - 0, 0, 0, 0, 0, 470, 0, 0, - 0, 474, -7, 7, 478, -7, 7, 479, -7, 7, 480, -7, 7, 481, -1, 7, 508, 0, - 0, 0, 0, 0, 482, -1, 7, 509, 0, 0, - 0, 0, 0, 483, -7, 7, 492, -7, 7, 493, -11, 11, 494, -11, 11, 495, -1, 1, - 506, -1, 1, 507, -1, 1, 510, -1, 1, 511, -7, 7, 554, -7, 7, 555, -1, 7, 7756, 0, - 0, 7758, 0, 0, 556, -1, 7, 7757, 0, 0, - 7759, 0, 0, 557, -7, 7, 560, -7, 7, 561, -0, 49, 8173, 901, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 8129, -0, 50, - 8122, 902, 0, 0, 0, 0, 0, 8121, - 8120, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 7944, - 7945, 0, 8124, -0, 48, 8136, 904, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 7960, 7961, -0, 50, 8138, 905, 0, 0, - 
0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 7976, 7977, 0, 8140, -0, 48, 8154, - 906, 0, 0, 938, 0, 0, 8153, 8152, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 7992, 7993, -0, 48, - 8184, 908, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 8008, - 8009, -0, 48, 8170, 910, 0, 0, 939, 0, 0, - 8169, 8168, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8025, -0, 50, 8186, 911, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8040, 8041, 0, 8188, -0, 49, 8146, 912, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 8151, -0, 50, 8048, - 940, 0, 0, 0, 0, 0, 8113, 8112, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 7936, 7937, - 8118, 8115, -0, 48, 8050, 941, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 7952, 7953, -0, 50, 8052, 942, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 7968, 7969, 8134, 8131, -0, 49, 8054, 943, - 0, 0, 970, 0, 0, 8145, 8144, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 7984, 7985, 8150, -0, 49, - 8162, 944, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 
0, - 0, 8167, -0, 49, 8058, 973, 0, 0, 971, 0, - 0, 8161, 8160, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8016, 8017, 8166, -0, 48, 8056, 972, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 8000, 8001, -0, 50, 8060, 974, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 8032, 8033, 8182, 8179, -1, 4, - 979, 0, 0, 980, -0, 8, 1024, 0, 0, 0, - 1025, 0, 0, 0, 1238, -1, 1, 1027, -4, 4, 1031, -1, 1, 1036, -0, 8, - 1037, 0, 0, 0, 1252, 0, 0, 1250, - 1049, -4, 12, 1264, 0, 0, 1262, 1038, 0, 0, - 0, 1266, -0, 8, 1117, 0, 0, 0, 1253, 0, - 0, 1251, 1081, -0, 8, 1104, 0, 0, 0, 1105, - 0, 0, 0, 1239, -1, 1, 1107, -4, 4, 1111, -1, 1, 1116, -4, 12, 1265, - 0, 0, 1263, 1118, 0, 0, 0, 1267, -14, 14, - 1142, -14, 14, 1143, -4, 8, 1244, 0, 0, 0, 1217, -4, 8, 1245, - 0, 0, 0, 1218, -4, 8, 1234, 0, 0, 0, - 1232, -4, 8, 1235, 0, 0, 0, 1233, -4, 4, 1242, -4, 4, 1243, -4, 4, - 1246, -4, 4, 1247, -4, 4, 1254, -4, 4, 1255, -4, 4, 1258, -4, 4, 1259, -4, 4, 1260, -4, 4, 1261, -4, 4, - 1268, -4, 4, 1269, -4, 4, 1272, -4, 4, 1273, -17, 19, 1570, 1571, 1573, -18, 18, 1572, -18, 18, - 1574, -18, 18, 1728, -18, 18, 1730, -18, 18, 1747, -20, 20, 2345, -20, 20, 2353, -20, 20, 2356, -21, 22, 2507, - 2508, -23, 25, 2888, 2891, 2892, -26, 26, 2964, -26, 27, 3020, 3018, -27, 27, 3019, -28, 28, - 3144, -29, 29, 3264, -29, 31, 3271, 3272, 3274, -29, 29, 3275, -32, 33, 3402, 3404, -32, 32, - 3403, -34, 36, 3546, 3548, 3550, -34, 34, 3549, -37, 37, 4134, -38, 38, 6918, -38, 38, 6920, -38, 38, - 6922, -38, 38, 6924, -38, 38, 6926, -38, 38, 6930, -38, 38, 6971, -38, 38, 6973, -38, 38, 6976, -38, 38, 6977, -38, 38, - 6979, -10, 41, 7682, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 7684, 
- 7686, -10, 41, 7683, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 7685, - 7687, -1, 1, 7688, -1, 1, 7689, -0, 1, 7700, 7702, -0, 1, 7701, 7703, -8, 8, 7708, -8, 8, - 7709, -10, 10, 7710, -10, 10, 7711, -1, 1, 7726, -1, 1, 7727, -7, 7, 7736, -7, 7, 7737, -1, 40, 7742, - 0, 0, 0, 0, 0, 0, 0, 0, - 7744, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 7746, -1, 40, 7743, - 0, 0, 0, 0, 0, 0, 0, 0, - 7745, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 7747, -0, 1, 7760, - 7762, -0, 1, 7761, 7763, -1, 10, 7764, 0, 0, 0, 0, - 0, 0, 0, 0, 7766, -1, 10, 7765, 0, 0, - 0, 0, 0, 0, 0, 0, 7767, -7, 7, 7772, -7, 7, - 7773, -10, 10, 7780, -10, 10, 7781, -10, 10, 7782, -10, 10, 7783, -10, 10, 7784, -10, 10, 7785, -1, 1, 7800, -1, 1, - 7801, -4, 4, 7802, -4, 4, 7803, -3, 40, 7804, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 7806, -3, 40, 7805, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 7807, -4, 10, 7820, - 0, 0, 0, 0, 0, 7818, -4, 10, 7821, 0, - 0, 0, 0, 0, 7819, -10, 10, 7835, -0, 46, 7846, 7844, - 0, 7850, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 7848, -0, 46, 7847, 7845, 0, - 7851, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 7849, -2, 8, 7852, 0, 0, 0, - 0, 0, 7862, -2, 8, 7853, 0, 0, 0, 0, - 0, 7863, -0, 46, 7856, 7854, 0, 7860, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 7858, -0, 46, 7857, 7855, 0, 7861, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 
0, 0, 0, 0, 0, 0, 0, 7859, -0, 46, - 7872, 7870, 0, 7876, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 7874, -0, 46, 7873, - 7871, 0, 7877, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 7875, -2, 2, 7878, -2, 2, 7879, -0, 46, - 7890, 7888, 0, 7894, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 7892, -0, 46, 7891, - 7889, 0, 7895, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 7893, -2, 2, 7896, -2, 2, 7897, -0, 46, - 7900, 7898, 0, 7904, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 7906, 0, 0, 0, 0, 0, 7902, -0, 46, 7901, - 7899, 0, 7905, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 7907, - 0, 0, 0, 0, 0, 7903, -0, 46, 7914, 7912, - 0, 7918, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 7920, 0, - 0, 0, 0, 0, 7916, -0, 46, 7915, 7913, 0, - 7919, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 7921, 0, 0, - 0, 0, 0, 7917, -0, 50, 7938, 7940, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 7942, 8064, -0, 50, 7939, - 7941, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 7943, 8065, -0, 50, 7946, 7948, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 
0, 0, 0, - 0, 0, 0, 7950, 8072, -0, 50, 7947, 7949, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 7951, 8073, -0, 1, - 7954, 7956, -0, 1, 7955, 7957, -0, 1, 7962, 7964, -0, 1, 7963, 7965, -0, 50, - 7970, 7972, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 7974, 8080, -0, 50, 7971, 7973, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 7975, 8081, -0, 50, 7978, 7980, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 7982, - 8088, -0, 50, 7979, 7981, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 7983, 8089, -0, 49, 7986, 7988, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 7990, -0, 49, 7987, 7989, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 7991, -0, 49, - 7994, 7996, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 7998, -0, 49, 7995, 7997, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 7999, -0, 1, 8002, 8004, -0, 1, 8003, 8005, -0, 1, - 8010, 8012, -0, 1, 8011, 8013, -0, 49, 8018, 8020, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 
0, 0, 0, 8022, -0, 49, 8019, 8021, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 8023, -0, 49, - 8027, 8029, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8031, -0, 50, 8034, 8036, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 8038, 8096, -0, 50, 8035, 8037, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 8039, 8097, -0, 50, - 8042, 8044, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8046, 8104, -0, 50, 8043, 8045, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 8047, 8105, -50, 50, 8066, -50, 50, 8067, -50, 50, - 8068, -50, 50, 8069, -50, 50, 8070, -50, 50, 8071, -50, 50, 8074, -50, 50, 8075, -50, 50, 8076, -50, 50, 8077, -50, 50, - 8078, -50, 50, 8079, -50, 50, 8082, -50, 50, 8083, -50, 50, 8084, -50, 50, 8085, -50, 50, 8086, -50, 50, 8087, -50, 50, - 8090, -50, 50, 8091, -50, 50, 8092, -50, 50, 8093, -50, 50, 8094, -50, 50, 8095, -50, 50, 8098, -50, 50, 8099, -50, 50, - 8100, -50, 50, 8101, -50, 50, 8102, -50, 50, 8103, -50, 50, 8106, -50, 50, 8107, -50, 50, 8108, -50, 50, 8109, -50, 50, - 8110, -50, 50, 8111, -50, 50, 8114, -50, 50, 8116, -50, 50, 8119, -50, 50, 8130, -50, 50, 8132, -50, 50, 8135, -0, 49, - 8141, 8142, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8143, -0, 49, 8157, 8158, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 
0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 8159, -47, 48, 8164, 8165, -48, 48, 8172, -50, 50, 8178, -50, 50, - 8180, -50, 50, 8183, -51, 51, 8602, -51, 51, 8603, -51, 51, 8622, -51, 51, 8653, -51, 51, 8654, -51, 51, 8655, -51, 51, - 8708, -51, 51, 8713, -51, 51, 8716, -51, 51, 8740, -51, 51, 8742, -51, 51, 8769, -51, 51, 8772, -51, 51, 8775, -51, 51, - 8777, -51, 51, 8800, -51, 51, 8802, -51, 51, 8813, -51, 51, 8814, -51, 51, 8815, -51, 51, 8816, -51, 51, 8817, -51, 51, - 8820, -51, 51, 8821, -51, 51, 8824, -51, 51, 8825, -51, 51, 8832, -51, 51, 8833, -51, 51, 8836, -51, 51, 8837, -51, 51, - 8840, -51, 51, 8841, -51, 51, 8876, -51, 51, 8877, -51, 51, 8878, -51, 51, 8879, -51, 51, 8928, -51, 51, 8929, -51, 51, - 8930, -51, 51, 8931, -51, 51, 8938, -51, 51, 8939, -51, 51, 8940, -51, 51, 8941, -51, 51, 10972, -52, 52, 12364, -52, 52, - 12366, -52, 52, 12368, -52, 52, 12370, -52, 52, 12372, -52, 52, 12374, -52, 52, 12376, -52, 52, 12378, -52, 52, 12380, -52, 52, - 12382, -52, 52, 12384, -52, 52, 12386, -52, 52, 12389, -52, 52, 12391, -52, 52, 12393, -52, 53, 12400, 12401, -52, 53, - 12403, 12404, -52, 53, 12406, 12407, -52, 53, 12409, 12410, -52, 53, 12412, 12413, -52, 52, - 12436, -52, 52, 12446, -52, 52, 12460, -52, 52, 12462, -52, 52, 12464, -52, 52, 12466, -52, 52, 12468, -52, 52, 12470, -52, 52, - 12472, -52, 52, 12474, -52, 52, 12476, -52, 52, 12478, -52, 52, 12480, -52, 52, 12482, -52, 52, 12485, -52, 52, 12487, -52, 52, - 12489, -52, 53, 12496, 12497, -52, 53, 12499, 12500, -52, 53, 12502, 12503, -52, 53, 12505, - 12506, -52, 53, 12508, 12509, -52, 52, 12532, -52, 52, 12535, -52, 52, 12536, -52, 52, 12537, -52, 52, 12538, -52, 52, - 12542, -54, 55, 1, 4250, -54, 55, 1, 4252, -54, 55, 1, 4267, -56, 57, 1, 4398, -56, 57, 1, 4399, -58, 61, 1, 4939, 1, 4940, -62, 67, - 1, 5307, 1, 5308, 1, 5310, -68, 69, 1, 5562, -68, 69, 1, 5563, -70, 71, 1, 53598, -70, 71, 1, 53599, -72, 81, 1, 53600, - 1, 
53601, 1, 53602, 1, 53603, 1, 53604, -70, 71, 1, 53691, -70, 71, 1, 53692, -72, 75, 1, 53693, 1, 53695, -72, 75, - 1, 53694, 1, 53696, + 0, 46, 192, 193, 194, 195, 196, 197, 0, + 256, 258, 260, 550, 461, 0, 0, 512, + 514, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 7680, 7840, 0, 0, 0, 0, 0, 7842, +1, 11, + 262, 264, 0, 0, 0, 199, 0, 0, + 0, 266, 268, +0, 46, 200, 201, 202, 7868, 203, + 0, 552, 274, 276, 280, 278, 282, 0, + 0, 516, 518, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 7864, 0, 7704, 7706, 0, + 0, 7866, +0, 46, 204, 205, 206, 296, 207, 0, + 0, 298, 300, 302, 304, 463, 0, 0, + 520, 522, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 7882, 0, 0, 7724, 0, 0, + 7880, +0, 42, 504, 323, 0, 209, 0, 0, 325, + 0, 0, 0, 7748, 327, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7750, 7752, 7754, +0, 46, 210, 211, 212, 213, + 214, 0, 0, 332, 334, 490, 558, 465, + 336, 416, 524, 526, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7884, 0, 0, 0, + 0, 0, 7886, +0, 46, 217, 218, 219, 360, 220, + 366, 0, 362, 364, 370, 0, 467, 368, + 431, 532, 534, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 7908, 0, 7798, 7796, 0, + 7794, 7910, +0, 46, 7922, 221, 374, 7928, 376, 0, + 0, 562, 0, 0, 7822, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 7924, 0, 0, 0, 0, 0, + 7926, +0, 46, 224, 225, 226, 227, 228, 229, 0, + 257, 259, 261, 551, 462, 0, 0, 513, + 515, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 7681, 7841, 0, 0, 0, 0, 0, 7843, +1, 11, + 263, 265, 0, 0, 0, 231, 0, 0, + 0, 267, 269, +0, 46, 232, 233, 234, 7869, 235, + 0, 553, 275, 277, 281, 279, 283, 0, + 0, 517, 519, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 7865, 0, 7705, 7707, 0, + 0, 7867, +0, 46, 236, 237, 
238, 297, 239, 0, + 0, 299, 301, 303, 0, 464, 0, 0, + 521, 523, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 7883, 0, 0, 7725, 0, 0, + 7881, +0, 42, 505, 324, 0, 241, 0, 0, 326, + 0, 0, 0, 7749, 328, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7751, 7753, 7755, +0, 46, 242, 243, 244, 245, + 246, 0, 0, 333, 335, 491, 559, 466, + 337, 417, 525, 527, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7885, 0, 0, 0, + 0, 0, 7887, +0, 46, 249, 250, 251, 361, 252, + 367, 0, 363, 365, 371, 0, 468, 369, + 432, 533, 535, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 7909, 0, 7799, 7797, 0, + 7795, 7911, +0, 46, 7923, 253, 375, 7929, 255, 7833, + 0, 563, 0, 0, 7823, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 7925, 0, 0, 0, 0, 0, + 7927, +6, 42, 7696, 0, 0, 0, 7690, 270, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 7692, 7694, 7698, +6, 42, 7697, 0, + 0, 0, 7691, 271, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 7693, 7695, 7699, +1, 11, 500, 284, 0, 0, 0, + 290, 7712, 286, 0, 288, 486, +1, 11, 501, 285, + 0, 0, 0, 291, 7713, 287, 0, 289, + 487, +2, 44, 292, 0, 7718, 0, 7720, 0, 0, + 0, 7714, 542, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 7716, + 0, 0, 0, 7722, +2, 44, 293, 0, 7719, 0, + 7721, 0, 0, 0, 7715, 543, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 7717, 7830, 0, 0, 7723, +2, 2, 308, +2, 11, + 309, 0, 0, 0, 0, 0, 0, 0, + 0, 496, +1, 41, 7728, 0, 0, 0, 0, 310, + 0, 0, 0, 0, 488, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7730, 7732, +1, 41, 7729, 0, 0, 0, 0, + 311, 0, 0, 0, 0, 489, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 
0, 0, 0, 0, 0, 0, + 0, 0, 7731, 7733, +1, 42, 313, 0, 0, 0, + 0, 315, 0, 0, 0, 0, 317, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 7734, 7738, 7740, +1, 42, 314, 0, + 0, 0, 0, 316, 0, 0, 0, 0, + 318, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 7735, 7739, 7741, +1, 41, + 340, 0, 0, 0, 0, 342, 0, 0, + 0, 7768, 344, 0, 0, 528, 530, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 7770, + 7774, +1, 41, 341, 0, 0, 0, 0, 343, 0, + 0, 0, 7769, 345, 0, 0, 529, 531, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 7771, 7775, +1, 40, 346, 348, 0, 0, 0, 350, + 0, 0, 0, 7776, 352, 0, 0, 0, + 0, 536, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7778, +1, 40, 347, 349, 0, 0, 0, 351, + 0, 0, 0, 7777, 353, 0, 0, 0, + 0, 537, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7779, +6, 42, 354, 0, 0, 0, 7786, 356, + 0, 0, 0, 0, 538, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7788, 7790, 7792, +4, 42, 7831, + 0, 355, 0, 0, 0, 7787, 357, 0, + 0, 0, 0, 539, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 7789, 7791, 7793, +0, 40, 7808, 7810, + 372, 0, 7812, 0, 0, 0, 0, 0, + 7814, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 7816, +0, 40, 7809, + 7811, 373, 0, 7813, 7832, 0, 0, 0, + 0, 7815, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 7817, +1, 41, + 377, 7824, 0, 0, 0, 0, 0, 0, + 0, 379, 381, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 7826, + 7828, +1, 41, 378, 7825, 0, 0, 0, 0, 0, + 0, 0, 380, 382, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 7827, 7829, +0, 11, 475, 471, 0, 0, 0, 0, + 0, 469, 0, 0, 0, 473, +0, 11, 476, 472, + 0, 0, 
0, 0, 0, 470, 0, 0, + 0, 474, +7, 7, 478, +7, 7, 479, +7, 7, 480, +7, 7, 481, +1, 7, 508, 0, + 0, 0, 0, 0, 482, +1, 7, 509, 0, 0, + 0, 0, 0, 483, +7, 7, 492, +7, 7, 493, +11, 11, 494, +11, 11, 495, +1, 1, + 506, +1, 1, 507, +1, 1, 510, +1, 1, 511, +7, 7, 554, +7, 7, 555, +1, 7, 7756, 0, + 0, 7758, 0, 0, 556, +1, 7, 7757, 0, 0, + 7759, 0, 0, 557, +7, 7, 560, +7, 7, 561, +0, 49, 8173, 901, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 8129, +0, 50, + 8122, 902, 0, 0, 0, 0, 0, 8121, + 8120, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 7944, + 7945, 0, 8124, +0, 48, 8136, 904, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 7960, 7961, +0, 50, 8138, 905, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 7976, 7977, 0, 8140, +0, 48, 8154, + 906, 0, 0, 938, 0, 0, 8153, 8152, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 7992, 7993, +0, 48, + 8184, 908, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 8008, + 8009, +0, 48, 8170, 910, 0, 0, 939, 0, 0, + 8169, 8168, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 8025, +0, 50, 8186, 911, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 8040, 8041, 0, 8188, +0, 49, 8146, 912, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 
0, 0, 0, 0, 0, 0, 8151, +0, 50, 8048, + 940, 0, 0, 0, 0, 0, 8113, 8112, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 7936, 7937, + 8118, 8115, +0, 48, 8050, 941, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7952, 7953, +0, 50, 8052, 942, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 7968, 7969, 8134, 8131, +0, 49, 8054, 943, + 0, 0, 970, 0, 0, 8145, 8144, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 7984, 7985, 8150, +0, 49, + 8162, 944, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 8167, +0, 49, 8058, 973, 0, 0, 971, 0, + 0, 8161, 8160, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 8016, 8017, 8166, +0, 48, 8056, 972, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 8000, 8001, +0, 50, 8060, 974, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 8032, 8033, 8182, 8179, +1, 4, + 979, 0, 0, 980, +0, 8, 1024, 0, 0, 0, + 1025, 0, 0, 0, 1238, +1, 1, 1027, +4, 4, 1031, +1, 1, 1036, +0, 8, + 1037, 0, 0, 0, 1252, 0, 0, 1250, + 1049, +4, 12, 1264, 0, 0, 1262, 1038, 0, 0, + 0, 1266, +0, 8, 1117, 0, 0, 0, 1253, 0, + 0, 1251, 1081, +0, 8, 1104, 0, 0, 0, 1105, + 0, 0, 0, 1239, +1, 1, 1107, +4, 4, 1111, +1, 1, 1116, +4, 12, 1265, + 0, 0, 1263, 1118, 0, 0, 0, 1267, +14, 14, + 1142, +14, 14, 1143, +4, 8, 1244, 0, 0, 0, 1217, +4, 8, 1245, + 0, 0, 0, 
1218, +4, 8, 1234, 0, 0, 0, + 1232, +4, 8, 1235, 0, 0, 0, 1233, +4, 4, 1242, +4, 4, 1243, +4, 4, + 1246, +4, 4, 1247, +4, 4, 1254, +4, 4, 1255, +4, 4, 1258, +4, 4, 1259, +4, 4, 1260, +4, 4, 1261, +4, 4, + 1268, +4, 4, 1269, +4, 4, 1272, +4, 4, 1273, +17, 19, 1570, 1571, 1573, +18, 18, 1572, +18, 18, + 1574, +18, 18, 1728, +18, 18, 1730, +18, 18, 1747, +20, 20, 2345, +20, 20, 2353, +20, 20, 2356, +21, 22, 2507, + 2508, +23, 25, 2888, 2891, 2892, +26, 26, 2964, +26, 27, 3020, 3018, +27, 27, 3019, +28, 28, + 3144, +29, 29, 3264, +29, 31, 3271, 3272, 3274, +29, 29, 3275, +32, 33, 3402, 3404, +32, 32, + 3403, +34, 36, 3546, 3548, 3550, +34, 34, 3549, +37, 37, 4134, +38, 38, 6918, +38, 38, 6920, +38, 38, + 6922, +38, 38, 6924, +38, 38, 6926, +38, 38, 6930, +38, 38, 6971, +38, 38, 6973, +38, 38, 6976, +38, 38, 6977, +38, 38, + 6979, +10, 41, 7682, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 7684, + 7686, +10, 41, 7683, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 7685, + 7687, +1, 1, 7688, +1, 1, 7689, +0, 1, 7700, 7702, +0, 1, 7701, 7703, +8, 8, 7708, +8, 8, + 7709, +10, 10, 7710, +10, 10, 7711, +1, 1, 7726, +1, 1, 7727, +7, 7, 7736, +7, 7, 7737, +1, 40, 7742, + 0, 0, 0, 0, 0, 0, 0, 0, + 7744, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 7746, +1, 40, 7743, + 0, 0, 0, 0, 0, 0, 0, 0, + 7745, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 7747, +0, 1, 7760, + 7762, +0, 1, 7761, 7763, +1, 10, 7764, 0, 0, 0, 0, + 0, 0, 0, 0, 7766, +1, 10, 7765, 0, 0, + 0, 0, 0, 0, 0, 0, 7767, +7, 7, 7772, +7, 7, + 7773, +10, 10, 7780, +10, 10, 7781, +10, 10, 7782, +10, 10, 7783, +10, 10, 7784, +10, 10, 7785, +1, 1, 7800, +1, 1, + 7801, +4, 4, 7802, +4, 4, 7803, +3, 40, 7804, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 7806, +3, 40, 
7805, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 7807, +4, 10, 7820, + 0, 0, 0, 0, 0, 7818, +4, 10, 7821, 0, + 0, 0, 0, 0, 7819, +10, 10, 7835, +0, 46, 7846, 7844, + 0, 7850, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7848, +0, 46, 7847, 7845, 0, + 7851, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 7849, +2, 8, 7852, 0, 0, 0, + 0, 0, 7862, +2, 8, 7853, 0, 0, 0, 0, + 0, 7863, +0, 46, 7856, 7854, 0, 7860, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 7858, +0, 46, 7857, 7855, 0, 7861, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 7859, +0, 46, + 7872, 7870, 0, 7876, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 7874, +0, 46, 7873, + 7871, 0, 7877, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 7875, +2, 2, 7878, +2, 2, 7879, +0, 46, + 7890, 7888, 0, 7894, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 7892, +0, 46, 7891, + 7889, 0, 7895, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 7893, +2, 2, 7896, +2, 2, 7897, +0, 46, + 7900, 7898, 0, 7904, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 7906, 0, 0, 0, 0, 0, 7902, +0, 46, 7901, + 7899, 0, 7905, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 
0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 7907, + 0, 0, 0, 0, 0, 7903, +0, 46, 7914, 7912, + 0, 7918, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 7920, 0, + 0, 0, 0, 0, 7916, +0, 46, 7915, 7913, 0, + 7919, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 7921, 0, 0, + 0, 0, 0, 7917, +0, 50, 7938, 7940, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 7942, 8064, +0, 50, 7939, + 7941, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 7943, 8065, +0, 50, 7946, 7948, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 7950, 8072, +0, 50, 7947, 7949, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 7951, 8073, +0, 1, + 7954, 7956, +0, 1, 7955, 7957, +0, 1, 7962, 7964, +0, 1, 7963, 7965, +0, 50, + 7970, 7972, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7974, 8080, +0, 50, 7971, 7973, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7975, 8081, +0, 50, 7978, 7980, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 7982, + 8088, +0, 50, 7979, 7981, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 7983, 8089, +0, 49, 
7986, 7988, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 7990, +0, 49, 7987, 7989, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 7991, +0, 49, + 7994, 7996, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7998, +0, 49, 7995, 7997, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 7999, +0, 1, 8002, 8004, +0, 1, 8003, 8005, +0, 1, + 8010, 8012, +0, 1, 8011, 8013, +0, 49, 8018, 8020, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 8022, +0, 49, 8019, 8021, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 8023, +0, 49, + 8027, 8029, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 8031, +0, 50, 8034, 8036, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 8038, 8096, +0, 50, 8035, 8037, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 8039, 8097, +0, 50, + 8042, 8044, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 8046, 8104, +0, 50, 8043, 8045, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 
0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 8047, 8105, +50, 50, 8066, +50, 50, 8067, +50, 50, + 8068, +50, 50, 8069, +50, 50, 8070, +50, 50, 8071, +50, 50, 8074, +50, 50, 8075, +50, 50, 8076, +50, 50, 8077, +50, 50, + 8078, +50, 50, 8079, +50, 50, 8082, +50, 50, 8083, +50, 50, 8084, +50, 50, 8085, +50, 50, 8086, +50, 50, 8087, +50, 50, + 8090, +50, 50, 8091, +50, 50, 8092, +50, 50, 8093, +50, 50, 8094, +50, 50, 8095, +50, 50, 8098, +50, 50, 8099, +50, 50, + 8100, +50, 50, 8101, +50, 50, 8102, +50, 50, 8103, +50, 50, 8106, +50, 50, 8107, +50, 50, 8108, +50, 50, 8109, +50, 50, + 8110, +50, 50, 8111, +50, 50, 8114, +50, 50, 8116, +50, 50, 8119, +50, 50, 8130, +50, 50, 8132, +50, 50, 8135, +0, 49, + 8141, 8142, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 8143, +0, 49, 8157, 8158, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 8159, +47, 48, 8164, 8165, +48, 48, 8172, +50, 50, 8178, +50, 50, + 8180, +50, 50, 8183, +51, 51, 8602, +51, 51, 8603, +51, 51, 8622, +51, 51, 8653, +51, 51, 8654, +51, 51, 8655, +51, 51, + 8708, +51, 51, 8713, +51, 51, 8716, +51, 51, 8740, +51, 51, 8742, +51, 51, 8769, +51, 51, 8772, +51, 51, 8775, +51, 51, + 8777, +51, 51, 8800, +51, 51, 8802, +51, 51, 8813, +51, 51, 8814, +51, 51, 8815, +51, 51, 8816, +51, 51, 8817, +51, 51, + 8820, +51, 51, 8821, +51, 51, 8824, +51, 51, 8825, +51, 51, 8832, +51, 51, 8833, +51, 51, 8836, +51, 51, 8837, +51, 51, + 8840, +51, 51, 8841, +51, 51, 8876, +51, 51, 8877, +51, 51, 8878, +51, 51, 8879, +51, 51, 8928, +51, 51, 8929, +51, 51, + 8930, +51, 51, 8931, +51, 51, 8938, +51, 51, 8939, +51, 51, 8940, +51, 51, 8941, +51, 51, 10972, +52, 52, 12364, +52, 52, + 12366, +52, 52, 12368, +52, 52, 12370, +52, 52, 12372, +52, 52, 12374, +52, 52, 12376, +52, 52, 12378, +52, 52, 12380, +52, 52, + 
12382, +52, 52, 12384, +52, 52, 12386, +52, 52, 12389, +52, 52, 12391, +52, 52, 12393, +52, 53, 12400, 12401, +52, 53, + 12403, 12404, +52, 53, 12406, 12407, +52, 53, 12409, 12410, +52, 53, 12412, 12413, +52, 52, + 12436, +52, 52, 12446, +52, 52, 12460, +52, 52, 12462, +52, 52, 12464, +52, 52, 12466, +52, 52, 12468, +52, 52, 12470, +52, 52, + 12472, +52, 52, 12474, +52, 52, 12476, +52, 52, 12478, +52, 52, 12480, +52, 52, 12482, +52, 52, 12485, +52, 52, 12487, +52, 52, + 12489, +52, 53, 12496, 12497, +52, 53, 12499, 12500, +52, 53, 12502, 12503, +52, 53, 12505, + 12506, +52, 53, 12508, 12509, +52, 52, 12532, +52, 52, 12535, +52, 52, 12536, +52, 52, 12537, +52, 52, 12538, +52, 52, + 12542, +54, 55, 1, 4250, +54, 55, 1, 4252, +54, 55, 1, 4267, +56, 57, 1, 4398, +56, 57, 1, 4399, +58, 61, 1, 4939, 1, 4940, +62, 67, + 1, 5307, 1, 5308, 1, 5310, +68, 69, 1, 5562, +68, 69, 1, 5563, +70, 71, 1, 53598, +70, 71, 1, 53599, +72, 81, 1, 53600, + 1, 53601, 1, 53602, 1, 53603, 1, 53604, +70, 71, 1, 53691, +70, 71, 1, 53692, +72, 75, 1, 53693, 1, 53695, +72, 75, + 1, 53694, 1, 53696, }; @@ -180732,7 +194499,7 @@ UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break( UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_codepoint(const char *u_input, int &sz) { auto u = (const unsigned char *) u_input; unsigned char u0 = u[0]; - if (u0>=0 && u0<=127) { + if (u0<=127) { sz = 1; return u0; } @@ -180846,8 +194613,8 @@ static utf8proc_ssize_t seqindex_write_char_decomposed(utf8proc_uint16_t seqinde } for (; len >= 0; entry++, len--) { utf8proc_int32_t entry_cp = seqindex_decode_entry(&entry); - - written += utf8proc_decompose_char(entry_cp, dst+written, + utf8proc_int32_t *dst_ptr = dst ? dst + written : nullptr; + written += utf8proc_decompose_char(entry_cp, dst_ptr, (bufsize > written) ? 
(bufsize - written) : 0, options, last_boundclass); if (written < 0) return UTF8PROC_ERROR_OVERFLOW; @@ -181020,8 +194787,9 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_custom( if (custom_func != NULL) { uc = custom_func(uc, custom_data); /* user-specified custom mapping */ } + utf8proc_int32_t *target_buffer = buffer ? buffer + wpos : nullptr; decomp_result = utf8proc_decompose_char( - uc, buffer + wpos, (bufsize > wpos) ? (bufsize - wpos) : 0, options, + uc, target_buffer, (bufsize > wpos) ? (bufsize - wpos) : 0, options, &boundclass ); if (decomp_result < 0) return decomp_result; @@ -181275,7 +195043,7 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8 // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 // See the end of this file for a list @@ -181301,30 +195069,51 @@ namespace duckdb { // 3 U+000800 U+00FFFF 1110xxxx // 4 U+010000 U+10FFFF 11110xxx -UnicodeType Utf8Proc::Analyze(const char *s, size_t len) { +static void AssignInvalidUTF8Reason(UnicodeInvalidReason *invalid_reason, size_t *invalid_pos, size_t pos, UnicodeInvalidReason reason) { + if (invalid_reason) { + *invalid_reason = reason; + } + if (invalid_pos) { + *invalid_pos = pos; + } +} + +UnicodeType Utf8Proc::Analyze(const char *s, size_t len, UnicodeInvalidReason *invalid_reason, size_t *invalid_pos) { UnicodeType type = UnicodeType::ASCII; char c; for (size_t i = 0; i < len; i++) { c = s[i]; if (c == '\0') { + AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::NULL_BYTE); return UnicodeType::INVALID; } // 1 Byte / ASCII - if ((c & 0x80) == 0) + if ((c & 0x80) == 0) { continue; + } type = UnicodeType::UNICODE; - if ((s[++i] & 0xC0) != 0x80) + if ((s[++i] & 0xC0) != 0x80) { + AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH); return 
UnicodeType::INVALID; - if ((c & 0xE0) == 0xC0) + } + if ((c & 0xE0) == 0xC0) { continue; - if ((s[++i] & 0xC0) != 0x80) + } + if ((s[++i] & 0xC0) != 0x80) { + AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH); return UnicodeType::INVALID; - if ((c & 0xF0) == 0xE0) + } + if ((c & 0xF0) == 0xE0) { continue; - if ((s[++i] & 0xC0) != 0x80) + } + if ((s[++i] & 0xC0) != 0x80) { + AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH); return UnicodeType::INVALID; - if ((c & 0xF8) == 0xF0) + } + if ((c & 0xF8) == 0xF0) { continue; + } + AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH); return UnicodeType::INVALID; } @@ -181386,7 +195175,7 @@ size_t Utf8Proc::RenderWidth(const char *s, size_t len, size_t pos) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list #include @@ -181396,7 +195185,7 @@ size_t Utf8Proc::RenderWidth(const char *s, size_t len, size_t pos) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list @@ -181696,7 +195485,7 @@ PGNode *newNode(size_t size, PGNodeTag type) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list @@ -181706,7 +195495,7 @@ PGNode *newNode(size_t size, PGNodeTag type) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end 
of this file for a list /*------------------------------------------------------------------------- @@ -181788,7 +195577,7 @@ bool PostgresParser::IsKeyword(const std::string &text) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*-------------------------------------------------------------------- @@ -182336,7 +196125,7 @@ int length(const PGList *list); // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*-------------------------------------------------------------------- @@ -182377,7 +196166,7 @@ int length(const PGList *list); // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*------------------------------------------------------------------------- @@ -182412,7 +196201,7 @@ typedef struct PGFunctionCallInfoData *PGFunctionCallInfo; // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*------------------------------------------------------------------------- @@ -182490,7 +196279,7 @@ PGGroupingSet *makeGroupingSet(GroupingSetKind kind, PGList *content, int locati // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list 
/*------------------------------------------------------------------------- @@ -182837,7 +196626,7 @@ PGGroupingSet *makeGroupingSet(GroupingSetKind kind, PGList *content, int locati // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*-------------------------------------------------------------------- @@ -182919,7 +196708,7 @@ PGValue *makeString(const char *str) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /* A Bison parser, made by GNU Bison 2.3. */ @@ -184007,7 +197796,7 @@ PGValue *makeString(const char *str) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*------------------------------------------------------------------------- @@ -184034,7 +197823,7 @@ PGValue *makeString(const char *str) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*------------------------------------------------------------------------- @@ -184172,7 +197961,7 @@ namespace duckdb_libpgquery { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /* A Bison parser, made by GNU Bison 2.3. 
*/ @@ -185290,7 +199079,7 @@ int base_yyparse(core_yyscan_t yyscanner); // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*------------------------------------------------------------------------- @@ -185314,7 +199103,7 @@ int base_yyparse(core_yyscan_t yyscanner); // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*------------------------------------------------------------------------- @@ -185334,7 +199123,7 @@ int base_yyparse(core_yyscan_t yyscanner); // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*------------------------------------------------------------------------- @@ -185395,7 +199184,7 @@ typedef struct { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*------------------------------------------------------------------------- @@ -185630,7 +199419,6 @@ static PGNode *makeNullAConst(int location); static PGNode *makeAConst(PGValue *v, int location); static PGNode *makeBoolAConst(bool state, int location); static PGNode *makeParamRef(int number, int location); -static PGNode *makeParamRefCast(int number, int location, PGTypeName *tpname); static void check_qualified_name(PGList *names, core_yyscan_t yyscanner); static PGList *check_func_name(PGList *names, core_yyscan_t yyscanner); static PGList *check_indirection(PGList 
*indirection, core_yyscan_t yyscanner); @@ -185721,7 +199509,7 @@ typedef union YYSTYPE PGViewCheckOption viewcheckoption; } /* Line 193 of yacc.c. */ -#line 1261 "third_party/libpg_query/grammar/grammar_out.cpp" +#line 1260 "third_party/libpg_query/grammar/grammar_out.cpp" YYSTYPE; # define yystype YYSTYPE /* obsolescent; will be withdrawn */ # define YYSTYPE_IS_DECLARED 1 @@ -185746,7 +199534,7 @@ typedef struct YYLTYPE /* Line 216 of yacc.c. */ -#line 1286 "third_party/libpg_query/grammar/grammar_out.cpp" +#line 1285 "third_party/libpg_query/grammar/grammar_out.cpp" #ifdef short # undef short @@ -185963,16 +199751,16 @@ union yyalloc /* YYFINAL -- State number of the termination state. */ #define YYFINAL 578 /* YYLAST -- Last index in YYTABLE. */ -#define YYLAST 49047 +#define YYLAST 49293 /* YYNTOKENS -- Number of terminals. */ -#define YYNTOKENS 494 +#define YYNTOKENS 496 /* YYNNTS -- Number of nonterminals. */ -#define YYNNTS 367 +#define YYNNTS 369 /* YYNRULES -- Number of rules. */ -#define YYNRULES 1745 +#define YYNRULES 1751 /* YYNRULES -- Number of states. */ -#define YYNSTATES 2853 +#define YYNSTATES 2868 /* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. 
*/ #define YYUNDEFTOK 2 @@ -185987,16 +199775,16 @@ static const yytype_uint16 yytranslate[] = 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 491, 2, 481, 2, 2, + 2, 2, 2, 2, 2, 493, 2, 481, 2, 2, 486, 487, 479, 477, 490, 478, 488, 480, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 493, 489, - 473, 475, 474, 492, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 495, 489, + 473, 475, 474, 494, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 484, 2, 485, 482, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 491, 2, 492, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -186073,813 +199861,817 @@ static const yytype_uint16 yyprhs[] = 170, 177, 182, 189, 196, 203, 209, 215, 222, 232, 237, 243, 251, 258, 263, 272, 277, 280, 285, 289, 296, 301, 304, 307, 310, 313, 315, 318, 319, 321, - 324, 327, 330, 332, 336, 341, 344, 345, 348, 352, - 355, 359, 366, 373, 382, 389, 398, 405, 414, 421, - 430, 439, 450, 459, 470, 472, 473, 481, 483, 488, - 493, 501, 504, 506, 510, 515, 519, 520, 522, 523, - 526, 530, 536, 545, 551, 552, 558, 564, 572, 575, - 576, 578, 580, 582, 586, 589, 590, 592, 593, 595, - 599, 601, 605, 607, 611, 614, 617, 622, 628, 634, - 643, 645, 646, 650, 660, 673, 677, 678, 683, 690, - 692, 695, 697, 699, 700, 702, 705, 708, 710, 713, - 716, 718, 721, 725, 728, 731, 734, 737, 741, 745, - 749, 751, 755, 757, 758, 760, 763, 766, 769, 772, - 775, 778, 781, 784, 786, 788, 789, 799, 812, 813, - 816, 818, 820, 822, 824, 826, 828, 832, 833, 835, - 838, 840, 842, 845, 848, 852, 854, 856, 859, 862, - 864, 867, 871, 877, 880, 886, 892, 895, 899, 901, - 903, 906, 909, 910, 915, 916, 920, 925, 930, 931, - 935, 938, 939, 943, 945, 947, 949, 951, 953, 955, - 957, 959, 961, 963, 967, 971, 973, 976, 979, 982, - 985, 988, 991, 992, 996, 1000, 1004, 1005, 1007, 1010, - 1012, 1015, 1018, 
1021, 1024, 1029, 1031, 1035, 1037, 1041, - 1043, 1045, 1047, 1049, 1053, 1055, 1058, 1059, 1061, 1062, - 1064, 1068, 1069, 1072, 1073, 1077, 1081, 1083, 1089, 1093, - 1095, 1099, 1101, 1106, 1112, 1118, 1125, 1129, 1137, 1142, - 1154, 1156, 1160, 1163, 1166, 1169, 1170, 1174, 1176, 1178, - 1181, 1184, 1187, 1190, 1192, 1193, 1195, 1198, 1205, 1210, - 1217, 1222, 1229, 1238, 1240, 1242, 1244, 1246, 1248, 1251, - 1253, 1256, 1258, 1260, 1262, 1266, 1270, 1274, 1278, 1281, - 1284, 1286, 1290, 1292, 1294, 1296, 1298, 1302, 1304, 1306, - 1307, 1309, 1311, 1313, 1320, 1322, 1324, 1327, 1331, 1340, - 1352, 1362, 1364, 1366, 1370, 1371, 1373, 1377, 1379, 1380, - 1382, 1383, 1385, 1386, 1388, 1392, 1394, 1396, 1398, 1402, - 1403, 1406, 1409, 1410, 1413, 1414, 1416, 1417, 1419, 1421, - 1423, 1427, 1431, 1433, 1435, 1439, 1443, 1447, 1451, 1456, - 1460, 1463, 1465, 1467, 1469, 1471, 1473, 1477, 1479, 1481, - 1485, 1489, 1491, 1494, 1499, 1504, 1507, 1511, 1517, 1523, - 1525, 1527, 1538, 1549, 1551, 1554, 1559, 1564, 1569, 1572, - 1575, 1579, 1581, 1585, 1592, 1595, 1596, 1600, 1604, 1609, - 1614, 1619, 1624, 1628, 1631, 1633, 1635, 1636, 1638, 1640, - 1641, 1643, 1649, 1651, 1652, 1654, 1655, 1659, 1661, 1665, - 1670, 1674, 1676, 1678, 1679, 1682, 1685, 1686, 1689, 1692, - 1694, 1696, 1698, 1699, 1702, 1707, 1713, 1718, 1721, 1725, - 1728, 1731, 1734, 1737, 1739, 1742, 1746, 1747, 1749, 1750, - 1756, 1758, 1763, 1770, 1773, 1775, 1776, 1781, 1782, 1784, - 1786, 1788, 1790, 1793, 1796, 1798, 1800, 1802, 1804, 1806, - 1808, 1812, 1813, 1815, 1819, 1821, 1823, 1826, 1829, 1830, - 1832, 1836, 1838, 1839, 1841, 1844, 1848, 1851, 1856, 1859, - 1863, 1866, 1867, 1869, 1872, 1873, 1878, 1884, 1887, 1888, - 1890, 1894, 1898, 1902, 1906, 1910, 1914, 1916, 1921, 1925, - 1930, 1936, 1941, 1947, 1952, 1958, 1961, 1966, 1968, 1970, - 1971, 1973, 1978, 1984, 1989, 1990, 1993, 1996, 1999, 2001, - 2003, 2004, 2009, 2012, 2014, 2017, 2020, 2025, 2028, 2035, - 2038, 2040, 2044, 2049, 2050, 
2053, 2054, 2057, 2058, 2060, - 2064, 2068, 2071, 2072, 2075, 2080, 2082, 2084, 2087, 2091, - 2097, 2104, 2107, 2111, 2116, 2121, 2125, 2130, 2131, 2133, - 2135, 2137, 2139, 2141, 2144, 2149, 2151, 2153, 2155, 2157, - 2160, 2164, 2165, 2167, 2169, 2171, 2173, 2175, 2178, 2181, - 2184, 2187, 2190, 2192, 2196, 2197, 2199, 2201, 2203, 2205, - 2211, 2214, 2216, 2218, 2220, 2222, 2227, 2229, 2232, 2235, - 2237, 2241, 2245, 2248, 2250, 2251, 2257, 2260, 2266, 2269, - 2271, 2275, 2279, 2280, 2282, 2284, 2286, 2288, 2290, 2292, - 2294, 2296, 2298, 2300, 2302, 2304, 2306, 2308, 2310, 2312, - 2314, 2316, 2318, 2320, 2322, 2324, 2326, 2328, 2332, 2336, - 2340, 2344, 2348, 2352, 2356, 2357, 2359, 2363, 2367, 2373, - 2376, 2379, 2383, 2387, 2391, 2395, 2399, 2403, 2407, 2411, - 2415, 2419, 2423, 2427, 2431, 2434, 2437, 2441, 2445, 2448, - 2451, 2455, 2459, 2465, 2470, 2477, 2481, 2487, 2492, 2499, - 2504, 2511, 2517, 2525, 2529, 2532, 2537, 2541, 2544, 2546, - 2550, 2554, 2558, 2562, 2567, 2571, 2576, 2580, 2585, 2591, - 2598, 2605, 2613, 2620, 2628, 2635, 2643, 2647, 2652, 2657, - 2664, 2666, 2671, 2673, 2677, 2680, 2683, 2687, 2691, 2695, + 324, 327, 330, 332, 336, 341, 344, 346, 347, 350, + 354, 357, 361, 368, 375, 384, 391, 400, 407, 416, + 423, 432, 441, 452, 461, 472, 474, 475, 483, 485, + 490, 495, 503, 506, 508, 512, 517, 521, 522, 524, + 525, 528, 532, 538, 547, 553, 554, 560, 566, 574, + 577, 578, 580, 582, 584, 588, 591, 592, 594, 595, + 597, 601, 603, 607, 609, 613, 616, 619, 624, 630, + 636, 645, 647, 648, 652, 662, 675, 679, 680, 685, + 692, 694, 697, 699, 701, 702, 704, 707, 710, 712, + 715, 718, 720, 723, 727, 730, 733, 736, 739, 743, + 747, 751, 753, 757, 759, 760, 762, 765, 768, 771, + 774, 777, 780, 783, 786, 788, 790, 791, 801, 814, + 815, 818, 820, 822, 824, 826, 828, 830, 834, 835, + 837, 840, 842, 844, 847, 850, 854, 856, 858, 861, + 864, 866, 869, 873, 879, 882, 888, 894, 897, 901, + 903, 905, 908, 911, 912, 917, 918, 922, 927, 932, + 933, 937, 940, 941, 
945, 947, 949, 951, 953, 955, + 957, 959, 961, 963, 965, 969, 973, 975, 978, 981, + 984, 987, 990, 993, 994, 998, 1002, 1006, 1007, 1009, + 1012, 1014, 1017, 1020, 1023, 1026, 1031, 1033, 1037, 1039, + 1043, 1045, 1047, 1049, 1051, 1055, 1057, 1060, 1061, 1063, + 1064, 1066, 1070, 1071, 1074, 1075, 1079, 1083, 1085, 1091, + 1095, 1097, 1101, 1103, 1108, 1114, 1120, 1127, 1131, 1139, + 1144, 1156, 1158, 1162, 1165, 1168, 1171, 1172, 1176, 1178, + 1180, 1183, 1186, 1189, 1192, 1194, 1195, 1197, 1200, 1207, + 1212, 1219, 1224, 1231, 1240, 1242, 1244, 1246, 1248, 1250, + 1253, 1255, 1258, 1260, 1262, 1264, 1268, 1272, 1276, 1280, + 1283, 1286, 1288, 1292, 1294, 1296, 1298, 1300, 1304, 1306, + 1308, 1309, 1311, 1313, 1315, 1322, 1324, 1326, 1329, 1333, + 1342, 1354, 1364, 1366, 1368, 1372, 1373, 1375, 1379, 1381, + 1382, 1384, 1385, 1387, 1388, 1390, 1394, 1396, 1398, 1400, + 1404, 1405, 1408, 1411, 1412, 1415, 1416, 1418, 1419, 1421, + 1423, 1425, 1429, 1433, 1435, 1437, 1441, 1445, 1449, 1453, + 1458, 1462, 1465, 1467, 1469, 1471, 1473, 1475, 1479, 1481, + 1483, 1487, 1491, 1493, 1496, 1501, 1506, 1509, 1513, 1519, + 1525, 1527, 1529, 1540, 1551, 1553, 1556, 1561, 1566, 1571, + 1574, 1577, 1581, 1583, 1587, 1594, 1597, 1598, 1602, 1606, + 1611, 1616, 1621, 1626, 1630, 1633, 1635, 1637, 1638, 1640, + 1642, 1643, 1645, 1651, 1653, 1654, 1656, 1657, 1661, 1663, + 1667, 1672, 1676, 1678, 1680, 1681, 1684, 1687, 1688, 1691, + 1694, 1696, 1698, 1700, 1701, 1704, 1709, 1715, 1720, 1723, + 1727, 1730, 1733, 1736, 1739, 1741, 1744, 1748, 1749, 1751, + 1752, 1758, 1760, 1765, 1772, 1775, 1777, 1778, 1783, 1784, + 1786, 1788, 1790, 1792, 1795, 1798, 1800, 1802, 1804, 1806, + 1808, 1810, 1814, 1815, 1817, 1821, 1823, 1825, 1828, 1831, + 1832, 1834, 1838, 1840, 1841, 1843, 1846, 1850, 1853, 1858, + 1861, 1865, 1868, 1869, 1871, 1874, 1875, 1880, 1886, 1889, + 1890, 1892, 1896, 1900, 1904, 1908, 1912, 1916, 1918, 1923, + 1927, 1932, 1938, 1943, 1949, 1954, 1960, 1963, 1968, 1970, + 
1972, 1973, 1975, 1980, 1986, 1991, 1992, 1995, 1998, 2001, + 2003, 2005, 2006, 2011, 2014, 2016, 2019, 2022, 2027, 2030, + 2037, 2040, 2042, 2046, 2051, 2052, 2055, 2056, 2059, 2060, + 2062, 2066, 2070, 2073, 2074, 2077, 2082, 2084, 2086, 2089, + 2093, 2099, 2106, 2109, 2113, 2119, 2125, 2129, 2134, 2135, + 2137, 2139, 2141, 2143, 2145, 2148, 2153, 2155, 2157, 2159, + 2161, 2164, 2168, 2169, 2171, 2173, 2175, 2177, 2179, 2182, + 2185, 2188, 2191, 2194, 2196, 2200, 2201, 2203, 2205, 2207, + 2209, 2215, 2218, 2220, 2222, 2224, 2226, 2231, 2233, 2236, + 2239, 2241, 2245, 2249, 2252, 2254, 2255, 2261, 2264, 2270, + 2273, 2275, 2279, 2283, 2284, 2286, 2288, 2290, 2292, 2294, + 2296, 2298, 2300, 2302, 2304, 2306, 2308, 2310, 2312, 2314, + 2316, 2318, 2320, 2322, 2324, 2326, 2328, 2330, 2332, 2336, + 2340, 2344, 2348, 2352, 2356, 2360, 2361, 2363, 2367, 2371, + 2377, 2380, 2383, 2387, 2391, 2395, 2399, 2403, 2407, 2411, + 2415, 2419, 2423, 2427, 2431, 2435, 2438, 2441, 2445, 2449, + 2452, 2455, 2459, 2463, 2469, 2474, 2481, 2485, 2491, 2496, + 2503, 2508, 2515, 2521, 2529, 2533, 2536, 2541, 2545, 2548, + 2550, 2554, 2558, 2562, 2566, 2570, 2574, 2579, 2583, 2588, + 2592, 2597, 2603, 2610, 2617, 2625, 2632, 2640, 2647, 2655, + 2659, 2664, 2669, 2676, 2678, 2683, 2685, 2689, 2692, 2695, 2699, 2703, 2707, 2711, 2715, 2719, 2723, 2727, 2731, 2735, - 2738, 2741, 2747, 2754, 2761, 2769, 2771, 2773, 2776, 2779, - 2782, 2787, 2789, 2792, 2794, 2797, 2800, 2804, 2810, 2817, - 2826, 2833, 2840, 2845, 2850, 2852, 2854, 2856, 2862, 2864, - 2866, 2871, 2873, 2878, 2880, 2885, 2887, 2892, 2894, 2896, - 2898, 2900, 2902, 2904, 2911, 2918, 2923, 2928, 2933, 2938, - 2945, 2951, 2957, 2963, 2968, 2975, 2980, 2986, 2987, 2993, - 2994, 2997, 2998, 3000, 3004, 3008, 3011, 3014, 3015, 3022, - 3024, 3025, 3029, 3030, 3033, 3036, 3037, 3039, 3044, 3047, - 3050, 3053, 3056, 3059, 3064, 3068, 3070, 3076, 3078, 3080, - 3082, 3084, 3086, 3088, 3090, 3092, 3094, 3096, 3098, 3100, - 3102, 3104, 3106, 
3108, 3110, 3112, 3117, 3119, 3124, 3126, - 3131, 3133, 3136, 3138, 3141, 3143, 3146, 3148, 3152, 3154, - 3158, 3160, 3161, 3163, 3167, 3169, 3173, 3177, 3179, 3183, - 3187, 3188, 3190, 3192, 3194, 3196, 3198, 3200, 3202, 3204, - 3206, 3208, 3213, 3217, 3220, 3224, 3225, 3229, 3233, 3236, - 3239, 3241, 3242, 3245, 3248, 3252, 3255, 3257, 3259, 3263, - 3269, 3271, 3274, 3279, 3282, 3283, 3285, 3286, 3288, 3291, - 3294, 3297, 3301, 3307, 3309, 3310, 3312, 3315, 3316, 3319, - 3321, 3322, 3324, 3325, 3327, 3331, 3335, 3338, 3340, 3342, - 3344, 3348, 3350, 3353, 3355, 3359, 3361, 3363, 3365, 3368, - 3370, 3372, 3375, 3377, 3379, 3382, 3389, 3392, 3398, 3402, - 3406, 3408, 3410, 3412, 3414, 3416, 3418, 3420, 3422, 3424, - 3426, 3428, 3430, 3432, 3434, 3436, 3438, 3440, 3442, 3444, - 3446, 3449, 3452, 3456, 3460, 3461, 3463, 3465, 3467, 3469, - 3471, 3473, 3475, 3481, 3485, 3486, 3488, 3490, 3492, 3494, - 3499, 3507, 3510, 3511, 3513, 3515, 3517, 3519, 3533, 3550, - 3552, 3555, 3556, 3558, 3559, 3561, 3562, 3565, 3566, 3568, - 3569, 3576, 3585, 3592, 3601, 3608, 3617, 3620, 3622, 3627, - 3631, 3634, 3639, 3643, 3649, 3651, 3652, 3654, 3656, 3657, - 3659, 3661, 3663, 3665, 3667, 3669, 3671, 3673, 3675, 3677, - 3679, 3683, 3685, 3687, 3689, 3691, 3693, 3695, 3698, 3700, - 3702, 3705, 3709, 3713, 3715, 3719, 3723, 3726, 3730, 3734, - 3738, 3742, 3744, 3746, 3748, 3750, 3754, 3760, 3762, 3764, - 3766, 3768, 3772, 3775, 3777, 3782, 3788, 3794, 3799, 3806, - 3808, 3810, 3812, 3814, 3816, 3818, 3819, 3821, 3825, 3827, - 3828, 3836, 3838, 3841, 3845, 3848, 3849, 3852, 3853, 3856, - 3861, 3864, 3866, 3868, 3870, 3873, 3877, 3880, 3883, 3887, - 3892, 3895, 3897, 3899, 3901, 3905, 3908, 3918, 3930, 3943, - 3958, 3962, 3967, 3972, 3973, 3981, 3992, 3995, 3999, 4000, - 4005, 4007, 4009, 4011, 4013, 4015, 4017, 4019, 4021, 4023, - 4025, 4027, 4029, 4031, 4033, 4035, 4037, 4039, 4041, 4043, - 4045, 4047, 4049, 4051, 4053, 4055, 4057, 4059, 4061, 4063, - 4065, 4067, 4069, 4071, 4073, 
4075, 4077, 4079, 4081, 4083, - 4085, 4087, 4089, 4091, 4093, 4095, 4097, 4099, 4101, 4103, - 4105, 4107, 4109, 4111, 4113, 4115, 4117, 4119, 4121, 4123, - 4125, 4127, 4129, 4131, 4133, 4135, 4137, 4139, 4141, 4143, - 4145, 4147, 4149, 4151, 4153, 4155, 4157, 4159, 4161, 4163, - 4165, 4167, 4169, 4171, 4173, 4175, 4177, 4179, 4181, 4183, - 4185, 4187, 4189, 4191, 4193, 4195, 4197, 4199, 4201, 4203, - 4205, 4207, 4209, 4211, 4213, 4215, 4217, 4219, 4221, 4223, - 4225, 4227, 4229, 4231, 4233, 4235, 4237, 4239, 4241, 4243, - 4245, 4247, 4249, 4251, 4253, 4255, 4257, 4259, 4261, 4263, - 4265, 4267, 4269, 4271, 4273, 4275, 4277, 4279, 4281, 4283, - 4285, 4287, 4289, 4291, 4293, 4295, 4297, 4299, 4301, 4303, - 4305, 4307, 4309, 4311, 4313, 4315, 4317, 4319, 4321, 4323, - 4325, 4327, 4329, 4331, 4333, 4335, 4337, 4339, 4341, 4343, - 4345, 4347, 4349, 4351, 4353, 4355, 4357, 4359, 4361, 4363, - 4365, 4367, 4369, 4371, 4373, 4375, 4377, 4379, 4381, 4383, - 4385, 4387, 4389, 4391, 4393, 4395, 4397, 4399, 4401, 4403, - 4405, 4407, 4409, 4411, 4413, 4415, 4417, 4419, 4421, 4423, - 4425, 4427, 4429, 4431, 4433, 4435, 4437, 4439, 4441, 4443, - 4445, 4447, 4449, 4451, 4453, 4455, 4457, 4459, 4461, 4463, - 4465, 4467, 4469, 4471, 4473, 4475, 4477, 4479, 4481, 4483, - 4485, 4487, 4489, 4491, 4493, 4495, 4497, 4499, 4501, 4503, - 4505, 4507, 4509, 4511, 4513, 4515, 4517, 4519, 4521, 4523, - 4525, 4527, 4529, 4531, 4533, 4535, 4537, 4539, 4541, 4543, - 4545, 4547, 4549, 4551, 4553, 4555, 4557, 4559, 4561, 4563, - 4565, 4567, 4569, 4571, 4573, 4575, 4577, 4579, 4581, 4583, - 4585, 4587, 4589, 4591, 4593, 4595, 4597, 4599, 4601, 4603, - 4605, 4607, 4609, 4611, 4613, 4615, 4617, 4619, 4621, 4623, - 4625, 4627, 4629, 4631, 4633, 4635, 4637, 4639, 4641, 4643, - 4645, 4647, 4649, 4651, 4653, 4655, 4657, 4659, 4661, 4663, - 4665, 4667, 4669, 4671, 4673, 4675, 4677, 4679, 4681, 4683, - 4685, 4687, 4689, 4691, 4693, 4695, 4697, 4699, 4701, 4703, - 4705, 4707, 4709, 4711, 4713, 4715, 4717, 
4719, 4721, 4723, - 4725, 4727, 4729, 4731, 4733, 4735, 4737, 4739, 4741, 4743, - 4745, 4747, 4749, 4751, 4753, 4755, 4757, 4759, 4761, 4763, - 4765, 4767, 4769, 4771, 4773, 4775, 4777, 4779, 4781, 4783, - 4785, 4787, 4789, 4791, 4793, 4795, 4797, 4799, 4801, 4803, - 4805, 4807, 4809, 4811, 4813, 4815, 4817, 4819, 4821, 4823, - 4825, 4827, 4829, 4831, 4833, 4835, 4837, 4839, 4841, 4843, - 4845, 4847, 4849, 4851, 4853, 4855, 4857, 4859, 4861, 4863, - 4865, 4867, 4869, 4871, 4873, 4875, 4877, 4879, 4881, 4883, - 4885, 4887, 4889, 4891, 4893, 4895, 4897, 4899, 4901, 4903, - 4905, 4907, 4909, 4911, 4913, 4915, 4917, 4919, 4921, 4923, - 4925, 4927, 4929, 4931, 4933, 4935, 4937, 4939, 4941, 4943, - 4945, 4947, 4949, 4951, 4953, 4955, 4957, 4959, 4961, 4963, - 4965, 4967, 4969, 4971, 4973, 4975, 4977, 4979, 4981, 4983, - 4985, 4987, 4989, 4991, 4993, 4995, 4997, 4999, 5001, 5003, - 5005, 5007, 5009, 5011, 5013, 5015, 5017, 5019, 5021, 5023, - 5025, 5027, 5029, 5031, 5033, 5035, 5037, 5039, 5041, 5043, - 5045, 5047, 5049, 5051, 5053, 5055, 5057, 5059, 5061, 5063, - 5065, 5067, 5069, 5071, 5073, 5075, 5077, 5079, 5081, 5083, - 5085, 5087, 5089, 5091, 5093, 5095, 5097, 5099, 5101, 5103, - 5105, 5107, 5109, 5111, 5113, 5115 + 2739, 2743, 2747, 2750, 2753, 2759, 2766, 2773, 2781, 2783, + 2785, 2788, 2791, 2794, 2799, 2801, 2804, 2806, 2809, 2812, + 2816, 2822, 2829, 2838, 2845, 2852, 2857, 2862, 2864, 2866, + 2868, 2874, 2876, 2878, 2883, 2885, 2890, 2892, 2897, 2899, + 2904, 2906, 2908, 2910, 2912, 2914, 2916, 2923, 2930, 2935, + 2940, 2945, 2950, 2957, 2963, 2969, 2975, 2980, 2987, 2992, + 2998, 2999, 3005, 3006, 3009, 3010, 3012, 3016, 3020, 3023, + 3026, 3027, 3034, 3036, 3037, 3041, 3042, 3045, 3048, 3049, + 3051, 3056, 3059, 3062, 3065, 3068, 3071, 3076, 3080, 3082, + 3088, 3092, 3094, 3098, 3100, 3102, 3104, 3106, 3108, 3110, + 3112, 3114, 3116, 3118, 3120, 3122, 3124, 3126, 3128, 3130, + 3132, 3134, 3139, 3141, 3146, 3148, 3153, 3155, 3158, 3160, + 3163, 3165, 3168, 
3170, 3174, 3176, 3180, 3182, 3183, 3185, + 3189, 3191, 3195, 3199, 3201, 3205, 3209, 3210, 3212, 3214, + 3216, 3218, 3220, 3222, 3224, 3226, 3228, 3230, 3235, 3239, + 3242, 3246, 3247, 3251, 3255, 3258, 3261, 3263, 3264, 3267, + 3270, 3274, 3277, 3279, 3281, 3285, 3291, 3293, 3296, 3301, + 3304, 3305, 3307, 3308, 3310, 3313, 3316, 3319, 3323, 3329, + 3331, 3332, 3334, 3337, 3338, 3341, 3343, 3344, 3346, 3347, + 3349, 3353, 3357, 3360, 3362, 3364, 3366, 3370, 3372, 3375, + 3377, 3381, 3383, 3385, 3387, 3390, 3392, 3394, 3397, 3399, + 3401, 3404, 3411, 3414, 3420, 3424, 3428, 3430, 3432, 3434, + 3436, 3438, 3440, 3442, 3444, 3446, 3448, 3450, 3452, 3454, + 3456, 3458, 3460, 3462, 3464, 3466, 3468, 3471, 3474, 3478, + 3482, 3483, 3485, 3487, 3489, 3491, 3493, 3495, 3497, 3503, + 3507, 3508, 3510, 3512, 3514, 3516, 3521, 3529, 3532, 3533, + 3535, 3537, 3539, 3541, 3555, 3572, 3574, 3577, 3578, 3580, + 3581, 3583, 3584, 3587, 3588, 3590, 3591, 3598, 3607, 3614, + 3623, 3630, 3639, 3642, 3644, 3649, 3653, 3656, 3661, 3665, + 3671, 3673, 3674, 3676, 3678, 3679, 3681, 3683, 3685, 3687, + 3689, 3691, 3693, 3695, 3697, 3699, 3701, 3705, 3707, 3709, + 3711, 3713, 3715, 3717, 3720, 3722, 3724, 3727, 3731, 3735, + 3737, 3741, 3745, 3748, 3752, 3756, 3760, 3764, 3766, 3768, + 3770, 3772, 3776, 3782, 3784, 3786, 3788, 3790, 3794, 3797, + 3799, 3804, 3810, 3816, 3821, 3828, 3830, 3832, 3834, 3836, + 3838, 3840, 3841, 3843, 3847, 3849, 3850, 3858, 3860, 3863, + 3867, 3870, 3871, 3874, 3875, 3878, 3883, 3886, 3888, 3890, + 3892, 3895, 3899, 3902, 3905, 3909, 3914, 3917, 3919, 3921, + 3923, 3927, 3930, 3940, 3952, 3965, 3980, 3984, 3989, 3994, + 3995, 4003, 4014, 4017, 4021, 4022, 4027, 4029, 4031, 4033, + 4035, 4037, 4039, 4041, 4043, 4045, 4047, 4049, 4051, 4053, + 4055, 4057, 4059, 4061, 4063, 4065, 4067, 4069, 4071, 4073, + 4075, 4077, 4079, 4081, 4083, 4085, 4087, 4089, 4091, 4093, + 4095, 4097, 4099, 4101, 4103, 4105, 4107, 4109, 4111, 4113, + 4115, 4117, 4119, 4121, 4123, 
4125, 4127, 4129, 4131, 4133, + 4135, 4137, 4139, 4141, 4143, 4145, 4147, 4149, 4151, 4153, + 4155, 4157, 4159, 4161, 4163, 4165, 4167, 4169, 4171, 4173, + 4175, 4177, 4179, 4181, 4183, 4185, 4187, 4189, 4191, 4193, + 4195, 4197, 4199, 4201, 4203, 4205, 4207, 4209, 4211, 4213, + 4215, 4217, 4219, 4221, 4223, 4225, 4227, 4229, 4231, 4233, + 4235, 4237, 4239, 4241, 4243, 4245, 4247, 4249, 4251, 4253, + 4255, 4257, 4259, 4261, 4263, 4265, 4267, 4269, 4271, 4273, + 4275, 4277, 4279, 4281, 4283, 4285, 4287, 4289, 4291, 4293, + 4295, 4297, 4299, 4301, 4303, 4305, 4307, 4309, 4311, 4313, + 4315, 4317, 4319, 4321, 4323, 4325, 4327, 4329, 4331, 4333, + 4335, 4337, 4339, 4341, 4343, 4345, 4347, 4349, 4351, 4353, + 4355, 4357, 4359, 4361, 4363, 4365, 4367, 4369, 4371, 4373, + 4375, 4377, 4379, 4381, 4383, 4385, 4387, 4389, 4391, 4393, + 4395, 4397, 4399, 4401, 4403, 4405, 4407, 4409, 4411, 4413, + 4415, 4417, 4419, 4421, 4423, 4425, 4427, 4429, 4431, 4433, + 4435, 4437, 4439, 4441, 4443, 4445, 4447, 4449, 4451, 4453, + 4455, 4457, 4459, 4461, 4463, 4465, 4467, 4469, 4471, 4473, + 4475, 4477, 4479, 4481, 4483, 4485, 4487, 4489, 4491, 4493, + 4495, 4497, 4499, 4501, 4503, 4505, 4507, 4509, 4511, 4513, + 4515, 4517, 4519, 4521, 4523, 4525, 4527, 4529, 4531, 4533, + 4535, 4537, 4539, 4541, 4543, 4545, 4547, 4549, 4551, 4553, + 4555, 4557, 4559, 4561, 4563, 4565, 4567, 4569, 4571, 4573, + 4575, 4577, 4579, 4581, 4583, 4585, 4587, 4589, 4591, 4593, + 4595, 4597, 4599, 4601, 4603, 4605, 4607, 4609, 4611, 4613, + 4615, 4617, 4619, 4621, 4623, 4625, 4627, 4629, 4631, 4633, + 4635, 4637, 4639, 4641, 4643, 4645, 4647, 4649, 4651, 4653, + 4655, 4657, 4659, 4661, 4663, 4665, 4667, 4669, 4671, 4673, + 4675, 4677, 4679, 4681, 4683, 4685, 4687, 4689, 4691, 4693, + 4695, 4697, 4699, 4701, 4703, 4705, 4707, 4709, 4711, 4713, + 4715, 4717, 4719, 4721, 4723, 4725, 4727, 4729, 4731, 4733, + 4735, 4737, 4739, 4741, 4743, 4745, 4747, 4749, 4751, 4753, + 4755, 4757, 4759, 4761, 4763, 4765, 4767, 
4769, 4771, 4773, + 4775, 4777, 4779, 4781, 4783, 4785, 4787, 4789, 4791, 4793, + 4795, 4797, 4799, 4801, 4803, 4805, 4807, 4809, 4811, 4813, + 4815, 4817, 4819, 4821, 4823, 4825, 4827, 4829, 4831, 4833, + 4835, 4837, 4839, 4841, 4843, 4845, 4847, 4849, 4851, 4853, + 4855, 4857, 4859, 4861, 4863, 4865, 4867, 4869, 4871, 4873, + 4875, 4877, 4879, 4881, 4883, 4885, 4887, 4889, 4891, 4893, + 4895, 4897, 4899, 4901, 4903, 4905, 4907, 4909, 4911, 4913, + 4915, 4917, 4919, 4921, 4923, 4925, 4927, 4929, 4931, 4933, + 4935, 4937, 4939, 4941, 4943, 4945, 4947, 4949, 4951, 4953, + 4955, 4957, 4959, 4961, 4963, 4965, 4967, 4969, 4971, 4973, + 4975, 4977, 4979, 4981, 4983, 4985, 4987, 4989, 4991, 4993, + 4995, 4997, 4999, 5001, 5003, 5005, 5007, 5009, 5011, 5013, + 5015, 5017, 5019, 5021, 5023, 5025, 5027, 5029, 5031, 5033, + 5035, 5037, 5039, 5041, 5043, 5045, 5047, 5049, 5051, 5053, + 5055, 5057, 5059, 5061, 5063, 5065, 5067, 5069, 5071, 5073, + 5075, 5077, 5079, 5081, 5083, 5085, 5087, 5089, 5091, 5093, + 5095, 5097, 5099, 5101, 5103, 5105, 5107, 5109, 5111, 5113, + 5115, 5117, 5119, 5121, 5123, 5125, 5127, 5129, 5131, 5133, + 5135, 5137 }; /* YYRHS -- A `-1'-separated list of the rules' RHS. 
*/ static const yytype_int16 yyrhs[] = { - 495, 0, -1, 496, -1, 496, 489, 497, -1, 497, - -1, 809, -1, 535, -1, 498, -1, 841, -1, 848, - -1, 810, -1, 600, -1, 851, -1, 596, -1, 799, - -1, 531, -1, 544, -1, 509, -1, 837, -1, 590, - -1, 533, -1, 813, -1, 811, -1, 812, -1, 802, - -1, 512, -1, 830, -1, 530, -1, 796, -1, 510, - -1, 617, -1, 542, -1, 599, -1, 832, -1, 842, - -1, 824, -1, 845, -1, 849, -1, -1, 29, 394, - 677, 506, -1, 29, 394, 185, 149, 677, 506, -1, - 29, 194, 777, 506, -1, 29, 194, 185, 149, 777, - 506, -1, 29, 360, 777, 506, -1, 29, 360, 185, - 149, 777, 506, -1, 29, 441, 777, 506, -1, 29, - 441, 185, 149, 777, 506, -1, 501, -1, 499, 501, - -1, 366, 114, 719, -1, 134, 114, -1, 338, -1, - 338, 537, 538, -1, 366, 539, -1, 366, 171, 589, - -1, 505, -1, 502, 490, 505, -1, 23, 569, -1, - 23, 185, 264, 149, 569, -1, 23, 75, 569, -1, - 23, 75, 185, 264, 149, 569, -1, 29, 511, 785, - 500, -1, 29, 511, 785, 134, 264, 269, -1, 29, - 511, 785, 366, 264, 269, -1, 29, 511, 785, 366, - 382, 541, -1, 29, 511, 785, 366, 557, -1, 29, - 511, 785, 337, 557, -1, 29, 511, 785, 366, 385, - 785, -1, 29, 511, 785, 23, 171, 589, 36, 184, - 547, -1, 29, 511, 785, 499, -1, 29, 511, 785, - 134, 184, -1, 29, 511, 785, 134, 184, 185, 149, - -1, 134, 511, 185, 149, 785, 594, -1, 134, 511, - 785, 594, -1, 29, 511, 785, 508, 416, 689, 686, - 504, -1, 29, 511, 785, 507, -1, 23, 559, -1, - 29, 85, 779, 545, -1, 432, 85, 779, -1, 134, - 85, 185, 149, 779, 594, -1, 134, 85, 779, 594, - -1, 366, 236, -1, 366, 425, -1, 366, 557, -1, - 337, 557, -1, 507, -1, 429, 719, -1, -1, 552, - -1, 366, 552, -1, 23, 552, -1, 134, 567, -1, - 503, -1, 506, 490, 503, -1, 283, 486, 502, 487, - -1, 366, 106, -1, -1, 110, 779, -1, 110, 309, - 779, -1, 110, 27, -1, 110, 309, 27, -1, 29, - 352, 779, 333, 405, 779, -1, 29, 394, 677, 333, - 405, 779, -1, 29, 394, 185, 149, 677, 333, 405, - 779, -1, 29, 360, 777, 333, 405, 779, -1, 29, - 360, 185, 149, 777, 333, 405, 779, -1, 29, 441, - 777, 333, 405, 779, -1, 29, 
441, 185, 149, 777, - 333, 405, 779, -1, 29, 194, 777, 333, 405, 779, - -1, 29, 194, 185, 149, 777, 333, 405, 779, -1, - 29, 394, 677, 333, 511, 779, 405, 779, -1, 29, - 394, 185, 149, 677, 333, 511, 779, 405, 779, -1, - 29, 394, 677, 333, 85, 779, 405, 779, -1, 29, - 394, 185, 149, 677, 333, 85, 779, 405, 779, -1, - 75, -1, -1, 516, 204, 210, 514, 513, 519, 521, - -1, 617, -1, 292, 522, 434, 617, -1, 486, 526, - 487, 617, -1, 486, 526, 487, 292, 522, 434, 617, - -1, 114, 435, -1, 777, -1, 777, 36, 785, -1, - 486, 528, 487, 683, -1, 279, 85, 779, -1, -1, - 622, -1, -1, 785, 771, -1, 529, 475, 719, -1, - 486, 523, 487, 475, 719, -1, 279, 83, 515, 130, - 427, 366, 527, 683, -1, 279, 83, 515, 130, 265, - -1, -1, 785, 524, 525, 635, 636, -1, 724, 524, - 525, 635, 636, -1, 486, 719, 487, 524, 525, 635, - 636, -1, 340, 774, -1, -1, 428, -1, 393, -1, - 529, -1, 523, 490, 529, -1, 73, 790, -1, -1, - 790, -1, -1, 517, -1, 526, 490, 517, -1, 518, - -1, 527, 490, 518, -1, 520, -1, 528, 490, 520, - -1, 785, 771, -1, 306, 785, -1, 306, 785, 475, - 829, -1, 306, 785, 486, 749, 487, -1, 92, 588, - 360, 777, 532, -1, 92, 588, 360, 185, 264, 149, - 777, 532, -1, 536, -1, -1, 148, 779, 534, -1, - 92, 588, 394, 853, 36, 148, 779, 534, 852, -1, - 92, 588, 394, 185, 264, 149, 853, 36, 148, 779, - 534, 852, -1, 486, 747, 487, -1, -1, 29, 360, - 777, 536, -1, 29, 360, 185, 149, 777, 536, -1, - 539, -1, 536, 539, -1, 448, -1, 472, -1, -1, - 4, -1, 477, 4, -1, 478, 4, -1, 541, -1, - 36, 691, -1, 55, 538, -1, 105, -1, 262, 105, - -1, 193, 540, 538, -1, 242, 538, -1, 250, 538, - -1, 262, 242, -1, 262, 250, -1, 293, 54, 790, - -1, 360, 255, 790, -1, 380, 537, 538, -1, 338, - -1, 338, 537, 538, -1, 54, -1, -1, 783, -1, - 477, 783, -1, 478, 783, -1, 19, 543, -1, 47, - 543, -1, 380, 543, -1, 79, 543, -1, 140, 543, - -1, 345, 543, -1, 451, -1, 407, -1, -1, 92, - 588, 394, 777, 486, 576, 487, 564, 556, -1, 92, - 588, 394, 185, 264, 149, 777, 486, 576, 487, 564, - 556, -1, -1, 545, 568, -1, 583, 
-1, 860, -1, - 744, -1, 538, -1, 784, -1, 263, -1, 486, 536, - 487, -1, -1, 784, -1, 262, 22, -1, 339, -1, - 58, -1, 366, 269, -1, 366, 114, -1, 85, 779, - 551, -1, 551, -1, 563, -1, 73, 790, -1, 264, - 269, -1, 269, -1, 422, 575, -1, 312, 216, 575, - -1, 67, 486, 719, 487, 558, -1, 114, 720, -1, - 171, 589, 36, 184, 547, -1, 327, 777, 578, 586, - 554, -1, 567, 548, -1, 279, 427, 549, -1, 553, - -1, 580, -1, 553, 580, -1, 580, 553, -1, -1, - 283, 486, 570, 487, -1, -1, 279, 79, 134, -1, - 279, 79, 119, 348, -1, 279, 79, 311, 348, -1, - -1, 486, 561, 487, -1, 262, 196, -1, -1, 85, - 779, 584, -1, 584, -1, 78, -1, 86, -1, 115, - -1, 184, -1, 195, -1, 382, -1, 385, -1, 27, - -1, 581, -1, 561, 490, 581, -1, 429, 194, 572, - -1, 116, -1, 264, 116, -1, 198, 117, -1, 198, - 187, -1, 448, 557, -1, 448, 277, -1, 450, 277, - -1, -1, 486, 571, 487, -1, 566, 192, 560, -1, - 566, 146, 560, -1, -1, 794, -1, 264, 116, -1, - 116, -1, 198, 187, -1, 198, 117, -1, 264, 431, - -1, 262, 196, -1, 785, 689, 555, 579, -1, 552, - -1, 570, 490, 552, -1, 574, -1, 571, 490, 574, - -1, 785, -1, 569, -1, 587, -1, 559, -1, 794, - 475, 546, -1, 794, -1, 448, 565, -1, -1, 585, - -1, -1, 785, -1, 486, 582, 487, -1, -1, 579, - 550, -1, -1, 279, 119, 549, -1, 794, 475, 546, - -1, 794, -1, 794, 488, 794, 475, 546, -1, 794, - 488, 794, -1, 577, -1, 582, 490, 577, -1, 689, - -1, 787, 791, 481, 416, -1, 367, 787, 791, 481, - 416, -1, 67, 486, 719, 487, 545, -1, 422, 486, - 582, 487, 575, 545, -1, 422, 562, 545, -1, 312, - 216, 486, 582, 487, 575, 545, -1, 312, 216, 562, - 545, -1, 164, 216, 486, 582, 487, 327, 777, 578, - 586, 554, 545, -1, 573, -1, 585, 490, 573, -1, - 240, 168, -1, 240, 297, -1, 240, 372, -1, -1, - 226, 777, 566, -1, 400, -1, 398, -1, 230, 400, - -1, 230, 398, -1, 173, 400, -1, 173, 398, -1, - 425, -1, -1, 30, -1, 54, 114, -1, 134, 591, - 185, 149, 593, 594, -1, 134, 591, 593, 594, -1, - 134, 592, 185, 149, 778, 594, -1, 134, 592, 778, - 594, -1, 134, 595, 779, 279, 790, 594, -1, 134, 
- 595, 185, 149, 779, 279, 790, 594, -1, 394, -1, - 360, -1, 169, -1, 237, -1, 441, -1, 241, 441, - -1, 194, -1, 164, 394, -1, 74, -1, 89, -1, - 382, -1, 401, 355, 296, -1, 401, 355, 126, -1, - 401, 355, 399, -1, 401, 355, 82, -1, 21, 243, - -1, 143, 410, -1, 152, -1, 164, 106, 452, -1, - 318, -1, 352, -1, 363, -1, 790, -1, 593, 490, - 790, -1, 58, -1, 339, -1, -1, 304, -1, 349, - -1, 410, -1, 92, 597, 777, 598, 36, 719, -1, - 169, -1, 237, -1, 486, 487, -1, 486, 749, 487, - -1, 516, 427, 838, 366, 527, 667, 839, 521, -1, - 90, 612, 777, 578, 610, 601, 606, 615, 602, 537, - 607, -1, 90, 486, 798, 487, 405, 606, 615, 537, - 607, -1, 167, -1, 405, -1, 604, 121, 784, -1, - -1, 614, -1, 603, 490, 614, -1, 429, -1, -1, - 36, -1, -1, 317, -1, -1, 611, -1, 486, 616, - 487, -1, 821, -1, 538, -1, 479, -1, 486, 603, - 487, -1, -1, 794, 608, -1, 448, 277, -1, -1, - 611, 613, -1, -1, 50, -1, -1, 50, -1, 277, - -1, 166, -1, 120, 605, 784, -1, 269, 605, 784, - -1, 94, -1, 180, -1, 319, 605, 784, -1, 142, - 605, 784, -1, 163, 319, 582, -1, 163, 319, 479, - -1, 163, 264, 269, 582, -1, 163, 269, 582, -1, - 138, 784, -1, 821, -1, 784, -1, 383, -1, 384, - -1, 609, -1, 616, 490, 609, -1, 619, -1, 618, - -1, 486, 619, 487, -1, 486, 618, 487, -1, 621, - -1, 620, 632, -1, 620, 631, 659, 638, -1, 620, - 631, 637, 660, -1, 622, 620, -1, 622, 620, 632, - -1, 622, 620, 631, 659, 638, -1, 622, 620, 631, - 637, 660, -1, 621, -1, 618, -1, 359, 630, 773, - 625, 667, 683, 654, 658, 728, 642, -1, 359, 629, - 774, 625, 667, 683, 654, 658, 728, 642, -1, 666, - -1, 394, 677, -1, 620, 421, 628, 620, -1, 620, - 208, 628, 620, -1, 620, 144, 628, 620, -1, 448, - 623, -1, 472, 623, -1, 448, 325, 623, -1, 624, - -1, 623, 490, 624, -1, 779, 792, 36, 486, 798, - 487, -1, 210, 626, -1, -1, 400, 627, 777, -1, - 398, 627, 777, -1, 230, 400, 627, 777, -1, 230, - 398, 627, 777, -1, 173, 400, 627, 777, -1, 173, - 398, 627, 777, -1, 425, 627, 777, -1, 394, 777, - -1, 777, -1, 394, -1, -1, 27, -1, 129, -1, - -1, 
129, -1, 129, 279, 486, 747, 487, -1, 27, - -1, -1, 632, -1, -1, 285, 54, 633, -1, 634, - -1, 633, 490, 634, -1, 719, 429, 744, 636, -1, - 719, 635, 636, -1, 37, -1, 123, -1, -1, 471, - 159, -1, 471, 220, -1, -1, 639, 640, -1, 640, - 639, -1, 639, -1, 640, -1, 637, -1, -1, 227, - 648, -1, 227, 648, 490, 649, -1, 157, 653, 650, - 652, 280, -1, 157, 653, 652, 280, -1, 276, 649, - -1, 276, 650, 652, -1, 4, 481, -1, 9, 481, - -1, 4, 301, -1, 9, 301, -1, 9, -1, 9, - 348, -1, 429, 350, 644, -1, -1, 785, -1, -1, - 643, 486, 641, 487, 647, -1, 641, -1, 641, 486, - 785, 487, -1, 641, 486, 785, 490, 9, 487, -1, - 396, 644, -1, 645, -1, -1, 334, 486, 9, 487, - -1, -1, 719, -1, 27, -1, 719, -1, 721, -1, - 477, 651, -1, 478, 651, -1, 783, -1, 4, -1, - 347, -1, 348, -1, 159, -1, 261, -1, 176, 54, - 655, -1, -1, 656, -1, 655, 490, 656, -1, 719, - -1, 657, -1, 486, 487, -1, 179, 719, -1, -1, - 661, -1, 162, 321, 280, -1, 659, -1, -1, 662, - -1, 661, 662, -1, 663, 664, 665, -1, 162, 427, - -1, 162, 262, 216, 427, -1, 162, 369, -1, 162, - 216, 369, -1, 274, 776, -1, -1, 268, -1, 373, - 235, -1, -1, 435, 486, 747, 487, -1, 666, 490, - 486, 747, 487, -1, 167, 668, -1, -1, 669, -1, - 668, 490, 669, -1, 677, 672, 646, -1, 678, 673, - 646, -1, 221, 678, 673, -1, 618, 672, 646, -1, - 221, 618, 672, -1, 670, -1, 486, 670, 487, 671, - -1, 486, 670, 487, -1, 669, 93, 215, 669, -1, - 669, 674, 215, 669, 676, -1, 669, 215, 669, 676, - -1, 669, 258, 674, 215, 669, -1, 669, 258, 215, - 669, -1, 36, 785, 486, 778, 487, -1, 36, 786, - -1, 785, 486, 778, 487, -1, 785, -1, 671, -1, - -1, 671, -1, 36, 486, 684, 487, -1, 36, 785, - 486, 684, 487, -1, 785, 486, 684, 487, -1, -1, - 168, 675, -1, 224, 675, -1, 343, 675, -1, 200, - -1, 288, -1, -1, 429, 486, 778, 487, -1, 279, - 719, -1, 777, -1, 777, 479, -1, 280, 777, -1, - 280, 486, 777, 487, -1, 724, 682, -1, 348, 167, - 486, 680, 487, 682, -1, 724, 681, -1, 679, -1, - 680, 490, 679, -1, 36, 486, 684, 487, -1, -1, - 472, 286, -1, -1, 445, 719, -1, -1, 
685, -1, - 684, 490, 685, -1, 785, 689, 686, -1, 73, 790, - -1, -1, 785, 689, -1, 687, 490, 785, 689, -1, - 347, -1, 388, -1, 691, 690, -1, 367, 691, 690, - -1, 691, 35, 484, 783, 485, -1, 367, 691, 35, - 484, 783, 485, -1, 691, 35, -1, 367, 691, 35, - -1, 688, 486, 687, 487, -1, 238, 486, 751, 487, - -1, 690, 484, 485, -1, 690, 484, 783, 485, -1, - -1, 693, -1, 695, -1, 697, -1, 701, -1, 707, - -1, 708, 718, -1, 708, 486, 783, 487, -1, 695, - -1, 698, -1, 702, -1, 707, -1, 789, 694, -1, - 486, 747, 487, -1, -1, 206, -1, 207, -1, 374, - -1, 49, -1, 322, -1, 160, 696, -1, 133, 308, - -1, 112, 694, -1, 111, 694, -1, 272, 694, -1, - 52, -1, 486, 783, 487, -1, -1, 699, -1, 700, - -1, 699, -1, 700, -1, 51, 706, 486, 747, 487, - -1, 51, 706, -1, 703, -1, 704, -1, 703, -1, - 704, -1, 705, 486, 783, 487, -1, 705, -1, 65, - 706, -1, 64, 706, -1, 436, -1, 257, 65, 706, - -1, 257, 64, 706, -1, 259, 706, -1, 438, -1, - -1, 404, 486, 783, 487, 709, -1, 404, 709, -1, - 403, 486, 783, 487, 709, -1, 403, 709, -1, 209, - -1, 472, 403, 469, -1, 450, 403, 469, -1, -1, - 466, -1, 467, -1, 252, -1, 253, -1, 108, -1, - 109, -1, 182, -1, 183, -1, 248, -1, 249, -1, - 356, -1, 357, -1, 246, -1, 247, -1, 244, -1, - 245, -1, 710, -1, 711, -1, 712, -1, 713, -1, - 714, -1, 715, -1, 716, -1, 717, -1, 710, 405, - 711, -1, 712, 405, 713, -1, 712, 405, 714, -1, - 712, 405, 715, -1, 713, 405, 714, -1, 713, 405, - 715, -1, 714, 405, 715, -1, -1, 721, -1, 719, - 11, 689, -1, 719, 73, 790, -1, 719, 41, 403, - 469, 719, -1, 477, 719, -1, 478, 719, -1, 719, - 477, 719, -1, 719, 478, 719, -1, 719, 479, 719, - -1, 719, 480, 719, -1, 719, 481, 719, -1, 719, - 482, 719, -1, 719, 473, 719, -1, 719, 474, 719, - -1, 719, 475, 719, -1, 719, 16, 719, -1, 719, - 17, 719, -1, 719, 18, 719, -1, 719, 743, 719, - -1, 743, 719, -1, 719, 743, -1, 719, 33, 719, - -1, 719, 284, 719, -1, 264, 719, -1, 470, 719, - -1, 719, 172, 719, -1, 719, 226, 719, -1, 719, - 226, 719, 142, 719, -1, 719, 470, 226, 719, -1, - 719, 470, 
226, 719, 142, 719, -1, 719, 186, 719, - -1, 719, 186, 719, 142, 719, -1, 719, 470, 186, - 719, -1, 719, 470, 186, 719, 142, 719, -1, 719, - 371, 405, 719, -1, 719, 371, 405, 719, 142, 719, - -1, 719, 470, 371, 405, 719, -1, 719, 470, 371, - 405, 719, 142, 719, -1, 719, 212, 269, -1, 719, - 213, -1, 719, 212, 264, 269, -1, 719, 264, 269, - -1, 719, 267, -1, 739, -1, 739, 15, 719, -1, - 767, 15, 719, -1, 739, 290, 739, -1, 719, 212, - 412, -1, 719, 212, 264, 412, -1, 719, 212, 155, - -1, 719, 212, 264, 155, -1, 719, 212, 423, -1, - 719, 212, 264, 423, -1, 719, 212, 129, 167, 719, - -1, 719, 212, 264, 129, 167, 719, -1, 719, 212, - 274, 486, 751, 487, -1, 719, 212, 264, 274, 486, - 751, 487, -1, 719, 48, 772, 720, 33, 719, -1, - 719, 470, 48, 772, 720, 33, 719, -1, 719, 48, - 391, 720, 33, 719, -1, 719, 470, 48, 391, 720, - 33, 719, -1, 719, 191, 761, -1, 719, 470, 191, - 761, -1, 719, 745, 740, 618, -1, 719, 745, 740, - 486, 719, 487, -1, 114, -1, 35, 484, 748, 485, - -1, 721, -1, 720, 11, 689, -1, 477, 720, -1, - 478, 720, -1, 720, 477, 720, -1, 720, 478, 720, - -1, 720, 479, 720, -1, 720, 480, 720, -1, 720, - 481, 720, -1, 720, 482, 720, -1, 720, 473, 720, - -1, 720, 474, 720, -1, 720, 475, 720, -1, 720, - 16, 720, -1, 720, 17, 720, -1, 720, 18, 720, - -1, 720, 743, 720, -1, 743, 720, -1, 720, 743, - -1, 720, 212, 129, 167, 720, -1, 720, 212, 264, - 129, 167, 720, -1, 720, 212, 274, 486, 751, 487, - -1, 720, 212, 264, 274, 486, 751, 487, -1, 767, - -1, 782, -1, 491, 9, -1, 492, 771, -1, 10, - 771, -1, 486, 719, 487, 771, -1, 762, -1, 723, - 771, -1, 618, -1, 618, 770, -1, 149, 618, -1, - 781, 486, 487, -1, 781, 486, 749, 631, 487, -1, - 781, 486, 437, 750, 631, 487, -1, 781, 486, 749, - 490, 437, 750, 631, 487, -1, 781, 486, 27, 749, - 631, 487, -1, 781, 486, 129, 749, 631, 487, -1, - 781, 486, 479, 487, -1, 722, 726, 727, 731, -1, - 725, -1, 722, -1, 725, -1, 74, 162, 486, 719, - 487, -1, 98, -1, 101, -1, 101, 486, 783, 487, - -1, 102, -1, 102, 486, 783, 487, -1, 
231, -1, - 231, 486, 783, 487, -1, 232, -1, 232, 486, 783, - 487, -1, 99, -1, 103, -1, 365, -1, 428, -1, - 97, -1, 100, -1, 61, 486, 719, 36, 689, 487, - -1, 415, 486, 719, 36, 689, 487, -1, 154, 486, - 752, 487, -1, 291, 486, 754, 487, -1, 305, 486, - 756, 487, -1, 390, 486, 757, 487, -1, 409, 486, - 719, 36, 689, 487, -1, 411, 486, 53, 760, 487, - -1, 411, 486, 222, 760, 487, -1, 411, 486, 406, - 760, 487, -1, 411, 486, 760, 487, -1, 270, 486, - 719, 490, 719, 487, -1, 72, 486, 747, 487, -1, - 449, 176, 486, 632, 487, -1, -1, 158, 486, 445, - 719, 487, -1, -1, 447, 729, -1, -1, 730, -1, - 729, 490, 730, -1, 785, 36, 732, -1, 289, 732, - -1, 289, 785, -1, -1, 486, 733, 734, 631, 735, - 487, -1, 785, -1, -1, 298, 54, 747, -1, -1, - 320, 736, -1, 348, 736, -1, -1, 737, -1, 48, - 737, 33, 737, -1, 418, 307, -1, 418, 161, -1, - 96, 347, -1, 719, 307, -1, 719, 161, -1, 347, - 486, 747, 487, -1, 347, 486, 487, -1, 738, -1, - 486, 747, 490, 719, 487, -1, 34, -1, 376, -1, - 27, -1, 8, -1, 742, -1, 477, -1, 478, -1, - 479, -1, 480, -1, 481, -1, 482, -1, 473, -1, - 474, -1, 475, -1, 16, -1, 17, -1, 18, -1, - 8, -1, 281, 486, 746, 487, -1, 741, -1, 281, - 486, 746, 487, -1, 741, -1, 281, 486, 746, 487, - -1, 226, -1, 470, 226, -1, 172, -1, 470, 172, - -1, 186, -1, 470, 186, -1, 741, -1, 785, 488, - 746, -1, 719, -1, 747, 490, 719, -1, 747, -1, - -1, 750, -1, 749, 490, 750, -1, 719, -1, 793, - 13, 719, -1, 793, 14, 719, -1, 689, -1, 751, - 490, 689, -1, 753, 167, 719, -1, -1, 3, -1, - 710, -1, 711, -1, 712, -1, 713, -1, 714, -1, - 715, -1, 716, -1, 717, -1, 784, -1, 719, 755, - 758, 759, -1, 719, 755, 758, -1, 302, 719, -1, - 720, 191, 720, -1, -1, 719, 758, 759, -1, 719, - 759, 758, -1, 719, 758, -1, 719, 759, -1, 747, - -1, -1, 167, 719, -1, 162, 719, -1, 719, 167, - 747, -1, 167, 747, -1, 747, -1, 618, -1, 486, - 747, 487, -1, 60, 766, 763, 765, 140, -1, 764, - -1, 763, 764, -1, 444, 719, 402, 719, -1, 136, - 719, -1, -1, 719, -1, -1, 785, -1, 785, 770, - -1, 488, 780, -1, 
488, 479, -1, 484, 719, 485, - -1, 484, 769, 493, 769, 485, -1, 719, -1, -1, - 768, -1, 770, 768, -1, -1, 771, 768, -1, 40, - -1, -1, 774, -1, -1, 775, -1, 774, 490, 775, - -1, 719, 36, 795, -1, 719, 3, -1, 719, -1, - 479, -1, 777, -1, 776, 490, 777, -1, 786, -1, - 785, 770, -1, 779, -1, 778, 490, 779, -1, 785, - -1, 794, -1, 788, -1, 785, 770, -1, 783, -1, - 4, -1, 784, 771, -1, 6, -1, 7, -1, 781, - 784, -1, 781, 486, 749, 631, 487, 784, -1, 692, - 784, -1, 708, 486, 719, 487, 718, -1, 708, 783, - 718, -1, 708, 784, 718, -1, 412, -1, 155, -1, - 269, -1, 9, -1, 5, -1, 3, -1, 854, -1, - 855, -1, 785, -1, 5, -1, 3, -1, 854, -1, - 859, -1, 3, -1, 854, -1, 856, -1, 3, -1, - 854, -1, 857, -1, 785, -1, 785, 791, -1, 488, - 780, -1, 791, 488, 780, -1, 486, 778, 487, -1, - -1, 787, -1, 3, -1, 858, -1, 854, -1, 860, - -1, 794, -1, 5, -1, 309, 779, 797, 36, 798, - -1, 486, 751, 487, -1, -1, 617, -1, 512, -1, - 599, -1, 837, -1, 92, 352, 785, 800, -1, 92, - 352, 185, 264, 149, 785, 800, -1, 800, 801, -1, - -1, 544, -1, 802, -1, 531, -1, 849, -1, 92, - 808, 194, 805, 806, 279, 777, 804, 486, 528, 487, - 807, 683, -1, 92, 808, 194, 805, 185, 264, 149, - 572, 279, 777, 804, 486, 528, 487, 807, 683, -1, - 785, -1, 429, 803, -1, -1, 81, -1, -1, 572, - -1, -1, 448, 557, -1, -1, 422, -1, -1, 29, - 394, 677, 366, 352, 779, -1, 29, 394, 185, 149, - 677, 366, 352, 779, -1, 29, 360, 777, 366, 352, - 779, -1, 29, 360, 185, 149, 777, 366, 352, 779, - -1, 29, 441, 777, 366, 352, 779, -1, 29, 441, - 185, 149, 777, 366, 352, 779, -1, 163, 68, -1, - 68, -1, 151, 107, 784, 607, -1, 190, 107, 784, - -1, 150, 816, -1, 150, 820, 814, 816, -1, 150, - 439, 816, -1, 150, 486, 819, 487, 816, -1, 439, - -1, -1, 821, -1, 538, -1, -1, 617, -1, 512, - -1, 599, -1, 837, -1, 851, -1, 3, -1, 854, - -1, 858, -1, 817, -1, 784, -1, 822, -1, 819, - 490, 822, -1, 32, -1, 31, -1, 412, -1, 155, - -1, 279, -1, 818, -1, 823, 815, -1, 817, -1, - 820, -1, 366, 825, -1, 366, 230, 825, -1, 366, - 364, 825, -1, 826, -1, 
847, 167, 96, -1, 403, - 469, 828, -1, 352, 784, -1, 847, 405, 829, -1, - 847, 475, 829, -1, 847, 405, 114, -1, 847, 475, - 114, -1, 821, -1, 538, -1, 784, -1, 3, -1, - 708, 784, 718, -1, 708, 486, 783, 487, 784, -1, - 538, -1, 114, -1, 230, -1, 827, -1, 829, 490, - 827, -1, 229, 831, -1, 784, -1, 430, 834, 836, - 814, -1, 430, 834, 836, 814, 777, -1, 430, 834, - 836, 814, 841, -1, 430, 486, 835, 487, -1, 430, - 486, 835, 487, 777, 792, -1, 820, -1, 439, -1, - 166, -1, 168, -1, 3, -1, 168, -1, -1, 833, - -1, 835, 490, 833, -1, 166, -1, -1, 516, 119, - 167, 838, 840, 839, 521, -1, 677, -1, 677, 785, - -1, 677, 36, 785, -1, 445, 719, -1, -1, 429, - 668, -1, -1, 820, 814, -1, 820, 814, 777, 792, - -1, 337, 844, -1, 847, -1, 27, -1, 843, -1, - 403, 469, -1, 407, 214, 225, -1, 846, 617, -1, - 846, 847, -1, 846, 403, 469, -1, 846, 407, 214, - 225, -1, 846, 27, -1, 370, -1, 124, -1, 785, - -1, 847, 488, 785, -1, 56, 722, -1, 92, 588, - 441, 777, 578, 807, 36, 617, 850, -1, 92, 284, - 335, 588, 441, 777, 578, 807, 36, 617, 850, -1, - 92, 588, 325, 441, 777, 486, 582, 487, 807, 36, - 617, 850, -1, 92, 284, 335, 588, 325, 441, 777, - 486, 582, 487, 807, 36, 617, 850, -1, 448, 67, - 282, -1, 448, 59, 67, 282, -1, 448, 230, 67, - 282, -1, -1, 92, 588, 394, 853, 36, 617, 852, - -1, 92, 588, 394, 185, 264, 149, 853, 36, 617, - 852, -1, 448, 106, -1, 448, 262, 106, -1, -1, - 777, 578, 564, 556, -1, 19, -1, 20, -1, 21, - -1, 22, -1, 23, -1, 24, -1, 25, -1, 26, - -1, 28, -1, 29, -1, 30, -1, 38, -1, 39, - -1, 41, -1, 42, -1, 43, -1, 45, -1, 46, - -1, 47, -1, 54, -1, 55, -1, 56, -1, 57, - -1, 58, -1, 59, -1, 62, -1, 63, -1, 66, - -1, 68, -1, 69, -1, 70, -1, 71, -1, 76, - -1, 77, -1, 78, -1, 79, -1, 80, -1, 82, - -1, 83, -1, 84, -1, 86, -1, 87, -1, 88, - -1, 89, -1, 90, -1, 91, -1, 94, -1, 95, - -1, 96, -1, 104, -1, 105, -1, 106, -1, 107, - -1, 108, -1, 109, -1, 110, -1, 113, -1, 115, - -1, 117, -1, 118, -1, 119, -1, 120, -1, 121, - -1, 122, -1, 124, -1, 125, -1, 126, -1, 127, - -1, 
128, -1, 131, -1, 132, -1, 133, -1, 134, - -1, 135, -1, 137, -1, 138, -1, 139, -1, 141, - -1, 142, -1, 143, -1, 145, -1, 146, -1, 147, - -1, 148, -1, 150, -1, 151, -1, 152, -1, 153, - -1, 156, -1, 158, -1, 159, -1, 161, -1, 163, - -1, 165, -1, 169, -1, 170, -1, 171, -1, 173, - -1, 175, -1, 178, -1, 180, -1, 181, -1, 182, - -1, 183, -1, 184, -1, 185, -1, 187, -1, 188, - -1, 189, -1, 190, -1, 192, -1, 193, -1, 194, - -1, 195, -1, 196, -1, 197, -1, 199, -1, 202, - -1, 203, -1, 204, -1, 205, -1, 211, -1, 214, - -1, 216, -1, 217, -1, 218, -1, 219, -1, 220, - -1, 223, -1, 225, -1, 228, -1, 229, -1, 230, - -1, 233, -1, 234, -1, 235, -1, 236, -1, 237, - -1, 239, -1, 240, -1, 241, -1, 242, -1, 243, - -1, 244, -1, 245, -1, 246, -1, 247, -1, 248, - -1, 249, -1, 250, -1, 251, -1, 252, -1, 253, - -1, 254, -1, 255, -1, 256, -1, 260, -1, 261, - -1, 262, -1, 265, -1, 266, -1, 268, -1, 271, - -1, 273, -1, 274, -1, 275, -1, 277, -1, 278, - -1, 281, -1, 282, -1, 283, -1, 286, -1, 289, - -1, 292, -1, 293, -1, 294, -1, 295, -1, 296, - -1, 297, -1, 298, -1, 299, -1, 300, -1, 301, - -1, 303, -1, 304, -1, 306, -1, 307, -1, 309, - -1, 310, -1, 311, -1, 313, -1, 314, -1, 315, - -1, 316, -1, 317, -1, 318, -1, 319, -1, 320, - -1, 321, -1, 323, -1, 324, -1, 325, -1, 326, - -1, 328, -1, 329, -1, 330, -1, 331, -1, 332, - -1, 333, -1, 334, -1, 335, -1, 336, -1, 337, - -1, 338, -1, 339, -1, 341, -1, 342, -1, 344, - -1, 345, -1, 346, -1, 348, -1, 349, -1, 350, - -1, 351, -1, 352, -1, 353, -1, 354, -1, 355, - -1, 356, -1, 357, -1, 358, -1, 360, -1, 361, - -1, 362, -1, 363, -1, 364, -1, 366, -1, 368, - -1, 369, -1, 370, -1, 372, -1, 373, -1, 375, - -1, 377, -1, 378, -1, 379, -1, 380, -1, 381, - -1, 382, -1, 383, -1, 384, -1, 385, -1, 386, - -1, 387, -1, 389, -1, 392, -1, 393, -1, 395, - -1, 397, -1, 398, -1, 399, -1, 400, -1, 401, - -1, 407, -1, 408, -1, 410, -1, 413, -1, 414, - -1, 416, -1, 417, -1, 418, -1, 419, -1, 420, - -1, 423, -1, 424, -1, 425, -1, 426, -1, 427, - -1, 430, -1, 431, -1, 432, 
-1, 433, -1, 434, - -1, 438, -1, 440, -1, 441, -1, 442, -1, 443, - -1, 446, -1, 449, -1, 450, -1, 451, -1, 452, - -1, 453, -1, 454, -1, 466, -1, 467, -1, 468, - -1, 469, -1, 48, -1, 49, -1, 51, -1, 52, - -1, 64, -1, 65, -1, 72, -1, 111, -1, 112, - -1, 149, -1, 154, -1, 160, -1, 177, -1, 201, - -1, 206, -1, 207, -1, 209, -1, 238, -1, 257, - -1, 259, -1, 263, -1, 270, -1, 272, -1, 287, - -1, 291, -1, 305, -1, 308, -1, 322, -1, 347, - -1, 367, -1, 374, -1, 388, -1, 390, -1, 403, - -1, 404, -1, 409, -1, 411, -1, 415, -1, 435, - -1, 436, -1, 455, -1, 456, -1, 457, -1, 458, - -1, 459, -1, 460, -1, 461, -1, 462, -1, 463, - -1, 464, -1, 465, -1, 44, -1, 50, -1, 74, - -1, 81, -1, 93, -1, 100, -1, 166, -1, 168, - -1, 172, -1, 186, -1, 200, -1, 212, -1, 213, - -1, 215, -1, 224, -1, 226, -1, 238, -1, 258, - -1, 267, -1, 288, -1, 290, -1, 343, -1, 371, - -1, 388, -1, 396, -1, 439, -1, 44, -1, 50, + 497, 0, -1, 498, -1, 498, 489, 499, -1, 499, + -1, 813, -1, 537, -1, 500, -1, 845, -1, 852, + -1, 814, -1, 602, -1, 855, -1, 598, -1, 803, + -1, 533, -1, 546, -1, 511, -1, 841, -1, 592, + -1, 535, -1, 817, -1, 815, -1, 816, -1, 806, + -1, 514, -1, 834, -1, 532, -1, 800, -1, 512, + -1, 619, -1, 544, -1, 601, -1, 836, -1, 846, + -1, 828, -1, 849, -1, 853, -1, -1, 29, 394, + 679, 508, -1, 29, 394, 185, 149, 679, 508, -1, + 29, 194, 781, 508, -1, 29, 194, 185, 149, 781, + 508, -1, 29, 360, 781, 508, -1, 29, 360, 185, + 149, 781, 508, -1, 29, 441, 781, 508, -1, 29, + 441, 185, 149, 781, 508, -1, 503, -1, 501, 503, + -1, 366, 114, 721, -1, 134, 114, -1, 338, -1, + 338, 539, 540, -1, 366, 541, -1, 366, 171, 591, + -1, 507, -1, 504, 490, 507, -1, 23, 571, -1, + 23, 185, 264, 149, 571, -1, 23, 75, 571, -1, + 23, 75, 185, 264, 149, 571, -1, 29, 513, 789, + 502, -1, 29, 513, 789, 134, 264, 269, -1, 29, + 513, 789, 366, 264, 269, -1, 29, 513, 789, 366, + 382, 543, -1, 29, 513, 789, 366, 559, -1, 29, + 513, 789, 337, 559, -1, 29, 513, 789, 366, 385, + 789, -1, 29, 513, 789, 23, 171, 591, 36, 184, 
+ 549, -1, 29, 513, 789, 501, -1, 29, 513, 789, + 134, 184, -1, 29, 513, 789, 134, 184, 185, 149, + -1, 134, 513, 185, 149, 789, 596, -1, 134, 513, + 789, 596, -1, 29, 513, 789, 510, 416, 691, 688, + 506, -1, 29, 513, 789, 509, -1, 23, 561, -1, + 29, 85, 783, 547, -1, 432, 85, 783, -1, 134, + 85, 185, 149, 783, 596, -1, 134, 85, 783, 596, + -1, 366, 236, -1, 366, 425, -1, 366, 559, -1, + 337, 559, -1, 509, -1, 429, 721, -1, -1, 554, + -1, 366, 554, -1, 23, 554, -1, 134, 569, -1, + 505, -1, 508, 490, 505, -1, 283, 486, 504, 487, + -1, 366, 106, -1, 366, -1, -1, 110, 783, -1, + 110, 309, 783, -1, 110, 27, -1, 110, 309, 27, + -1, 29, 352, 783, 333, 405, 783, -1, 29, 394, + 679, 333, 405, 783, -1, 29, 394, 185, 149, 679, + 333, 405, 783, -1, 29, 360, 781, 333, 405, 783, + -1, 29, 360, 185, 149, 781, 333, 405, 783, -1, + 29, 441, 781, 333, 405, 783, -1, 29, 441, 185, + 149, 781, 333, 405, 783, -1, 29, 194, 781, 333, + 405, 783, -1, 29, 194, 185, 149, 781, 333, 405, + 783, -1, 29, 394, 679, 333, 513, 783, 405, 783, + -1, 29, 394, 185, 149, 679, 333, 513, 783, 405, + 783, -1, 29, 394, 679, 333, 85, 783, 405, 783, + -1, 29, 394, 185, 149, 679, 333, 85, 783, 405, + 783, -1, 75, -1, -1, 518, 204, 210, 516, 515, + 521, 523, -1, 619, -1, 292, 524, 434, 619, -1, + 486, 528, 487, 619, -1, 486, 528, 487, 292, 524, + 434, 619, -1, 114, 435, -1, 781, -1, 781, 36, + 789, -1, 486, 530, 487, 685, -1, 279, 85, 783, + -1, -1, 624, -1, -1, 789, 775, -1, 531, 475, + 721, -1, 486, 525, 487, 475, 721, -1, 279, 83, + 517, 130, 427, 366, 529, 685, -1, 279, 83, 517, + 130, 265, -1, -1, 789, 526, 527, 637, 638, -1, + 726, 526, 527, 637, 638, -1, 486, 721, 487, 526, + 527, 637, 638, -1, 340, 778, -1, -1, 428, -1, + 393, -1, 531, -1, 525, 490, 531, -1, 73, 794, + -1, -1, 794, -1, -1, 519, -1, 528, 490, 519, + -1, 520, -1, 529, 490, 520, -1, 522, -1, 530, + 490, 522, -1, 789, 775, -1, 306, 789, -1, 306, + 789, 475, 833, -1, 306, 789, 486, 753, 487, -1, + 92, 590, 360, 781, 534, -1, 92, 590, 360, 
185, + 264, 149, 781, 534, -1, 538, -1, -1, 148, 783, + 536, -1, 92, 590, 394, 857, 36, 148, 783, 536, + 856, -1, 92, 590, 394, 185, 264, 149, 857, 36, + 148, 783, 536, 856, -1, 486, 751, 487, -1, -1, + 29, 360, 781, 538, -1, 29, 360, 185, 149, 781, + 538, -1, 541, -1, 538, 541, -1, 448, -1, 472, + -1, -1, 4, -1, 477, 4, -1, 478, 4, -1, + 543, -1, 36, 693, -1, 55, 540, -1, 105, -1, + 262, 105, -1, 193, 542, 540, -1, 242, 540, -1, + 250, 540, -1, 262, 242, -1, 262, 250, -1, 293, + 54, 794, -1, 360, 255, 794, -1, 380, 539, 540, + -1, 338, -1, 338, 539, 540, -1, 54, -1, -1, + 787, -1, 477, 787, -1, 478, 787, -1, 19, 545, + -1, 47, 545, -1, 380, 545, -1, 79, 545, -1, + 140, 545, -1, 345, 545, -1, 451, -1, 407, -1, + -1, 92, 590, 394, 781, 486, 578, 487, 566, 558, + -1, 92, 590, 394, 185, 264, 149, 781, 486, 578, + 487, 566, 558, -1, -1, 547, 570, -1, 585, -1, + 864, -1, 748, -1, 540, -1, 788, -1, 263, -1, + 486, 538, 487, -1, -1, 788, -1, 262, 22, -1, + 339, -1, 58, -1, 366, 269, -1, 366, 114, -1, + 85, 783, 553, -1, 553, -1, 565, -1, 73, 794, + -1, 264, 269, -1, 269, -1, 422, 577, -1, 312, + 216, 577, -1, 67, 486, 721, 487, 560, -1, 114, + 722, -1, 171, 591, 36, 184, 549, -1, 327, 781, + 580, 588, 556, -1, 569, 550, -1, 279, 427, 551, + -1, 555, -1, 582, -1, 555, 582, -1, 582, 555, + -1, -1, 283, 486, 572, 487, -1, -1, 279, 79, + 134, -1, 279, 79, 119, 348, -1, 279, 79, 311, + 348, -1, -1, 486, 563, 487, -1, 262, 196, -1, + -1, 85, 783, 586, -1, 586, -1, 78, -1, 86, + -1, 115, -1, 184, -1, 195, -1, 382, -1, 385, + -1, 27, -1, 583, -1, 563, 490, 583, -1, 429, + 194, 574, -1, 116, -1, 264, 116, -1, 198, 117, + -1, 198, 187, -1, 448, 559, -1, 448, 277, -1, + 450, 277, -1, -1, 486, 573, 487, -1, 568, 192, + 562, -1, 568, 146, 562, -1, -1, 798, -1, 264, + 116, -1, 116, -1, 198, 187, -1, 198, 117, -1, + 264, 431, -1, 262, 196, -1, 789, 691, 557, 581, + -1, 554, -1, 572, 490, 554, -1, 576, -1, 573, + 490, 576, -1, 789, -1, 571, -1, 589, -1, 561, + -1, 798, 475, 548, -1, 798, 
-1, 448, 567, -1, + -1, 587, -1, -1, 789, -1, 486, 584, 487, -1, + -1, 581, 552, -1, -1, 279, 119, 551, -1, 798, + 475, 548, -1, 798, -1, 798, 488, 798, 475, 548, + -1, 798, 488, 798, -1, 579, -1, 584, 490, 579, + -1, 691, -1, 791, 795, 481, 416, -1, 367, 791, + 795, 481, 416, -1, 67, 486, 721, 487, 547, -1, + 422, 486, 584, 487, 577, 547, -1, 422, 564, 547, + -1, 312, 216, 486, 584, 487, 577, 547, -1, 312, + 216, 564, 547, -1, 164, 216, 486, 584, 487, 327, + 781, 580, 588, 556, 547, -1, 575, -1, 587, 490, + 575, -1, 240, 168, -1, 240, 297, -1, 240, 372, + -1, -1, 226, 781, 568, -1, 400, -1, 398, -1, + 230, 400, -1, 230, 398, -1, 173, 400, -1, 173, + 398, -1, 425, -1, -1, 30, -1, 54, 114, -1, + 134, 593, 185, 149, 595, 596, -1, 134, 593, 595, + 596, -1, 134, 594, 185, 149, 782, 596, -1, 134, + 594, 782, 596, -1, 134, 597, 783, 279, 794, 596, + -1, 134, 597, 185, 149, 783, 279, 794, 596, -1, + 394, -1, 360, -1, 169, -1, 237, -1, 441, -1, + 241, 441, -1, 194, -1, 164, 394, -1, 74, -1, + 89, -1, 382, -1, 401, 355, 296, -1, 401, 355, + 126, -1, 401, 355, 399, -1, 401, 355, 82, -1, + 21, 243, -1, 143, 410, -1, 152, -1, 164, 106, + 452, -1, 318, -1, 352, -1, 363, -1, 794, -1, + 595, 490, 794, -1, 58, -1, 339, -1, -1, 304, + -1, 349, -1, 410, -1, 92, 599, 781, 600, 36, + 721, -1, 169, -1, 237, -1, 486, 487, -1, 486, + 753, 487, -1, 518, 427, 842, 366, 529, 669, 843, + 523, -1, 90, 614, 781, 580, 612, 603, 608, 617, + 604, 539, 609, -1, 90, 486, 802, 487, 405, 608, + 617, 539, 609, -1, 167, -1, 405, -1, 606, 121, + 788, -1, -1, 616, -1, 605, 490, 616, -1, 429, + -1, -1, 36, -1, -1, 317, -1, -1, 613, -1, + 486, 618, 487, -1, 825, -1, 540, -1, 479, -1, + 486, 605, 487, -1, -1, 798, 610, -1, 448, 277, + -1, -1, 613, 615, -1, -1, 50, -1, -1, 50, + -1, 277, -1, 166, -1, 120, 607, 788, -1, 269, + 607, 788, -1, 94, -1, 180, -1, 319, 607, 788, + -1, 142, 607, 788, -1, 163, 319, 584, -1, 163, + 319, 479, -1, 163, 264, 269, 584, -1, 163, 269, + 584, -1, 138, 788, -1, 825, -1, 788, 
-1, 383, + -1, 384, -1, 611, -1, 618, 490, 611, -1, 621, + -1, 620, -1, 486, 621, 487, -1, 486, 620, 487, + -1, 623, -1, 622, 634, -1, 622, 633, 661, 640, + -1, 622, 633, 639, 662, -1, 624, 622, -1, 624, + 622, 634, -1, 624, 622, 633, 661, 640, -1, 624, + 622, 633, 639, 662, -1, 623, -1, 620, -1, 359, + 632, 777, 627, 669, 685, 656, 660, 730, 644, -1, + 359, 631, 778, 627, 669, 685, 656, 660, 730, 644, + -1, 668, -1, 394, 679, -1, 622, 421, 630, 622, + -1, 622, 208, 630, 622, -1, 622, 144, 630, 622, + -1, 448, 625, -1, 472, 625, -1, 448, 325, 625, + -1, 626, -1, 625, 490, 626, -1, 783, 796, 36, + 486, 802, 487, -1, 210, 628, -1, -1, 400, 629, + 781, -1, 398, 629, 781, -1, 230, 400, 629, 781, + -1, 230, 398, 629, 781, -1, 173, 400, 629, 781, + -1, 173, 398, 629, 781, -1, 425, 629, 781, -1, + 394, 781, -1, 781, -1, 394, -1, -1, 27, -1, + 129, -1, -1, 129, -1, 129, 279, 486, 751, 487, + -1, 27, -1, -1, 634, -1, -1, 285, 54, 635, + -1, 636, -1, 635, 490, 636, -1, 721, 429, 748, + 638, -1, 721, 637, 638, -1, 37, -1, 123, -1, + -1, 471, 159, -1, 471, 220, -1, -1, 641, 642, + -1, 642, 641, -1, 641, -1, 642, -1, 639, -1, + -1, 227, 650, -1, 227, 650, 490, 651, -1, 157, + 655, 652, 654, 280, -1, 157, 655, 654, 280, -1, + 276, 651, -1, 276, 652, 654, -1, 4, 481, -1, + 9, 481, -1, 4, 301, -1, 9, 301, -1, 9, + -1, 9, 348, -1, 429, 350, 646, -1, -1, 789, + -1, -1, 645, 486, 643, 487, 649, -1, 643, -1, + 643, 486, 789, 487, -1, 643, 486, 789, 490, 9, + 487, -1, 396, 646, -1, 647, -1, -1, 334, 486, + 9, 487, -1, -1, 721, -1, 27, -1, 721, -1, + 723, -1, 477, 653, -1, 478, 653, -1, 787, -1, + 4, -1, 347, -1, 348, -1, 159, -1, 261, -1, + 176, 54, 657, -1, -1, 658, -1, 657, 490, 658, + -1, 721, -1, 659, -1, 486, 487, -1, 179, 721, + -1, -1, 663, -1, 162, 321, 280, -1, 661, -1, + -1, 664, -1, 663, 664, -1, 665, 666, 667, -1, + 162, 427, -1, 162, 262, 216, 427, -1, 162, 369, + -1, 162, 216, 369, -1, 274, 780, -1, -1, 268, + -1, 373, 235, -1, -1, 435, 486, 751, 487, -1, + 668, 490, 486, 
751, 487, -1, 167, 670, -1, -1, + 671, -1, 670, 490, 671, -1, 679, 674, 648, -1, + 680, 675, 648, -1, 221, 680, 675, -1, 620, 674, + 648, -1, 221, 620, 674, -1, 672, -1, 486, 672, + 487, 673, -1, 486, 672, 487, -1, 671, 93, 215, + 671, -1, 671, 676, 215, 671, 678, -1, 671, 215, + 671, 678, -1, 671, 258, 676, 215, 671, -1, 671, + 258, 215, 671, -1, 36, 789, 486, 782, 487, -1, + 36, 790, -1, 789, 486, 782, 487, -1, 789, -1, + 673, -1, -1, 673, -1, 36, 486, 686, 487, -1, + 36, 789, 486, 686, 487, -1, 789, 486, 686, 487, + -1, -1, 168, 677, -1, 224, 677, -1, 343, 677, + -1, 200, -1, 288, -1, -1, 429, 486, 782, 487, + -1, 279, 721, -1, 781, -1, 781, 479, -1, 280, + 781, -1, 280, 486, 781, 487, -1, 726, 684, -1, + 348, 167, 486, 682, 487, 684, -1, 726, 683, -1, + 681, -1, 682, 490, 681, -1, 36, 486, 686, 487, + -1, -1, 472, 286, -1, -1, 445, 721, -1, -1, + 687, -1, 686, 490, 687, -1, 789, 691, 688, -1, + 73, 794, -1, -1, 789, 691, -1, 689, 490, 789, + 691, -1, 347, -1, 388, -1, 693, 692, -1, 367, + 693, 692, -1, 693, 35, 484, 787, 485, -1, 367, + 693, 35, 484, 787, 485, -1, 693, 35, -1, 367, + 693, 35, -1, 690, 486, 689, 487, 692, -1, 238, + 486, 755, 487, 692, -1, 692, 484, 485, -1, 692, + 484, 787, 485, -1, -1, 695, -1, 697, -1, 699, + -1, 703, -1, 709, -1, 710, 720, -1, 710, 486, + 787, 487, -1, 697, -1, 700, -1, 704, -1, 709, + -1, 793, 696, -1, 486, 751, 487, -1, -1, 206, + -1, 207, -1, 374, -1, 49, -1, 322, -1, 160, + 698, -1, 133, 308, -1, 112, 696, -1, 111, 696, + -1, 272, 696, -1, 52, -1, 486, 787, 487, -1, + -1, 701, -1, 702, -1, 701, -1, 702, -1, 51, + 708, 486, 751, 487, -1, 51, 708, -1, 705, -1, + 706, -1, 705, -1, 706, -1, 707, 486, 787, 487, + -1, 707, -1, 65, 708, -1, 64, 708, -1, 436, + -1, 257, 65, 708, -1, 257, 64, 708, -1, 259, + 708, -1, 438, -1, -1, 404, 486, 787, 487, 711, + -1, 404, 711, -1, 403, 486, 787, 487, 711, -1, + 403, 711, -1, 209, -1, 472, 403, 469, -1, 450, + 403, 469, -1, -1, 466, -1, 467, -1, 252, -1, + 253, -1, 108, -1, 109, -1, 182, 
-1, 183, -1, + 248, -1, 249, -1, 356, -1, 357, -1, 246, -1, + 247, -1, 244, -1, 245, -1, 712, -1, 713, -1, + 714, -1, 715, -1, 716, -1, 717, -1, 718, -1, + 719, -1, 712, 405, 713, -1, 714, 405, 715, -1, + 714, 405, 716, -1, 714, 405, 717, -1, 715, 405, + 716, -1, 715, 405, 717, -1, 716, 405, 717, -1, + -1, 723, -1, 721, 11, 691, -1, 721, 73, 794, + -1, 721, 41, 403, 469, 721, -1, 477, 721, -1, + 478, 721, -1, 721, 477, 721, -1, 721, 478, 721, + -1, 721, 479, 721, -1, 721, 480, 721, -1, 721, + 481, 721, -1, 721, 482, 721, -1, 721, 473, 721, + -1, 721, 474, 721, -1, 721, 475, 721, -1, 721, + 16, 721, -1, 721, 17, 721, -1, 721, 18, 721, + -1, 721, 747, 721, -1, 747, 721, -1, 721, 747, + -1, 721, 33, 721, -1, 721, 284, 721, -1, 264, + 721, -1, 470, 721, -1, 721, 172, 721, -1, 721, + 226, 721, -1, 721, 226, 721, 142, 721, -1, 721, + 470, 226, 721, -1, 721, 470, 226, 721, 142, 721, + -1, 721, 186, 721, -1, 721, 186, 721, 142, 721, + -1, 721, 470, 186, 721, -1, 721, 470, 186, 721, + 142, 721, -1, 721, 371, 405, 721, -1, 721, 371, + 405, 721, 142, 721, -1, 721, 470, 371, 405, 721, + -1, 721, 470, 371, 405, 721, 142, 721, -1, 721, + 212, 269, -1, 721, 213, -1, 721, 212, 264, 269, + -1, 721, 264, 269, -1, 721, 267, -1, 741, -1, + 491, 743, 492, -1, 484, 752, 485, -1, 741, 15, + 721, -1, 771, 15, 721, -1, 741, 290, 741, -1, + 721, 212, 412, -1, 721, 212, 264, 412, -1, 721, + 212, 155, -1, 721, 212, 264, 155, -1, 721, 212, + 423, -1, 721, 212, 264, 423, -1, 721, 212, 129, + 167, 721, -1, 721, 212, 264, 129, 167, 721, -1, + 721, 212, 274, 486, 755, 487, -1, 721, 212, 264, + 274, 486, 755, 487, -1, 721, 48, 776, 722, 33, + 721, -1, 721, 470, 48, 776, 722, 33, 721, -1, + 721, 48, 391, 722, 33, 721, -1, 721, 470, 48, + 391, 722, 33, 721, -1, 721, 191, 765, -1, 721, + 470, 191, 765, -1, 721, 749, 744, 620, -1, 721, + 749, 744, 486, 721, 487, -1, 114, -1, 35, 484, + 752, 485, -1, 723, -1, 722, 11, 691, -1, 477, + 722, -1, 478, 722, -1, 722, 477, 722, -1, 722, + 478, 722, -1, 722, 
479, 722, -1, 722, 480, 722, + -1, 722, 481, 722, -1, 722, 482, 722, -1, 722, + 473, 722, -1, 722, 474, 722, -1, 722, 475, 722, + -1, 722, 16, 722, -1, 722, 17, 722, -1, 722, + 18, 722, -1, 722, 747, 722, -1, 747, 722, -1, + 722, 747, -1, 722, 212, 129, 167, 722, -1, 722, + 212, 264, 129, 167, 722, -1, 722, 212, 274, 486, + 755, 487, -1, 722, 212, 264, 274, 486, 755, 487, + -1, 771, -1, 786, -1, 493, 9, -1, 494, 775, + -1, 10, 775, -1, 486, 721, 487, 775, -1, 766, + -1, 725, 775, -1, 620, -1, 620, 774, -1, 149, + 620, -1, 785, 486, 487, -1, 785, 486, 753, 633, + 487, -1, 785, 486, 437, 754, 633, 487, -1, 785, + 486, 753, 490, 437, 754, 633, 487, -1, 785, 486, + 27, 753, 633, 487, -1, 785, 486, 129, 753, 633, + 487, -1, 785, 486, 479, 487, -1, 724, 728, 729, + 733, -1, 727, -1, 724, -1, 727, -1, 74, 162, + 486, 721, 487, -1, 98, -1, 101, -1, 101, 486, + 787, 487, -1, 102, -1, 102, 486, 787, 487, -1, + 231, -1, 231, 486, 787, 487, -1, 232, -1, 232, + 486, 787, 487, -1, 99, -1, 103, -1, 365, -1, + 428, -1, 97, -1, 100, -1, 61, 486, 721, 36, + 691, 487, -1, 415, 486, 721, 36, 691, 487, -1, + 154, 486, 756, 487, -1, 291, 486, 758, 487, -1, + 305, 486, 760, 487, -1, 390, 486, 761, 487, -1, + 409, 486, 721, 36, 691, 487, -1, 411, 486, 53, + 764, 487, -1, 411, 486, 222, 764, 487, -1, 411, + 486, 406, 764, 487, -1, 411, 486, 764, 487, -1, + 270, 486, 721, 490, 721, 487, -1, 72, 486, 751, + 487, -1, 449, 176, 486, 634, 487, -1, -1, 158, + 486, 445, 721, 487, -1, -1, 447, 731, -1, -1, + 732, -1, 731, 490, 732, -1, 789, 36, 734, -1, + 289, 734, -1, 289, 789, -1, -1, 486, 735, 736, + 633, 737, 487, -1, 789, -1, -1, 298, 54, 751, + -1, -1, 320, 738, -1, 348, 738, -1, -1, 739, + -1, 48, 739, 33, 739, -1, 418, 307, -1, 418, + 161, -1, 96, 347, -1, 721, 307, -1, 721, 161, + -1, 347, 486, 751, 487, -1, 347, 486, 487, -1, + 740, -1, 486, 751, 490, 721, 487, -1, 790, 495, + 721, -1, 742, -1, 743, 490, 742, -1, 34, -1, + 376, -1, 27, -1, 8, -1, 746, -1, 477, -1, + 478, -1, 479, -1, 
480, -1, 481, -1, 482, -1, + 473, -1, 474, -1, 475, -1, 16, -1, 17, -1, + 18, -1, 8, -1, 281, 486, 750, 487, -1, 745, + -1, 281, 486, 750, 487, -1, 745, -1, 281, 486, + 750, 487, -1, 226, -1, 470, 226, -1, 172, -1, + 470, 172, -1, 186, -1, 470, 186, -1, 745, -1, + 789, 488, 750, -1, 721, -1, 751, 490, 721, -1, + 751, -1, -1, 754, -1, 753, 490, 754, -1, 721, + -1, 797, 13, 721, -1, 797, 14, 721, -1, 691, + -1, 755, 490, 691, -1, 757, 167, 721, -1, -1, + 3, -1, 712, -1, 713, -1, 714, -1, 715, -1, + 716, -1, 717, -1, 718, -1, 719, -1, 788, -1, + 721, 759, 762, 763, -1, 721, 759, 762, -1, 302, + 721, -1, 722, 191, 722, -1, -1, 721, 762, 763, + -1, 721, 763, 762, -1, 721, 762, -1, 721, 763, + -1, 751, -1, -1, 167, 721, -1, 162, 721, -1, + 721, 167, 751, -1, 167, 751, -1, 751, -1, 620, + -1, 486, 751, 487, -1, 60, 770, 767, 769, 140, + -1, 768, -1, 767, 768, -1, 444, 721, 402, 721, + -1, 136, 721, -1, -1, 721, -1, -1, 789, -1, + 789, 774, -1, 488, 784, -1, 488, 479, -1, 484, + 721, 485, -1, 484, 773, 495, 773, 485, -1, 721, + -1, -1, 772, -1, 774, 772, -1, -1, 775, 772, + -1, 40, -1, -1, 778, -1, -1, 779, -1, 778, + 490, 779, -1, 721, 36, 799, -1, 721, 3, -1, + 721, -1, 479, -1, 781, -1, 780, 490, 781, -1, + 790, -1, 789, 774, -1, 783, -1, 782, 490, 783, + -1, 789, -1, 798, -1, 792, -1, 789, 774, -1, + 787, -1, 4, -1, 788, 775, -1, 6, -1, 7, + -1, 785, 788, -1, 785, 486, 753, 633, 487, 788, + -1, 694, 788, -1, 710, 486, 721, 487, 720, -1, + 710, 787, 720, -1, 710, 788, 720, -1, 412, -1, + 155, -1, 269, -1, 9, -1, 5, -1, 3, -1, + 858, -1, 859, -1, 789, -1, 5, -1, 3, -1, + 858, -1, 863, -1, 3, -1, 858, -1, 860, -1, + 3, -1, 858, -1, 861, -1, 789, -1, 789, 795, + -1, 488, 784, -1, 795, 488, 784, -1, 486, 782, + 487, -1, -1, 791, -1, 3, -1, 862, -1, 858, + -1, 864, -1, 798, -1, 5, -1, 309, 783, 801, + 36, 802, -1, 486, 755, 487, -1, -1, 619, -1, + 514, -1, 601, -1, 841, -1, 92, 352, 789, 804, + -1, 92, 352, 185, 264, 149, 789, 804, -1, 804, + 805, -1, -1, 546, -1, 806, -1, 
533, -1, 853, + -1, 92, 812, 194, 809, 810, 279, 781, 808, 486, + 530, 487, 811, 685, -1, 92, 812, 194, 809, 185, + 264, 149, 574, 279, 781, 808, 486, 530, 487, 811, + 685, -1, 789, -1, 429, 807, -1, -1, 81, -1, + -1, 574, -1, -1, 448, 559, -1, -1, 422, -1, + -1, 29, 394, 679, 366, 352, 783, -1, 29, 394, + 185, 149, 679, 366, 352, 783, -1, 29, 360, 781, + 366, 352, 783, -1, 29, 360, 185, 149, 781, 366, + 352, 783, -1, 29, 441, 781, 366, 352, 783, -1, + 29, 441, 185, 149, 781, 366, 352, 783, -1, 163, + 68, -1, 68, -1, 151, 107, 788, 609, -1, 190, + 107, 788, -1, 150, 820, -1, 150, 824, 818, 820, + -1, 150, 439, 820, -1, 150, 486, 823, 487, 820, + -1, 439, -1, -1, 825, -1, 540, -1, -1, 619, + -1, 514, -1, 601, -1, 841, -1, 855, -1, 3, + -1, 858, -1, 862, -1, 821, -1, 788, -1, 826, + -1, 823, 490, 826, -1, 32, -1, 31, -1, 412, + -1, 155, -1, 279, -1, 822, -1, 827, 819, -1, + 821, -1, 824, -1, 366, 829, -1, 366, 230, 829, + -1, 366, 364, 829, -1, 830, -1, 851, 167, 96, + -1, 403, 469, 832, -1, 352, 788, -1, 851, 405, + 833, -1, 851, 475, 833, -1, 851, 405, 114, -1, + 851, 475, 114, -1, 825, -1, 540, -1, 788, -1, + 3, -1, 710, 788, 720, -1, 710, 486, 787, 487, + 788, -1, 540, -1, 114, -1, 230, -1, 831, -1, + 833, 490, 831, -1, 229, 835, -1, 788, -1, 430, + 838, 840, 818, -1, 430, 838, 840, 818, 781, -1, + 430, 838, 840, 818, 845, -1, 430, 486, 839, 487, + -1, 430, 486, 839, 487, 781, 796, -1, 824, -1, + 439, -1, 166, -1, 168, -1, 3, -1, 168, -1, + -1, 837, -1, 839, 490, 837, -1, 166, -1, -1, + 518, 119, 167, 842, 844, 843, 523, -1, 679, -1, + 679, 789, -1, 679, 36, 789, -1, 445, 721, -1, + -1, 429, 670, -1, -1, 824, 818, -1, 824, 818, + 781, 796, -1, 337, 848, -1, 851, -1, 27, -1, + 847, -1, 403, 469, -1, 407, 214, 225, -1, 850, + 619, -1, 850, 851, -1, 850, 403, 469, -1, 850, + 407, 214, 225, -1, 850, 27, -1, 370, -1, 124, + -1, 789, -1, 851, 488, 789, -1, 56, 724, -1, + 92, 590, 441, 781, 580, 811, 36, 619, 854, -1, + 92, 284, 335, 590, 441, 781, 580, 811, 36, 619, + 
854, -1, 92, 590, 325, 441, 781, 486, 584, 487, + 811, 36, 619, 854, -1, 92, 284, 335, 590, 325, + 441, 781, 486, 584, 487, 811, 36, 619, 854, -1, + 448, 67, 282, -1, 448, 59, 67, 282, -1, 448, + 230, 67, 282, -1, -1, 92, 590, 394, 857, 36, + 619, 856, -1, 92, 590, 394, 185, 264, 149, 857, + 36, 619, 856, -1, 448, 106, -1, 448, 262, 106, + -1, -1, 781, 580, 566, 558, -1, 19, -1, 20, + -1, 21, -1, 22, -1, 23, -1, 24, -1, 25, + -1, 26, -1, 28, -1, 29, -1, 30, -1, 38, + -1, 39, -1, 41, -1, 42, -1, 43, -1, 45, + -1, 46, -1, 47, -1, 54, -1, 55, -1, 56, + -1, 57, -1, 58, -1, 59, -1, 62, -1, 63, + -1, 66, -1, 68, -1, 69, -1, 70, -1, 71, + -1, 76, -1, 77, -1, 78, -1, 79, -1, 80, + -1, 82, -1, 83, -1, 84, -1, 86, -1, 87, + -1, 88, -1, 89, -1, 90, -1, 91, -1, 94, + -1, 95, -1, 96, -1, 104, -1, 105, -1, 106, + -1, 107, -1, 108, -1, 109, -1, 110, -1, 113, + -1, 115, -1, 117, -1, 118, -1, 119, -1, 120, + -1, 121, -1, 122, -1, 124, -1, 125, -1, 126, + -1, 127, -1, 128, -1, 131, -1, 132, -1, 133, + -1, 134, -1, 135, -1, 137, -1, 138, -1, 139, + -1, 141, -1, 142, -1, 143, -1, 145, -1, 146, + -1, 147, -1, 148, -1, 150, -1, 151, -1, 152, + -1, 153, -1, 156, -1, 158, -1, 159, -1, 161, + -1, 163, -1, 165, -1, 169, -1, 170, -1, 171, + -1, 173, -1, 175, -1, 178, -1, 180, -1, 181, + -1, 182, -1, 183, -1, 184, -1, 185, -1, 187, + -1, 188, -1, 189, -1, 190, -1, 192, -1, 193, + -1, 194, -1, 195, -1, 196, -1, 197, -1, 199, + -1, 202, -1, 203, -1, 204, -1, 205, -1, 211, + -1, 214, -1, 216, -1, 217, -1, 218, -1, 219, + -1, 220, -1, 223, -1, 225, -1, 228, -1, 229, + -1, 230, -1, 233, -1, 234, -1, 235, -1, 236, + -1, 237, -1, 239, -1, 240, -1, 241, -1, 242, + -1, 243, -1, 244, -1, 245, -1, 246, -1, 247, + -1, 248, -1, 249, -1, 250, -1, 251, -1, 252, + -1, 253, -1, 254, -1, 255, -1, 256, -1, 260, + -1, 261, -1, 262, -1, 265, -1, 266, -1, 268, + -1, 271, -1, 273, -1, 274, -1, 275, -1, 277, + -1, 278, -1, 281, -1, 282, -1, 283, -1, 286, + -1, 289, -1, 292, -1, 293, -1, 294, -1, 295, + -1, 296, -1, 
297, -1, 298, -1, 299, -1, 300, + -1, 301, -1, 303, -1, 304, -1, 306, -1, 307, + -1, 309, -1, 310, -1, 311, -1, 313, -1, 314, + -1, 315, -1, 316, -1, 317, -1, 318, -1, 319, + -1, 320, -1, 321, -1, 323, -1, 324, -1, 325, + -1, 326, -1, 328, -1, 329, -1, 330, -1, 331, + -1, 332, -1, 333, -1, 334, -1, 335, -1, 336, + -1, 337, -1, 338, -1, 339, -1, 341, -1, 342, + -1, 344, -1, 345, -1, 346, -1, 348, -1, 349, + -1, 350, -1, 351, -1, 352, -1, 353, -1, 354, + -1, 355, -1, 356, -1, 357, -1, 358, -1, 360, + -1, 361, -1, 362, -1, 363, -1, 364, -1, 366, + -1, 368, -1, 369, -1, 370, -1, 372, -1, 373, + -1, 375, -1, 377, -1, 378, -1, 379, -1, 380, + -1, 381, -1, 382, -1, 383, -1, 384, -1, 385, + -1, 386, -1, 387, -1, 389, -1, 392, -1, 393, + -1, 395, -1, 397, -1, 398, -1, 399, -1, 400, + -1, 401, -1, 407, -1, 408, -1, 410, -1, 413, + -1, 414, -1, 416, -1, 417, -1, 418, -1, 419, + -1, 420, -1, 423, -1, 424, -1, 425, -1, 426, + -1, 427, -1, 430, -1, 431, -1, 432, -1, 433, + -1, 434, -1, 438, -1, 440, -1, 441, -1, 442, + -1, 443, -1, 446, -1, 449, -1, 450, -1, 451, + -1, 452, -1, 453, -1, 454, -1, 466, -1, 467, + -1, 468, -1, 469, -1, 48, -1, 49, -1, 51, + -1, 52, -1, 64, -1, 65, -1, 72, -1, 111, + -1, 112, -1, 149, -1, 154, -1, 160, -1, 177, + -1, 201, -1, 206, -1, 207, -1, 209, -1, 238, + -1, 257, -1, 259, -1, 263, -1, 270, -1, 272, + -1, 287, -1, 291, -1, 305, -1, 308, -1, 322, + -1, 347, -1, 367, -1, 374, -1, 388, -1, 390, + -1, 403, -1, 404, -1, 409, -1, 411, -1, 415, + -1, 435, -1, 436, -1, 455, -1, 456, -1, 457, + -1, 458, -1, 459, -1, 460, -1, 461, -1, 462, + -1, 463, -1, 464, -1, 465, -1, 44, -1, 50, -1, 74, -1, 81, -1, 93, -1, 100, -1, 166, -1, 168, -1, 172, -1, 186, -1, 200, -1, 212, - -1, 213, -1, 215, -1, 224, -1, 226, -1, 258, - -1, 267, -1, 288, -1, 290, -1, 343, -1, 371, - -1, 396, -1, 415, -1, 439, -1, 48, -1, 49, - -1, 51, -1, 52, -1, 65, -1, 64, -1, 72, - -1, 111, -1, 112, -1, 149, -1, 154, -1, 160, - -1, 177, -1, 201, -1, 207, -1, 209, -1, 206, - -1, 238, -1, 
257, -1, 259, -1, 263, -1, 270, - -1, 272, -1, 287, -1, 291, -1, 305, -1, 308, - -1, 322, -1, 347, -1, 367, -1, 374, -1, 388, - -1, 390, -1, 403, -1, 404, -1, 409, -1, 411, - -1, 415, -1, 435, -1, 436, -1, 455, -1, 456, - -1, 457, -1, 458, -1, 459, -1, 460, -1, 461, - -1, 462, -1, 463, -1, 464, -1, 465, -1, 44, + -1, 213, -1, 215, -1, 224, -1, 226, -1, 238, + -1, 258, -1, 267, -1, 288, -1, 290, -1, 343, + -1, 371, -1, 388, -1, 396, -1, 439, -1, 44, -1, 50, -1, 74, -1, 81, -1, 93, -1, 100, -1, 166, -1, 168, -1, 172, -1, 186, -1, 200, -1, 212, -1, 213, -1, 215, -1, 224, -1, 226, - -1, 238, -1, 258, -1, 267, -1, 288, -1, 290, - -1, 343, -1, 371, -1, 388, -1, 396, -1, 415, - -1, 439, -1, 27, -1, 31, -1, 32, -1, 33, - -1, 34, -1, 35, -1, 36, -1, 37, -1, 40, - -1, 53, -1, 60, -1, 61, -1, 67, -1, 73, - -1, 75, -1, 85, -1, 92, -1, 97, -1, 98, - -1, 99, -1, 101, -1, 102, -1, 103, -1, 114, - -1, 116, -1, 123, -1, 129, -1, 130, -1, 136, - -1, 140, -1, 144, -1, 155, -1, 157, -1, 162, - -1, 164, -1, 167, -1, 174, -1, 176, -1, 179, - -1, 191, -1, 198, -1, 208, -1, 210, -1, 221, - -1, 222, -1, 227, -1, 231, -1, 232, -1, 264, - -1, 269, -1, 276, -1, 279, -1, 280, -1, 284, - -1, 285, -1, 302, -1, 312, -1, 327, -1, 340, - -1, 359, -1, 365, -1, 376, -1, 391, -1, 394, - -1, 402, -1, 405, -1, 406, -1, 412, -1, 421, - -1, 422, -1, 428, -1, 429, -1, 437, -1, 444, - -1, 445, -1, 447, -1, 448, -1 + -1, 258, -1, 267, -1, 288, -1, 290, -1, 343, + -1, 371, -1, 396, -1, 415, -1, 439, -1, 48, + -1, 49, -1, 51, -1, 52, -1, 65, -1, 64, + -1, 72, -1, 111, -1, 112, -1, 149, -1, 154, + -1, 160, -1, 177, -1, 201, -1, 207, -1, 209, + -1, 206, -1, 238, -1, 257, -1, 259, -1, 263, + -1, 270, -1, 272, -1, 287, -1, 291, -1, 305, + -1, 308, -1, 322, -1, 347, -1, 367, -1, 374, + -1, 388, -1, 390, -1, 403, -1, 404, -1, 409, + -1, 411, -1, 415, -1, 435, -1, 436, -1, 455, + -1, 456, -1, 457, -1, 458, -1, 459, -1, 460, + -1, 461, -1, 462, -1, 463, -1, 464, -1, 465, + -1, 44, -1, 50, -1, 74, -1, 81, -1, 93, + -1, 
100, -1, 166, -1, 168, -1, 172, -1, 186, + -1, 200, -1, 212, -1, 213, -1, 215, -1, 224, + -1, 226, -1, 238, -1, 258, -1, 267, -1, 288, + -1, 290, -1, 343, -1, 371, -1, 388, -1, 396, + -1, 415, -1, 439, -1, 27, -1, 31, -1, 32, + -1, 33, -1, 34, -1, 35, -1, 36, -1, 37, + -1, 40, -1, 53, -1, 60, -1, 61, -1, 67, + -1, 73, -1, 75, -1, 85, -1, 92, -1, 97, + -1, 98, -1, 99, -1, 101, -1, 102, -1, 103, + -1, 114, -1, 116, -1, 123, -1, 129, -1, 130, + -1, 136, -1, 140, -1, 144, -1, 155, -1, 157, + -1, 162, -1, 164, -1, 167, -1, 174, -1, 176, + -1, 179, -1, 191, -1, 198, -1, 208, -1, 210, + -1, 221, -1, 222, -1, 227, -1, 231, -1, 232, + -1, 264, -1, 269, -1, 276, -1, 279, -1, 280, + -1, 284, -1, 285, -1, 302, -1, 312, -1, 327, + -1, 340, -1, 359, -1, 365, -1, 376, -1, 391, + -1, 394, -1, 402, -1, 405, -1, 406, -1, 412, + -1, 421, -1, 422, -1, 428, -1, 429, -1, 437, + -1, 444, -1, 445, -1, 447, -1, 448, -1 }; /* YYRLINE[YYN] -- source line where rule number YYN was defined. */ static const yytype_uint16 yyrline[] = { - 0, 456, 456, 472, 484, 493, 494, 495, 496, 497, - 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, - 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, - 518, 519, 520, 521, 522, 523, 524, 525, 527, 9, + 0, 459, 459, 475, 487, 496, 497, 498, 499, 500, + 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, + 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, + 521, 522, 523, 524, 525, 526, 527, 528, 530, 9, 18, 27, 36, 45, 54, 63, 72, 85, 87, 93, 94, 99, 103, 107, 118, 126, 130, 139, 148, 157, 166, 175, 184, 192, 200, 209, 218, 227, 236, 253, 262, 271, 280, 290, 303, 318, 327, 335, 350, 358, 368, 378, 385, 392, 400, 407, 418, 419, 424, 428, - 433, 438, 446, 447, 452, 456, 457, 7, 13, 19, - 25, 6, 15, 25, 35, 45, 55, 65, 75, 85, - 95, 106, 117, 127, 140, 141, 8, 21, 27, 34, - 40, 47, 57, 61, 70, 79, 88, 95, 96, 101, - 113, 118, 143, 153, 163, 169, 180, 191, 206, 207, - 213, 214, 219, 220, 226, 227, 231, 232, 237, 239, - 245, 246, 250, 251, 256, 7, 14, 22, 9, 19, - 32, 
33, 7, 14, 31, 51, 52, 9, 17, 29, - 30, 34, 35, 36, 41, 42, 43, 48, 52, 56, - 60, 64, 68, 72, 76, 80, 84, 88, 92, 97, - 101, 105, 112, 113, 117, 118, 119, 2, 9, 15, - 21, 28, 35, 45, 46, 47, 7, 21, 41, 42, - 69, 70, 71, 72, 73, 74, 78, 79, 84, 89, - 90, 91, 92, 93, 98, 105, 106, 107, 124, 131, - 138, 148, 158, 170, 179, 188, 207, 214, 219, 221, - 223, 225, 228, 233, 234, 238, 239, 240, 241, 246, - 250, 251, 256, 263, 268, 269, 270, 271, 272, 273, - 274, 275, 281, 282, 286, 291, 298, 305, 312, 324, - 325, 326, 327, 331, 336, 337, 338, 343, 348, 349, - 350, 351, 352, 353, 358, 381, 385, 392, 393, 397, - 401, 402, 403, 407, 411, 419, 420, 425, 426, 430, - 438, 439, 444, 445, 449, 454, 458, 462, 467, 475, - 476, 480, 481, 487, 498, 511, 525, 539, 553, 567, - 590, 594, 601, 605, 613, 618, 625, 635, 636, 637, - 638, 639, 646, 653, 654, 659, 660, 9, 19, 29, - 39, 49, 59, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 92, 93, - 94, 95, 96, 97, 98, 103, 104, 109, 110, 111, - 116, 117, 118, 7, 18, 19, 23, 27, 7, 1, - 30, 53, 54, 59, 63, 68, 72, 80, 81, 85, - 86, 91, 92, 96, 97, 102, 103, 104, 105, 106, - 111, 119, 123, 128, 129, 134, 138, 143, 147, 151, - 155, 159, 163, 167, 171, 175, 179, 183, 187, 191, - 195, 203, 209, 210, 211, 216, 220, 47, 48, 52, - 53, 68, 69, 76, 84, 92, 100, 108, 116, 127, - 128, 155, 170, 186, 187, 206, 210, 214, 231, 238, - 245, 255, 256, 259, 271, 282, 290, 295, 300, 305, - 310, 318, 326, 331, 336, 343, 344, 348, 349, 350, - 357, 358, 362, 363, 367, 368, 372, 376, 377, 380, - 389, 400, 401, 402, 405, 406, 407, 411, 412, 413, - 414, 418, 419, 423, 425, 441, 443, 448, 451, 459, - 463, 467, 471, 475, 479, 486, 491, 498, 499, 503, - 507, 511, 515, 522, 529, 530, 535, 536, 540, 541, - 549, 569, 570, 572, 577, 578, 582, 583, 586, 587, - 612, 613, 617, 618, 622, 623, 627, 640, 641, 645, - 646, 650, 651, 655, 656, 660, 671, 672, 673, 674, - 678, 679, 684, 685, 686, 695, 701, 719, 720, 724, - 725, 731, 737, 745, 753, 789, 815, 819, 845, 849, 
- 862, 876, 891, 903, 919, 925, 930, 936, 943, 944, - 952, 956, 960, 966, 973, 978, 979, 980, 981, 985, - 986, 998, 999, 1004, 1011, 1018, 1025, 1057, 1068, 1081, - 1086, 1087, 1090, 1091, 1094, 1095, 1100, 1101, 1106, 1110, - 1116, 1137, 1145, 1158, 1161, 1165, 1165, 1167, 1172, 1179, - 1184, 1190, 1195, 1201, 1206, 1214, 1216, 1219, 1223, 1224, - 1225, 1226, 1227, 1228, 1233, 1253, 1254, 1255, 1256, 1267, - 1281, 1282, 1288, 1293, 1298, 1303, 1308, 1313, 1318, 1323, - 1329, 1335, 1341, 1348, 1370, 1379, 1383, 1391, 1395, 1403, - 1415, 1436, 1440, 1446, 1450, 1463, 1471, 1481, 1483, 1485, - 1487, 1489, 1491, 1496, 1497, 1504, 1513, 1521, 1530, 1541, - 1549, 1550, 1551, 1555, 1555, 1558, 1558, 1561, 1561, 1564, - 1564, 1567, 1567, 1570, 1570, 1573, 1573, 1576, 1576, 1579, - 1581, 1583, 1585, 1587, 1589, 1591, 1593, 1595, 1600, 1605, - 1611, 1618, 1623, 1629, 1635, 1666, 1668, 1670, 1678, 1693, + 433, 438, 446, 447, 452, 456, 457, 458, 7, 13, + 19, 25, 6, 15, 25, 35, 45, 55, 65, 75, + 85, 95, 106, 117, 127, 140, 141, 8, 21, 27, + 34, 40, 47, 57, 61, 70, 79, 88, 95, 96, + 101, 113, 118, 143, 153, 163, 169, 180, 191, 206, + 207, 213, 214, 219, 220, 226, 227, 231, 232, 237, + 239, 245, 246, 250, 251, 256, 7, 14, 22, 9, + 19, 32, 33, 7, 14, 31, 51, 52, 9, 17, + 29, 30, 34, 35, 36, 41, 42, 43, 48, 52, + 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, + 97, 101, 105, 112, 113, 117, 118, 119, 2, 9, + 15, 21, 28, 35, 45, 46, 47, 7, 21, 41, + 42, 69, 70, 71, 72, 73, 74, 78, 79, 84, + 89, 90, 91, 92, 93, 98, 105, 106, 107, 124, + 131, 138, 148, 158, 170, 179, 188, 207, 214, 219, + 221, 223, 225, 228, 233, 234, 238, 239, 240, 241, + 246, 250, 251, 256, 263, 268, 269, 270, 271, 272, + 273, 274, 275, 281, 282, 286, 291, 298, 305, 312, + 324, 325, 326, 327, 331, 336, 337, 338, 343, 348, + 349, 350, 351, 352, 353, 358, 381, 385, 392, 393, + 397, 401, 402, 403, 407, 411, 419, 420, 425, 426, + 430, 438, 439, 444, 445, 449, 454, 458, 462, 467, + 475, 476, 480, 481, 487, 498, 511, 525, 539, 
553, + 567, 590, 594, 601, 605, 613, 618, 625, 635, 636, + 637, 638, 639, 646, 653, 654, 659, 660, 9, 19, + 29, 39, 49, 59, 73, 74, 75, 76, 77, 78, + 79, 80, 81, 82, 83, 84, 85, 86, 87, 92, + 93, 94, 95, 96, 97, 98, 103, 104, 109, 110, + 111, 116, 117, 118, 7, 18, 19, 23, 27, 7, + 1, 30, 53, 54, 59, 63, 68, 72, 80, 81, + 85, 86, 91, 92, 96, 97, 102, 103, 104, 105, + 106, 111, 119, 123, 128, 129, 134, 138, 143, 147, + 151, 155, 159, 163, 167, 171, 175, 179, 183, 187, + 191, 195, 203, 209, 210, 211, 216, 220, 47, 48, + 52, 53, 68, 69, 76, 84, 92, 100, 108, 116, + 127, 128, 155, 170, 186, 187, 206, 210, 214, 231, + 238, 245, 255, 256, 259, 271, 282, 290, 295, 300, + 305, 310, 318, 326, 331, 336, 343, 344, 348, 349, + 350, 357, 358, 362, 363, 367, 368, 372, 376, 377, + 380, 389, 400, 401, 402, 405, 406, 407, 411, 412, + 413, 414, 418, 419, 423, 425, 441, 443, 448, 451, + 459, 463, 467, 471, 475, 479, 486, 491, 498, 499, + 503, 507, 511, 515, 522, 529, 530, 535, 536, 540, + 541, 549, 569, 570, 572, 577, 578, 582, 583, 586, + 587, 612, 613, 617, 618, 622, 623, 627, 640, 641, + 645, 646, 650, 651, 655, 656, 660, 671, 672, 673, + 674, 678, 679, 684, 685, 686, 695, 701, 719, 720, + 724, 725, 731, 737, 745, 753, 789, 815, 819, 845, + 849, 862, 876, 891, 903, 919, 925, 930, 936, 943, + 944, 952, 956, 960, 966, 973, 978, 979, 980, 981, + 985, 986, 998, 999, 1004, 1011, 1018, 1025, 1057, 1068, + 1081, 1086, 1087, 1090, 1091, 1094, 1095, 1100, 1101, 1106, + 1110, 1116, 1137, 1145, 1158, 1161, 1165, 1165, 1167, 1172, + 1179, 1184, 1190, 1195, 1201, 1207, 1216, 1218, 1221, 1225, + 1226, 1227, 1228, 1229, 1230, 1235, 1255, 1256, 1257, 1258, + 1269, 1283, 1284, 1290, 1295, 1300, 1305, 1310, 1315, 1320, + 1325, 1331, 1337, 1343, 1350, 1372, 1381, 1385, 1393, 1397, + 1405, 1417, 1438, 1442, 1448, 1452, 1465, 1473, 1483, 1485, + 1487, 1489, 1491, 1493, 1498, 1499, 1506, 1515, 1523, 1532, + 1543, 1551, 1552, 1553, 1557, 1557, 1560, 1560, 1563, 1563, + 1566, 1566, 1569, 1569, 1572, 1572, 
1575, 1575, 1578, 1578, + 1581, 1583, 1585, 1587, 1589, 1591, 1593, 1595, 1597, 1602, + 1607, 1613, 1620, 1625, 1631, 1637, 1668, 1670, 1672, 1680, 1695, 1697, 1699, 1701, 1703, 1705, 1707, 1709, 1711, 1713, - 1715, 1717, 1719, 1722, 1724, 1726, 1729, 1731, 1733, 1735, - 1738, 1743, 1748, 1755, 1760, 1767, 1772, 1780, 1785, 1794, - 1802, 1810, 1818, 1836, 1844, 1852, 1860, 1868, 1876, 1880, - 1888, 1896, 1912, 1920, 1928, 1936, 1944, 1952, 1960, 1964, - 1968, 1972, 1976, 1984, 1992, 2000, 2008, 2028, 2050, 2061, - 2068, 2082, 2098, 2100, 2102, 2104, 2106, 2108, 2110, 2112, - 2114, 2116, 2118, 2120, 2122, 2124, 2126, 2128, 2130, 2132, - 2134, 2136, 2140, 2144, 2148, 2162, 2163, 2164, 2171, 2183, - 2198, 2210, 2212, 2224, 2235, 2259, 2272, 2276, 2282, 2289, - 2296, 2306, 2313, 2341, 2376, 2387, 2388, 2395, 2401, 2405, - 2409, 2413, 2417, 2421, 2425, 2429, 2433, 2437, 2441, 2445, - 2449, 2453, 2457, 2461, 2463, 2465, 2469, 2478, 2483, 2490, - 2505, 2512, 2516, 2520, 2524, 2528, 2542, 2543, 2547, 2548, - 2556, 2557, 2561, 2562, 2567, 2575, 2577, 2591, 2594, 2621, - 2622, 2625, 2626, 2637, 2655, 2662, 2671, 2688, 2733, 2741, - 2749, 2757, 2765, 2786, 2787, 2790, 2791, 2794, 2795, 2796, - 2799, 2800, 2803, 2804, 2805, 2806, 2807, 2808, 2809, 2810, - 2811, 2812, 2813, 2814, 2817, 2819, 2824, 2826, 2831, 2833, - 2835, 2837, 2839, 2841, 2843, 2845, 2859, 2861, 2865, 2869, - 2876, 2881, 2888, 2892, 2898, 2902, 2911, 2922, 2923, 2927, - 2931, 2938, 2939, 2940, 2941, 2942, 2943, 2944, 2945, 2946, - 2947, 2957, 2961, 2968, 2975, 2976, 2992, 2996, 3001, 3005, - 3020, 3025, 3029, 3032, 3035, 3036, 3037, 3040, 3047, 3057, - 3071, 3072, 3076, 3087, 3088, 3091, 3092, 3095, 3099, 3106, - 3110, 3114, 3122, 3133, 3134, 3138, 3139, 3143, 3144, 3147, - 3148, 3158, 3159, 3163, 3164, 3167, 3183, 3191, 3199, 3221, - 3222, 3233, 3237, 3264, 3266, 3271, 3273, 3283, 3286, 3297, - 3301, 3305, 3317, 3321, 3330, 3337, 3369, 3373, 3377, 3381, - 3385, 3389, 3393, 3399, 3400, 3416, 3417, 3418, 3421, 
3422, - 3428, 3429, 3430, 3433, 3434, 3435, 3438, 3439, 3440, 3443, - 3444, 3447, 3449, 3454, 3455, 3458, 3466, 3467, 3468, 3469, - 3472, 3473, 7, 18, 19, 23, 24, 25, 26, 7, - 16, 34, 41, 46, 47, 48, 49, 8, 33, 62, - 66, 67, 72, 73, 78, 79, 83, 84, 89, 90, - 7, 16, 25, 34, 43, 52, 5, 11, 7, 20, - 9, 16, 26, 33, 44, 45, 50, 51, 52, 57, - 58, 59, 60, 61, 65, 66, 67, 72, 73, 78, - 82, 90, 91, 96, 97, 98, 104, 109, 117, 118, - 10, 16, 22, 32, 33, 41, 52, 64, 72, 80, - 87, 97, 99, 105, 109, 113, 128, 135, 136, 137, - 141, 142, 7, 16, 8, 22, 36, 48, 56, 70, - 71, 72, 73, 74, 87, 88, 93, 94, 98, 99, - 7, 21, 25, 32, 43, 44, 50, 51, 9, 19, - 2, 7, 14, 24, 25, 32, 3, 10, 16, 22, - 28, 37, 37, 39, 40, 6, 8, 21, 34, 52, - 74, 75, 76, 77, 11, 24, 41, 42, 43, 48, + 1715, 1717, 1719, 1721, 1724, 1726, 1728, 1731, 1733, 1735, + 1737, 1740, 1745, 1750, 1757, 1762, 1769, 1774, 1782, 1787, + 1796, 1804, 1812, 1820, 1838, 1846, 1854, 1862, 1870, 1878, + 1882, 1886, 1890, 1898, 1906, 1922, 1930, 1938, 1946, 1954, + 1962, 1970, 1974, 1978, 1982, 1986, 1994, 2002, 2010, 2018, + 2038, 2060, 2071, 2078, 2092, 2108, 2110, 2112, 2114, 2116, + 2118, 2120, 2122, 2124, 2126, 2128, 2130, 2132, 2134, 2136, + 2138, 2140, 2142, 2144, 2146, 2150, 2154, 2158, 2172, 2173, + 2174, 2181, 2193, 2208, 2220, 2222, 2234, 2245, 2269, 2282, + 2286, 2292, 2299, 2306, 2316, 2323, 2351, 2386, 2397, 2398, + 2405, 2411, 2415, 2419, 2423, 2427, 2431, 2435, 2439, 2443, + 2447, 2451, 2455, 2459, 2463, 2467, 2471, 2473, 2475, 2479, + 2488, 2493, 2500, 2515, 2522, 2526, 2530, 2534, 2538, 2552, + 2553, 2557, 2558, 2566, 2567, 2571, 2572, 2577, 2585, 2587, + 2601, 2604, 2631, 2632, 2635, 2636, 2647, 2653, 2660, 2669, + 2686, 2731, 2739, 2747, 2755, 2763, 2784, 2785, 2788, 2789, + 2793, 2803, 2804, 2806, 2807, 2808, 2811, 2812, 2815, 2816, + 2817, 2818, 2819, 2820, 2821, 2822, 2823, 2824, 2825, 2826, + 2829, 2831, 2836, 2838, 2843, 2845, 2847, 2849, 2851, 2853, + 2855, 2857, 2871, 2873, 2877, 2881, 2888, 2893, 2900, 
2904, + 2910, 2914, 2923, 2934, 2935, 2939, 2943, 2950, 2951, 2952, + 2953, 2954, 2955, 2956, 2957, 2958, 2959, 2969, 2973, 2980, + 2987, 2988, 3004, 3008, 3013, 3017, 3032, 3037, 3041, 3044, + 3047, 3048, 3049, 3052, 3059, 3069, 3083, 3084, 3088, 3099, + 3100, 3103, 3104, 3107, 3111, 3118, 3122, 3126, 3134, 3145, + 3146, 3150, 3151, 3155, 3156, 3159, 3160, 3170, 3171, 3175, + 3176, 3179, 3195, 3203, 3211, 3233, 3234, 3245, 3249, 3276, + 3278, 3283, 3285, 3295, 3298, 3309, 3313, 3317, 3329, 3333, + 3342, 3349, 3381, 3385, 3389, 3393, 3397, 3401, 3405, 3411, + 3412, 3428, 3429, 3430, 3433, 3434, 3440, 3441, 3442, 3445, + 3446, 3447, 3450, 3451, 3452, 3455, 3456, 3459, 3461, 3466, + 3467, 3470, 3478, 3479, 3480, 3481, 3484, 3485, 7, 18, + 19, 23, 24, 25, 26, 7, 16, 34, 41, 46, + 47, 48, 49, 8, 33, 62, 66, 67, 72, 73, + 78, 79, 83, 84, 89, 90, 7, 16, 25, 34, + 43, 52, 5, 11, 7, 20, 9, 16, 26, 33, + 44, 45, 50, 51, 52, 57, 58, 59, 60, 61, + 65, 66, 67, 72, 73, 78, 82, 90, 91, 96, + 97, 98, 104, 109, 117, 118, 10, 16, 22, 32, + 33, 41, 52, 64, 72, 80, 87, 97, 99, 105, + 109, 113, 128, 135, 136, 137, 141, 142, 7, 16, + 8, 22, 36, 48, 56, 70, 71, 72, 73, 74, + 87, 88, 93, 94, 98, 99, 7, 21, 25, 32, + 43, 44, 50, 51, 9, 19, 2, 7, 14, 24, + 25, 32, 3, 10, 16, 22, 28, 37, 37, 39, + 40, 6, 8, 21, 34, 52, 74, 75, 76, 77, + 11, 24, 41, 42, 43, 48, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, @@ -186909,25 +200701,25 @@ static const yytype_uint16 yyrline[] = 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, - 61, 61, 61, 61, 61, 61, 61, 61, 61, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 61, 61, 61, 61, 61, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 63, 63, 
63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, - 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, + 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, - 66, 66, 66, 66, 66, 66, 66, 66, 66, 67, + 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, @@ -186935,7 +200727,7 @@ static const yytype_uint16 yyrline[] = 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67 + 67, 67 }; #endif @@ -187021,8 +200813,8 @@ static const char *const yytname[] = "XMLTABLE", "YEAR_P", "YEARS_P", "YES_P", "ZONE", "NOT_LA", "NULLS_LA", "WITH_LA", "'<'", "'>'", "'='", "POSTFIXOP", "'+'", "'-'", "'*'", "'/'", "'%'", "'^'", "UMINUS", "'['", "']'", "'('", "')'", "'.'", "';'", "','", - "'#'", "'?'", "':'", "$accept", "stmtblock", "stmtmulti", "stmt", - "AlterTableStmt", "alter_identity_column_option_list", + "'{'", "'}'", "'#'", "'?'", "':'", "$accept", "stmtblock", "stmtmulti", + "stmt", "AlterTableStmt", "alter_identity_column_option_list", "alter_column_default", "alter_identity_column_option", "alter_generic_option_list", "alter_table_cmd", "alter_using", "alter_generic_option_elem", "alter_table_cmds", "alter_generic_options", @@ -187089,22 +200881,22 @@ static const char *const yytname[] = "window_definition_list", "window_definition", "over_clause", "window_specification", 
"opt_existing_window_name", "opt_partition_clause", "opt_frame_clause", "frame_extent", - "frame_bound", "qualified_row", "row", "sub_type", "all_Op", "MathOp", - "qual_Op", "qual_all_Op", "subquery_Op", "any_operator", "expr_list", - "opt_expr_list", "func_arg_list", "func_arg_expr", "type_list", - "extract_list", "extract_arg", "overlay_list", "overlay_placing", - "position_list", "substr_list", "substr_from", "substr_for", "trim_list", - "in_expr", "case_expr", "when_clause_list", "when_clause", - "case_default", "case_arg", "columnref", "indirection_el", - "opt_slice_bound", "indirection", "opt_indirection", "opt_asymmetric", - "opt_target_list", "target_list", "target_el", "qualified_name_list", - "qualified_name", "name_list", "name", "attr_name", "func_name", - "AexprConst", "Iconst", "Sconst", "ColId", "ColIdOrString", - "type_function_name", "function_name_token", "type_name_token", - "any_name", "attrs", "opt_name_list", "param_name", "ColLabel", - "ColLabelOrString", "PrepareStmt", "prep_type_clause", "PreparableStmt", - "CreateSchemaStmt", "OptSchemaEltList", "schema_stmt", "IndexStmt", - "access_method", "access_method_clause", "opt_concurrently", + "frame_bound", "qualified_row", "row", "dict_arg", "dict_arguments", + "sub_type", "all_Op", "MathOp", "qual_Op", "qual_all_Op", "subquery_Op", + "any_operator", "expr_list", "opt_expr_list", "func_arg_list", + "func_arg_expr", "type_list", "extract_list", "extract_arg", + "overlay_list", "overlay_placing", "position_list", "substr_list", + "substr_from", "substr_for", "trim_list", "in_expr", "case_expr", + "when_clause_list", "when_clause", "case_default", "case_arg", + "columnref", "indirection_el", "opt_slice_bound", "indirection", + "opt_indirection", "opt_asymmetric", "opt_target_list", "target_list", + "target_el", "qualified_name_list", "qualified_name", "name_list", + "name", "attr_name", "func_name", "AexprConst", "Iconst", "Sconst", + "ColId", "ColIdOrString", "type_function_name", 
"function_name_token", + "type_name_token", "any_name", "attrs", "opt_name_list", "param_name", + "ColLabel", "ColLabelOrString", "PrepareStmt", "prep_type_clause", + "PreparableStmt", "CreateSchemaStmt", "OptSchemaEltList", "schema_stmt", + "IndexStmt", "access_method", "access_method_clause", "opt_concurrently", "opt_index_name", "opt_reloptions", "opt_unique", "AlterObjectSchemaStmt", "CheckPointStmt", "ExportStmt", "ImportStmt", "ExplainStmt", "opt_verbose", "explain_option_arg", "ExplainableStmt", @@ -187178,188 +200970,189 @@ static const yytype_uint16 yytoknum[] = 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 60, 62, 61, 728, 43, 45, 42, 47, 37, 94, 729, 91, 93, 40, 41, 46, 59, - 44, 35, 63, 58 + 44, 123, 125, 35, 63, 58 }; # endif /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ static const yytype_uint16 yyr1[] = { - 0, 494, 495, 496, 496, 497, 497, 497, 497, 497, - 497, 497, 497, 497, 497, 497, 497, 497, 497, 497, - 497, 497, 497, 497, 497, 497, 497, 497, 497, 497, - 497, 497, 497, 497, 497, 497, 497, 497, 497, 498, - 498, 498, 498, 498, 498, 498, 498, 499, 499, 500, - 500, 501, 501, 501, 501, 502, 502, 503, 503, 503, - 503, 503, 503, 503, 503, 503, 503, 503, 503, 503, - 503, 503, 503, 503, 503, 503, 503, 503, 503, 503, - 503, 503, 503, 503, 503, 503, 504, 504, 505, 505, - 505, 505, 506, 506, 507, 508, 508, 509, 509, 509, - 509, 510, 510, 510, 510, 510, 510, 510, 510, 510, - 510, 510, 510, 510, 511, 511, 512, 513, 513, 513, - 513, 513, 514, 514, 515, 515, 515, 516, 516, 517, - 518, 518, 519, 519, 519, 520, 520, 520, 521, 521, - 522, 522, 523, 523, 524, 524, 525, 525, 526, 526, - 527, 527, 528, 528, 529, 530, 530, 530, 531, 531, - 532, 532, 533, 533, 533, 534, 534, 535, 535, 536, - 536, 537, 537, 537, 538, 538, 538, 538, 539, 539, - 539, 539, 539, 539, 539, 539, 539, 539, 539, 539, - 539, 539, 540, 540, 541, 541, 541, 542, 542, 542, - 542, 542, 542, 543, 543, 543, 544, 544, 545, 545, - 546, 546, 546, 546, 546, 546, 547, 
547, 548, 549, - 549, 549, 549, 549, 550, 550, 550, 550, 551, 551, - 551, 551, 551, 551, 551, 551, 552, 553, 554, 554, - 554, 554, 554, 555, 555, 556, 556, 556, 556, 557, - 558, 558, 559, 559, 560, 560, 560, 560, 560, 560, - 560, 560, 561, 561, 562, 563, 563, 563, 563, 564, - 564, 564, 564, 565, 566, 566, 566, 567, 568, 568, - 568, 568, 568, 568, 569, 570, 570, 571, 571, 572, - 573, 573, 573, 574, 574, 575, 575, 576, 576, 577, - 578, 578, 579, 579, 580, 581, 581, 581, 581, 582, - 582, 583, 583, 583, 584, 584, 584, 584, 584, 584, - 585, 585, 586, 586, 586, 586, 587, 588, 588, 588, - 588, 588, 588, 588, 588, 589, 589, 590, 590, 590, - 590, 590, 590, 591, 591, 591, 591, 591, 591, 591, - 591, 591, 591, 591, 591, 591, 591, 591, 592, 592, - 592, 592, 592, 592, 592, 593, 593, 594, 594, 594, - 595, 595, 595, 596, 597, 597, 598, 598, 599, 600, - 600, 601, 601, 602, 602, 603, 603, 604, 604, 605, - 605, 606, 606, 607, 607, 608, 608, 608, 608, 608, - 609, 610, 610, 611, 611, 612, 612, 613, 613, 613, - 613, 613, 613, 613, 613, 613, 613, 613, 613, 613, - 613, 614, 615, 615, 615, 616, 616, 617, 617, 618, - 618, 619, 619, 619, 619, 619, 619, 619, 619, 620, - 620, 621, 621, 621, 621, 621, 621, 621, 622, 622, - 622, 623, 623, 624, 625, 625, 626, 626, 626, 626, - 626, 626, 626, 626, 626, 627, 627, 628, 628, 628, - 629, 629, 630, 630, 631, 631, 632, 633, 633, 634, - 634, 635, 635, 635, 636, 636, 636, 637, 637, 637, - 637, 638, 638, 639, 639, 639, 639, 640, 640, 641, - 641, 641, 641, 641, 641, 642, 642, 643, 643, 644, - 644, 644, 644, 645, 646, 646, 647, 647, 648, 648, - 649, 650, 650, 650, 651, 651, 652, 652, 653, 653, - 654, 654, 655, 655, 656, 656, 657, 658, 658, 659, - 659, 660, 660, 661, 661, 662, 663, 663, 663, 663, - 664, 664, 665, 665, 665, 666, 666, 667, 667, 668, - 668, 669, 669, 669, 669, 669, 669, 669, 670, 670, - 670, 670, 670, 670, 671, 671, 671, 671, 672, 672, - 673, 673, 673, 673, 673, 674, 674, 674, 674, 675, - 675, 676, 676, 677, 677, 677, 677, 678, 678, 679, - 680, 
680, 681, 681, 682, 682, 683, 683, 684, 684, - 685, 686, 686, 687, 687, 688, 688, 689, 689, 689, - 689, 689, 689, 689, 689, 690, 690, 690, 691, 691, - 691, 691, 691, 691, 691, 692, 692, 692, 692, 693, - 694, 694, 695, 695, 695, 695, 695, 695, 695, 695, - 695, 695, 695, 696, 696, 697, 697, 698, 698, 699, - 700, 701, 701, 702, 702, 703, 704, 705, 705, 705, - 705, 705, 705, 706, 706, 707, 707, 707, 707, 708, - 709, 709, 709, 710, 710, 711, 711, 712, 712, 713, - 713, 714, 714, 715, 715, 716, 716, 717, 717, 718, - 718, 718, 718, 718, 718, 718, 718, 718, 718, 718, - 718, 718, 718, 718, 718, 719, 719, 719, 719, 719, - 719, 719, 719, 719, 719, 719, 719, 719, 719, 719, - 719, 719, 719, 719, 719, 719, 719, 719, 719, 719, - 719, 719, 719, 719, 719, 719, 719, 719, 719, 719, - 719, 719, 719, 719, 719, 719, 719, 719, 719, 719, - 719, 719, 719, 719, 719, 719, 719, 719, 719, 719, - 719, 719, 719, 719, 719, 719, 719, 719, 719, 719, - 719, 719, 720, 720, 720, 720, 720, 720, 720, 720, + 0, 496, 497, 498, 498, 499, 499, 499, 499, 499, + 499, 499, 499, 499, 499, 499, 499, 499, 499, 499, + 499, 499, 499, 499, 499, 499, 499, 499, 499, 499, + 499, 499, 499, 499, 499, 499, 499, 499, 499, 500, + 500, 500, 500, 500, 500, 500, 500, 501, 501, 502, + 502, 503, 503, 503, 503, 504, 504, 505, 505, 505, + 505, 505, 505, 505, 505, 505, 505, 505, 505, 505, + 505, 505, 505, 505, 505, 505, 505, 505, 505, 505, + 505, 505, 505, 505, 505, 505, 506, 506, 507, 507, + 507, 507, 508, 508, 509, 510, 510, 510, 511, 511, + 511, 511, 512, 512, 512, 512, 512, 512, 512, 512, + 512, 512, 512, 512, 512, 513, 513, 514, 515, 515, + 515, 515, 515, 516, 516, 517, 517, 517, 518, 518, + 519, 520, 520, 521, 521, 521, 522, 522, 522, 523, + 523, 524, 524, 525, 525, 526, 526, 527, 527, 528, + 528, 529, 529, 530, 530, 531, 532, 532, 532, 533, + 533, 534, 534, 535, 535, 535, 536, 536, 537, 537, + 538, 538, 539, 539, 539, 540, 540, 540, 540, 541, + 541, 541, 541, 541, 541, 541, 541, 541, 541, 541, + 541, 541, 541, 542, 542, 543, 
543, 543, 544, 544, + 544, 544, 544, 544, 545, 545, 545, 546, 546, 547, + 547, 548, 548, 548, 548, 548, 548, 549, 549, 550, + 551, 551, 551, 551, 551, 552, 552, 552, 552, 553, + 553, 553, 553, 553, 553, 553, 553, 554, 555, 556, + 556, 556, 556, 556, 557, 557, 558, 558, 558, 558, + 559, 560, 560, 561, 561, 562, 562, 562, 562, 562, + 562, 562, 562, 563, 563, 564, 565, 565, 565, 565, + 566, 566, 566, 566, 567, 568, 568, 568, 569, 570, + 570, 570, 570, 570, 570, 571, 572, 572, 573, 573, + 574, 575, 575, 575, 576, 576, 577, 577, 578, 578, + 579, 580, 580, 581, 581, 582, 583, 583, 583, 583, + 584, 584, 585, 585, 585, 586, 586, 586, 586, 586, + 586, 587, 587, 588, 588, 588, 588, 589, 590, 590, + 590, 590, 590, 590, 590, 590, 591, 591, 592, 592, + 592, 592, 592, 592, 593, 593, 593, 593, 593, 593, + 593, 593, 593, 593, 593, 593, 593, 593, 593, 594, + 594, 594, 594, 594, 594, 594, 595, 595, 596, 596, + 596, 597, 597, 597, 598, 599, 599, 600, 600, 601, + 602, 602, 603, 603, 604, 604, 605, 605, 606, 606, + 607, 607, 608, 608, 609, 609, 610, 610, 610, 610, + 610, 611, 612, 612, 613, 613, 614, 614, 615, 615, + 615, 615, 615, 615, 615, 615, 615, 615, 615, 615, + 615, 615, 616, 617, 617, 617, 618, 618, 619, 619, + 620, 620, 621, 621, 621, 621, 621, 621, 621, 621, + 622, 622, 623, 623, 623, 623, 623, 623, 623, 624, + 624, 624, 625, 625, 626, 627, 627, 628, 628, 628, + 628, 628, 628, 628, 628, 628, 629, 629, 630, 630, + 630, 631, 631, 632, 632, 633, 633, 634, 635, 635, + 636, 636, 637, 637, 637, 638, 638, 638, 639, 639, + 639, 639, 640, 640, 641, 641, 641, 641, 642, 642, + 643, 643, 643, 643, 643, 643, 644, 644, 645, 645, + 646, 646, 646, 646, 647, 648, 648, 649, 649, 650, + 650, 651, 652, 652, 652, 653, 653, 654, 654, 655, + 655, 656, 656, 657, 657, 658, 658, 659, 660, 660, + 661, 661, 662, 662, 663, 663, 664, 665, 665, 665, + 665, 666, 666, 667, 667, 667, 668, 668, 669, 669, + 670, 670, 671, 671, 671, 671, 671, 671, 671, 672, + 672, 672, 672, 672, 672, 673, 673, 673, 673, 674, + 
674, 675, 675, 675, 675, 675, 676, 676, 676, 676, + 677, 677, 678, 678, 679, 679, 679, 679, 680, 680, + 681, 682, 682, 683, 683, 684, 684, 685, 685, 686, + 686, 687, 688, 688, 689, 689, 690, 690, 691, 691, + 691, 691, 691, 691, 691, 691, 692, 692, 692, 693, + 693, 693, 693, 693, 693, 693, 694, 694, 694, 694, + 695, 696, 696, 697, 697, 697, 697, 697, 697, 697, + 697, 697, 697, 697, 698, 698, 699, 699, 700, 700, + 701, 702, 703, 703, 704, 704, 705, 706, 707, 707, + 707, 707, 707, 707, 708, 708, 709, 709, 709, 709, + 710, 711, 711, 711, 712, 712, 713, 713, 714, 714, + 715, 715, 716, 716, 717, 717, 718, 718, 719, 719, 720, 720, 720, 720, 720, 720, 720, 720, 720, 720, - 720, 720, 720, 720, 720, 721, 721, 721, 721, 721, - 721, 721, 721, 721, 721, 721, 722, 722, 722, 722, - 722, 722, 722, 723, 723, 724, 724, 725, 725, 725, - 725, 725, 725, 725, 725, 725, 725, 725, 725, 725, - 725, 725, 725, 725, 725, 725, 725, 725, 725, 725, - 725, 725, 725, 725, 725, 725, 726, 726, 727, 727, - 728, 728, 729, 729, 730, 731, 731, 731, 732, 733, - 733, 734, 734, 735, 735, 735, 736, 736, 737, 737, - 737, 737, 737, 738, 738, 739, 739, 740, 740, 740, - 741, 741, 742, 742, 742, 742, 742, 742, 742, 742, - 742, 742, 742, 742, 743, 743, 744, 744, 745, 745, - 745, 745, 745, 745, 745, 745, 746, 746, 747, 747, - 748, 748, 749, 749, 750, 750, 750, 751, 751, 752, - 752, 753, 753, 753, 753, 753, 753, 753, 753, 753, - 753, 754, 754, 755, 756, 756, 757, 757, 757, 757, - 757, 757, 758, 759, 760, 760, 760, 761, 761, 762, - 763, 763, 764, 765, 765, 766, 766, 767, 767, 768, - 768, 768, 768, 769, 769, 770, 770, 771, 771, 772, - 772, 773, 773, 774, 774, 775, 775, 775, 775, 776, - 776, 777, 777, 778, 778, 779, 780, 781, 781, 782, - 782, 782, 782, 782, 782, 782, 782, 782, 782, 782, - 782, 782, 782, 783, 784, 785, 785, 785, 786, 786, - 787, 787, 787, 788, 788, 788, 789, 789, 789, 790, - 790, 791, 791, 792, 792, 793, 794, 794, 794, 794, - 795, 795, 796, 797, 797, 798, 798, 798, 798, 799, - 799, 800, 800, 801, 801, 
801, 801, 802, 802, 803, - 804, 804, 805, 805, 806, 806, 807, 807, 808, 808, - 809, 809, 809, 809, 809, 809, 810, 810, 811, 812, - 813, 813, 813, 813, 814, 814, 815, 815, 815, 816, - 816, 816, 816, 816, 817, 817, 817, 818, 818, 819, - 819, 820, 820, 821, 821, 821, 821, 822, 823, 823, - 824, 824, 824, 825, 825, 825, 825, 826, 826, 826, - 826, 827, 827, 828, 828, 828, 828, 828, 828, 828, - 829, 829, 830, 831, 832, 832, 832, 832, 832, 833, - 833, 833, 833, 833, 834, 834, 835, 835, 836, 836, - 837, 838, 838, 838, 839, 839, 840, 840, 841, 841, - 842, 843, 843, 844, 844, 844, 845, 845, 845, 845, - 845, 846, 846, 847, 847, 848, 849, 849, 849, 849, - 850, 850, 850, 850, 851, 851, 852, 852, 852, 853, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 
854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 854, - 854, 854, 854, 854, 854, 854, 854, 854, 854, 855, - 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, - 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, - 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, - 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, - 855, 855, 855, 855, 855, 855, 855, 855, 855, 855, - 856, 856, 856, 856, 856, 856, 856, 856, 856, 856, - 856, 856, 856, 856, 856, 856, 856, 856, 856, 856, - 856, 856, 856, 856, 856, 856, 857, 857, 857, 857, - 857, 857, 857, 857, 857, 857, 857, 857, 857, 857, - 857, 857, 857, 857, 857, 857, 857, 857, 857, 857, - 857, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 720, 720, 720, 720, 720, 720, 721, 721, 721, 721, + 721, 721, 721, 721, 721, 721, 721, 721, 721, 721, + 721, 721, 721, 721, 721, 721, 721, 721, 721, 721, + 721, 721, 721, 721, 721, 721, 721, 721, 721, 721, + 721, 721, 721, 721, 721, 721, 721, 721, 721, 721, + 721, 721, 721, 721, 721, 721, 721, 721, 721, 721, + 721, 721, 721, 721, 721, 721, 721, 721, 721, 721, + 721, 721, 721, 721, 721, 722, 722, 722, 722, 722, + 722, 722, 722, 722, 722, 722, 722, 722, 722, 722, + 722, 722, 722, 722, 722, 722, 722, 722, 723, 723, + 723, 723, 723, 723, 723, 723, 723, 723, 723, 724, + 724, 724, 724, 724, 724, 724, 725, 725, 726, 726, + 727, 727, 727, 727, 727, 727, 727, 727, 727, 727, + 727, 727, 727, 727, 727, 727, 727, 727, 727, 727, + 727, 727, 727, 727, 727, 727, 727, 727, 727, 728, + 728, 729, 729, 730, 730, 731, 731, 732, 733, 733, + 733, 734, 735, 735, 736, 736, 737, 737, 737, 738, + 738, 739, 739, 739, 739, 739, 740, 740, 741, 741, + 742, 743, 743, 744, 744, 744, 745, 745, 746, 746, + 746, 746, 746, 746, 746, 746, 746, 746, 746, 746, + 747, 747, 748, 748, 749, 749, 749, 749, 749, 749, + 749, 749, 750, 750, 751, 751, 752, 752, 753, 753, + 754, 754, 754, 
755, 755, 756, 756, 757, 757, 757, + 757, 757, 757, 757, 757, 757, 757, 758, 758, 759, + 760, 760, 761, 761, 761, 761, 761, 761, 762, 763, + 764, 764, 764, 765, 765, 766, 767, 767, 768, 769, + 769, 770, 770, 771, 771, 772, 772, 772, 772, 773, + 773, 774, 774, 775, 775, 776, 776, 777, 777, 778, + 778, 779, 779, 779, 779, 780, 780, 781, 781, 782, + 782, 783, 784, 785, 785, 786, 786, 786, 786, 786, + 786, 786, 786, 786, 786, 786, 786, 786, 786, 787, + 788, 789, 789, 789, 790, 790, 791, 791, 791, 792, + 792, 792, 793, 793, 793, 794, 794, 795, 795, 796, + 796, 797, 798, 798, 798, 798, 799, 799, 800, 801, + 801, 802, 802, 802, 802, 803, 803, 804, 804, 805, + 805, 805, 805, 806, 806, 807, 808, 808, 809, 809, + 810, 810, 811, 811, 812, 812, 813, 813, 813, 813, + 813, 813, 814, 814, 815, 816, 817, 817, 817, 817, + 818, 818, 819, 819, 819, 820, 820, 820, 820, 820, + 821, 821, 821, 822, 822, 823, 823, 824, 824, 825, + 825, 825, 825, 826, 827, 827, 828, 828, 828, 829, + 829, 829, 829, 830, 830, 830, 830, 831, 831, 832, + 832, 832, 832, 832, 832, 832, 833, 833, 834, 835, + 836, 836, 836, 836, 836, 837, 837, 837, 837, 837, + 838, 838, 839, 839, 840, 840, 841, 842, 842, 842, + 843, 843, 844, 844, 845, 845, 846, 847, 847, 848, + 848, 848, 849, 849, 849, 849, 849, 850, 850, 851, + 851, 852, 853, 853, 853, 853, 854, 854, 854, 854, + 855, 855, 856, 856, 856, 857, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 
858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, - 858, 858, 859, 859, 859, 859, 859, 859, 859, 859, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 858, 858, 858, 858, 858, + 858, 858, 858, 858, 858, 859, 859, 859, 859, 859, 859, 859, 859, 859, 859, 859, 859, 859, 859, 859, - 859, 859, 859, 859, 859, 859, 859, 859, 859, 860, - 860, 860, 860, 860, 860, 860, 860, 860, 860, 860, - 860, 860, 860, 860, 860, 860, 860, 860, 860, 860, - 860, 860, 860, 860, 860, 860, 860, 860, 860, 860, - 860, 860, 860, 860, 860, 860, 860, 860, 860, 860, - 860, 860, 860, 860, 860, 860, 860, 860, 860, 860, + 859, 859, 859, 859, 859, 859, 859, 859, 859, 859, + 859, 859, 859, 859, 859, 859, 859, 859, 859, 859, + 859, 859, 859, 859, 859, 859, 859, 859, 859, 859, + 859, 859, 859, 859, 859, 859, 860, 860, 860, 860, 860, 860, 860, 860, 860, 860, 860, 860, 860, 860, 860, 860, 860, 860, 860, 860, 860, 860, 860, 860, - 860, 860, 860, 860, 860, 860 + 860, 860, 861, 861, 861, 861, 861, 861, 861, 861, + 861, 861, 861, 861, 861, 861, 861, 861, 861, 861, + 861, 861, 861, 861, 861, 861, 861, 862, 862, 862, + 862, 862, 862, 862, 862, 862, 862, 862, 862, 862, + 862, 862, 862, 862, 862, 862, 862, 862, 862, 862, + 862, 862, 862, 862, 862, 862, 862, 862, 
862, 862, + 862, 862, 862, 862, 862, 862, 862, 862, 862, 862, + 862, 862, 862, 862, 862, 862, 862, 862, 863, 863, + 863, 863, 863, 863, 863, 863, 863, 863, 863, 863, + 863, 863, 863, 863, 863, 863, 863, 863, 863, 863, + 863, 863, 863, 863, 863, 864, 864, 864, 864, 864, + 864, 864, 864, 864, 864, 864, 864, 864, 864, 864, + 864, 864, 864, 864, 864, 864, 864, 864, 864, 864, + 864, 864, 864, 864, 864, 864, 864, 864, 864, 864, + 864, 864, 864, 864, 864, 864, 864, 864, 864, 864, + 864, 864, 864, 864, 864, 864, 864, 864, 864, 864, + 864, 864, 864, 864, 864, 864, 864, 864, 864, 864, + 864, 864, 864, 864, 864, 864, 864, 864, 864, 864, + 864, 864 }; /* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */ @@ -187374,116 +201167,118 @@ static const yytype_uint8 yyr2[] = 6, 4, 6, 6, 6, 5, 5, 6, 9, 4, 5, 7, 6, 4, 8, 4, 2, 4, 3, 6, 4, 2, 2, 2, 2, 1, 2, 0, 1, 2, - 2, 2, 1, 3, 4, 2, 0, 2, 3, 2, - 3, 6, 6, 8, 6, 8, 6, 8, 6, 8, - 8, 10, 8, 10, 1, 0, 7, 1, 4, 4, - 7, 2, 1, 3, 4, 3, 0, 1, 0, 2, - 3, 5, 8, 5, 0, 5, 5, 7, 2, 0, - 1, 1, 1, 3, 2, 0, 1, 0, 1, 3, - 1, 3, 1, 3, 2, 2, 4, 5, 5, 8, - 1, 0, 3, 9, 12, 3, 0, 4, 6, 1, - 2, 1, 1, 0, 1, 2, 2, 1, 2, 2, - 1, 2, 3, 2, 2, 2, 2, 3, 3, 3, - 1, 3, 1, 0, 1, 2, 2, 2, 2, 2, - 2, 2, 2, 1, 1, 0, 9, 12, 0, 2, - 1, 1, 1, 1, 1, 1, 3, 0, 1, 2, - 1, 1, 2, 2, 3, 1, 1, 2, 2, 1, - 2, 3, 5, 2, 5, 5, 2, 3, 1, 1, - 2, 2, 0, 4, 0, 3, 4, 4, 0, 3, - 2, 0, 3, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 3, 3, 1, 2, 2, 2, 2, - 2, 2, 0, 3, 3, 3, 0, 1, 2, 1, - 2, 2, 2, 2, 4, 1, 3, 1, 3, 1, - 1, 1, 1, 3, 1, 2, 0, 1, 0, 1, - 3, 0, 2, 0, 3, 3, 1, 5, 3, 1, - 3, 1, 4, 5, 5, 6, 3, 7, 4, 11, - 1, 3, 2, 2, 2, 0, 3, 1, 1, 2, - 2, 2, 2, 1, 0, 1, 2, 6, 4, 6, - 4, 6, 8, 1, 1, 1, 1, 1, 2, 1, - 2, 1, 1, 1, 3, 3, 3, 3, 2, 2, - 1, 3, 1, 1, 1, 1, 3, 1, 1, 0, - 1, 1, 1, 6, 1, 1, 2, 3, 8, 11, - 9, 1, 1, 3, 0, 1, 3, 1, 0, 1, - 0, 1, 0, 1, 3, 1, 1, 1, 3, 0, - 2, 2, 0, 2, 0, 1, 0, 1, 1, 1, - 3, 3, 1, 1, 3, 3, 3, 3, 4, 3, - 2, 1, 1, 1, 1, 1, 3, 1, 1, 3, - 3, 1, 2, 4, 4, 2, 3, 5, 
5, 1, - 1, 10, 10, 1, 2, 4, 4, 4, 2, 2, - 3, 1, 3, 6, 2, 0, 3, 3, 4, 4, - 4, 4, 3, 2, 1, 1, 0, 1, 1, 0, - 1, 5, 1, 0, 1, 0, 3, 1, 3, 4, - 3, 1, 1, 0, 2, 2, 0, 2, 2, 1, - 1, 1, 0, 2, 4, 5, 4, 2, 3, 2, - 2, 2, 2, 1, 2, 3, 0, 1, 0, 5, - 1, 4, 6, 2, 1, 0, 4, 0, 1, 1, - 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, - 3, 0, 1, 3, 1, 1, 2, 2, 0, 1, - 3, 1, 0, 1, 2, 3, 2, 4, 2, 3, - 2, 0, 1, 2, 0, 4, 5, 2, 0, 1, - 3, 3, 3, 3, 3, 3, 1, 4, 3, 4, - 5, 4, 5, 4, 5, 2, 4, 1, 1, 0, - 1, 4, 5, 4, 0, 2, 2, 2, 1, 1, - 0, 4, 2, 1, 2, 2, 4, 2, 6, 2, - 1, 3, 4, 0, 2, 0, 2, 0, 1, 3, - 3, 2, 0, 2, 4, 1, 1, 2, 3, 5, - 6, 2, 3, 4, 4, 3, 4, 0, 1, 1, - 1, 1, 1, 2, 4, 1, 1, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 1, 2, 2, 2, - 2, 2, 1, 3, 0, 1, 1, 1, 1, 5, - 2, 1, 1, 1, 1, 4, 1, 2, 2, 1, - 3, 3, 2, 1, 0, 5, 2, 5, 2, 1, - 3, 3, 0, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, - 3, 3, 3, 3, 0, 1, 3, 3, 5, 2, - 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 2, 2, 3, 3, 2, 2, - 3, 3, 5, 4, 6, 3, 5, 4, 6, 4, - 6, 5, 7, 3, 2, 4, 3, 2, 1, 3, - 3, 3, 3, 4, 3, 4, 3, 4, 5, 6, - 6, 7, 6, 7, 6, 7, 3, 4, 4, 6, - 1, 4, 1, 3, 2, 2, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, - 2, 5, 6, 6, 7, 1, 1, 2, 2, 2, - 4, 1, 2, 1, 2, 2, 3, 5, 6, 8, - 6, 6, 4, 4, 1, 1, 1, 5, 1, 1, - 4, 1, 4, 1, 4, 1, 4, 1, 1, 1, - 1, 1, 1, 6, 6, 4, 4, 4, 4, 6, - 5, 5, 5, 4, 6, 4, 5, 0, 5, 0, - 2, 0, 1, 3, 3, 2, 2, 0, 6, 1, - 0, 3, 0, 2, 2, 0, 1, 4, 2, 2, - 2, 2, 2, 4, 3, 1, 5, 1, 1, 1, + 2, 2, 1, 3, 4, 2, 1, 0, 2, 3, + 2, 3, 6, 6, 8, 6, 8, 6, 8, 6, + 8, 8, 10, 8, 10, 1, 0, 7, 1, 4, + 4, 7, 2, 1, 3, 4, 3, 0, 1, 0, + 2, 3, 5, 8, 5, 0, 5, 5, 7, 2, + 0, 1, 1, 1, 3, 2, 0, 1, 0, 1, + 3, 1, 3, 1, 3, 2, 2, 4, 5, 5, + 8, 1, 0, 3, 9, 12, 3, 0, 4, 6, + 1, 2, 1, 1, 0, 1, 2, 2, 1, 2, + 2, 1, 2, 3, 2, 2, 2, 2, 3, 3, + 3, 1, 3, 1, 0, 1, 2, 2, 2, 2, + 2, 2, 2, 2, 1, 1, 0, 9, 12, 0, + 2, 1, 1, 1, 1, 1, 1, 3, 0, 1, + 2, 1, 1, 2, 2, 3, 1, 1, 2, 2, + 1, 2, 3, 5, 2, 5, 5, 2, 3, 1, + 1, 2, 2, 0, 4, 0, 3, 4, 4, 0, + 3, 2, 0, 3, 1, 1, 1, 1, 1, 1, + 
1, 1, 1, 1, 3, 3, 1, 2, 2, 2, + 2, 2, 2, 0, 3, 3, 3, 0, 1, 2, + 1, 2, 2, 2, 2, 4, 1, 3, 1, 3, + 1, 1, 1, 1, 3, 1, 2, 0, 1, 0, + 1, 3, 0, 2, 0, 3, 3, 1, 5, 3, + 1, 3, 1, 4, 5, 5, 6, 3, 7, 4, + 11, 1, 3, 2, 2, 2, 0, 3, 1, 1, + 2, 2, 2, 2, 1, 0, 1, 2, 6, 4, + 6, 4, 6, 8, 1, 1, 1, 1, 1, 2, + 1, 2, 1, 1, 1, 3, 3, 3, 3, 2, + 2, 1, 3, 1, 1, 1, 1, 3, 1, 1, + 0, 1, 1, 1, 6, 1, 1, 2, 3, 8, + 11, 9, 1, 1, 3, 0, 1, 3, 1, 0, + 1, 0, 1, 0, 1, 3, 1, 1, 1, 3, + 0, 2, 2, 0, 2, 0, 1, 0, 1, 1, + 1, 3, 3, 1, 1, 3, 3, 3, 3, 4, + 3, 2, 1, 1, 1, 1, 1, 3, 1, 1, + 3, 3, 1, 2, 4, 4, 2, 3, 5, 5, + 1, 1, 10, 10, 1, 2, 4, 4, 4, 2, + 2, 3, 1, 3, 6, 2, 0, 3, 3, 4, + 4, 4, 4, 3, 2, 1, 1, 0, 1, 1, + 0, 1, 5, 1, 0, 1, 0, 3, 1, 3, + 4, 3, 1, 1, 0, 2, 2, 0, 2, 2, + 1, 1, 1, 0, 2, 4, 5, 4, 2, 3, + 2, 2, 2, 2, 1, 2, 3, 0, 1, 0, + 5, 1, 4, 6, 2, 1, 0, 4, 0, 1, + 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, + 1, 3, 0, 1, 3, 1, 1, 2, 2, 0, + 1, 3, 1, 0, 1, 2, 3, 2, 4, 2, + 3, 2, 0, 1, 2, 0, 4, 5, 2, 0, + 1, 3, 3, 3, 3, 3, 3, 1, 4, 3, + 4, 5, 4, 5, 4, 5, 2, 4, 1, 1, + 0, 1, 4, 5, 4, 0, 2, 2, 2, 1, + 1, 0, 4, 2, 1, 2, 2, 4, 2, 6, + 2, 1, 3, 4, 0, 2, 0, 2, 0, 1, + 3, 3, 2, 0, 2, 4, 1, 1, 2, 3, + 5, 6, 2, 3, 5, 5, 3, 4, 0, 1, + 1, 1, 1, 1, 2, 4, 1, 1, 1, 1, + 2, 3, 0, 1, 1, 1, 1, 1, 2, 2, + 2, 2, 2, 1, 3, 0, 1, 1, 1, 1, + 5, 2, 1, 1, 1, 1, 4, 1, 2, 2, + 1, 3, 3, 2, 1, 0, 5, 2, 5, 2, + 1, 3, 3, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 4, 1, 4, 1, 4, - 1, 2, 1, 2, 1, 2, 1, 3, 1, 3, - 1, 0, 1, 3, 1, 3, 3, 1, 3, 3, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 4, 3, 2, 3, 0, 3, 3, 2, 2, - 1, 0, 2, 2, 3, 2, 1, 1, 3, 5, - 1, 2, 4, 2, 0, 1, 0, 1, 2, 2, - 2, 3, 5, 1, 0, 1, 2, 0, 2, 1, - 0, 1, 0, 1, 3, 3, 2, 1, 1, 1, - 3, 1, 2, 1, 3, 1, 1, 1, 2, 1, - 1, 2, 1, 1, 2, 6, 2, 5, 3, 3, + 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, + 3, 3, 3, 3, 3, 0, 1, 3, 3, 5, + 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, + 2, 3, 3, 5, 4, 6, 3, 5, 4, 6, + 4, 6, 5, 7, 3, 2, 4, 3, 2, 1, + 3, 3, 3, 3, 3, 3, 4, 3, 4, 3, + 4, 5, 6, 6, 
7, 6, 7, 6, 7, 3, + 4, 4, 6, 1, 4, 1, 3, 2, 2, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 2, 2, 5, 6, 6, 7, 1, 1, + 2, 2, 2, 4, 1, 2, 1, 2, 2, 3, + 5, 6, 8, 6, 6, 4, 4, 1, 1, 1, + 5, 1, 1, 4, 1, 4, 1, 4, 1, 4, + 1, 1, 1, 1, 1, 1, 6, 6, 4, 4, + 4, 4, 6, 5, 5, 5, 4, 6, 4, 5, + 0, 5, 0, 2, 0, 1, 3, 3, 2, 2, + 0, 6, 1, 0, 3, 0, 2, 2, 0, 1, + 4, 2, 2, 2, 2, 2, 4, 3, 1, 5, + 3, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 4, 1, 4, 1, 4, 1, 2, 1, 2, + 1, 2, 1, 3, 1, 3, 1, 0, 1, 3, + 1, 3, 3, 1, 3, 3, 0, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 4, 3, 2, + 3, 0, 3, 3, 2, 2, 1, 0, 2, 2, + 3, 2, 1, 1, 3, 5, 1, 2, 4, 2, + 0, 1, 0, 1, 2, 2, 2, 3, 5, 1, + 0, 1, 2, 0, 2, 1, 0, 1, 0, 1, + 3, 3, 2, 1, 1, 1, 3, 1, 2, 1, + 3, 1, 1, 1, 2, 1, 1, 2, 1, 1, + 2, 6, 2, 5, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 3, 3, 0, 1, 1, 1, 1, 1, - 1, 1, 5, 3, 0, 1, 1, 1, 1, 4, - 7, 2, 0, 1, 1, 1, 1, 13, 16, 1, - 2, 0, 1, 0, 1, 0, 2, 0, 1, 0, - 6, 8, 6, 8, 6, 8, 2, 1, 4, 3, - 2, 4, 3, 5, 1, 0, 1, 1, 0, 1, + 1, 1, 1, 1, 1, 1, 2, 2, 3, 3, + 0, 1, 1, 1, 1, 1, 1, 1, 5, 3, + 0, 1, 1, 1, 1, 4, 7, 2, 0, 1, + 1, 1, 1, 13, 16, 1, 2, 0, 1, 0, + 1, 0, 2, 0, 1, 0, 6, 8, 6, 8, + 6, 8, 2, 1, 4, 3, 2, 4, 3, 5, + 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, + 1, 1, 1, 2, 1, 1, 2, 3, 3, 1, + 3, 3, 2, 3, 3, 3, 3, 1, 1, 1, + 1, 3, 5, 1, 1, 1, 1, 3, 2, 1, + 4, 5, 5, 4, 6, 1, 1, 1, 1, 1, + 1, 0, 1, 3, 1, 0, 7, 1, 2, 3, + 2, 0, 2, 0, 2, 4, 2, 1, 1, 1, + 2, 3, 2, 2, 3, 4, 2, 1, 1, 1, + 3, 2, 9, 11, 12, 14, 3, 4, 4, 0, + 7, 10, 2, 3, 0, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 3, 1, 1, 1, 1, 1, 1, 2, 1, 1, - 2, 3, 3, 1, 3, 3, 2, 3, 3, 3, - 3, 1, 1, 1, 1, 3, 5, 1, 1, 1, - 1, 3, 2, 1, 4, 5, 5, 4, 6, 1, - 1, 1, 1, 1, 1, 0, 1, 3, 1, 0, - 7, 1, 2, 3, 2, 0, 2, 0, 2, 4, - 2, 1, 1, 1, 2, 3, 2, 2, 3, 4, - 2, 1, 1, 1, 3, 2, 9, 11, 12, 14, - 3, 4, 4, 0, 7, 10, 2, 3, 0, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -187538,8 +201333,7 @@ 
static const yytype_uint8 yyr2[] = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1 + 1, 1 }; /* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state @@ -187547,1888 +201341,1505 @@ static const yytype_uint8 yyr2[] = means the default is an error. */ static const yytype_uint16 yydefact[] = { - 128, 205, 0, 1102, 1101, 205, 0, 1077, 205, 406, - 334, 0, 1172, 0, 205, 0, 128, 0, 0, 0, - 0, 0, 0, 0, 205, 473, 0, 1171, 205, 0, - 1145, 0, 0, 0, 0, 0, 2, 4, 7, 17, + 129, 206, 0, 1108, 1107, 206, 0, 1083, 206, 407, + 335, 0, 1178, 0, 206, 0, 129, 0, 0, 0, + 0, 0, 0, 0, 206, 474, 0, 1177, 206, 0, + 1151, 0, 0, 0, 0, 0, 2, 4, 7, 17, 29, 25, 0, 27, 15, 20, 6, 31, 16, 19, - 13, 32, 11, 30, 440, 427, 475, 439, 127, 443, - 28, 14, 24, 5, 10, 22, 23, 21, 1085, 35, + 13, 32, 11, 30, 441, 428, 476, 440, 128, 444, + 28, 14, 24, 5, 10, 22, 23, 21, 1091, 35, 26, 33, 18, 8, 34, 36, 0, 9, 37, 12, - 204, 203, 197, 0, 0, 0, 0, 0, 198, 1015, - 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, 1199, - 1200, 1201, 1202, 1203, 1204, 1205, 1540, 1206, 1207, 1208, - 1489, 1490, 1541, 1491, 1492, 1209, 1210, 1211, 1212, 1213, - 1214, 1215, 1216, 1493, 1494, 1217, 1218, 1219, 1220, 1221, - 1495, 1542, 1222, 1223, 1224, 1225, 1226, 1543, 1227, 1228, - 1229, 1230, 1231, 1232, 1233, 1234, 1235, 1544, 1236, 1237, - 1238, 1545, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1496, - 1497, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, - 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, - 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, 1273, 1498, - 1274, 1275, 1276, 1277, 1499, 1278, 1279, 1280, 1500, 1281, - 1282, 1283, 1546, 1547, 1284, 1285, 1286, 1548, 1287, 1288, - 1501, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1549, 1296, - 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1306, - 1550, 1502, 1307, 1308, 1309, 1310, 1503, 1504, 1505, 1311, - 1551, 1552, 1312, 1553, 1313, 1314, 
1315, 1316, 1317, 1318, - 1554, 1319, 1555, 1320, 1321, 1322, 1323, 1324, 1325, 1326, - 1327, 1506, 1328, 1329, 1330, 1331, 1332, 1333, 1334, 1335, - 1336, 1337, 1338, 1339, 1340, 1341, 1342, 1343, 1344, 1345, - 1507, 1557, 1508, 1346, 1347, 1348, 1509, 1349, 1350, 1558, - 1351, 1510, 1352, 1511, 1353, 1354, 1355, 1356, 1357, 1358, - 1359, 1360, 1361, 1512, 1559, 1362, 1560, 1513, 1363, 1364, - 1365, 1366, 1367, 1368, 1369, 1370, 1371, 1372, 1373, 1374, - 1514, 1375, 1376, 1515, 1377, 1378, 1379, 1380, 1381, 1382, - 1383, 1384, 1385, 1386, 1387, 1388, 1516, 1389, 1390, 1391, - 1392, 1393, 1394, 1395, 1396, 1397, 1398, 1399, 1400, 1401, - 1402, 1403, 1404, 1405, 1406, 1561, 1407, 1408, 1409, 1517, - 1410, 1411, 1412, 1413, 1414, 1415, 1416, 1417, 1418, 1419, - 1420, 1421, 1422, 1423, 1424, 1425, 1426, 1518, 1427, 1428, - 1429, 1562, 1430, 1431, 1519, 1432, 1433, 1434, 1435, 1436, - 1437, 1438, 1439, 1440, 1441, 1442, 1443, 1520, 1444, 1521, - 1445, 1446, 1447, 1564, 1448, 1449, 1450, 1451, 1452, 1522, - 1523, 1453, 1454, 1524, 1455, 1525, 1456, 1457, 1526, 1458, - 1459, 1460, 1461, 1462, 1463, 1464, 1465, 1466, 1467, 1468, - 1469, 1470, 1471, 1472, 1527, 1528, 1473, 1565, 1474, 1475, - 1476, 1477, 1478, 1479, 1480, 1481, 1482, 1483, 1484, 1529, - 1530, 1531, 1532, 1533, 1534, 1535, 1536, 1537, 1538, 1539, - 1485, 1486, 1487, 1488, 1175, 0, 0, 997, 1016, 1017, - 1025, 200, 405, 128, 0, 374, 0, 0, 375, 0, - 0, 328, 327, 1068, 333, 0, 0, 0, 1015, 99, - 1506, 1377, 1520, 97, 995, 1016, 0, 351, 352, 0, - 360, 0, 345, 349, 346, 0, 370, 362, 371, 363, - 344, 364, 353, 343, 0, 372, 347, 0, 0, 0, - 201, 166, 334, 128, 0, 1090, 1091, 1089, 1080, 1085, - 1092, 1093, 0, 1076, 0, 1014, 1133, 1132, 155, 1044, - 1162, 1522, 1453, 1173, 1163, 1160, 1161, 202, 472, 470, - 0, 982, 1322, 1414, 1425, 1522, 1110, 1113, 0, 199, - 1019, 0, 444, 593, 1018, 991, 1144, 0, 1149, 0, - 1391, 448, 451, 1034, 449, 440, 0, 0, 1, 128, - 0, 0, 0, 469, 469, 0, 469, 0, 432, 440, - 435, 439, 0, 1084, 
1158, 1170, 1522, 1453, 1527, 1166, - 1167, 1295, 0, 0, 1295, 0, 1295, 0, 1295, 0, - 0, 974, 0, 975, 998, 1046, 1047, 1045, 0, 1048, - 301, 332, 331, 330, 329, 334, 1295, 1052, 0, 0, - 0, 0, 0, 1063, 100, 98, 358, 359, 0, 350, - 348, 0, 1295, 369, 1029, 365, 1295, 369, 993, 1295, - 0, 0, 162, 0, 1082, 1094, 1591, 1592, 1593, 1594, - 1596, 1595, 1597, 1598, 1599, 1600, 1601, 1602, 1603, 1604, - 1607, 1605, 1606, 1608, 1609, 1610, 1611, 1612, 1613, 1614, - 1615, 1616, 1617, 1618, 1619, 1620, 1621, 1622, 1623, 1624, - 1625, 1626, 1627, 1628, 1629, 1630, 1631, 1632, 1633, 1634, - 1635, 1636, 1637, 1638, 1639, 1640, 1641, 1108, 0, 1109, - 1099, 1088, 1095, 1096, 128, 404, 1079, 0, 0, 0, - 0, 1164, 0, 0, 0, 1000, 1002, 1003, 904, 1013, - 977, 0, 1490, 1491, 1492, 966, 0, 1493, 1494, 1495, - 1542, 841, 828, 837, 842, 829, 831, 838, 1496, 1497, - 780, 1261, 1498, 1499, 1011, 1500, 1503, 1504, 1505, 833, - 835, 1507, 1508, 0, 1012, 1510, 1511, 1358, 1513, 1514, - 1516, 1517, 839, 1519, 1521, 1522, 1523, 1524, 1525, 1010, - 1526, 840, 1528, 0, 0, 0, 988, 0, 0, 977, - 813, 0, 635, 636, 657, 658, 637, 663, 664, 666, - 638, 0, 987, 715, 857, 977, 824, 885, 758, 0, - 811, 805, 455, 983, 0, 806, 999, 977, 967, 455, - 981, 1111, 1116, 1112, 0, 0, 0, 0, 0, 595, - 594, 992, 1143, 1141, 1142, 1140, 1139, 1146, 0, 1148, - 1085, 918, 0, 450, 0, 0, 0, 430, 429, 3, - 0, 0, 1151, 0, 467, 468, 0, 0, 0, 0, - 0, 0, 0, 0, 542, 489, 490, 492, 539, 543, - 551, 0, 436, 0, 1034, 1168, 0, 0, 0, 115, - 115, 0, 0, 0, 0, 0, 92, 41, 85, 0, - 0, 0, 0, 180, 193, 0, 0, 0, 0, 0, - 190, 0, 0, 173, 43, 167, 169, 0, 115, 0, - 39, 0, 0, 0, 45, 1015, 0, 1540, 1541, 1542, - 1543, 1544, 842, 0, 1546, 1547, 1548, 1549, 1550, 1551, - 1552, 1553, 1554, 1555, 1506, 1557, 1558, 1559, 1560, 1561, - 1562, 1520, 1564, 1526, 0, 1565, 0, 816, 924, 475, - 922, 1035, 0, 1016, 1022, 973, 0, 1036, 1669, 1670, - 1671, 1672, 1673, 1674, 1675, 1676, 1677, 1678, 1679, 1680, - 1681, 1682, 1683, 1684, 1685, 1686, 1687, 1688, 1689, 
1690, - 1691, 1692, 1693, 1694, 1695, 1696, 1697, 1698, 1699, 1700, - 1701, 1702, 1703, 1704, 1705, 1706, 1707, 1708, 1709, 1710, - 1711, 1712, 1713, 1714, 1715, 1716, 1717, 1718, 1719, 1720, - 1721, 1722, 1723, 1724, 1725, 1726, 1727, 1728, 1729, 1730, - 1731, 1732, 1733, 1734, 1735, 1736, 1737, 1738, 1739, 1740, - 1629, 1741, 1742, 1743, 1744, 1745, 970, 969, 996, 1038, - 1037, 1039, 976, 0, 0, 402, 0, 0, 1049, 0, - 1295, 161, 1295, 301, 0, 301, 0, 0, 1062, 1065, - 361, 357, 355, 354, 356, 0, 367, 368, 0, 338, - 0, 1030, 0, 0, 340, 0, 0, 0, 0, 128, - 0, 174, 1104, 1105, 1103, 0, 0, 1087, 177, 194, - 1098, 1107, 1097, 1106, 1086, 1081, 0, 1078, 393, 1122, - 1121, 1130, 156, 0, 1026, 1566, 645, 1567, 674, 652, - 674, 674, 1568, 1569, 1570, 1571, 641, 641, 654, 1572, - 1573, 1574, 1575, 1576, 642, 643, 679, 1577, 1578, 1579, - 1580, 1581, 0, 0, 1582, 674, 1583, 641, 1584, 1585, - 646, 1586, 615, 0, 1587, 644, 616, 1588, 682, 682, - 1589, 669, 1590, 0, 927, 627, 628, 629, 630, 655, - 656, 631, 661, 662, 632, 714, 0, 641, 1027, 1028, - 128, 1165, 1174, 0, 809, 921, 673, 660, 965, 0, - 0, 668, 667, 0, 0, 0, 0, 0, 650, 649, - 648, 815, 930, 0, 647, 0, 0, 674, 674, 672, - 738, 0, 651, 0, 0, 945, 0, 951, 0, 0, - 0, 678, 0, 676, 0, 0, 0, 739, 719, 720, - 813, 918, 0, 807, 808, 814, 1006, 0, 0, 714, - 714, 986, 904, 0, 901, 902, 903, 0, 0, 0, - 980, 0, 912, 914, 0, 0, 754, 910, 0, 757, - 0, 0, 0, 0, 898, 899, 900, 892, 893, 894, - 895, 896, 897, 908, 891, 735, 0, 0, 859, 812, - 0, 0, 734, 0, 0, 0, 558, 0, 1004, 1001, - 968, 558, 1124, 1128, 1129, 1127, 0, 1123, 1115, 1114, - 1119, 1117, 1120, 1118, 0, 1137, 0, 1134, 555, 0, - 452, 0, 0, 1157, 0, 122, 0, 1152, 0, 447, - 446, 476, 477, 483, 445, 528, 529, 0, 0, 0, - 0, 548, 546, 519, 493, 518, 0, 0, 497, 0, - 520, 715, 541, 434, 487, 488, 491, 433, 0, 544, - 0, 554, 542, 492, 0, 1159, 1169, 0, 0, 0, - 0, 0, 1295, 0, 0, 76, 57, 253, 0, 114, - 0, 0, 0, 0, 0, 0, 0, 84, 81, 82, - 83, 0, 0, 0, 0, 178, 179, 192, 0, 183, - 184, 181, 
185, 186, 0, 0, 171, 172, 0, 0, - 0, 0, 170, 0, 0, 0, 0, 0, 0, 0, - 0, 475, 475, 475, 822, 0, 0, 474, 0, 0, - 971, 974, 392, 309, 0, 299, 0, 0, 0, 0, - 0, 334, 1055, 1053, 1051, 1054, 1056, 0, 0, 158, - 160, 0, 298, 272, 0, 1067, 376, 0, 0, 1295, - 1064, 289, 0, 369, 366, 1031, 0, 369, 994, 0, - 369, 165, 1295, 301, 0, 1083, 1100, 175, 195, 176, - 196, 425, 0, 399, 407, 412, 390, 0, 390, 0, - 409, 413, 390, 408, 390, 403, 0, 157, 0, 0, - 627, 0, 621, 617, 687, 688, 689, 690, 697, 698, - 695, 696, 691, 692, 685, 686, 693, 694, 683, 684, - 0, 699, 700, 701, 702, 703, 704, 705, 706, 633, - 1043, 0, 639, 1042, 0, 978, 920, 0, 0, 0, - 964, 960, 0, 0, 0, 0, 0, 0, 931, 932, - 933, 934, 935, 936, 937, 938, 939, 0, 0, 940, - 0, 0, 0, 671, 670, 0, 890, 901, 902, 903, - 898, 899, 900, 892, 893, 894, 895, 896, 897, 916, - 0, 0, 0, 0, 0, 0, 0, 0, 782, 0, - 0, 805, 884, 0, 918, 950, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 918, 956, 0, 0, - 977, 0, 0, 0, 1008, 1009, 716, 730, 731, 732, - 736, 1041, 1040, 985, 0, 979, 0, 0, 717, 740, - 745, 0, 957, 776, 0, 764, 0, 753, 0, 762, - 766, 741, 756, 0, 737, 0, 980, 913, 915, 0, - 911, 0, 727, 728, 729, 721, 722, 723, 724, 725, - 726, 733, 889, 887, 888, 0, 0, 0, 867, 759, - 0, 0, 761, 760, 1287, 1322, 0, 466, 466, 466, - 454, 464, 984, 0, 607, 475, 607, 0, 714, 596, - 1034, 1147, 1135, 1136, 919, 1033, 128, 0, 1155, 0, - 0, 0, 134, 117, 0, 1153, 0, 150, 558, 0, - 977, 0, 481, 482, 0, 486, 1517, 1410, 0, 0, - 0, 0, 521, 549, 0, 540, 0, 1000, 522, 999, - 523, 526, 527, 498, 550, 989, 552, 0, 545, 438, - 437, 556, 0, 42, 0, 1295, 59, 0, 0, 0, - 0, 0, 0, 208, 244, 208, 96, 1295, 369, 1295, - 369, 1194, 1262, 1426, 0, 55, 88, 0, 277, 108, - 0, 262, 306, 78, 93, 101, 0, 0, 44, 168, - 182, 187, 104, 191, 188, 1072, 189, 115, 0, 40, - 0, 102, 0, 1070, 0, 0, 46, 106, 1074, 0, - 0, 0, 0, 923, 817, 925, 926, 973, 0, 391, - 0, 300, 0, 401, 381, 382, 392, 0, 301, 1052, - 0, 0, 0, 0, 0, 292, 290, 320, 0, 297, - 291, 299, 0, 0, 248, 0, 1188, 0, 0, 377, - 373, 
0, 0, 337, 1032, 339, 0, 341, 0, 0, - 394, 0, 397, 0, 396, 400, 395, 389, 0, 420, - 0, 0, 0, 0, 0, 0, 1131, 0, 622, 618, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 928, - 471, 781, 0, 0, 0, 961, 0, 0, 855, 0, - 830, 832, 640, 845, 0, 653, 834, 836, 0, 905, - 0, 0, 0, 846, 784, 785, 0, 0, 0, 0, + 205, 204, 198, 0, 0, 0, 0, 0, 199, 1021, + 1196, 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, + 1206, 1207, 1208, 1209, 1210, 1211, 1546, 1212, 1213, 1214, + 1495, 1496, 1547, 1497, 1498, 1215, 1216, 1217, 1218, 1219, + 1220, 1221, 1222, 1499, 1500, 1223, 1224, 1225, 1226, 1227, + 1501, 1548, 1228, 1229, 1230, 1231, 1232, 1549, 1233, 1234, + 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1550, 1242, 1243, + 1244, 1551, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1502, + 1503, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260, + 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, + 1271, 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1504, + 1280, 1281, 1282, 1283, 1505, 1284, 1285, 1286, 1506, 1287, + 1288, 1289, 1552, 1553, 1290, 1291, 1292, 1554, 1293, 1294, + 1507, 1295, 1296, 1297, 1298, 1299, 1300, 1301, 1555, 1302, + 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, 1311, 1312, + 1556, 1508, 1313, 1314, 1315, 1316, 1509, 1510, 1511, 1317, + 1557, 1558, 1318, 1559, 1319, 1320, 1321, 1322, 1323, 1324, + 1560, 1325, 1561, 1326, 1327, 1328, 1329, 1330, 1331, 1332, + 1333, 1512, 1334, 1335, 1336, 1337, 1338, 1339, 1340, 1341, + 1342, 1343, 1344, 1345, 1346, 1347, 1348, 1349, 1350, 1351, + 1513, 1563, 1514, 1352, 1353, 1354, 1515, 1355, 1356, 1564, + 1357, 1516, 1358, 1517, 1359, 1360, 1361, 1362, 1363, 1364, + 1365, 1366, 1367, 1518, 1565, 1368, 1566, 1519, 1369, 1370, + 1371, 1372, 1373, 1374, 1375, 1376, 1377, 1378, 1379, 1380, + 1520, 1381, 1382, 1521, 1383, 1384, 1385, 1386, 1387, 1388, + 1389, 1390, 1391, 1392, 1393, 1394, 1522, 1395, 1396, 1397, + 1398, 1399, 1400, 1401, 1402, 1403, 1404, 1405, 1406, 1407, + 1408, 1409, 1410, 1411, 1412, 1567, 1413, 1414, 1415, 1523, + 
1416, 1417, 1418, 1419, 1420, 1421, 1422, 1423, 1424, 1425, + 1426, 1427, 1428, 1429, 1430, 1431, 1432, 1524, 1433, 1434, + 1435, 1568, 1436, 1437, 1525, 1438, 1439, 1440, 1441, 1442, + 1443, 1444, 1445, 1446, 1447, 1448, 1449, 1526, 1450, 1527, + 1451, 1452, 1453, 1570, 1454, 1455, 1456, 1457, 1458, 1528, + 1529, 1459, 1460, 1530, 1461, 1531, 1462, 1463, 1532, 1464, + 1465, 1466, 1467, 1468, 1469, 1470, 1471, 1472, 1473, 1474, + 1475, 1476, 1477, 1478, 1533, 1534, 1479, 1571, 1480, 1481, + 1482, 1483, 1484, 1485, 1486, 1487, 1488, 1489, 1490, 1535, + 1536, 1537, 1538, 1539, 1540, 1541, 1542, 1543, 1544, 1545, + 1491, 1492, 1493, 1494, 1181, 0, 0, 1003, 1022, 1023, + 1031, 201, 406, 129, 0, 375, 0, 0, 376, 0, + 0, 329, 328, 1074, 334, 0, 0, 0, 1021, 100, + 1512, 1383, 1526, 98, 1001, 1022, 0, 352, 353, 0, + 361, 0, 346, 350, 347, 0, 371, 363, 372, 364, + 345, 365, 354, 344, 0, 373, 348, 0, 0, 0, + 202, 167, 335, 129, 0, 1096, 1097, 1095, 1086, 1091, + 1098, 1099, 0, 1082, 0, 1020, 1139, 1138, 156, 1050, + 1168, 1528, 1459, 1179, 1169, 1166, 1167, 203, 473, 471, + 0, 988, 1328, 1420, 1431, 1528, 1116, 1119, 0, 200, + 1025, 0, 445, 594, 1024, 997, 1150, 0, 1155, 0, + 1397, 449, 452, 1040, 450, 441, 0, 0, 1, 129, + 0, 0, 0, 470, 470, 0, 470, 0, 433, 441, + 436, 440, 0, 1090, 1164, 1176, 1528, 1459, 1533, 1172, + 1173, 1301, 0, 0, 1301, 0, 1301, 0, 1301, 0, + 0, 980, 0, 981, 1004, 1052, 1053, 1051, 0, 1054, + 302, 333, 332, 331, 330, 335, 1301, 1058, 0, 0, + 0, 0, 0, 1069, 101, 99, 359, 360, 0, 351, + 349, 0, 1301, 370, 1035, 366, 1301, 370, 999, 1301, + 0, 0, 163, 0, 1088, 1100, 1597, 1598, 1599, 1600, + 1602, 1601, 1603, 1604, 1605, 1606, 1607, 1608, 1609, 1610, + 1613, 1611, 1612, 1614, 1615, 1616, 1617, 1618, 1619, 1620, + 1621, 1622, 1623, 1624, 1625, 1626, 1627, 1628, 1629, 1630, + 1631, 1632, 1633, 1634, 1635, 1636, 1637, 1638, 1639, 1640, + 1641, 1642, 1643, 1644, 1645, 1646, 1647, 1114, 0, 1115, + 1105, 1094, 1101, 1102, 129, 405, 1085, 0, 0, 0, + 0, 1170, 0, 
0, 0, 1006, 1008, 1009, 910, 1019, + 983, 0, 1496, 1497, 1498, 972, 0, 1499, 1500, 1501, + 1548, 844, 831, 840, 845, 832, 834, 841, 1502, 1503, + 783, 1267, 1504, 1505, 1017, 1506, 1509, 1510, 1511, 836, + 838, 1513, 1514, 0, 1018, 1516, 1517, 1364, 1519, 1520, + 1522, 1523, 842, 1525, 1527, 1528, 1529, 1530, 1531, 1016, + 1532, 843, 1534, 0, 0, 0, 994, 927, 0, 0, + 0, 983, 816, 0, 636, 637, 658, 659, 638, 664, + 665, 667, 639, 0, 993, 716, 860, 983, 827, 888, + 759, 0, 814, 808, 456, 989, 0, 809, 1005, 983, + 973, 456, 987, 1117, 1122, 1118, 0, 0, 0, 0, + 0, 596, 595, 998, 1149, 1147, 1148, 1146, 1145, 1152, + 0, 1154, 1091, 924, 0, 451, 0, 0, 0, 431, + 430, 3, 0, 0, 1157, 0, 468, 469, 0, 0, + 0, 0, 0, 0, 0, 0, 543, 490, 491, 493, + 540, 544, 552, 0, 437, 0, 1040, 1174, 0, 0, + 0, 116, 116, 0, 0, 0, 0, 0, 92, 41, + 85, 0, 0, 0, 0, 181, 194, 0, 0, 0, + 0, 0, 191, 0, 0, 174, 43, 168, 170, 0, + 116, 0, 39, 0, 0, 0, 45, 1021, 0, 1546, + 1547, 1548, 1549, 1550, 845, 0, 1552, 1553, 1554, 1555, + 1556, 1557, 1558, 1559, 1560, 1561, 1512, 1563, 1564, 1565, + 1566, 1567, 1568, 1526, 1570, 1532, 0, 1571, 0, 819, + 930, 476, 928, 1041, 0, 1022, 1028, 979, 0, 1042, + 1675, 1676, 1677, 1678, 1679, 1680, 1681, 1682, 1683, 1684, + 1685, 1686, 1687, 1688, 1689, 1690, 1691, 1692, 1693, 1694, + 1695, 1696, 1697, 1698, 1699, 1700, 1701, 1702, 1703, 1704, + 1705, 1706, 1707, 1708, 1709, 1710, 1711, 1712, 1713, 1714, + 1715, 1716, 1717, 1718, 1719, 1720, 1721, 1722, 1723, 1724, + 1725, 1726, 1727, 1728, 1729, 1730, 1731, 1732, 1733, 1734, + 1735, 1736, 1737, 1738, 1739, 1740, 1741, 1742, 1743, 1744, + 1745, 1746, 1635, 1747, 1748, 1749, 1750, 1751, 976, 975, + 1002, 1044, 1043, 1045, 982, 0, 0, 403, 0, 0, + 1055, 0, 1301, 162, 1301, 302, 0, 302, 0, 0, + 1068, 1071, 362, 358, 356, 355, 357, 0, 368, 369, + 0, 339, 0, 1036, 0, 0, 341, 0, 0, 0, + 0, 129, 0, 175, 1110, 1111, 1109, 0, 0, 1093, + 178, 195, 1104, 1113, 1103, 1112, 1092, 1087, 0, 1084, + 394, 1128, 1127, 1136, 157, 0, 1032, 
1572, 646, 1573, + 675, 653, 675, 675, 1574, 1575, 1576, 1577, 642, 642, + 655, 1578, 1579, 1580, 1581, 1582, 643, 644, 680, 1583, + 1584, 1585, 1586, 1587, 0, 0, 1588, 675, 1589, 642, + 1590, 1591, 647, 1592, 616, 0, 1593, 645, 617, 1594, + 683, 683, 1595, 670, 1596, 0, 933, 628, 629, 630, + 631, 656, 657, 632, 662, 663, 633, 715, 0, 642, + 1033, 1034, 129, 1171, 1180, 0, 812, 927, 674, 661, + 971, 0, 0, 669, 668, 0, 0, 0, 0, 0, + 651, 650, 649, 818, 936, 0, 648, 0, 0, 675, + 675, 673, 739, 0, 652, 0, 0, 951, 0, 957, + 0, 0, 0, 679, 0, 677, 0, 0, 0, 740, + 720, 721, 926, 0, 816, 924, 0, 891, 0, 1024, + 0, 810, 811, 817, 1012, 0, 0, 715, 715, 992, + 910, 0, 907, 908, 909, 0, 0, 0, 986, 0, + 918, 920, 0, 0, 755, 916, 0, 758, 0, 0, + 0, 0, 904, 905, 906, 898, 899, 900, 901, 902, + 903, 914, 897, 736, 0, 0, 862, 815, 0, 0, + 735, 0, 0, 0, 559, 0, 1010, 1007, 974, 559, + 1130, 1134, 1135, 1133, 0, 1129, 1121, 1120, 1125, 1123, + 1126, 1124, 0, 1143, 0, 1140, 556, 0, 453, 0, + 0, 1163, 0, 123, 0, 1158, 0, 448, 447, 477, + 478, 484, 446, 529, 530, 0, 0, 0, 0, 549, + 547, 520, 494, 519, 0, 0, 498, 0, 521, 716, + 542, 435, 488, 489, 492, 434, 0, 545, 0, 555, + 543, 493, 0, 1165, 1175, 0, 0, 0, 0, 0, + 1301, 0, 0, 76, 57, 254, 0, 115, 0, 0, + 0, 0, 0, 0, 0, 84, 81, 82, 83, 0, + 0, 0, 0, 179, 180, 193, 0, 184, 185, 182, + 186, 187, 0, 0, 172, 173, 0, 0, 0, 0, + 171, 0, 0, 0, 0, 0, 0, 0, 0, 476, + 476, 476, 825, 0, 0, 475, 0, 0, 977, 980, + 393, 310, 0, 300, 0, 0, 0, 0, 0, 335, + 1061, 1059, 1057, 1060, 1062, 0, 0, 159, 161, 0, + 299, 273, 0, 1073, 377, 0, 0, 1301, 1070, 290, + 0, 370, 367, 1037, 0, 370, 1000, 0, 370, 166, + 1301, 302, 0, 1089, 1106, 176, 196, 177, 197, 426, + 0, 400, 408, 413, 391, 0, 391, 0, 410, 414, + 391, 409, 391, 404, 0, 158, 0, 0, 628, 0, + 622, 618, 688, 689, 690, 691, 698, 699, 696, 697, + 692, 693, 686, 687, 694, 695, 684, 685, 0, 700, + 701, 702, 703, 704, 705, 706, 707, 634, 1049, 0, + 640, 1048, 0, 984, 0, 0, 0, 970, 966, 0, + 0, 0, 0, 0, 0, 
937, 938, 939, 940, 941, + 942, 943, 944, 945, 0, 0, 946, 0, 0, 0, + 672, 671, 0, 896, 907, 908, 909, 904, 905, 906, + 898, 899, 900, 901, 902, 903, 922, 0, 0, 0, + 0, 0, 0, 0, 0, 785, 0, 0, 808, 887, + 0, 924, 956, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 924, 962, 0, 0, 761, 983, 0, + 0, 760, 0, 0, 0, 1014, 1015, 717, 731, 732, + 733, 737, 1047, 1046, 991, 0, 985, 0, 0, 718, + 741, 746, 0, 963, 779, 0, 767, 0, 754, 0, + 765, 769, 742, 757, 0, 738, 0, 986, 919, 921, + 0, 917, 0, 728, 729, 730, 722, 723, 724, 725, + 726, 727, 734, 895, 893, 894, 0, 0, 0, 870, + 762, 0, 0, 764, 763, 1293, 1328, 0, 467, 467, + 467, 455, 465, 990, 0, 608, 476, 608, 0, 715, + 597, 1040, 1153, 1141, 1142, 925, 1039, 129, 0, 1161, + 0, 0, 0, 135, 118, 0, 1159, 0, 151, 559, + 0, 983, 0, 482, 483, 0, 487, 1523, 1416, 0, + 0, 0, 0, 522, 550, 0, 541, 0, 1006, 523, + 1005, 524, 527, 528, 499, 551, 995, 553, 0, 546, + 439, 438, 557, 0, 42, 0, 1301, 59, 0, 0, + 0, 0, 0, 0, 209, 245, 209, 97, 1301, 370, + 1301, 370, 1200, 1268, 1432, 0, 55, 88, 0, 278, + 109, 0, 263, 307, 78, 93, 102, 0, 0, 44, + 169, 183, 188, 105, 192, 189, 1078, 190, 116, 0, + 40, 0, 103, 0, 1076, 0, 0, 46, 107, 1080, + 0, 0, 0, 0, 929, 820, 931, 932, 979, 0, + 392, 0, 301, 0, 402, 382, 383, 393, 0, 302, + 1058, 0, 0, 0, 0, 0, 293, 291, 321, 0, + 298, 292, 300, 0, 0, 249, 0, 1194, 0, 0, + 378, 374, 0, 0, 338, 1038, 340, 0, 342, 0, + 0, 395, 0, 398, 0, 397, 401, 396, 390, 0, + 421, 0, 0, 0, 0, 0, 0, 1137, 0, 623, + 619, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 934, 472, 784, 0, 0, 0, 967, 0, 0, 858, + 0, 833, 835, 641, 848, 0, 654, 837, 839, 0, + 911, 0, 0, 0, 849, 787, 788, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 803, 802, 850, 886, 0, 0, + 954, 955, 851, 682, 681, 683, 683, 0, 0, 961, + 0, 0, 0, 856, 0, 813, 925, 892, 890, 666, + 715, 0, 0, 0, 0, 0, 0, 0, 768, 756, + 0, 766, 770, 0, 0, 0, 750, 0, 0, 748, + 780, 744, 0, 0, 781, 0, 0, 0, 826, 467, + 467, 467, 467, 464, 466, 0, 0, 0, 0, 1416, + 0, 580, 558, 560, 567, 580, 585, 828, 
606, 829, + 1024, 0, 532, 0, 532, 0, 1131, 1144, 0, 1162, + 0, 140, 122, 142, 141, 0, 149, 0, 983, 0, + 140, 124, 0, 143, 0, 1161, 0, 155, 479, 0, + 912, 487, 0, 481, 526, 525, 0, 497, 548, 495, + 0, 554, 0, 0, 0, 253, 0, 0, 0, 209, + 0, 0, 317, 0, 304, 77, 0, 0, 0, 51, + 96, 69, 61, 47, 75, 0, 0, 80, 0, 73, + 90, 91, 89, 94, 0, 237, 219, 250, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 800, 799, 847, 883, 0, 0, 948, - 949, 848, 681, 680, 682, 682, 0, 0, 955, 0, - 0, 0, 853, 0, 810, 919, 665, 714, 0, 0, - 0, 0, 0, 0, 0, 765, 755, 0, 763, 767, - 0, 0, 0, 749, 0, 0, 747, 777, 743, 0, - 0, 778, 0, 0, 0, 823, 466, 466, 466, 466, - 463, 465, 0, 0, 0, 0, 1410, 0, 579, 557, - 559, 566, 579, 584, 825, 605, 826, 1018, 0, 531, - 0, 531, 0, 1125, 1138, 0, 1156, 0, 139, 121, - 141, 140, 0, 148, 0, 977, 0, 139, 123, 0, - 142, 0, 1155, 0, 154, 478, 0, 906, 486, 0, - 480, 525, 524, 0, 496, 547, 494, 0, 553, 0, - 0, 0, 252, 0, 0, 0, 208, 0, 0, 316, - 0, 303, 77, 0, 0, 0, 51, 0, 69, 61, - 47, 75, 0, 0, 80, 0, 73, 90, 91, 89, - 94, 0, 236, 218, 249, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 820, 821, - 818, 475, 972, 423, 424, 173, 422, 310, 0, 0, - 1067, 1050, 0, 0, 161, 301, 0, 276, 272, 0, - 270, 269, 271, 0, 1189, 166, 0, 1184, 1066, 0, - 0, 1061, 369, 0, 426, 0, 385, 421, 410, 415, - 0, 419, 417, 416, 411, 414, 624, 0, 623, 0, - 613, 0, 625, 0, 634, 707, 708, 709, 710, 711, - 712, 713, 659, 0, 963, 959, 0, 827, 929, 0, - 917, 943, 942, 783, 795, 796, 797, 944, 0, 0, - 0, 792, 793, 794, 786, 787, 788, 789, 790, 791, - 798, 953, 952, 946, 947, 677, 675, 0, 850, 851, - 852, 954, 0, 886, 1007, 718, 0, 0, 746, 958, - 768, 0, 0, 0, 742, 905, 0, 0, 0, 0, - 0, 751, 0, 0, 0, 870, 865, 866, 0, 0, - 0, 0, 457, 456, 462, 579, 584, 0, 440, 0, - 566, 0, 578, 515, 577, 0, 0, 590, 588, 0, - 590, 0, 590, 0, 515, 0, 580, 515, 577, 0, - 597, 992, 606, 0, 538, 817, 538, 0, 453, 1154, - 0, 1150, 0, 0, 0, 129, 126, 116, 0, 0, - 151, 139, 130, 0, 479, 484, 485, 495, 990, 109, - 208, 0, 0, 58, 0, 
318, 264, 296, 279, 0, - 0, 0, 209, 0, 284, 0, 50, 70, 0, 66, - 0, 95, 0, 0, 0, 0, 0, 53, 65, 0, - 48, 0, 369, 369, 56, 263, 1026, 1566, 1567, 1568, - 1569, 1570, 1571, 1572, 1573, 1574, 1575, 1576, 1577, 1578, - 1579, 1580, 1581, 1658, 1582, 215, 1583, 1358, 1584, 1585, - 1586, 0, 1587, 616, 1588, 1589, 1590, 892, 893, 213, - 305, 210, 311, 212, 214, 0, 1027, 211, 308, 105, - 1073, 0, 103, 0, 1071, 112, 110, 107, 1075, 0, - 404, 384, 0, 0, 1295, 0, 1067, 159, 298, 0, - 326, 248, 321, 0, 1188, 1186, 0, 1183, 0, 0, - 0, 342, 0, 398, 0, 418, 0, 0, 619, 626, - 962, 843, 854, 941, 0, 0, 0, 0, 849, 844, - 774, 772, 769, 0, 770, 750, 0, 0, 748, 744, - 0, 779, 856, 0, 872, 869, 461, 460, 459, 458, - 565, 563, 0, 568, 1018, 575, 508, 514, 564, 0, - 560, 0, 589, 585, 0, 586, 0, 0, 587, 0, - 561, 0, 1018, 562, 0, 604, 0, 0, 861, 1005, - 861, 1126, 138, 118, 0, 119, 149, 0, 0, 0, - 0, 143, 378, 0, 314, 60, 0, 296, 0, 208, - 281, 280, 283, 278, 282, 285, 0, 0, 0, 0, - 0, 265, 0, 0, 0, 229, 0, 0, 296, 302, - 225, 226, 335, 0, 0, 0, 62, 52, 49, 54, - 63, 0, 0, 64, 67, 612, 79, 72, 1658, 1665, - 0, 0, 0, 0, 0, 819, 380, 387, 173, 0, - 0, 0, 0, 298, 0, 0, 0, 1188, 0, 0, - 206, 0, 245, 0, 163, 1187, 0, 1176, 0, 1059, - 1060, 0, 0, 386, 620, 614, 801, 0, 0, 0, - 771, 775, 773, 752, 858, 0, 475, 600, 0, 603, - 567, 0, 0, 503, 510, 0, 513, 507, 0, 569, - 0, 0, 571, 573, 0, 0, 0, 608, 0, 0, - 0, 995, 0, 530, 532, 535, 534, 537, 0, 506, - 506, 0, 0, 0, 152, 0, 145, 145, 0, 131, - 907, 0, 208, 0, 295, 315, 243, 0, 0, 227, - 0, 233, 0, 267, 268, 266, 228, 296, 301, 230, - 336, 0, 71, 0, 87, 0, 0, 307, 113, 111, - 404, 0, 1067, 1183, 0, 0, 272, 166, 1185, 261, - 254, 255, 256, 257, 258, 259, 260, 275, 274, 246, - 247, 0, 0, 0, 1061, 0, 802, 0, 803, 0, - 875, 605, 0, 0, 599, 0, 501, 499, 502, 504, - 500, 0, 0, 576, 592, 0, 572, 570, 581, 0, - 612, 0, 583, 536, 0, 860, 862, 0, 0, 442, - 441, 0, 125, 0, 607, 0, 0, 147, 147, 133, - 0, 301, 317, 0, 287, 294, 286, 0, 0, 224, - 0, 231, 325, 217, 
611, 0, 74, 0, 312, 379, - 383, 0, 1177, 0, 1183, 248, 1188, 0, 1180, 0, - 0, 1067, 804, 871, 0, 0, 0, 598, 601, 0, - 574, 0, 0, 0, 609, 610, 582, 533, 0, 0, - 508, 120, 145, 124, 153, 144, 483, 146, 483, 0, - 325, 273, 0, 0, 251, 217, 0, 242, 0, 68, - 86, 313, 0, 298, 1178, 207, 164, 1181, 1182, 0, - 607, 1489, 1238, 1460, 0, 873, 876, 874, 868, 0, - 511, 0, 517, 591, 863, 864, 505, 147, 486, 486, - 607, 242, 288, 293, 0, 232, 234, 322, 323, 324, - 0, 238, 235, 239, 0, 1183, 0, 1057, 0, 880, - 879, 878, 882, 881, 602, 0, 0, 509, 483, 136, - 135, 132, 208, 250, 0, 0, 0, 240, 0, 241, - 216, 1179, 1067, 0, 512, 0, 486, 319, 221, 0, - 220, 0, 304, 237, 607, 877, 0, 137, 219, 223, - 222, 1058, 516 + 0, 823, 824, 821, 476, 978, 424, 425, 174, 423, + 311, 0, 0, 1073, 1056, 0, 0, 162, 302, 0, + 277, 273, 0, 271, 270, 272, 0, 1195, 167, 0, + 1190, 1072, 0, 0, 1067, 370, 0, 427, 0, 386, + 422, 411, 416, 0, 420, 418, 417, 412, 415, 628, + 0, 628, 0, 614, 0, 626, 0, 635, 708, 709, + 710, 711, 712, 713, 714, 660, 0, 969, 965, 0, + 830, 935, 0, 923, 949, 948, 786, 798, 799, 800, + 950, 0, 0, 0, 795, 796, 797, 789, 790, 791, + 792, 793, 794, 801, 959, 958, 952, 953, 678, 676, + 0, 853, 854, 855, 960, 0, 889, 1013, 719, 0, + 0, 747, 964, 771, 0, 0, 0, 743, 911, 0, + 0, 0, 0, 0, 752, 0, 0, 0, 873, 868, + 869, 0, 0, 0, 0, 458, 457, 463, 580, 585, + 0, 441, 0, 567, 0, 579, 516, 578, 0, 0, + 591, 589, 0, 591, 0, 591, 0, 516, 0, 581, + 516, 578, 0, 598, 998, 607, 0, 539, 820, 539, + 0, 454, 1160, 0, 1156, 0, 0, 0, 130, 127, + 117, 0, 0, 152, 140, 131, 0, 480, 485, 486, + 496, 996, 110, 209, 0, 0, 58, 0, 319, 265, + 297, 280, 0, 0, 0, 210, 0, 285, 0, 50, + 70, 0, 66, 0, 95, 0, 0, 0, 0, 0, + 53, 65, 0, 48, 0, 370, 370, 56, 264, 1032, + 1572, 1573, 1574, 1575, 1576, 1577, 1578, 1579, 1580, 1581, + 1582, 1583, 1584, 1585, 1586, 1587, 1664, 1588, 216, 1589, + 1364, 1590, 1591, 1592, 0, 1593, 617, 1594, 1595, 1596, + 898, 899, 214, 306, 211, 312, 213, 215, 0, 1033, + 212, 309, 106, 1079, 0, 
104, 0, 1077, 113, 111, + 108, 1081, 0, 405, 385, 0, 0, 1301, 0, 1073, + 160, 299, 0, 327, 249, 322, 0, 1194, 1192, 0, + 1189, 0, 0, 0, 343, 0, 399, 0, 419, 625, + 0, 624, 0, 620, 627, 968, 846, 857, 947, 0, + 0, 0, 0, 852, 847, 777, 775, 772, 0, 773, + 751, 0, 0, 749, 745, 0, 782, 859, 0, 875, + 872, 462, 461, 460, 459, 566, 564, 0, 569, 1024, + 576, 509, 515, 565, 0, 561, 0, 590, 586, 0, + 587, 0, 0, 588, 0, 562, 0, 1024, 563, 0, + 605, 0, 0, 864, 1011, 864, 1132, 139, 119, 0, + 120, 150, 0, 0, 0, 0, 144, 379, 0, 315, + 60, 0, 297, 0, 209, 282, 281, 284, 279, 283, + 286, 0, 0, 0, 0, 0, 266, 0, 0, 0, + 230, 0, 0, 297, 303, 226, 227, 336, 0, 0, + 0, 62, 52, 49, 54, 63, 0, 0, 64, 67, + 613, 79, 72, 1664, 1671, 0, 0, 0, 0, 0, + 822, 381, 388, 174, 0, 0, 0, 0, 299, 0, + 0, 0, 1194, 0, 0, 207, 0, 246, 0, 164, + 1193, 0, 1182, 0, 1065, 1066, 0, 0, 387, 621, + 615, 804, 0, 0, 0, 774, 778, 776, 753, 861, + 0, 476, 601, 0, 604, 568, 0, 0, 504, 511, + 0, 514, 508, 0, 570, 0, 0, 572, 574, 0, + 0, 0, 609, 0, 0, 0, 1001, 0, 531, 533, + 536, 535, 538, 0, 507, 507, 0, 0, 0, 153, + 0, 146, 146, 0, 132, 913, 0, 209, 0, 296, + 316, 244, 0, 0, 228, 0, 234, 0, 268, 269, + 267, 229, 297, 302, 231, 337, 0, 71, 0, 87, + 0, 0, 308, 114, 112, 405, 0, 1073, 1189, 0, + 0, 273, 167, 1191, 262, 255, 256, 257, 258, 259, + 260, 261, 276, 275, 247, 248, 0, 0, 0, 1067, + 0, 805, 0, 806, 0, 878, 606, 0, 0, 600, + 0, 502, 500, 503, 505, 501, 0, 0, 577, 593, + 0, 573, 571, 582, 0, 613, 0, 584, 537, 0, + 863, 865, 0, 0, 443, 442, 0, 126, 0, 608, + 0, 0, 148, 148, 134, 0, 302, 318, 0, 288, + 295, 287, 0, 0, 225, 0, 232, 326, 218, 612, + 0, 74, 0, 313, 380, 384, 0, 1183, 0, 1189, + 249, 1194, 0, 1186, 0, 0, 1073, 807, 874, 0, + 0, 0, 599, 602, 0, 575, 0, 0, 0, 610, + 611, 583, 534, 0, 0, 509, 121, 146, 125, 154, + 145, 484, 147, 484, 0, 326, 274, 0, 0, 252, + 218, 0, 243, 0, 68, 86, 314, 0, 299, 1184, + 208, 165, 1187, 1188, 0, 608, 1495, 1244, 1466, 0, + 876, 879, 877, 871, 0, 512, 0, 518, 592, 866, + 
867, 506, 148, 487, 487, 608, 243, 289, 294, 0, + 233, 235, 323, 324, 325, 0, 239, 236, 240, 0, + 1189, 0, 1063, 0, 883, 882, 881, 885, 884, 603, + 0, 0, 510, 484, 137, 136, 133, 209, 251, 0, + 0, 0, 241, 0, 242, 217, 1185, 1073, 0, 513, + 0, 487, 320, 222, 0, 221, 0, 305, 238, 608, + 880, 0, 138, 220, 224, 223, 1064, 517 }; /* YYDEFGOTO[NTERM-NUM]. */ static const yytype_int16 yydefgoto[] = { - -1, 35, 36, 37, 38, 2038, 2039, 2040, 1744, 886, - 2706, 1745, 887, 888, 2042, 39, 40, 1361, 525, 1682, - 1304, 2449, 42, 1993, 1687, 1997, 2584, 2241, 1992, 1999, - 2687, 2746, 1994, 1688, 2585, 1689, 43, 44, 1429, 45, - 652, 46, 1430, 1388, 1099, 906, 1378, 1088, 47, 82, - 48, 2029, 2330, 2759, 2052, 2842, 2479, 2480, 1746, 2801, - 2802, 2031, 2094, 1370, 2795, 1805, 2637, 1750, 1733, 2481, - 1814, 2594, 2360, 1747, 2272, 1806, 2466, 2693, 1440, 1807, - 2694, 2459, 1808, 1413, 1433, 2274, 2803, 1751, 1414, 2331, - 1357, 1809, 2757, 1810, 485, 2484, 49, 517, 518, 643, - 1069, 519, 50, 486, 1057, 526, 52, 1796, 2508, 2105, - 2509, 1838, 1790, 1097, 1835, 1461, 1417, 1098, 474, 1475, - 2106, 2075, 1462, 527, 790, 55, 56, 57, 577, 571, - 572, 1276, 1660, 1962, 856, 550, 551, 587, 1407, 1311, - 1312, 1695, 2010, 1336, 1337, 865, 866, 2554, 2679, 2555, - 2556, 2417, 2418, 2817, 1324, 1328, 1329, 1708, 1701, 1317, - 2234, 2573, 2574, 2575, 2438, 1332, 1333, 868, 869, 870, - 1341, 1718, 59, 1664, 1969, 1970, 1971, 2212, 2213, 2227, - 2223, 2423, 2562, 1972, 1973, 2547, 2548, 2654, 2230, 1979, - 2566, 2567, 2614, 1850, 1153, 1154, 1483, 1155, 791, 1156, - 1188, 792, 1194, 1158, 793, 794, 795, 1161, 796, 797, - 798, 799, 1177, 800, 801, 1211, 1501, 1502, 1503, 1504, - 1505, 1506, 1507, 1508, 1509, 841, 1567, 803, 804, 805, - 1975, 806, 1268, 1648, 2579, 2675, 2676, 1955, 2196, 2404, - 2546, 2726, 2775, 2776, 807, 808, 1645, 1263, 1264, 809, - 2333, 1266, 1560, 1587, 1517, 949, 950, 1166, 1537, 1538, - 1563, 1882, 1570, 1576, 1909, 1910, 1588, 1613, 810, 1520, - 1521, 1866, 1179, 
811, 613, 956, 614, 1174, 1607, 819, - 812, 813, 1714, 563, 2558, 648, 1037, 814, 815, 816, - 817, 818, 565, 951, 467, 1167, 2747, 1071, 846, 952, - 1748, 1603, 60, 720, 618, 61, 1048, 1424, 62, 2530, - 2370, 1059, 1442, 1818, 487, 63, 64, 65, 66, 67, - 594, 1091, 528, 1092, 1093, 708, 68, 1100, 710, 711, - 69, 556, 557, 1101, 1288, 1102, 70, 537, 71, 837, - 568, 838, 840, 530, 853, 1988, 1678, 73, 74, 544, - 545, 75, 76, 558, 77, 78, 2527, 531, 2097, 1054, - 495, 469, 470, 1169, 1040, 954, 1041 + -1, 35, 36, 37, 38, 2051, 2052, 2053, 1755, 888, + 2721, 1756, 889, 890, 2055, 39, 40, 1369, 525, 1693, + 1312, 2464, 42, 2006, 1698, 2010, 2599, 2254, 2005, 2012, + 2702, 2761, 2007, 1699, 2600, 1700, 43, 44, 1437, 45, + 652, 46, 1438, 1396, 1101, 908, 1386, 1090, 47, 82, + 48, 2042, 2343, 2774, 2065, 2857, 2494, 2495, 1757, 2816, + 2817, 2044, 2107, 1378, 2810, 1816, 2652, 1761, 1744, 2496, + 1825, 2609, 2373, 1758, 2285, 1817, 2481, 2708, 1448, 1818, + 2709, 2474, 1819, 1421, 1441, 2287, 2818, 1762, 1422, 2344, + 1365, 1820, 2772, 1821, 485, 2499, 49, 517, 518, 643, + 1071, 519, 50, 486, 1059, 526, 52, 1807, 2523, 2118, + 2524, 1849, 1801, 1099, 1846, 1469, 1425, 1100, 474, 1483, + 2119, 2088, 1470, 527, 792, 55, 56, 57, 577, 571, + 572, 1284, 1671, 1975, 858, 550, 551, 587, 1415, 1319, + 1320, 1706, 2023, 1344, 1345, 867, 868, 2569, 2694, 2570, + 2571, 2432, 2433, 2832, 1332, 1336, 1337, 1719, 1712, 1325, + 2247, 2588, 2589, 2590, 2453, 1340, 1341, 870, 871, 872, + 1349, 1729, 59, 1675, 1982, 1983, 1984, 2225, 2226, 2240, + 2236, 2438, 2577, 1985, 1986, 2562, 2563, 2669, 2243, 1992, + 2581, 2582, 2629, 1861, 1155, 1156, 1491, 1157, 793, 1158, + 1190, 794, 1196, 1160, 795, 796, 797, 1163, 798, 799, + 800, 801, 1179, 802, 803, 1213, 1509, 1510, 1511, 1512, + 1513, 1514, 1515, 1516, 1517, 843, 1574, 805, 806, 807, + 1988, 808, 1276, 1659, 2594, 2690, 2691, 1968, 2209, 2419, + 2561, 2741, 2790, 2791, 809, 810, 1227, 1228, 1656, 1271, + 1272, 811, 2346, 1274, 1567, 1594, 
1223, 951, 952, 1168, + 1544, 1545, 1570, 1893, 1577, 1583, 1920, 1921, 1595, 1624, + 812, 1527, 1528, 1877, 1181, 813, 613, 958, 614, 1176, + 1618, 821, 814, 815, 1725, 563, 2573, 648, 1039, 816, + 817, 818, 819, 820, 565, 953, 467, 1169, 2762, 1073, + 848, 954, 1759, 1614, 60, 720, 618, 61, 1050, 1432, + 62, 2545, 2383, 1061, 1450, 1829, 487, 63, 64, 65, + 66, 67, 594, 1093, 528, 1094, 1095, 708, 68, 1102, + 710, 711, 69, 556, 557, 1103, 1296, 1104, 70, 537, + 71, 839, 568, 840, 842, 530, 855, 2001, 1689, 73, + 74, 544, 545, 75, 76, 558, 77, 78, 2542, 531, + 2110, 1056, 495, 469, 470, 1171, 1042, 956, 1043 }; /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing STATE-NUM. */ -#define YYPACT_NINF -2528 +#define YYPACT_NINF -2446 static const int yypact[] = { - 4506, 212, 449, -2528, -2528, 212, 30395, -2528, 212, 31, - 1097, 35065, -2528, 5645, 212, 41136, 945, 202, 268, 321, - 325, 41136, 41136, 35532, 212, 218, 41603, -2528, 212, 32730, - -45, -43, 42070, 41136, 535, 554, -40, -2528, -2528, -2528, - -2528, -2528, 45, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, 93, -2528, 126, 133, 333, 122, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, 132, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, 20245, -2528, -2528, -2528, - -2528, -2528, -2528, 35999, 41136, 36466, 33197, 36933, -2528, 105, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, 
-2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, 117, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, 152, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, 
-2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, 106, -2, -2528, 154, -2528, - -2528, -2528, -2528, 535, 37400, -2528, 393, 605, -2528, 326, - 42537, -2528, -2528, -2528, -2528, 663, 37400, 487, -2528, -2528, - -2528, 37867, -2528, -2528, -2528, -2528, 463, -2528, -2528, 300, - -2528, 44, -2528, -2528, -2528, 277, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, 394, -2528, -2528, 43004, 43471, 43938, - -2528, 275, 559, 612, 19777, -2528, -2528, -2528, -2528, 132, - -2528, -2528, 325, -2528, 325, -2528, -2528, -2528, 409, 330, - -2528, 365, 648, -2528, -2528, -2528, 352, -2528, -2528, 603, - 8174, 8174, 44405, 325, 44405, 432, -2528, -2528, 71, -2528, - -2528, 21181, -2528, 443, -2, -2528, -2528, 233, 814, 11114, - 41136, 525, -2528, 545, 525, 584, 588, 333, -2528, 4506, - 924, 903, 32730, 226, 226, 1054, 226, 626, 949, -2528, - 1503, -2528, 655, -2528, 37400, -2528, 662, 933, -43, -2528, - 352, 1021, 492, 846, 1037, 2899, 1045, 860, 1063, 1326, - 6214, 11114, 25044, -2528, -2, -2528, -2528, -2528, 736, -2528, - 776, -2528, -2528, -2528, -2528, 559, 1004, -2528, 849, 38334, - 38801, 37400, 845, 1220, -2528, -2528, -2528, -2528, 877, -2528, - -2528, 60, 1189, 24, 887, -2528, 1228, 30, -2528, 1247, - 1128, 11114, -2528, 1018, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -43, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, 488, -2528, - -2528, 26912, -2528, -2528, 612, 938, -2528, 26912, 11604, 47207, - 1378, -2528, 1197, 41136, 944, -2528, -2528, -2528, -2528, -2528, - -2528, 953, 1438, 84, 1441, 11114, 965, 84, 84, 972, - 1299, -2528, -2528, -2528, 155, 983, 985, -2528, 158, 158, - -2528, 1179, 1014, 1016, -2528, 160, 1505, 1509, 102, 1035, - 1046, 
196, 84, 11114, -2528, 1048, 158, 1051, 1055, 1061, - 1546, 1067, -2528, 1550, 1071, 81, 100, 1076, 1083, -2528, - 1095, -2528, 162, 11114, 11114, 11114, -2528, 7194, 1583, -2528, - -2, 325, -2528, -2528, -2528, -2528, -2528, -2528, -2528, 1108, - -2528, 112, 5091, -2528, 1147, -2528, -2528, -2528, 180, 11114, - -2528, 1587, -33, -2528, 167, -2528, -2528, -2528, -2, 1388, - 1114, -2528, -2528, -2528, 189, 1512, 25978, 26445, 37400, -2528, - -2528, -2, -2528, -2528, -2528, -2528, -2528, -2528, 495, -2528, - 132, 28063, 514, 525, 41136, 41136, 1578, -2528, -2528, -2528, - 32730, 37400, 44872, 1249, -2528, -2528, 333, 333, 11114, 333, - 161, 4, 8664, 12094, 1464, 1352, 111, 536, 1465, -2528, - 1356, 626, 949, 11114, 545, -2528, 1407, 37400, 30862, 225, - 802, 1148, 1230, 1150, -54, 1548, -2528, 1149, -2528, 1232, - 37400, 48578, 192, -2528, 1584, 192, 192, 262, 1586, 1236, - 207, 1394, -9, 223, 1149, 153, -2528, 32730, 53, 556, - 1149, 37400, 1238, 656, 1149, 101, 11604, 960, 1141, 291, - 1229, 1295, 131, 11604, 1311, 1343, 1351, 1387, 1428, 1440, - 1442, 1450, 1452, 1454, 135, 1472, 1476, 1480, 1494, 1536, - 1551, 138, 1553, 78, 11604, 1559, 1165, -2528, 28063, -67, - -2528, -2528, 1570, 148, -2528, 24576, 1160, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, 1250, 41136, 1214, -91, 1519, 1585, 37400, - 1406, 153, 1414, 1194, 1645, 776, 9154, 1658, -2528, 45339, - -2528, -2528, -2528, -2528, -2528, 41136, 
-2528, -2528, 41136, -2528, - 28994, 1208, 41136, 41136, -2528, 41136, 41136, 538, 39268, 612, - 33664, -2528, -2528, -2528, -2528, 882, 894, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, 28994, -2528, 1980, -2528, - -2528, -2528, 1212, 558, -2528, -2528, -2528, -2528, 1260, -2528, - 1260, 1260, -2528, -2528, -2528, -2528, 1219, 1219, 1221, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, 1223, 196, -2528, 1260, -2528, 1219, -2528, -2528, - -2528, -2528, -2528, 48578, -2528, -2528, -2528, -2528, 322, 324, - -2528, -2528, -2528, 1231, -2528, 1679, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, 1595, 562, 1219, -2528, -2528, - 535, -2528, -2528, 11114, -2, 11114, -2528, 1237, 28063, 1278, - 11114, -2528, -2528, 11114, 1239, 1715, 1715, 11114, -2528, -2528, - -2528, -2528, 2858, 1715, -2528, 1715, 1715, 1260, 1260, -2528, - 4553, 11114, -2528, 23524, 11114, 14544, 9644, 11114, 1330, 1339, - 1715, -2528, 1715, -2528, 11114, 7684, 11114, 4553, 1718, 1718, - 1544, 5139, 1254, -2528, -2, -2, -2528, 1715, 11114, 4192, - 4192, -2528, 120, 47207, 11114, 11114, 11114, 11114, 28527, 1342, - 118, 41136, 11114, 11114, 1261, 590, -2528, 11114, 1479, -2528, - 1267, 11114, 1350, 107, 11114, 11114, 11114, 11114, 11114, 11114, - 11114, 11114, 11114, -2528, -2528, 16492, 175, 1588, 1604, -2, - 11114, -121, 221, 11114, 34131, 8174, 1596, 6214, -2528, -2, - 113, 1596, -2528, -2528, -2528, -2528, 168, -2528, -2528, -2528, - -2528, 1212, -2528, 1212, 1279, 37400, 233, 32263, -2528, 11114, - -2528, 569, 1281, 1340, 973, 1732, 41136, -2528, 22117, 1562, - -2528, 1282, -2528, 27375, 1562, -2528, -2528, 15512, 1405, 1564, - 1501, -2528, -2528, -2528, 1300, 28063, 12584, 12584, -2528, 639, - 28063, 1241, -2528, -2528, -2528, -2528, -2528, -2528, 73, -2528, - 37400, 9, 1464, 536, 602, -2528, -2528, 1375, 1308, 45806, - 41136, 1579, 1532, 1582, -122, -2528, -2528, -2528, 47207, -2528, - 41136, 41136, 46273, 46740, 
29461, 41136, 28994, -2528, -2528, -2528, - -2528, 41136, 1006, 41136, 4613, -2528, -2528, -2528, 192, -2528, - -2528, -2528, -2528, -2528, 41136, 41136, -2528, -2528, 192, 41136, - 41136, 192, -2528, 1625, 41136, 41136, 41136, 41136, 1687, 41136, - 41136, -66, -66, 1515, -2528, 10134, 1314, -2528, 11114, 11114, - -2528, 11114, 1485, -2528, 613, -2528, 1526, 57, 1365, 37400, - 41136, 1684, -2528, -2528, -2528, -2528, -2528, 1322, 1661, -2528, - 153, 1667, 31329, 18, 1068, 1369, -2528, 625, 11114, 1554, - -2528, -2528, 1540, 24, -2528, -2528, 28994, 30, -2528, 1545, - 121, -2528, 1558, 776, 1790, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, 632, 19309, -2528, -2528, 1794, 325, 1794, 586, - -2528, -2528, 1794, -2528, 1794, -2528, 26912, -2528, 11604, 47207, - 1799, 41136, 1353, 1354, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - 1715, 1430, -2528, 1431, 1444, 1445, -2528, -2528, -2528, -2528, - -2528, 47207, -2528, -2528, 634, -2528, 1361, 1367, 11114, 11114, - 94, -2528, 27439, 638, 11114, 1359, 1368, 645, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, 1372, 1689, -2528, - 1373, 1377, 1379, -2528, -2528, 4205, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - 1380, 1366, 27475, 1382, 14544, 14544, 7194, 518, -2528, 14544, - 1384, -2528, -2528, 646, 27393, 1361, 1386, 1393, 1396, 1389, - 1390, 27559, 10624, 11114, 10624, 10624, 27791, 1361, 1399, 27926, - -2528, 11114, 1400, 5376, -2528, -2528, -2528, 3187, 3187, 3187, - 4553, -2528, -2528, -2528, 1401, -2528, 14544, 14544, -2528, 2480, - 1047, 7194, -2528, -2528, 1707, -2528, 642, -2528, 1412, -2528, - -2528, 1305, -2528, 23524, 5326, 11114, 134, -2528, 11114, 1261, - 11114, 1475, 3187, 3187, 3187, 235, 235, 83, 83, 83, - 271, 221, -2528, -2528, -2528, 1417, 1419, 1421, 1611, 28063, - 1067, 11114, -2528, 28063, 616, 685, 37400, 1344, 2908, 3200, - -2528, -2528, -2528, 
17905, 1466, -67, 1466, 1715, 4192, -2528, - 545, -2528, -2528, -2528, 28063, -2528, 535, 17905, 1467, 1474, - -96, 20713, 1634, -2528, 41136, -2528, 41136, -2528, -38, 1443, - -2528, 11114, -2528, -2528, 2356, 1449, 1636, 1641, 910, 910, - 639, 1643, -2528, -2528, 1499, -2528, 11114, 1243, -2528, 1259, - -2528, -2528, -2528, -2528, 1439, -2528, -2528, 1693, -2528, -2528, - -2528, -2528, 1525, 1149, 11114, 1668, -2528, 49, 1447, 1782, - -103, 1746, 41136, -2528, 1659, -2528, 673, 1792, 121, 1796, - 121, 28994, 28994, 28994, 715, -2528, -2528, 325, -2528, -2528, - 727, -2528, 294, -2528, -2528, -2528, 1541, 689, 1149, 153, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, 103, 692, 1149, - 1542, -2528, 1549, -2528, 1552, 786, 1149, -2528, -2528, 1461, - 1462, 1463, 11604, -2528, -2528, 28063, 28063, 28063, 1468, -2528, - 97, -2528, 41136, -2528, -2528, -2528, 1485, 37400, 776, -2528, - 921, 41136, 37400, 37400, 37400, -2528, -2528, -2528, 1477, 1473, - -2528, 47207, -60, 1678, 1690, 41136, 1527, 1150, 1930, -2528, - 28063, 1825, 37400, -2528, -2528, -2528, 41136, -2528, 1827, 535, - -2528, 28994, -2528, 31796, -2528, -2528, -2528, -2528, 325, -2528, - 325, 1709, 41136, 25511, 325, 325, -2528, 731, 1496, 1354, - 757, 47207, 1715, 76, 1495, 1360, 1186, 410, 593, -2528, - -2528, -2528, 771, 27972, 11114, -2528, 1843, 47207, -2528, 5419, - -2528, -2528, -2528, -2528, 11114, -2528, -2528, -2528, 11114, -2528, - 23524, 11114, 1818, -2528, 1976, 1976, 5139, 47207, 14544, 14544, - 14544, 14544, 521, 1051, 14544, 14544, 14544, 14544, 14544, 14544, - 14544, 14544, 14544, 16002, 260, -2528, -2528, 11114, 11114, 1826, - 1818, -2528, -2528, -2528, 315, 315, 47207, 1502, 1361, 1506, - 1507, 11114, -2528, 47207, -2, 23999, -2528, 4192, 11114, 1001, - 1528, 11114, 773, 11114, 1828, -2528, -2528, 1530, -2528, -2528, - 47207, 11114, 1511, 2069, 14544, 14544, 3260, -2528, 4077, 11114, - 7194, -2528, 1515, 1547, 22585, -2528, 1610, 1610, 1610, 1610, - -2528, -2528, 37400, 37400, 
37400, 18373, 1844, 17437, 39735, 1522, - 697, -2528, 39735, 40202, -2528, 1557, -2528, -2, 11114, 1837, - 1539, 1837, 1556, -2528, -2528, 1560, 1522, 11114, 1674, -2528, - -2528, -2528, 1589, -2528, 787, -2528, 1935, 1674, -2528, 788, - -2528, 22117, 1467, 11114, -2, -2528, 1533, -2528, 1449, 528, - -2528, -2528, -2528, 1753, -2528, -2528, -2528, 37400, -2528, 41136, - 24017, 1885, -2528, 41136, 41136, 41136, -2528, 41136, 789, 143, - 1555, -2528, 143, 1864, 629, 1150, 207, 3901, -39, -2528, - -2528, -2528, 1620, 41136, -2528, 41136, -2528, -2528, -2528, -2528, - -2528, 29461, -2528, -2528, -2528, 28994, 23056, 28994, 41136, 41136, - 41136, 41136, 41136, 41136, 41136, 41136, 41136, 41136, -2528, -2528, - -2528, 1515, -2528, -2528, -2528, 223, -2528, -2528, 97, 1566, - 1369, 1585, 40669, 793, 153, 1568, 2002, -2528, 18, 31329, - -2528, -2528, -2528, 1961, -2528, 275, 142, -2528, -2528, 535, - 41136, 1615, 121, 37400, -2528, 798, -2528, -2528, -2528, -2528, - 41136, 1569, -2528, 1569, -2528, -2528, -2528, 1715, -2528, 41136, - -2528, 1561, -2528, 1571, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, -2528, 11114, 28063, -2528, 1573, -2528, 28063, 24035, - -2528, 28063, 1826, -2528, 1656, 1656, 1656, 2439, 1891, 190, - 1577, 1656, 1656, 1656, 272, 272, 66, 66, 66, 1976, - 260, 28063, 28063, -2528, -2528, -2528, -2528, 1580, -2528, -2528, - -2528, 1361, 1591, -2528, -2528, 281, 11114, 11114, 2480, -2528, - 4597, 11114, 47207, 808, 2480, 176, 11114, 3425, 4573, 11114, - 11114, 4719, 24063, 1592, 11114, 47659, -2528, -2528, 37400, 37400, - 37400, 37400, -2528, -2528, -2528, 39735, 40202, 1590, 16968, 697, - 1599, 37400, -2528, 1669, 1597, 17905, 1849, 1778, -2528, 17905, - 1778, 636, 1778, 1854, 1669, 21649, -2528, 1669, 1601, 1786, - -2528, 478, 28063, 2034, 1910, 325, 1910, 325, -2528, 28063, - 8174, -2528, 535, 947, 41136, -2, -72, -2528, 1616, 41136, - -2528, 1674, 28063, 23524, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, 41136, 809, -2528, 810, 
143, -2528, 1646, -2528, 173, - 1897, 25, -2528, 28994, 1954, 677, -2528, 1913, 1832, -2528, - 192, -2528, 11114, 677, 1834, 130, 41136, -2528, -2528, 2059, - -2528, 47207, 121, 121, -2528, -2528, 1619, 1623, 1624, 1627, - 1628, 1632, 1633, 1635, 1638, 1640, 1644, 1660, 1662, 1665, - 1670, 1671, 1673, 1223, 1675, -2528, 1680, 1533, 1685, 1686, - 1691, 48126, 1694, 1698, 1701, 1702, 1703, 882, 894, -2528, - -2528, -2528, -2528, -2528, -2528, 887, 1704, -2528, 1649, -2528, - -2528, 1708, -2528, 1739, -2528, -2528, -2528, -2528, -2528, 1621, - 938, 110, 41136, 2111, 1892, 1676, 1369, -2528, 31329, 1139, - 145, 1690, -2528, 109, 1527, -2528, 2049, 1719, 1878, 41136, - 1695, -2528, 2130, -2528, 31796, 1569, 1692, 47207, -2528, -2528, - 28063, -2528, -2528, -2528, 14544, 2005, 1710, 47207, -2528, -2528, - 2480, 2480, 4597, 815, -2528, 2480, 11114, 11114, 2480, 2480, - 11114, -2528, -2528, 24081, 1895, -2528, -2528, -2528, -2528, -2528, - -2528, -2528, 29928, 39735, 1711, -2528, 34598, -2528, -2528, 41136, - 697, 17905, -2528, -2528, 87, -2528, 17905, 1979, -2528, 17905, - -2528, 41136, 1712, -2528, 41136, -2528, 13074, 11114, 1748, -2528, - 1748, -2528, 1114, -2528, -96, -2528, -2528, 2114, 18841, 2070, - 11114, -2528, -2528, 1714, 143, -2528, 1876, 1646, 1720, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, 820, 1721, 41136, 41136, - 14544, -2528, 677, 242, 140, -2528, 1988, 37400, 1646, -2528, - -2528, -2528, -2528, 2091, 2174, 2063, -2528, -2528, 28063, -2528, - -2528, 1715, 1715, -2528, -2528, 2142, -2528, -2528, -2528, -2528, - 887, 470, 23056, 41136, 41136, -2528, -2528, -2528, 223, 2095, - 827, 535, 2068, 31329, 2184, 1735, 41136, 1527, 720, 720, - -2528, 1880, -2528, 1884, -2528, -2528, 214, -2528, 37400, -2528, - -2528, 18841, 535, -2528, -2528, -2528, 4482, 14544, 47207, 836, - -2528, 2480, 2480, 2480, -2528, 2170, 1515, -2528, 843, 2190, - -2528, 41136, -68, -79, 1747, 1750, -2528, -2528, 850, -2528, - 11114, 1751, -2528, -2528, 17905, 87, 858, -2528, 
47207, 41136, - 876, 47207, 6704, 1749, -2528, -2528, 28063, 28063, 41136, 1809, - 1809, 1806, 41136, 11114, -2528, 883, 2169, 22, -53, 28063, - -2528, 37400, -2528, 28994, -2528, 143, -2528, 28994, 11114, -2528, - 782, 2439, 2207, -2528, -2528, -2528, -2528, 1646, 776, -2528, - -2528, 2060, -2528, 41136, 1819, 473, 1835, -2528, -2528, -2528, - 938, 325, 1369, 1719, 37400, 535, 18, 275, -2528, -2528, - -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, -2528, - -2528, 2186, 1972, 2189, 1615, 889, 4482, 896, -2528, 11114, - 149, 1557, 29928, 1772, -2528, 898, -2528, -2528, -2528, -2528, - -2528, 41136, 928, -2528, 28063, 41136, -2528, -2528, -2528, 41136, - 2142, 900, -2528, -2528, 13074, 1769, -2528, 2225, 1914, -2528, - -2528, 535, -2528, 24099, 1466, 18841, 41136, 41136, 41136, -2528, - 1902, 776, 143, 902, -2528, 1788, -2528, 24410, 2001, -2528, - 2087, -2528, 2032, 1787, -2528, 11114, -2528, 1859, -2528, -2528, - -2528, 2242, -2528, 1793, 1719, 1690, 1527, 2004, -2528, 2006, - 1797, 1369, -2528, 1361, 13564, 13564, 1800, -2528, -2528, 41136, - -2528, 907, 1804, 919, -2528, -2528, -2528, -2528, 41136, 1798, - 34598, -2528, 2169, -2528, -2528, -2528, 230, -2528, 230, 22117, - 2032, -2528, 28994, 23056, 2035, 1787, 55, 2015, 153, -2528, - 28063, -2528, 535, 31329, -2528, -2528, -2528, -2528, -2528, 18841, - 1466, 15034, 1949, 90, 27411, -2528, -2528, -2528, -2528, 926, - -2528, 2291, 1968, -2528, -2528, -2528, -2528, 41136, 1449, 1449, - -150, 2015, -2528, -2528, 2107, -2528, -2528, -2528, -2528, -2528, - 128, 2025, -2528, 2026, 1306, 1719, 936, -2528, 2273, -2528, - -2528, -2528, -2528, -2528, -2528, 1820, 1830, -2528, 230, -2528, - -2528, -2528, -2528, -2528, 472, 472, 2192, -2528, 1887, -2528, - -2528, -2528, 1369, 14054, -2528, 2308, 1449, 143, -2528, 2296, - -2528, 125, -2528, -2528, 1466, -2528, 1833, -2528, -2528, -2528, - -2528, -2528, -2528 + 4928, -98, -2, -2446, -2446, -98, 30641, -2446, -98, 48, + 2466, 35311, -2446, 2230, -98, 41382, 1727, 
207, 238, 215, + 343, 41382, 41382, 35778, -98, 246, 41849, -2446, -98, 32976, + -32, -132, 42316, 41382, 1092, 417, -33, -2446, -2446, -2446, + -2446, -2446, 94, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, 125, -2446, 62, 132, -116, -14, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, 64, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, 20347, -2446, -2446, -2446, + -2446, -2446, -2446, 36245, 41382, 36712, 33443, 37179, -2446, 113, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, 115, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + 
-2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, 123, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, 26, 412, -2446, 134, -2446, + -2446, -2446, -2446, 1092, 37646, -2446, 319, 614, -2446, 208, + 42783, -2446, -2446, -2446, -2446, 568, 37646, 301, -2446, -2446, + -2446, 38113, -2446, -2446, -2446, -2446, 311, -2446, -2446, 180, + -2446, 34, -2446, -2446, -2446, 363, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, 256, -2446, -2446, 43250, 43717, 44184, + -2446, 274, 580, 556, 19879, -2446, -2446, -2446, -2446, 64, + -2446, -2446, 343, -2446, 343, -2446, -2446, -2446, -135, 298, + -2446, 356, 618, -2446, -2446, -2446, 351, -2446, -2446, 606, + 8244, 8244, 44651, 343, 44651, 378, -2446, -2446, -18, -2446, + -2446, 21283, -2446, 452, 412, -2446, -2446, 121, 778, 11196, + 41382, 461, -2446, 473, 461, 482, 501, -116, -2446, 4928, + 886, 792, 32976, 267, 267, 979, 267, 592, 894, -2446, + 1274, 
-2446, 553, -2446, 37646, -2446, 600, 867, -132, -2446, + 351, 928, 529, 773, 966, 4259, 974, 881, 978, 1091, + 6276, 11196, 25146, -2446, 412, -2446, -2446, -2446, 645, -2446, + 626, -2446, -2446, -2446, -2446, 580, 873, -2446, 700, 38580, + 39047, 37646, 667, 1124, -2446, -2446, -2446, -2446, 769, -2446, + -2446, 93, 1066, 59, 735, -2446, 1068, 96, -2446, 1085, + 972, 11196, -2446, 813, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -132, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, 377, -2446, + -2446, 27014, -2446, -2446, 556, 794, -2446, 27014, 11688, 47453, + 1249, -2446, 1073, 41382, 826, -2446, -2446, -2446, -2446, -2446, + -2446, 834, 1321, 92, 1325, 11196, 857, 92, 92, 861, + 1189, -2446, -2446, -2446, 136, 877, 879, -2446, 137, 137, + -2446, 1061, 885, 891, -2446, 138, 1362, 1377, 79, 904, + 908, 220, 92, 11196, -2446, 917, 137, 930, 941, 957, + 1400, 982, -2446, 1428, 987, 81, 90, 991, 999, -2446, + 1003, -2446, 139, 11196, 11196, 11196, -2446, 11196, 7260, 37646, + 1432, -2446, 412, 343, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, 1008, -2446, 114, 4962, -2446, 1009, -2446, -2446, -2446, + 171, 11196, -2446, 1464, -28, -2446, 141, -2446, -2446, -2446, + 412, 1273, 1016, -2446, -2446, -2446, 160, 1426, 26080, 26547, + 37646, -2446, -2446, 412, -2446, -2446, -2446, -2446, -2446, -2446, + 407, -2446, 64, 28165, 495, 461, 41382, 41382, 1488, -2446, + -2446, -2446, 32976, 37646, 45118, 1160, -2446, -2446, -116, -116, + 11196, -116, 172, -13, 8736, 12180, 1370, 1263, 133, 494, + 1385, -2446, 1294, 592, 894, 11196, 473, -2446, 1338, 37646, + 31108, 569, 623, 1094, 1177, 1104, 204, 1510, -2446, 1109, + -2446, 1198, 37646, 48824, 162, -2446, 1556, 162, 162, 564, + 1573, 
1235, 184, 1388, 55, 87, 1109, 1832, -2446, 32976, + 83, 421, 1109, 37646, 1240, 613, 1109, 88, 11688, 651, + 863, 244, 950, 994, 91, 11688, 1005, 1154, 1214, 1252, + 1289, 1297, 1342, 1491, 1538, 1541, 95, 1543, 1547, 1558, + 1560, 1563, 1575, 102, 1610, 97, 11688, 1612, 1167, -2446, + 28165, -78, -2446, -2446, 1618, 108, -2446, 24678, 1168, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, 1265, 41382, 1224, -84, 1534, + 1592, 37646, 1424, 1832, 1427, 1208, 1666, 626, 9228, 1670, + -2446, 45585, -2446, -2446, -2446, -2446, -2446, 41382, -2446, -2446, + 41382, -2446, 29240, 1219, 41382, 41382, -2446, 41382, 41382, 507, + 39514, 556, 33910, -2446, -2446, -2446, -2446, 755, 820, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, 29240, -2446, + 2802, -2446, -2446, -2446, 1226, 533, -2446, -2446, -2446, -2446, + 1271, -2446, 1271, 1271, -2446, -2446, -2446, -2446, 1228, 1228, + 1229, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, 1231, 220, -2446, 1271, -2446, 1228, + -2446, -2446, -2446, -2446, -2446, 48824, -2446, -2446, -2446, -2446, + 196, 217, -2446, -2446, -2446, 1232, -2446, 1684, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, 1898, 570, 1228, + -2446, -2446, 1092, -2446, -2446, 11196, 412, 11196, -2446, 1236, + 28165, 1277, 11196, -2446, -2446, 11196, 1237, 1715, 1715, 11196, + -2446, -2446, -2446, -2446, 3962, 1715, -2446, 
1715, 1715, 1271, + 1271, -2446, 28262, 11196, -2446, 23626, 11196, 15132, 9720, 11196, + 1322, 1323, 1715, -2446, 1715, -2446, 11196, 7752, 11196, 28262, + 1716, 1716, 1238, 1245, 1851, 5390, 1241, -2446, 511, -2446, + 1239, -2446, 412, 412, -2446, 1715, 11196, 3659, 3659, -2446, + 150, 47453, 11196, 11196, 11196, 11196, 28773, 1329, 105, 41382, + 11196, 11196, 1247, 763, -2446, 11196, 1466, -2446, 1250, 11196, + 1332, 111, 11196, 11196, 11196, 11196, 11196, 11196, 11196, 11196, + 11196, -2446, -2446, 16594, 156, 1562, 1582, 412, 11196, -59, + 194, 11196, 34377, 8244, 1574, 6276, -2446, 412, 82, 1574, + -2446, -2446, -2446, -2446, 145, -2446, -2446, -2446, -2446, 1226, + -2446, 1226, 1255, 37646, 121, 32509, -2446, 11196, -2446, 571, + 1257, 1315, 539, 1709, 41382, -2446, 22219, 1542, -2446, 1259, + -2446, 27477, 1542, -2446, -2446, 15610, 1382, 1537, 1474, -2446, + -2446, -2446, 1266, 28165, 12672, 12672, -2446, 1299, 28165, 1301, + -2446, -2446, -2446, -2446, -2446, -2446, 586, -2446, 37646, 68, + 1370, 494, 575, -2446, -2446, 1137, 1269, 46052, 41382, 1544, + 1493, 1545, -166, -2446, -2446, -2446, 47453, -2446, 41382, 41382, + 46519, 46986, 29707, 41382, 29240, -2446, -2446, -2446, -2446, 41382, + 1605, 41382, 4679, -2446, -2446, -2446, 162, -2446, -2446, -2446, + -2446, -2446, 41382, 41382, -2446, -2446, 162, 41382, 41382, 162, + -2446, 1225, 41382, 41382, 41382, 41382, 1552, 41382, 41382, -69, + -69, 1477, -2446, 10212, 1276, -2446, 11196, 11196, -2446, 11196, + 1448, -2446, 581, -2446, 1489, 27, 1327, 37646, 41382, 2219, + -2446, -2446, -2446, -2446, -2446, 1284, 1633, -2446, 1832, 1634, + 31575, 654, 544, 1336, -2446, 603, 11196, 1524, -2446, -2446, + 1513, 59, -2446, -2446, 29240, 96, -2446, 1521, 127, -2446, + 1539, 626, 1765, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + 608, 19411, -2446, -2446, 1766, 343, 1766, 499, -2446, -2446, + 1766, -2446, 1766, -2446, 27014, -2446, 11688, 47453, 1769, 41382, + 1324, 1326, -2446, -2446, -2446, -2446, -2446, 
-2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, -2446, 1715, 1402, + -2446, 1404, 1406, 1407, -2446, -2446, -2446, -2446, -2446, 47453, + -2446, -2446, 635, -2446, 1320, 11196, 11196, 40, -2446, 27541, + 646, 11196, 1328, 1339, 659, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, 1340, 1646, -2446, 1341, 1343, 1344, + -2446, -2446, 4915, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, 1345, 1335, 27577, + 1346, 15132, 15132, 7260, 918, -2446, 15132, 1347, -2446, -2446, + 665, 27495, 1238, 1350, 1348, 1356, 1352, 1355, 27661, 10704, + 11196, 10704, 10704, 27893, 1238, 1359, 28028, -2446, -2446, 11196, + 37646, -2446, 11196, 1360, 5443, -2446, -2446, -2446, 1316, 1316, + 1316, 28262, -2446, -2446, -2446, 1374, -2446, 15132, 15132, -2446, + 2728, 2494, 7260, -2446, -2446, 1647, -2446, 801, -2446, 1368, + -2446, -2446, 2645, -2446, 23626, 28204, 11196, 107, -2446, 11196, + 1247, 11196, 1443, 1316, 1316, 1316, 199, 199, 161, 161, + 161, 288, 194, -2446, -2446, -2446, 1369, 1371, 1372, 1570, + 28165, 982, 11196, -2446, 28165, 731, 774, 37646, 2228, 3003, + 4378, -2446, -2446, -2446, 18007, 1415, -78, 1415, 1715, 3659, + -2446, 473, -2446, -2446, -2446, 28165, -2446, 1092, 18007, 1416, + 1429, 392, 20815, 1584, -2446, 41382, -2446, 41382, -2446, -16, + 1390, -2446, 11196, -2446, -2446, 1127, 1399, 1591, 1593, 916, + 916, 1299, 1594, -2446, -2446, 1449, -2446, 11196, 1311, -2446, + 1313, -2446, -2446, -2446, -2446, 1408, -2446, -2446, 1640, -2446, + -2446, -2446, -2446, 1473, 1109, 11196, 1616, -2446, 89, 1423, + 1741, 56, 1720, 41382, -2446, 1627, -2446, 403, 1767, 127, + 1771, 127, 29240, 29240, 29240, 675, -2446, -2446, 343, -2446, + -2446, 719, -2446, 328, -2446, -2446, -2446, 1518, 621, 1109, + 1832, -2446, -2446, -2446, -2446, -2446, -2446, -2446, 177, 624, + 1109, 1519, -2446, 1522, -2446, 1525, 625, 1109, -2446, -2446, + 1442, 1445, 1452, 11688, -2446, -2446, 28165, 28165, 
28165, 1430, + -2446, 124, -2446, 41382, -2446, -2446, -2446, 1448, 37646, 626, + -2446, 650, 41382, 37646, 37646, 37646, -2446, -2446, -2446, 1454, + 1446, -2446, 47453, -66, 1667, 1664, 41382, 1497, 1104, 1910, + -2446, 28165, 1800, 37646, -2446, -2446, -2446, 41382, -2446, 1804, + 1092, -2446, 29240, -2446, 32042, -2446, -2446, -2446, -2446, 343, + -2446, 343, 1685, 41382, 25613, 343, 343, -2446, 729, 1471, + 1326, 742, 47453, 1715, 71, 1475, 1384, 839, 993, 1310, + -2446, -2446, -2446, 765, 28074, 11196, -2446, 1818, 47453, -2446, + 5668, -2446, -2446, -2446, -2446, 11196, -2446, -2446, -2446, 11196, + -2446, 23626, 11196, 1793, -2446, 1950, 1950, 5390, 47453, 15132, + 15132, 15132, 15132, 642, 930, 15132, 15132, 15132, 15132, 15132, + 15132, 15132, 15132, 15132, 16102, 255, -2446, -2446, 11196, 11196, + 1801, 1793, -2446, -2446, -2446, 183, 183, 47453, 1479, 1238, + 1480, 1482, 11196, -2446, 47453, 412, 24101, -2446, 28165, -2446, + 3659, 11196, 906, 1056, 11196, 783, 11196, 1797, -2446, -2446, + 1484, -2446, -2446, 47453, 11196, 1492, 2711, 15132, 15132, 2757, + -2446, 3447, 11196, 7260, -2446, 1477, 1531, 22687, -2446, 1587, + 1587, 1587, 1587, -2446, -2446, 37646, 37646, 37646, 18475, 1816, + 17539, 39981, 1501, 144, -2446, 39981, 40448, -2446, 1515, -2446, + 412, 11196, 1822, 1505, 1822, 1514, -2446, -2446, 1527, 1501, + 11196, 1675, -2446, -2446, -2446, 1583, -2446, 810, -2446, 1937, + 1675, -2446, 817, -2446, 22219, 1416, 11196, 412, -2446, 1546, + -2446, 1399, 146, -2446, -2446, -2446, 1744, -2446, -2446, -2446, + 37646, -2446, 41382, 24119, 1878, -2446, 41382, 41382, 41382, -2446, + 41382, 833, 486, 1548, -2446, 486, 1858, 131, 1104, 184, + 3360, 400, -2446, -2446, -2446, 1614, 41382, -2446, 41382, -2446, + -2446, -2446, -2446, -2446, 29707, -2446, -2446, -2446, 29240, 23158, + 29240, 41382, 41382, 41382, 41382, 41382, 41382, 41382, 41382, 41382, + 41382, -2446, -2446, -2446, 1477, -2446, -2446, -2446, 87, -2446, + -2446, 124, 1549, 1336, 1592, 40915, 
835, 1832, 1550, 1995, + -2446, 654, 31575, -2446, -2446, -2446, 1954, -2446, 274, 163, + -2446, -2446, 1092, 41382, 1609, 127, 37646, -2446, 855, -2446, + -2446, -2446, -2446, 41382, 1551, -2446, 1551, -2446, -2446, -2446, + 1715, -2446, 41382, -2446, 1554, -2446, 1555, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, 11196, 28165, -2446, 1559, + -2446, 28165, 24137, -2446, 28165, 1801, -2446, 1018, 1018, 1018, + 1298, 1880, 135, 1569, 1018, 1018, 1018, 214, 214, 126, + 126, 126, 1950, 255, 28165, 28165, -2446, -2446, -2446, -2446, + 1561, -2446, -2446, -2446, 1238, 1578, -2446, -2446, 269, 11196, + 11196, 2728, -2446, 28309, 11196, 47453, 914, 2728, 170, 11196, + 1140, 1833, 11196, 11196, 3550, 24165, 1579, 11196, 47905, -2446, + -2446, 37646, 37646, 37646, 37646, -2446, -2446, -2446, 39981, 40448, + 1571, 17070, 144, 1585, 37646, -2446, 1660, 1598, 18007, 1845, + 1773, -2446, 18007, 1773, 128, 1773, 1864, 1660, 21751, -2446, + 1660, 1599, 1787, -2446, 559, 28165, 2033, 1912, 343, 1912, + 343, -2446, 28165, 8244, -2446, 1092, 1683, 41382, 412, -64, + -2446, 1617, 41382, -2446, 1675, 28165, 23626, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, 41382, 919, -2446, 925, 486, -2446, + 1641, -2446, 159, 1897, 44, -2446, 29240, 2165, 655, -2446, + 1911, 1829, -2446, 162, -2446, 11196, 655, 1830, 164, 41382, + -2446, -2446, 1826, -2446, 47453, 127, 127, -2446, -2446, 1613, + 1615, 1619, 1620, 1622, 1623, 1624, 1625, 1628, 1632, 1635, + 1636, 1638, 1642, 1644, 1645, 1649, 1231, 1650, -2446, 1665, + 1546, 1668, 1671, 1672, 48372, 1673, 1679, 1686, 1688, 1689, + 755, 820, -2446, -2446, -2446, -2446, -2446, -2446, 735, 1690, + -2446, 1631, -2446, -2446, 1695, -2446, 1697, -2446, -2446, -2446, + -2446, -2446, 1648, 794, 100, 41382, 2079, 1865, 1655, 1336, + -2446, 31575, 1405, 531, 1664, -2446, 104, 1497, -2446, 2022, + 1700, 1873, 41382, 1693, -2446, 2118, -2446, 32042, 1551, 1326, + 1687, 1326, 47453, -2446, -2446, 28165, -2446, -2446, -2446, 15132, + 
1991, 1694, 47453, -2446, -2446, 2728, 2728, 28309, 927, -2446, + 2728, 11196, 11196, 2728, 2728, 11196, -2446, -2446, 24183, 1883, + -2446, -2446, -2446, -2446, -2446, -2446, -2446, 30174, 39981, 1696, + -2446, 34844, -2446, -2446, 41382, 144, 18007, -2446, -2446, 2101, + -2446, 18007, 1972, -2446, 18007, -2446, 41382, 1704, -2446, 41382, + -2446, 13164, 11196, 1751, -2446, 1751, -2446, 1016, -2446, 392, + -2446, -2446, 2115, 18943, 2072, 11196, -2446, -2446, 1721, 486, + -2446, 1876, 1641, 1724, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, 935, 1725, 41382, 41382, 15132, -2446, 655, 250, 130, + -2446, 1999, 37646, 1641, -2446, -2446, -2446, -2446, 2093, 2180, + 2068, -2446, -2446, 28165, -2446, -2446, 1715, 1715, -2446, -2446, + 2145, -2446, -2446, -2446, -2446, 735, 327, 23158, 41382, 41382, + -2446, -2446, -2446, 87, 2098, 936, 1092, 2071, 31575, 2187, + 1737, 41382, 1497, 140, 140, -2446, 1877, -2446, 1879, -2446, + -2446, 189, -2446, 37646, -2446, -2446, 18943, 1092, -2446, -2446, + -2446, 3497, 15132, 47453, 947, -2446, 2728, 2728, 2728, -2446, + 2175, 1477, -2446, 948, 2195, -2446, 41382, -81, -47, 1747, + 1748, -2446, -2446, 955, -2446, 11196, 1749, -2446, -2446, 18007, + 2101, 963, -2446, 47453, 41382, 965, 47453, 6768, 1753, -2446, + -2446, 28165, 28165, 41382, 1808, 1808, 1805, 41382, 11196, -2446, + 969, 2168, 9, -34, 28165, -2446, 37646, -2446, 29240, -2446, + 486, -2446, 29240, 11196, -2446, 1653, 1298, 2208, -2446, -2446, + -2446, -2446, 1641, 626, -2446, -2446, 2061, -2446, 41382, 1817, + 340, 1843, -2446, -2446, -2446, 794, 343, 1336, 1700, 37646, + 1092, 654, 274, -2446, -2446, -2446, -2446, -2446, -2446, -2446, + -2446, -2446, -2446, -2446, -2446, -2446, 2181, 1980, 2182, 1609, + 976, 3497, 977, -2446, 11196, 435, 1515, 30174, 1777, -2446, + 985, -2446, -2446, -2446, -2446, -2446, 41382, 937, -2446, 28165, + 41382, -2446, -2446, -2446, 41382, 2145, 997, -2446, -2446, 13164, + 1774, -2446, 2229, 1916, -2446, -2446, 1092, -2446, 24201, 1415, 
+ 18943, 41382, 41382, 41382, -2446, 1901, 626, 486, 1024, -2446, + 1795, -2446, 24512, 2004, -2446, 2084, -2446, 2034, 1789, -2446, + 11196, -2446, 1860, -2446, -2446, -2446, 2241, -2446, 1796, 1700, + 1664, 1497, 2001, -2446, 2002, 1799, 1336, -2446, 1238, 13656, + 13656, 1803, -2446, -2446, 41382, -2446, 1031, 1806, 1054, -2446, + -2446, -2446, -2446, 41382, 1802, 34844, -2446, 2168, -2446, -2446, + -2446, 240, -2446, 240, 22219, 2034, -2446, 29240, 23158, 2024, + 1789, 41, 2008, 1832, -2446, 28165, -2446, 1092, 31575, -2446, + -2446, -2446, -2446, -2446, 18943, 1415, 14640, 1944, 98, 27513, + -2446, -2446, -2446, -2446, 1058, -2446, 2283, 1960, -2446, -2446, + -2446, -2446, 41382, 1399, 1399, -145, 2008, -2446, -2446, 2099, + -2446, -2446, -2446, -2446, -2446, 120, 2017, -2446, 2018, 866, + 1700, 1059, -2446, 2266, -2446, -2446, -2446, -2446, -2446, -2446, + 1813, 1819, -2446, 240, -2446, -2446, -2446, -2446, -2446, 103, + 103, 2183, -2446, 1882, -2446, -2446, -2446, 1336, 14148, -2446, + 2308, 1399, 486, -2446, 2296, -2446, 119, -2446, -2446, 1415, + -2446, 1834, -2446, -2446, -2446, -2446, -2446, -2446 }; /* YYPGOTO[NTERM-NUM]. 
*/ static const yytype_int16 yypgoto[] = { - -2528, -2528, -2528, 1744, -2528, -2528, -2528, 286, -2528, 954, - -2528, 274, -475, 591, -2528, -2528, -2528, -841, 15, -2528, - -2528, -2528, -2528, 85, 327, -2528, -354, -1800, -112, -2528, - -2346, -2448, -2528, -416, -2348, -1617, -2528, -1014, 250, -2528, - -2012, -2528, -604, -887, -705, -898, -2528, 51, -2528, 1102, - -1013, -1704, -2303, -420, -2528, -488, -2528, -261, -1697, -463, - -445, -2528, -2241, -845, -2528, 1469, -178, -2528, 618, -2528, - -2015, -2528, -2528, 601, -2528, -851, -2528, -2528, -1743, 264, - -403, -2182, -2260, 563, -618, -2528, -447, 296, -1674, -2528, - 633, -2528, -394, -2528, -494, -2062, -2528, -2528, -2528, 1297, - -572, -2528, -2528, -2528, -2528, 17, -2528, -2528, -2528, -2528, - -2528, -448, 567, -2175, -2528, 534, -2528, -2528, -2528, -2528, - -8, 292, -2528, 70, 50, -31, -15, -11, 14, 43, - 1535, 1563, -2528, -1282, 446, -2528, -2528, -579, -48, -2528, - 678, -2460, -1956, -417, 1032, 1514, 1520, -285, -197, -2528, - -356, -2528, -877, -2528, -2528, 680, 1072, -1214, -1217, -2528, - 407, -2528, -282, -2528, 157, -406, 1053, -2528, 1534, -2528, - -2528, -2528, -2528, -1181, 721, -1902, 434, -1864, -1788, 193, - 182, -1184, -161, 13, 440, -245, -2528, -2528, -243, -1587, - -2268, -257, -254, -2528, -2528, -1143, 937, -869, -2528, -2528, - -662, 1134, -2528, -2528, -2528, 1383, 1531, -2528, -2528, 1543, - 1677, -2528, -533, 2135, 932, -743, 1233, -1132, 1234, -1131, - -1121, -1136, 1244, 1245, -1167, 3108, -1510, -738, 6, -2528, - -2277, -1349, -2528, -2528, -12, -2528, -293, -2528, -291, -2528, - -2528, -2528, -279, -2527, -2528, 1178, -2528, -1183, -2528, 3584, - 758, -2528, -1559, -550, -2528, -638, -903, -1456, -2528, -2528, - -2528, -2528, -2528, -2528, -1222, -1695, -767, 824, -2528, -2528, - 939, -2528, -2528, -955, -589, 1050, -538, -760, 832, -2528, - -541, 1188, -2528, 1510, -514, 1112, -967, 12, -2528, 1518, - 252, 1728, -1397, -2003, -2528, -2528, -508, -2100, -823, 
-2528, - -607, -2528, -2528, -2528, -1092, -2528, 666, -2528, -1012, -2528, - -177, -2528, -2528, -2032, -2528, -2528, -2528, -2528, -2528, -2528, - -392, -2528, -451, -450, -2528, -2528, 16, -687, 1395, -2528, - -2528, 732, -2528, 990, -2528, 791, -2528, -2528, -2528, 1172, - -2528, -2528, -2528, 21, 1630, 467, -2528, 1173, -2528, -2528, - -2528, -2528, -2528, 566, -2528, -1008, -2366, 68, -2188, -1041, - -6, -2528, -2528, -2528, -511, -2528, -2007 + -2446, -2446, -2446, 1743, -2446, -2446, -2446, 273, -2446, 946, + -2446, 265, -474, 583, -2446, -2446, -2446, -836, 6, -2446, + -2446, -2446, -2446, 74, 318, -2446, -367, -1821, -125, -2446, + -2366, -2252, -2446, -427, -2346, -1626, -2446, -1014, 243, -2446, + -2029, -2446, -602, -868, -699, -900, -2446, 43, -2446, 1330, + -1012, -1718, -2321, -418, -2446, -487, -2446, -261, -1699, -460, + -446, -2446, -2244, -856, -2446, 1481, -172, -2446, 627, -2446, + -2024, -2446, -2446, 616, -2446, -855, -2446, -2446, -1790, 264, + -400, -2228, -2265, 567, -611, -2446, -445, 307, -1674, -2446, + 638, -2446, -387, -2446, -477, -2064, -2446, -2446, -2446, 1312, + -625, -2446, -2446, -2446, -2446, 17, -2446, -2446, -2446, -2446, + -2446, -571, 576, -2185, -2446, 545, -2446, -2446, -2446, -2446, + 1, 300, -2446, 5, 517, -30, -8, 14, 16, 45, + 1557, 1568, -2446, -1225, 577, -2446, -2446, -575, -54, -2446, + 695, -1950, -1961, -413, 1047, 1532, 1535, -276, -187, -2446, + -345, -2446, -1251, -2446, -2446, 694, 1087, -1201, -1234, -2446, + 420, -2446, -273, -2446, 169, -406, 1070, -2446, 1553, -2446, + -2446, -2446, -2446, -1198, 734, -1921, 445, -1901, -1763, 209, + 192, -1093, -153, 28, 453, -235, -2446, -2446, -233, -1651, + -2292, -247, -246, -2446, -2446, -1071, -1413, -874, -2446, -2446, + -417, 989, -2446, -2446, -2446, 1133, 1178, -2446, -2446, 1988, + 2032, -2446, 317, 2372, 287, -741, 1246, -1127, 1248, -1136, + -1140, -1143, 1254, 1256, -1173, 2891, -1528, -618, 24, -2446, + -2236, -1562, -2446, -2446, -12, -2446, -299, 
-2446, -298, -2446, + -2446, -2446, -278, -2445, -2446, 1184, 864, -2446, -2446, -1181, + -2446, 3631, 767, -2446, -1578, -559, 1293, -650, -894, -1453, + -2446, -2446, -2446, -2446, -2446, -2446, -1084, -1719, -644, 838, + -2446, -2446, 954, -2446, -2446, -1113, -573, 1055, -541, -758, + 846, -2446, -543, 1192, -2446, 1191, -517, 271, -973, 25, + -2446, 2222, 167, 1967, -762, -2007, -2446, -2446, -506, -2118, + -829, -2446, -599, -2446, -2446, -2446, -1089, -2446, 679, -2446, + -1011, -2446, -169, -2446, -2446, -2030, -2446, -2446, -2446, -2446, + -2446, -2446, -374, -2446, -453, -448, -2446, -2446, 4, -697, + 1409, -2446, -2446, 639, -2446, 1014, -2446, 872, -2446, -2446, + -2446, 1196, -2446, -2446, -2446, 18, 1654, 478, -2446, 1204, + -2446, -2446, -2446, -2446, -2446, 258, -2446, -1010, -2396, 73, + -2215, -1038, -6, -2446, -2446, -2446, -503, -2446, -2003 }; /* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If positive, shift that token. If negative, reduce the rule which number is the opposite. If zero, do what YYDEFACT says. If YYTABLE_NINF, syntax error. 
*/ -#define YYTABLE_NINF -1669 +#define YYTABLE_NINF -1675 static const yytype_int16 yytable[] = { - 468, 905, 1045, 576, 647, 1038, 1087, 1392, 588, 645, - 820, 871, 464, 713, 58, 41, 1391, 51, 465, 842, - 1559, 72, 1375, 1847, 1094, 1042, 831, 1356, 653, 1224, - 58, 2032, 529, 1213, 1422, 1423, 1425, 1454, 1367, 1363, - 1426, 1403, 562, 590, 2047, 1269, 2049, 591, 2353, 2337, - 54, 1345, 2254, 2335, 1884, 1885, 1534, 1279, 2028, 1904, - 1530, 1532, 1594, 1595, 1942, 2209, 54, 1396, 79, 2000, - 53, 1533, 654, 2361, 707, 1074, 574, 1887, 1513, 1981, - 1103, 472, 1066, 2364, 575, 729, -682, 1189, 1066, -674, - 1596, -1667, -1667, -428, 1233, 2686, 1929, 1930, 2515, 607, - 1666, 1077, 535, 1445, 1202, -682, -1023, -679, 589, 2226, - -1023, -679, 1713, 1710, -1020, -1020, 1348, 535, -998, 1285, - 2520, 729, -1556, 566, 1239, 1331, 54, 2083, 1359, 1663, - 904, 1046, 910, -431, 914, 2549, -1545, 714, 1394, 729, - -1556, 2463, 1061, -1563, -1647, -1647, 599, -890, -1658, -1658, - 638, -1665, -1665, -1024, -890, 1626, 1241, -1563, 1605, -1024, - -1545, -1021, -1021, -641, 580, -654, 2570, -669, 2111, 2113, - 864, 2586, 535, 535, 1605, 2506, 2524, 1274, 1359, 1066, - 2216, 867, 1368, 2645, 2224, -428, 1062, 1376, 2060, 891, - 1379, 1380, 1282, 1081, 535, 1270, 1081, 2247, 729, 2617, - 713, 729, 1642, -909, 1181, 1182, 713, 2447, 892, 1643, - -909, -173, 2689, 1351, 2163, 1734, -173, 2090, 585, 585, - 1318, 2489, 2658, 2797, 1794, -431, 1650, 1368, 2521, 1199, - 1864, -388, 1233, 2656, 1418, 2501, 832, 1222, 825, 2849, - 2748, 2688, 1042, 2522, 2808, 548, 1233, 2824, 2365, 581, - 1571, 2810, 1225, 854, 2586, 2217, 2605, 2712, 893, 2268, - 1197, 1198, 1239, 1095, 3, 4, 1319, 1692, 860, 2659, - 583, 1887, 536, 2641, 1480, 2592, 1239, 1716, 1401, 1627, - 1280, 2642, 1233, 1887, 2266, 1402, 2788, 2218, 2789, 1318, - 2460, 2518, 1233, 1628, 1241, 1978, 2609, 1990, 1629, 2036, - 1359, 2671, 2219, 1283, -1644, -1644, 2845, 1731, 1241, 532, - 1360, 2220, 1239, 2420, 1976, 713, 713, 
2424, 2500, 2385, - 1315, 2140, 2265, 1344, 2514, 1320, 1731, 2289, 1976, 2628, - 535, 1301, 1991, 1630, 584, 1319, 533, 2519, 862, 2818, - 2001, 2269, 2226, 1390, 1241, 2221, 894, 549, 2764, 2262, - 1419, 2264, 2798, 1693, 1241, 855, 1063, 2368, 2836, 2603, - 2461, 1353, 1571, 1067, 1732, 1651, 2560, 1381, 1859, 1067, - 1406, 1369, -428, 1321, 2690, 2549, 1963, 1964, 2144, 2145, - 2146, 2147, 1717, 2025, 2151, 2152, 2153, 2154, 2155, 2156, - 2157, 2158, 2159, 2160, 2850, 895, 2787, 2811, 1126, 833, - 2615, 834, 2660, 896, 2366, 2270, 1213, 2271, 2586, 2606, - 2602, 585, -431, 2657, 2448, 897, 1369, 2410, 1437, 1284, - 2523, 2806, 1316, 1405, 1478, 2701, 1366, 2799, 534, 2604, - 2222, 1322, 1366, -428, 2187, 2188, 2375, 1435, 639, 2831, - 1559, 567, 1321, 569, 2643, 2709, 898, 2383, 1297, 579, - 2793, 2452, 2001, 1184, 1342, 1189, 2464, 1275, 1395, 1064, - 1067, 2779, 1795, 1038, 2386, 1343, 1812, 1568, 1813, 2724, - 1271, 1354, 582, -431, 2765, 1202, 826, 1366, 1631, 1824, - 2073, 2074, 611, 2013, 2183, 1710, 612, 58, 615, 1463, - 616, 900, 2586, 576, 619, 2337, -890, 2725, 1726, 2335, - 1322, 1983, 1783, 2515, 1382, 1512, 611, 2002, 2061, 1606, - 612, 2007, 1383, 901, 1068, 878, 2561, 473, 712, 2559, - 1073, 879, 1176, 54, 2563, 1944, 728, 2565, 2766, 1887, - 2838, 1208, 1392, 903, 1888, 1889, 1890, 58, 1519, 2507, - 709, -428, 872, 617, 468, 468, 827, 586, 1902, 2550, - 1208, 1644, -909, 1209, 578, 2825, 2454, 645, 1447, 723, - 1444, 2122, 590, 468, 1216, 1262, 591, 1210, 1450, 713, - -674, 593, 1209, 54, 575, 1783, 2465, 1181, 1182, 1702, - -428, -431, -428, 836, 1985, 1515, 1212, -1023, -679, 546, - 2711, -1023, 610, 58, 41, 852, 51, 611, 1228, -998, - 72, 612, 1199, -1556, 953, 468, 1039, 2491, 2492, 1571, - 1571, 2715, 592, 843, 1571, 2716, 1976, -1545, 1976, 80, - -431, -1556, -431, 1514, -1563, 1516, 880, 589, 1455, 54, - 707, 1602, 2451, 1523, -1024, 1515, 1042, 1527, -1563, 1665, - -1024, -1545, 600, 83, 1187, 468, 1193, 79, -669, 53, - 2148, 1571, 1571, 
1277, 1667, 1386, 1573, 1575, 1492, 1493, - 2142, 625, 2666, 81, 1543, 1544, 1085, 1086, 1734, 1085, - 1086, 1386, 835, 1760, 2198, 2199, 2200, 2201, 2510, 1387, - 1515, 633, 1225, 1763, -173, -173, 1766, 2255, 2164, 2770, - 1515, 1042, 25, 860, 2453, 1387, 2033, 1559, 1257, 1258, - 1259, 1260, 1261, 1262, 522, 712, 636, 2482, 2120, 1891, - 637, 712, 953, 1168, 1259, 1260, 1261, 1262, 640, 1614, - 2128, 2130, 2131, 2125, 2136, 2126, 2393, 29, 58, 468, - 1892, 2483, 476, 1608, 2839, 2127, 2129, 1897, 1898, 1899, - 1900, 1901, 1902, 2276, 2143, 1615, 2337, 2629, 2256, 641, - 2335, 1899, 1900, 1901, 1902, 2595, 576, 468, 1834, 1752, - 2174, 651, 2086, 862, 54, 1208, 1496, 1497, 31, 2056, - 1759, 1934, 1208, 2167, 1208, 881, 1836, 468, 468, 468, - 2172, 468, 2057, 860, 715, 2149, 716, 1209, 861, 477, - 2216, 621, 1368, 622, 1209, 2150, 1209, 1935, 2630, 1893, - 2844, 84, 1191, 468, 2217, 822, 2631, 2034, 1210, 85, - 1212, 2840, 863, 2277, 2415, 1917, 719, 1919, 1920, 34, - 712, 712, 1779, 1780, 1781, 882, 1568, 1568, 2415, 883, - 1924, 1568, 2819, 2820, 721, 2632, 2218, 1220, 2841, 1038, - 723, 1309, 1310, 86, 1314, 591, 591, 1984, 591, 2467, - 1841, 2426, 468, 862, 1616, 1842, 468, 468, 884, 1617, - 2220, 1392, 722, 852, 1618, 2217, 1976, 468, 1568, 1568, - 1976, 1823, 1723, 2007, 2536, 1825, 1761, 1359, 1827, 2071, - 2847, 1764, 724, 878, 717, 1168, 1457, 1362, 2692, 879, - 87, 729, 1368, 2278, 25, 718, 2470, 2218, 1459, 1758, - 2696, 824, 863, 729, 2633, 1843, 589, 589, 1397, 589, - 953, 1936, 2219, 2332, 2011, 2634, 1937, 953, 1769, 729, - 1393, 2220, 830, 1776, 885, 1368, 2062, 1800, 1368, 29, - 2004, 2539, 2552, 1571, 1571, 1571, 1571, 2553, 953, 1571, - 1571, 1571, 1571, 1571, 1571, 1571, 1571, 1571, 1571, 1496, - 1497, 2616, 713, 2472, 2707, 2221, 881, 481, 1446, 482, - 2601, 1446, 611, 1090, -998, 713, 612, 2091, 1862, 1090, - 31, 25, 2098, -1642, -1642, 1079, 3, 4, 1080, 2222, - 839, 1369, 1295, 32, 484, 1296, 1711, 1712, 628, 1571, - 1571, 1894, 1895, 1896, 
880, 1897, 1898, 1899, 1900, 1901, - 1902, 1298, 1619, 623, 1299, 624, 29, 33, 1400, 728, - 2035, 2036, 1887, 1620, 1956, 844, 1957, 1888, 1889, 1890, - 1840, 34, 1368, 629, 1844, 1451, 1845, 2646, 1299, 878, - 857, 845, 859, 1918, 2176, 879, 2425, 522, 2428, 2037, - 2222, 2059, 1366, 1226, 2063, 1477, 2698, 31, 1478, 1510, - 953, 2475, 1511, 1230, 1938, 1232, 1675, 630, 1233, 1073, - 32, 1932, 2372, 1976, 1039, 1939, 1278, 1422, 1423, 1425, - 1559, 847, 1976, 1426, 712, 848, 1287, 1976, 1090, 1090, - 1976, 1369, 2647, 1958, 33, 1959, 1980, 1679, 1239, 1721, - 1039, 850, 1299, 58, 2476, -1669, 709, 2743, 34, 1976, - 1791, 1222, 2635, 1792, 631, 2636, -474, 88, 858, 2477, - 471, -474, 1819, 851, 1369, 1478, 520, 1369, 2837, 1830, - 1241, 1860, 1831, 493, 1299, 1868, 547, 521, 1299, 54, - 559, 875, 1872, 1906, 539, 1299, 1299, 1168, 2067, 2287, - 880, 873, 1366, 881, 573, 573, 2107, 876, 2495, 2280, - 1568, 1568, 1568, 1568, -1643, -1643, 1568, 1568, 1568, 1568, - 1568, 1568, 1568, 1568, 1568, 1568, 2044, 468, 2046, 468, - 877, 2165, 2166, 2263, 468, 1366, -474, 468, 1366, 889, - 2080, 468, 1976, 2807, 58, 615, 890, 616, 2350, 1931, - 2279, 619, 2288, 908, 907, 468, 603, 883, 468, 468, - 468, 468, 2050, 2821, 2478, 2051, 1568, 1568, 468, 468, - 468, 1369, 911, 1892, 2054, 1976, 1815, 2055, 2116, -1669, - 54, 1511, 468, 1043, 1463, -474, 909, 1168, 468, 468, - 468, 468, 1039, -1669, 2535, 2245, 468, 468, -1669, 2444, - 617, 468, -1645, -1645, 2118, 468, 628, 2119, 468, 468, - 468, 468, 468, 468, 468, 468, 468, 2851, 2132, 468, - 2179, 1299, 1044, 1299, 468, 1680, 475, 468, 1047, 468, - 476, 953, 1366, -1669, 2243, 2248, 2267, 2244, 2249, 1792, - 2356, 629, 1893, 1792, 821, 2373, 823, 2516, 2374, 881, - 1049, -1069, 885, 468, 1612, 2394, 2456, 2457, 1511, 1792, - 1792, 1058, 2540, 1976, 25, 1511, 25, 2596, -1646, -1646, - 2597, 468, 836, 1232, 2622, 2082, 1233, 1792, 2102, 2007, - 468, 468, 713, 2648, -1648, -1648, 1511, 477, 1250, 1060, - 2651, 1056, 25, 2652, 478, 
1515, 1976, 2663, 1065, 29, - 1073, 29, 891, 883, -1449, 2668, 1239, 2430, 2669, 878, - 2433, 2329, 1168, -1669, 54, 879, -1649, -1649, 1039, 2332, - 1039, 892, 631, 2672, -1650, -1650, 2669, 29, 1486, 1487, - 2684, 2171, 884, 2685, 1683, 1070, 2721, 1072, 1241, 2685, - 31, 479, 31, 2722, 523, 2730, 1511, 2736, 1073, 2751, - 2669, 2287, 2752, 32, 2780, 32, 1075, 2781, 878, 953, - -1651, -1651, 468, 468, 879, 468, 2783, 1076, 31, 1073, - 2455, 893, 1078, 2814, 1170, 1515, 2669, 33, -1669, 33, - 1976, 32, 1171, 2832, 1096, 2670, 2685, 25, 2670, 1571, - 1173, 524, 468, 34, 1492, 1493, -1449, 1175, 885, 2231, - 1039, -1652, -1652, -645, 1539, 33, -652, 1941, 1752, 480, - 2338, 1180, 1480, -1653, -1653, -1654, -1654, 712, 1183, 1681, - 880, 1184, 29, -1655, -1655, -1656, -1656, -1657, -1657, 1185, - 712, 1186, 953, 1168, 1894, 1895, 1896, -1669, 1897, 1898, - 1899, 1900, 1901, 1902, 54, -1659, -1659, 1190, -1449, -1660, - -1660, -1669, 2349, -1661, -1661, 481, -1669, 482, 25, 894, - 34, -1449, 1192, 31, 1816, 1168, -1449, -1662, -1662, 880, - -642, -1449, 468, 468, -643, 1571, 32, -1669, 468, 483, - -1449, 1195, 484, -1449, 1257, 1258, 1259, 1260, 1261, 1262, - 2371, -1669, 1196, 29, 1201, 576, 728, 1203, 1668, 1887, - 33, 1204, 1496, 1497, 1888, 1889, 1890, 1205, 895, -1663, - -1663, -646, -1449, 1206, 34, -644, 896, 1207, 468, 468, - 468, 2177, 1214, 468, -1664, -1664, -1666, -1666, 897, 1215, - 2007, -1449, -1668, -1668, 31, 2487, 468, 468, 468, 468, - 576, 1216, 1571, 1408, 1409, 468, 1250, 32, -521, -521, - -525, -525, 1223, 602, 1227, 605, 1267, 609, 1274, 898, - 468, 468, 1273, 635, 1275, 468, -524, -524, 1289, 881, - 2332, 33, 1494, 1495, 1302, 1308, 1220, 1291, 1293, 468, - -1449, 2620, 468, -1449, 468, 34, 861, 1338, 863, -1449, - 1340, 650, 1346, 1371, 1364, 1365, 1366, 1373, 1377, 1372, - 1384, 1385, 1042, 1399, 900, 468, 1568, 583, 878, 1389, - 576, 1165, 1404, 1411, 879, 1412, 1515, 468, 881, 912, - -475, 1220, 1416, 883, 728, -475, 901, 1887, 1420, 1974, 
- 1428, 468, -1669, -1669, -1669, 465, -1669, 1421, 1431, 1612, - 1432, 1434, 573, 1974, -1449, 468, 903, 2107, -440, 465, - 58, 615, 913, 616, 1438, 1951, 1446, 619, 1176, 2442, - 468, -440, 1476, 1484, 1485, 1187, -440, 1193, 1722, 1479, - 878, 584, 883, 1968, 1482, 1090, 879, 1481, 468, 1839, - 2496, 2497, 1519, 1518, 729, 1524, 54, 1968, 1090, 1233, - -475, 575, 1568, 1577, 466, 1039, 1039, 1039, 1961, 494, - 1892, 884, 1578, 494, 1591, 1604, 617, 1611, 1622, 538, - 494, 543, -440, 1623, 543, 1625, 1286, 564, 885, 880, - 494, 494, 1647, 1663, 1646, -1449, 1669, 1676, 1684, 1677, - 584, -440, 1691, -1449, 1703, -1669, 953, 1486, 1487, -475, - 1704, 1705, 1257, 1258, 1259, 1260, 1261, 1262, 585, -1449, - 1706, -1449, -1449, 2830, 1724, 1728, 1729, 2329, 1730, 1568, - 585, 1784, 1789, 1793, 543, 1168, 1797, 885, 1801, 1893, - 1802, 564, 494, 564, 564, 564, 1803, 1817, 1821, 1822, - -440, 880, 1828, 1165, 1826, 1039, 1829, 712, -1449, -440, - 1837, -1449, -1449, -1449, 1848, 1855, 1856, 1852, 1853, 1488, - 1489, 1490, 1491, 1492, 1493, 1168, 1870, 1494, 1495, 1857, - 1858, 1299, 1861, 1157, 1880, 1871, 1874, 476, 468, 1873, - 1875, 1168, 1912, 713, 1876, 1913, 1877, 1879, 468, 1883, - 1928, 1905, 468, 1911, 1933, 468, 1914, 1915, -1069, 54, - 1949, 1168, 468, 468, 468, 468, 1922, 1926, 468, 468, - 468, 468, 468, 468, 468, 468, 468, 468, 1940, 1816, - 1954, 468, 468, 1950, 2193, 1952, 1392, 1953, 881, 1989, - 1168, 1978, 1987, 1996, 477, 468, -526, 1168, 2003, 576, - 2009, -527, 468, 2014, 586, 468, 2015, 468, 2018, 2017, - 2019, 2024, 2021, 2023, 1168, 468, 576, 1893, 468, 468, - 2027, 2043, 2030, 468, 468, 2045, 2058, 2064, 2068, 2069, - 2070, 1496, 1497, 2072, 2065, 2092, 573, 2066, 1767, 468, - 2599, 468, 883, 2089, 2088, -440, 2099, 2650, 479, 2093, - 881, 1974, 468, 1974, 2100, 2096, 2103, 465, 2110, 465, - 2117, 468, 2124, 2135, 620, 1908, 2695, 1887, 1907, 2168, - 2702, 1768, 2194, 2169, 2170, 2181, 632, 468, 2185, 2053, - 1220, 1894, 1895, 1896, 1961, 1897, 
1898, 1899, 1900, 1901, - 1902, 2207, 2215, 2233, 2240, 2205, 2182, 2208, 2246, 2253, - 1774, 2467, 1222, 2242, 883, 1157, 2235, 2468, 611, 2229, - 1464, 847, 612, 2257, 2261, 2275, 2291, 2655, 2359, 2469, - 2363, 2273, 2076, 2237, 2369, 1039, 2378, 2238, 2329, 1039, - 2336, 1039, 2352, 1775, 2358, 2655, 2379, 885, 2384, 1792, - 2381, 1498, 1499, 2387, 2421, 2416, 2422, 2388, 2470, 2429, - 2471, 829, 2435, 2750, 1465, 1165, 2412, 1232, 2389, 2402, - 1233, 1500, 481, 2419, 482, 1090, 2413, 2434, 2436, 2437, - 2108, 2450, 2109, 2462, 2458, 891, 2114, 2115, 2485, 2723, - 1466, 2486, 1159, 2490, 874, 2704, 483, -1020, 2505, 484, - 1239, -1642, -1643, 2503, 892, -1644, -1645, -1669, 1467, 885, - -1646, -1647, 1468, -1648, 2502, 2472, -1649, 468, -1650, -1669, - -1669, -1669, -1651, 1897, 1898, 1899, 1900, 1901, 1902, 1051, - 1053, 1055, 1241, 1469, 2504, 2695, 1470, 2511, -1652, 54, - -1653, 2733, 2473, -1654, 2804, 2525, 2512, 2528, -1655, -1656, - 1471, -1657, 2513, -1659, 893, 1165, 2532, 2526, -1660, 2367, - 468, 468, 2537, -1661, -1662, 468, 1168, 2534, 2745, -1663, - 468, 2531, -1664, 468, 468, 1448, -1665, 1449, 468, -1666, - -1667, -1668, -1021, 2545, 2564, 2578, 2538, 2551, 2569, 2582, - 2588, 2590, 564, 2591, 2607, 2610, 2593, 2598, 627, 468, - 2611, 2186, 2612, 468, 564, 2613, 2621, 2624, 2474, 494, - 2625, 1974, 2626, 2475, 2649, 1974, 2653, 465, 2639, 1089, - 2283, 465, 2640, 2661, 468, 1089, 2662, 2665, 2678, 2674, - 2681, -1669, 2686, 2700, 2703, 644, 494, 494, 2705, 1472, - 1160, 2708, 894, 2717, 2718, -1669, 2719, 1473, 2729, 2738, - -1669, 2739, 1162, 2753, 2740, 1968, 2476, 1039, 2749, 1968, - 2606, 2755, 2756, 2758, 1159, 2761, 468, 1157, 2762, 2763, - 543, 2477, 543, 2769, 2195, 1168, 2767, 2778, 2768, 564, - 1165, 2782, 54, 54, 2800, -1669, 2809, 2794, 494, 1474, - 2815, 895, 2816, 2823, 2826, 2828, 2833, 2834, 2334, 896, - 564, 2824, 2443, 2445, 2825, 2336, 2835, 2846, 2848, 1229, - 2852, 897, 564, 849, 2290, 2294, 1754, 2041, 2250, 2446, - 2076, 
2744, 2581, 2790, 2357, 2796, 2493, 2843, 1294, 2699, - 2829, 2638, 1089, 2048, 1089, 1089, 2822, 1355, 2026, 2792, - 1250, 2295, 898, 2362, 2827, 2077, 2791, 564, 564, 564, - 2022, 1305, 1443, 2078, 1546, 2104, 2533, 1157, 712, 2005, - 2351, 1168, 1547, 1548, 1549, 1720, 2478, 2732, 468, 1300, - 1335, 1168, 1281, 2680, 2786, 1334, 2016, 1347, 2236, 1700, - 468, 468, 2737, 2440, 468, 1719, 1163, 900, 1986, 2411, - 1374, 2210, 1339, 2427, 2667, 2206, 468, 2728, 2727, 54, - 1089, 1165, 2734, 1089, 1089, 468, 2735, 1849, 1974, 901, - 468, 1398, 1160, 468, 465, 1529, 1531, 1974, 2580, 2517, - 468, 468, 1974, 465, 1162, 1974, 1535, 1536, 465, 903, - -1669, 465, 468, 1165, 468, 2784, 2777, 728, 2785, 1652, - 1887, 1172, 2008, 1947, 1974, 1888, 1889, 1890, 1945, 1865, - 465, 1788, 1727, 1662, 468, 2081, 1846, 2720, 1671, 2251, - 1673, 1968, 1735, 0, 1738, 1456, 1968, 1749, 0, 1968, - 1303, 0, 0, 1753, 0, 1755, 0, 2439, 1232, 2441, - 0, 1233, 1157, 0, 0, 0, 2336, 1762, 0, 0, - 0, 0, 1765, 0, 0, 0, 1770, 1771, 1772, 1773, - 0, 1777, 1778, 0, 0, 0, 0, 0, 0, 0, - 0, 1239, 0, 0, 0, 468, 1159, 0, -1669, 0, - 0, 468, 1168, 0, 0, 0, 0, 1974, 0, -1669, - 0, 576, 0, 465, 0, 0, 1257, 1258, 1259, 1260, - 1261, 1262, 0, 1241, 468, 0, 564, 0, 468, 1427, - 0, 54, 1168, 0, 0, 1168, 468, 0, 1163, 0, - 1974, 0, 494, 494, 0, 0, 465, 468, 564, 564, - 1307, 2623, 54, 0, 0, 0, 0, 1039, 1453, 0, - 0, 1039, 468, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 2517, 1458, 1460, 564, 1358, 0, 0, 0, - 0, 0, 0, 1157, 1968, 0, 1159, 0, 564, 0, - 0, 0, 1220, 0, 0, 0, 1090, 0, 0, 0, - 0, 0, 0, 0, 0, 564, 0, 2006, 0, 564, - 0, 0, 0, 468, 0, 1157, 468, 0, 0, 0, - 0, 1892, -1669, 0, 0, 0, 0, 0, 1974, 0, - 0, 0, 0, 0, 465, 0, -1669, 0, 468, 0, - 0, -1669, 0, 0, 1160, 54, 0, 0, 0, 468, - 0, 0, 0, 0, 0, 0, 1162, 0, 0, 0, - 0, 1974, 0, 0, 0, 2714, 0, 465, 0, 468, - 0, 0, 0, 1525, 1526, 0, -1669, 0, 0, 0, - 0, 1540, 0, 1541, 1542, 0, 0, 0, 468, 468, - 1893, 0, 0, 0, 0, 0, 0, 0, 1579, 0, - 1580, 54, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 
1159, 0, 1165, 0, 1592, 1039, 2336, 0, 0, - 0, 2741, 0, 0, 2334, 0, 0, 0, 0, 0, - 0, 1250, 0, 468, 1160, 468, 0, 0, 0, 0, - 0, 0, 1415, 0, 0, 1974, 1162, 564, 0, 0, - 0, 465, 0, 1165, 1661, 0, 0, 1441, 0, 0, - 0, 0, 0, 644, 0, 0, 644, 0, 0, 1165, - 494, 494, 0, 494, 644, 1670, 564, 1672, 0, 0, - 0, 0, 54, 0, 0, 0, 0, 0, 0, 1165, - 1163, 0, 0, 0, 0, 0, 0, 468, 0, 1550, - 1551, 1552, 2805, 1553, 1554, 1555, 1556, 1557, 1558, 0, - 0, 0, 0, 0, 1709, 1709, 0, 0, 1165, 0, - 1715, -1669, 0, 0, 1164, 1165, 0, 0, 0, 0, - 0, 1528, 1159, 535, 0, 0, 0, 0, 0, 0, - 0, 0, 1165, 2710, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1160, - 0, 0, 0, 0, 1159, 0, 1089, 0, 0, 0, - 0, 1162, 0, 0, 0, 0, 1089, 0, -1451, 1089, - 1163, 0, 1894, 1895, 1896, 0, 1897, 1898, 1899, 1900, - 1901, 1902, 878, 0, 0, 0, 0, 2095, 879, 1798, - 0, 1561, 0, 0, 0, 891, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 1157, 0, 0, 0, 0, - -1669, 0, 0, 0, 892, 0, 0, 1257, 1258, 1259, - 1260, 1261, 1262, 0, 0, 0, 1484, 1485, 0, 644, + 468, 647, 588, 907, 576, 53, 41, 1400, 822, 1047, + 844, 645, 1089, 1040, 1096, 873, 58, 51, 72, 1383, + 529, 713, 1076, 833, 1566, 1364, 1994, 1230, 2045, 1375, + 464, 465, 58, 1232, 1858, 1215, 1430, 1399, 1431, 1433, + 1434, 1044, 1462, 1895, 1896, 653, 1371, 1353, 1915, 1277, + 590, 1541, 1411, 2060, 1540, 2062, 1955, 562, 1539, 2222, + 2267, 1287, 2348, 2366, 1605, 1606, 2350, 1537, 1105, 2041, + 654, 2013, 591, 79, 1404, 1860, 707, 2374, 574, 2377, + 729, 599, 2701, 1521, -680, 2239, -683, -1004, -680, 1942, + 1943, 1677, 1079, -1029, 1578, -683, -1551, -675, 472, 1453, + -1562, -1026, -1026, 1724, -1653, -1653, 2530, -1569, -1664, -1664, + -1673, -1673, 1989, -1030, 607, -1671, -1671, 1068, -1029, 535, + -1562, -1027, -1027, 729, 834, -429, 1989, 1293, -1569, 535, + 2535, 906, -432, 912, 1721, 916, 566, 1898, 2096, -1030, + 638, -1551, -642, -655, -670, 1616, 535, 1616, 1048, 827, + 535, 1674, 3, 4, 1068, 714, 1356, 2585, 1367, 1637, + 2478, 2853, 2539, 1290, 1083, 535, 1083, 2644, 1402, 
729, + 1607, 729, 1241, 729, 866, 1063, 1875, -896, 2521, 2124, + 2126, 869, 1282, 1653, -896, 1068, 1278, 536, -174, 2260, + 1654, 2564, 83, -174, 1805, 1384, 2632, -915, 1387, 1388, + 2660, 2176, 1247, 1326, -915, 1241, 583, 585, 713, 2812, + 1241, 2103, 1578, 580, 713, 2462, 585, -429, 2645, 1064, + 2671, -389, 2237, 2536, -432, 1898, 2646, 2601, 1222, 1226, + 2516, 2704, 2504, 2864, 1249, 1247, 2703, 2229, 2537, 2839, + 1247, 1426, 2727, 25, 2607, 2289, 2620, 1339, 2656, 1327, + 2279, 1233, 1367, 1359, 2673, 2647, 2657, -1650, -1650, 2825, + 1044, 1097, 2073, 1742, 2400, 2624, 1898, 1249, 1409, 2378, + 584, 1488, 1249, 548, 1291, 1410, 2475, 1703, 29, 1288, + 1241, 546, 493, 1638, 1199, 1200, 521, 835, 1661, 836, + 862, 1376, 2686, 539, 856, 1745, 2230, 1639, 581, 1241, + 1991, 2674, 1640, 573, 573, 2268, 533, 2435, 1328, 80, + 2601, 2439, 2230, 2153, 532, 2290, 1352, 2643, 2239, 31, + 1743, 2278, 534, 2381, 2648, 713, 713, 2515, 2231, 1247, + 1309, 1323, 1191, 2779, 600, 2649, 1727, 1641, 2813, 2529, + 717, 2823, 1249, 2441, 2231, 2014, 2476, 585, 535, 1204, + 84, 718, 2233, 81, 569, 603, 1329, 1427, 85, 2232, + 864, 1249, 2275, 1704, 2277, 2854, 2269, 2618, 2233, 1128, + 34, 2157, 2158, 2159, 2160, 549, 1414, 2164, 2165, 2166, + 2167, 2168, 2169, 2170, 2171, 2172, 2173, 828, 2865, 1065, + 1292, 2802, 86, 2705, 2716, 2291, 857, 2630, 1069, 2621, + 2672, 1361, 2234, 2860, -429, 2826, 1186, 1398, 1445, 2401, + 1215, -432, 1413, 2814, 1330, 2538, 1989, 578, 1989, 2658, + 1374, 1486, 2463, 2617, 2846, 2379, 2046, 1662, 639, 2200, + 2201, 2564, 1806, 1324, 2675, 1069, 2398, 2619, 2821, 87, + 1376, 1728, 2855, 2467, 1976, 1977, 1443, 2808, 1870, 2388, + 2724, 2763, 2794, 1566, 567, 2425, 579, 829, 1578, 1578, + 1350, 1279, 1283, 1578, 2601, -429, 1069, 1351, 1305, 2856, + 723, 2235, -432, 1040, 2014, 2479, 592, 2026, 617, 615, + 1377, 1835, 1642, 586, 1526, 1742, 2780, 2235, 1403, 58, + 616, 619, 1066, 611, 576, 633, 1617, 612, 1957, 1471, + 2196, 2015, 1737, 593, 
1578, 1578, 1996, 2086, 2087, 1721, + 2348, 1362, 610, 2530, 2350, 2574, 2781, 54, 712, 1794, + 2578, 582, 2650, 2580, 2020, 2651, -896, 2565, 709, 2522, + 1178, 1210, 1655, 54, 473, 1394, 874, 2047, 1400, 58, + 1210, 1374, 2038, 625, 468, 468, -915, 2840, 2601, 1070, + 2833, 575, 880, 1211, 636, 2469, 2135, 1455, 881, 1395, + 837, 645, 1211, 468, 1452, -680, 611, 1212, -1004, 590, + 612, 838, 1458, -429, -1029, 589, 1214, -1551, -675, 713, + -432, -1562, 2074, 1218, 53, 41, 1075, 2480, -1569, 1575, + 637, 591, 1794, 54, -1030, 58, 51, 72, 1998, -1029, + 1236, -1562, 2281, 1523, 955, 468, 1041, 2726, 1913, -1569, + 854, 641, -429, 2731, -429, 845, 1522, 2730, 1222, -432, + -1030, -432, -1551, 1189, 1195, -670, 1530, 1285, 1463, 1377, + 1534, 1678, 1394, 1210, 707, 1676, 2466, 1087, 1088, 1087, + 1088, 2506, 2507, 1270, 1367, 468, 1210, 1613, 522, 1580, + 1582, 862, 79, 1690, 1368, 1211, 1395, 1376, 2681, 1523, + 1044, -174, -174, 882, -1648, -1648, 1989, 1210, 1211, 1389, + 1989, 1265, 1266, 1267, 1268, 1269, 1270, 2533, 1267, 1268, + 1269, 1270, 1212, 1233, 2282, 2497, 883, 1771, 2468, 1211, + 1374, 2525, 1826, 1910, 1911, 1912, 1913, 1774, 1367, 715, + 1777, 716, 1191, 1214, 1523, 712, 2785, 1713, 1370, 2498, + 1566, 712, 955, 1170, 1523, 1044, 2389, 621, 2391, 622, + 824, 864, 1204, 2534, 2141, 2143, 2144, 2140, 2142, 468, + 58, 2139, 1908, 1909, 1910, 1911, 1912, 1913, 2049, 2138, + 2048, 2049, 2408, 1619, 2211, 2212, 2213, 2214, 2283, 862, + 2284, 1745, 1520, 476, 863, 2739, 2610, 468, 576, 1465, + 651, 2348, 635, 1852, 729, 2350, 2302, 2187, 1853, 2050, + 865, 2161, 1845, 1405, 1847, 1763, 2099, 468, 468, 468, + 1770, 468, 468, 2740, 719, 2003, 1578, 1578, 1578, 1578, + 650, 2133, 1578, 1578, 1578, 1578, 1578, 1578, 1578, 1578, + 1578, 1578, 1326, 2069, 640, 468, 1390, 2149, 2631, 2155, + 477, 2803, 883, 2804, 1391, 1454, 2070, 2859, 1854, 864, + 2004, 2722, 712, 712, 1467, 721, 1834, 2156, 1454, 729, + 1836, 1691, 722, 1838, 1790, 1791, 1792, 2177, 1230, 
723, + 1935, 573, 2834, 2835, 1578, 1578, 1377, 826, 1327, 1376, + 1317, 1318, 1997, 1322, 468, 1040, 2180, 1376, 468, 468, + 1376, 1376, 884, 2185, 1081, 1989, 885, 1082, 865, 468, + 1400, 2551, 591, 591, 1989, 591, -1649, -1649, 1092, 1989, + 854, 1734, 1989, 2851, 1092, 724, 1772, 1170, 2020, 2707, + 2862, 1775, 1625, 628, 1303, 886, 611, 1304, 25, 2084, + 612, 1989, 893, 25, 880, 1851, 2162, 1374, 1769, 1855, + 881, 1856, 955, 2711, 728, 25, 2163, 1898, 1626, 955, + 2024, 894, 1899, 1900, 1901, 729, 728, 1780, 629, 1898, + 1947, 832, 1787, 29, 1899, 1900, 1901, 1401, 29, 2189, + 955, 2567, 2075, 2017, 841, 1928, 2568, 1930, 1931, 2554, + 29, 846, 1811, 1575, 1575, 1329, 1948, 2616, 1575, 847, + 1234, 887, 630, -1651, -1651, 1408, 1873, 2104, 713, 849, + 1238, 895, 2111, 2072, 31, 628, 2076, 2080, 481, 31, + 482, 713, 1306, 1286, 1989, 1307, 2445, 32, 850, 2448, + 54, 31, 32, 1295, 1459, 1092, 1092, 1307, 2345, 1575, + 1575, 1600, 853, 1601, 32, 484, 1167, -1652, -1652, 631, + 629, 33, 623, 1330, 624, 882, 33, 1989, -1654, -1654, + 1485, 1494, 1495, 1486, 2661, 1692, 728, 1627, 33, 1898, + 34, 1929, 1628, 860, -1675, -1675, -1675, 1629, 1377, 875, + 54, 575, 34, 611, 2095, -1004, 1377, 612, 2758, 1377, + 1377, -475, 955, 852, 1183, 1184, -475, 1518, 1686, 896, + 1519, 1075, 1732, 1945, 728, 1307, 1041, 1898, 1802, 877, + 1949, 1803, 1899, 1900, 1901, 1950, 712, 879, 2385, 1201, + 1430, 878, 1431, 1433, 1434, 1566, 709, 1500, 1501, 2190, + 1830, 631, 1041, 1486, 589, 1841, 54, 58, 1842, 1374, + 2662, 1993, 1823, 1226, 1824, 1989, 891, 1374, 897, 1902, + 1374, 1374, 1046, 1294, 880, 892, 898, 573, 1903, 2852, + 881, -475, 1871, 909, 2057, 1307, 2059, 913, 899, 1969, + 1903, 1970, 1045, 1879, 2822, 1553, 1307, 1049, 1989, 1170, + 2440, 1051, 2443, 1554, 1555, 1556, 1883, 2120, 728, 1307, + 2300, 1898, 1917, 1058, 2836, 1307, 1899, 1900, 1901, 900, + 880, 859, 2063, 861, 883, 2064, 881, -1655, -1655, 468, + -475, 468, 1971, 2411, 1972, 1630, 468, 617, 615, 468, + 
1167, 2293, 2276, 468, 2178, 2179, 1631, 1904, 58, 616, + 619, 823, 2292, 825, 2301, 1504, 1505, 468, 2093, 1904, + 468, 468, 468, 468, 902, 1060, 2067, 1080, 2866, 2068, + 468, 468, 468, 1951, 910, 1067, 2129, 1074, 885, 1519, + 2363, 1062, 1989, 1072, 1952, 882, 903, -1656, -1656, 2131, + 468, 54, 2132, 2510, 1077, 1170, 468, 468, 468, 468, + 1041, 1500, 1501, 1471, 468, 468, 905, 911, 880, 468, + 2258, 1078, 2145, 468, 881, 1307, 468, 468, 468, 468, + 468, 468, 468, 468, 468, -1657, -1657, 468, 1903, 1193, + 2192, 882, 468, 1307, 602, 468, 605, 468, 609, 955, + 1098, 1575, 1575, 1575, 1575, 1172, 1578, 1575, 1575, 1575, + 1575, 1575, 1575, 1575, 1575, 1575, 1575, 2256, 1173, 1904, + 2257, 468, -1658, -1658, 2261, 1224, 728, 2262, 838, 1898, + -1659, -1659, 1175, 887, 1899, 1900, 1901, 1694, 1177, 468, + 2280, 2550, 2369, 1803, 1240, 1803, -646, 1241, 468, 468, + -653, 2115, -1675, -1675, -1675, 88, 2020, 1904, 471, 1575, + 1575, 713, 2386, 1182, 520, 2387, 1456, 1185, 1457, 1504, + 1505, 1186, 1903, 2845, 547, -1660, -1660, 1247, 559, 882, + 1170, 1546, 1523, 1187, 1248, 1188, 1041, -643, 1041, 1192, + 2342, 34, 1578, 2184, 883, 589, 589, 1194, 589, 1905, + 1906, 1907, -644, 1908, 1909, 1910, 1911, 1912, 1913, 1249, + 1197, 1905, 1906, 1907, 1198, 1908, 1909, 1910, 1911, 1912, + 1913, 2409, 2300, 1203, 1519, -647, 2471, 955, 2019, 1803, + 468, 468, 2472, 468, 2555, 1803, 1205, 1519, 583, 2470, + 883, 1904, 2611, 2637, 914, 2612, 1803, 1206, 885, 1183, + 1184, -476, 1167, -645, 2663, 2666, -476, 1519, 2667, 1578, + 468, 1231, 2678, 1207, 1523, 1075, 2345, 1827, 1041, 2244, + 2683, 25, 2687, 2684, 1201, 2684, 2699, 915, 1275, 2700, + 1488, 1679, 2430, 2736, 2737, 712, 2700, 1519, 1208, 1763, + 1733, 2351, 2745, 1209, 885, 1075, 2430, 1216, 712, 1281, + 955, 1170, 584, 1282, 2751, 1217, 29, 2684, 1250, 1218, + 2384, -1675, -1675, -1675, 1235, 1908, 1909, 1910, 1911, 1912, + 1913, -476, 1251, 886, -1661, -1661, 1283, 1252, 883, 2362, + 1903, 2766, 2685, 1170, 2767, 
2685, 1550, 1551, 2795, 468, + 468, 2796, 1297, 887, 1310, 468, 1316, 31, 1167, 1905, + 1906, 1907, 863, 1908, 1909, 1910, 1911, 1912, 1913, 865, + 32, 2798, 1255, 576, 1075, 2829, 2847, 1346, 2684, 2700, + -476, -1662, -1662, 2531, -1663, -1663, -1665, -1665, 1778, 585, + -1666, -1666, 885, 1354, 33, 468, 468, 468, 1348, 887, + 468, -1667, -1667, -1668, -1668, 880, -1669, -1669, 34, 1904, + 1372, 881, 1373, 468, 468, 468, 468, 2020, -1670, -1670, + 1374, 1779, 576, 468, 2502, 1379, 468, 1258, 54, 1380, + 1557, 1558, 1559, 1381, 1560, 1561, 1562, 1563, 1564, 1565, + 1385, 468, 468, 1905, 1906, 1907, 468, 1908, 1909, 1910, + 1911, 1912, 1913, -1672, -1672, -1674, -1674, 1392, 880, 1738, + 468, 1416, 1417, 468, 881, 468, 1502, 1503, 1092, 1746, + 1393, 1749, 1850, 1397, 1760, 1407, 1722, 1723, -522, -522, + 1764, 1092, 1766, 1167, 1412, 2635, 468, 887, -526, -526, + -525, -525, 576, 1419, 1773, 620, 1504, 1505, 468, 1776, + 1420, 1044, 1424, 1781, 1782, 1783, 1784, 632, 1788, 1789, + 2511, 2512, 468, 1428, 1429, 1523, 882, 1260, 1436, 54, + 2120, 1439, 617, 615, 1440, 586, 468, 2345, 1987, 465, + 1299, 1301, 1442, 58, 616, 619, 1446, 1454, 1159, 1178, + 2457, 468, 1987, 465, 1189, 1195, 1484, 1487, 1489, 1490, + 2482, 1526, 1525, 1531, 729, 1584, 1585, 1241, 1307, 468, + 1597, 1599, 1615, 1622, 1602, 1633, 1634, 1636, 1657, 882, + 1658, 1674, 1680, 1687, 1688, 1695, 1041, 1041, 1041, 1702, + 584, 1714, 831, 1715, 1716, 1735, 1717, 1740, 3, 4, + 1739, 1741, 585, 1795, 25, 1800, 1804, 2485, 1808, 1623, + 1812, 1905, 1906, 1907, 1167, 1908, 1909, 1910, 1911, 1912, + 1913, 1575, 1813, 1814, 1828, 876, 1261, 955, 1832, -1675, + -1675, -1675, 1833, 1265, 1266, 1267, 1268, 1269, 1270, 29, + 1837, 1840, 1848, 1839, 1859, 1872, 1167, 1866, 1863, 1867, + 1864, 1868, 1869, 1885, 1946, 1881, 1170, 1923, 2342, 522, + 1053, 1055, 1057, 1891, 2487, 1924, 1882, 1884, 1886, 54, + 1887, 1888, 1890, 1894, 1916, 883, 1041, 1922, 712, 1925, + 31, 728, 1926, 1941, 1898, 1827, 1933, 1939, 
1962, 1899, + 1900, 1901, 1161, 32, 1953, 1963, 1170, 1965, 1966, 1967, + 1991, 2000, 893, 2009, 2002, 2016, 2412, 1575, 893, 468, + 2022, -527, 1170, -528, 2027, 2031, 2028, 33, 2032, 468, + 2034, 894, 1159, 468, 713, 1785, 468, 894, 883, 885, + 2037, 34, 1170, 468, 468, 468, 468, 1162, 2030, 468, + 468, 468, 468, 468, 468, 468, 468, 468, 468, 2036, + 2043, 2206, 468, 468, 2040, 2085, 2056, 2713, 1786, 1400, + 2058, 1170, 2490, 2071, 2077, 2066, 468, 2078, 1170, 2081, + 2079, 895, 2082, 576, 1575, 468, 2102, 895, 468, 2083, + 468, 2101, 885, 2106, 2105, 2109, 2112, 1170, 468, 2113, + 576, 468, 468, 2116, 2123, 2130, 468, 468, 2148, 54, + 1919, 1898, 2137, 1918, 2194, 2491, 2181, 2182, 2089, 2183, + 2195, 886, 468, 466, 468, 2459, 2207, 2614, 494, 2198, + 2492, 1974, 494, 2220, 887, 468, 2665, 2242, 538, 494, + 543, 2228, 2248, 543, 468, -441, 564, 2296, 2246, 494, + 494, 2250, 1987, 465, 1987, 465, 1492, 1493, -441, 2710, + 468, 1092, 2717, -441, 2251, 2253, 2121, 2255, 2122, 896, + 2259, 1302, 2127, 2128, 2270, 896, 1161, 2274, 1226, 2288, + 2304, 2372, 2266, 2376, 2286, 2365, 2371, 887, 2382, 2393, + 2394, 1803, 25, 543, 1313, 1903, 2396, 2399, 2403, 2670, + 564, 494, 564, 564, 564, 2402, 2431, 2427, 1041, -441, + 2436, 2437, 1041, 2349, 1041, 2404, 2417, 2670, 897, 2342, + 1355, 1162, 2428, 2450, 897, 2493, 898, 29, -441, 2444, + 1494, 1495, 898, 1382, 2434, 2449, 25, 2451, 899, 2473, + 1224, 2452, 2465, 2477, 899, 2765, 2500, 2108, 2501, 2505, + 2518, -1026, 2519, -1648, 1406, 2738, 2517, -1649, -1650, 1167, + -1651, -1652, -1653, -1654, 1904, 2526, -1655, 2380, 31, 900, + -1656, 29, 2719, -1657, -1658, 900, -1659, -441, 2540, 2527, + -1660, 32, -1661, -1662, 1159, 2520, -441, -1663, -1665, 1224, + 468, 2528, 1496, 1497, 1498, 1499, 1500, 1501, 2541, 1167, + 1502, 1503, 2543, -1666, 2547, 33, -1667, 1623, 2552, -1668, + -1669, -1670, 31, 2748, 902, 1167, 523, -1671, 2710, 34, + 902, 2819, 2549, 1964, -1672, 32, -1673, -1674, -1027, 2546, + 2553, 2560, 2566, 
468, 468, 1167, 903, 2579, 468, 1170, + 2584, 1981, 903, 468, 2229, 2760, 468, 468, 2593, 33, + 2597, 468, 2603, 2606, 54, 1981, 905, 2625, 2605, 575, + 2608, 2613, 905, 524, 1167, 2622, 2626, 2627, 2628, 2636, + 2639, 1167, 468, 2640, 2641, 2654, 468, 2655, -1455, 2664, + 1159, 2668, 2482, 2676, 2677, 2680, 2347, 2693, 2483, 2696, + 1167, 2701, 1435, 2689, 2715, 2718, 2720, 468, 2732, 2734, + 2484, 496, 1987, 465, 1504, 1505, 1987, 465, 2089, 2723, + 2458, 2460, 2733, 2744, 2753, 2754, 2755, 2764, 2770, 2230, + 2768, 1461, -441, 2621, 2771, 2773, 2776, 2777, 1161, 2485, + 1041, 2486, 2778, 2782, 2783, 2784, 2809, 2815, 2208, 468, + 2793, 2824, 2830, 2797, 2831, 2838, 2841, 2843, 1170, 2848, + 2849, 2231, 2839, 2272, 497, 2850, 1905, 1906, 1907, 2840, + 1908, 1909, 1910, 1911, 1912, 1913, 2232, 2861, 2863, 498, + -1455, 2867, 851, 1162, 2303, 2233, 1765, 2305, 2349, 2307, + 2054, 2461, 2263, 2759, 2596, 611, 2487, 2805, 849, 612, + 2370, 2508, 2352, 2353, 2354, 2355, 2356, 2357, 2358, 2359, + 2360, 2361, 2811, 2858, 2714, 1159, 1167, 54, 2844, 2234, + 2837, 1363, 2653, 2488, 1506, 1507, 2375, 2807, 2039, 2061, + 2090, 2842, -1455, 499, 1161, 2308, 2035, 2532, 2806, 1451, + 2575, 712, 500, 2091, 1508, -1455, 1170, 2117, 2548, 1289, + -1455, 2364, 476, 468, 501, -1455, 1170, 2018, 1731, 502, + 1343, 2747, 1342, 1308, -1455, 468, 468, -1455, 2695, 468, + 2801, 2029, 1711, -1075, 2249, 2454, 2752, 2456, 2455, 1162, + 1730, 468, 1999, 1347, 503, 2223, 2442, 2682, 2426, 2489, + 468, 2219, 2743, 2742, 2490, 468, -1455, 2749, 468, 2750, + 1536, 564, 1538, 2595, 2235, 468, 468, 627, 1542, 477, + 1543, 1987, 465, 564, 2799, -1455, 2800, 468, 494, 468, + 1987, 465, 2792, 1663, 1937, 1987, 465, 504, 1987, 465, + 1524, 505, 2021, 1672, 1799, 1673, 1159, 2491, 1960, 468, + 1224, 1876, 1167, 1958, 644, 494, 494, 1987, 465, 2094, + 2735, 1464, 2492, 2264, 1681, 2218, 1683, 2221, 1857, 1161, + 1682, 0, 1240, 479, -1455, 1241, 1311, -1455, 1159, 1684, + 0, 2349, 0, -1455, 0, 0, 0, 
0, 0, 543, + 0, 543, 0, 0, 0, 0, 0, 0, 564, 0, + 2576, 2638, 0, 0, 506, 1247, 0, 494, 0, 1726, + 468, 0, -1675, 0, 1162, 0, 468, 1170, 507, 564, + 0, 0, 2532, 0, 1092, 0, 0, 576, 0, 0, + 0, 564, 0, 0, 0, 0, 0, 1249, -1455, 468, + 1987, 465, 0, 468, 0, 0, 0, 1170, 0, 508, + 1170, 468, 509, 0, 0, 0, 0, 2493, 0, 0, + 510, 1167, 468, 511, 0, 0, 564, 564, 564, 0, + 0, 0, 1041, 1987, 465, 0, 1041, 468, 0, 0, + 0, 0, 512, 0, 0, 0, 0, 481, 1809, 482, + 1161, 1167, 1974, 0, 513, 0, 0, 0, 0, 54, + 0, 514, 0, 0, 0, 475, 1944, 0, 0, 476, + 515, 483, 0, 0, 484, 2729, 0, 0, 0, -1455, + 0, 0, 1161, 1240, 0, 0, 1241, -1455, 468, 0, + -1075, 468, 0, 0, 0, 1162, -1675, 0, 0, 0, + 0, 516, 0, -1455, 0, -1455, -1455, 0, 0, 1167, + -1675, 0, 0, 468, 2347, -1675, 1247, 0, 0, 1167, + 1174, 1987, 465, -1675, 468, 0, 477, 1162, 0, 0, + 0, 2756, 0, 478, 0, 0, 0, 1164, 0, 0, + 0, 0, -1455, 0, 468, -1455, -1455, -1455, 1249, 1240, + -1675, 0, 1241, 0, 1987, 465, 0, 0, 0, 0, + 0, 0, 0, 468, 468, 0, 1240, 0, 0, 1241, + 0, 0, 0, 0, 0, 1981, 0, 0, 0, 1981, + 479, 1165, 1247, 0, 0, 2615, 1229, 0, 0, -1675, + 0, 1041, 2349, 0, 0, 1240, 0, 0, 1241, 1247, + 0, 0, 54, 54, 0, 1258, -1675, 0, 468, 0, + 468, 0, 2820, 0, 1249, 0, 0, 1954, 0, 2633, + 2634, 0, 0, 0, 0, 0, 0, 564, 1247, 0, + 0, 1249, 2642, 2725, 1167, -1675, 0, 0, 1987, 465, + 0, 1159, 0, 494, 494, 0, 0, -1675, 480, 564, + 564, 1315, 0, 0, 0, 0, 0, 0, 0, 0, + 1249, -1675, 0, 0, 0, 0, -1675, 0, 0, 0, + 1167, 0, 468, 0, 0, 0, 564, 1366, 0, 0, + 0, 1159, 1472, 2199, 0, 0, 0, 0, 1973, 564, + 0, 0, 0, 0, 481, -1675, 482, 1159, 2697, 0, + 1167, -1675, 0, 1167, 0, 0, 564, 0, 0, 0, + 564, 1164, 0, -1675, 0, 0, 0, 1159, 483, 54, + 0, 484, 0, 0, 0, 0, 1473, -1675, 0, 2202, + -1675, 0, -1675, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, -1675, 0, 1159, 0, 0, -1675, + 0, 0, 1474, 1159, 0, 1165, 1258, 0, 0, -1675, + 0, 0, 0, 1091, 0, 2347, 0, -1675, 0, 1091, + 1475, 0, 1159, -1675, 1476, 0, 0, 0, -1675, 0, + 0, 0, 0, 1981, -1675, 1161, 0, 0, 1981, 0, + 0, 1981, 0, 
0, -1675, 1477, 0, 0, 1478, 0, + 0, 1265, 1266, 1267, 1268, 1269, 1270, 0, 0, 0, + 0, 0, 1479, -1675, 0, 0, 0, 0, 0, 0, + 0, 0, 1258, 0, 0, 1161, 0, 0, 0, 2092, + 1162, 0, 0, -1457, 2097, 2098, 2100, 0, 0, 1258, + 0, 1161, 0, 1423, 0, 0, -1675, 0, 564, 0, + 0, 0, 0, 0, 2114, 1237, 0, 0, 1449, 0, + 0, 1161, 0, 0, 644, 0, 0, 644, 1258, 0, + 1162, 494, 494, 54, 494, 644, 0, 564, 1091, 0, + 1091, 1091, 0, 0, 0, 1167, 1162, 0, 1159, 0, + 1161, 0, 0, 0, 54, 0, 0, 1161, 0, 0, + 0, 1480, 0, 0, 0, 0, 1162, 0, 0, 1481, + 0, 0, -1675, 0, 0, 0, 1161, 0, 0, 0, + 0, 1166, 0, 0, 0, -1457, 1981, 0, 0, -1675, + 0, 0, 0, 0, 1224, 1162, 0, 0, 0, 0, + 0, 0, 1162, 0, 0, -1675, 1091, 0, 0, 1091, + 1091, 1482, 1265, 1266, 1267, 1268, 1269, 1270, -1675, 0, + 0, 1162, 0, 1164, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, -1457, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, + -1457, 0, 0, 0, 0, -1457, 2215, 2216, 2217, 0, + -1457, 0, 1568, 0, 0, 0, 0, 1165, 0, -1457, + 0, -1675, -1457, 0, 1159, 0, 0, 0, 1265, 1266, + 1267, 1268, 1269, 1270, 0, 0, 0, 0, -1675, 0, + 0, 0, 1161, 0, 0, 1265, 1266, 1267, 1268, 1269, + 1270, -1457, 0, 54, 0, 0, 644, 0, 0, 0, + 0, 2271, 0, 0, 0, 0, 0, -1675, 0, 1164, + -1457, 0, 0, 0, 1265, 1266, 1267, 1268, 1269, 1270, + 0, 0, 0, 0, 0, 0, 0, 1162, 0, 564, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1089, 0, 0, 0, 1157, 0, 0, 1165, 0, - 0, 0, 0, 0, 1089, 0, 0, 0, 0, 0, - -1451, 1157, 564, 0, 893, 2334, 0, 0, 0, 0, - 1160, 0, 0, 0, 0, 0, 0, 0, 1854, 0, - 0, 1157, 1162, 564, 0, 564, 1164, 0, 0, 0, - 0, 0, 0, 880, 1685, 1163, 1690, 0, 0, 0, - 1486, 1487, 1160, 0, 0, 0, 0, 0, 0, 0, - 1157, 0, -1451, 0, 1162, 0, 0, 1157, 0, 0, - 0, 0, 0, 0, 0, -1451, 0, 0, 564, 0, - -1451, 0, 0, 0, 1157, -1451, 0, 1358, 494, 0, - 0, 0, 0, 0, -1451, 0, 0, -1451, 494, 1736, - 494, 1740, 894, 494, 0, 0, 0, 0, 0, 494, - 0, 494, 1488, 1489, 1490, 1491, 1492, 1493, 0, 0, - 1494, 1495, 644, 494, 1165, 0, -1451, 644, 494, 0, - 0, 0, 494, 494, 494, 494, 0, 494, 494, 0, - 0, 2259, 0, 0, 0, -1451, 0, 0, 0, 0, - 0, 
895, 0, 0, 0, 0, 0, 564, 1799, 896, - 0, 0, 0, 0, 0, 2292, 1163, 0, 0, 0, - 1811, 897, 0, 0, 0, 0, 1960, 0, 0, 0, - 2339, 2340, 2341, 2342, 2343, 2344, 2345, 2346, 2347, 2348, - 0, 0, 881, 0, -1451, 1982, 0, -1451, 1163, 0, - 1157, 0, 898, -1451, 1159, 1232, 0, 0, 1233, 0, - -1465, 0, 0, -1669, -1669, -1669, 0, 0, 0, 1851, - 0, 0, 0, 0, 1496, 1497, 2012, 2012, 0, 0, - 0, 0, 0, 1165, 0, 0, 0, 0, 1239, 0, - 0, 0, 899, 0, 1159, 1240, 883, 900, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, -1451, 0, - 1159, 0, 0, 1165, 0, 0, 0, 0, 0, 901, - 1241, 0, 0, 0, 0, 902, 0, 0, 1232, 0, - 1159, 1233, 0, 0, 0, 0, 0, 0, 1164, 903, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, -1465, 0, 0, 0, 0, 0, 0, 1159, - 0, 1239, 1961, 0, 0, 0, 1159, 2079, -1669, 1165, - 0, 0, 2084, 2085, 2087, 0, 1157, 0, 0, 1165, - 0, 0, 0, 1159, 1498, 1499, 0, 0, 0, -1451, - 0, 885, 2101, 1241, 0, 0, 0, -1451, 0, 0, - 0, 0, 1160, 0, -1465, 0, 0, 0, 0, 0, - 0, 1561, 0, -1451, 1162, -1451, -1451, -1465, 0, 1242, - 0, 0, -1465, 0, 0, 0, 0, -1465, 1164, 0, - 2121, 2123, 0, 1243, 0, 0, -1465, 0, 1244, -1465, - 0, 0, 1160, 0, 564, 0, 0, 0, 0, 0, - 0, 1977, -1451, 0, 1162, -1451, -1451, -1451, 1160, 0, - 0, 0, 2189, 0, 0, 1977, 0, 0, -1465, 1995, - 1162, 0, 1998, 1247, 1690, 0, 0, 0, 1160, 0, - 0, 0, 0, 0, 0, 1157, 0, -1465, 0, 0, - 1162, 0, -1669, 728, 1165, 0, 1887, 0, 0, 1159, - 0, 1888, 1889, 1890, 0, 0, -1669, 1160, 0, 0, - 0, -1669, 0, 0, 1160, 1157, 0, 0, 2396, 1162, - 1415, 0, 0, 0, 0, 0, 1162, 0, 1250, 0, - 1165, 1160, 2202, 2203, 2204, 0, -1465, 0, 0, -1465, - 0, 0, 0, 1162, 0, -1465, -1669, 0, 1163, 0, - 0, 0, 0, 1164, 0, 0, 0, 0, 0, 0, - 1165, 0, 0, 1165, 0, 0, 0, 0, 0, 0, - 0, 1157, 0, 0, 0, 0, 0, 0, 0, 0, - 1415, 1157, 0, 0, 0, 564, 0, 2258, 1163, 1415, + 0, 0, 0, 0, 0, 1166, 0, 0, 0, 0, + 564, 0, 564, 1165, 0, 0, 0, 0, 0, -1457, + 0, 1696, -1457, 1701, 0, 0, 2368, 0, -1457, 0, + 0, 0, 0, 1159, 54, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1461, 0, 1466, + 1468, 0, 0, 0, 0, 564, 0, 0, 0, 0, + 0, 0, 0, 1159, 1366, 494, 0, 
0, 1161, 0, + 0, 0, 0, 0, 0, 494, 1747, 494, 1751, 0, + 494, 0, 0, -1457, 0, 0, 494, 0, 494, 0, + 0, 0, 0, 0, 1164, 0, 0, 0, 0, 644, + 494, 0, 0, 0, 644, 494, 0, 0, 0, 494, + 494, 494, 494, 1162, 494, 494, 0, 0, 0, 0, + 0, 1159, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1159, 0, 0, 564, 1810, 893, 1974, 1165, 0, + 0, 0, 2421, 2422, 2423, 2424, 0, 1822, 0, 1532, + 1533, 0, 0, 0, 0, 894, 0, 1547, 0, 1548, + 1549, 0, 0, 0, -1457, 0, 0, 0, 0, 0, + 0, 0, -1457, 0, 1586, 0, 1587, 1161, 0, 0, + 0, 804, 804, 0, 0, 0, 0, 0, -1457, 0, + -1457, -1457, 0, 0, 0, 1240, 1862, 1603, 1241, 0, + 0, 0, 0, 0, 0, 895, 2294, 1161, 0, 0, + 0, 0, 0, 0, 2295, 1164, 0, 0, 0, 0, + 0, 0, 1162, 0, 0, 0, 0, -1457, 1247, 0, + -1457, -1457, -1457, 0, 0, -1675, 0, 0, 0, 0, + 0, 950, 957, 0, 0, 728, 1159, 1164, 1898, 0, + 0, 0, 1162, 1899, 1900, 1901, 0, 1166, 0, 1165, + 1249, 0, 0, 0, 0, 1161, 0, 0, 0, 0, + 0, 2296, 0, 0, 0, 1161, 0, 0, 0, 0, + 0, 0, 1159, 0, 0, 0, 0, 0, 0, 0, + 0, 1165, 0, 896, 0, 0, 1720, 1720, 1240, 0, + 0, 1241, 0, 0, 0, 0, 0, 1229, 0, 0, + 1162, 0, 1159, 0, 0, 1159, 0, 0, 0, 0, + 1162, 0, 0, 0, 0, 0, 0, 0, 0, 2203, + 0, 1247, 0, 0, 0, 0, 0, 0, -1675, 0, + 0, 1568, 897, 0, 0, 0, 0, 0, 1091, 950, + 898, 0, 0, 1166, 0, 0, 0, 0, 1091, -1675, + 0, 1091, 899, 1249, 2297, 0, 1180, 0, 0, 0, + 0, 0, 0, -1675, 564, 0, 0, 0, -1675, 0, + 0, 1990, 0, 0, 0, 0, 0, 0, 0, 0, + 1161, 0, 0, 900, 1202, 1990, 0, 0, 0, 2008, + 0, 0, 2011, 0, 1701, 0, 0, 0, 0, 0, + 0, 0, 0, -1675, 1219, 1220, 1221, 0, 0, 1225, + 0, 0, 0, 2623, 0, 0, 1161, 0, 0, 0, + 0, 0, 2415, 1091, 0, 1162, 0, 0, 902, 0, + 0, 0, 1280, 0, 0, 0, 1091, 0, 0, -1675, + 1423, 0, 0, 0, 0, 0, 1161, 0, 0, 1161, + 903, 0, -1675, 0, 0, 0, 0, 0, 1258, 0, + 1865, 1162, 0, 0, 2659, 0, -1675, 0, 1166, 0, + 905, -1675, 2298, 0, 0, 2299, 0, 0, 0, 0, + 0, 1321, 0, 0, 0, 1333, 1338, 1159, 0, 0, + 0, 1162, 0, 0, 1162, 0, 0, 1492, 1493, 0, + 1423, 0, 0, 0, 0, 564, -1675, 0, 1904, 1423, 564, 564, 564, 0, 0, 0, 0, 0, 0, 0, - -1465, 1250, 0, 494, 1163, 0, 0, 0, 0, 
0, - 564, 0, 0, 0, 644, 0, 0, 0, 1252, 0, - 0, 0, 0, 0, 1163, 1159, 0, 0, 0, 0, - 1415, 1415, 0, 0, 1089, 0, 0, 0, 0, 0, - 0, 2600, 0, 0, 0, 0, 0, 1160, 0, 0, - 0, 0, 2355, 1163, 1961, 0, 0, 0, 0, 1162, - 1163, 0, 0, 0, 0, 0, 0, 0, 1561, 0, - 0, 0, 0, 1453, 1164, 2618, 2619, 1163, 0, 0, - 0, -1465, 0, 0, 0, 0, 0, 0, 2627, -1465, - 0, -1669, 0, 0, 0, 2376, 1157, 1892, 0, 0, - 0, 0, 0, 0, 0, -1465, 1164, -1465, -1465, 0, - 0, 0, 0, 0, 0, 0, 0, 1253, 802, 802, - -1669, -1669, -1669, 0, 1257, 1258, 1259, 1260, 1261, 1262, - 0, 0, 1157, 0, 1159, 0, 0, 0, 0, 0, - 0, 0, 2197, 0, -1465, 1165, 0, -1465, -1465, -1465, - 564, 564, 564, 466, 2682, 1977, 2214, 0, 0, 0, - 2214, 2228, 1157, 0, 1159, 1157, 1893, 0, 2406, 2407, - 2408, 2409, 0, 1160, 0, 0, 0, 0, 948, 955, - 0, 0, 0, 0, 0, 1162, 0, 0, 0, 1690, - -1669, 0, 0, 1163, 0, 0, 0, 1257, 1258, 1259, - 1260, 1261, 1262, 0, 0, 564, 0, 494, 0, 0, - 0, 1415, 1358, 1415, 0, 1441, 0, 0, 0, 0, - 1159, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1159, 494, 0, 2293, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 494, 494, 494, 494, - 494, 494, 494, 494, 494, 494, 0, 0, 1089, 0, - 0, 0, 0, 1089, 0, 0, 0, 0, 0, 0, - 564, 0, 0, 0, 0, 0, 0, 1358, 0, 0, - 0, 0, 1160, 0, 0, 0, 948, 0, 1441, 0, - 0, 564, 0, 0, 1162, 0, 0, 0, 1415, 0, - 0, 0, 0, 1178, 0, 1458, 1460, 2377, 0, 0, - 0, 0, 1160, 0, 0, 0, 0, 0, 0, 1163, - 0, 0, 0, 0, 1162, 0, 0, 0, 0, 0, - 0, 1200, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 1159, 0, 1157, 0, 0, - 0, 1217, 1218, 1219, 0, 1221, 0, 0, 1894, 1895, - 1896, 0, 1897, 1898, 1899, 1900, 1901, 1902, 1160, 0, - 0, 0, 0, 0, 0, 0, 0, 1272, 1160, 0, - 1162, 1159, 0, 2405, 0, 0, 564, 564, 564, 564, - 1162, 0, 0, 2214, 2228, 0, 2214, 891, 0, 2414, - 0, 0, 0, 1977, 0, 0, 1164, 1977, 0, 0, - 0, 1159, 0, 2432, 1159, 0, 892, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 1313, 0, 1163, 0, - 1325, 1330, 1995, 0, 0, 0, 0, 1690, 0, 0, - 0, 1561, 0, 0, 0, 0, 1164, 2608, 0, 1358, - 0, 0, 0, 0, 0, 0, 0, 0, 1163, 0, - 0, 0, 1164, 0, 0, 0, 893, 2281, 0, 1458, - 1460, 0, 0, 0, 2494, 
2282, 0, 0, 0, 0, - 1089, 0, 1164, 0, 948, 0, 0, 0, 0, 0, - 0, 948, 0, 1160, 0, 0, 0, 0, 2644, 0, - 0, 0, 0, 0, 0, 1162, 0, 0, 0, 0, - 0, 1164, 948, 0, 1163, 0, 0, 0, 1164, 0, - 0, 0, 0, 0, 1163, 0, 0, 0, 0, 1160, - 0, 0, 2283, 0, 0, 1164, 0, 0, 0, 0, - 1415, 1162, 0, 0, 0, 1232, 1811, 0, 1233, 0, - 0, 0, 0, 0, 894, 0, 0, 2529, 0, 1160, - 0, 2691, 1160, 0, 0, 0, 0, 0, 0, 0, - 0, 1162, 0, 0, 1162, 0, 0, 0, 1239, 0, - 0, 0, 0, 0, 0, -1669, 0, 0, 0, 0, - 0, 0, 0, 0, 2713, 0, 1159, 0, 0, 0, - 466, 2214, 0, 895, 2557, 0, 0, 494, 0, 1977, - 1241, 896, 0, 0, 1977, 0, 0, 1977, 0, 2568, - 0, 0, 2571, 897, 948, 2284, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 2587, 0, 0, 1163, + 0, 0, 0, 494, 0, 0, 0, 2706, 0, 0, + 564, 0, 0, 0, 644, 0, 0, 0, 0, 950, + 1164, 0, 0, 0, 0, 0, 950, 0, -1675, 0, + 1423, 1423, 0, 0, 0, 0, 0, 0, 0, 0, + 2728, 1258, 0, 0, 0, 0, 0, 950, 0, 0, + 0, 1494, 1495, 0, 0, 0, 1374, 0, 0, 0, + 1164, 0, 0, 0, 1165, 0, 0, 0, 1568, 1166, + 0, 0, 0, 0, 0, 0, 1164, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1164, 0, 0, 898, 0, 644, 494, 0, 0, - 0, 0, 0, 0, 0, 564, 0, 0, 0, 0, - 0, 0, 0, 1232, 0, 1163, 1233, 0, 0, 2190, - 0, 1234, 1235, 1236, 0, 0, 0, 0, 0, 0, - 0, 494, 494, 0, 0, 0, 0, 0, 1237, 900, - 0, 1358, 0, 0, 494, 1163, 1239, 0, 1163, -1669, - 0, 0, 0, 1240, 0, 0, 564, 0, 0, 2587, - 0, 901, 0, -1669, 0, 0, 0, 0, -1669, 0, - 0, 1089, 0, 0, 0, 0, 0, 0, 1241, 494, - 0, 903, 0, 2285, 1160, 0, 2286, 0, 1522, 0, - 0, 0, 1977, 0, 0, 0, 1162, 2571, 0, 0, - 1484, 1485, 0, -1669, 0, 0, 2677, 0, 0, 1545, - 494, 0, 1562, 0, 0, 1574, 0, 1164, 0, 564, - 0, 0, 1581, 1586, 1589, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 1593, 0, 0, 0, - 0, 644, 1597, 1598, 1599, 1600, 0, 0, 0, 0, - 1609, 1610, 564, 0, 0, 1621, 0, 0, 1250, 1624, - 0, 0, 1632, 1633, 1634, 1635, 1636, 1637, 1638, 1639, - 1640, 0, 0, 1641, 1486, 1487, 0, 1242, 1649, 0, - 466, 1653, 0, 802, 0, 948, 1265, 1366, 0, 2731, - 0, 1243, 0, 494, 0, 0, 1244, 2568, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 1674, 0, 0, - 0, 0, 0, 2587, 644, 644, 644, 
1245, 1246, 0, - 0, 0, 0, 0, 0, 1265, 1164, 0, 0, 0, - 1163, 1247, 0, 0, 1218, 1219, 1488, 1489, 1490, 1491, - 1492, 1493, 0, 0, 1494, 1495, 0, 0, -1669, 0, - 0, 0, 0, 0, 0, 0, 1164, 2568, 0, 0, - 0, 0, 0, 0, 0, 0, 2677, 0, 2557, 1248, - 0, 0, 1249, 0, 0, 0, 0, 1690, 0, 0, - 0, 0, 0, 0, 0, 0, 1250, 0, 0, 1251, - 728, 1358, 0, 1887, 0, 0, 0, 2587, 1888, 1889, - 1890, 0, 0, 0, 0, 0, -38, 0, 0, 0, - 0, 0, 1164, 948, 0, 644, 1785, 1786, 0, 1787, - 0, 0, 1164, 0, 0, 1, 0, 0, 0, 0, - 0, 0, 1265, 0, 0, 2, 0, 3, 4, 1265, - 0, 0, 0, 0, 0, 0, 1820, -1669, 1496, 1497, - 0, 0, 0, 5, 1257, 1258, 1259, 1260, 1261, 1262, - 0, 1232, 6, 0, 1233, 0, 0, 0, 0, 1234, - 1235, 1236, 0, 0, 7, 0, 1252, 0, 0, 0, - 0, 728, 0, 0, 1887, 8, 948, 0, 0, 1888, - 1889, 1890, 0, 0, 1239, 0, 9, 0, 10, 0, - 0, 1240, 0, 0, 0, 1232, 2397, 0, 1233, 0, - 0, 0, 0, 1234, 1235, 1236, 11, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 1241, 1863, 0, 0, - 12, 0, 1869, 0, 0, 0, 878, 1164, 1239, 0, - 13, 0, 879, 0, 0, 1240, 14, 0, 0, 891, - 0, 0, 0, 0, 15, 0, 16, 17, 1498, 1499, - 0, 0, 0, 0, 0, 0, 0, 0, 892, 18, - 1241, 0, 0, 1164, 1886, 1253, 0, 0, 1254, 1255, - 1256, 0, 1257, 1258, 1259, 1260, 1261, 1262, 0, 0, - 1586, 0, 1586, 1586, -1669, 1878, 19, 0, 0, 1925, - 0, 0, 0, 1164, 0, 0, 1164, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 893, 0, - 0, 0, 0, 0, 0, 1242, 0, 1232, 0, 0, - 1233, 0, 0, 1943, 0, 20, 1946, 0, 1948, 1243, - 0, 0, 0, 0, 1244, 0, 0, 880, 0, 0, + 0, 0, 0, 0, 0, 0, 1164, 0, 0, 0, + 0, 1166, 0, 0, 1165, 0, 0, 0, 0, 0, + 1995, 1161, 0, 1496, 1497, 1498, 1499, 1500, 1501, 0, + 1165, 1502, 1503, 0, 0, 1164, 0, -1675, 0, 0, + 0, -1675, 1164, 0, 1265, 1266, 1267, 1268, 1269, 1270, + 1165, 2025, 2025, 0, 2210, 0, 0, 0, 0, 0, + 0, 1164, 564, 564, 564, 466, 1162, 1990, 2227, 950, + 0, 0, 2227, 2241, 0, 0, 0, 0, 0, 1165, + 0, 0, 0, 0, 0, 1535, 1165, 535, 0, 0, + 1905, 1906, 1907, 0, 1908, 1909, 1910, 1911, 1912, 1913, + 0, 1701, 0, 0, 0, 1165, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 564, 0, 494, + 0, 0, 0, 1423, 1366, 1423, 0, 1449, 
0, 0, + 0, 0, 0, 0, 0, 1504, 1505, 0, 0, 0, + -1675, 0, 0, 494, 0, 2306, 0, 1265, 1266, 1267, + 1268, 1269, 1270, 0, 0, 0, 0, 0, 494, 494, + 494, 494, 494, 494, 494, 494, 494, 494, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1164, 0, 0, + 0, 0, 564, 0, 0, 0, 0, 0, 0, 1366, + 1492, 1493, 0, 1529, 0, 0, 0, 0, 0, 0, + 1449, 0, 0, 564, 0, 2134, 2136, 0, 0, 0, + 1423, 0, 0, 0, 1552, 0, 0, 1569, 0, 2392, + 1581, 1165, 0, 0, 0, 0, 0, 1588, 1593, 1596, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1239, 0, 1265, 1893, 0, 1245, 1246, -1669, 0, 1242, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1247, - 0, 0, 0, 1243, 1265, 1892, 0, 0, 1244, 1569, - 0, 0, 1241, 0, 0, 0, 0, 0, 0, 1313, - 0, 1265, 1265, 1265, 0, 1265, 894, 0, 0, -1669, - -1669, 0, 21, 0, 1330, 22, 0, 1248, 0, 0, - 1249, 0, 0, 1247, 0, 0, 0, 0, 0, 0, - 0, 0, 2020, 0, 1250, 0, 0, 0, 0, 0, - 0, 0, 0, 23, 0, 0, 0, 0, 0, 0, - 0, 24, 0, 0, 1893, 895, 1265, 0, 0, 0, - 0, 2400, 0, 896, -1669, 25, 0, 0, 0, 0, - 0, 0, 26, 0, 0, 897, 27, 0, 1250, 0, - 0, 0, 0, 0, 0, 0, 28, 0, 1164, 0, - 948, -1669, 0, 0, 0, 0, 881, 1265, 0, 0, - 29, 0, 0, 0, 0, -1669, 898, 0, 0, 1265, - -1669, 0, 0, 0, 1265, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1252, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, - 0, 31, 0, 0, 0, -1669, 1756, 0, 0, 0, - 883, 900, 0, 0, 32, 1894, 1895, 1896, 0, 1897, - 1898, 1899, 1900, 1901, 1902, 0, 0, 0, 1252, 0, - 0, 0, 2134, 901, 0, 0, 0, 0, 33, 1757, - 0, 0, 2138, 0, 0, 0, 2139, 0, 0, 2141, - 0, 0, 34, 903, 0, -38, 0, 0, 0, 0, - 1250, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 2161, 2162, 0, 0, 0, - 0, 0, 0, 1253, 0, 0, 1254, 1255, 1256, 0, - 1257, 1258, 1259, 1260, 1261, 1262, 2175, 0, 0, 2178, - 0, 2180, 0, 0, 0, 885, 1894, 1895, 1896, 2184, - 1897, 1898, 1899, 1900, 1901, 1902, 0, 2191, 2192, 0, - 0, 0, 0, 0, 0, 0, 0, 1253, 0, 0, - 1254, 1255, 1256, 0, 1257, 1258, 1259, 1260, 1261, 1262, - 0, 0, 0, 0, 0, 0, 2232, 0, 0, 0, - -1669, 0, 0, 0, 1231, 2239, 0, 0, 0, 1232, - 0, 0, 1233, 0, 0, 0, 1265, 1234, 1235, 1236, + 0, 0, 0, 0, 0, 1506, 1507, 1604, 0, 0, + 0, 0, 0, 
1608, 1609, 1610, 1611, 0, 0, 0, + 0, 1620, 1621, 0, 1494, 1495, 1632, 0, 0, 0, + 1635, 0, 0, 1643, 1644, 1645, 1646, 1647, 1648, 1649, + 1650, 1651, 0, 0, 1652, 0, 0, 0, 0, 1660, + 0, 0, 1664, 0, 804, 2420, 950, 0, 564, 564, + 564, 564, 0, 1164, 0, 2227, 2241, 0, 2227, 0, + 0, 2429, 0, 0, 1166, 1990, 0, 0, 1685, 1990, + 0, 0, 0, 0, 0, 2447, 1496, 1497, 1498, 1499, + 1500, 1501, 0, 0, 1502, 1503, 0, 0, 0, 0, + 0, 0, 0, 0, 2008, 1220, 1221, 1165, 0, 1701, + 0, 0, 0, 1568, 1166, 0, 0, 0, 0, 0, + 0, 1366, 0, 0, 0, 0, 0, 0, 0, 0, + 1166, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 2509, 0, 0, 0, + 1166, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 880, 0, 0, 0, 0, 0, 881, 0, + 0, 1091, 1164, 0, 0, 893, 0, 0, 0, 1166, + 0, 0, 0, 0, 950, 0, 1166, 1796, 1797, 0, + 1798, 0, 0, 0, 894, 0, 0, 0, 1504, 1505, + 0, 0, 1164, 0, 0, 1166, 0, 0, 0, 0, + 0, 0, 1423, 0, 0, 0, 1165, 1831, 1822, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 2544, + 0, 0, 2390, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 895, 0, 1165, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 950, -1471, 0, + 1164, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1164, 0, 0, 882, 466, 2227, 0, 0, 2572, 0, + 0, 494, 0, 1990, 0, 0, 0, 0, 1990, 0, + 0, 1990, 0, 2583, 0, 0, 2586, 1874, 0, 0, + 0, 0, 1880, 0, 1165, 0, 0, 0, 1506, 1507, + 2602, 0, 0, 0, 1165, 1273, 0, 0, 0, 0, + 0, 1166, 0, 0, 0, 0, 0, 0, 0, 0, + 644, 494, 896, 0, 0, 0, 0, 0, 0, 564, + 0, 0, 0, 0, 1897, 0, 0, 0, 0, 0, + -1471, 0, 0, 0, 1273, 0, 0, 0, 0, 0, + 1593, 0, 1593, 1593, 0, 494, 494, 0, 0, 0, + 1936, 0, 0, 1938, 0, 1366, 0, 0, 494, 0, + 0, 897, 0, 0, 0, 1164, 0, 0, 0, 898, + 564, 0, 0, 2602, 0, 1091, 0, 0, 0, 0, + 1091, 899, -1471, 0, 0, 0, 0, 1956, 0, 0, + 1959, 0, 1961, 494, 0, -1471, 0, 0, 0, 0, + -1471, 1164, 883, 0, 0, -1471, 1990, 0, 0, 1165, + 0, 2586, 900, 0, -1471, 0, 0, -1471, 0, 0, + 2692, 0, 1466, 1468, 494, 0, 0, 1166, 0, 0, + 0, 1164, 0, 564, 1164, 0, 0, 0, 0, 0, + 0, 1273, 0, 0, 0, 1165, -1471, 0, 1273, 0, + 0, 0, 901, 1321, 0, 644, 885, 902, 0, 0, + 0, 0, 0, 0, 0, -1471, 564, 0, 1338, 0, + 0, 
0, 0, 0, 0, 1165, 0, 0, 1165, 903, + 0, 0, 0, 0, 0, 904, 2033, 0, 0, 0, + 0, 0, 0, 0, 466, 0, 0, 0, 0, 905, + 0, 0, 0, 2746, 0, 0, 0, 494, 0, 0, + 0, 2583, 0, 0, -1471, 0, 0, -1471, 0, 0, + 0, 0, 0, -1471, 0, 0, 0, 2602, 644, 644, + 644, 0, 0, 0, 0, 0, 1166, 0, 0, 0, + 0, 0, 0, 0, 950, 0, 0, 0, 0, 0, + 0, 887, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 880, 0, 0, 0, 1166, 0, 881, 0, + 0, 2583, 0, 0, 0, 893, 0, 0, -1471, 0, + 2692, 0, 2572, 0, 0, 0, 0, 0, 1466, 1468, + 0, 1701, 0, 0, 894, 0, 0, 0, 0, 1091, + 0, 0, 0, 0, 0, 1366, 0, 0, 0, 0, + 0, 2602, 0, 0, 0, 0, 1164, 0, 0, 0, + 0, 0, 0, 0, 1166, 0, 2147, 0, 0, 644, + 0, 0, 1974, 0, 1166, 0, 2151, 0, 0, 0, + 2152, 0, 0, 2154, 895, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, -1471, + 1165, 0, 0, 0, 0, 0, 0, -1471, 0, 2174, + 2175, 1273, 0, 882, 0, 0, 0, 0, 0, 0, + 0, 0, 0, -1471, 0, -1471, -1471, 0, 0, 0, + 0, 0, 2188, 1273, 0, 2191, 0, 2193, 1576, 0, + 0, 0, 0, 0, 0, 2197, 0, 0, 0, 0, + 1273, 1273, 1273, 2204, 2205, 0, 1273, 0, 0, 0, + 0, 0, -1471, 0, 0, -1471, -1471, -1471, 0, 0, + 0, 0, 896, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2245, 0, 0, 0, 0, 0, 0, 1166, 0, 2252, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1237, 0, 0, 1238, 0, 1265, - 0, 0, 1239, 0, 0, 0, 0, 0, 0, 1240, - 0, 0, 0, 0, 0, 0, 1265, 1232, 1569, 1569, - 1233, 1903, 0, 1569, 0, 1234, 1235, 1236, 1265, 0, - 0, 0, 0, 0, 1241, 1265, 0, 0, 0, 0, - 1265, 0, 1237, 1265, 0, 0, 0, 1265, 0, 0, - 1239, 1265, 1265, 1265, 1265, 0, 0, 1240, 0, -1669, - 1569, 1569, 0, 1265, 1265, 0, 1257, 1258, 1259, 1260, - 1261, 1262, 0, 0, 0, 1265, 0, 0, 1265, 0, - 0, 0, 1241, 0, 0, 0, 1265, 1265, 1265, 1265, - 1265, 1265, 1265, 1265, 1265, 1265, 0, 0, 0, 0, - 0, 0, 0, 1265, 0, 0, 0, 1265, 0, 0, - 0, 2380, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 1265, 0, - 0, 0, 0, 1242, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 1243, 0, 0, - 0, 0, 1244, 0, 2390, 2391, 0, 0, 0, 2392, - 0, 0, 0, 0, 2395, 0, 0, 2398, 2399, 0, - 0, 0, 2403, 1245, 1246, 0, 0, 0, 0, 0, - 0, 1242, 0, 0, 0, 0, 0, 1247, 0, 0, - 0, 0, 
0, 0, 0, 1243, 0, 0, 0, 0, - 1244, 0, 0, 0, 1232, 0, 0, 1233, 0, 0, - 0, 0, 1234, 1235, 1236, 0, 0, 0, 802, 0, - 0, 1245, 1246, 0, 0, 1248, 0, 0, 1249, 1237, - 0, 0, 0, 0, 0, 1247, 0, 1239, 0, 1265, - 1265, 1265, 1250, 0, 1240, 1251, 0, 0, 0, 0, - 0, 0, 0, 0, 1232, 0, 0, 1233, 0, 0, - 2488, 0, 1234, 1235, 1236, 0, 0, 0, 0, 1241, - 0, 0, 0, 1248, 1265, 0, 1249, 0, 0, 1237, - 0, 0, 0, 0, 0, 0, 0, 1239, 0, 0, - 1250, 0, 0, 1251, 1240, 0, 0, 1232, 0, 0, - 1233, 0, 0, 0, 0, 1234, 1235, 1236, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 1265, 0, 1241, - 0, 0, 1237, 1265, 0, 0, 0, 0, 0, 0, - 1239, 0, 1252, 0, 0, 0, 0, 1240, 1903, 1903, - 1265, 0, 1569, 1569, 1569, 1569, 0, 0, 1569, 1569, - 1569, 1569, 1569, 1569, 1569, 1569, 1569, 1569, 1903, 0, - 0, 0, 1241, 0, 0, 0, 0, 0, 1242, 0, - 0, 0, 0, 0, 2541, 2542, 0, 0, 2543, 1265, - 1252, 0, 1243, 1903, 1903, 0, 0, 1244, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 1265, 1569, 1569, - 1265, 0, 1265, 0, 0, 0, 0, 0, 1245, 1246, - 0, 0, 0, 0, 2576, 2577, 0, 0, 1242, 0, - 0, 0, 1247, 0, 0, 0, 0, 0, 2589, 0, - 0, 1253, 1243, 0, 1254, 1255, 1256, 1244, 1257, 1258, - 1259, 1260, 1261, 1262, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 1245, 1246, - 1248, 1242, 0, 1249, 0, 0, 0, 0, 0, 0, - 0, 0, 1247, 0, 1265, 1243, 0, 1250, 0, 1253, - 1244, 0, 1254, 1255, 1256, 0, 1257, 1258, 1259, 1260, - 1261, 1262, 0, 0, 0, 0, 1590, 0, 0, 0, - 0, 1245, 1246, 0, 0, 0, 0, 0, 0, 0, - 1248, 0, 0, 1249, 0, 1247, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 1250, 0, 0, - 1251, 0, 0, 0, 0, 0, 496, 0, 2664, 0, + 0, 0, 0, 0, 0, 0, 0, 2265, 0, 0, + 0, 1273, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 897, 0, 1240, 0, 1166, 1241, 0, -38, 898, + 0, 1242, 1243, 1244, 0, 0, 0, 0, 0, 0, + 0, 899, 0, 0, 0, 0, 0, 1, 1245, 0, + 0, 0, 1273, 0, 0, 1166, 1247, 2, 1166, 3, + 4, 0, 883, 1248, 1273, 1239, 0, 0, 0, 1273, + 1240, 0, 900, 1241, 0, 5, 0, 0, 1242, 1243, + 1244, 0, 0, 0, 6, 0, 0, 0, 1249, 0, + 1091, 0, 0, 0, 0, 1245, 7, 0, 1246, 0, + 0, 0, 0, 1247, 0, 0, 0, 8, 0, 0, + 1248, 0, 1767, 0, 0, 0, 885, 902, 9, 0, + 10, 0, 
0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1249, 0, 2395, 11, 903, + 0, 0, 0, 0, 0, 1768, 0, 0, 0, 0, + 0, 0, 12, 0, 0, 0, 0, 0, 0, 905, + 0, 0, 13, 0, 0, 0, 0, 0, 14, 0, + 0, 0, 0, 0, 0, 0, 15, 0, 16, 17, + 2405, 2406, 0, 0, 0, 2407, 0, 1250, 0, 0, + 2410, 18, 0, 2413, 2414, 0, 0, 0, 2418, 0, + 0, 1251, 0, 0, 0, 0, 1252, 0, 0, 0, + 0, 887, 0, 0, 0, 0, 0, 0, 19, 0, + 0, 0, 0, 0, 0, 0, 0, 1253, 1254, 0, + 0, 0, 0, 0, 1250, 0, 0, 0, 0, 0, + 1166, 1255, 0, 0, 804, 0, 0, 0, 1251, 0, + 0, 0, 0, 1252, 0, 0, 0, 20, 0, 0, + 1273, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1253, 1254, 0, 0, 0, 1256, + 0, 0, 1257, 1273, 0, 0, 2503, 0, 1255, 0, + 0, 0, 0, 0, 0, 0, 1258, 0, 0, 1259, + 1273, 0, 1576, 1576, 0, 1914, 0, 1576, 0, 0, + 0, 0, 1273, 0, 0, 0, 0, 0, 0, 1273, + 0, 0, 0, 0, 1273, 0, 1256, 1273, 0, 1257, + 0, 0, 0, 0, 21, 1273, 0, 22, 0, 1273, + 1273, 1273, 1273, 1258, 0, 0, 1259, 0, 1576, 1576, + 0, 1273, 1273, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1273, 0, 23, 1273, 0, 0, 0, + 0, 0, 0, 24, 1273, 1273, 1273, 1273, 1273, 1273, + 1273, 1273, 1273, 1273, 0, 0, 1260, 25, 0, 0, + 0, 1273, 0, 0, 26, 1273, 0, 0, 27, 0, + 0, 0, 2556, 2557, 0, 0, 2558, 0, 28, 0, + 0, 0, 0, 0, 0, 0, 1273, 0, 0, 0, + 0, 0, 29, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1260, 0, 0, 0, 0, 0, 0, + 0, 0, 2591, 2592, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 2604, 0, 30, 0, + 0, 0, 0, 31, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 1261, 0, 0, 1262, 1263, + 1264, 0, 1265, 1266, 1267, 1268, 1269, 1270, 1240, 0, + 33, 1241, 0, 0, 0, 1889, 1242, 1243, 1244, 0, + 0, 0, 0, 0, 34, 0, 0, -38, 0, 0, + 0, 0, 0, 1245, 0, 0, 0, 1273, 1273, 1273, + 0, 1247, 1261, 0, 0, 1262, 1263, 1264, 1248, 1265, + 1266, 1267, 1268, 1269, 1270, 0, 0, 0, 0, 0, + 0, 1240, 0, 0, 1241, 0, 0, 0, 0, 1242, + 1243, 1244, 1273, 1249, 0, 0, 2679, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1245, 0, 1225, 0, + 0, 0, 0, 0, 1247, 0, 0, 0, 0, 2698, + 0, 1248, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 2712, 1273, 0, 0, 0, 0, + 0, 1273, 0, 0, 0, 0, 1249, 0, 0, 0, + 
0, 0, 0, 0, 0, 0, 1914, 1914, 1273, 0, + 1576, 1576, 1576, 1576, 0, 0, 1576, 1576, 1576, 1576, + 1576, 1576, 1576, 1576, 1576, 1576, 1914, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1221, 0, 0, 1248, 0, 0, 1249, 0, 0, 0, - 0, 2683, 0, 0, 0, 0, 0, 1252, 0, 0, - 1250, 0, 0, 1251, 0, 0, 2697, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 1265, 497, - 0, 0, 1265, 1265, 0, 1265, 0, 0, 1903, 1903, - 1903, 1903, 0, 0, 498, 1903, 1903, 1903, 1903, 1903, - 1903, 1903, 1903, 1903, 1903, 1265, 1265, 1252, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1265, - 0, 0, 1265, 0, 1265, 0, 0, 0, 1265, 0, - 0, 1903, 1903, 0, 0, 1265, 1265, 0, 0, 0, - 0, 0, 2576, 0, 0, 0, 0, 0, 499, 0, - 1252, 0, 0, 0, 0, 0, 1253, 500, 0, 1254, - 1255, 1256, 0, 1257, 1258, 1259, 1260, 1261, 1262, 501, - 0, 0, 0, 2760, 502, 0, 1265, 0, 0, 0, - 0, 0, 0, 1265, 0, 0, 0, 0, 0, 0, - 0, 0, 2774, 2774, 0, 0, 1265, 0, 0, 503, - 0, 0, 0, 0, 0, 0, 1253, 0, 0, 1254, - 1255, 1256, 0, 1257, 1258, 1259, 1260, 1261, 1262, 0, - 0, 0, 0, 1927, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 2774, - 0, 0, 504, 0, 0, 0, 505, 0, 0, 1253, - 0, 0, 1254, 1255, 1256, 0, 1257, 1258, 1259, 1260, - 1261, 1262, 0, 0, 0, 0, 2137, 0, 0, 0, + 0, 0, 1250, 0, 0, 0, 0, 1273, 0, 1273, + 0, 0, 0, 1914, 1914, 0, 1251, 0, 0, 0, + 2591, 1252, 0, 0, 0, 0, 0, 1273, 1576, 1576, + 1273, 0, 1273, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1253, 1254, 0, 0, 0, 0, 0, 0, + 0, 2775, 0, 0, 0, 1250, 1255, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1251, + 2789, 2789, 0, 0, 1252, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1256, 1253, 1254, 1257, 0, 0, + 0, 0, 0, 0, 1273, 0, 0, 0, 0, 1255, + 0, 1258, 0, 0, 1259, 0, 1240, 2789, 0, 1241, + 0, 0, 0, 0, 1242, 1243, 1244, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1245, 0, 0, 0, 0, 0, 1256, 0, 1247, + 1257, 0, 0, 0, 0, 0, 1248, 0, 0, 0, + 0, 0, 0, 0, 1258, 0, 0, 1259, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 2789, + 0, 1249, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 2774, 0, 0, 0, 0, 0, 0, 0, 506, + 0, 1260, 0, 0, 0, 0, 0, 
0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1273, 0, + 0, 0, 1273, 1273, 0, 1273, 0, 0, 1914, 1914, + 1914, 1914, 0, 0, 0, 1914, 1914, 1914, 1914, 1914, + 1914, 1914, 1914, 1914, 1914, 1273, 1273, 0, 0, 0, + 0, 0, 0, 0, 1260, 0, 0, 0, 0, 1273, + 0, 0, 1273, 0, 1273, 0, 0, 0, 1273, 0, + 0, 1914, 1914, 0, 0, 1273, 1273, 0, 0, 0, + 1250, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1251, 0, 0, 0, 0, 1252, + 1261, 0, 0, 1262, 1263, 1264, 0, 1265, 1266, 1267, + 1268, 1269, 1270, 0, 0, 0, 1273, 1598, 0, 0, + 1253, 1254, 0, 1273, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1255, 0, 1273, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 507, 1265, 0, 0, 0, 1569, 0, - 0, 0, 0, 0, 1265, 1265, 1265, 0, 0, 1265, - 0, 0, 1265, 1265, 0, 0, 0, 1265, 0, 0, - 0, 0, 0, 0, 508, 0, 0, 509, 0, 0, - 0, 0, 0, 0, 0, 510, 0, 0, 511, 0, + 0, 0, 0, 1261, 0, 0, 1262, 1263, 1264, 0, + 1265, 1266, 1267, 1268, 1269, 1270, 0, 0, 0, 0, + 1940, 0, 1256, 0, 0, 1257, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1258, + 0, 0, 1259, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 512, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 513, - 0, 0, 0, 0, 0, 0, 514, 0, 0, 0, - 0, 0, 0, 0, 1569, 515, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1265, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 516, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1903, 1569, 0, 0, 0, 1265, 1265, 1265, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1273, 0, 0, 0, + 1576, 0, 0, 0, 0, 0, 1273, 1273, 1273, 1260, + 0, 1273, 0, 0, 1273, 1273, 0, 0, 0, 1273, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1265, 1265, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1265, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 1903, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 915, 725, 535, - 726, 727, 728, 729, 730, 0, 0, 0, 0, 0, - 1903, 0, 0, 90, 91, 92, 93, 94, 95, 96, - 97, 916, 98, 99, 100, 0, 0, 0, 1265, 731, - 0, 0, 101, 102, 0, 103, 104, 105, 917, 
107, - 108, 109, 110, 732, 918, 733, 734, 1265, 115, 116, - 117, 118, 119, 120, 735, 736, 121, 122, 737, 738, - 125, 1265, 126, 127, 128, 129, 739, 0, 919, 0, - 132, 133, 134, 135, 136, 920, 138, 139, 140, 0, - 141, 142, 143, 144, 145, 146, 0, 921, 148, 149, - 150, 741, 742, 743, 922, 745, 746, 747, 152, 153, - 154, 155, 156, 157, 158, 748, 749, 161, 750, 162, - 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, - 171, 172, 173, 923, 1265, 174, 175, 751, 177, 178, - 0, 179, 180, 181, 0, 182, 183, 184, 1265, 185, - 186, 187, 188, 752, 190, 191, 192, 193, 753, 754, - 195, 0, 196, 197, 755, 199, 0, 200, 0, 201, - 924, 0, 925, 204, 205, 206, 926, 208, 0, 209, - 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, - 927, 219, 220, 221, 222, 0, 223, 224, 225, 226, - 227, 228, 0, 229, 928, 231, 232, 233, 234, 235, - 756, 757, 0, 758, 0, 239, 929, 930, 242, 931, - 244, 245, 246, 247, 248, 0, 0, 249, 932, 251, - 933, 0, 253, 254, 255, 759, 760, 256, 257, 258, - 259, 260, 934, 262, 263, 264, 265, 266, 267, 268, - 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, - 279, 761, 935, 762, 283, 284, 285, 286, 763, 287, - 288, 936, 290, 764, 765, 292, 766, 294, 295, 296, - 0, 297, 298, 0, 0, 767, 300, 301, 0, 0, - 302, 303, 937, 305, 938, 768, 308, 309, 310, 311, - 312, 313, 314, 315, 316, 317, 0, 318, 319, 769, - 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 770, 337, 338, 339, - 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, - 349, 350, 351, 352, 0, 353, 354, 939, 356, 357, - 358, 771, 360, 361, 362, 363, 364, 365, 366, 367, - 368, 369, 370, 0, 371, 372, 373, 374, 375, 772, - 376, 377, 378, 379, 380, 940, 382, 383, 773, 385, - 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, - 395, 396, 941, 398, 774, 0, 400, 401, 0, 402, - 942, 404, 405, 406, 407, 408, 0, 775, 776, 0, - 0, 411, 412, 777, 414, 778, 779, 416, 417, 943, - 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, - 427, 428, 781, 0, 429, 430, 431, 432, 433, 434, - 782, 944, 436, 945, 438, 439, 440, 441, 0, 
0, - 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, - 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, - 460, 461, 462, 463, 783, 0, 0, 0, 0, 0, - 0, 784, 785, 946, 0, 0, 0, 0, 0, 0, - 787, 947, 0, 0, 0, 788, 789, 89, 725, 535, - 726, 727, 728, 729, 730, 0, 0, 0, 0, 0, - 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, - 97, 0, 98, 99, 100, 0, 0, 0, 0, 731, - 0, 0, 101, 102, 0, 103, 104, 105, 106, 107, - 108, 109, 110, 732, 112, 733, 734, 0, 115, 116, - 117, 118, 119, 120, 735, 736, 121, 122, 737, 738, - 125, 0, 126, 127, 128, 129, 739, 0, 740, 0, - 132, 133, 134, 135, 136, 137, 138, 139, 140, 0, - 141, 142, 143, 144, 145, 146, 0, 147, 148, 149, - 150, 741, 742, 743, 744, 745, 746, 747, 152, 153, - 154, 155, 156, 157, 158, 748, 749, 161, 750, 162, - 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, - 171, 172, 173, 0, 0, 174, 175, 751, 177, 178, - 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, - 186, 187, 188, 752, 190, 191, 192, 193, 753, 754, - 195, 0, 196, 197, 755, 199, 0, 200, 0, 201, - 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, - 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, - 218, 219, 220, 221, 222, 0, 223, 224, 225, 226, - 227, 228, 0, 229, 230, 231, 232, 233, 234, 235, - 756, 757, 0, 758, 0, 239, 240, 241, 242, 243, - 244, 245, 246, 247, 248, 0, 0, 249, 250, 251, - 252, 0, 253, 254, 255, 759, 760, 256, 257, 258, - 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, - 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, - 279, 761, 281, 762, 283, 284, 285, 286, 763, 287, - 288, 289, 290, 764, 765, 292, 766, 294, 295, 296, - 0, 297, 298, 0, 0, 767, 300, 301, 0, 0, - 302, 303, 304, 305, 306, 768, 308, 309, 310, 311, - 312, 313, 314, 315, 316, 317, 0, 318, 319, 769, - 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 770, 337, 338, 339, - 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, - 349, 350, 351, 352, 0, 353, 354, 355, 356, 357, - 358, 771, 360, 361, 362, 363, 364, 365, 366, 367, - 368, 369, 370, 25, 371, 372, 373, 374, 375, 772, - 376, 377, 
378, 379, 380, 381, 382, 383, 773, 385, - 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, - 395, 396, 397, 398, 774, 0, 400, 401, 29, 402, - 403, 404, 405, 406, 407, 408, 0, 775, 776, 0, - 0, 411, 412, 777, 414, 778, 779, 416, 417, 780, - 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, - 427, 428, 781, 0, 429, 430, 431, 432, 433, 598, - 782, 0, 436, 437, 438, 439, 440, 441, 0, 0, - 442, 0, 32, 443, 444, 445, 446, 447, 448, 449, - 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, - 460, 461, 462, 463, 783, 0, 33, 0, 0, 0, - 0, 784, 785, 0, 0, 0, 0, 0, 0, 0, - 787, 2673, 0, 0, 0, 788, 789, 89, 725, 535, - 726, 727, 728, 729, 730, 0, 0, 0, 0, 0, - 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, - 97, 0, 98, 99, 100, 0, 0, 0, 0, 731, - 0, 0, 101, 102, 0, 103, 104, 105, 106, 107, - 108, 109, 110, 732, 112, 733, 734, 0, 115, 116, - 117, 118, 119, 120, 735, 736, 121, 122, 737, 738, - 125, 0, 126, 127, 128, 129, 739, 0, 740, 0, - 132, 133, 134, 135, 136, 137, 138, 139, 140, 0, - 141, 142, 143, 144, 145, 146, 0, 147, 148, 149, - 150, 741, 742, 743, 744, 745, 746, 747, 152, 153, - 154, 155, 156, 157, 158, 748, 749, 161, 750, 162, - 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, - 171, 172, 173, 0, 0, 174, 175, 751, 177, 178, - 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, - 186, 187, 188, 752, 190, 191, 192, 193, 753, 754, - 195, 0, 196, 197, 755, 199, 0, 200, 0, 201, - 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, - 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, - 218, 219, 220, 221, 222, 0, 223, 224, 225, 226, - 227, 228, 0, 229, 230, 231, 232, 233, 234, 235, - 756, 757, 0, 758, 0, 239, 240, 241, 242, 243, - 244, 245, 246, 247, 248, 0, 0, 249, 250, 251, - 252, 0, 253, 254, 255, 759, 760, 256, 257, 258, - 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, - 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, - 279, 761, 281, 762, 283, 284, 285, 286, 763, 287, - 288, 289, 290, 764, 765, 292, 766, 294, 295, 296, - 0, 297, 298, 0, 0, 767, 300, 301, 0, 0, - 302, 303, 304, 305, 306, 768, 308, 309, 
310, 311, - 312, 313, 314, 315, 316, 317, 0, 318, 319, 769, - 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 770, 337, 338, 339, - 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, - 349, 350, 351, 352, 0, 353, 354, 355, 356, 357, - 358, 771, 360, 361, 362, 363, 364, 365, 366, 367, - 368, 369, 370, 25, 371, 372, 373, 374, 375, 772, - 376, 377, 378, 379, 380, 381, 382, 383, 773, 385, - 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, - 395, 396, 397, 398, 774, 0, 400, 401, 29, 402, - 403, 404, 405, 406, 407, 408, 0, 775, 776, 0, - 0, 411, 412, 777, 414, 778, 779, 416, 417, 780, - 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, - 427, 428, 781, 0, 429, 430, 431, 432, 433, 598, - 782, 0, 436, 437, 438, 439, 440, 441, 0, 0, - 442, 0, 32, 443, 444, 445, 446, 447, 448, 449, - 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, - 460, 461, 462, 463, 783, 0, 33, 0, 0, 0, - 0, 784, 785, 0, 0, 0, 0, 0, 0, 0, - 787, 0, 0, 0, 0, 788, 789, 89, 725, 535, - 726, 727, 728, 729, 730, 0, 0, 0, 0, 0, - 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, - 97, 0, 98, 99, 100, 0, 0, 0, 0, 731, - 0, 0, 101, 102, 0, 103, 104, 105, 106, 107, - 108, 109, 110, 732, 112, 733, 734, 1582, 115, 116, - 117, 118, 119, 120, 735, 736, 121, 122, 737, 738, - 125, 0, 126, 127, 128, 129, 739, 0, 740, 0, - 132, 133, 134, 135, 136, 137, 138, 139, 140, 0, - 141, 142, 143, 144, 145, 146, 0, 147, 148, 149, - 150, 741, 742, 743, 744, 745, 746, 747, 152, 153, - 154, 155, 156, 157, 158, 748, 749, 161, 750, 162, - 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, - 171, 172, 173, 0, 0, 174, 175, 751, 177, 178, - 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, - 186, 187, 188, 752, 190, 191, 192, 193, 753, 754, - 195, 0, 196, 197, 755, 199, 0, 200, 0, 201, - 202, 1583, 203, 204, 205, 206, 207, 208, 0, 209, - 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, - 218, 219, 220, 221, 222, 0, 223, 224, 225, 226, - 227, 228, 0, 229, 230, 231, 232, 233, 234, 235, - 756, 757, 0, 758, 0, 239, 240, 241, 242, 243, - 244, 
245, 246, 247, 248, 0, 1584, 249, 250, 251, - 252, 0, 253, 254, 255, 759, 760, 256, 257, 258, - 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, - 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, - 279, 761, 281, 762, 283, 284, 285, 286, 763, 287, - 288, 289, 290, 764, 765, 292, 766, 294, 295, 296, - 0, 297, 298, 0, 0, 767, 300, 301, 0, 0, - 302, 303, 304, 305, 306, 768, 308, 309, 310, 311, - 312, 313, 314, 315, 316, 317, 0, 318, 319, 769, - 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 770, 337, 338, 339, - 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, - 349, 350, 351, 352, 0, 353, 354, 355, 356, 357, - 358, 771, 360, 361, 362, 363, 364, 365, 366, 367, - 368, 369, 370, 0, 371, 372, 373, 374, 375, 772, - 376, 377, 378, 379, 380, 381, 382, 383, 773, 385, - 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, - 395, 396, 397, 398, 774, 0, 400, 401, 0, 402, - 403, 404, 405, 406, 407, 408, 0, 775, 776, 0, - 1585, 411, 412, 777, 414, 778, 779, 416, 417, 780, - 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, - 427, 428, 781, 0, 429, 430, 431, 432, 433, 434, - 782, 0, 436, 437, 438, 439, 440, 441, 0, 0, - 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, - 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, - 460, 461, 462, 463, 783, 0, 0, 0, 0, 0, - 0, 784, 785, 0, 0, 0, 0, 0, 0, 0, - 787, 0, 0, 0, 0, 788, 789, 89, 725, 535, - 726, 727, 728, 729, 730, 0, 0, 0, 0, 0, - 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, - 97, 0, 98, 99, 100, 0, 0, 0, 0, 731, - 0, 0, 101, 102, 0, 103, 104, 105, 106, 107, - 108, 109, 110, 732, 112, 733, 734, 0, 115, 116, - 117, 118, 119, 120, 735, 736, 121, 122, 737, 738, - 125, 0, 126, 127, 128, 129, 739, 0, 740, 0, - 132, 133, 134, 135, 136, 137, 138, 139, 140, 0, - 141, 142, 143, 144, 145, 146, 0, 147, 148, 149, - 150, 741, 742, 743, 744, 745, 746, 747, 152, 153, - 154, 155, 156, 157, 158, 748, 749, 161, 750, 162, - 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, - 171, 172, 173, 0, 0, 174, 175, 751, 177, 178, - 0, 179, 180, 181, 
0, 182, 183, 184, 0, 185, - 186, 187, 188, 752, 190, 191, 192, 193, 753, 754, - 195, 0, 196, 197, 755, 199, 0, 200, 0, 201, - 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, - 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, - 218, 219, 220, 221, 222, 0, 223, 224, 225, 226, - 227, 228, 0, 229, 230, 231, 232, 233, 234, 235, - 756, 757, 0, 758, 0, 239, 240, 241, 242, 243, - 244, 245, 246, 247, 248, 0, 0, 249, 250, 251, - 252, 0, 253, 254, 255, 759, 760, 256, 257, 258, - 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, - 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, - 279, 761, 281, 762, 283, 284, 285, 286, 763, 287, - 288, 289, 290, 764, 765, 292, 766, 294, 295, 296, - 0, 297, 298, 0, 0, 767, 300, 301, 0, 0, - 302, 303, 304, 305, 306, 768, 308, 309, 310, 311, - 312, 313, 314, 315, 316, 317, 0, 318, 319, 769, - 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 770, 337, 338, 339, - 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, - 349, 350, 351, 352, 0, 353, 354, 355, 356, 357, - 358, 771, 360, 361, 362, 363, 364, 365, 366, 367, - 368, 369, 370, 0, 371, 372, 373, 374, 375, 772, - 376, 377, 378, 379, 380, 381, 382, 383, 773, 385, - 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, - 395, 396, 397, 398, 774, 0, 400, 401, 0, 402, - 403, 404, 405, 406, 407, 408, 0, 775, 776, 0, - 0, 411, 412, 777, 414, 778, 779, 416, 417, 780, - 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, - 427, 428, 781, 0, 429, 430, 431, 432, 433, 434, - 782, 0, 436, 437, 438, 439, 440, 441, 0, 0, - 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, - 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, - 460, 461, 462, 463, 783, 0, 0, 0, 0, 0, - 0, 784, 785, 786, 0, 0, 0, 0, 0, 0, - 787, 0, 0, 0, 0, 788, 789, 89, 725, 535, - 726, 727, 728, 729, 730, 0, 0, 0, 0, 0, - 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, - 97, 1323, 98, 99, 100, 0, 0, 0, 0, 731, - 0, 0, 101, 102, 0, 103, 104, 105, 106, 107, - 108, 109, 110, 732, 112, 733, 734, 0, 115, 116, - 117, 118, 119, 120, 735, 736, 121, 122, 
737, 738, - 125, 0, 126, 127, 128, 129, 739, 0, 740, 0, - 132, 133, 134, 135, 136, 137, 138, 139, 140, 0, - 141, 142, 143, 144, 145, 146, 0, 147, 148, 149, - 150, 741, 742, 743, 744, 745, 746, 747, 152, 153, - 154, 155, 156, 157, 158, 748, 749, 161, 750, 162, - 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, - 171, 172, 173, 0, 0, 174, 175, 751, 177, 178, - 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, - 186, 187, 188, 752, 190, 191, 192, 193, 753, 754, - 195, 0, 196, 197, 755, 199, 0, 200, 0, 201, - 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, - 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, - 218, 219, 220, 221, 222, 0, 223, 224, 225, 226, - 227, 228, 0, 229, 230, 231, 232, 233, 234, 235, - 756, 757, 0, 758, 0, 239, 240, 241, 242, 243, - 244, 245, 246, 247, 248, 0, 0, 249, 250, 251, - 252, 0, 253, 254, 255, 759, 760, 256, 257, 258, - 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, - 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, - 279, 761, 281, 762, 283, 284, 285, 286, 763, 287, - 288, 289, 290, 764, 765, 292, 766, 294, 295, 296, - 0, 297, 298, 0, 0, 767, 300, 301, 0, 0, - 302, 303, 304, 305, 306, 768, 308, 309, 310, 311, - 312, 313, 314, 315, 316, 317, 0, 318, 319, 769, - 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 770, 337, 338, 339, - 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, - 349, 350, 351, 352, 0, 353, 354, 355, 356, 357, - 358, 771, 360, 361, 362, 363, 364, 365, 366, 367, - 368, 369, 370, 0, 371, 372, 373, 374, 375, 772, - 376, 377, 378, 379, 380, 381, 382, 383, 773, 385, - 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, - 395, 396, 397, 398, 774, 0, 400, 401, 0, 402, - 403, 404, 405, 406, 407, 408, 0, 775, 776, 0, - 0, 411, 412, 777, 414, 778, 779, 416, 417, 780, - 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, - 427, 428, 781, 0, 429, 430, 431, 432, 433, 434, - 782, 0, 436, 437, 438, 439, 440, 441, 0, 0, - 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, - 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, - 460, 
461, 462, 463, 783, 0, 0, 0, 0, 0, - 0, 784, 785, 0, 0, 0, 0, 0, 0, 0, - 787, 0, 0, 0, 0, 788, 789, 915, 725, 535, - 726, 727, 728, 729, 730, 0, 0, 0, 0, 0, - 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, - 97, 0, 98, 99, 100, 0, 0, 0, 0, 731, - 0, 0, 101, 102, 0, 103, 104, 105, 917, 107, - 108, 109, 110, 732, 918, 733, 734, 0, 115, 116, - 117, 118, 119, 120, 735, 736, 121, 122, 737, 738, - 125, 0, 126, 127, 128, 129, 739, 0, 919, 0, - 132, 133, 134, 135, 136, 920, 138, 139, 140, 0, - 141, 142, 143, 144, 145, 146, 0, 921, 148, 149, - 150, 741, 742, 743, 922, 745, 746, 747, 152, 153, - 154, 155, 156, 157, 158, 748, 749, 161, 750, 162, - 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, - 171, 172, 173, 0, 0, 174, 175, 751, 177, 178, - 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, - 186, 187, 188, 752, 190, 191, 192, 193, 753, 754, - 195, 0, 196, 197, 755, 199, 0, 200, 0, 201, - 924, 0, 925, 204, 205, 206, 926, 208, 0, 209, - 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, - 927, 219, 220, 221, 222, 0, 223, 224, 225, 226, - 227, 228, 0, 229, 928, 231, 232, 233, 234, 235, - 756, 757, 0, 758, 0, 239, 929, 930, 242, 931, - 244, 245, 246, 247, 248, 0, 0, 249, 932, 251, - 933, 0, 253, 254, 255, 759, 760, 256, 257, 258, - 259, 260, 934, 262, 263, 264, 265, 266, 267, 268, - 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, - 279, 761, 935, 762, 283, 284, 285, 286, 763, 287, - 288, 936, 290, 764, 765, 292, 766, 294, 295, 296, - 0, 297, 298, 0, 0, 767, 300, 301, 0, 0, - 302, 303, 937, 305, 938, 768, 308, 309, 310, 311, - 312, 313, 314, 315, 316, 317, 0, 318, 319, 769, - 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 770, 337, 338, 339, - 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, - 349, 350, 351, 352, 0, 353, 354, 939, 356, 357, - 358, 771, 360, 361, 362, 363, 364, 365, 366, 367, - 368, 369, 370, 0, 371, 372, 373, 374, 375, 772, - 376, 377, 378, 379, 380, 940, 382, 383, 773, 385, - 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, - 395, 396, 941, 398, 774, 
0, 400, 401, 0, 402, - 942, 404, 405, 406, 407, 408, 0, 775, 776, 0, - 0, 411, 412, 777, 414, 778, 779, 416, 417, 943, - 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, - 427, 428, 781, 0, 429, 430, 431, 432, 433, 434, - 782, 0, 436, 945, 438, 439, 440, 441, 0, 0, - 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, - 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, - 460, 461, 462, 463, 783, 0, 0, 0, 0, 0, - 0, 784, 785, 0, 0, 0, 0, 0, 0, 0, - 787, 1436, 0, 0, 0, 788, 789, 89, 725, 535, - 726, 727, 728, 729, 730, 0, 0, 0, 0, 0, - 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, - 97, 0, 98, 99, 100, 0, 0, 0, 0, 731, - 0, 0, 101, 102, 0, 103, 104, 105, 106, 107, - 108, 109, 110, 732, 112, 733, 734, 0, 115, 116, - 117, 118, 119, 120, 735, 736, 121, 122, 737, 738, - 125, 0, 126, 127, 128, 129, 739, 0, 740, 0, - 132, 133, 134, 135, 136, 137, 138, 139, 140, 0, - 141, 142, 143, 144, 145, 146, 0, 147, 148, 149, - 150, 741, 742, 743, 744, 745, 746, 747, 152, 153, - 154, 155, 156, 157, 158, 748, 749, 161, 750, 162, - 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, - 171, 172, 173, 0, 0, 174, 175, 751, 177, 178, - 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, - 186, 187, 188, 752, 190, 191, 192, 193, 753, 754, - 195, 0, 196, 197, 755, 199, 0, 200, 0, 201, - 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, - 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, - 218, 219, 220, 221, 222, 0, 223, 224, 225, 226, - 227, 228, 0, 229, 230, 231, 232, 233, 234, 235, - 756, 757, 0, 758, 0, 239, 240, 241, 242, 243, - 244, 245, 246, 247, 248, 0, 0, 249, 250, 251, - 252, 0, 253, 254, 255, 759, 760, 256, 257, 258, - 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, - 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, - 279, 761, 281, 762, 283, 284, 285, 286, 763, 287, - 288, 289, 290, 764, 765, 292, 766, 294, 295, 296, - 0, 297, 298, 0, 0, 767, 300, 301, 0, 0, - 302, 303, 304, 305, 306, 768, 308, 309, 310, 311, - 312, 313, 314, 315, 316, 317, 0, 318, 319, 769, - 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, - 330, 
331, 332, 333, 334, 335, 770, 337, 338, 339, - 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, - 349, 350, 351, 352, 0, 353, 354, 355, 356, 357, - 358, 771, 360, 361, 362, 363, 364, 365, 366, 367, - 368, 369, 370, 0, 371, 372, 373, 374, 375, 772, - 376, 377, 378, 379, 380, 381, 382, 383, 773, 385, - 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, - 395, 396, 397, 398, 774, 0, 400, 401, 0, 402, - 403, 404, 405, 406, 407, 408, 0, 775, 776, 0, - 0, 411, 412, 777, 414, 778, 779, 416, 417, 780, - 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, - 427, 428, 781, 0, 429, 430, 431, 432, 433, 434, - 782, 0, 436, 437, 438, 439, 440, 441, 0, 0, - 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, - 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, - 460, 461, 462, 463, 783, 0, 0, 0, 0, 0, - 0, 784, 785, 0, 0, 0, 0, 0, 0, 0, - 787, 1572, 0, 0, 0, 788, 789, 915, 725, 535, - 726, 727, 728, 729, 730, 0, 0, 0, 0, 0, - 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, - 97, 0, 98, 99, 100, 0, 0, 0, 0, 731, - 0, 0, 101, 102, 0, 103, 104, 105, 917, 107, - 108, 109, 110, 732, 918, 733, 734, 0, 115, 116, - 117, 118, 119, 120, 735, 736, 121, 122, 737, 738, - 125, 0, 126, 127, 128, 129, 739, 0, 919, 0, - 132, 133, 134, 135, 136, 920, 138, 139, 140, 0, - 141, 142, 143, 144, 145, 146, 0, 921, 148, 149, - 150, 741, 742, 743, 922, 745, 746, 747, 152, 153, - 154, 155, 156, 157, 158, 748, 749, 161, 750, 162, - 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, - 171, 172, 173, 0, 0, 174, 175, 751, 177, 178, - 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, - 186, 187, 188, 752, 190, 191, 192, 193, 753, 754, - 195, 0, 196, 197, 755, 199, 0, 200, 0, 201, - 924, 0, 925, 204, 205, 206, 926, 208, 0, 209, - 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, - 927, 219, 220, 221, 222, 0, 223, 224, 225, 226, - 227, 228, 0, 229, 928, 231, 232, 233, 234, 235, - 756, 757, 0, 758, 0, 239, 929, 930, 242, 931, - 244, 245, 246, 247, 248, 0, 0, 249, 932, 251, - 933, 0, 253, 254, 255, 759, 760, 256, 257, 258, - 259, 260, 934, 262, 263, 264, 
265, 266, 267, 268, - 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, - 279, 761, 935, 762, 283, 284, 285, 286, 763, 287, - 288, 936, 290, 764, 765, 292, 766, 294, 295, 296, - 0, 297, 298, 0, 0, 767, 300, 301, 0, 0, - 302, 303, 937, 305, 938, 768, 308, 309, 310, 311, - 312, 313, 314, 315, 316, 317, 0, 318, 319, 769, - 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 770, 337, 338, 339, - 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, - 349, 350, 351, 352, 0, 353, 354, 939, 356, 357, - 358, 771, 360, 361, 362, 363, 364, 365, 366, 367, - 368, 369, 370, 0, 371, 372, 373, 374, 375, 772, - 376, 377, 378, 379, 380, 940, 382, 383, 773, 385, - 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, - 395, 396, 941, 398, 774, 0, 400, 401, 0, 402, - 942, 404, 405, 406, 407, 408, 0, 775, 776, 0, - 0, 411, 412, 777, 414, 778, 779, 416, 417, 943, - 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, - 427, 428, 781, 0, 429, 430, 431, 432, 433, 434, - 782, 1782, 436, 945, 438, 439, 440, 441, 0, 0, - 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, - 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, - 460, 461, 462, 463, 783, 0, 0, 0, 0, 0, - 0, 784, 785, 0, 0, 0, 0, 0, 0, 0, - 787, 0, 0, 0, 0, 788, 789, 89, 725, 535, - 726, 727, 728, 729, 730, 0, 0, 0, 0, 0, - 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, - 97, 0, 98, 99, 100, 0, 0, 0, 0, 731, - 0, 0, 101, 102, 0, 103, 104, 105, 106, 107, - 108, 109, 110, 732, 112, 733, 734, 0, 115, 116, - 117, 118, 119, 120, 735, 736, 121, 122, 737, 738, - 125, 0, 126, 127, 128, 129, 739, 0, 740, 0, - 132, 133, 134, 135, 136, 137, 138, 139, 140, 0, - 141, 142, 143, 144, 145, 146, 0, 147, 148, 149, - 150, 741, 742, 743, 744, 745, 746, 747, 152, 153, - 154, 155, 156, 157, 158, 748, 749, 161, 750, 162, - 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, - 171, 172, 173, 0, 0, 174, 175, 751, 177, 178, - 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, - 186, 187, 188, 752, 190, 191, 192, 193, 753, 754, - 195, 0, 196, 197, 755, 199, 0, 200, 0, 201, - 
202, 1583, 203, 204, 205, 206, 207, 208, 0, 209, - 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, - 218, 219, 220, 221, 222, 0, 223, 224, 225, 226, - 227, 228, 0, 229, 230, 231, 232, 233, 234, 235, - 756, 757, 0, 758, 0, 239, 240, 241, 242, 243, - 244, 245, 246, 247, 248, 0, 0, 249, 250, 251, - 252, 0, 253, 254, 255, 759, 760, 256, 257, 258, - 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, - 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, - 279, 761, 281, 762, 283, 284, 285, 286, 763, 287, - 288, 289, 290, 764, 765, 292, 766, 294, 295, 296, - 0, 297, 298, 0, 0, 767, 300, 301, 0, 0, - 302, 303, 304, 305, 306, 768, 308, 309, 310, 311, - 312, 313, 314, 315, 316, 317, 0, 318, 319, 769, - 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 770, 337, 338, 339, - 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, - 349, 350, 351, 352, 0, 353, 354, 355, 356, 357, - 358, 771, 360, 361, 362, 363, 364, 365, 366, 367, - 368, 369, 370, 0, 371, 372, 373, 374, 375, 772, - 376, 377, 378, 379, 380, 381, 382, 383, 773, 385, - 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, - 395, 396, 397, 398, 774, 0, 400, 401, 0, 402, - 403, 404, 405, 406, 407, 408, 0, 775, 776, 0, - 0, 411, 412, 777, 414, 778, 779, 416, 417, 780, - 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, - 427, 428, 781, 0, 429, 430, 431, 432, 433, 434, - 782, 0, 436, 437, 438, 439, 440, 441, 0, 0, - 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, - 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, - 460, 461, 462, 463, 783, 0, 0, 0, 0, 0, - 0, 784, 785, 0, 0, 0, 0, 0, 0, 0, - 787, 0, 0, 0, 0, 788, 789, 89, 725, 535, - 726, 727, 728, 729, 730, 0, 0, 0, 0, 0, - 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, - 97, 0, 98, 99, 100, 0, 0, 0, 0, 731, - 0, 0, 101, 102, 0, 103, 104, 105, 106, 107, - 108, 109, 110, 732, 112, 733, 734, 0, 115, 116, - 117, 118, 119, 120, 735, 736, 121, 122, 737, 738, - 125, 0, 126, 127, 128, 129, 739, 0, 740, 0, - 132, 133, 134, 135, 136, 137, 138, 139, 140, 0, - 141, 142, 143, 144, 
145, 146, 0, 147, 148, 149, - 150, 741, 742, 743, 744, 745, 746, 747, 152, 153, - 154, 155, 156, 157, 158, 748, 749, 161, 750, 162, - 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, - 171, 172, 173, 0, 0, 174, 175, 751, 177, 178, - 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, - 186, 187, 188, 752, 190, 191, 192, 193, 753, 754, - 195, 0, 196, 197, 755, 199, 0, 200, 0, 201, - 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, - 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, - 218, 219, 220, 221, 222, 0, 223, 224, 225, 226, - 227, 228, 0, 229, 230, 231, 232, 233, 234, 235, - 756, 757, 0, 758, 0, 239, 240, 241, 242, 243, - 244, 245, 246, 247, 248, 0, 0, 249, 250, 251, - 252, 0, 253, 254, 255, 759, 760, 256, 257, 258, - 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, - 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, - 279, 761, 281, 762, 283, 284, 285, 286, 763, 287, - 288, 289, 290, 764, 765, 292, 766, 294, 295, 296, - 0, 297, 298, 0, 0, 767, 300, 301, 0, 0, - 302, 303, 304, 305, 306, 768, 308, 309, 310, 311, - 312, 313, 314, 315, 316, 317, 0, 318, 319, 769, - 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 770, 337, 338, 339, - 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, - 349, 350, 351, 352, 0, 353, 354, 355, 356, 357, - 358, 771, 360, 361, 362, 363, 364, 365, 366, 367, - 368, 369, 370, 0, 371, 372, 373, 374, 375, 772, - 376, 377, 378, 379, 380, 381, 382, 383, 773, 385, - 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, - 395, 396, 397, 398, 774, 0, 400, 401, 0, 402, - 403, 404, 405, 406, 407, 408, 0, 775, 776, 0, - 0, 411, 412, 777, 414, 778, 779, 416, 417, 780, - 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, - 427, 428, 781, 0, 429, 430, 431, 432, 433, 434, - 782, 0, 436, 437, 438, 439, 440, 441, 0, 0, - 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, - 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, - 460, 461, 462, 463, 783, 0, 0, 0, 0, 0, - 0, 784, 785, 0, 0, 0, 0, 0, 0, 0, - 787, 0, 0, 0, 0, 788, 789, 915, 725, 535, - 726, 727, 
728, 729, 730, 0, 0, 0, 0, 0, - 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, - 97, 0, 98, 99, 100, 0, 0, 0, 0, 731, - 0, 0, 101, 102, 0, 103, 104, 105, 917, 107, - 108, 109, 110, 732, 918, 733, 734, 0, 115, 116, - 117, 118, 119, 120, 735, 736, 121, 122, 737, 738, - 125, 0, 126, 127, 128, 129, 739, 0, 919, 0, - 132, 133, 134, 135, 136, 920, 138, 139, 140, 0, - 141, 142, 143, 144, 145, 146, 0, 921, 148, 149, - 150, 741, 742, 743, 922, 745, 746, 747, 152, 153, - 154, 155, 156, 157, 158, 748, 749, 161, 750, 162, - 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, - 171, 172, 173, 0, 0, 174, 175, 751, 177, 178, - 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, - 186, 187, 188, 752, 190, 191, 192, 193, 753, 754, - 195, 0, 196, 197, 755, 199, 0, 200, 0, 201, - 924, 0, 925, 204, 205, 206, 926, 208, 0, 209, - 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, - 927, 219, 220, 221, 222, 0, 223, 224, 225, 226, - 227, 228, 0, 229, 928, 231, 232, 233, 234, 235, - 756, 757, 0, 758, 0, 239, 929, 930, 242, 931, - 244, 245, 246, 247, 248, 0, 0, 249, 932, 251, - 933, 0, 253, 254, 255, 759, 760, 256, 257, 258, - 259, 260, 934, 262, 263, 264, 265, 266, 267, 268, - 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, - 279, 761, 935, 762, 283, 284, 285, 286, 763, 287, - 288, 936, 290, 764, 765, 292, 766, 294, 295, 296, - 0, 297, 298, 0, 0, 767, 300, 301, 0, 0, - 302, 303, 937, 305, 938, 768, 308, 309, 310, 311, - 312, 313, 314, 315, 316, 317, 0, 318, 319, 769, - 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 770, 337, 338, 339, - 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, - 349, 350, 351, 352, 0, 353, 354, 939, 356, 357, - 358, 771, 360, 361, 362, 363, 364, 365, 366, 367, - 368, 369, 370, 0, 371, 372, 373, 374, 375, 772, - 376, 377, 378, 379, 380, 940, 382, 383, 773, 385, - 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, - 395, 396, 941, 398, 774, 0, 400, 401, 0, 402, - 942, 404, 405, 406, 407, 408, 0, 775, 776, 0, - 0, 411, 412, 777, 414, 778, 779, 416, 417, 943, - 419, 
420, 421, 422, 423, 0, 0, 424, 425, 426, - 427, 428, 781, 0, 429, 430, 431, 432, 433, 434, - 782, 0, 436, 945, 438, 439, 440, 441, 0, 0, - 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, - 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, - 460, 461, 462, 463, 783, 0, 0, 0, 0, 0, - 0, 784, 785, 0, 0, 0, 0, 0, 0, 0, - 787, 0, 0, 0, 0, 788, 789, 89, 725, 535, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1576, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1273, 0, 0, 0, 1261, 0, + 0, 1262, 1263, 1264, 0, 1265, 1266, 1267, 1268, 1269, + 1270, 0, 0, 0, 0, 2150, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1914, 1576, 0, 0, 0, 1273, 1273, 1273, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1273, 1273, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1273, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1914, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 917, + 725, 535, 726, 727, 728, 729, 730, 0, 0, 0, + 0, 0, 1914, 0, 0, 90, 91, 92, 93, 94, + 95, 96, 97, 918, 98, 99, 100, 0, 0, 0, + 1273, 731, 0, 0, 101, 102, 0, 103, 104, 105, + 919, 107, 108, 109, 110, 732, 920, 733, 734, 1273, + 115, 116, 117, 118, 119, 120, 735, 736, 121, 122, + 737, 738, 125, 1273, 126, 127, 128, 129, 739, 0, + 921, 0, 132, 133, 134, 135, 136, 922, 138, 139, + 140, 0, 141, 142, 143, 144, 145, 146, 0, 923, + 148, 149, 150, 741, 742, 743, 924, 745, 746, 747, + 152, 153, 154, 155, 156, 157, 158, 748, 749, 161, + 750, 162, 0, 163, 164, 165, 166, 167, 168, 0, + 169, 170, 171, 172, 173, 925, 1273, 174, 175, 751, + 177, 178, 0, 179, 180, 181, 0, 182, 183, 184, + 1273, 185, 186, 187, 188, 752, 190, 191, 192, 193, + 753, 754, 195, 0, 196, 197, 755, 199, 0, 200, + 0, 201, 926, 0, 927, 204, 205, 206, 928, 208, + 0, 209, 0, 210, 211, 0, 212, 213, 214, 215, + 216, 217, 929, 219, 220, 221, 222, 0, 223, 224, + 225, 226, 227, 228, 0, 229, 930, 231, 
232, 233, + 234, 235, 756, 757, 0, 758, 0, 239, 931, 932, + 242, 933, 244, 245, 246, 247, 248, 0, 0, 249, + 934, 251, 935, 0, 253, 254, 255, 759, 760, 256, + 257, 258, 259, 260, 936, 262, 263, 264, 265, 266, + 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, + 277, 278, 279, 761, 937, 762, 283, 284, 285, 286, + 763, 287, 288, 938, 290, 764, 765, 292, 766, 294, + 295, 296, 0, 297, 298, 0, 0, 767, 300, 301, + 0, 0, 302, 303, 939, 305, 940, 768, 308, 309, + 310, 311, 312, 313, 314, 315, 316, 317, 0, 318, + 319, 769, 321, 322, 323, 324, 325, 326, 0, 327, + 328, 329, 330, 331, 332, 333, 334, 335, 770, 337, + 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, + 347, 348, 349, 350, 351, 352, 0, 353, 354, 941, + 356, 357, 358, 771, 360, 361, 362, 363, 364, 365, + 366, 367, 368, 369, 370, 0, 371, 372, 373, 374, + 375, 772, 376, 377, 378, 379, 380, 942, 382, 383, + 773, 385, 0, 386, 387, 388, 389, 390, 391, 392, + 393, 394, 395, 396, 943, 398, 774, 0, 400, 401, + 0, 402, 944, 404, 405, 406, 407, 408, 0, 775, + 776, 0, 0, 411, 412, 777, 414, 778, 779, 416, + 417, 945, 419, 420, 421, 422, 423, 0, 0, 424, + 425, 426, 427, 428, 781, 0, 429, 430, 431, 432, + 433, 434, 782, 946, 436, 947, 438, 439, 440, 441, + 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, + 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, + 458, 459, 460, 461, 462, 463, 783, 0, 0, 0, + 0, 0, 0, 784, 785, 948, 0, 0, 0, 0, + 787, 0, 788, 949, 0, 0, 0, 789, 0, 790, + 791, 89, 725, 535, 726, 727, 728, 729, 730, 0, + 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, + 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, + 0, 0, 0, 731, 0, 0, 101, 102, 0, 103, + 104, 105, 106, 107, 108, 109, 110, 732, 112, 733, + 734, 0, 115, 116, 117, 118, 119, 120, 735, 736, + 121, 122, 737, 738, 125, 0, 126, 127, 128, 129, + 739, 0, 740, 0, 132, 133, 134, 135, 136, 137, + 138, 139, 140, 0, 141, 142, 143, 144, 145, 146, + 0, 147, 148, 149, 150, 741, 742, 743, 744, 745, + 746, 747, 152, 153, 154, 155, 156, 157, 158, 748, + 749, 161, 750, 162, 0, 163, 164, 165, 166, 167, 
+ 168, 0, 169, 170, 171, 172, 173, 0, 0, 174, + 175, 751, 177, 178, 0, 179, 180, 181, 0, 182, + 183, 184, 0, 185, 186, 187, 188, 752, 190, 191, + 192, 193, 753, 754, 195, 0, 196, 197, 755, 199, + 0, 200, 0, 201, 202, 0, 203, 204, 205, 206, + 207, 208, 0, 209, 0, 210, 211, 0, 212, 213, + 214, 215, 216, 217, 218, 219, 220, 221, 222, 0, + 223, 224, 225, 226, 227, 228, 0, 229, 230, 231, + 232, 233, 234, 235, 756, 757, 0, 758, 0, 239, + 240, 241, 242, 243, 244, 245, 246, 247, 248, 0, + 0, 249, 250, 251, 252, 0, 253, 254, 255, 759, + 760, 256, 257, 258, 259, 260, 261, 262, 263, 264, + 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, + 275, 276, 277, 278, 279, 761, 281, 762, 283, 284, + 285, 286, 763, 287, 288, 289, 290, 764, 765, 292, + 766, 294, 295, 296, 0, 297, 298, 0, 0, 767, + 300, 301, 0, 0, 302, 303, 304, 305, 306, 768, + 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, + 0, 318, 319, 769, 321, 322, 323, 324, 325, 326, + 0, 327, 328, 329, 330, 331, 332, 333, 334, 335, + 770, 337, 338, 339, 340, 0, 341, 342, 343, 344, + 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, + 354, 355, 356, 357, 358, 771, 360, 361, 362, 363, + 364, 365, 366, 367, 368, 369, 370, 25, 371, 372, + 373, 374, 375, 772, 376, 377, 378, 379, 380, 381, + 382, 383, 773, 385, 0, 386, 387, 388, 389, 390, + 391, 392, 393, 394, 395, 396, 397, 398, 774, 0, + 400, 401, 29, 402, 403, 404, 405, 406, 407, 408, + 0, 775, 776, 0, 0, 411, 412, 777, 414, 778, + 779, 416, 417, 780, 419, 420, 421, 422, 423, 0, + 0, 424, 425, 426, 427, 428, 781, 0, 429, 430, + 431, 432, 433, 598, 782, 0, 436, 437, 438, 439, + 440, 441, 0, 0, 442, 0, 32, 443, 444, 445, + 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, + 456, 457, 458, 459, 460, 461, 462, 463, 783, 0, + 33, 0, 0, 0, 0, 784, 785, 0, 0, 0, + 0, 0, 787, 0, 788, 2688, 0, 0, 0, 789, + 0, 790, 791, 89, 725, 535, 726, 727, 728, 729, + 730, 0, 0, 0, 0, 0, 0, 0, 0, 90, + 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, + 100, 0, 0, 0, 0, 731, 0, 0, 101, 102, + 0, 103, 104, 105, 
106, 107, 108, 109, 110, 732, + 112, 733, 734, 0, 115, 116, 117, 118, 119, 120, + 735, 736, 121, 122, 737, 738, 125, 0, 126, 127, + 128, 129, 739, 0, 740, 0, 132, 133, 134, 135, + 136, 137, 138, 139, 140, 0, 141, 142, 143, 144, + 145, 146, 0, 147, 148, 149, 150, 741, 742, 743, + 744, 745, 746, 747, 152, 153, 154, 155, 156, 157, + 158, 748, 749, 161, 750, 162, 0, 163, 164, 165, + 166, 167, 168, 0, 169, 170, 171, 172, 173, 0, + 0, 174, 175, 751, 177, 178, 0, 179, 180, 181, + 0, 182, 183, 184, 0, 185, 186, 187, 188, 752, + 190, 191, 192, 193, 753, 754, 195, 0, 196, 197, + 755, 199, 0, 200, 0, 201, 202, 0, 203, 204, + 205, 206, 207, 208, 0, 209, 0, 210, 211, 0, + 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, + 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, + 230, 231, 232, 233, 234, 235, 756, 757, 0, 758, + 0, 239, 240, 241, 242, 243, 244, 245, 246, 247, + 248, 0, 0, 249, 250, 251, 252, 0, 253, 254, + 255, 759, 760, 256, 257, 258, 259, 260, 261, 262, + 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, + 273, 274, 275, 276, 277, 278, 279, 761, 281, 762, + 283, 284, 285, 286, 763, 287, 288, 289, 290, 764, + 765, 292, 766, 294, 295, 296, 0, 297, 298, 0, + 0, 767, 300, 301, 0, 0, 302, 303, 304, 305, + 306, 768, 308, 309, 310, 311, 312, 313, 314, 315, + 316, 317, 0, 318, 319, 769, 321, 322, 323, 324, + 325, 326, 0, 327, 328, 329, 330, 331, 332, 333, + 334, 335, 770, 337, 338, 339, 340, 0, 341, 342, + 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, + 0, 353, 354, 355, 356, 357, 358, 771, 360, 361, + 362, 363, 364, 365, 366, 367, 368, 369, 370, 25, + 371, 372, 373, 374, 375, 772, 376, 377, 378, 379, + 380, 381, 382, 383, 773, 385, 0, 386, 387, 388, + 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, + 774, 0, 400, 401, 29, 402, 403, 404, 405, 406, + 407, 408, 0, 775, 776, 0, 0, 411, 412, 777, + 414, 778, 779, 416, 417, 780, 419, 420, 421, 422, + 423, 0, 0, 424, 425, 426, 427, 428, 781, 0, + 429, 430, 431, 432, 433, 598, 782, 0, 436, 437, + 438, 439, 440, 441, 0, 0, 442, 
0, 32, 443, + 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, + 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, + 783, 0, 33, 0, 0, 0, 0, 784, 785, 0, + 0, 0, 0, 0, 787, 0, 788, 0, 0, 0, + 0, 789, 0, 790, 791, 89, 725, 535, 726, 727, + 728, 729, 730, 0, 0, 0, 0, 0, 0, 0, + 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, + 98, 99, 100, 0, 0, 0, 0, 731, 0, 0, + 101, 102, 0, 103, 104, 105, 106, 107, 108, 109, + 110, 732, 112, 733, 734, 1589, 115, 116, 117, 118, + 119, 120, 735, 736, 121, 122, 737, 738, 125, 0, + 126, 127, 128, 129, 739, 0, 740, 0, 132, 133, + 134, 135, 136, 137, 138, 139, 140, 0, 141, 142, + 143, 144, 145, 146, 0, 147, 148, 149, 150, 741, + 742, 743, 744, 745, 746, 747, 152, 153, 154, 155, + 156, 157, 158, 748, 749, 161, 750, 162, 0, 163, + 164, 165, 166, 167, 168, 0, 169, 170, 171, 172, + 173, 0, 0, 174, 175, 751, 177, 178, 0, 179, + 180, 181, 0, 182, 183, 184, 0, 185, 186, 187, + 188, 752, 190, 191, 192, 193, 753, 754, 195, 0, + 196, 197, 755, 199, 0, 200, 0, 201, 202, 1590, + 203, 204, 205, 206, 207, 208, 0, 209, 0, 210, + 211, 0, 212, 213, 214, 215, 216, 217, 218, 219, + 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, + 0, 229, 230, 231, 232, 233, 234, 235, 756, 757, + 0, 758, 0, 239, 240, 241, 242, 243, 244, 245, + 246, 247, 248, 0, 1591, 249, 250, 251, 252, 0, + 253, 254, 255, 759, 760, 256, 257, 258, 259, 260, + 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, + 271, 272, 273, 274, 275, 276, 277, 278, 279, 761, + 281, 762, 283, 284, 285, 286, 763, 287, 288, 289, + 290, 764, 765, 292, 766, 294, 295, 296, 0, 297, + 298, 0, 0, 767, 300, 301, 0, 0, 302, 303, + 304, 305, 306, 768, 308, 309, 310, 311, 312, 313, + 314, 315, 316, 317, 0, 318, 319, 769, 321, 322, + 323, 324, 325, 326, 0, 327, 328, 329, 330, 331, + 332, 333, 334, 335, 770, 337, 338, 339, 340, 0, + 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, + 351, 352, 0, 353, 354, 355, 356, 357, 358, 771, + 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, + 370, 0, 371, 372, 373, 374, 375, 772, 376, 
377, + 378, 379, 380, 381, 382, 383, 773, 385, 0, 386, + 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, + 397, 398, 774, 0, 400, 401, 0, 402, 403, 404, + 405, 406, 407, 408, 0, 775, 776, 0, 1592, 411, + 412, 777, 414, 778, 779, 416, 417, 780, 419, 420, + 421, 422, 423, 0, 0, 424, 425, 426, 427, 428, + 781, 0, 429, 430, 431, 432, 433, 434, 782, 0, + 436, 437, 438, 439, 440, 441, 0, 0, 442, 0, + 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, + 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, + 462, 463, 783, 0, 0, 0, 0, 0, 0, 784, + 785, 0, 0, 0, 0, 0, 787, 0, 788, 0, + 0, 0, 0, 789, 0, 790, 791, 89, 725, 535, 726, 727, 728, 729, 730, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 731, @@ -189476,8 +202887,205 @@ static const yytype_int16 yytable[] = 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 783, 0, 0, 0, 0, 0, - 0, 1326, 1327, 0, 0, 0, 0, 0, 0, 0, - 787, 0, 0, 0, 0, 788, 789, 89, 1707, 535, + 0, 784, 785, 786, 0, 0, 0, 0, 787, 0, + 788, 0, 0, 0, 0, 789, 0, 790, 791, 89, + 725, 535, 726, 727, 728, 729, 730, 0, 0, 0, + 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, + 95, 96, 97, 1331, 98, 99, 100, 0, 0, 0, + 0, 731, 0, 0, 101, 102, 0, 103, 104, 105, + 106, 107, 108, 109, 110, 732, 112, 733, 734, 0, + 115, 116, 117, 118, 119, 120, 735, 736, 121, 122, + 737, 738, 125, 0, 126, 127, 128, 129, 739, 0, + 740, 0, 132, 133, 134, 135, 136, 137, 138, 139, + 140, 0, 141, 142, 143, 144, 145, 146, 0, 147, + 148, 149, 150, 741, 742, 743, 744, 745, 746, 747, + 152, 153, 154, 155, 156, 157, 158, 748, 749, 161, + 750, 162, 0, 163, 164, 165, 166, 167, 168, 0, + 169, 170, 171, 172, 173, 0, 0, 174, 175, 751, + 177, 178, 0, 179, 180, 181, 0, 182, 183, 184, + 0, 185, 186, 187, 188, 752, 190, 191, 192, 193, + 753, 754, 195, 0, 196, 197, 755, 199, 0, 200, + 0, 201, 202, 0, 203, 204, 205, 206, 207, 208, + 0, 209, 0, 210, 211, 0, 212, 213, 214, 215, + 216, 217, 218, 219, 220, 221, 222, 0, 
223, 224, + 225, 226, 227, 228, 0, 229, 230, 231, 232, 233, + 234, 235, 756, 757, 0, 758, 0, 239, 240, 241, + 242, 243, 244, 245, 246, 247, 248, 0, 0, 249, + 250, 251, 252, 0, 253, 254, 255, 759, 760, 256, + 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, + 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, + 277, 278, 279, 761, 281, 762, 283, 284, 285, 286, + 763, 287, 288, 289, 290, 764, 765, 292, 766, 294, + 295, 296, 0, 297, 298, 0, 0, 767, 300, 301, + 0, 0, 302, 303, 304, 305, 306, 768, 308, 309, + 310, 311, 312, 313, 314, 315, 316, 317, 0, 318, + 319, 769, 321, 322, 323, 324, 325, 326, 0, 327, + 328, 329, 330, 331, 332, 333, 334, 335, 770, 337, + 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, + 347, 348, 349, 350, 351, 352, 0, 353, 354, 355, + 356, 357, 358, 771, 360, 361, 362, 363, 364, 365, + 366, 367, 368, 369, 370, 0, 371, 372, 373, 374, + 375, 772, 376, 377, 378, 379, 380, 381, 382, 383, + 773, 385, 0, 386, 387, 388, 389, 390, 391, 392, + 393, 394, 395, 396, 397, 398, 774, 0, 400, 401, + 0, 402, 403, 404, 405, 406, 407, 408, 0, 775, + 776, 0, 0, 411, 412, 777, 414, 778, 779, 416, + 417, 780, 419, 420, 421, 422, 423, 0, 0, 424, + 425, 426, 427, 428, 781, 0, 429, 430, 431, 432, + 433, 434, 782, 0, 436, 437, 438, 439, 440, 441, + 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, + 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, + 458, 459, 460, 461, 462, 463, 783, 0, 0, 0, + 0, 0, 0, 784, 785, 0, 0, 0, 0, 0, + 787, 0, 788, 0, 0, 0, 0, 789, 0, 790, + 791, 917, 725, 535, 726, 727, 728, 729, 730, 0, + 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, + 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, + 0, 0, 0, 731, 0, 0, 101, 102, 0, 103, + 104, 105, 919, 107, 108, 109, 110, 732, 920, 733, + 734, 0, 115, 116, 117, 118, 119, 120, 735, 736, + 121, 122, 737, 738, 125, 0, 126, 127, 128, 129, + 739, 0, 921, 0, 132, 133, 134, 135, 136, 922, + 138, 139, 140, 0, 141, 142, 143, 144, 145, 146, + 0, 923, 148, 149, 150, 741, 742, 743, 924, 745, + 746, 747, 152, 153, 154, 155, 156, 157, 158, 748, + 
749, 161, 750, 162, 0, 163, 164, 165, 166, 167, + 168, 0, 169, 170, 171, 172, 173, 0, 0, 174, + 175, 751, 177, 178, 0, 179, 180, 181, 0, 182, + 183, 184, 0, 185, 186, 187, 188, 752, 190, 191, + 192, 193, 753, 754, 195, 0, 196, 197, 755, 199, + 0, 200, 0, 201, 926, 0, 927, 204, 205, 206, + 928, 208, 0, 209, 0, 210, 211, 0, 212, 213, + 214, 215, 216, 217, 929, 219, 220, 221, 222, 0, + 223, 224, 225, 226, 227, 228, 0, 229, 930, 231, + 232, 233, 234, 235, 756, 757, 0, 758, 0, 239, + 931, 932, 242, 933, 244, 245, 246, 247, 248, 0, + 0, 249, 934, 251, 935, 0, 253, 254, 255, 759, + 760, 256, 257, 258, 259, 260, 936, 262, 263, 264, + 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, + 275, 276, 277, 278, 279, 761, 937, 762, 283, 284, + 285, 286, 763, 287, 288, 938, 290, 764, 765, 292, + 766, 294, 295, 296, 0, 297, 298, 0, 0, 767, + 300, 301, 0, 0, 302, 303, 939, 305, 940, 768, + 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, + 0, 318, 319, 769, 321, 322, 323, 324, 325, 326, + 0, 327, 328, 329, 330, 331, 332, 333, 334, 335, + 770, 337, 338, 339, 340, 0, 341, 342, 343, 344, + 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, + 354, 941, 356, 357, 358, 771, 360, 361, 362, 363, + 364, 365, 366, 367, 368, 369, 370, 0, 371, 372, + 373, 374, 375, 772, 376, 377, 378, 379, 380, 942, + 382, 383, 773, 385, 0, 386, 387, 388, 389, 390, + 391, 392, 393, 394, 395, 396, 943, 398, 774, 0, + 400, 401, 0, 402, 944, 404, 405, 406, 407, 408, + 0, 775, 776, 0, 0, 411, 412, 777, 414, 778, + 779, 416, 417, 945, 419, 420, 421, 422, 423, 0, + 0, 424, 425, 426, 427, 428, 781, 0, 429, 430, + 431, 432, 433, 434, 782, 0, 436, 947, 438, 439, + 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, + 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, + 456, 457, 458, 459, 460, 461, 462, 463, 783, 0, + 0, 0, 0, 0, 0, 784, 785, 0, 0, 0, + 0, 0, 787, 0, 788, 1444, 0, 0, 0, 789, + 0, 790, 791, 89, 725, 535, 726, 727, 728, 729, + 730, 0, 0, 0, 0, 0, 0, 0, 0, 90, + 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, + 100, 0, 0, 0, 0, 
731, 0, 0, 101, 102, + 0, 103, 104, 105, 106, 107, 108, 109, 110, 732, + 112, 733, 734, 0, 115, 116, 117, 118, 119, 120, + 735, 736, 121, 122, 737, 738, 125, 0, 126, 127, + 128, 129, 739, 0, 740, 0, 132, 133, 134, 135, + 136, 137, 138, 139, 140, 0, 141, 142, 143, 144, + 145, 146, 0, 147, 148, 149, 150, 741, 742, 743, + 744, 745, 746, 747, 152, 153, 154, 155, 156, 157, + 158, 748, 749, 161, 750, 162, 0, 163, 164, 165, + 166, 167, 168, 0, 169, 170, 171, 172, 173, 0, + 0, 174, 175, 751, 177, 178, 0, 179, 180, 181, + 0, 182, 183, 184, 0, 185, 186, 187, 188, 752, + 190, 191, 192, 193, 753, 754, 195, 0, 196, 197, + 755, 199, 0, 200, 0, 201, 202, 0, 203, 204, + 205, 206, 207, 208, 0, 209, 0, 210, 211, 0, + 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, + 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, + 230, 231, 232, 233, 234, 235, 756, 757, 0, 758, + 0, 239, 240, 241, 242, 243, 244, 245, 246, 247, + 248, 0, 0, 249, 250, 251, 252, 0, 253, 254, + 255, 759, 760, 256, 257, 258, 259, 260, 261, 262, + 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, + 273, 274, 275, 276, 277, 278, 279, 761, 281, 762, + 283, 284, 285, 286, 763, 287, 288, 289, 290, 764, + 765, 292, 766, 294, 295, 296, 0, 297, 298, 0, + 0, 767, 300, 301, 0, 0, 302, 303, 304, 305, + 306, 768, 308, 309, 310, 311, 312, 313, 314, 315, + 316, 317, 0, 318, 319, 769, 321, 322, 323, 324, + 325, 326, 0, 327, 328, 329, 330, 331, 332, 333, + 334, 335, 770, 337, 338, 339, 340, 0, 341, 342, + 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, + 0, 353, 354, 355, 356, 357, 358, 771, 360, 361, + 362, 363, 364, 365, 366, 367, 368, 369, 370, 0, + 371, 372, 373, 374, 375, 772, 376, 377, 378, 379, + 380, 381, 382, 383, 773, 385, 0, 386, 387, 388, + 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, + 774, 0, 400, 401, 0, 402, 403, 404, 405, 406, + 407, 408, 0, 775, 776, 0, 0, 411, 412, 777, + 414, 778, 779, 416, 417, 780, 419, 420, 421, 422, + 423, 0, 0, 424, 425, 426, 427, 428, 781, 0, + 429, 430, 431, 432, 433, 434, 782, 0, 436, 
437, + 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, + 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, + 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, + 783, 0, 0, 0, 0, 0, 0, 784, 785, 0, + 0, 0, 0, 0, 787, 0, 788, 1579, 0, 0, + 0, 789, 0, 790, 791, 917, 725, 535, 726, 727, + 728, 729, 730, 0, 0, 0, 0, 0, 0, 0, + 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, + 98, 99, 100, 0, 0, 0, 0, 731, 0, 0, + 101, 102, 0, 103, 104, 105, 919, 107, 108, 109, + 110, 732, 920, 733, 734, 0, 115, 116, 117, 118, + 119, 120, 735, 736, 121, 122, 737, 738, 125, 0, + 126, 127, 128, 129, 739, 0, 921, 0, 132, 133, + 134, 135, 136, 922, 138, 139, 140, 0, 141, 142, + 143, 144, 145, 146, 0, 923, 148, 149, 150, 741, + 742, 743, 924, 745, 746, 747, 152, 153, 154, 155, + 156, 157, 158, 748, 749, 161, 750, 162, 0, 163, + 164, 165, 166, 167, 168, 0, 169, 170, 171, 172, + 173, 0, 0, 174, 175, 751, 177, 178, 0, 179, + 180, 181, 0, 182, 183, 184, 0, 185, 186, 187, + 188, 752, 190, 191, 192, 193, 753, 754, 195, 0, + 196, 197, 755, 199, 0, 200, 0, 201, 926, 0, + 927, 204, 205, 206, 928, 208, 0, 209, 0, 210, + 211, 0, 212, 213, 214, 215, 216, 217, 929, 219, + 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, + 0, 229, 930, 231, 232, 233, 234, 235, 756, 757, + 0, 758, 0, 239, 931, 932, 242, 933, 244, 245, + 246, 247, 248, 0, 0, 249, 934, 251, 935, 0, + 253, 254, 255, 759, 760, 256, 257, 258, 259, 260, + 936, 262, 263, 264, 265, 266, 267, 268, 269, 270, + 271, 272, 273, 274, 275, 276, 277, 278, 279, 761, + 937, 762, 283, 284, 285, 286, 763, 287, 288, 938, + 290, 764, 765, 292, 766, 294, 295, 296, 0, 297, + 298, 0, 0, 767, 300, 301, 0, 0, 302, 303, + 939, 305, 940, 768, 308, 309, 310, 311, 312, 313, + 314, 315, 316, 317, 0, 318, 319, 769, 321, 322, + 323, 324, 325, 326, 0, 327, 328, 329, 330, 331, + 332, 333, 334, 335, 770, 337, 338, 339, 340, 0, + 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, + 351, 352, 0, 353, 354, 941, 356, 357, 358, 771, + 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, + 370, 0, 371, 
372, 373, 374, 375, 772, 376, 377, + 378, 379, 380, 942, 382, 383, 773, 385, 0, 386, + 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, + 943, 398, 774, 0, 400, 401, 0, 402, 944, 404, + 405, 406, 407, 408, 0, 775, 776, 0, 0, 411, + 412, 777, 414, 778, 779, 416, 417, 945, 419, 420, + 421, 422, 423, 0, 0, 424, 425, 426, 427, 428, + 781, 0, 429, 430, 431, 432, 433, 434, 782, 1793, + 436, 947, 438, 439, 440, 441, 0, 0, 442, 0, + 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, + 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, + 462, 463, 783, 0, 0, 0, 0, 0, 0, 784, + 785, 0, 0, 0, 0, 0, 787, 0, 788, 0, + 0, 0, 0, 789, 0, 790, 791, 89, 725, 535, 726, 727, 728, 729, 730, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 731, @@ -189494,7 +203102,7 @@ static const yytype_int16 yytable[] = 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, 186, 187, 188, 752, 190, 191, 192, 193, 753, 754, 195, 0, 196, 197, 755, 199, 0, 200, 0, 201, - 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, + 202, 1590, 203, 204, 205, 206, 207, 208, 0, 209, 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 230, 231, 232, 233, 234, 235, @@ -189525,8 +203133,205 @@ static const yytype_int16 yytable[] = 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 783, 0, 0, 0, 0, 0, - 0, 784, 785, 0, 0, 0, 0, 0, 0, 0, - 787, 0, 0, 0, 0, 788, 789, 89, 725, 535, + 0, 784, 785, 0, 0, 0, 0, 0, 787, 0, + 788, 0, 0, 0, 0, 789, 0, 790, 791, 89, + 725, 535, 726, 727, 728, 729, 730, 0, 0, 0, + 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, + 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, + 0, 731, 0, 0, 101, 102, 0, 103, 104, 105, + 106, 107, 108, 109, 110, 732, 112, 733, 734, 0, + 115, 116, 117, 118, 119, 120, 735, 736, 121, 122, + 737, 738, 125, 0, 126, 127, 128, 129, 739, 0, + 740, 0, 132, 133, 134, 135, 136, 137, 138, 139, + 140, 0, 141, 142, 143, 144, 145, 146, 0, 147, + 148, 149, 150, 
741, 742, 743, 744, 745, 746, 747, + 152, 153, 154, 155, 156, 157, 158, 748, 749, 161, + 750, 162, 0, 163, 164, 165, 166, 167, 168, 0, + 169, 170, 171, 172, 173, 0, 0, 174, 175, 751, + 177, 178, 0, 179, 180, 181, 0, 182, 183, 184, + 0, 185, 186, 187, 188, 752, 190, 191, 192, 193, + 753, 754, 195, 0, 196, 197, 755, 199, 0, 200, + 0, 201, 202, 0, 203, 204, 205, 206, 207, 208, + 0, 209, 0, 210, 211, 0, 212, 213, 214, 215, + 216, 217, 218, 219, 220, 221, 222, 0, 223, 224, + 225, 226, 227, 228, 0, 229, 230, 231, 232, 233, + 234, 235, 756, 757, 0, 758, 0, 239, 240, 241, + 242, 243, 244, 245, 246, 247, 248, 0, 0, 249, + 250, 251, 252, 0, 253, 254, 255, 759, 760, 256, + 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, + 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, + 277, 278, 279, 761, 281, 762, 283, 284, 285, 286, + 763, 287, 288, 289, 290, 764, 765, 292, 766, 294, + 295, 296, 0, 297, 298, 0, 0, 767, 300, 301, + 0, 0, 302, 303, 304, 305, 306, 768, 308, 309, + 310, 311, 312, 313, 314, 315, 316, 317, 0, 318, + 319, 769, 321, 322, 323, 324, 325, 326, 0, 327, + 328, 329, 330, 331, 332, 333, 334, 335, 770, 337, + 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, + 347, 348, 349, 350, 351, 352, 0, 353, 354, 355, + 356, 357, 358, 771, 360, 361, 362, 363, 364, 365, + 366, 367, 368, 369, 370, 0, 371, 372, 373, 374, + 375, 772, 376, 377, 378, 379, 380, 381, 382, 383, + 773, 385, 0, 386, 387, 388, 389, 390, 391, 392, + 393, 394, 395, 396, 397, 398, 774, 0, 400, 401, + 0, 402, 403, 404, 405, 406, 407, 408, 0, 775, + 776, 0, 0, 411, 412, 777, 414, 778, 779, 416, + 417, 780, 419, 420, 421, 422, 423, 0, 0, 424, + 425, 426, 427, 428, 781, 0, 429, 430, 431, 432, + 433, 434, 782, 0, 436, 437, 438, 439, 440, 441, + 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, + 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, + 458, 459, 460, 461, 462, 463, 783, 0, 0, 0, + 0, 0, 0, 784, 785, 0, 0, 0, 0, 0, + 787, 0, 788, 0, 0, 0, 0, 789, 0, 790, + 791, 917, 725, 535, 726, 727, 728, 729, 730, 0, + 0, 0, 0, 
0, 0, 0, 0, 90, 91, 92, + 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, + 0, 0, 0, 731, 0, 0, 101, 102, 0, 103, + 104, 105, 919, 107, 108, 109, 110, 732, 920, 733, + 734, 0, 115, 116, 117, 118, 119, 120, 735, 736, + 121, 122, 737, 738, 125, 0, 126, 127, 128, 129, + 739, 0, 921, 0, 132, 133, 134, 135, 136, 922, + 138, 139, 140, 0, 141, 142, 143, 144, 145, 146, + 0, 923, 148, 149, 150, 741, 742, 743, 924, 745, + 746, 747, 152, 153, 154, 155, 156, 157, 158, 748, + 749, 161, 750, 162, 0, 163, 164, 165, 166, 167, + 168, 0, 169, 170, 171, 172, 173, 0, 0, 174, + 175, 751, 177, 178, 0, 179, 180, 181, 0, 182, + 183, 184, 0, 185, 186, 187, 188, 752, 190, 191, + 192, 193, 753, 754, 195, 0, 196, 197, 755, 199, + 0, 200, 0, 201, 926, 0, 927, 204, 205, 206, + 928, 208, 0, 209, 0, 210, 211, 0, 212, 213, + 214, 215, 216, 217, 929, 219, 220, 221, 222, 0, + 223, 224, 225, 226, 227, 228, 0, 229, 930, 231, + 232, 233, 234, 235, 756, 757, 0, 758, 0, 239, + 931, 932, 242, 933, 244, 245, 246, 247, 248, 0, + 0, 249, 934, 251, 935, 0, 253, 254, 255, 759, + 760, 256, 257, 258, 259, 260, 936, 262, 263, 264, + 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, + 275, 276, 277, 278, 279, 761, 937, 762, 283, 284, + 285, 286, 763, 287, 288, 938, 290, 764, 765, 292, + 766, 294, 295, 296, 0, 297, 298, 0, 0, 767, + 300, 301, 0, 0, 302, 303, 939, 305, 940, 768, + 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, + 0, 318, 319, 769, 321, 322, 323, 324, 325, 326, + 0, 327, 328, 329, 330, 331, 332, 333, 334, 335, + 770, 337, 338, 339, 340, 0, 341, 342, 343, 344, + 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, + 354, 941, 356, 357, 358, 771, 360, 361, 362, 363, + 364, 365, 366, 367, 368, 369, 370, 0, 371, 372, + 373, 374, 375, 772, 376, 377, 378, 379, 380, 942, + 382, 383, 773, 385, 0, 386, 387, 388, 389, 390, + 391, 392, 393, 394, 395, 396, 943, 398, 774, 0, + 400, 401, 0, 402, 944, 404, 405, 406, 407, 408, + 0, 775, 776, 0, 0, 411, 412, 777, 414, 778, + 779, 416, 417, 945, 419, 420, 421, 422, 423, 0, + 0, 
424, 425, 426, 427, 428, 781, 0, 429, 430, + 431, 432, 433, 434, 782, 0, 436, 947, 438, 439, + 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, + 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, + 456, 457, 458, 459, 460, 461, 462, 463, 783, 0, + 0, 0, 0, 0, 0, 784, 785, 0, 0, 0, + 0, 0, 787, 0, 788, 0, 0, 0, 0, 789, + 0, 790, 791, 89, 725, 535, 726, 727, 728, 729, + 730, 0, 0, 0, 0, 0, 0, 0, 0, 90, + 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, + 100, 0, 0, 0, 0, 731, 0, 0, 101, 102, + 0, 103, 104, 105, 106, 107, 108, 109, 110, 732, + 112, 733, 734, 0, 115, 116, 117, 118, 119, 120, + 735, 736, 121, 122, 737, 738, 125, 0, 126, 127, + 128, 129, 739, 0, 740, 0, 132, 133, 134, 135, + 136, 137, 138, 139, 140, 0, 141, 142, 143, 144, + 145, 146, 0, 147, 148, 149, 150, 741, 742, 743, + 744, 745, 746, 747, 152, 153, 154, 155, 156, 157, + 158, 748, 749, 161, 750, 162, 0, 163, 164, 165, + 166, 167, 168, 0, 169, 170, 171, 172, 173, 0, + 0, 174, 175, 751, 177, 178, 0, 179, 180, 181, + 0, 182, 183, 184, 0, 185, 186, 187, 188, 752, + 190, 191, 192, 193, 753, 754, 195, 0, 196, 197, + 755, 199, 0, 200, 0, 201, 202, 0, 203, 204, + 205, 206, 207, 208, 0, 209, 0, 210, 211, 0, + 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, + 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, + 230, 231, 232, 233, 234, 235, 756, 757, 0, 758, + 0, 239, 240, 241, 242, 243, 244, 245, 246, 247, + 248, 0, 0, 249, 250, 251, 252, 0, 253, 254, + 255, 759, 760, 256, 257, 258, 259, 260, 261, 262, + 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, + 273, 274, 275, 276, 277, 278, 279, 761, 281, 762, + 283, 284, 285, 286, 763, 287, 288, 289, 290, 764, + 765, 292, 766, 294, 295, 296, 0, 297, 298, 0, + 0, 767, 300, 301, 0, 0, 302, 303, 304, 305, + 306, 768, 308, 309, 310, 311, 312, 313, 314, 315, + 316, 317, 0, 318, 319, 769, 321, 322, 323, 324, + 325, 326, 0, 327, 328, 329, 330, 331, 332, 333, + 334, 335, 770, 337, 338, 339, 340, 0, 341, 342, + 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, + 0, 353, 354, 355, 356, 357, 
358, 771, 360, 361, + 362, 363, 364, 365, 366, 367, 368, 369, 370, 0, + 371, 372, 373, 374, 375, 772, 376, 377, 378, 379, + 380, 381, 382, 383, 773, 385, 0, 386, 387, 388, + 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, + 774, 0, 400, 401, 0, 402, 403, 404, 405, 406, + 407, 408, 0, 775, 776, 0, 0, 411, 412, 777, + 414, 778, 779, 416, 417, 780, 419, 420, 421, 422, + 423, 0, 0, 424, 425, 426, 427, 428, 781, 0, + 429, 430, 431, 432, 433, 434, 782, 0, 436, 437, + 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, + 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, + 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, + 783, 0, 0, 0, 0, 0, 0, 1334, 1335, 0, + 0, 0, 0, 0, 787, 0, 788, 0, 0, 0, + 0, 789, 0, 790, 791, 89, 1718, 535, 726, 727, + 728, 729, 730, 0, 0, 0, 0, 0, 0, 0, + 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, + 98, 99, 100, 0, 0, 0, 0, 731, 0, 0, + 101, 102, 0, 103, 104, 105, 106, 107, 108, 109, + 110, 732, 112, 733, 734, 0, 115, 116, 117, 118, + 119, 120, 735, 736, 121, 122, 737, 738, 125, 0, + 126, 127, 128, 129, 739, 0, 740, 0, 132, 133, + 134, 135, 136, 137, 138, 139, 140, 0, 141, 142, + 143, 144, 145, 146, 0, 147, 148, 149, 150, 741, + 742, 743, 744, 745, 746, 747, 152, 153, 154, 155, + 156, 157, 158, 748, 749, 161, 750, 162, 0, 163, + 164, 165, 166, 167, 168, 0, 169, 170, 171, 172, + 173, 0, 0, 174, 175, 751, 177, 178, 0, 179, + 180, 181, 0, 182, 183, 184, 0, 185, 186, 187, + 188, 752, 190, 191, 192, 193, 753, 754, 195, 0, + 196, 197, 755, 199, 0, 200, 0, 201, 202, 0, + 203, 204, 205, 206, 207, 208, 0, 209, 0, 210, + 211, 0, 212, 213, 214, 215, 216, 217, 218, 219, + 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, + 0, 229, 230, 231, 232, 233, 234, 235, 756, 757, + 0, 758, 0, 239, 240, 241, 242, 243, 244, 245, + 246, 247, 248, 0, 0, 249, 250, 251, 252, 0, + 253, 254, 255, 759, 760, 256, 257, 258, 259, 260, + 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, + 271, 272, 273, 274, 275, 276, 277, 278, 279, 761, + 281, 762, 283, 284, 285, 286, 763, 287, 288, 289, + 
290, 764, 765, 292, 766, 294, 295, 296, 0, 297, + 298, 0, 0, 767, 300, 301, 0, 0, 302, 303, + 304, 305, 306, 768, 308, 309, 310, 311, 312, 313, + 314, 315, 316, 317, 0, 318, 319, 769, 321, 322, + 323, 324, 325, 326, 0, 327, 328, 329, 330, 331, + 332, 333, 334, 335, 770, 337, 338, 339, 340, 0, + 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, + 351, 352, 0, 353, 354, 355, 356, 357, 358, 771, + 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, + 370, 0, 371, 372, 373, 374, 375, 772, 376, 377, + 378, 379, 380, 381, 382, 383, 773, 385, 0, 386, + 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, + 397, 398, 774, 0, 400, 401, 0, 402, 403, 404, + 405, 406, 407, 408, 0, 775, 776, 0, 0, 411, + 412, 777, 414, 778, 779, 416, 417, 780, 419, 420, + 421, 422, 423, 0, 0, 424, 425, 426, 427, 428, + 781, 0, 429, 430, 431, 432, 433, 434, 782, 0, + 436, 437, 438, 439, 440, 441, 0, 0, 442, 0, + 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, + 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, + 462, 463, 783, 0, 0, 0, 0, 0, 0, 784, + 785, 0, 0, 0, 0, 0, 787, 0, 788, 0, + 0, 0, 0, 789, 0, 790, 791, 89, 725, 535, 726, 727, 728, 729, 730, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 731, @@ -189574,204 +203379,157 @@ static const yytype_int16 yytable[] = 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 783, 0, 0, 0, 0, 0, - 0, 784, 785, 0, 0, 0, 0, 0, 0, 0, - 2572, 0, 0, 0, 0, 788, 789, 89, 725, 535, - 726, 727, 728, 729, 730, 0, 0, 0, 0, 0, - 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, - 97, 0, 98, 99, 100, 0, 0, 0, 0, 731, - 0, 0, 101, 102, 0, 103, 104, 105, 106, 107, - 108, 109, 2771, 732, 112, 733, 734, 0, 115, 116, - 117, 118, 119, 120, 735, 736, 121, 122, 737, 738, - 125, 0, 126, 127, 128, 129, 739, 0, 740, 0, - 132, 133, 134, 135, 136, 137, 138, 139, 140, 0, - 141, 142, 143, 144, 145, 146, 0, 147, 148, 149, - 2772, 741, 742, 743, 744, 745, 746, 747, 152, 153, - 154, 155, 156, 157, 
158, 748, 749, 161, 750, 162, - 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, - 171, 172, 173, 0, 0, 174, 175, 751, 177, 178, - 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, - 186, 187, 188, 752, 190, 191, 192, 193, 753, 754, - 195, 0, 196, 197, 755, 199, 0, 200, 0, 201, - 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, - 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, - 218, 219, 220, 221, 222, 0, 223, 224, 225, 226, - 227, 228, 0, 229, 230, 231, 232, 233, 234, 235, - 756, 757, 0, 758, 0, 239, 240, 241, 242, 243, - 244, 245, 246, 247, 248, 0, 0, 249, 250, 251, - 252, 0, 253, 254, 255, 759, 760, 256, 257, 258, - 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, - 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, - 279, 761, 281, 762, 283, 284, 285, 286, 763, 287, - 288, 289, 290, 764, 765, 292, 766, 294, 295, 296, - 0, 297, 298, 0, 0, 767, 300, 301, 0, 0, - 302, 303, 304, 305, 306, 768, 308, 309, 310, 311, - 312, 313, 314, 315, 316, 317, 0, 318, 319, 769, - 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 770, 337, 338, 339, - 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, - 349, 350, 351, 352, 0, 353, 354, 355, 356, 357, - 358, 771, 360, 361, 362, 363, 364, 365, 366, 367, - 368, 369, 370, 0, 371, 372, 373, 374, 375, 772, - 376, 377, 378, 379, 380, 381, 382, 383, 773, 385, - 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, - 395, 396, 397, 398, 774, 0, 400, 401, 0, 402, - 403, 404, 405, 406, 407, 408, 0, 775, 776, 0, - 0, 411, 412, 777, 414, 778, 779, 416, 417, 780, - 419, 420, 2773, 422, 423, 0, 0, 424, 425, 426, - 427, 428, 781, 0, 429, 430, 431, 432, 433, 434, - 782, 0, 436, 437, 438, 439, 440, 441, 0, 0, - 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, - 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, - 460, 461, 462, 463, 783, 0, 0, 0, 0, 0, - 0, 784, 785, 0, 0, 0, 0, 0, 0, 0, - 787, 0, 0, 0, 0, 788, 789, 89, 725, 535, - 726, 727, 728, 729, 730, 0, 0, 0, 0, 0, - 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, - 97, 0, 98, 99, 100, 0, 0, 0, 0, 
731, - 0, 0, 101, 102, 0, 103, 104, 105, 106, 107, - 108, 109, 110, 732, 112, 733, 734, 0, 115, 116, - 117, 118, 119, 120, 735, 736, 121, 122, 737, 738, - 125, 0, 126, 127, 128, 129, 739, 0, 740, 0, - 132, 133, 134, 135, 136, 137, 138, 139, 140, 0, - 141, 142, 143, 144, 145, 146, 0, 147, 148, 149, - 2772, 741, 742, 743, 744, 745, 746, 747, 152, 153, - 154, 155, 156, 157, 158, 748, 749, 161, 750, 162, - 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, - 171, 172, 173, 0, 0, 174, 175, 751, 177, 178, - 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, - 186, 187, 188, 752, 190, 191, 192, 193, 753, 754, - 195, 0, 196, 197, 755, 199, 0, 200, 0, 201, - 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, - 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, - 218, 219, 220, 221, 222, 0, 223, 224, 225, 226, - 227, 228, 0, 229, 230, 231, 232, 233, 234, 235, - 756, 757, 0, 758, 0, 239, 240, 241, 242, 243, - 244, 245, 246, 247, 248, 0, 0, 249, 250, 251, - 252, 0, 253, 254, 255, 759, 760, 256, 257, 258, - 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, - 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, - 279, 761, 281, 762, 283, 284, 285, 286, 763, 287, - 288, 289, 290, 764, 765, 292, 766, 294, 295, 296, - 0, 297, 298, 0, 0, 767, 300, 301, 0, 0, - 302, 303, 304, 305, 306, 768, 308, 309, 310, 311, - 312, 313, 314, 315, 316, 317, 0, 318, 319, 769, - 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 770, 337, 338, 339, - 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, - 349, 350, 351, 352, 0, 353, 354, 355, 356, 357, - 358, 771, 360, 361, 362, 363, 364, 365, 366, 367, - 368, 369, 370, 0, 371, 372, 373, 374, 375, 772, - 376, 377, 378, 379, 380, 381, 382, 383, 773, 385, - 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, - 395, 396, 397, 398, 774, 0, 400, 401, 0, 402, - 403, 404, 405, 406, 407, 408, 0, 775, 776, 0, - 0, 411, 412, 777, 414, 778, 779, 416, 417, 780, - 419, 420, 2773, 422, 423, 0, 0, 424, 425, 426, - 427, 428, 781, 0, 429, 430, 431, 432, 433, 434, - 782, 0, 
436, 437, 438, 439, 440, 441, 0, 0, - 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, - 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, - 460, 461, 462, 463, 783, 0, 0, 0, 0, 0, - 0, 784, 785, 0, 0, 0, 0, 0, 0, 0, - 787, 0, 0, 0, 0, 788, 789, 89, 725, 535, - 726, 727, 728, 729, 730, 0, 0, 0, 0, 0, - 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, - 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, - 0, 0, 101, 102, 0, 103, 104, 105, 106, 107, - 108, 109, 110, 732, 112, 733, 734, 0, 115, 116, - 117, 118, 119, 120, 735, 736, 121, 122, 737, 738, - 125, 0, 126, 127, 128, 129, 739, 0, 740, 0, - 132, 133, 134, 135, 136, 137, 138, 139, 140, 0, - 141, 142, 143, 144, 145, 146, 0, 147, 148, 149, - 150, 741, 742, 743, 744, 745, 746, 747, 152, 153, - 154, 155, 156, 157, 158, 748, 749, 161, 0, 162, - 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, - 171, 172, 173, 0, 0, 174, 175, 751, 177, 178, - 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, - 186, 187, 188, 752, 190, 191, 192, 193, 753, 754, - 195, 0, 196, 197, 755, 199, 0, 200, 0, 201, - 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, - 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, - 218, 219, 220, 221, 222, 0, 223, 224, 225, 226, - 227, 228, 0, 229, 230, 231, 232, 233, 234, 235, - 756, 757, 0, 758, 0, 239, 240, 241, 242, 243, - 244, 245, 246, 247, 248, 0, 0, 249, 250, 251, - 252, 0, 253, 254, 255, 759, 760, 256, 257, 258, - 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, - 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, - 279, 761, 281, 762, 283, 284, 285, 286, 0, 287, - 288, 289, 290, 764, 765, 292, 766, 294, 295, 296, - 0, 297, 298, 0, 0, 767, 300, 301, 0, 0, - 302, 303, 304, 305, 306, 768, 308, 309, 310, 311, - 312, 313, 314, 315, 316, 317, 0, 318, 319, 769, - 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 770, 337, 338, 339, - 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, - 349, 350, 351, 352, 0, 353, 354, 355, 356, 357, - 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, - 368, 369, 370, 0, 371, 372, 373, 374, 
375, 772, - 376, 377, 378, 379, 380, 381, 382, 383, 773, 385, - 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, - 395, 396, 397, 398, 774, 0, 400, 401, 0, 402, - 403, 404, 405, 406, 407, 408, 0, 775, 776, 0, - 0, 411, 412, 777, 414, 778, 779, 416, 417, 780, - 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, - 427, 428, 781, 0, 429, 430, 431, 432, 433, 434, - 782, 0, 436, 437, 438, 439, 440, 441, 0, 0, - 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, - 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, - 460, 461, 462, 463, 0, 0, 0, 0, 0, 0, - 0, 1564, 1565, 0, 0, 0, 0, 0, 0, 0, - 1566, 0, 0, 0, 0, 788, 789, 89, 725, 535, - 726, 727, 728, 729, 730, 0, 0, 0, 0, 0, - 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, - 97, 0, 98, 99, 100, 0, 0, 0, 0, 731, - 0, 0, 101, 102, 0, 103, 104, 105, 106, 107, - 108, 109, -1669, 732, 112, 733, 734, 0, 115, 116, - 117, 118, 119, 120, 735, 736, 121, 122, 737, 738, - 125, 0, 126, 127, 128, 129, 739, 0, 740, 0, - 132, 133, 134, 135, 136, 137, 138, 139, 140, 0, - 141, 142, 143, 144, 145, 146, 0, 147, 148, 149, - 2772, 741, 742, 743, 744, 745, 746, 747, 152, 153, - 154, 155, 156, 157, 158, 748, 749, 161, 750, 162, - 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, - 171, 172, 173, 0, 0, 174, 175, 751, 177, 178, - 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, - 186, 187, 188, 752, 190, 191, 192, 193, 753, 754, - 195, 0, 196, 197, 755, 199, 0, 200, 0, 201, - 202, 0, 203, 204, 205, 206, -1669, 208, 0, 209, - 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, - -1669, 219, 220, 221, 222, 0, 223, 224, 225, 226, - 227, 228, 0, 229, 230, 231, 232, 233, 234, 235, - 756, 757, 0, 758, 0, 239, 0, 0, 242, 243, - 244, 245, 246, 247, 248, 0, 0, 249, 250, 251, - -1669, 0, 253, 254, 255, 759, 760, 256, 257, 258, - 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, - 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, - 279, 761, 281, 762, 283, 284, 285, 286, 0, 287, - 288, 0, 290, 764, 765, 292, 766, 294, 295, 296, - 0, 297, 298, 0, 0, 767, 300, 301, 0, 0, - 302, 303, 304, 305, 
306, 768, 308, 309, 310, 311, - 312, 313, 314, 315, 316, 317, 0, 318, 319, 769, - 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 770, 337, 338, 339, - 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, - 349, 350, 351, 352, 0, 353, 354, 355, 356, 357, - 358, 771, 360, 361, 362, 363, 364, 365, 366, 367, - 368, 369, 370, 0, 371, 372, 373, 374, 375, 772, - 376, 377, 378, 379, 380, -1669, 382, 383, 773, 385, - 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, - 395, 396, 397, 398, 774, 0, 400, 401, 0, 402, - 403, 404, 405, 406, 407, 408, 0, 775, 776, 0, - 0, 411, 412, 777, 414, 778, 779, 416, 417, 780, - 419, 420, 2773, 422, 423, 0, 0, 424, 425, 426, - 427, 428, 781, 0, 429, 430, 431, 432, 433, 434, - 782, 0, 436, 437, 438, 439, 440, 441, 0, 0, - 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, - 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, - 460, 461, 462, 463, -1669, 0, 0, 0, 0, 0, - 0, 784, 785, 0, 0, 89, 725, 535, 726, 727, - 787, 729, 730, 0, 0, 788, 789, 0, 0, 0, + 0, 784, 785, 0, 0, 0, 0, 0, 787, 0, + 2587, 0, 0, 0, 0, 789, 0, 790, 791, 89, + 725, 535, 726, 727, 728, 729, 730, 0, 0, 0, + 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, + 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, + 0, 731, 0, 0, 101, 102, 0, 103, 104, 105, + 106, 107, 108, 109, 2786, 732, 112, 733, 734, 0, + 115, 116, 117, 118, 119, 120, 735, 736, 121, 122, + 737, 738, 125, 0, 126, 127, 128, 129, 739, 0, + 740, 0, 132, 133, 134, 135, 136, 137, 138, 139, + 140, 0, 141, 142, 143, 144, 145, 146, 0, 147, + 148, 149, 2787, 741, 742, 743, 744, 745, 746, 747, + 152, 153, 154, 155, 156, 157, 158, 748, 749, 161, + 750, 162, 0, 163, 164, 165, 166, 167, 168, 0, + 169, 170, 171, 172, 173, 0, 0, 174, 175, 751, + 177, 178, 0, 179, 180, 181, 0, 182, 183, 184, + 0, 185, 186, 187, 188, 752, 190, 191, 192, 193, + 753, 754, 195, 0, 196, 197, 755, 199, 0, 200, + 0, 201, 202, 0, 203, 204, 205, 206, 207, 208, + 0, 209, 0, 210, 211, 0, 212, 213, 214, 215, + 216, 217, 218, 219, 220, 221, 222, 0, 223, 224, + 
225, 226, 227, 228, 0, 229, 230, 231, 232, 233, + 234, 235, 756, 757, 0, 758, 0, 239, 240, 241, + 242, 243, 244, 245, 246, 247, 248, 0, 0, 249, + 250, 251, 252, 0, 253, 254, 255, 759, 760, 256, + 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, + 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, + 277, 278, 279, 761, 281, 762, 283, 284, 285, 286, + 763, 287, 288, 289, 290, 764, 765, 292, 766, 294, + 295, 296, 0, 297, 298, 0, 0, 767, 300, 301, + 0, 0, 302, 303, 304, 305, 306, 768, 308, 309, + 310, 311, 312, 313, 314, 315, 316, 317, 0, 318, + 319, 769, 321, 322, 323, 324, 325, 326, 0, 327, + 328, 329, 330, 331, 332, 333, 334, 335, 770, 337, + 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, + 347, 348, 349, 350, 351, 352, 0, 353, 354, 355, + 356, 357, 358, 771, 360, 361, 362, 363, 364, 365, + 366, 367, 368, 369, 370, 0, 371, 372, 373, 374, + 375, 772, 376, 377, 378, 379, 380, 381, 382, 383, + 773, 385, 0, 386, 387, 388, 389, 390, 391, 392, + 393, 394, 395, 396, 397, 398, 774, 0, 400, 401, + 0, 402, 403, 404, 405, 406, 407, 408, 0, 775, + 776, 0, 0, 411, 412, 777, 414, 778, 779, 416, + 417, 780, 419, 420, 2788, 422, 423, 0, 0, 424, + 425, 426, 427, 428, 781, 0, 429, 430, 431, 432, + 433, 434, 782, 0, 436, 437, 438, 439, 440, 441, + 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, + 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, + 458, 459, 460, 461, 462, 463, 783, 0, 0, 0, + 0, 0, 0, 784, 785, 0, 0, 0, 0, 0, + 787, 0, 788, 0, 0, 0, 0, 789, 0, 790, + 791, 89, 725, 535, 726, 727, 728, 729, 730, 0, + 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, + 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, + 0, 0, 0, 731, 0, 0, 101, 102, 0, 103, + 104, 105, 106, 107, 108, 109, 110, 732, 112, 733, + 734, 0, 115, 116, 117, 118, 119, 120, 735, 736, + 121, 122, 737, 738, 125, 0, 126, 127, 128, 129, + 739, 0, 740, 0, 132, 133, 134, 135, 136, 137, + 138, 139, 140, 0, 141, 142, 143, 144, 145, 146, + 0, 147, 148, 149, 2787, 741, 742, 743, 744, 745, + 746, 747, 152, 153, 154, 155, 156, 157, 158, 748, + 749, 161, 750, 
162, 0, 163, 164, 165, 166, 167, + 168, 0, 169, 170, 171, 172, 173, 0, 0, 174, + 175, 751, 177, 178, 0, 179, 180, 181, 0, 182, + 183, 184, 0, 185, 186, 187, 188, 752, 190, 191, + 192, 193, 753, 754, 195, 0, 196, 197, 755, 199, + 0, 200, 0, 201, 202, 0, 203, 204, 205, 206, + 207, 208, 0, 209, 0, 210, 211, 0, 212, 213, + 214, 215, 216, 217, 218, 219, 220, 221, 222, 0, + 223, 224, 225, 226, 227, 228, 0, 229, 230, 231, + 232, 233, 234, 235, 756, 757, 0, 758, 0, 239, + 240, 241, 242, 243, 244, 245, 246, 247, 248, 0, + 0, 249, 250, 251, 252, 0, 253, 254, 255, 759, + 760, 256, 257, 258, 259, 260, 261, 262, 263, 264, + 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, + 275, 276, 277, 278, 279, 761, 281, 762, 283, 284, + 285, 286, 763, 287, 288, 289, 290, 764, 765, 292, + 766, 294, 295, 296, 0, 297, 298, 0, 0, 767, + 300, 301, 0, 0, 302, 303, 304, 305, 306, 768, + 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, + 0, 318, 319, 769, 321, 322, 323, 324, 325, 326, + 0, 327, 328, 329, 330, 331, 332, 333, 334, 335, + 770, 337, 338, 339, 340, 0, 341, 342, 343, 344, + 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, + 354, 355, 356, 357, 358, 771, 360, 361, 362, 363, + 364, 365, 366, 367, 368, 369, 370, 0, 371, 372, + 373, 374, 375, 772, 376, 377, 378, 379, 380, 381, + 382, 383, 773, 385, 0, 386, 387, 388, 389, 390, + 391, 392, 393, 394, 395, 396, 397, 398, 774, 0, + 400, 401, 0, 402, 403, 404, 405, 406, 407, 408, + 0, 775, 776, 0, 0, 411, 412, 777, 414, 778, + 779, 416, 417, 780, 419, 420, 2788, 422, 423, 0, + 0, 424, 425, 426, 427, 428, 781, 0, 429, 430, + 431, 432, 433, 434, 782, 0, 436, 437, 438, 439, + 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, + 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, + 456, 457, 458, 459, 460, 461, 462, 463, 783, 0, + 0, 0, 0, 0, 0, 784, 785, 0, 0, 0, + 0, 0, 787, 0, 788, 0, 0, 0, 0, 789, + 0, 790, 791, 89, 725, 535, 726, 727, 728, 729, + 730, 0, 0, 0, 0, 0, 0, 0, 0, 90, + 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, + 100, 0, 0, 0, 0, 731, 0, 0, 101, 
102, + 0, 103, 104, 105, 106, 107, 108, 109, -1675, 732, + 112, 733, 734, 0, 115, 116, 117, 118, 119, 120, + 735, 736, 121, 122, 737, 738, 125, 0, 126, 127, + 128, 129, 739, 0, 740, 0, 132, 133, 134, 135, + 136, 137, 138, 139, 140, 0, 141, 142, 143, 144, + 145, 146, 0, 147, 148, 149, 2787, 741, 742, 743, + 744, 745, 746, 747, 152, 153, 154, 155, 156, 157, + 158, 748, 749, 161, 750, 162, 0, 163, 164, 165, + 166, 167, 168, 0, 169, 170, 171, 172, 173, 0, + 0, 174, 175, 751, 177, 178, 0, 179, 180, 181, + 0, 182, 183, 184, 0, 185, 186, 187, 188, 752, + 190, 191, 192, 193, 753, 754, 195, 0, 196, 197, + 755, 199, 0, 200, 0, 201, 202, 0, 203, 204, + 205, 206, -1675, 208, 0, 209, 0, 210, 211, 0, + 212, 213, 214, 215, 216, 217, -1675, 219, 220, 221, + 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, + 230, 231, 232, 233, 234, 235, 756, 757, 0, 758, + 0, 239, 0, 0, 242, 243, 244, 245, 246, 247, + 248, 0, 0, 249, 250, 251, -1675, 0, 253, 254, + 255, 759, 760, 256, 257, 258, 259, 260, 261, 262, + 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, + 273, 274, 275, 276, 277, 278, 279, 761, 281, 762, + 283, 284, 285, 286, 0, 287, 288, 0, 290, 764, + 765, 292, 766, 294, 295, 296, 0, 297, 298, 0, + 0, 767, 300, 301, 0, 0, 302, 303, 304, 305, + 306, 768, 308, 309, 310, 311, 312, 313, 314, 315, + 316, 317, 0, 318, 319, 769, 321, 322, 323, 324, + 325, 326, 0, 327, 328, 329, 330, 331, 332, 333, + 334, 335, 770, 337, 338, 339, 340, 0, 341, 342, + 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, + 0, 353, 354, 355, 356, 357, 358, 771, 360, 361, + 362, 363, 364, 365, 366, 367, 368, 369, 370, 0, + 371, 372, 373, 374, 375, 772, 376, 377, 378, 379, + 380, -1675, 382, 383, 773, 385, 0, 386, 387, 388, + 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, + 774, 0, 400, 401, 0, 402, 403, 404, 405, 406, + 407, 408, 0, 775, 776, 0, 0, 411, 412, 777, + 414, 778, 779, 416, 417, 780, 419, 420, 2788, 422, + 423, 0, 0, 424, 425, 426, 427, 428, 781, 0, + 429, 430, 431, 432, 433, 434, 782, 0, 436, 437, + 438, 
439, 440, 441, 0, 0, 442, 0, 0, 443, + 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, + 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, + -1675, 0, 0, 0, 0, 0, 0, 784, 785, 0, + 0, 0, 0, 0, 787, 0, 788, 0, 0, 0, + 0, 789, 0, 790, 791, 89, 725, 535, 726, 727, + 728, 729, 730, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, 106, 107, 108, 109, @@ -189798,14 +203556,14 @@ static const yytype_int16 yytable[] = 271, 272, 273, 274, 275, 276, 277, 278, 279, 761, 281, 762, 283, 284, 285, 286, 0, 287, 288, 289, 290, 764, 765, 292, 766, 294, 295, 296, 0, 297, - 298, 0, 0, 299, 300, 301, 0, 0, 302, 303, + 298, 0, 0, 767, 300, 301, 0, 0, 302, 303, 304, 305, 306, 768, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 0, 318, 319, 769, 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, 330, 331, 332, 333, 334, 335, 770, 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, - 351, 352, 0, 353, 354, 355, 356, 357, 358, 1696, - 1697, 361, 362, 363, 364, 365, 366, 367, 368, 369, + 351, 352, 0, 353, 354, 355, 356, 357, 358, 359, + 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 0, 371, 372, 373, 374, 375, 772, 376, 377, 378, 379, 380, 381, 382, 383, 773, 385, 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, @@ -189817,9 +203575,57 @@ static const yytype_int16 yytable[] = 436, 437, 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, - 462, 463, 0, 0, 0, 0, 0, 0, 0, 1698, - 1699, 0, 0, 0, 0, 0, 0, 0, 1566, 0, - 0, 0, 0, 788, 789, 89, 725, 535, 726, 727, + 462, 463, 0, 0, 0, 0, 0, 0, 0, 1571, + 1572, 0, 0, 89, 725, 535, 726, 727, 1573, 729, + 730, 0, 0, 0, 0, 790, 791, 0, 0, 90, + 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, + 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, + 0, 103, 104, 105, 106, 107, 108, 109, 110, 732, + 112, 733, 734, 0, 115, 116, 117, 118, 119, 120, + 735, 736, 121, 122, 737, 738, 125, 0, 126, 
127, + 128, 129, 739, 0, 740, 0, 132, 133, 134, 135, + 136, 137, 138, 139, 140, 0, 141, 142, 143, 144, + 145, 146, 0, 147, 148, 149, 150, 741, 742, 743, + 744, 745, 746, 747, 152, 153, 154, 155, 156, 157, + 158, 748, 749, 161, 0, 162, 0, 163, 164, 165, + 166, 167, 168, 0, 169, 170, 171, 172, 173, 0, + 0, 174, 175, 751, 177, 178, 0, 179, 180, 181, + 0, 182, 183, 184, 0, 185, 186, 187, 188, 752, + 190, 191, 192, 193, 753, 754, 195, 0, 196, 197, + 755, 199, 0, 200, 0, 201, 202, 0, 203, 204, + 205, 206, 207, 208, 0, 209, 0, 210, 211, 0, + 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, + 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, + 230, 231, 232, 233, 234, 235, 756, 757, 0, 758, + 0, 239, 240, 241, 242, 243, 244, 245, 246, 247, + 248, 0, 0, 249, 250, 251, 252, 0, 253, 254, + 255, 759, 760, 256, 257, 258, 259, 260, 261, 262, + 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, + 273, 274, 275, 276, 277, 278, 279, 761, 281, 762, + 283, 284, 285, 286, 0, 287, 288, 289, 290, 764, + 765, 292, 766, 294, 295, 296, 0, 297, 298, 0, + 0, 299, 300, 301, 0, 0, 302, 303, 304, 305, + 306, 768, 308, 309, 310, 311, 312, 313, 314, 315, + 316, 317, 0, 318, 319, 769, 321, 322, 323, 324, + 325, 326, 0, 327, 328, 329, 330, 331, 332, 333, + 334, 335, 770, 337, 338, 339, 340, 0, 341, 342, + 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, + 0, 353, 354, 355, 356, 357, 358, 1707, 1708, 361, + 362, 363, 364, 365, 366, 367, 368, 369, 370, 0, + 371, 372, 373, 374, 375, 772, 376, 377, 378, 379, + 380, 381, 382, 383, 773, 385, 0, 386, 387, 388, + 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, + 774, 0, 400, 401, 0, 402, 403, 404, 405, 406, + 407, 408, 0, 775, 776, 0, 0, 411, 412, 777, + 414, 778, 779, 416, 417, 780, 419, 420, 421, 422, + 423, 0, 0, 424, 425, 426, 427, 428, 781, 0, + 429, 430, 431, 432, 433, 434, 782, 0, 436, 437, + 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, + 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, + 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, + 0, 0, 0, 0, 
0, 0, 0, 1709, 1710, 0, + 0, 0, 0, 0, 0, 0, 1573, 0, 0, 0, + 0, 0, 0, 790, 791, 89, 725, 535, 726, 727, 728, 729, 730, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, @@ -189866,105 +203672,152 @@ static const yytype_int16 yytable[] = 436, 437, 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, - 462, 463, 0, 0, 0, 0, 0, 0, 0, 1564, - 1565, 0, 0, 0, 0, 0, 0, 0, 1566, 0, - 0, 0, 0, 788, 789, 89, 725, 535, 726, 727, - 728, 729, 730, 0, 0, 0, 0, 0, 0, 0, - 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, - 98, 99, 100, 0, 0, 0, 0, 731, 0, 0, - 101, 102, 0, 103, 104, 105, 106, 107, 108, 109, - 0, 732, 112, 733, 734, 0, 115, 116, 117, 118, - 119, 120, 735, 736, 121, 122, 737, 738, 125, 0, - 126, 127, 128, 129, 739, 0, 740, 0, 132, 133, - 134, 135, 136, 137, 138, 139, 140, 0, 141, 142, - 143, 144, 145, 146, 0, 147, 148, 149, 150, 741, - 742, 743, 744, 745, 746, 747, 152, 153, 154, 155, - 156, 157, 158, 748, 749, 161, 750, 162, 0, 163, - 164, 165, 166, 167, 168, 0, 169, 170, 171, 172, - 173, 0, 0, 174, 175, 751, 177, 178, 0, 179, - 180, 181, 0, 182, 0, 184, 0, 185, 186, 187, - 188, 752, 190, 191, 192, 193, 753, 754, 195, 0, - 196, 197, 755, 199, 0, 200, 0, 201, 202, 0, - 203, 204, 205, 206, 0, 208, 0, 209, 0, 210, - 211, 0, 212, 213, 214, 215, 216, 217, 0, 219, - 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, - 0, 229, 230, 231, 232, 233, 234, 235, 756, 757, - 0, 758, 0, 239, 0, 0, 242, 243, 244, 245, - 246, 247, 248, 0, 0, 249, 250, 251, 0, 0, - 253, 254, 255, 759, 760, 256, 257, 258, 259, 260, - 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, - 271, 272, 273, 274, 275, 276, 277, 278, 279, 761, - 281, 762, 283, 284, 285, 286, 0, 287, 288, 0, - 290, 764, 765, 292, 766, 294, 295, 296, 0, 297, - 298, 0, 0, 767, 300, 301, 0, 0, 302, 303, - 304, 305, 306, 768, 308, 309, 310, 311, 312, 313, - 314, 315, 316, 317, 0, 318, 319, 769, 321, 322, - 323, 324, 325, 326, 
0, 327, 328, 329, 330, 331, - 332, 333, 334, 335, 770, 337, 338, 339, 340, 0, - 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, - 351, 352, 0, 353, 354, 355, 356, 357, 358, 771, - 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, - 370, 0, 371, 372, 373, 374, 375, 772, 376, 377, - 378, 379, 380, 0, 382, 383, 773, 385, 0, 386, - 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, - 397, 398, 774, 0, 400, 401, 0, 402, 403, 404, - 405, 406, 407, 408, 0, 775, 776, 0, 0, 411, - 412, 777, 414, 778, 779, 416, 417, 780, 419, 420, - 421, 422, 423, 0, 0, 424, 425, 426, 427, 428, - 781, 0, 429, 430, 431, 432, 433, 434, 782, 0, - 436, 437, 438, 439, 440, 441, 0, 0, 442, 0, - 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, - 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, - 462, 463, 0, 0, 0, 0, 0, 0, 0, 784, - 785, 488, 0, 0, 0, 0, 0, 0, 787, 0, - 0, 0, 0, 788, 789, 0, 0, 90, 91, 92, - 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, - 0, 0, 0, 0, 2211, 0, 101, 102, 0, 103, - 104, 105, 0, 107, 108, 109, 110, 111, 0, 113, - 114, 0, 115, 116, 117, 118, 119, 120, 0, 0, - 121, 122, 123, 124, 125, 0, 126, 127, 128, 129, - 130, 0, 0, 0, 132, 133, 134, 135, 136, 0, - 138, 139, 140, 0, 141, 142, 143, 144, 145, 146, - 0, -579, 148, 149, 150, 0, 0, 0, 0, 0, - 0, 0, 152, 153, 154, 155, 156, 157, 158, 159, - 160, 161, 0, 162, 0, 163, 164, 165, 166, 167, - 168, 0, 169, 170, 171, 172, 173, 0, 0, 174, - 175, 176, 177, 178, 0, 179, 180, 181, 0, 182, - 183, 184, 0, 185, 186, 187, 188, 189, 190, 191, - 192, 193, 194, 0, 195, 0, 196, 197, 198, 199, - 0, 200, 0, 201, 0, 0, -579, 204, 205, 206, - 0, 208, 0, 209, 0, 210, 211, 0, 212, 213, - 214, 215, 216, 217, 0, 219, 220, 221, 222, 0, - 223, 224, 225, 226, 227, 228, 0, 229, -579, 231, - 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, - 0, 0, 242, -579, 244, 245, 246, 247, 248, 0, - 0, 249, -579, 251, 0, 0, 253, 254, 255, 0, - 0, 256, 257, 258, 259, 260, 490, 262, 263, 264, - 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, - 275, 276, 277, 278, 279, 280, 
-579, 282, 283, 284, - 285, 286, 0, 287, 288, 0, 290, 0, 291, 292, - 293, 294, 295, 296, 0, 297, 298, 0, 0, 299, - 300, 301, 0, 0, 302, 303, 0, 305, 0, 307, - 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, - 0, 318, 319, 320, 321, 322, 323, 324, 325, 326, - 0, 327, 328, 329, 330, 331, 332, 333, 334, 335, - 336, 337, 338, 339, 340, 0, 341, 342, 343, 344, - 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, - 354, -579, 356, 357, 358, 359, 360, 361, 362, 363, - 364, 365, 366, 367, 368, 369, 370, 0, 371, 372, - 373, 374, 375, 0, 376, 377, 378, 379, 380, 0, - 382, 383, 384, 385, 0, 386, 387, 388, 389, 390, - 391, 392, 393, 394, 395, 396, 492, 398, 399, 0, - 400, 401, 0, 402, -579, 404, 405, 406, 407, 408, - 0, 409, 410, 0, 0, 411, 412, 413, 414, 415, - 0, 416, 417, 418, 419, 420, 421, 422, 423, 0, - 0, 424, 425, 426, 427, 428, 0, 0, 429, 430, - 431, 432, 433, 434, 435, 0, 436, 0, 438, 439, - 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, - 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, - 456, 457, 458, 459, 460, 461, 462, 463, 0, 0, - 89, 0, 560, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 847, 90, 91, 92, 93, + 462, 463, 0, 0, 0, 0, 0, 0, 0, 1571, + 1572, 0, 0, 0, 0, 0, 0, 0, 1573, 0, + 0, 0, 0, 0, 0, 790, 791, 89, 725, 535, + 726, 727, 728, 729, 730, 0, 0, 0, 0, 0, + 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, + 97, 0, 98, 99, 100, 0, 0, 0, 0, 731, + 0, 0, 101, 102, 0, 103, 104, 105, 106, 107, + 108, 109, 0, 732, 112, 733, 734, 0, 115, 116, + 117, 118, 119, 120, 735, 736, 121, 122, 737, 738, + 125, 0, 126, 127, 128, 129, 739, 0, 740, 0, + 132, 133, 134, 135, 136, 137, 138, 139, 140, 0, + 141, 142, 143, 144, 145, 146, 0, 147, 148, 149, + 150, 741, 742, 743, 744, 745, 746, 747, 152, 153, + 154, 155, 156, 157, 158, 748, 749, 161, 750, 162, + 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, + 171, 172, 173, 0, 0, 174, 175, 751, 177, 178, + 0, 179, 180, 181, 0, 182, 0, 184, 0, 185, + 186, 187, 188, 752, 190, 191, 192, 193, 753, 754, + 195, 0, 196, 197, 755, 199, 0, 200, 0, 201, + 202, 0, 
203, 204, 205, 206, 0, 208, 0, 209, + 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, + 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, + 227, 228, 0, 229, 230, 231, 232, 233, 234, 235, + 756, 757, 0, 758, 0, 239, 0, 0, 242, 243, + 244, 245, 246, 247, 248, 0, 0, 249, 250, 251, + 0, 0, 253, 254, 255, 759, 760, 256, 257, 258, + 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, + 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, + 279, 761, 281, 762, 283, 284, 285, 286, 0, 287, + 288, 0, 290, 764, 765, 292, 766, 294, 295, 296, + 0, 297, 298, 0, 0, 767, 300, 301, 0, 0, + 302, 303, 304, 305, 306, 768, 308, 309, 310, 311, + 312, 313, 314, 315, 316, 317, 0, 318, 319, 769, + 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, + 330, 331, 332, 333, 334, 335, 770, 337, 338, 339, + 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, + 349, 350, 351, 352, 0, 353, 354, 355, 356, 357, + 358, 771, 360, 361, 362, 363, 364, 365, 366, 367, + 368, 369, 370, 0, 371, 372, 373, 374, 375, 772, + 376, 377, 378, 379, 380, 0, 382, 383, 773, 385, + 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, + 395, 396, 397, 398, 774, 0, 400, 401, 0, 402, + 403, 404, 405, 406, 407, 408, 0, 775, 776, 0, + 0, 411, 412, 777, 414, 778, 779, 416, 417, 780, + 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, + 427, 428, 781, 0, 429, 430, 431, 432, 433, 434, + 782, 0, 436, 437, 438, 439, 440, 441, 0, 0, + 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, + 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, + 460, 461, 462, 463, 0, 0, 0, 0, 0, 0, + 0, 784, 785, 488, 0, 0, 0, 0, 787, 0, + 788, 0, 0, 0, 0, 789, 0, 790, 791, 90, + 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, + 100, 0, 0, 0, 0, 0, 2224, 0, 101, 102, + 0, 103, 104, 105, 0, 107, 108, 109, 110, 111, + 0, 113, 114, 0, 115, 116, 117, 118, 119, 120, + 0, 0, 121, 122, 123, 124, 125, 0, 126, 127, + 128, 129, 130, 0, 0, 0, 132, 133, 134, 135, + 136, 0, 138, 139, 140, 0, 141, 142, 143, 144, + 145, 146, 0, -580, 148, 149, 150, 0, 0, 0, + 0, 0, 0, 0, 152, 153, 154, 155, 156, 157, + 158, 
159, 160, 161, 0, 162, 0, 163, 164, 165, + 166, 167, 168, 0, 169, 170, 171, 172, 173, 0, + 0, 174, 175, 176, 177, 178, 0, 179, 180, 181, + 0, 182, 183, 184, 0, 185, 186, 187, 188, 189, + 190, 191, 192, 193, 194, 0, 195, 0, 196, 197, + 198, 199, 0, 200, 0, 201, 0, 0, -580, 204, + 205, 206, 0, 208, 0, 209, 0, 210, 211, 0, + 212, 213, 214, 215, 216, 217, 0, 219, 220, 221, + 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, + -580, 231, 232, 233, 234, 235, 236, 237, 0, 238, + 0, 239, 0, 0, 242, -580, 244, 245, 246, 247, + 248, 0, 0, 249, -580, 251, 0, 0, 253, 254, + 255, 0, 0, 256, 257, 258, 259, 260, 490, 262, + 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, + 273, 274, 275, 276, 277, 278, 279, 280, -580, 282, + 283, 284, 285, 286, 0, 287, 288, 0, 290, 0, + 291, 292, 293, 294, 295, 296, 0, 297, 298, 0, + 0, 299, 300, 301, 0, 0, 302, 303, 0, 305, + 0, 307, 308, 309, 310, 311, 312, 313, 314, 315, + 316, 317, 0, 318, 319, 320, 321, 322, 323, 324, + 325, 326, 0, 327, 328, 329, 330, 331, 332, 333, + 334, 335, 336, 337, 338, 339, 340, 0, 341, 342, + 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, + 0, 353, 354, -580, 356, 357, 358, 359, 360, 361, + 362, 363, 364, 365, 366, 367, 368, 369, 370, 0, + 371, 372, 373, 374, 375, 0, 376, 377, 378, 379, + 380, 0, 382, 383, 384, 385, 0, 386, 387, 388, + 389, 390, 391, 392, 393, 394, 395, 396, 492, 398, + 399, 0, 400, 401, 0, 402, -580, 404, 405, 406, + 407, 408, 0, 409, 410, 0, 0, 411, 412, 413, + 414, 415, 0, 416, 417, 418, 419, 420, 421, 422, + 423, 0, 0, 424, 425, 426, 427, 428, 0, 0, + 429, 430, 431, 432, 433, 434, 435, 0, 436, 0, + 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, + 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, + 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, + 0, 0, 89, 0, 560, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 849, 90, 91, + 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, + 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, + 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, + 113, 114, 0, 115, 116, 117, 118, 119, 120, 0, + 736, 
121, 122, 123, 124, 125, 0, 126, 127, 128, + 129, 739, 0, 740, 0, 132, 133, 134, 135, 136, + 137, 138, 139, 140, 0, 141, 142, 143, 144, 145, + 146, 0, 147, 148, 149, 150, 741, 742, 743, 744, + 745, 746, 747, 152, 153, 154, 155, 156, 157, 158, + 159, 160, 161, 0, 162, 0, 163, 164, 165, 166, + 167, 168, 0, 169, 170, 171, 172, 173, 0, 0, + 174, 175, 176, 177, 178, 0, 179, 180, 181, 0, + 182, 183, 184, 0, 185, 186, 187, 188, 189, 190, + 191, 192, 193, 753, 0, 195, 0, 196, 197, 198, + 199, 0, 200, 0, 201, 202, 0, 203, 204, 205, + 206, 207, 208, 0, 209, 0, 210, 211, 0, 212, + 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, + 0, 223, 224, 225, 226, 227, 228, 0, 229, 230, + 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, + 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, + 1978, 0, 249, 250, 251, 252, 0, 253, 254, 255, + 759, 760, 256, 257, 258, 259, 260, 261, 262, 263, + 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, + 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, + 284, 285, 286, 0, 287, 288, 289, 290, 0, 765, + 292, 293, 294, 295, 296, 0, 297, 298, 0, 561, + 299, 300, 301, 0, 0, 302, 303, 304, 305, 306, + 768, 308, 309, 310, 311, 312, 313, 314, 315, 316, + 317, 0, 318, 319, 769, 321, 322, 323, 324, 325, + 326, 0, 327, 328, 329, 330, 331, 332, 333, 334, + 335, 336, 337, 338, 339, 340, 0, 341, 342, 343, + 344, 345, 346, 347, 348, 349, 350, 351, 352, 0, + 353, 354, 355, 356, 357, 358, 359, 1979, 361, 362, + 363, 364, 365, 366, 367, 368, 369, 370, 25, 371, + 372, 373, 374, 375, 772, 376, 377, 378, 379, 380, + 381, 382, 383, 384, 385, 0, 386, 387, 388, 389, + 390, 391, 392, 393, 394, 395, 396, 397, 398, 774, + 0, 400, 401, 29, 402, 403, 404, 405, 406, 407, + 408, 0, 409, 410, 0, 0, 411, 412, 777, 414, + 778, 0, 416, 417, 780, 419, 420, 421, 422, 423, + 0, 0, 424, 425, 426, 427, 428, 781, 0, 429, + 430, 431, 432, 433, 598, 435, 0, 436, 437, 438, + 439, 440, 441, 0, 0, 442, 0, 32, 443, 444, + 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, + 455, 456, 457, 458, 459, 
460, 461, 462, 463, 0, + 89, 33, 560, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1980, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, @@ -189984,7 +203837,7 @@ static const yytype_int16 yytable[] = 215, 216, 217, 218, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 230, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 240, - 241, 242, 243, 244, 245, 246, 247, 248, 1965, 0, + 241, 242, 243, 244, 245, 246, 247, 248, 1978, 0, 249, 250, 251, 252, 0, 253, 254, 255, 759, 760, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, @@ -189997,21 +203850,21 @@ static const yytype_int16 yytable[] = 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, 354, - 355, 356, 357, 358, 359, 1966, 361, 362, 363, 364, - 365, 366, 367, 368, 369, 370, 25, 371, 372, 373, + 355, 356, 357, 358, 359, 1979, 361, 362, 363, 364, + 365, 366, 367, 368, 369, 370, 0, 371, 372, 373, 374, 375, 772, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 774, 0, 400, - 401, 29, 402, 403, 404, 405, 406, 407, 408, 0, + 401, 0, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 0, 0, 411, 412, 777, 414, 778, 0, 416, 417, 780, 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, 427, 428, 781, 0, 429, 430, 431, - 432, 433, 598, 435, 0, 436, 437, 438, 439, 440, - 441, 0, 0, 442, 0, 32, 443, 444, 445, 446, + 432, 433, 434, 435, 0, 436, 437, 438, 439, 440, + 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, - 457, 458, 459, 460, 461, 462, 463, 0, 89, 33, - 560, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1967, 90, 91, 92, 93, 94, 95, + 457, 458, 459, 460, 461, 462, 463, 0, 89, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1980, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 
0, 0, 0, 101, 102, 0, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 0, 115, @@ -190031,20 +203884,20 @@ static const yytype_int16 yytable[] = 217, 218, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 230, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 240, 241, 242, - 243, 244, 245, 246, 247, 248, 1965, 0, 249, 250, + 243, 244, 245, 246, 247, 248, 0, 0, 249, 250, 251, 252, 0, 253, 254, 255, 759, 760, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 0, 287, 288, 289, 290, 0, 765, 292, 293, 294, 295, - 296, 0, 297, 298, 0, 561, 299, 300, 301, 0, + 296, 0, 297, 298, 0, 0, 299, 300, 301, 0, 0, 302, 303, 304, 305, 306, 768, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 0, 318, 319, 769, 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, 354, 355, 356, - 357, 358, 359, 1966, 361, 362, 363, 364, 365, 366, + 357, 358, 359, 1979, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 0, 371, 372, 373, 374, 375, 772, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 0, 386, 387, 388, 389, 390, 391, 392, 393, @@ -190058,7 +203911,7 @@ static const yytype_int16 yytable[] = 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 0, 89, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1967, 90, 91, 92, 93, 94, 95, 96, 97, + 0, 34, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 0, 115, 116, 117, @@ -190091,7 +203944,7 @@ static const yytype_int16 yytable[] = 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, 354, 355, 356, 357, 358, - 359, 1966, 361, 362, 363, 364, 365, 366, 367, 368, + 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 0, 
371, 372, 373, 374, 375, 772, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, @@ -190103,57 +203956,57 @@ static const yytype_int16 yytable[] = 0, 436, 437, 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, - 461, 462, 463, 0, 89, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 34, + 461, 462, 463, 0, 655, 1083, 535, 0, 0, 0, + 729, 0, 0, 0, 0, 0, 0, 0, 0, 2598, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, - 102, 0, 103, 104, 105, 106, 107, 108, 109, 110, - 111, 112, 113, 114, 0, 115, 116, 117, 118, 119, - 120, 0, 736, 121, 122, 123, 124, 125, 0, 126, - 127, 128, 129, 739, 0, 740, 0, 132, 133, 134, - 135, 136, 137, 138, 139, 140, 0, 141, 142, 143, - 144, 145, 146, 0, 147, 148, 149, 150, 741, 742, - 743, 744, 745, 746, 747, 152, 153, 154, 155, 156, - 157, 158, 159, 160, 161, 0, 162, 0, 163, 164, + 102, 0, 103, 104, 105, 0, 107, 108, 109, 656, + 657, 0, 658, 659, 0, 115, 116, 117, 118, 119, + 120, 0, 0, 121, 122, 660, 661, 125, 0, 126, + 127, 128, 129, 662, 0, 0, 0, 132, 133, 134, + 135, 136, 0, 138, 139, 140, 0, 141, 142, 143, + 144, 145, 146, 0, 0, 148, 149, 150, 0, 0, + 0, 0, 0, 0, 0, 152, 153, 154, 155, 156, + 157, 158, 663, 664, 161, 0, 162, 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, 171, 172, 173, 0, 0, 174, 175, 176, 177, 178, 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, 186, 187, 188, - 189, 190, 191, 192, 193, 753, 0, 195, 0, 196, - 197, 198, 199, 0, 200, 0, 201, 202, 0, 203, - 204, 205, 206, 207, 208, 0, 209, 0, 210, 211, - 0, 212, 213, 214, 215, 216, 217, 218, 219, 220, + 665, 190, 191, 192, 193, 666, 1084, 195, 0, 196, + 197, 667, 199, 0, 200, 0, 201, 0, 0, 0, + 204, 205, 206, 0, 208, 0, 209, 0, 668, 211, + 0, 212, 213, 214, 215, 216, 217, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, - 229, 230, 231, 232, 233, 234, 235, 236, 237, 0, - 238, 0, 239, 240, 241, 242, 243, 244, 245, 246, - 
247, 248, 0, 0, 249, 250, 251, 252, 0, 253, - 254, 255, 759, 760, 256, 257, 258, 259, 260, 261, + 229, 0, 669, 232, 233, 234, 235, 670, 671, 0, + 672, 0, 239, 0, 0, 242, 0, 244, 245, 246, + 247, 248, 0, 0, 249, 0, 251, 0, 0, 253, + 254, 255, 0, 0, 256, 257, 258, 259, 260, 673, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, - 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, - 282, 283, 284, 285, 286, 0, 287, 288, 289, 290, - 0, 765, 292, 293, 294, 295, 296, 0, 297, 298, - 0, 0, 299, 300, 301, 0, 0, 302, 303, 304, - 305, 306, 768, 308, 309, 310, 311, 312, 313, 314, - 315, 316, 317, 0, 318, 319, 769, 321, 322, 323, + 272, 273, 274, 275, 276, 277, 278, 279, 674, 0, + 675, 283, 284, 285, 676, 0, 287, 288, 0, 290, + 0, 677, 292, 678, 294, 295, 296, 0, 297, 298, + 1085, 0, 299, 300, 301, 0, 0, 302, 679, 0, + 305, 0, 680, 308, 309, 310, 311, 312, 313, 314, + 315, 316, 317, 0, 318, 319, 681, 321, 322, 682, 324, 325, 326, 0, 327, 328, 329, 330, 331, 332, - 333, 334, 335, 336, 337, 338, 339, 340, 0, 341, + 333, 334, 335, 683, 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, - 352, 0, 353, 354, 355, 356, 357, 358, 359, 360, + 352, 0, 353, 354, 0, 356, 357, 358, 684, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, - 0, 371, 372, 373, 374, 375, 772, 376, 377, 378, - 379, 380, 381, 382, 383, 384, 385, 0, 386, 387, - 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, - 398, 774, 0, 400, 401, 0, 402, 403, 404, 405, - 406, 407, 408, 0, 409, 410, 0, 0, 411, 412, - 777, 414, 778, 0, 416, 417, 780, 419, 420, 421, - 422, 423, 0, 0, 424, 425, 426, 427, 428, 781, - 0, 429, 430, 431, 432, 433, 434, 435, 0, 436, - 437, 438, 439, 440, 441, 0, 0, 442, 0, 0, - 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, - 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, - 463, 0, 655, 1081, 535, 0, 0, 0, 729, 0, - 0, 0, 0, 0, 0, 0, 0, 2583, 90, 91, + 0, 371, 372, 373, 374, 375, 0, 376, 685, 378, + 379, 380, 0, 382, 383, 686, 385, 0, 386, 387, + 388, 389, 390, 391, 
392, 393, 394, 395, 396, 687, + 398, 688, 0, 400, 401, 0, 402, 0, 404, 405, + 406, 407, 408, 0, 689, 690, 0, 0, 411, 412, + 691, 414, 692, 1086, 416, 417, 693, 419, 420, 421, + 422, 423, 0, 0, 424, 425, 426, 427, 428, 0, + 0, 429, 430, 431, 432, 433, 1032, 695, 0, 436, + 0, 438, 439, 440, 441, 0, 0, 442, 0, 0, + 443, 444, 445, 446, 447, 448, 696, 697, 698, 699, + 700, 701, 702, 703, 704, 705, 706, 460, 461, 462, + 463, 0, 655, 0, 0, 0, 0, 0, 1087, 1088, + 1843, 0, 0, 0, 0, 0, 0, 1844, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, - 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, + 3, 4, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, 0, 107, 108, 109, 656, 657, 0, 658, 659, 0, 115, 116, 117, 118, 119, 120, 0, 0, 121, 122, 660, 661, 125, 0, 126, 127, 128, @@ -190165,7 +204018,7 @@ static const yytype_int16 yytable[] = 167, 168, 0, 169, 170, 171, 172, 173, 0, 0, 174, 175, 176, 177, 178, 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, 186, 187, 188, 665, 190, - 191, 192, 193, 666, 1082, 195, 0, 196, 197, 667, + 191, 192, 193, 666, 0, 195, 0, 196, 197, 667, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, 668, 211, 0, 212, 213, 214, 215, 216, 217, 0, 219, 220, 221, 222, @@ -190177,7 +204030,7 @@ static const yytype_int16 yytable[] = 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 674, 0, 675, 283, 284, 285, 676, 0, 287, 288, 0, 290, 0, 677, - 292, 678, 294, 295, 296, 0, 297, 298, 1083, 0, + 292, 678, 294, 295, 296, 0, 297, 298, 0, 0, 299, 300, 301, 0, 0, 302, 679, 0, 305, 0, 680, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 0, 318, 319, 681, 321, 322, 682, 324, 325, @@ -190185,68 +204038,68 @@ static const yytype_int16 yytable[] = 335, 683, 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, 354, 0, 356, 357, 358, 684, 360, 361, 362, - 363, 364, 365, 366, 367, 368, 369, 370, 0, 371, + 363, 364, 365, 366, 367, 368, 369, 370, 25, 371, 372, 373, 374, 375, 0, 376, 685, 378, 379, 380, 0, 382, 383, 686, 
385, 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 687, 398, 688, - 0, 400, 401, 0, 402, 0, 404, 405, 406, 407, + 0, 400, 401, 29, 402, 0, 404, 405, 406, 407, 408, 0, 689, 690, 0, 0, 411, 412, 691, 414, - 692, 1084, 416, 417, 693, 419, 420, 421, 422, 423, + 692, 0, 416, 417, 693, 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, 427, 428, 0, 0, 429, - 430, 431, 432, 433, 1030, 695, 0, 436, 0, 438, - 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, + 430, 431, 432, 433, 694, 695, 0, 436, 0, 438, + 439, 440, 441, 0, 0, 442, 0, 32, 443, 444, 445, 446, 447, 448, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 460, 461, 462, 463, 0, - 655, 0, 0, 0, 0, 0, 1085, 1086, 1832, 0, - 0, 0, 0, 0, 0, 1833, 90, 91, 92, 93, - 94, 95, 96, 97, 0, 98, 99, 100, 3, 4, + 488, 33, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 34, 90, 91, 92, 93, + 94, 95, 96, 97, 595, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, - 105, 0, 107, 108, 109, 656, 657, 0, 658, 659, + 105, 0, 107, 108, 109, 110, 111, 0, 113, 114, 0, 115, 116, 117, 118, 119, 120, 0, 0, 121, - 122, 660, 661, 125, 0, 126, 127, 128, 129, 662, + 122, 123, 124, 125, 0, 126, 127, 128, 129, 130, 0, 0, 0, 132, 133, 134, 135, 136, 0, 138, 139, 140, 0, 141, 142, 143, 144, 145, 146, 0, 0, 148, 149, 150, 0, 0, 0, 0, 0, 0, - 0, 152, 153, 154, 155, 156, 157, 158, 663, 664, + 0, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 0, 162, 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, 171, 172, 173, 0, 0, 174, 175, 176, 177, 178, 0, 179, 180, 181, 0, 182, 183, - 184, 0, 185, 186, 187, 188, 665, 190, 191, 192, - 193, 666, 0, 195, 0, 196, 197, 667, 199, 0, + 184, 0, 185, 186, 187, 188, 189, 190, 191, 192, + 193, 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, - 208, 0, 209, 0, 668, 211, 0, 212, 213, 214, + 208, 0, 209, 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, 0, 219, 220, 221, 222, 0, 223, - 224, 225, 226, 227, 228, 0, 229, 0, 669, 232, - 233, 234, 235, 670, 671, 0, 672, 0, 239, 0, + 224, 225, 226, 
227, 228, 0, 229, 0, 231, 232, + 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, 0, 0, 249, 0, 251, 0, 0, 253, 254, 255, 0, 0, - 256, 257, 258, 259, 260, 673, 262, 263, 264, 265, + 256, 257, 258, 259, 260, 490, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, - 276, 277, 278, 279, 674, 0, 675, 283, 284, 285, - 676, 0, 287, 288, 0, 290, 0, 677, 292, 678, + 276, 277, 278, 279, 280, 0, 282, 283, 284, 285, + 286, 0, 287, 288, 0, 290, 0, 291, 292, 293, 294, 295, 296, 0, 297, 298, 0, 0, 299, 300, - 301, 0, 0, 302, 679, 0, 305, 0, 680, 308, + 301, 0, 0, 302, 303, 0, 305, 0, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 0, - 318, 319, 681, 321, 322, 682, 324, 325, 326, 0, - 327, 328, 329, 330, 331, 332, 333, 334, 335, 683, + 318, 319, 320, 321, 322, 323, 324, 325, 326, 0, + 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, 354, - 0, 356, 357, 358, 684, 360, 361, 362, 363, 364, + 0, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 25, 371, 372, 373, - 374, 375, 0, 376, 685, 378, 379, 380, 0, 382, - 383, 686, 385, 0, 386, 387, 388, 389, 390, 391, - 392, 393, 394, 395, 396, 687, 398, 688, 0, 400, + 374, 375, 0, 376, 377, 378, 379, 380, 0, 382, + 383, 384, 385, 0, 386, 387, 388, 389, 390, 391, + 392, 393, 394, 395, 396, 492, 398, 399, 0, 400, 401, 29, 402, 0, 404, 405, 406, 407, 408, 0, - 689, 690, 0, 0, 411, 412, 691, 414, 692, 0, - 416, 417, 693, 419, 420, 421, 422, 423, 0, 0, + 596, 410, 0, 0, 597, 412, 413, 414, 415, 0, + 416, 417, 418, 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, 427, 428, 0, 0, 429, 430, 431, - 432, 433, 694, 695, 0, 436, 0, 438, 439, 440, + 432, 433, 598, 435, 0, 436, 0, 438, 439, 440, 441, 0, 0, 442, 0, 32, 443, 444, 445, 446, - 447, 448, 696, 697, 698, 699, 700, 701, 702, 703, - 704, 705, 706, 460, 461, 462, 463, 0, 488, 33, + 447, 448, 449, 450, 451, 452, 453, 454, 455, 
456, + 457, 458, 459, 460, 461, 462, 463, 0, 488, 33, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34, 90, 91, 92, 93, 94, 95, - 96, 97, 595, 98, 99, 100, 0, 0, 0, 0, + 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, 0, 107, 108, 109, 110, 111, 0, 113, 114, 0, 115, 116, 117, 118, 119, 120, 0, 0, 121, 122, 123, @@ -190283,14 +204136,14 @@ static const yytype_int16 yytable[] = 0, 376, 377, 378, 379, 380, 0, 382, 383, 384, 385, 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 492, 398, 399, 0, 400, 401, 29, - 402, 0, 404, 405, 406, 407, 408, 0, 596, 410, - 0, 0, 597, 412, 413, 414, 415, 0, 416, 417, + 402, 0, 404, 405, 406, 407, 408, 0, 409, 410, + 0, 0, 411, 412, 413, 414, 415, 0, 416, 417, 418, 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, 427, 428, 0, 0, 429, 430, 431, 432, 433, 598, 435, 0, 436, 0, 438, 439, 440, 441, 0, 0, 442, 0, 32, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, - 459, 460, 461, 462, 463, 0, 488, 33, 0, 0, + 459, 460, 461, 462, 463, 0, 488, 33, 560, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, @@ -190326,19 +204179,19 @@ static const yytype_int16 yytable[] = 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, 354, 0, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, - 369, 370, 25, 371, 372, 373, 374, 375, 0, 376, + 369, 370, 0, 371, 372, 373, 374, 375, 0, 376, 377, 378, 379, 380, 0, 382, 383, 384, 385, 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, - 396, 492, 398, 399, 0, 400, 401, 29, 402, 0, + 396, 492, 398, 399, 0, 400, 401, 0, 402, 0, 404, 405, 406, 407, 408, 0, 409, 410, 0, 0, 411, 412, 413, 414, 415, 0, 416, 417, 418, 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, 427, - 428, 0, 0, 429, 430, 431, 432, 433, 598, 435, + 428, 0, 0, 429, 430, 431, 432, 433, 434, 435, 0, 436, 0, 438, 439, 440, 441, 0, 0, 442, - 0, 32, 443, 444, 445, 446, 447, 448, 449, 450, + 0, 0, 443, 444, 445, 
446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, - 461, 462, 463, 0, 488, 33, 560, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 34, + 461, 462, 463, 0, 488, 0, 560, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 830, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, 0, 107, 108, 109, 110, @@ -190384,8 +204237,8 @@ static const yytype_int16 yytable[] = 0, 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, - 463, 0, 488, 0, 560, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 828, 90, 91, + 463, 0, 488, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 2446, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, 0, 107, 108, 109, 110, 111, 0, @@ -190432,7 +204285,7 @@ static const yytype_int16 yytable[] = 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 0, 488, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 2431, 90, 91, 92, 93, + 0, 0, 0, 0, 0, 1697, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, 0, 107, 108, 109, 110, 111, 0, 113, 114, @@ -190477,441 +204330,897 @@ static const yytype_int16 yytable[] = 432, 433, 434, 435, 0, 436, 0, 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, - 457, 458, 459, 460, 461, 462, 463, 0, 488, 0, + 457, 458, 459, 460, 461, 462, 463, 0, 0, 0, + 0, 2309, 1083, 535, 0, 0, 1553, 729, 0, 0, + 0, 0, 0, 2208, 1554, 1555, 1556, 90, 91, 92, + 93, 94, 95, 96, 97, 960, 98, 99, 100, 961, + 962, 963, 964, 965, 966, 967, 101, 102, 968, 103, + 104, 105, 2310, 107, 108, 109, 0, 1108, 2311, 1110, + 1111, 969, 115, 116, 117, 118, 119, 120, 970, 971, + 121, 122, 1112, 1113, 125, 972, 126, 127, 128, 129, + 0, 973, 2312, 974, 132, 133, 134, 135, 136, 2313, + 138, 139, 140, 975, 141, 142, 143, 144, 145, 146, + 976, 2314, 148, 149, 150, 977, 
978, 979, 2315, 980, + 981, 982, 152, 153, 154, 155, 156, 157, 158, 1118, + 1119, 161, 983, 162, 984, 163, 164, 165, 166, 167, + 168, 985, 169, 170, 171, 172, 173, 986, 987, 174, + 175, 751, 177, 178, 988, 179, 180, 181, 989, 182, + 183, 184, 990, 185, 186, 187, 188, 0, 190, 191, + 192, 193, 0, 991, 195, 992, 196, 197, 1120, 199, + 993, 200, 994, 201, 2316, 995, 2317, 204, 205, 206, + 2318, 208, 996, 209, 997, 0, 211, 998, 212, 213, + 214, 215, 216, 217, 2319, 219, 220, 221, 222, 999, + 223, 224, 225, 226, 227, 228, 1000, 229, 2320, 0, + 232, 233, 234, 235, 1126, 1127, 1001, 1128, 1002, 239, + 2321, 2322, 242, 2323, 244, 245, 246, 247, 248, 1003, + 1004, 249, 2324, 251, 2325, 1005, 253, 254, 255, 1006, + 1007, 256, 257, 258, 259, 260, 2326, 262, 263, 264, + 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, + 275, 276, 277, 278, 279, 1135, 2327, 1137, 283, 284, + 285, 2328, 1008, 287, 288, 2329, 290, 1009, 0, 292, + 1139, 294, 295, 296, 1010, 297, 298, 1011, 1012, 2330, + 300, 301, 1013, 1014, 302, 0, 2331, 305, 2332, 0, + 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, + 1015, 318, 319, 0, 321, 322, 0, 324, 325, 326, + 1016, 327, 328, 329, 330, 331, 332, 333, 334, 335, + 1142, 337, 338, 339, 340, 1017, 341, 342, 343, 344, + 345, 346, 347, 348, 349, 350, 351, 352, 1018, 353, + 354, 2333, 356, 357, 358, 1144, 360, 361, 362, 363, + 364, 365, 366, 367, 368, 369, 370, 1019, 371, 372, + 373, 374, 375, 1020, 376, 2334, 378, 379, 380, 2335, + 382, 383, 1147, 385, 1021, 386, 387, 388, 389, 390, + 391, 392, 393, 394, 395, 396, 2336, 398, 0, 1022, + 400, 401, 1023, 402, 2337, 404, 405, 406, 407, 408, + 1024, 1150, 1151, 1025, 1026, 411, 412, 0, 414, 0, + 1027, 416, 417, 2338, 419, 420, 421, 422, 423, 1028, + 1029, 424, 425, 426, 427, 428, 1030, 1031, 429, 430, + 431, 432, 433, 0, 1153, 1033, 436, 2339, 438, 439, + 440, 441, 1034, 1035, 442, 1036, 1037, 443, 444, 445, + 446, 447, 448, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 460, 461, 462, 463, 0, 488, + 0, 1557, 1558, 1559, 1553, 
2340, 2341, 1562, 1563, 1564, + 1565, 0, 1554, 1555, 1556, 90, 91, 92, 93, 94, + 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, + 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, + 0, 107, 108, 109, 110, 111, 0, 113, 114, 0, + 115, 116, 117, 118, 119, 120, 0, 0, 121, 122, + 123, 124, 125, 0, 126, 127, 128, 129, 130, 0, + 0, 0, 132, 133, 134, 135, 136, 0, 138, 139, + 140, 0, 141, 142, 143, 144, 145, 146, 0, 0, + 148, 149, 150, 0, 0, 0, 0, 0, 0, 0, + 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, + 0, 162, 0, 163, 164, 165, 166, 167, 168, 0, + 169, 170, 171, 172, 173, 0, 0, 174, 175, 176, + 177, 178, 0, 179, 180, 181, 0, 182, 183, 184, + 0, 185, 186, 187, 188, 189, 190, 191, 192, 193, + 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, + 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, + 0, 209, 0, 210, 211, 0, 212, 213, 214, 215, + 216, 217, 0, 219, 220, 221, 222, 0, 223, 224, + 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, + 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, + 242, 0, 244, 245, 246, 247, 248, 0, 0, 249, + 0, 251, 0, 0, 253, 254, 255, 0, 0, 256, + 257, 258, 259, 260, 490, 262, 263, 264, 265, 266, + 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, + 277, 278, 279, 280, 0, 282, 283, 284, 285, 286, + 0, 287, 288, 0, 290, 0, 291, 292, 293, 294, + 295, 296, 0, 297, 298, 0, 0, 299, 300, 301, + 0, 0, 302, 303, 0, 305, 0, 307, 308, 309, + 310, 311, 312, 313, 314, 315, 316, 317, 0, 318, + 319, 320, 321, 322, 323, 324, 325, 326, 0, 327, + 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, + 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, + 347, 348, 349, 350, 351, 352, 0, 353, 354, 0, + 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, + 366, 367, 368, 369, 370, 0, 371, 372, 373, 374, + 375, 0, 376, 377, 378, 379, 380, 0, 382, 383, + 384, 385, 0, 386, 387, 388, 389, 390, 391, 392, + 393, 394, 395, 396, 492, 398, 399, 0, 400, 401, + 0, 402, 0, 404, 405, 406, 407, 408, 0, 409, + 410, 0, 0, 411, 412, 413, 414, 415, 0, 416, + 417, 418, 419, 420, 421, 422, 423, 0, 0, 424, + 425, 426, 427, 428, 0, 0, 
429, 430, 431, 432, + 433, 434, 435, 0, 436, 0, 438, 439, 440, 441, + 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, + 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, + 458, 459, 460, 461, 462, 463, 0, 0, 0, 1557, + 1558, 1559, 0, 1560, 1561, 1562, 1563, 1564, 1565, 1240, + 0, 0, 1241, 0, 0, 0, 0, 1242, 1243, 1244, + 0, 0, 0, 0, 0, 0, 0, 1240, 0, 0, + 1241, 0, 0, 0, 1245, 1242, 1243, 1244, 0, 0, + 0, 0, 1247, 0, 0, 1240, 0, 0, 1241, 1248, + 0, 0, 1245, 1242, 1243, 1244, 0, 0, 0, 0, + 1247, 0, 0, 0, 0, 0, 0, 1248, 0, 0, + 1245, 0, 0, 1240, 1249, 0, 1241, 0, 1247, 0, + 0, 1242, 1243, 1244, 0, 1248, 0, 0, 0, 0, + 0, 1240, 1249, 0, 1241, 0, 0, 0, 1245, 1242, + 1243, 1244, 0, 0, 0, 0, 1247, 0, 0, 1240, + 1249, 0, 1241, 1248, 0, 0, 1245, 1242, 1243, 1244, + 0, 0, 0, 0, 1247, 0, 0, 0, 0, 0, + 0, 1248, 0, 0, 1245, 0, 0, 0, 1249, 0, + 0, 0, 1247, 0, 0, 0, 0, 0, 0, 1248, + 0, 0, 0, 0, 0, 0, 1249, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1686, 90, 91, 92, 93, 94, 95, - 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, - 0, 0, 0, 101, 102, 0, 103, 104, 105, 0, - 107, 108, 109, 110, 111, 0, 113, 114, 0, 115, - 116, 117, 118, 119, 120, 0, 0, 121, 122, 123, - 124, 125, 0, 126, 127, 128, 129, 130, 0, 0, - 0, 132, 133, 134, 135, 136, 0, 138, 139, 140, - 0, 141, 142, 143, 144, 145, 146, 0, 0, 148, - 149, 150, 0, 0, 0, 0, 0, 0, 0, 152, - 153, 154, 155, 156, 157, 158, 159, 160, 161, 0, - 162, 0, 163, 164, 165, 166, 167, 168, 0, 169, - 170, 171, 172, 173, 0, 0, 174, 175, 176, 177, - 178, 0, 179, 180, 181, 0, 182, 183, 184, 0, - 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, - 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, - 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, - 209, 0, 210, 211, 0, 212, 213, 214, 215, 216, - 217, 0, 219, 220, 221, 222, 0, 223, 224, 225, - 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, - 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, - 0, 244, 245, 246, 247, 248, 0, 0, 249, 0, - 251, 0, 0, 253, 254, 255, 0, 0, 256, 257, - 258, 259, 260, 490, 262, 263, 264, 265, 266, 267, - 268, 
269, 270, 271, 272, 273, 274, 275, 276, 277, - 278, 279, 280, 0, 282, 283, 284, 285, 286, 0, - 287, 288, 0, 290, 0, 291, 292, 293, 294, 295, - 296, 0, 297, 298, 0, 0, 299, 300, 301, 0, - 0, 302, 303, 0, 305, 0, 307, 308, 309, 310, - 311, 312, 313, 314, 315, 316, 317, 0, 318, 319, - 320, 321, 322, 323, 324, 325, 326, 0, 327, 328, - 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, - 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, - 348, 349, 350, 351, 352, 0, 353, 354, 0, 356, - 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, - 367, 368, 369, 370, 0, 371, 372, 373, 374, 375, - 0, 376, 377, 378, 379, 380, 0, 382, 383, 384, - 385, 0, 386, 387, 388, 389, 390, 391, 392, 393, - 394, 395, 396, 492, 398, 399, 0, 400, 401, 0, - 402, 0, 404, 405, 406, 407, 408, 0, 409, 410, - 0, 0, 411, 412, 413, 414, 415, 0, 416, 417, - 418, 419, 420, 421, 422, 423, 0, 0, 424, 425, - 426, 427, 428, 0, 0, 429, 430, 431, 432, 433, - 434, 435, 0, 436, 0, 438, 439, 440, 441, 0, - 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, - 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, - 459, 460, 461, 462, 463, 0, 0, 0, 0, 2296, - 1081, 535, 0, 0, 1546, 729, 0, 0, 0, 0, - 0, 2195, 1547, 1548, 1549, 90, 91, 92, 93, 94, - 95, 96, 97, 958, 98, 99, 100, 959, 960, 961, - 962, 963, 964, 965, 101, 102, 966, 103, 104, 105, - 2297, 107, 108, 109, 0, 1106, 2298, 1108, 1109, 967, - 115, 116, 117, 118, 119, 120, 968, 969, 121, 122, - 1110, 1111, 125, 970, 126, 127, 128, 129, 0, 971, - 2299, 972, 132, 133, 134, 135, 136, 2300, 138, 139, - 140, 973, 141, 142, 143, 144, 145, 146, 974, 2301, - 148, 149, 150, 975, 976, 977, 2302, 978, 979, 980, - 152, 153, 154, 155, 156, 157, 158, 1116, 1117, 161, - 981, 162, 982, 163, 164, 165, 166, 167, 168, 983, - 169, 170, 171, 172, 173, 984, 985, 174, 175, 751, - 177, 178, 986, 179, 180, 181, 987, 182, 183, 184, - 988, 185, 186, 187, 188, 0, 190, 191, 192, 193, - 0, 989, 195, 990, 196, 197, 1118, 199, 991, 200, - 992, 201, 2303, 993, 2304, 204, 205, 206, 2305, 208, - 994, 209, 995, 
0, 211, 996, 212, 213, 214, 215, - 216, 217, 2306, 219, 220, 221, 222, 997, 223, 224, - 225, 226, 227, 228, 998, 229, 2307, 0, 232, 233, - 234, 235, 1124, 1125, 999, 1126, 1000, 239, 2308, 2309, - 242, 2310, 244, 245, 246, 247, 248, 1001, 1002, 249, - 2311, 251, 2312, 1003, 253, 254, 255, 1004, 1005, 256, - 257, 258, 259, 260, 2313, 262, 263, 264, 265, 266, + 0, 0, 0, 1250, 1249, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1251, 0, 0, + 0, 1250, 1252, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1251, 0, 0, 0, 1250, + 1252, 0, 0, 1253, 1254, 0, 0, 0, 0, 0, + 0, 0, 0, 1251, 0, 0, 0, 1255, 1252, 0, + 0, 1253, 1254, 0, 0, 0, 0, 1250, 0, 0, + 0, 0, 0, 0, 0, 1255, 0, 0, 0, 1253, + 1254, 1251, 0, 0, 0, 1250, 1252, 0, 0, 0, + 0, 0, 0, 1255, 0, 1256, 0, 0, 1257, 1251, + 0, 0, 0, 1250, 1252, 0, 0, 1253, 1254, 0, + 0, 0, 1258, 1256, 0, 1259, 1257, 1251, 0, 0, + 0, 1255, 1252, 0, 0, 1253, 1254, 0, 0, 0, + 1258, 1256, 0, 1259, 1257, 0, 0, 0, 0, 1255, + 0, 0, 0, 1253, 1254, 0, 0, 0, 1258, 0, + 0, 1259, 0, 0, 0, 0, 0, 1255, 0, 1256, + 0, 0, 1257, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1258, 1256, 0, 1259, + 1257, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1258, 1256, 0, 1259, 1257, 0, + 0, 0, 1260, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1258, 0, 0, 1259, 0, 0, 0, 0, + 1260, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1260, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1240, 0, 0, 1241, 0, 0, 0, 0, 1242, 1243, + 1244, 0, 0, 0, 0, 0, 1260, 0, 0, 0, + 0, 0, 0, 0, 0, 1245, 0, 0, 0, 0, + 0, 0, 0, 1247, 1260, 0, 0, 0, 0, 0, + 1248, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1261, 1260, 0, 1262, 1263, 1264, 0, 1265, 1266, + 1267, 1268, 1269, 1270, 0, 1249, 0, 0, 2186, 1261, + 0, 0, 1262, 1263, 1264, 0, 1265, 1266, 1267, 1268, + 1269, 1270, 0, 0, 0, 0, 2273, 1261, 0, 0, + 1262, 1263, 1264, 0, 1265, 1266, 1267, 1268, 1269, 1270, + 0, 0, 0, 0, 2397, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1261, 0, 0, 1262, 1263, + 1264, 0, 1265, 1266, 1267, 1268, 1269, 1270, 0, 0, + 0, 0, 2416, 1261, 0, 0, 1262, 1263, 1264, 0, + 1265, 1266, 1267, 
1268, 1269, 1270, 0, 0, 0, 0, + 2559, 1261, 0, 0, 1262, 1263, 1264, 0, 1265, 1266, + 1267, 1268, 1269, 1270, 1250, 0, 1240, 0, 2757, 1241, + 0, 0, 0, 0, 1242, 1243, 1244, 0, 1251, 0, + 0, 0, 0, 1252, 0, 0, 0, 0, 0, 0, + 0, 1245, 0, 0, 0, 0, 0, 0, 0, 1247, + 0, 0, 0, 0, 1253, 1254, 1248, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1255, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1249, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1256, 0, 0, 1257, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1258, 0, 0, 1259, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1250, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1251, 0, 0, 0, 0, 1252, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1260, 0, 0, 0, 0, 0, 0, + 1253, 1254, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1255, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1256, 0, 0, 1257, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1258, + 0, 0, 1259, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1261, 0, 0, 1262, 1263, 1264, 0, 1265, + 1266, 1267, 1268, 1269, 1270, 0, 0, 0, 0, 2769, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1260, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1261, 959, + 0, 1262, 1263, 1264, 0, 1265, 1266, 1267, 1268, 1269, + 1270, 0, 0, 1418, 0, 90, 91, 92, 93, 94, + 95, 96, 97, 960, 98, 99, 100, 961, 962, 963, + 964, 965, 966, 967, 101, 102, 968, 103, 104, 105, + 0, 107, 108, 109, 656, 657, 0, 658, 659, 969, + 115, 116, 117, 
118, 119, 120, 970, 971, 121, 122, + 660, 661, 125, 972, 126, 127, 128, 129, 662, 973, + 0, 974, 132, 133, 134, 135, 136, 0, 138, 139, + 140, 975, 141, 142, 143, 144, 145, 146, 976, 0, + 148, 149, 150, 977, 978, 979, 0, 980, 981, 982, + 152, 153, 154, 155, 156, 157, 158, 663, 664, 161, + 983, 162, 984, 163, 164, 165, 166, 167, 168, 985, + 169, 170, 171, 172, 173, 986, 987, 174, 175, 176, + 177, 178, 988, 179, 180, 181, 989, 182, 183, 184, + 990, 185, 186, 187, 188, 665, 190, 191, 192, 193, + 666, 991, 195, 992, 196, 197, 667, 199, 993, 200, + 994, 201, 0, 995, 0, 204, 205, 206, 0, 208, + 996, 209, 997, 668, 211, 998, 212, 213, 214, 215, + 216, 217, 0, 219, 220, 221, 222, 999, 223, 224, + 225, 226, 227, 228, 1000, 229, 0, 669, 232, 233, + 234, 235, 670, 671, 1001, 672, 1002, 239, 0, 0, + 242, 0, 244, 245, 246, 247, 248, 1003, 1004, 249, + 0, 251, 0, 1005, 253, 254, 255, 1006, 1007, 256, + 257, 258, 259, 260, 673, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, - 277, 278, 279, 1133, 2314, 1135, 283, 284, 285, 2315, - 1006, 287, 288, 2316, 290, 1007, 0, 292, 1137, 294, - 295, 296, 1008, 297, 298, 1009, 1010, 2317, 300, 301, - 1011, 1012, 302, 0, 2318, 305, 2319, 0, 308, 309, - 310, 311, 312, 313, 314, 315, 316, 317, 1013, 318, - 319, 0, 321, 322, 0, 324, 325, 326, 1014, 327, - 328, 329, 330, 331, 332, 333, 334, 335, 1140, 337, - 338, 339, 340, 1015, 341, 342, 343, 344, 345, 346, - 347, 348, 349, 350, 351, 352, 1016, 353, 354, 2320, - 356, 357, 358, 1142, 360, 361, 362, 363, 364, 365, - 366, 367, 368, 369, 370, 1017, 371, 372, 373, 374, - 375, 1018, 376, 2321, 378, 379, 380, 2322, 382, 383, - 1145, 385, 1019, 386, 387, 388, 389, 390, 391, 392, - 393, 394, 395, 396, 2323, 398, 0, 1020, 400, 401, - 1021, 402, 2324, 404, 405, 406, 407, 408, 1022, 1148, - 1149, 1023, 1024, 411, 412, 0, 414, 0, 1025, 416, - 417, 2325, 419, 420, 421, 422, 423, 1026, 1027, 424, - 425, 426, 427, 428, 1028, 1029, 429, 430, 431, 432, - 433, 0, 1151, 1031, 436, 2326, 
438, 439, 440, 441, - 1032, 1033, 442, 1034, 1035, 443, 444, 445, 446, 447, - 448, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 460, 461, 462, 463, 0, 488, 0, 1550, - 1551, 1552, 1546, 2327, 2328, 1555, 1556, 1557, 1558, 0, - 1547, 1548, 1549, 90, 91, 92, 93, 94, 95, 96, + 277, 278, 279, 674, 0, 675, 283, 284, 285, 676, + 1008, 287, 288, 0, 290, 1009, 677, 292, 678, 294, + 295, 296, 1010, 297, 298, 1011, 1012, 299, 300, 301, + 1013, 1014, 302, 679, 0, 305, 0, 680, 308, 309, + 310, 311, 312, 313, 314, 315, 316, 317, 1015, 318, + 319, 681, 321, 322, 682, 324, 325, 326, 1016, 327, + 328, 329, 330, 331, 332, 333, 334, 335, 683, 337, + 338, 339, 340, 1017, 341, 342, 343, 344, 345, 346, + 347, 348, 349, 350, 351, 352, 1018, 353, 354, 0, + 356, 357, 358, 684, 360, 361, 362, 363, 364, 365, + 366, 367, 368, 369, 370, 1019, 371, 372, 373, 374, + 375, 1020, 376, 685, 378, 379, 380, 0, 382, 383, + 686, 385, 1021, 386, 387, 388, 389, 390, 391, 392, + 393, 394, 395, 396, 687, 398, 688, 1022, 400, 401, + 1023, 402, 0, 404, 405, 406, 407, 408, 1024, 689, + 690, 1025, 1026, 411, 412, 691, 414, 692, 1027, 416, + 417, 693, 419, 420, 421, 422, 423, 1028, 1029, 424, + 425, 426, 427, 428, 1030, 1031, 429, 430, 431, 432, + 433, 1032, 695, 1033, 436, 0, 438, 439, 440, 441, + 1034, 1035, 442, 1036, 1037, 443, 444, 445, 446, 447, + 448, 696, 697, 698, 699, 700, 701, 702, 703, 704, + 705, 706, 460, 461, 462, 463, 488, 0, 0, 0, + 0, 0, 0, 0, 0, 1038, 0, 0, 0, 0, + 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, + 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, + 0, 101, 102, 0, 103, 104, 105, 0, 107, 108, + 109, 110, 111, 0, 113, 114, 0, 115, 116, 117, + 118, 119, 120, 0, 0, 121, 122, 123, 124, 125, + 0, 126, 127, 128, 129, 130, 0, 0, 0, 132, + 133, 134, 135, 136, 0, 138, 139, 140, 0, 141, + 142, 143, 144, 145, 146, 0, 0, 148, 149, 150, + 0, 0, 0, 0, 0, 0, 0, 152, 153, 154, + 155, 156, 157, 158, 159, 160, 161, 0, 162, 0, + 163, 164, 165, 166, 167, 168, 0, 169, 170, 171, + 172, 173, 0, 0, 174, 175, 176, 177, 178, 0, + 179, 
180, 181, 0, 182, 183, 184, 0, 185, 186, + 187, 188, 189, 190, 191, 192, 193, 194, 0, 195, + 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, + 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, + 210, 211, 0, 212, 213, 214, 215, 216, 217, 0, + 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, + 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, + 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, + 245, 246, 247, 248, 0, 0, 249, 0, 251, 0, + 0, 253, 254, 255, 0, 0, 256, 257, 258, 259, + 260, 490, 262, 263, 264, 265, 266, 267, 268, 269, + 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, + 280, 0, 282, 283, 284, 285, 286, 0, 287, 288, + 0, 290, 0, 291, 292, 293, 294, 295, 296, 0, + 297, 298, 0, 0, 299, 300, 301, 0, 0, 302, + 303, 0, 305, 0, 307, 308, 309, 310, 311, 312, + 313, 314, 315, 316, 317, 0, 318, 319, 320, 321, + 322, 323, 324, 325, 326, 0, 327, 328, 329, 330, + 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, + 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, + 350, 351, 352, 0, 353, 354, 0, 356, 357, 358, + 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, + 369, 370, 0, 371, 372, 373, 374, 375, 0, 376, + 377, 378, 379, 380, 0, 382, 383, 384, 385, 0, + 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, + 396, 492, 398, 399, 0, 400, 401, 0, 402, 0, + 404, 405, 406, 407, 408, 0, 409, 410, 0, 0, + 411, 412, 413, 414, 415, 0, 416, 417, 418, 419, + 420, 421, 422, 423, 0, 0, 424, 425, 426, 427, + 428, 0, 0, 429, 430, 431, 432, 433, 434, 435, + 0, 436, 0, 438, 439, 440, 441, 0, 0, 442, + 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, + 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, + 461, 462, 463, 655, 1083, 535, 0, 0, 0, 729, + 0, 0, 2125, 0, 0, 0, 0, 0, 0, 90, + 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, + 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, + 0, 103, 104, 105, 0, 107, 108, 109, 656, 657, + 0, 658, 659, 0, 115, 116, 117, 118, 119, 120, + 0, 0, 121, 122, 660, 661, 125, 0, 126, 127, + 128, 129, 662, 0, 0, 0, 132, 133, 134, 135, + 136, 0, 138, 139, 140, 0, 141, 142, 143, 144, + 145, 146, 0, 0, 148, 
149, 150, 0, 0, 0, + 0, 0, 0, 0, 152, 153, 154, 155, 156, 157, + 158, 663, 664, 161, 1298, 162, 0, 163, 164, 165, + 166, 167, 168, 0, 169, 170, 171, 172, 173, 0, + 0, 174, 175, 176, 177, 178, 0, 179, 180, 181, + 0, 182, 183, 184, 0, 185, 186, 187, 188, 665, + 190, 191, 192, 193, 666, 1084, 195, 0, 196, 197, + 667, 199, 0, 200, 0, 201, 0, 0, 0, 204, + 205, 206, 0, 208, 0, 209, 0, 668, 211, 0, + 212, 213, 214, 215, 216, 217, 0, 219, 220, 221, + 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, + 0, 669, 232, 233, 234, 235, 670, 671, 0, 672, + 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, + 248, 0, 0, 249, 0, 251, 0, 0, 253, 254, + 255, 0, 0, 256, 257, 258, 259, 260, 673, 262, + 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, + 273, 274, 275, 276, 277, 278, 279, 674, 0, 675, + 283, 284, 285, 676, 0, 287, 288, 0, 290, 0, + 677, 292, 678, 294, 295, 296, 0, 297, 298, 1085, + 0, 299, 300, 301, 0, 0, 302, 679, 0, 305, + 0, 680, 308, 309, 310, 311, 312, 313, 314, 315, + 316, 317, 0, 318, 319, 681, 321, 322, 682, 324, + 325, 326, 0, 327, 328, 329, 330, 331, 332, 333, + 334, 335, 683, 337, 338, 339, 340, 0, 341, 342, + 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, + 0, 353, 354, 0, 356, 357, 358, 684, 360, 361, + 362, 363, 364, 365, 366, 367, 368, 369, 370, 0, + 371, 372, 373, 374, 375, 0, 376, 685, 378, 379, + 380, 0, 382, 383, 686, 385, 0, 386, 387, 388, + 389, 390, 391, 392, 393, 394, 395, 396, 687, 398, + 688, 0, 400, 401, 0, 402, 0, 404, 405, 406, + 407, 408, 0, 689, 690, 0, 0, 411, 412, 691, + 414, 692, 1086, 416, 417, 693, 419, 420, 421, 422, + 423, 0, 0, 424, 425, 426, 427, 428, 0, 0, + 429, 430, 431, 432, 433, 1032, 695, 0, 436, 0, + 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, + 444, 445, 446, 447, 448, 696, 697, 698, 699, 700, + 701, 702, 703, 704, 705, 706, 460, 461, 462, 463, + 655, 1083, 535, 0, 0, 0, 729, 1087, 1088, 0, + 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, + 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, + 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, + 105, 0, 107, 108, 109, 656, 
657, 0, 658, 659, + 0, 115, 116, 117, 118, 119, 120, 0, 0, 121, + 122, 660, 661, 125, 0, 126, 127, 128, 129, 662, + 0, 0, 0, 132, 133, 134, 135, 136, 0, 138, + 139, 140, 0, 141, 142, 143, 144, 145, 146, 0, + 0, 148, 149, 150, 0, 0, 0, 0, 0, 0, + 0, 152, 153, 154, 155, 156, 157, 158, 663, 664, + 161, 1300, 162, 0, 163, 164, 165, 166, 167, 168, + 0, 169, 170, 171, 172, 173, 0, 0, 174, 175, + 176, 177, 178, 0, 179, 180, 181, 0, 182, 183, + 184, 0, 185, 186, 187, 188, 665, 190, 191, 192, + 193, 666, 1084, 195, 0, 196, 197, 667, 199, 0, + 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, + 208, 0, 209, 0, 668, 211, 0, 212, 213, 214, + 215, 216, 217, 0, 219, 220, 221, 222, 0, 223, + 224, 225, 226, 227, 228, 0, 229, 0, 669, 232, + 233, 234, 235, 670, 671, 0, 672, 0, 239, 0, + 0, 242, 0, 244, 245, 246, 247, 248, 0, 0, + 249, 0, 251, 0, 0, 253, 254, 255, 0, 0, + 256, 257, 258, 259, 260, 673, 262, 263, 264, 265, + 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, + 276, 277, 278, 279, 674, 0, 675, 283, 284, 285, + 676, 0, 287, 288, 0, 290, 0, 677, 292, 678, + 294, 295, 296, 0, 297, 298, 1085, 0, 299, 300, + 301, 0, 0, 302, 679, 0, 305, 0, 680, 308, + 309, 310, 311, 312, 313, 314, 315, 316, 317, 0, + 318, 319, 681, 321, 322, 682, 324, 325, 326, 0, + 327, 328, 329, 330, 331, 332, 333, 334, 335, 683, + 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, + 346, 347, 348, 349, 350, 351, 352, 0, 353, 354, + 0, 356, 357, 358, 684, 360, 361, 362, 363, 364, + 365, 366, 367, 368, 369, 370, 0, 371, 372, 373, + 374, 375, 0, 376, 685, 378, 379, 380, 0, 382, + 383, 686, 385, 0, 386, 387, 388, 389, 390, 391, + 392, 393, 394, 395, 396, 687, 398, 688, 0, 400, + 401, 0, 402, 0, 404, 405, 406, 407, 408, 0, + 689, 690, 0, 0, 411, 412, 691, 414, 692, 1086, + 416, 417, 693, 419, 420, 421, 422, 423, 0, 0, + 424, 425, 426, 427, 428, 0, 0, 429, 430, 431, + 432, 433, 1032, 695, 0, 436, 0, 438, 439, 440, + 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, + 447, 448, 696, 697, 698, 699, 700, 701, 702, 703, + 704, 705, 706, 
460, 461, 462, 463, 655, 1083, 535, + 0, 0, 0, 729, 1087, 1088, 0, 0, 0, 0, + 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, 0, 107, - 108, 109, 110, 111, 0, 113, 114, 0, 115, 116, - 117, 118, 119, 120, 0, 0, 121, 122, 123, 124, - 125, 0, 126, 127, 128, 129, 130, 0, 0, 0, + 108, 109, 656, 657, 0, 658, 659, 0, 115, 116, + 117, 118, 119, 120, 0, 0, 121, 122, 660, 661, + 125, 0, 126, 127, 128, 129, 662, 0, 0, 0, 132, 133, 134, 135, 136, 0, 138, 139, 140, 0, 141, 142, 143, 144, 145, 146, 0, 0, 148, 149, 150, 0, 0, 0, 0, 0, 0, 0, 152, 153, - 154, 155, 156, 157, 158, 159, 160, 161, 0, 162, + 154, 155, 156, 157, 158, 663, 664, 161, 0, 162, 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, 171, 172, 173, 0, 0, 174, 175, 176, 177, 178, 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, - 186, 187, 188, 189, 190, 191, 192, 193, 194, 0, - 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, + 186, 187, 188, 665, 190, 191, 192, 193, 666, 1084, + 195, 0, 196, 197, 667, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, - 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, + 0, 668, 211, 0, 212, 213, 214, 215, 216, 217, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, - 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, - 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, + 227, 228, 0, 229, 0, 669, 232, 233, 234, 235, + 670, 671, 0, 672, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, 0, 0, 249, 0, 251, 0, 0, 253, 254, 255, 0, 0, 256, 257, 258, - 259, 260, 490, 262, 263, 264, 265, 266, 267, 268, + 259, 260, 673, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, - 279, 280, 0, 282, 283, 284, 285, 286, 0, 287, - 288, 0, 290, 0, 291, 292, 293, 294, 295, 296, - 0, 297, 298, 0, 0, 299, 300, 301, 0, 0, - 302, 303, 0, 305, 0, 307, 308, 309, 310, 311, - 312, 313, 314, 315, 316, 317, 0, 318, 319, 320, - 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, + 279, 674, 0, 675, 283, 
284, 285, 676, 0, 287, + 288, 0, 290, 0, 677, 292, 678, 294, 295, 296, + 0, 297, 298, 1085, 0, 299, 300, 301, 0, 0, + 302, 679, 0, 305, 0, 680, 308, 309, 310, 311, + 312, 313, 314, 315, 316, 317, 0, 318, 319, 681, + 321, 322, 682, 324, 325, 326, 0, 327, 328, 329, + 330, 331, 332, 333, 334, 335, 683, 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, 354, 0, 356, 357, - 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, + 358, 684, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 0, 371, 372, 373, 374, 375, 0, - 376, 377, 378, 379, 380, 0, 382, 383, 384, 385, + 376, 685, 378, 379, 380, 0, 382, 383, 686, 385, 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, - 395, 396, 492, 398, 399, 0, 400, 401, 0, 402, - 0, 404, 405, 406, 407, 408, 0, 409, 410, 0, - 0, 411, 412, 413, 414, 415, 0, 416, 417, 418, + 395, 396, 687, 398, 688, 0, 400, 401, 0, 402, + 0, 404, 405, 406, 407, 408, 0, 689, 690, 0, + 0, 411, 412, 691, 414, 692, 1086, 416, 417, 693, 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, - 427, 428, 0, 0, 429, 430, 431, 432, 433, 434, - 435, 0, 436, 0, 438, 439, 440, 441, 0, 0, - 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, - 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, - 460, 461, 462, 463, 0, 0, 0, 1550, 1551, 1552, - 0, 1553, 1554, 1555, 1556, 1557, 1558, 1232, 0, 0, - 1233, 0, 0, 0, 0, 1234, 1235, 1236, 0, 0, - 0, 0, 0, 0, 0, 1232, 0, 0, 1233, 0, - 0, 0, 1237, 1234, 1235, 1236, 0, 0, 0, 0, - 1239, 0, 0, 1232, 0, 0, 1233, 1240, 0, 0, - 1237, 1234, 1235, 1236, 0, 0, 0, 0, 1239, 0, - 0, 0, 0, 0, 0, 1240, 0, 0, 1237, 0, - 0, 1232, 1241, 0, 1233, 0, 1239, 0, 0, 1234, - 1235, 1236, 0, 1240, 0, 0, 0, 0, 0, 1232, - 1241, 0, 1233, 0, 0, 0, 1237, 1234, 1235, 1236, - 0, 0, 0, 0, 1239, 0, 0, 1232, 1241, 0, - 1233, 1240, 0, 0, 1237, 1234, 1235, 1236, 0, 0, - 0, 0, 1239, 0, 0, 0, 0, 0, 0, 1240, - 0, 0, 1237, 0, 0, 0, 1241, 0, 0, 0, - 1239, 0, 0, 0, 0, 0, 0, 1240, 0, 0, - 0, 0, 0, 0, 1241, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, - 0, 1242, 1241, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 1243, 0, 0, 0, 1242, - 1244, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1243, 0, 0, 0, 1242, 1244, 0, - 0, 1245, 1246, 0, 0, 0, 0, 0, 0, 0, - 0, 1243, 0, 0, 0, 1247, 1244, 0, 0, 1245, - 1246, 0, 0, 0, 0, 1242, 0, 0, 0, 0, - 0, 0, 0, 1247, 0, 0, 0, 1245, 1246, 1243, - 0, 0, 0, 1242, 1244, 0, 0, 0, 0, 0, - 0, 1247, 0, 1248, 0, 0, 1249, 1243, 0, 0, - 0, 1242, 1244, 0, 0, 1245, 1246, 0, 0, 0, - 1250, 1248, 0, 1251, 1249, 1243, 0, 0, 0, 1247, - 1244, 0, 0, 1245, 1246, 0, 0, 0, 1250, 1248, - 0, 1251, 1249, 0, 0, 0, 0, 1247, 0, 0, - 0, 1245, 1246, 0, 0, 0, 1250, 0, 0, 1251, - 0, 0, 0, 0, 0, 1247, 0, 1248, 0, 0, - 1249, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1250, 1248, 0, 1251, 1249, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1250, 1248, 0, 1251, 1249, 0, 0, 0, - 1252, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1250, 0, 0, 1251, 0, 0, 0, 0, 1252, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 1252, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 1232, 0, - 0, 1233, 0, 0, 0, 0, 1234, 1235, 1236, 0, - 0, 0, 0, 0, 1252, 0, 0, 0, 0, 0, - 0, 0, 0, 1237, 0, 0, 0, 0, 0, 0, - 0, 1239, 1252, 0, 0, 0, 0, 0, 1240, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1253, - 1252, 0, 1254, 1255, 1256, 0, 1257, 1258, 1259, 1260, - 1261, 1262, 0, 1241, 0, 0, 2173, 1253, 0, 0, - 1254, 1255, 1256, 0, 1257, 1258, 1259, 1260, 1261, 1262, - 0, 0, 0, 0, 2260, 1253, 0, 0, 1254, 1255, - 1256, 0, 1257, 1258, 1259, 1260, 1261, 1262, 0, 0, - 0, 0, 2382, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1253, 0, 0, 1254, 1255, 1256, 0, - 1257, 1258, 1259, 1260, 1261, 1262, 0, 0, 0, 0, - 2401, 1253, 0, 0, 1254, 1255, 1256, 0, 1257, 1258, - 1259, 1260, 1261, 1262, 0, 0, 0, 0, 2544, 1253, - 0, 0, 1254, 1255, 1256, 0, 1257, 1258, 1259, 1260, - 1261, 1262, 1242, 0, 1232, 0, 2742, 1233, 0, 0, - 0, 0, 1234, 1235, 1236, 0, 1243, 0, 0, 0, - 0, 1244, 0, 0, 0, 0, 0, 0, 0, 1237, - 0, 0, 0, 0, 0, 0, 0, 1239, 0, 0, - 0, 0, 1245, 1246, 1240, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 1247, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 
0, 0, 1241, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1248, 0, 0, 1249, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1250, 0, 0, 1251, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 1242, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1243, 0, 0, 0, 0, 1244, 0, 0, + 427, 428, 0, 0, 429, 430, 431, 432, 433, 1032, + 695, 0, 436, 0, 438, 439, 440, 441, 0, 0, + 442, 0, 0, 443, 444, 445, 446, 447, 448, 696, + 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, + 460, 461, 462, 463, 0, 1240, 0, 0, 1241, 0, + 0, 1087, 1088, 1242, 1243, 1244, 0, 0, 0, 0, + 0, 0, 0, 1240, 0, 0, 1241, 0, 0, 0, + 1245, 1242, 1243, 1244, 1703, 0, 0, 0, 1247, 0, + 0, 1240, 0, 0, 1241, 1248, 0, 0, 1245, 1242, + 1243, 1244, 0, 0, 0, 0, 1247, 0, 0, 0, + 0, 0, 0, 1248, 0, 0, 1245, 0, 0, 1240, + 1249, 0, 1241, 0, 1247, 0, 0, 1242, 1243, 1244, + 0, 1248, 0, 0, 0, 0, 0, 0, 1249, 0, + 0, 0, 0, 0, 1245, 0, 0, 1878, 0, 0, + 0, 0, 1247, 0, 0, 1240, 1249, 0, 1241, 1248, + 0, 0, 0, 1242, 1243, 1244, 0, 0, 0, 0, + 1704, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1245, 0, 0, 0, 1249, 0, 0, 0, 1247, 0, + 0, 0, 0, 0, 0, 1248, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1252, 0, 0, 0, 0, 0, 0, 1245, 1246, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1250, + 1249, 0, 0, 0, 0, 0, 0, 1918, 0, 0, + 0, 0, 1919, 1251, 0, 0, 0, 1250, 1252, 1240, + 0, 0, 1241, 0, 2827, 0, 0, 1242, 1243, 1244, + 0, 1251, 0, 0, 0, 1250, 1252, 0, 0, 1253, + 1254, 0, 0, 0, 1245, 0, 0, 1927, 0, 1251, + 0, 0, 1247, 1255, 1252, 0, 0, 1253, 1254, 1248, + 0, 0, 0, 1250, 0, 0, 0, 0, 0, 0, + 0, 1255, 0, 0, 0, 1253, 1254, 1251, 0, 0, + 0, 0, 1252, 0, 1249, 0, 0, 0, 0, 1255, + 0, 1256, 0, 0, 1257, 0, 0, 0, 0, 1250, + 0, 0, 0, 1253, 1254, 0, 0, 0, 1258, 1256, + 0, 1259, 1257, 1251, 0, 0, 0, 1255, 1252, 0, + 0, 0, 0, 0, 0, 0, 1258, 1256, 0, 1259, + 1257, 0, 0, 0, 0, 0, 0, 0, 0, 1253, + 1254, 0, 0, 0, 1258, 0, 0, 1259, 0, 0, + 0, 
0, 0, 1255, 0, 1256, 0, 0, 1257, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1247, 0, 0, 0, 0, 0, 0, 0, + 2828, 0, 1258, 0, 0, 1259, 0, 0, 0, 0, + 0, 0, 0, 1250, 0, 0, 0, 0, 0, 0, + 0, 1256, 0, 0, 1257, 0, 0, 1251, 1260, 0, + 0, 0, 1252, 0, 0, 0, 0, 0, 1258, 0, + 0, 1259, 0, 0, 0, 0, 1260, 0, 0, 0, + 0, 0, 0, 1253, 1254, 0, 0, 0, 0, 1892, + 0, 0, 0, 0, 1260, 0, 0, 1255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1240, 0, 0, 1241, 0, 1705, 0, 0, 1242, + 1243, 1244, 1260, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1256, 1245, 0, 1257, 0, + 0, 0, 0, 0, 1247, 0, 0, 0, 0, 0, + 0, 1248, 1258, 0, 0, 1259, 0, 1261, 1260, 0, + 1262, 1263, 1264, 0, 1265, 1266, 1267, 1268, 1269, 1270, + 0, 0, 0, 0, 0, 1261, 1249, 0, 1262, 1263, + 1264, 0, 1265, 1266, 1267, 1268, 1269, 1270, 0, 0, + 0, 0, 0, 1261, 0, 0, 1262, 1263, 1264, 0, + 1265, 1266, 1267, 1268, 1269, 1270, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1261, 0, 0, 1262, 1263, 1264, 0, 1265, 1266, + 1267, 1268, 1269, 1270, 0, 0, 0, 0, 0, 0, + 0, 0, 1260, 0, 0, 0, 1240, 0, 0, 1241, + 0, 0, 0, 0, 1242, 1243, 1244, 1261, 0, 0, + 1262, 1263, 1264, 0, 1265, 1266, 1267, 1268, 1269, 1270, + 1932, 1245, 0, 0, 1934, 1250, 0, 0, 0, 1247, + 0, 0, 0, 0, 0, 0, 1248, 0, 0, 1251, + 0, 0, 1240, 0, 1252, 1241, 0, 0, 0, 0, + 1242, 1243, 1244, 0, 0, 0, 0, 0, 0, 0, + 0, 1249, 0, 0, 0, 1253, 1254, 1245, 0, 0, + 0, 0, 0, 0, 0, 1247, 0, 0, 0, 1255, + 0, 0, 1248, 0, 0, 0, 0, 0, 0, 0, + 0, 1261, 0, 0, 1262, 1263, 1264, 0, 1265, 1266, + 1267, 1268, 1269, 1270, 0, 0, 0, 1249, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1256, 0, 0, + 1257, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1240, 1258, 0, 1241, 1259, 0, 0, + 0, 1242, 1243, 1244, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1245, 0, + 1250, 0, 0, 0, 0, 0, 1247, 0, 0, 0, + 0, 0, 1240, 1248, 1251, 1241, 0, 0, 0, 1252, + 1242, 1243, 1244, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1245, 1249, 0, + 1253, 1254, 0, 0, 0, 1247, 1250, 0, 0, 0, + 0, 0, 1248, 0, 1255, 0, 0, 0, 0, 0, + 1251, 0, 0, 0, 1260, 1252, 0, 0, 0, 0, + 1240, 
0, 0, 1241, 0, 0, 0, 1249, 1242, 1243, + 1244, 0, 0, 0, 0, 0, 1253, 1254, 0, 0, + 0, 0, 1256, 0, 0, 1257, 0, 0, 0, 0, + 1255, 0, 0, 1247, 0, 0, 0, 0, 0, 1258, + 1248, 0, 1259, 0, 0, 0, 0, 1240, 0, 0, + 1241, 0, 0, 0, 0, 1242, 1243, 1244, 0, 0, + 0, 0, 0, 0, 0, 1249, 0, 1250, 1256, 0, + 0, 1257, 0, 0, 0, 0, 0, 0, 0, 0, + 1247, 1251, 0, 0, 0, 1258, 1252, 1248, 1259, 0, + 0, 0, 0, 1261, 0, 0, 1262, 1263, 1264, 0, + 1265, 1266, 1267, 1268, 1269, 1270, 1250, 1253, 1254, 0, + 0, 0, 1249, 0, 0, 0, 0, 0, 0, 0, + 1251, 1255, 0, 0, 0, 1252, 0, 0, 0, 1260, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1248, 0, 0, 1249, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 1250, 0, 0, - 1251, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1253, 1254, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1256, + 1255, 0, 1257, 0, 1250, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1260, 1258, 0, 1251, 1259, + 0, 0, 0, 1252, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1256, 0, + 0, 1257, 0, 0, 1253, 1254, 2146, 0, 0, 0, + 0, 1250, 0, 0, 0, 1258, 0, 0, 1255, 0, + 0, 0, 0, 0, 0, 1251, 0, 0, 1261, 0, + 1252, 1262, 1263, 1264, 0, 1265, 1266, 1267, 1268, 1269, + 1270, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, -1675, -1675, 0, 0, 0, 1256, 0, 0, 1257, + 0, 0, 0, 0, 0, 1255, 1260, 0, 0, 0, + 0, 0, 0, 1258, 1261, 0, 0, 1262, 1263, 1264, + 0, 1265, 1266, 1267, 1268, 1269, 1270, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1253, 0, 0, 1254, 1255, 1256, 0, 1257, 1258, 1259, - 1260, 1261, 1262, 0, 0, 0, 0, 2754, 0, 0, + 0, 0, 0, 0, 0, 1260, -1675, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1258, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 1252, 0, 0, + 0, 0, 0, 1260, 0, 1261, 0, 0, 1262, 1263, + 1264, 0, 1265, 1266, 1267, 1268, 1269, 1270, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1261, 0, 0, 1262, 1263, 1264, + 1260, 1265, 1266, 1267, 1268, 1269, 1270, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1261, 0, 0, 1262, 1263, 1264, 0, 1265, + 1266, 1267, 1268, 1269, 1270, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 959, 0, 1612, 1261, + 0, 0, 1262, 1263, 1264, 0, 1265, 1266, 1267, 1268, + 1269, 1270, 90, 91, 92, 93, 94, 95, 96, 97, + 960, 98, 99, 100, 961, 962, 963, 964, 965, 966, + 967, 101, 102, 968, 103, 104, 105, 0, 107, 108, + 109, 656, 657, 0, 658, 659, 969, 115, 116, 117, + 118, 119, 120, 970, 971, 121, 122, 660, 661, 125, + 972, 126, 127, 128, 129, 662, 973, 0, 974, 132, + 133, 134, 135, 136, 0, 138, 139, 140, 975, 141, + 142, 143, 144, 145, 146, 976, 0, 148, 149, 150, + 977, 978, 979, 0, 980, 981, 982, 152, 153, 154, + 155, 156, 157, 158, 663, 664, 161, 983, 162, 984, + 163, 164, 165, 166, 167, 168, 985, 169, 170, 171, + 172, 173, 986, 987, 174, 175, 176, 177, 178, 988, + 179, 180, 181, 989, 182, 183, 184, 990, 185, 186, + 187, 188, 665, 190, 191, 192, 193, 666, 991, 195, + 992, 196, 197, 667, 199, 993, 200, 994, 201, 0, + 995, 0, 204, 205, 206, 0, 208, 996, 209, 997, + 668, 211, 998, 212, 213, 214, 215, 216, 217, 0, + 219, 220, 221, 222, 999, 223, 224, 225, 226, 227, + 228, 1000, 229, 0, 669, 232, 233, 234, 235, 670, + 671, 1001, 672, 1002, 239, 0, 0, 242, 0, 244, + 245, 246, 247, 248, 1003, 1004, 249, 0, 251, 0, + 1005, 253, 254, 255, 1006, 1007, 256, 257, 258, 259, + 260, 673, 262, 263, 264, 265, 266, 267, 268, 269, + 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, + 674, 0, 675, 283, 284, 285, 676, 1008, 287, 288, + 0, 290, 1009, 677, 292, 678, 294, 295, 296, 1010, + 297, 298, 1011, 1012, 299, 300, 301, 1013, 1014, 302, + 679, 0, 305, 0, 680, 308, 309, 310, 311, 312, + 313, 314, 315, 316, 317, 1015, 318, 319, 681, 321, + 322, 682, 324, 325, 326, 1016, 327, 328, 329, 330, + 331, 332, 333, 334, 335, 683, 337, 338, 339, 340, + 1017, 341, 342, 343, 344, 345, 346, 347, 348, 349, + 350, 351, 352, 1018, 353, 354, 0, 356, 357, 358, + 684, 360, 361, 
362, 363, 364, 365, 366, 367, 368, + 369, 370, 1019, 371, 372, 373, 374, 375, 1020, 376, + 685, 378, 379, 380, 0, 382, 383, 686, 385, 1021, + 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, + 396, 687, 398, 688, 1022, 400, 401, 1023, 402, 0, + 404, 405, 406, 407, 408, 1024, 689, 690, 1025, 1026, + 411, 412, 691, 414, 692, 1027, 416, 417, 693, 419, + 420, 421, 422, 423, 1028, 1029, 424, 425, 426, 427, + 428, 1030, 1031, 429, 430, 431, 432, 433, 1032, 695, + 1033, 436, 0, 438, 439, 440, 441, 1034, 1035, 442, + 1036, 1037, 443, 444, 445, 446, 447, 448, 696, 697, + 698, 699, 700, 701, 702, 703, 704, 705, 706, 460, + 461, 462, 463, 959, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, + 91, 92, 93, 94, 95, 96, 97, 960, 98, 99, + 100, 961, 962, 963, 964, 965, 966, 967, 101, 102, + 968, 103, 104, 105, 0, 107, 108, 109, 656, 657, + 0, 658, 659, 969, 115, 116, 117, 118, 119, 120, + 970, 971, 121, 122, 660, 661, 125, 972, 126, 127, + 128, 129, 662, 973, 0, 974, 132, 133, 134, 135, + 136, 0, 138, 139, 140, 975, 141, 142, 143, 144, + 145, 146, 976, 0, 148, 149, 150, 977, 978, 979, + 0, 980, 981, 982, 152, 153, 154, 155, 156, 157, + 158, 663, 664, 161, 983, 162, 984, 163, 164, 165, + 166, 167, 168, 985, 169, 170, 171, 172, 173, 986, + 987, 174, 175, 176, 177, 178, 988, 179, 180, 181, + 989, 182, 183, 184, 990, 185, 186, 187, 188, 665, + 190, 191, 192, 193, 666, 991, 195, 992, 196, 197, + 667, 199, 993, 200, 994, 201, 0, 995, 0, 204, + 205, 206, 0, 208, 996, 209, 997, 668, 211, 998, + 212, 213, 214, 215, 216, 217, 0, 219, 220, 221, + 222, 999, 223, 224, 225, 226, 227, 228, 1000, 229, + 0, 669, 232, 233, 234, 235, 670, 671, 1001, 672, + 1002, 239, 0, 0, 242, 0, 244, 245, 246, 247, + 248, 1003, 1004, 249, 0, 251, 0, 1005, 253, 254, + 255, 1006, 1007, 256, 257, 258, 259, 260, 673, 262, + 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, + 273, 274, 275, 276, 277, 278, 279, 674, 0, 675, + 283, 284, 285, 676, 1008, 287, 288, 0, 290, 1009, + 677, 292, 678, 294, 295, 296, 1010, 
297, 298, 1011, + 1012, 299, 300, 301, 1013, 1014, 302, 679, 0, 305, + 0, 680, 308, 309, 310, 311, 312, 313, 314, 315, + 316, 317, 1015, 318, 319, 681, 321, 322, 682, 324, + 325, 326, 1016, 327, 328, 329, 330, 331, 332, 333, + 334, 335, 683, 337, 338, 339, 340, 1017, 341, 342, + 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, + 1018, 353, 354, 0, 356, 357, 358, 684, 360, 361, + 362, 363, 364, 365, 366, 367, 368, 369, 370, 1019, + 371, 372, 373, 374, 375, 1020, 376, 685, 378, 379, + 380, 0, 382, 383, 686, 385, 1021, 386, 387, 388, + 389, 390, 391, 392, 393, 394, 395, 396, 687, 398, + 688, 1022, 400, 401, 1023, 402, 0, 404, 405, 406, + 407, 408, 1024, 689, 690, 1025, 1026, 411, 412, 691, + 414, 692, 1027, 416, 417, 693, 419, 420, 421, 422, + 423, 1028, 1029, 424, 425, 426, 427, 428, 1030, 1031, + 429, 430, 431, 432, 433, 1032, 695, 1033, 436, 0, + 438, 439, 440, 441, 1034, 1035, 442, 1036, 1037, 443, + 444, 445, 446, 447, 448, 696, 697, 698, 699, 700, + 701, 702, 703, 704, 705, 706, 460, 461, 462, 463, + 959, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, + 1752, 95, 96, 97, 960, 98, 99, 100, 961, 962, + 963, 964, 965, 966, 967, 101, 102, 968, 103, 104, + 105, 0, 107, 108, 109, 656, 657, 0, 658, 659, + 969, 115, 116, 117, 118, 119, 120, 970, 971, 121, + 122, 660, 661, 125, 972, 126, 127, 128, 129, 662, + 973, 0, 974, 132, 133, 134, 135, 136, 0, 138, + 139, 140, 975, 141, 142, 143, 144, 145, 146, 976, + 0, 148, 149, 150, 977, 978, 979, 0, 980, 981, + 982, 152, 153, 154, 155, 156, 157, 158, 663, 664, + 161, 983, 162, 984, 163, 164, 165, 166, 167, 168, + 985, 169, 170, 171, 172, 173, 986, 987, 174, 175, + 176, 1753, 178, 988, 179, 180, 181, 989, 182, 183, + 184, 990, 185, 186, 187, 188, 665, 190, 191, 192, + 193, 666, 991, 195, 992, 196, 197, 667, 199, 993, + 200, 994, 201, 0, 995, 0, 204, 205, 206, 0, + 208, 996, 209, 997, 668, 211, 998, 212, 213, 214, + 215, 216, 217, 0, 219, 220, 221, 222, 999, 223, + 224, 225, 226, 227, 228, 1000, 229, 0, 669, 232, 
+ 233, 234, 235, 670, 671, 1001, 672, 1002, 239, 0, + 0, 242, 0, 244, 245, 246, 247, 248, 1003, 1004, + 249, 0, 251, 0, 1005, 253, 254, 255, 1006, 1007, + 256, 257, 258, 259, 260, 673, 262, 263, 264, 265, + 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, + 276, 277, 278, 279, 674, 0, 675, 283, 284, 285, + 676, 1008, 287, 288, 0, 290, 1009, 677, 292, 678, + 294, 295, 296, 1010, 297, 298, 1011, 1012, 299, 300, + 301, 1013, 1014, 302, 679, 0, 305, 0, 680, 308, + 309, 310, 311, 312, 313, 314, 315, 316, 317, 1015, + 318, 319, 681, 321, 322, 682, 324, 325, 326, 1016, + 327, 328, 329, 330, 331, 332, 333, 334, 335, 683, + 337, 338, 339, 340, 1017, 341, 342, 343, 344, 345, + 346, 347, 348, 349, 350, 351, 352, 1018, 353, 354, + 0, 356, 357, 358, 684, 360, 361, 362, 363, 364, + 365, 366, 367, 368, 369, 370, 1019, 371, 372, 373, + 374, 375, 1020, 1754, 685, 378, 379, 380, 0, 382, + 383, 686, 385, 1021, 386, 387, 388, 389, 390, 391, + 392, 393, 394, 395, 396, 687, 398, 688, 1022, 400, + 401, 1023, 402, 0, 404, 405, 406, 407, 408, 1024, + 689, 690, 1025, 1026, 411, 412, 691, 414, 692, 1027, + 416, 417, 693, 419, 420, 421, 422, 423, 1028, 1029, + 424, 425, 426, 427, 428, 1030, 1031, 429, 430, 431, + 432, 433, 1032, 695, 1033, 436, 0, 438, 439, 440, + 441, 1034, 1035, 442, 1036, 1037, 443, 444, 445, 446, + 447, 448, 696, 697, 698, 699, 700, 701, 702, 703, + 704, 705, 706, 460, 461, 462, 463, 89, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 1253, 957, 0, 1254, - 1255, 1256, 0, 1257, 1258, 1259, 1260, 1261, 1262, 0, - 0, 1410, 0, 90, 91, 92, 93, 94, 95, 96, - 97, 958, 98, 99, 100, 959, 960, 961, 962, 963, - 964, 965, 101, 102, 966, 103, 104, 105, 0, 107, - 108, 109, 656, 657, 0, 658, 659, 967, 115, 116, - 117, 118, 119, 120, 968, 969, 121, 122, 660, 661, - 125, 970, 126, 127, 128, 129, 662, 971, 0, 972, - 132, 133, 134, 135, 136, 0, 138, 139, 140, 973, - 141, 142, 143, 144, 145, 146, 974, 0, 148, 149, - 150, 975, 976, 977, 0, 978, 979, 980, 152, 153, - 154, 155, 156, 
157, 158, 663, 664, 161, 981, 162, - 982, 163, 164, 165, 166, 167, 168, 983, 169, 170, - 171, 172, 173, 984, 985, 174, 175, 176, 177, 178, - 986, 179, 180, 181, 987, 182, 183, 184, 988, 185, - 186, 187, 188, 665, 190, 191, 192, 193, 666, 989, - 195, 990, 196, 197, 667, 199, 991, 200, 992, 201, - 0, 993, 0, 204, 205, 206, 0, 208, 994, 209, - 995, 668, 211, 996, 212, 213, 214, 215, 216, 217, - 0, 219, 220, 221, 222, 997, 223, 224, 225, 226, - 227, 228, 998, 229, 0, 669, 232, 233, 234, 235, - 670, 671, 999, 672, 1000, 239, 0, 0, 242, 0, - 244, 245, 246, 247, 248, 1001, 1002, 249, 0, 251, - 0, 1003, 253, 254, 255, 1004, 1005, 256, 257, 258, - 259, 260, 673, 262, 263, 264, 265, 266, 267, 268, + 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, + 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, + 0, 0, 101, 102, 0, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 0, 115, 116, + 117, 118, 119, 120, 0, 736, 121, 122, 123, 124, + 125, 0, 126, 127, 128, 129, 739, 0, 740, 0, + 132, 133, 134, 135, 136, 137, 138, 139, 140, 0, + 141, 142, 143, 144, 145, 146, 0, 147, 148, 149, + 150, 741, 742, 743, 744, 745, 746, 747, 152, 153, + 154, 155, 156, 157, 158, 159, 160, 161, 0, 162, + 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, + 171, 172, 173, 0, 0, 174, 175, 176, 177, 178, + 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, + 186, 187, 188, 189, 190, 191, 192, 193, 753, 0, + 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, + 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, + 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, + 218, 219, 220, 221, 222, 0, 223, 224, 225, 226, + 227, 228, 0, 229, 230, 231, 232, 233, 234, 235, + 236, 237, 0, 238, 0, 239, 240, 241, 242, 243, + 244, 245, 246, 247, 248, 0, 0, 249, 250, 251, + 252, 0, 253, 254, 255, 759, 760, 256, 257, 258, + 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, - 279, 674, 0, 675, 283, 284, 285, 676, 1006, 287, - 288, 0, 290, 1007, 677, 292, 678, 294, 295, 296, - 1008, 297, 298, 1009, 1010, 299, 300, 301, 
1011, 1012, - 302, 679, 0, 305, 0, 680, 308, 309, 310, 311, - 312, 313, 314, 315, 316, 317, 1013, 318, 319, 681, - 321, 322, 682, 324, 325, 326, 1014, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 683, 337, 338, 339, - 340, 1015, 341, 342, 343, 344, 345, 346, 347, 348, - 349, 350, 351, 352, 1016, 353, 354, 0, 356, 357, - 358, 684, 360, 361, 362, 363, 364, 365, 366, 367, - 368, 369, 370, 1017, 371, 372, 373, 374, 375, 1018, - 376, 685, 378, 379, 380, 0, 382, 383, 686, 385, - 1019, 386, 387, 388, 389, 390, 391, 392, 393, 394, - 395, 396, 687, 398, 688, 1020, 400, 401, 1021, 402, - 0, 404, 405, 406, 407, 408, 1022, 689, 690, 1023, - 1024, 411, 412, 691, 414, 692, 1025, 416, 417, 693, - 419, 420, 421, 422, 423, 1026, 1027, 424, 425, 426, - 427, 428, 1028, 1029, 429, 430, 431, 432, 433, 1030, - 695, 1031, 436, 0, 438, 439, 440, 441, 1032, 1033, - 442, 1034, 1035, 443, 444, 445, 446, 447, 448, 696, - 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, - 460, 461, 462, 463, 488, 0, 0, 0, 0, 0, - 0, 0, 0, 1036, 0, 0, 0, 0, 0, 0, + 279, 280, 281, 282, 283, 284, 285, 286, 0, 287, + 288, 289, 290, 0, 765, 292, 293, 294, 295, 296, + 0, 297, 298, 0, 0, 299, 300, 301, 0, 0, + 302, 303, 304, 305, 306, 768, 308, 309, 310, 311, + 312, 313, 314, 315, 316, 317, 0, 318, 319, 769, + 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, + 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, + 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, + 349, 350, 351, 352, 0, 353, 354, 355, 356, 357, + 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, + 368, 369, 370, 0, 371, 372, 373, 374, 375, 772, + 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, + 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, + 395, 396, 397, 398, 774, 0, 400, 401, 0, 402, + 403, 404, 405, 406, 407, 408, 0, 409, 410, 0, + 0, 411, 412, 777, 414, 778, 0, 416, 417, 780, + 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, + 427, 428, 781, 0, 429, 430, 431, 432, 433, 434, + 435, 0, 436, 437, 438, 439, 440, 441, 0, 0, + 442, 0, 0, 443, 444, 
445, 446, 447, 448, 449, + 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, + 460, 461, 462, 463, 89, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, - 102, 0, 103, 104, 105, 0, 107, 108, 109, 110, - 111, 0, 113, 114, 0, 115, 116, 117, 118, 119, + 102, 0, 103, 104, 105, 106, 107, 108, 109, 110, + 111, 112, 113, 114, 0, 115, 116, 117, 118, 119, 120, 0, 0, 121, 122, 123, 124, 125, 0, 126, - 127, 128, 129, 130, 0, 0, 0, 132, 133, 134, - 135, 136, 0, 138, 139, 140, 0, 141, 142, 143, - 144, 145, 146, 0, 0, 148, 149, 150, 0, 0, - 0, 0, 0, 0, 0, 152, 153, 154, 155, 156, + 127, 128, 129, 130, 0, 131, 0, 132, 133, 134, + 135, 136, 137, 138, 139, 140, 0, 141, 142, 143, + 144, 145, 146, 0, 147, 148, 149, 150, 0, 0, + 0, 151, 0, 0, 0, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 0, 162, 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, 171, 172, 173, 0, 0, 174, 175, 176, 177, 178, 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 0, 195, 0, 196, - 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, - 204, 205, 206, 0, 208, 0, 209, 0, 210, 211, - 0, 212, 213, 214, 215, 216, 217, 0, 219, 220, + 197, 198, 199, 0, 200, 0, 201, 202, 0, 203, + 204, 205, 206, 207, 208, 0, 209, 0, 210, 211, + 0, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, - 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, - 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, - 247, 248, 0, 0, 249, 0, 251, 0, 0, 253, - 254, 255, 0, 0, 256, 257, 258, 259, 260, 490, + 229, 230, 231, 232, 233, 234, 235, 236, 237, 0, + 238, 0, 239, 240, 241, 242, 243, 244, 245, 246, + 247, 248, 0, 0, 249, 250, 251, 252, 0, 253, + 254, 255, 0, 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, - 272, 273, 274, 275, 276, 277, 278, 279, 280, 0, - 282, 283, 284, 285, 286, 0, 287, 288, 0, 290, + 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, + 282, 283, 284, 285, 286, 0, 287, 
288, 289, 290, 0, 291, 292, 293, 294, 295, 296, 0, 297, 298, - 0, 0, 299, 300, 301, 0, 0, 302, 303, 0, - 305, 0, 307, 308, 309, 310, 311, 312, 313, 314, + 0, 0, 299, 300, 301, 0, 0, 302, 303, 304, + 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 0, 318, 319, 320, 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, - 352, 0, 353, 354, 0, 356, 357, 358, 359, 360, + 352, 0, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 0, 371, 372, 373, 374, 375, 0, 376, 377, 378, - 379, 380, 0, 382, 383, 384, 385, 0, 386, 387, - 388, 389, 390, 391, 392, 393, 394, 395, 396, 492, - 398, 399, 0, 400, 401, 0, 402, 0, 404, 405, + 379, 380, 381, 382, 383, 384, 385, 0, 386, 387, + 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, + 398, 399, 0, 400, 401, 0, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 0, 0, 411, 412, 413, 414, 415, 0, 416, 417, 418, 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, 427, 428, 0, 0, 429, 430, 431, 432, 433, 434, 435, 0, 436, - 0, 438, 439, 440, 441, 0, 0, 442, 0, 0, + 437, 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, - 463, 655, 1081, 535, 0, 0, 0, 729, 0, 0, - 2112, 0, 0, 0, 0, 0, 0, 90, 91, 92, + 463, 488, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, - 104, 105, 0, 107, 108, 109, 656, 657, 0, 658, - 659, 0, 115, 116, 117, 118, 119, 120, 0, 0, - 121, 122, 660, 661, 125, 0, 126, 127, 128, 129, - 662, 0, 0, 0, 132, 133, 134, 135, 136, 0, - 138, 139, 140, 0, 141, 142, 143, 144, 145, 146, + 104, 105, 0, 107, 108, 109, 110, 111, 0, 113, + 114, 0, 115, 116, 117, 118, 119, 120, 0, 0, + 121, 122, 123, 124, 125, 1356, 126, 127, 128, 129, + 130, 0, 0, 1357, 132, 133, 134, 135, 136, 0, + 138, 139, 140, 1358, 141, 142, 143, 
144, 145, 146, 0, 0, 148, 149, 150, 0, 0, 0, 0, 0, - 0, 0, 152, 153, 154, 155, 156, 157, 158, 663, - 664, 161, 1290, 162, 0, 163, 164, 165, 166, 167, + 0, 0, 152, 153, 154, 155, 156, 157, 158, 159, + 160, 161, 0, 162, 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, 171, 172, 173, 0, 0, 174, 175, 176, 177, 178, 0, 179, 180, 181, 0, 182, - 183, 184, 0, 185, 186, 187, 188, 665, 190, 191, - 192, 193, 666, 1082, 195, 0, 196, 197, 667, 199, - 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, - 0, 208, 0, 209, 0, 668, 211, 0, 212, 213, - 214, 215, 216, 217, 0, 219, 220, 221, 222, 0, - 223, 224, 225, 226, 227, 228, 0, 229, 0, 669, - 232, 233, 234, 235, 670, 671, 0, 672, 0, 239, + 183, 184, 0, 185, 186, 187, 188, 189, 190, 191, + 192, 193, 194, 0, 195, 0, 196, 197, 198, 199, + 0, 200, 1359, 201, 0, 0, 0, 204, 205, 206, + 0, 208, 0, 209, 0, 210, 211, 0, 212, 213, + 214, 215, 216, 1360, 0, 219, 220, 221, 222, 0, + 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, + 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, 0, 0, 249, 0, 251, 0, 0, 253, 254, 255, 0, - 0, 256, 257, 258, 259, 260, 673, 262, 263, 264, + 0, 256, 257, 258, 259, 260, 490, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, - 275, 276, 277, 278, 279, 674, 0, 675, 283, 284, - 285, 676, 0, 287, 288, 0, 290, 0, 677, 292, - 678, 294, 295, 296, 0, 297, 298, 1083, 0, 299, - 300, 301, 0, 0, 302, 679, 0, 305, 0, 680, + 275, 276, 277, 278, 279, 280, 0, 282, 283, 284, + 285, 286, 0, 287, 288, 0, 290, 0, 291, 292, + 293, 294, 295, 296, 0, 297, 298, 0, 0, 299, + 300, 301, 0, 0, 302, 303, 0, 305, 0, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, - 0, 318, 319, 681, 321, 322, 682, 324, 325, 326, - 0, 327, 328, 329, 330, 331, 332, 333, 334, 335, - 683, 337, 338, 339, 340, 0, 341, 342, 343, 344, + 0, 318, 319, 320, 321, 322, 323, 324, 325, 326, + 1361, 327, 328, 329, 330, 331, 332, 333, 334, 335, + 336, 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 
0, 353, - 354, 0, 356, 357, 358, 684, 360, 361, 362, 363, + 354, 0, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 0, 371, 372, - 373, 374, 375, 0, 376, 685, 378, 379, 380, 0, - 382, 383, 686, 385, 0, 386, 387, 388, 389, 390, - 391, 392, 393, 394, 395, 396, 687, 398, 688, 0, + 373, 374, 375, 0, 376, 377, 378, 379, 380, 0, + 382, 383, 384, 385, 0, 386, 387, 388, 389, 390, + 391, 392, 393, 394, 395, 396, 492, 398, 399, 0, 400, 401, 0, 402, 0, 404, 405, 406, 407, 408, - 0, 689, 690, 0, 0, 411, 412, 691, 414, 692, - 1084, 416, 417, 693, 419, 420, 421, 422, 423, 0, - 0, 424, 425, 426, 427, 428, 0, 0, 429, 430, - 431, 432, 433, 1030, 695, 0, 436, 0, 438, 439, + 0, 409, 410, 0, 0, 411, 412, 413, 414, 415, + 0, 416, 417, 418, 419, 420, 421, 422, 423, 0, + 1362, 424, 425, 426, 427, 428, 0, 0, 429, 430, + 431, 432, 433, 434, 435, 0, 436, 0, 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, - 446, 447, 448, 696, 697, 698, 699, 700, 701, 702, - 703, 704, 705, 706, 460, 461, 462, 463, 655, 1081, - 535, 0, 0, 0, 729, 1085, 1086, 0, 0, 0, + 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, + 456, 457, 458, 459, 460, 461, 462, 463, 488, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, 0, - 107, 108, 109, 656, 657, 0, 658, 659, 0, 115, - 116, 117, 118, 119, 120, 0, 0, 121, 122, 660, - 661, 125, 0, 126, 127, 128, 129, 662, 0, 0, + 107, 108, 109, 110, 111, 0, 113, 114, 0, 115, + 116, 117, 118, 119, 120, 0, 0, 121, 122, 123, + 124, 125, 1356, 126, 127, 128, 129, 130, 0, 0, 0, 132, 133, 134, 135, 136, 0, 138, 139, 140, - 0, 141, 142, 143, 144, 145, 146, 0, 0, 148, + 1358, 141, 142, 143, 144, 145, 146, 0, 0, 148, 149, 150, 0, 0, 0, 0, 0, 0, 0, 152, - 153, 154, 155, 156, 157, 158, 663, 664, 161, 1292, + 153, 154, 155, 156, 157, 158, 159, 160, 161, 0, 162, 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, 171, 172, 173, 0, 0, 174, 175, 176, 177, 178, 0, 179, 180, 181, 
0, 182, 183, 184, 0, - 185, 186, 187, 188, 665, 190, 191, 192, 193, 666, - 1082, 195, 0, 196, 197, 667, 199, 0, 200, 0, + 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, + 0, 195, 0, 196, 197, 198, 199, 0, 200, 1359, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, - 209, 0, 668, 211, 0, 212, 213, 214, 215, 216, + 209, 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, 0, 219, 220, 221, 222, 0, 223, 224, 225, - 226, 227, 228, 0, 229, 0, 669, 232, 233, 234, - 235, 670, 671, 0, 672, 0, 239, 0, 0, 242, + 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, + 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, 0, 0, 249, 0, - 251, 0, 0, 253, 254, 255, 0, 0, 256, 257, - 258, 259, 260, 673, 262, 263, 264, 265, 266, 267, + 251, 1815, 0, 253, 254, 255, 0, 0, 256, 257, + 258, 259, 260, 490, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, - 278, 279, 674, 0, 675, 283, 284, 285, 676, 0, - 287, 288, 0, 290, 0, 677, 292, 678, 294, 295, - 296, 0, 297, 298, 1083, 0, 299, 300, 301, 0, - 0, 302, 679, 0, 305, 0, 680, 308, 309, 310, + 278, 279, 280, 0, 282, 283, 284, 285, 286, 0, + 287, 288, 0, 290, 0, 291, 292, 293, 294, 295, + 296, 0, 297, 298, 0, 0, 299, 300, 301, 0, + 0, 302, 303, 0, 305, 0, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 0, 318, 319, - 681, 321, 322, 682, 324, 325, 326, 0, 327, 328, - 329, 330, 331, 332, 333, 334, 335, 683, 337, 338, + 320, 321, 322, 323, 324, 325, 326, 1361, 327, 328, + 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, 354, 0, 356, - 357, 358, 684, 360, 361, 362, 363, 364, 365, 366, + 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 0, 371, 372, 373, 374, 375, - 0, 376, 685, 378, 379, 380, 0, 382, 383, 686, + 0, 376, 377, 378, 379, 380, 0, 382, 383, 384, 385, 0, 386, 387, 388, 389, 390, 391, 392, 393, - 394, 395, 396, 687, 398, 688, 0, 400, 401, 0, - 402, 0, 404, 405, 406, 407, 408, 0, 689, 690, - 0, 0, 
411, 412, 691, 414, 692, 1084, 416, 417, - 693, 419, 420, 421, 422, 423, 0, 0, 424, 425, + 394, 395, 396, 492, 398, 399, 0, 400, 401, 0, + 402, 0, 404, 405, 406, 407, 408, 0, 409, 410, + 0, 0, 411, 412, 413, 414, 415, 0, 416, 417, + 418, 419, 420, 421, 422, 423, 0, 1362, 424, 425, 426, 427, 428, 0, 0, 429, 430, 431, 432, 433, - 1030, 695, 0, 436, 0, 438, 439, 440, 441, 0, + 434, 435, 0, 436, 0, 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, - 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, - 706, 460, 461, 462, 463, 655, 1081, 535, 0, 0, - 0, 729, 1085, 1086, 0, 0, 0, 0, 0, 0, + 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, + 459, 460, 461, 462, 463, 655, 0, 535, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, 0, 107, 108, 109, @@ -190925,7 +205234,7 @@ static const yytype_int16 yytable[] = 164, 165, 166, 167, 168, 0, 169, 170, 171, 172, 173, 0, 0, 174, 175, 176, 177, 178, 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, 186, 187, - 188, 665, 190, 191, 192, 193, 666, 1082, 195, 0, + 188, 665, 190, 191, 192, 193, 666, 1084, 195, 0, 196, 197, 667, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, 668, 211, 0, 212, 213, 214, 215, 216, 217, 0, 219, @@ -190938,7 +205247,7 @@ static const yytype_int16 yytable[] = 271, 272, 273, 274, 275, 276, 277, 278, 279, 674, 0, 675, 283, 284, 285, 676, 0, 287, 288, 0, 290, 0, 677, 292, 678, 294, 295, 296, 0, 297, - 298, 1083, 0, 299, 300, 301, 0, 0, 302, 679, + 298, 1085, 0, 299, 300, 301, 0, 0, 302, 679, 0, 305, 0, 680, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 0, 318, 319, 681, 321, 322, 682, 324, 325, 326, 0, 327, 328, 329, 330, 331, @@ -190951,416 +205260,21 @@ static const yytype_int16 yytable[] = 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 687, 398, 688, 0, 400, 401, 0, 402, 0, 404, 405, 406, 407, 408, 0, 689, 690, 0, 0, 411, - 412, 691, 414, 692, 1084, 416, 417, 693, 419, 420, + 412, 691, 
414, 692, 1086, 416, 417, 693, 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, 427, 428, - 0, 0, 429, 430, 431, 432, 433, 1030, 695, 0, + 0, 0, 429, 430, 431, 432, 433, 1032, 695, 0, 436, 0, 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 460, 461, - 462, 463, 0, 1232, 0, 0, 1233, 0, 0, 1085, - 1086, 1234, 1235, 1236, 0, 0, 0, 0, 0, 0, - 0, 1232, 0, 0, 1233, 0, 0, 0, 1237, 1234, - 1235, 1236, 1692, 0, 0, 0, 1239, 0, 0, 1232, - 0, 0, 1233, 1240, 0, 0, 1237, 1234, 1235, 1236, - 0, 0, 0, 0, 1239, 0, 0, 0, 0, 0, - 0, 1240, 0, 0, 1237, 0, 0, 1232, 1241, 0, - 1233, 0, 1239, 0, 0, 1234, 1235, 1236, 0, 1240, - 0, 0, 0, 0, 0, 0, 1241, 0, 0, 0, - 0, 0, 1237, 0, 0, 1867, 0, 0, 0, 0, - 1239, 0, 0, 1232, 1241, 0, 1233, 1240, 0, 0, - 0, 1234, 1235, 1236, 0, 0, 0, 0, 1693, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 1237, 0, - 0, 0, 1241, 0, 0, 0, 1239, 0, 0, 0, - 0, 0, 0, 1240, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 1242, 1241, 0, - 0, 0, 0, 0, 0, 1907, 0, 0, 0, 0, - 1908, 1243, 0, 0, 0, 1242, 1244, 1232, 0, 0, - 1233, 0, 2812, 0, 0, 1234, 1235, 1236, 0, 1243, - 0, 0, 0, 1242, 1244, 0, 0, 1245, 1246, 0, - 0, 0, 1237, 0, 0, 1916, 0, 1243, 0, 0, - 1239, 1247, 1244, 0, 0, 1245, 1246, 1240, 0, 0, - 0, 1242, 0, 0, 0, 0, 0, 0, 0, 1247, - 0, 0, 0, 1245, 1246, 1243, 0, 0, 0, 0, - 1244, 0, 1241, 0, 0, 0, 0, 1247, 0, 1248, - 0, 0, 1249, 0, 0, 0, 0, 1242, 0, 0, - 0, 1245, 1246, 0, 0, 0, 1250, 1248, 0, 1251, - 1249, 1243, 0, 0, 0, 1247, 1244, 0, 0, 0, - 0, 0, 0, 0, 1250, 1248, 0, 1251, 1249, 0, - 0, 0, 0, 0, 0, 0, 0, 1245, 1246, 0, - 0, 0, 1250, 0, 0, 1251, 0, 0, 0, 0, - 0, 1247, 0, 1248, 0, 0, 1249, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 2813, 0, - 1250, 0, 0, 1251, 0, 0, 0, 0, 0, 0, - 0, 1242, 0, 0, 0, 0, 0, 0, 0, 1248, - 0, 0, 1249, 0, 0, 1243, 1252, 0, 0, 0, - 1244, 0, 0, 0, 0, 0, 1250, 0, 0, 1251, - 0, 0, 0, 0, 1252, 0, 0, 0, 0, 0, - 0, 1245, 1246, 0, 0, 0, 0, 1881, 0, 0, - 0, 0, 1252, 0, 0, 1247, 0, 
0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1232, - 0, 0, 1233, 0, 1694, 0, 0, 1234, 1235, 1236, - 1252, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1248, 1237, 0, 1249, 0, 0, 0, - 0, 0, 1239, 0, 0, 0, 0, 0, 0, 1240, - 1250, 0, 0, 1251, 0, 1253, 1252, 0, 1254, 1255, - 1256, 0, 1257, 1258, 1259, 1260, 1261, 1262, 0, 0, - 0, 0, 0, 1253, 1241, 0, 1254, 1255, 1256, 0, - 1257, 1258, 1259, 1260, 1261, 1262, 0, 0, 0, 0, - 0, 1253, 0, 0, 1254, 1255, 1256, 0, 1257, 1258, - 1259, 1260, 1261, 1262, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1253, - 0, 0, 1254, 1255, 1256, 0, 1257, 1258, 1259, 1260, - 1261, 1262, 0, 0, 0, 0, 0, 0, 0, 0, - 1252, 0, 0, 0, 1232, 0, 0, 1233, 0, 0, - 0, 0, 1234, 1235, 1236, 1253, 0, 0, 1254, 1255, - 1256, 0, 1257, 1258, 1259, 1260, 1261, 1262, 1921, 1237, - 0, 0, 1923, 1242, 0, 0, 0, 1239, 0, 0, - 0, 0, 0, 0, 1240, 0, 0, 1243, 0, 0, - 1232, 0, 1244, 1233, 0, 0, 0, 0, 1234, 1235, - 1236, 0, 0, 0, 0, 0, 0, 0, 0, 1241, - 0, 0, 0, 1245, 1246, 1237, 0, 0, 0, 0, - 0, 0, 0, 1239, 0, 0, 0, 1247, 0, 0, - 1240, 0, 0, 0, 0, 0, 0, 0, 0, 1253, - 0, 0, 1254, 1255, 1256, 0, 1257, 1258, 1259, 1260, - 1261, 1262, 0, 0, 0, 1241, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 1248, 0, 0, 1249, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1232, 1250, 0, 1233, 1251, 0, 0, 0, 1234, - 1235, 1236, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 1237, 0, 1242, 0, - 0, 0, 0, 0, 1239, 0, 0, 0, 0, 0, - 0, 1240, 1243, 0, 0, 0, 0, 1244, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 1241, 0, 1245, 1246, - 0, 0, 0, 0, 1242, 0, 0, 0, 0, 0, - 0, 0, 1247, 0, 0, 0, 0, 0, 1243, 0, - 0, 0, 1252, 1244, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1245, 1246, 0, 0, 0, 0, - 1248, 0, 0, 1249, 0, 0, 0, 0, 1247, 0, - 0, 0, 0, 0, 0, 0, 0, 1250, 0, 0, - 1251, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 1242, 1248, 0, 0, 1249, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1243, - 0, 0, 0, 1250, 1244, 0, 1251, 0, 0, 0, - 0, 1253, 0, 0, 1254, 1255, 1256, 0, 1257, 1258, - 1259, 1260, 
1261, 1262, 0, 1245, 1246, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1247, - 0, 0, 0, 0, 0, 0, 0, 1252, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 1248, 0, 0, - 1249, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1252, 1250, 0, 0, 1251, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 2133, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 1253, 0, 0, 1254, - 1255, 1256, 0, 1257, 1258, 1259, 1260, 1261, 1262, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1252, 0, 0, 0, 0, 0, - 0, 0, 1253, 0, 0, 1254, 1255, 1256, 0, 1257, - 1258, 1259, 1260, 1261, 1262, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 957, 0, 1601, 1253, 0, 0, 1254, 1255, 1256, 0, - 1257, 1258, 1259, 1260, 1261, 1262, 90, 91, 92, 93, - 94, 95, 96, 97, 958, 98, 99, 100, 959, 960, - 961, 962, 963, 964, 965, 101, 102, 966, 103, 104, - 105, 0, 107, 108, 109, 656, 657, 0, 658, 659, - 967, 115, 116, 117, 118, 119, 120, 968, 969, 121, - 122, 660, 661, 125, 970, 126, 127, 128, 129, 662, - 971, 0, 972, 132, 133, 134, 135, 136, 0, 138, - 139, 140, 973, 141, 142, 143, 144, 145, 146, 974, - 0, 148, 149, 150, 975, 976, 977, 0, 978, 979, - 980, 152, 153, 154, 155, 156, 157, 158, 663, 664, - 161, 981, 162, 982, 163, 164, 165, 166, 167, 168, - 983, 169, 170, 171, 172, 173, 984, 985, 174, 175, - 176, 177, 178, 986, 179, 180, 181, 987, 182, 183, - 184, 988, 185, 186, 187, 188, 665, 190, 191, 192, - 193, 666, 989, 195, 990, 196, 197, 667, 199, 991, - 200, 992, 201, 0, 993, 0, 204, 205, 206, 0, - 208, 994, 209, 995, 668, 211, 996, 212, 213, 214, - 215, 216, 217, 0, 219, 220, 221, 222, 997, 223, - 224, 225, 226, 227, 228, 998, 229, 0, 669, 232, - 233, 234, 235, 670, 671, 999, 672, 1000, 239, 0, - 0, 242, 0, 
244, 245, 246, 247, 248, 1001, 1002, - 249, 0, 251, 0, 1003, 253, 254, 255, 1004, 1005, - 256, 257, 258, 259, 260, 673, 262, 263, 264, 265, - 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, - 276, 277, 278, 279, 674, 0, 675, 283, 284, 285, - 676, 1006, 287, 288, 0, 290, 1007, 677, 292, 678, - 294, 295, 296, 1008, 297, 298, 1009, 1010, 299, 300, - 301, 1011, 1012, 302, 679, 0, 305, 0, 680, 308, - 309, 310, 311, 312, 313, 314, 315, 316, 317, 1013, - 318, 319, 681, 321, 322, 682, 324, 325, 326, 1014, - 327, 328, 329, 330, 331, 332, 333, 334, 335, 683, - 337, 338, 339, 340, 1015, 341, 342, 343, 344, 345, - 346, 347, 348, 349, 350, 351, 352, 1016, 353, 354, - 0, 356, 357, 358, 684, 360, 361, 362, 363, 364, - 365, 366, 367, 368, 369, 370, 1017, 371, 372, 373, - 374, 375, 1018, 376, 685, 378, 379, 380, 0, 382, - 383, 686, 385, 1019, 386, 387, 388, 389, 390, 391, - 392, 393, 394, 395, 396, 687, 398, 688, 1020, 400, - 401, 1021, 402, 0, 404, 405, 406, 407, 408, 1022, - 689, 690, 1023, 1024, 411, 412, 691, 414, 692, 1025, - 416, 417, 693, 419, 420, 421, 422, 423, 1026, 1027, - 424, 425, 426, 427, 428, 1028, 1029, 429, 430, 431, - 432, 433, 1030, 695, 1031, 436, 0, 438, 439, 440, - 441, 1032, 1033, 442, 1034, 1035, 443, 444, 445, 446, - 447, 448, 696, 697, 698, 699, 700, 701, 702, 703, - 704, 705, 706, 460, 461, 462, 463, 957, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, - 97, 958, 98, 99, 100, 959, 960, 961, 962, 963, - 964, 965, 101, 102, 966, 103, 104, 105, 0, 107, - 108, 109, 656, 657, 0, 658, 659, 967, 115, 116, - 117, 118, 119, 120, 968, 969, 121, 122, 660, 661, - 125, 970, 126, 127, 128, 129, 662, 971, 0, 972, - 132, 133, 134, 135, 136, 0, 138, 139, 140, 973, - 141, 142, 143, 144, 145, 146, 974, 0, 148, 149, - 150, 975, 976, 977, 0, 978, 979, 980, 152, 153, - 154, 155, 156, 157, 158, 663, 664, 161, 981, 162, - 982, 163, 164, 165, 166, 167, 168, 983, 169, 170, - 171, 172, 173, 984, 985, 174, 175, 176, 177, 178, - 986, 179, 180, 181, 
987, 182, 183, 184, 988, 185, - 186, 187, 188, 665, 190, 191, 192, 193, 666, 989, - 195, 990, 196, 197, 667, 199, 991, 200, 992, 201, - 0, 993, 0, 204, 205, 206, 0, 208, 994, 209, - 995, 668, 211, 996, 212, 213, 214, 215, 216, 217, - 0, 219, 220, 221, 222, 997, 223, 224, 225, 226, - 227, 228, 998, 229, 0, 669, 232, 233, 234, 235, - 670, 671, 999, 672, 1000, 239, 0, 0, 242, 0, - 244, 245, 246, 247, 248, 1001, 1002, 249, 0, 251, - 0, 1003, 253, 254, 255, 1004, 1005, 256, 257, 258, - 259, 260, 673, 262, 263, 264, 265, 266, 267, 268, - 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, - 279, 674, 0, 675, 283, 284, 285, 676, 1006, 287, - 288, 0, 290, 1007, 677, 292, 678, 294, 295, 296, - 1008, 297, 298, 1009, 1010, 299, 300, 301, 1011, 1012, - 302, 679, 0, 305, 0, 680, 308, 309, 310, 311, - 312, 313, 314, 315, 316, 317, 1013, 318, 319, 681, - 321, 322, 682, 324, 325, 326, 1014, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 683, 337, 338, 339, - 340, 1015, 341, 342, 343, 344, 345, 346, 347, 348, - 349, 350, 351, 352, 1016, 353, 354, 0, 356, 357, - 358, 684, 360, 361, 362, 363, 364, 365, 366, 367, - 368, 369, 370, 1017, 371, 372, 373, 374, 375, 1018, - 376, 685, 378, 379, 380, 0, 382, 383, 686, 385, - 1019, 386, 387, 388, 389, 390, 391, 392, 393, 394, - 395, 396, 687, 398, 688, 1020, 400, 401, 1021, 402, - 0, 404, 405, 406, 407, 408, 1022, 689, 690, 1023, - 1024, 411, 412, 691, 414, 692, 1025, 416, 417, 693, - 419, 420, 421, 422, 423, 1026, 1027, 424, 425, 426, - 427, 428, 1028, 1029, 429, 430, 431, 432, 433, 1030, - 695, 1031, 436, 0, 438, 439, 440, 441, 1032, 1033, - 442, 1034, 1035, 443, 444, 445, 446, 447, 448, 696, - 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, - 460, 461, 462, 463, 957, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 90, 91, 92, 93, 1741, 95, 96, 97, 958, 98, - 99, 100, 959, 960, 961, 962, 963, 964, 965, 101, - 102, 966, 103, 104, 105, 0, 107, 108, 109, 656, - 657, 0, 658, 659, 967, 115, 116, 117, 118, 119, - 120, 968, 969, 121, 122, 660, 
661, 125, 970, 126, - 127, 128, 129, 662, 971, 0, 972, 132, 133, 134, - 135, 136, 0, 138, 139, 140, 973, 141, 142, 143, - 144, 145, 146, 974, 0, 148, 149, 150, 975, 976, - 977, 0, 978, 979, 980, 152, 153, 154, 155, 156, - 157, 158, 663, 664, 161, 981, 162, 982, 163, 164, - 165, 166, 167, 168, 983, 169, 170, 171, 172, 173, - 984, 985, 174, 175, 176, 1742, 178, 986, 179, 180, - 181, 987, 182, 183, 184, 988, 185, 186, 187, 188, - 665, 190, 191, 192, 193, 666, 989, 195, 990, 196, - 197, 667, 199, 991, 200, 992, 201, 0, 993, 0, - 204, 205, 206, 0, 208, 994, 209, 995, 668, 211, - 996, 212, 213, 214, 215, 216, 217, 0, 219, 220, - 221, 222, 997, 223, 224, 225, 226, 227, 228, 998, - 229, 0, 669, 232, 233, 234, 235, 670, 671, 999, - 672, 1000, 239, 0, 0, 242, 0, 244, 245, 246, - 247, 248, 1001, 1002, 249, 0, 251, 0, 1003, 253, - 254, 255, 1004, 1005, 256, 257, 258, 259, 260, 673, - 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, - 272, 273, 274, 275, 276, 277, 278, 279, 674, 0, - 675, 283, 284, 285, 676, 1006, 287, 288, 0, 290, - 1007, 677, 292, 678, 294, 295, 296, 1008, 297, 298, - 1009, 1010, 299, 300, 301, 1011, 1012, 302, 679, 0, - 305, 0, 680, 308, 309, 310, 311, 312, 313, 314, - 315, 316, 317, 1013, 318, 319, 681, 321, 322, 682, - 324, 325, 326, 1014, 327, 328, 329, 330, 331, 332, - 333, 334, 335, 683, 337, 338, 339, 340, 1015, 341, - 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, - 352, 1016, 353, 354, 0, 356, 357, 358, 684, 360, - 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, - 1017, 371, 372, 373, 374, 375, 1018, 1743, 685, 378, - 379, 380, 0, 382, 383, 686, 385, 1019, 386, 387, - 388, 389, 390, 391, 392, 393, 394, 395, 396, 687, - 398, 688, 1020, 400, 401, 1021, 402, 0, 404, 405, - 406, 407, 408, 1022, 689, 690, 1023, 1024, 411, 412, - 691, 414, 692, 1025, 416, 417, 693, 419, 420, 421, - 422, 423, 1026, 1027, 424, 425, 426, 427, 428, 1028, - 1029, 429, 430, 431, 432, 433, 1030, 695, 1031, 436, - 0, 438, 439, 440, 441, 1032, 1033, 442, 1034, 1035, - 443, 
444, 445, 446, 447, 448, 696, 697, 698, 699, - 700, 701, 702, 703, 704, 705, 706, 460, 461, 462, - 463, 89, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, - 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, - 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, - 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, - 114, 0, 115, 116, 117, 118, 119, 120, 0, 736, - 121, 122, 123, 124, 125, 0, 126, 127, 128, 129, - 739, 0, 740, 0, 132, 133, 134, 135, 136, 137, - 138, 139, 140, 0, 141, 142, 143, 144, 145, 146, - 0, 147, 148, 149, 150, 741, 742, 743, 744, 745, - 746, 747, 152, 153, 154, 155, 156, 157, 158, 159, - 160, 161, 0, 162, 0, 163, 164, 165, 166, 167, - 168, 0, 169, 170, 171, 172, 173, 0, 0, 174, - 175, 176, 177, 178, 0, 179, 180, 181, 0, 182, - 183, 184, 0, 185, 186, 187, 188, 189, 190, 191, - 192, 193, 753, 0, 195, 0, 196, 197, 198, 199, - 0, 200, 0, 201, 202, 0, 203, 204, 205, 206, - 207, 208, 0, 209, 0, 210, 211, 0, 212, 213, - 214, 215, 216, 217, 218, 219, 220, 221, 222, 0, - 223, 224, 225, 226, 227, 228, 0, 229, 230, 231, - 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 0, - 0, 249, 250, 251, 252, 0, 253, 254, 255, 759, - 760, 256, 257, 258, 259, 260, 261, 262, 263, 264, - 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, - 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, - 285, 286, 0, 287, 288, 289, 290, 0, 765, 292, - 293, 294, 295, 296, 0, 297, 298, 0, 0, 299, - 300, 301, 0, 0, 302, 303, 304, 305, 306, 768, - 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, - 0, 318, 319, 769, 321, 322, 323, 324, 325, 326, - 0, 327, 328, 329, 330, 331, 332, 333, 334, 335, - 336, 337, 338, 339, 340, 0, 341, 342, 343, 344, - 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, - 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, - 364, 365, 366, 367, 368, 369, 370, 0, 371, 372, - 373, 374, 375, 772, 376, 377, 378, 379, 380, 381, - 382, 383, 384, 385, 0, 386, 387, 388, 389, 390, - 391, 392, 393, 394, 395, 396, 397, 398, 774, 0, - 400, 401, 0, 402, 
403, 404, 405, 406, 407, 408, - 0, 409, 410, 0, 0, 411, 412, 777, 414, 778, - 0, 416, 417, 780, 419, 420, 421, 422, 423, 0, - 0, 424, 425, 426, 427, 428, 781, 0, 429, 430, - 431, 432, 433, 434, 435, 0, 436, 437, 438, 439, - 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, - 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, - 456, 457, 458, 459, 460, 461, 462, 463, 89, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, - 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, - 0, 0, 0, 101, 102, 0, 103, 104, 105, 106, - 107, 108, 109, 110, 111, 112, 113, 114, 0, 115, - 116, 117, 118, 119, 120, 0, 0, 121, 122, 123, - 124, 125, 0, 126, 127, 128, 129, 130, 0, 131, - 0, 132, 133, 134, 135, 136, 137, 138, 139, 140, - 0, 141, 142, 143, 144, 145, 146, 0, 147, 148, - 149, 150, 0, 0, 0, 151, 0, 0, 0, 152, - 153, 154, 155, 156, 157, 158, 159, 160, 161, 0, - 162, 0, 163, 164, 165, 166, 167, 168, 0, 169, - 170, 171, 172, 173, 0, 0, 174, 175, 176, 177, - 178, 0, 179, 180, 181, 0, 182, 183, 184, 0, - 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, - 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, - 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, - 209, 0, 210, 211, 0, 212, 213, 214, 215, 216, - 217, 218, 219, 220, 221, 222, 0, 223, 224, 225, - 226, 227, 228, 0, 229, 230, 231, 232, 233, 234, - 235, 236, 237, 0, 238, 0, 239, 240, 241, 242, - 243, 244, 245, 246, 247, 248, 0, 0, 249, 250, - 251, 252, 0, 253, 254, 255, 0, 0, 256, 257, - 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, - 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, - 278, 279, 280, 281, 282, 283, 284, 285, 286, 0, - 287, 288, 289, 290, 0, 291, 292, 293, 294, 295, - 296, 0, 297, 298, 0, 0, 299, 300, 301, 0, - 0, 302, 303, 304, 305, 306, 307, 308, 309, 310, - 311, 312, 313, 314, 315, 316, 317, 0, 318, 319, - 320, 321, 322, 323, 324, 325, 326, 0, 327, 328, - 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, - 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, - 348, 349, 350, 351, 352, 0, 353, 354, 355, 356, - 357, 358, 359, 
360, 361, 362, 363, 364, 365, 366, - 367, 368, 369, 370, 0, 371, 372, 373, 374, 375, - 0, 376, 377, 378, 379, 380, 381, 382, 383, 384, - 385, 0, 386, 387, 388, 389, 390, 391, 392, 393, - 394, 395, 396, 397, 398, 399, 0, 400, 401, 0, - 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, - 0, 0, 411, 412, 413, 414, 415, 0, 416, 417, - 418, 419, 420, 421, 422, 423, 0, 0, 424, 425, - 426, 427, 428, 0, 0, 429, 430, 431, 432, 433, - 434, 435, 0, 436, 437, 438, 439, 440, 441, 0, - 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, - 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, - 459, 460, 461, 462, 463, 488, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, - 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, - 101, 102, 0, 103, 104, 105, 0, 107, 108, 109, - 110, 111, 0, 113, 114, 0, 115, 116, 117, 118, - 119, 120, 0, 0, 121, 122, 123, 124, 125, 1348, - 126, 127, 128, 129, 130, 0, 0, 1349, 132, 133, - 134, 135, 136, 0, 138, 139, 140, 1350, 141, 142, - 143, 144, 145, 146, 0, 0, 148, 149, 150, 0, - 0, 0, 0, 0, 0, 0, 152, 153, 154, 155, - 156, 157, 158, 159, 160, 161, 0, 162, 0, 163, - 164, 165, 166, 167, 168, 0, 169, 170, 171, 172, - 173, 0, 0, 174, 175, 176, 177, 178, 0, 179, - 180, 181, 0, 182, 183, 184, 0, 185, 186, 187, - 188, 189, 190, 191, 192, 193, 194, 0, 195, 0, - 196, 197, 198, 199, 0, 200, 1351, 201, 0, 0, - 0, 204, 205, 206, 0, 208, 0, 209, 0, 210, - 211, 0, 212, 213, 214, 215, 216, 1352, 0, 219, - 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, - 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, - 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, - 246, 247, 248, 0, 0, 249, 0, 251, 0, 0, - 253, 254, 255, 0, 0, 256, 257, 258, 259, 260, - 490, 262, 263, 264, 265, 266, 267, 268, 269, 270, - 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, - 0, 282, 283, 284, 285, 286, 0, 287, 288, 0, - 290, 0, 291, 292, 293, 294, 295, 296, 0, 297, - 298, 0, 0, 299, 300, 301, 0, 0, 302, 303, - 0, 305, 0, 307, 308, 309, 310, 311, 312, 313, - 314, 315, 316, 317, 0, 318, 319, 320, 321, 
322, - 323, 324, 325, 326, 1353, 327, 328, 329, 330, 331, - 332, 333, 334, 335, 336, 337, 338, 339, 340, 0, - 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, - 351, 352, 0, 353, 354, 0, 356, 357, 358, 359, - 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, - 370, 0, 371, 372, 373, 374, 375, 0, 376, 377, - 378, 379, 380, 0, 382, 383, 384, 385, 0, 386, - 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, - 492, 398, 399, 0, 400, 401, 0, 402, 0, 404, - 405, 406, 407, 408, 0, 409, 410, 0, 0, 411, - 412, 413, 414, 415, 0, 416, 417, 418, 419, 420, - 421, 422, 423, 0, 1354, 424, 425, 426, 427, 428, - 0, 0, 429, 430, 431, 432, 433, 434, 435, 0, - 436, 0, 438, 439, 440, 441, 0, 0, 442, 0, - 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, - 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, - 462, 463, 488, 0, 0, 0, 0, 0, 0, 0, + 462, 463, 488, 0, 560, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, - 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, + 3, 4, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, 0, 107, 108, 109, 110, 111, 0, 113, 114, 0, 115, 116, 117, 118, 119, 120, 0, - 0, 121, 122, 123, 124, 125, 1348, 126, 127, 128, + 0, 121, 122, 123, 124, 125, 0, 126, 127, 128, 129, 130, 0, 0, 0, 132, 133, 134, 135, 136, - 0, 138, 139, 140, 1350, 141, 142, 143, 144, 145, + 0, 138, 139, 140, 0, 141, 142, 143, 144, 145, 146, 0, 0, 148, 149, 150, 0, 0, 0, 0, 0, 0, 0, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 0, 162, 0, 163, 164, 165, 166, @@ -191368,13 +205282,13 @@ static const yytype_int16 yytable[] = 174, 175, 176, 177, 178, 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 0, 195, 0, 196, 197, 198, - 199, 0, 200, 1351, 201, 0, 0, 0, 204, 205, + 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, - 0, 
0, 249, 0, 251, 1804, 0, 253, 254, 255, + 0, 0, 249, 0, 251, 0, 0, 253, 254, 255, 0, 0, 256, 257, 258, 259, 260, 490, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 0, 282, 283, @@ -191383,7 +205297,7 @@ static const yytype_int16 yytable[] = 299, 300, 301, 0, 0, 302, 303, 0, 305, 0, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 0, 318, 319, 320, 321, 322, 323, 324, 325, - 326, 1353, 327, 328, 329, 330, 331, 332, 333, 334, + 326, 0, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, 354, 0, 356, 357, 358, 359, 360, 361, 362, @@ -191394,61 +205308,61 @@ static const yytype_int16 yytable[] = 0, 400, 401, 0, 402, 0, 404, 405, 406, 407, 408, 0, 409, 410, 0, 0, 411, 412, 413, 414, 415, 0, 416, 417, 418, 419, 420, 421, 422, 423, - 0, 1354, 424, 425, 426, 427, 428, 0, 0, 429, + 0, 0, 424, 425, 426, 427, 428, 0, 0, 429, 430, 431, 432, 433, 434, 435, 0, 436, 0, 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, - 455, 456, 457, 458, 459, 460, 461, 462, 463, 655, - 0, 535, 0, 0, 0, 0, 0, 0, 0, 0, + 455, 456, 457, 458, 459, 460, 461, 462, 463, 488, + 0, 560, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, - 0, 107, 108, 109, 656, 657, 0, 658, 659, 0, + 0, 107, 108, 109, 110, 111, 0, 113, 114, 0, 115, 116, 117, 118, 119, 120, 0, 0, 121, 122, - 660, 661, 125, 0, 126, 127, 128, 129, 662, 0, + 123, 124, 125, 0, 126, 127, 128, 129, 130, 0, 0, 0, 132, 133, 134, 135, 136, 0, 138, 139, 140, 0, 141, 142, 143, 144, 145, 146, 0, 0, 148, 149, 150, 0, 0, 0, 0, 0, 0, 0, - 152, 153, 154, 155, 156, 157, 158, 663, 664, 161, + 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 0, 162, 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, 171, 172, 173, 0, 0, 174, 175, 176, 177, 178, 0, 179, 180, 181, 0, 182, 183, 184, - 0, 185, 
186, 187, 188, 665, 190, 191, 192, 193, - 666, 1082, 195, 0, 196, 197, 667, 199, 0, 200, + 0, 185, 186, 187, 188, 189, 190, 191, 192, 193, + 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, - 0, 209, 0, 668, 211, 0, 212, 213, 214, 215, + 0, 209, 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, 0, 219, 220, 221, 222, 0, 223, 224, - 225, 226, 227, 228, 0, 229, 0, 669, 232, 233, - 234, 235, 670, 671, 0, 672, 0, 239, 0, 0, + 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, + 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, 0, 0, 249, 0, 251, 0, 0, 253, 254, 255, 0, 0, 256, - 257, 258, 259, 260, 673, 262, 263, 264, 265, 266, + 257, 258, 259, 260, 490, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, - 277, 278, 279, 674, 0, 675, 283, 284, 285, 676, - 0, 287, 288, 0, 290, 0, 677, 292, 678, 294, - 295, 296, 0, 297, 298, 1083, 0, 299, 300, 301, - 0, 0, 302, 679, 0, 305, 0, 680, 308, 309, + 277, 278, 279, 280, 0, 282, 283, 284, 285, 286, + 0, 287, 288, 0, 290, 0, 291, 292, 293, 294, + 295, 296, 0, 297, 298, 0, 561, 299, 300, 301, + 0, 0, 302, 303, 0, 305, 0, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 0, 318, - 319, 681, 321, 322, 682, 324, 325, 326, 0, 327, - 328, 329, 330, 331, 332, 333, 334, 335, 683, 337, + 319, 320, 321, 322, 323, 324, 325, 326, 0, 327, + 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, 354, 0, - 356, 357, 358, 684, 360, 361, 362, 363, 364, 365, + 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 0, 371, 372, 373, 374, - 375, 0, 376, 685, 378, 379, 380, 0, 382, 383, - 686, 385, 0, 386, 387, 388, 389, 390, 391, 392, - 393, 394, 395, 396, 687, 398, 688, 0, 400, 401, - 0, 402, 0, 404, 405, 406, 407, 408, 0, 689, - 690, 0, 0, 411, 412, 691, 414, 692, 1084, 416, - 417, 693, 419, 420, 421, 422, 423, 0, 0, 424, + 375, 0, 376, 377, 378, 379, 380, 0, 382, 383, 
+ 384, 385, 0, 386, 387, 388, 389, 390, 391, 392, + 393, 394, 395, 396, 492, 398, 399, 0, 400, 401, + 0, 402, 0, 404, 405, 406, 407, 408, 0, 409, + 410, 0, 0, 411, 412, 413, 414, 415, 0, 416, + 417, 418, 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, 427, 428, 0, 0, 429, 430, 431, 432, - 433, 1030, 695, 0, 436, 0, 438, 439, 440, 441, + 433, 434, 435, 0, 436, 0, 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, - 448, 696, 697, 698, 699, 700, 701, 702, 703, 704, - 705, 706, 460, 461, 462, 463, 488, 0, 560, 0, + 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, + 458, 459, 460, 461, 462, 463, 488, 0, 560, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, - 0, 98, 99, 100, 3, 4, 0, 0, 0, 0, + 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, 0, 107, 108, 109, 110, 111, 0, 113, 114, 0, 115, 116, 117, 118, 119, 120, 0, 0, 121, 122, 123, 124, 125, @@ -191463,7 +205377,7 @@ static const yytype_int16 yytable[] = 187, 188, 189, 190, 191, 192, 193, 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, - 210, 211, 0, 212, 213, 214, 215, 216, 217, 0, + 210, 211, 0, 212, 213, 214, 215, 216, 606, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, @@ -191473,7 +205387,7 @@ static const yytype_int16 yytable[] = 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 0, 282, 283, 284, 285, 286, 0, 287, 288, 0, 290, 0, 291, 292, 293, 294, 295, 296, 0, - 297, 298, 0, 0, 299, 300, 301, 0, 0, 302, + 297, 298, 0, 561, 299, 300, 301, 0, 0, 302, 303, 0, 305, 0, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 0, 318, 319, 320, 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, 330, @@ -191492,53 +205406,53 @@ static const yytype_int16 yytable[] = 0, 436, 0, 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, - 461, 462, 463, 488, 0, 560, 0, 
0, 0, 0, + 461, 462, 463, 655, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, - 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, - 0, 103, 104, 105, 0, 107, 108, 109, 110, 111, - 0, 113, 114, 0, 115, 116, 117, 118, 119, 120, - 0, 0, 121, 122, 123, 124, 125, 0, 126, 127, - 128, 129, 130, 0, 0, 0, 132, 133, 134, 135, + 100, 3, 4, 0, 0, 0, 0, 0, 101, 102, + 0, 103, 104, 105, 0, 107, 108, 109, 656, 657, + 0, 658, 659, 0, 115, 116, 117, 118, 119, 120, + 0, 0, 121, 122, 660, 661, 125, 0, 126, 127, + 128, 129, 662, 0, 0, 0, 132, 133, 134, 135, 136, 0, 138, 139, 140, 0, 141, 142, 143, 144, 145, 146, 0, 0, 148, 149, 150, 0, 0, 0, 0, 0, 0, 0, 152, 153, 154, 155, 156, 157, - 158, 159, 160, 161, 0, 162, 0, 163, 164, 165, + 158, 663, 664, 161, 0, 162, 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, 171, 172, 173, 0, 0, 174, 175, 176, 177, 178, 0, 179, 180, 181, - 0, 182, 183, 184, 0, 185, 186, 187, 188, 189, - 190, 191, 192, 193, 194, 0, 195, 0, 196, 197, - 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, - 205, 206, 0, 208, 0, 209, 0, 210, 211, 0, + 0, 182, 183, 184, 0, 185, 186, 187, 188, 665, + 190, 191, 192, 193, 666, 0, 195, 0, 196, 197, + 667, 199, 0, 200, 0, 201, 0, 0, 0, 204, + 205, 206, 0, 208, 0, 209, 0, 668, 211, 0, 212, 213, 214, 215, 216, 217, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, - 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, + 0, 669, 232, 233, 234, 235, 670, 671, 0, 672, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, 0, 0, 249, 0, 251, 0, 0, 253, 254, - 255, 0, 0, 256, 257, 258, 259, 260, 490, 262, + 255, 0, 0, 256, 257, 258, 259, 260, 673, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, - 273, 274, 275, 276, 277, 278, 279, 280, 0, 282, - 283, 284, 285, 286, 0, 287, 288, 0, 290, 0, - 291, 292, 293, 294, 295, 296, 0, 297, 298, 0, - 561, 299, 300, 301, 0, 0, 302, 303, 0, 305, - 0, 307, 308, 309, 310, 311, 312, 313, 314, 315, - 316, 317, 0, 318, 319, 320, 321, 322, 323, 324, + 273, 274, 275, 276, 277, 278, 279, 
674, 0, 675, + 283, 284, 285, 676, 0, 287, 288, 0, 290, 0, + 677, 292, 678, 294, 295, 296, 0, 297, 298, 0, + 0, 299, 300, 301, 0, 0, 302, 679, 0, 305, + 0, 680, 308, 309, 310, 311, 312, 313, 314, 315, + 316, 317, 0, 318, 319, 681, 321, 322, 682, 324, 325, 326, 0, 327, 328, 329, 330, 331, 332, 333, - 334, 335, 336, 337, 338, 339, 340, 0, 341, 342, + 334, 335, 683, 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, - 0, 353, 354, 0, 356, 357, 358, 359, 360, 361, + 0, 353, 354, 0, 356, 357, 358, 684, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 0, - 371, 372, 373, 374, 375, 0, 376, 377, 378, 379, - 380, 0, 382, 383, 384, 385, 0, 386, 387, 388, - 389, 390, 391, 392, 393, 394, 395, 396, 492, 398, - 399, 0, 400, 401, 0, 402, 0, 404, 405, 406, - 407, 408, 0, 409, 410, 0, 0, 411, 412, 413, - 414, 415, 0, 416, 417, 418, 419, 420, 421, 422, + 371, 372, 373, 374, 375, 0, 376, 685, 378, 379, + 380, 0, 382, 383, 686, 385, 0, 386, 387, 388, + 389, 390, 391, 392, 393, 394, 395, 396, 687, 398, + 688, 0, 400, 401, 0, 402, 0, 404, 405, 406, + 407, 408, 0, 689, 690, 0, 0, 411, 412, 691, + 414, 692, 0, 416, 417, 693, 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, 427, 428, 0, 0, - 429, 430, 431, 432, 433, 434, 435, 0, 436, 0, + 429, 430, 431, 432, 433, 1032, 695, 0, 436, 0, 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, - 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, - 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, + 444, 445, 446, 447, 448, 696, 697, 698, 699, 700, + 701, 702, 703, 704, 705, 706, 460, 461, 462, 463, 488, 0, 560, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, @@ -191556,17 +205470,17 @@ static const yytype_int16 yytable[] = 184, 0, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, - 208, 0, 209, 0, 210, 211, 0, 212, 213, 214, - 215, 216, 606, 0, 219, 220, 221, 222, 0, 223, + 1665, 0, 209, 0, 210, 211, 0, 
212, 213, 214, + 215, 216, 217, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, 0, 0, - 249, 0, 251, 0, 0, 253, 254, 255, 0, 0, + 249, 0, 251, 0, 0, 253, 254, 1666, 0, 0, 256, 257, 258, 259, 260, 490, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 0, 282, 283, 284, 285, 286, 0, 287, 288, 0, 290, 0, 291, 292, 293, - 294, 295, 296, 0, 297, 298, 0, 561, 299, 300, + 294, 295, 296, 0, 297, 298, 0, 0, 299, 300, 301, 0, 0, 302, 303, 0, 305, 0, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 0, 318, 319, 320, 321, 322, 323, 324, 325, 326, 0, @@ -191578,63 +205492,63 @@ static const yytype_int16 yytable[] = 374, 375, 0, 376, 377, 378, 379, 380, 0, 382, 383, 384, 385, 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 492, 398, 399, 0, 400, - 401, 0, 402, 0, 404, 405, 406, 407, 408, 0, + 401, 1667, 402, 0, 404, 1668, 406, 1669, 408, 0, 409, 410, 0, 0, 411, 412, 413, 414, 415, 0, 416, 417, 418, 419, 420, 421, 422, 423, 0, 0, - 424, 425, 426, 427, 428, 0, 0, 429, 430, 431, + 424, 425, 1670, 427, 428, 0, 0, 429, 430, 431, 432, 433, 434, 435, 0, 436, 0, 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, - 457, 458, 459, 460, 461, 462, 463, 655, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 457, 458, 459, 460, 461, 462, 463, 488, 2567, 0, + 0, 0, 0, 2568, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, - 97, 0, 98, 99, 100, 3, 4, 0, 0, 0, + 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, 0, 107, - 108, 109, 656, 657, 0, 658, 659, 0, 115, 116, - 117, 118, 119, 120, 0, 0, 121, 122, 660, 661, - 125, 0, 126, 127, 128, 129, 662, 0, 0, 0, + 108, 109, 110, 111, 0, 113, 114, 0, 115, 116, + 117, 118, 119, 120, 0, 0, 121, 122, 123, 124, + 125, 0, 126, 127, 128, 129, 130, 0, 0, 0, 132, 133, 134, 135, 136, 0, 138, 139, 140, 0, 141, 142, 143, 144, 
145, 146, 0, 0, 148, 149, 150, 0, 0, 0, 0, 0, 0, 0, 152, 153, - 154, 155, 156, 157, 158, 663, 664, 161, 0, 162, + 154, 155, 156, 157, 158, 159, 160, 161, 0, 162, 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, 171, 172, 173, 0, 0, 174, 175, 176, 177, 178, 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, - 186, 187, 188, 665, 190, 191, 192, 193, 666, 0, - 195, 0, 196, 197, 667, 199, 0, 200, 0, 201, + 186, 187, 188, 189, 190, 191, 192, 193, 194, 0, + 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, - 0, 668, 211, 0, 212, 213, 214, 215, 216, 217, + 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, - 227, 228, 0, 229, 0, 669, 232, 233, 234, 235, - 670, 671, 0, 672, 0, 239, 0, 0, 242, 0, + 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, + 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, 0, 0, 249, 0, 251, 0, 0, 253, 254, 255, 0, 0, 256, 257, 258, - 259, 260, 673, 262, 263, 264, 265, 266, 267, 268, + 259, 260, 490, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, - 279, 674, 0, 675, 283, 284, 285, 676, 0, 287, - 288, 0, 290, 0, 677, 292, 678, 294, 295, 296, + 279, 280, 0, 282, 283, 284, 285, 286, 0, 287, + 288, 0, 290, 0, 291, 292, 293, 294, 295, 296, 0, 297, 298, 0, 0, 299, 300, 301, 0, 0, - 302, 679, 0, 305, 0, 680, 308, 309, 310, 311, - 312, 313, 314, 315, 316, 317, 0, 318, 319, 681, - 321, 322, 682, 324, 325, 326, 0, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 683, 337, 338, 339, + 302, 303, 0, 305, 0, 307, 308, 309, 310, 311, + 312, 313, 314, 315, 316, 317, 0, 318, 319, 320, + 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, + 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, 354, 0, 356, 357, - 358, 684, 360, 361, 362, 363, 364, 365, 366, 367, + 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 0, 371, 372, 373, 374, 375, 0, - 376, 685, 378, 379, 380, 0, 
382, 383, 686, 385, + 376, 377, 378, 379, 380, 0, 382, 383, 384, 385, 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, - 395, 396, 687, 398, 688, 0, 400, 401, 0, 402, - 0, 404, 405, 406, 407, 408, 0, 689, 690, 0, - 0, 411, 412, 691, 414, 692, 0, 416, 417, 693, + 395, 396, 492, 398, 399, 0, 400, 401, 0, 402, + 0, 404, 405, 406, 407, 408, 0, 409, 410, 0, + 0, 411, 412, 413, 414, 415, 0, 416, 417, 418, 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, - 427, 428, 0, 0, 429, 430, 431, 432, 433, 1030, - 695, 0, 436, 0, 438, 439, 440, 441, 0, 0, - 442, 0, 0, 443, 444, 445, 446, 447, 448, 696, - 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, - 460, 461, 462, 463, 488, 0, 560, 0, 0, 0, + 427, 428, 0, 0, 429, 430, 431, 432, 433, 434, + 435, 0, 436, 0, 438, 439, 440, 441, 0, 0, + 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, + 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, + 460, 461, 462, 463, 488, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, + 90, 91, 92, 93, 94, 95, 96, 97, 489, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, 0, 107, 108, 109, 110, 111, 0, 113, 114, 0, 115, 116, 117, 118, 119, @@ -191649,13 +205563,13 @@ static const yytype_int16 yytable[] = 181, 0, 182, 183, 184, 0, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, - 204, 205, 206, 0, 1654, 0, 209, 0, 210, 211, + 204, 205, 206, 0, 208, 0, 209, 0, 210, 211, 0, 212, 213, 214, 215, 216, 217, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, 0, 0, 249, 0, 251, 0, 0, 253, - 254, 1655, 0, 0, 256, 257, 258, 259, 260, 490, + 254, 255, 0, 0, 256, 257, 258, 259, 260, 490, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 0, 282, 283, 284, 285, 286, 0, 287, 288, 0, 290, @@ -191663,7 +205577,7 @@ static const yytype_int16 yytable[] = 0, 0, 299, 300, 301, 0, 
0, 302, 303, 0, 305, 0, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 0, 318, 319, 320, 321, 322, 323, - 324, 325, 326, 0, 327, 328, 329, 330, 331, 332, + 491, 325, 326, 0, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, 354, 0, 356, 357, 358, 359, 360, @@ -191671,17 +205585,17 @@ static const yytype_int16 yytable[] = 0, 371, 372, 373, 374, 375, 0, 376, 377, 378, 379, 380, 0, 382, 383, 384, 385, 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 492, - 398, 399, 0, 400, 401, 1656, 402, 0, 404, 1657, - 406, 1658, 408, 0, 409, 410, 0, 0, 411, 412, + 398, 399, 0, 400, 401, 0, 402, 0, 404, 405, + 406, 407, 408, 0, 409, 410, 0, 0, 411, 412, 413, 414, 415, 0, 416, 417, 418, 419, 420, 421, - 422, 423, 0, 0, 424, 425, 1659, 427, 428, 0, + 422, 423, 0, 0, 424, 425, 426, 427, 428, 0, 0, 429, 430, 431, 432, 433, 434, 435, 0, 436, 0, 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, - 463, 488, 2552, 0, 0, 0, 0, 2553, 0, 0, + 463, 488, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, - 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, + 93, 94, 95, 96, 97, 540, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, 0, 107, 108, 109, 110, 111, 0, 113, 114, 0, 115, 116, 117, 118, 119, 120, 0, 0, @@ -191719,16 +205633,16 @@ static const yytype_int16 yytable[] = 382, 383, 384, 385, 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 492, 398, 399, 0, 400, 401, 0, 402, 0, 404, 405, 406, 407, 408, - 0, 409, 410, 0, 0, 411, 412, 413, 414, 415, + 0, 541, 410, 0, 0, 542, 412, 413, 414, 415, 0, 416, 417, 418, 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, 427, 428, 0, 0, 429, 430, 431, 432, 433, 434, 435, 0, 436, 0, 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 488, 0, - 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, + 560, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, - 96, 97, 489, 98, 99, 100, 0, 0, 0, 0, + 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, 0, 107, 108, 109, 110, 111, 0, 113, 114, 0, 115, 116, 117, 118, 119, 120, 0, 0, 121, 122, 123, @@ -191744,7 +205658,7 @@ static const yytype_int16 yytable[] = 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, 210, 211, 0, 212, 213, 214, 215, 216, - 217, 0, 219, 220, 221, 222, 0, 223, 224, 225, + 601, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, 0, 0, 249, 0, @@ -191756,7 +205670,7 @@ static const yytype_int16 yytable[] = 296, 0, 297, 298, 0, 0, 299, 300, 301, 0, 0, 302, 303, 0, 305, 0, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 0, 318, 319, - 320, 321, 322, 323, 491, 325, 326, 0, 327, 328, + 320, 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, 354, 0, 356, @@ -191772,9 +205686,9 @@ static const yytype_int16 yytable[] = 434, 435, 0, 436, 0, 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, - 459, 460, 461, 462, 463, 488, 0, 0, 0, 0, + 459, 460, 461, 462, 463, 488, 0, 560, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 90, 91, 92, 93, 94, 95, 96, 97, 540, + 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, 0, 107, 108, 109, 110, 111, 0, 113, 114, 0, 115, 116, 117, 118, @@ -191790,7 +205704,7 @@ static const yytype_int16 yytable[] = 188, 189, 190, 191, 192, 193, 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, 210, - 211, 0, 212, 213, 214, 215, 216, 217, 0, 219, + 211, 0, 212, 213, 214, 215, 216, 604, 0, 219, 220, 221, 222, 0, 223, 224, 
225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, @@ -191812,7 +205726,7 @@ static const yytype_int16 yytable[] = 378, 379, 380, 0, 382, 383, 384, 385, 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 492, 398, 399, 0, 400, 401, 0, 402, 0, 404, - 405, 406, 407, 408, 0, 541, 410, 0, 0, 542, + 405, 406, 407, 408, 0, 409, 410, 0, 0, 411, 412, 413, 414, 415, 0, 416, 417, 418, 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, 427, 428, 0, 0, 429, 430, 431, 432, 433, 434, 435, 0, @@ -191837,7 +205751,7 @@ static const yytype_int16 yytable[] = 191, 192, 193, 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, 210, 211, 0, 212, - 213, 214, 215, 216, 601, 0, 219, 220, 221, 222, + 213, 214, 215, 216, 608, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, @@ -191884,7 +205798,7 @@ static const yytype_int16 yytable[] = 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, 210, 211, 0, 212, 213, 214, 215, - 216, 604, 0, 219, 220, 221, 222, 0, 223, 224, + 216, 217, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, 0, 0, 249, @@ -191912,10 +205826,10 @@ static const yytype_int16 yytable[] = 433, 434, 435, 0, 436, 0, 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, - 458, 459, 460, 461, 462, 463, 488, 0, 560, 0, + 458, 459, 460, 461, 462, 463, 488, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, - 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, + 634, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, 0, 107, 108, 109, 110, 111, 0, 113, 114, 0, 115, 116, 117, 118, 119, 120, 0, 0, 121, 122, 123, 124, 125, @@ -191930,7 +205844,7 @@ static 
const yytype_int16 yytable[] = 187, 188, 189, 190, 191, 192, 193, 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, - 210, 211, 0, 212, 213, 214, 215, 216, 608, 0, + 210, 211, 0, 212, 213, 214, 215, 216, 217, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, @@ -191977,7 +205891,7 @@ static const yytype_int16 yytable[] = 190, 191, 192, 193, 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, 210, 211, 0, - 212, 213, 214, 215, 216, 217, 0, 219, 220, 221, + 212, 213, 214, 215, 216, 1052, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, @@ -192006,9 +205920,9 @@ static const yytype_int16 yytable[] = 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, - 488, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 488, 0, 560, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, - 94, 95, 96, 97, 634, 98, 99, 100, 0, 0, + 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, 0, 107, 108, 109, 110, 111, 0, 113, 114, 0, 115, 116, 117, 118, 119, 120, 0, 0, 121, @@ -192024,7 +205938,7 @@ static const yytype_int16 yytable[] = 193, 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, 210, 211, 0, 212, 213, 214, - 215, 216, 217, 0, 219, 220, 221, 222, 0, 223, + 215, 216, 1054, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, 0, 0, @@ -192070,7 +205984,7 @@ static const yytype_int16 yytable[] = 186, 187, 188, 189, 190, 191, 192, 193, 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, - 0, 210, 211, 0, 212, 213, 214, 215, 216, 
1050, + 0, 210, 211, 0, 212, 213, 214, 215, 216, 1460, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, @@ -192099,10 +206013,10 @@ static const yytype_int16 yytable[] = 435, 0, 436, 0, 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, - 460, 461, 462, 463, 488, 0, 560, 0, 0, 0, + 460, 461, 462, 463, 488, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, - 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, + 99, 100, 0, 0, 0, 0, 0, 2224, 0, 101, 102, 0, 103, 104, 105, 0, 107, 108, 109, 110, 111, 0, 113, 114, 0, 115, 116, 117, 118, 119, 120, 0, 0, 121, 122, 123, 124, 125, 0, 126, @@ -192117,7 +206031,7 @@ static const yytype_int16 yytable[] = 189, 190, 191, 192, 193, 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, 210, 211, - 0, 212, 213, 214, 215, 216, 1052, 0, 219, 220, + 0, 212, 213, 214, 215, 216, 217, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, @@ -192146,10 +206060,10 @@ static const yytype_int16 yytable[] = 0, 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, - 463, 488, 0, 560, 0, 0, 0, 0, 0, 0, + 463, 488, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, - 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, + 0, 0, 0, 0, 2238, 0, 101, 102, 0, 103, 104, 105, 0, 107, 108, 109, 110, 111, 0, 113, 114, 0, 115, 116, 117, 118, 119, 120, 0, 0, 121, 122, 123, 124, 125, 0, 126, 127, 128, 129, @@ -192164,7 +206078,7 @@ static const yytype_int16 yytable[] = 192, 193, 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, 210, 211, 0, 212, 213, - 214, 215, 216, 1452, 0, 219, 220, 221, 222, 0, + 
214, 215, 216, 217, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, 0, @@ -192193,10 +206107,10 @@ static const yytype_int16 yytable[] = 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 488, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 560, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, - 0, 2211, 0, 101, 102, 0, 103, 104, 105, 0, + 0, 0, 0, 101, 102, 0, 103, 104, 105, 0, 107, 108, 109, 110, 111, 0, 113, 114, 0, 115, 116, 117, 118, 119, 120, 0, 0, 121, 122, 123, 124, 125, 0, 126, 127, 128, 129, 130, 0, 0, @@ -192211,7 +206125,7 @@ static const yytype_int16 yytable[] = 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, 210, 211, 0, 212, 213, 214, 215, 216, - 217, 0, 219, 220, 221, 222, 0, 223, 224, 225, + 2367, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, 0, 0, 249, 0, @@ -192242,7 +206156,7 @@ static const yytype_int16 yytable[] = 459, 460, 461, 462, 463, 488, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, - 98, 99, 100, 0, 0, 0, 0, 0, 2225, 0, + 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, 0, 107, 108, 109, 110, 111, 0, 113, 114, 0, 115, 116, 117, 118, 119, 120, 0, 0, 121, 122, 123, 124, 125, 0, @@ -192286,7 +206200,7 @@ static const yytype_int16 yytable[] = 436, 0, 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, - 462, 463, 488, 0, 560, 0, 0, 0, 0, 0, + 462, 463, 488, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, @@ -192304,11 +206218,11 @@ static const yytype_int16 
yytable[] = 191, 192, 193, 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, 210, 211, 0, 212, - 213, 214, 215, 216, 2354, 0, 219, 220, 221, 222, + 213, 214, 215, 216, 217, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, - 0, 0, 249, 0, 251, 0, 0, 253, 254, 255, + 0, 0, 249, 0, 251, 0, 0, 253, 254, 552, 0, 0, 256, 257, 258, 259, 260, 490, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 0, 282, 283, @@ -192321,12 +206235,12 @@ static const yytype_int16 yytable[] = 335, 336, 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, 354, 0, 356, 357, 358, 359, 360, 361, 362, - 363, 364, 365, 366, 367, 368, 369, 370, 0, 371, - 372, 373, 374, 375, 0, 376, 377, 378, 379, 380, + 363, 553, 365, 366, 367, 368, 369, 370, 0, 371, + 372, 373, 374, 554, 0, 376, 377, 378, 379, 380, 0, 382, 383, 384, 385, 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 492, 398, 399, 0, 400, 401, 0, 402, 0, 404, 405, 406, 407, - 408, 0, 409, 410, 0, 0, 411, 412, 413, 414, + 408, 0, 555, 410, 0, 0, 411, 412, 413, 414, 415, 0, 416, 417, 418, 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, 427, 428, 0, 0, 429, 430, 431, 432, 433, 434, 435, 0, 436, 0, 438, @@ -192365,7 +206279,7 @@ static const yytype_int16 yytable[] = 310, 311, 312, 313, 314, 315, 316, 317, 0, 318, 319, 320, 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, - 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, + 338, 570, 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, 354, 0, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 0, 371, 372, 373, 374, @@ -192397,12 +206311,12 @@ static const yytype_int16 yytable[] = 187, 188, 189, 190, 191, 192, 193, 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 
204, 205, 206, 0, 208, 0, 209, 0, - 210, 211, 0, 212, 213, 214, 215, 216, 217, 0, + 210, 211, 0, 212, 213, 214, 215, 216, 626, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, 0, 0, 249, 0, 251, 0, - 0, 253, 254, 552, 0, 0, 256, 257, 258, 259, + 0, 253, 254, 255, 0, 0, 256, 257, 258, 259, 260, 490, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 0, 282, 283, 284, 285, 286, 0, 287, 288, @@ -192414,12 +206328,12 @@ static const yytype_int16 yytable[] = 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, 354, 0, 356, 357, 358, - 359, 360, 361, 362, 363, 553, 365, 366, 367, 368, - 369, 370, 0, 371, 372, 373, 374, 554, 0, 376, + 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, + 369, 370, 0, 371, 372, 373, 374, 375, 0, 376, 377, 378, 379, 380, 0, 382, 383, 384, 385, 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 492, 398, 399, 0, 400, 401, 0, 402, 0, - 404, 405, 406, 407, 408, 0, 555, 410, 0, 0, + 404, 405, 406, 407, 408, 0, 409, 410, 0, 0, 411, 412, 413, 414, 415, 0, 416, 417, 418, 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, 427, 428, 0, 0, 429, 430, 431, 432, 433, 434, 435, @@ -192444,7 +206358,7 @@ static const yytype_int16 yytable[] = 190, 191, 192, 193, 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, 210, 211, 0, - 212, 213, 214, 215, 216, 217, 0, 219, 220, 221, + 212, 213, 214, 215, 216, 642, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, @@ -192458,7 +206372,7 @@ static const yytype_int16 yytable[] = 0, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 0, 318, 319, 320, 321, 322, 323, 324, 325, 326, 0, 327, 328, 329, 330, 331, 332, 333, - 334, 335, 336, 337, 338, 570, 340, 0, 
341, 342, + 334, 335, 336, 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, 354, 0, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 0, @@ -192491,7 +206405,7 @@ static const yytype_int16 yytable[] = 193, 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, 210, 211, 0, 212, 213, 214, - 215, 216, 626, 0, 219, 220, 221, 222, 0, 223, + 215, 216, 646, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, 0, 0, @@ -192537,7 +206451,7 @@ static const yytype_int16 yytable[] = 186, 187, 188, 189, 190, 191, 192, 193, 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, - 0, 210, 211, 0, 212, 213, 214, 215, 216, 642, + 0, 210, 211, 0, 212, 213, 214, 215, 216, 649, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, @@ -192584,7 +206498,7 @@ static const yytype_int16 yytable[] = 189, 190, 191, 192, 193, 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, 210, 211, - 0, 212, 213, 214, 215, 216, 646, 0, 219, 220, + 0, 212, 213, 214, 215, 216, 217, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, @@ -192601,12 +206515,12 @@ static const yytype_int16 yytable[] = 333, 334, 335, 336, 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, 354, 0, 356, 357, 358, 359, 360, - 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, + 361, 362, 363, 553, 365, 366, 367, 368, 369, 370, 0, 371, 372, 373, 374, 375, 0, 376, 377, 378, 379, 380, 0, 382, 383, 384, 385, 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 492, 398, 399, 0, 400, 401, 0, 402, 0, 404, 405, - 406, 407, 408, 0, 409, 
410, 0, 0, 411, 412, + 406, 407, 408, 0, 555, 410, 0, 0, 411, 412, 413, 414, 415, 0, 416, 417, 418, 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, 427, 428, 0, 0, 429, 430, 431, 432, 433, 434, 435, 0, 436, @@ -192616,7 +206530,7 @@ static const yytype_int16 yytable[] = 463, 488, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, - 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, + 0, 0, 0, 0, 1314, 0, 101, 102, 0, 103, 104, 105, 0, 107, 108, 109, 110, 111, 0, 113, 114, 0, 115, 116, 117, 118, 119, 120, 0, 0, 121, 122, 123, 124, 125, 0, 126, 127, 128, 129, @@ -192631,7 +206545,7 @@ static const yytype_int16 yytable[] = 192, 193, 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, 210, 211, 0, 212, 213, - 214, 215, 216, 649, 0, 219, 220, 221, 222, 0, + 214, 215, 216, 217, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, 0, @@ -192649,7 +206563,7 @@ static const yytype_int16 yytable[] = 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, 354, 0, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 0, 371, 372, - 373, 374, 375, 0, 376, 377, 378, 379, 380, 0, + 373, 374, 375, 0, 0, 377, 378, 379, 380, 0, 382, 383, 384, 385, 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 492, 398, 399, 0, 400, 401, 0, 402, 0, 404, 405, 406, 407, 408, @@ -192678,7 +206592,7 @@ static const yytype_int16 yytable[] = 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, 210, 211, 0, 212, 213, 214, 215, 216, - 217, 0, 219, 220, 221, 222, 0, 223, 224, 225, + 1447, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, 0, 0, 249, 0, @@ -192694,12 +206608,12 @@ static const yytype_int16 yytable[] = 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 
340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, 354, 0, 356, - 357, 358, 359, 360, 361, 362, 363, 553, 365, 366, + 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 0, 371, 372, 373, 374, 375, 0, 376, 377, 378, 379, 380, 0, 382, 383, 384, 385, 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 492, 398, 399, 0, 400, 401, 0, - 402, 0, 404, 405, 406, 407, 408, 0, 555, 410, + 402, 0, 404, 405, 406, 407, 408, 0, 409, 410, 0, 0, 411, 412, 413, 414, 415, 0, 416, 417, 418, 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, 427, 428, 0, 0, 429, 430, 431, 432, 433, @@ -192709,7 +206623,7 @@ static const yytype_int16 yytable[] = 459, 460, 461, 462, 463, 488, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, - 98, 99, 100, 0, 0, 0, 0, 0, 1306, 0, + 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, 0, 107, 108, 109, 110, 111, 0, 113, 114, 0, 115, 116, 117, 118, 119, 120, 0, 0, 121, 122, 123, 124, 125, 0, @@ -192724,7 +206638,7 @@ static const yytype_int16 yytable[] = 188, 189, 190, 191, 192, 193, 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, 210, - 211, 0, 212, 213, 214, 215, 216, 217, 0, 219, + 211, 0, 212, 213, 214, 215, 216, 1736, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, @@ -192742,7 +206656,7 @@ static const yytype_int16 yytable[] = 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, 354, 0, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, - 370, 0, 371, 372, 373, 374, 375, 0, 0, 377, + 370, 0, 371, 372, 373, 374, 375, 0, 376, 377, 378, 379, 380, 0, 382, 383, 384, 385, 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 492, 398, 399, 0, 400, 401, 0, 402, 0, 404, @@ -192771,7 +206685,7 @@ static const yytype_int16 yytable[] = 191, 192, 193, 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 
0, 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, 210, 211, 0, 212, - 213, 214, 215, 216, 1439, 0, 219, 220, 221, 222, + 213, 214, 215, 216, 1748, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, @@ -192818,7 +206732,7 @@ static const yytype_int16 yytable[] = 194, 0, 195, 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, 210, 211, 0, 212, 213, 214, 215, - 216, 1725, 0, 219, 220, 221, 222, 0, 223, 224, + 216, 1750, 0, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, 0, 0, 249, @@ -192846,1253 +206760,1020 @@ static const yytype_int16 yytable[] = 433, 434, 435, 0, 436, 0, 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, - 458, 459, 460, 461, 462, 463, 488, 0, 0, 0, + 458, 459, 460, 461, 462, 463, 1106, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, 0, - 0, 101, 102, 0, 103, 104, 105, 0, 107, 108, - 109, 110, 111, 0, 113, 114, 0, 115, 116, 117, - 118, 119, 120, 0, 0, 121, 122, 123, 124, 125, - 0, 126, 127, 128, 129, 130, 0, 0, 0, 132, - 133, 134, 135, 136, 0, 138, 139, 140, 0, 141, - 142, 143, 144, 145, 146, 0, 0, 148, 149, 150, - 0, 0, 0, 0, 0, 0, 0, 152, 153, 154, - 155, 156, 157, 158, 159, 160, 161, 0, 162, 0, + 0, 101, 102, 0, 103, 104, 105, 1107, 107, 108, + 109, 0, 1108, 1109, 1110, 1111, 0, 115, 116, 117, + 118, 119, 120, 0, 0, 121, 122, 1112, 1113, 125, + 0, 126, 127, 128, 129, 0, 0, 1114, 0, 132, + 133, 134, 135, 136, 1115, 138, 139, 140, 0, 141, + 142, 143, 144, 145, 146, 0, 1116, 148, 149, 150, + 0, 0, 0, 1117, 0, 0, 0, 152, 153, 154, + 155, 156, 157, 158, 1118, 1119, 161, 0, 162, 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, 171, - 172, 173, 0, 0, 174, 175, 176, 177, 178, 0, + 172, 173, 0, 0, 174, 
175, 751, 177, 178, 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, 186, - 187, 188, 189, 190, 191, 192, 193, 194, 0, 195, - 0, 196, 197, 198, 199, 0, 200, 0, 201, 0, - 0, 0, 204, 205, 206, 0, 208, 0, 209, 0, - 210, 211, 0, 212, 213, 214, 215, 216, 1737, 0, + 187, 188, 0, 190, 191, 192, 193, 0, 0, 195, + 0, 196, 197, 1120, 199, 0, 200, 0, 201, 1121, + 0, 1122, 204, 205, 206, 1123, 208, 0, 209, 0, + 0, 211, 0, 212, 213, 214, 215, 216, 217, 1124, 219, 220, 221, 222, 0, 223, 224, 225, 226, 227, - 228, 0, 229, 0, 231, 232, 233, 234, 235, 236, - 237, 0, 238, 0, 239, 0, 0, 242, 0, 244, - 245, 246, 247, 248, 0, 0, 249, 0, 251, 0, + 228, 0, 229, 1125, 0, 232, 233, 234, 235, 1126, + 1127, 0, 1128, 0, 239, 1129, 1130, 242, 1131, 244, + 245, 246, 247, 248, 0, 0, 249, 1132, 251, 1133, 0, 253, 254, 255, 0, 0, 256, 257, 258, 259, - 260, 490, 262, 263, 264, 265, 266, 267, 268, 269, + 260, 1134, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, - 280, 0, 282, 283, 284, 285, 286, 0, 287, 288, - 0, 290, 0, 291, 292, 293, 294, 295, 296, 0, + 1135, 1136, 1137, 283, 284, 285, 0, 0, 287, 288, + 1138, 290, 0, 0, 292, 1139, 294, 295, 296, 0, 297, 298, 0, 0, 299, 300, 301, 0, 0, 302, - 303, 0, 305, 0, 307, 308, 309, 310, 311, 312, - 313, 314, 315, 316, 317, 0, 318, 319, 320, 321, - 322, 323, 324, 325, 326, 0, 327, 328, 329, 330, - 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, + 0, 1140, 305, 1141, 0, 308, 309, 310, 311, 312, + 313, 314, 315, 316, 317, 0, 318, 319, 0, 321, + 322, 0, 324, 325, 326, 0, 327, 328, 329, 330, + 331, 332, 333, 334, 335, 1142, 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, 349, - 350, 351, 352, 0, 353, 354, 0, 356, 357, 358, - 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, + 350, 351, 352, 0, 353, 354, 1143, 356, 357, 358, + 1144, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 0, 371, 372, 373, 374, 375, 0, 376, - 377, 378, 379, 380, 0, 382, 383, 384, 385, 0, + 1145, 378, 379, 380, 1146, 382, 383, 
1147, 385, 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, - 396, 492, 398, 399, 0, 400, 401, 0, 402, 0, - 404, 405, 406, 407, 408, 0, 409, 410, 0, 0, - 411, 412, 413, 414, 415, 0, 416, 417, 418, 419, + 396, 1148, 398, 0, 0, 400, 401, 0, 402, 1149, + 404, 405, 406, 407, 408, 0, 1150, 1151, 0, 0, + 411, 412, 0, 414, 0, 0, 416, 417, 1152, 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, 427, - 428, 0, 0, 429, 430, 431, 432, 433, 434, 435, - 0, 436, 0, 438, 439, 440, 441, 0, 0, 442, - 0, 0, 443, 444, 445, 446, 447, 448, 449, 450, - 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, - 461, 462, 463, 488, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, - 91, 92, 93, 94, 95, 96, 97, 0, 98, 99, - 100, 0, 0, 0, 0, 0, 0, 0, 101, 102, - 0, 103, 104, 105, 0, 107, 108, 109, 110, 111, - 0, 113, 114, 0, 115, 116, 117, 118, 119, 120, - 0, 0, 121, 122, 123, 124, 125, 0, 126, 127, - 128, 129, 130, 0, 0, 0, 132, 133, 134, 135, - 136, 0, 138, 139, 140, 0, 141, 142, 143, 144, - 145, 146, 0, 0, 148, 149, 150, 0, 0, 0, - 0, 0, 0, 0, 152, 153, 154, 155, 156, 157, - 158, 159, 160, 161, 0, 162, 0, 163, 164, 165, - 166, 167, 168, 0, 169, 170, 171, 172, 173, 0, - 0, 174, 175, 176, 177, 178, 0, 179, 180, 181, - 0, 182, 183, 184, 0, 185, 186, 187, 188, 189, - 190, 191, 192, 193, 194, 0, 195, 0, 196, 197, - 198, 199, 0, 200, 0, 201, 0, 0, 0, 204, - 205, 206, 0, 208, 0, 209, 0, 210, 211, 0, - 212, 213, 214, 215, 216, 1739, 0, 219, 220, 221, - 222, 0, 223, 224, 225, 226, 227, 228, 0, 229, - 0, 231, 232, 233, 234, 235, 236, 237, 0, 238, - 0, 239, 0, 0, 242, 0, 244, 245, 246, 247, - 248, 0, 0, 249, 0, 251, 0, 0, 253, 254, - 255, 0, 0, 256, 257, 258, 259, 260, 490, 262, - 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, - 273, 274, 275, 276, 277, 278, 279, 280, 0, 282, - 283, 284, 285, 286, 0, 287, 288, 0, 290, 0, - 291, 292, 293, 294, 295, 296, 0, 297, 298, 0, - 0, 299, 300, 301, 0, 0, 302, 303, 0, 305, - 0, 307, 308, 309, 310, 311, 312, 313, 314, 315, - 316, 317, 0, 318, 319, 320, 321, 322, 
323, 324, - 325, 326, 0, 327, 328, 329, 330, 331, 332, 333, - 334, 335, 336, 337, 338, 339, 340, 0, 341, 342, - 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, - 0, 353, 354, 0, 356, 357, 358, 359, 360, 361, - 362, 363, 364, 365, 366, 367, 368, 369, 370, 0, - 371, 372, 373, 374, 375, 0, 376, 377, 378, 379, - 380, 0, 382, 383, 384, 385, 0, 386, 387, 388, - 389, 390, 391, 392, 393, 394, 395, 396, 492, 398, - 399, 0, 400, 401, 0, 402, 0, 404, 405, 406, - 407, 408, 0, 409, 410, 0, 0, 411, 412, 413, - 414, 415, 0, 416, 417, 418, 419, 420, 421, 422, - 423, 0, 0, 424, 425, 426, 427, 428, 0, 0, - 429, 430, 431, 432, 433, 434, 435, 0, 436, 0, - 438, 439, 440, 441, 0, 0, 442, 0, 0, 443, - 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, - 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, - 1104, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, - 94, 95, 96, 97, 0, 98, 99, 100, 0, 0, - 0, 0, 0, 0, 0, 101, 102, 0, 103, 104, - 105, 1105, 107, 108, 109, 0, 1106, 1107, 1108, 1109, - 0, 115, 116, 117, 118, 119, 120, 0, 0, 121, - 122, 1110, 1111, 125, 0, 126, 127, 128, 129, 0, - 0, 1112, 0, 132, 133, 134, 135, 136, 1113, 138, - 139, 140, 0, 141, 142, 143, 144, 145, 146, 0, - 1114, 148, 149, 150, 0, 0, 0, 1115, 0, 0, - 0, 152, 153, 154, 155, 156, 157, 158, 1116, 1117, - 161, 0, 162, 0, 163, 164, 165, 166, 167, 168, - 0, 169, 170, 171, 172, 173, 0, 0, 174, 175, - 751, 177, 178, 0, 179, 180, 181, 0, 182, 183, - 184, 0, 185, 186, 187, 188, 0, 190, 191, 192, - 193, 0, 0, 195, 0, 196, 197, 1118, 199, 0, - 200, 0, 201, 1119, 0, 1120, 204, 205, 206, 1121, - 208, 0, 209, 0, 0, 211, 0, 212, 213, 214, - 215, 216, 217, 1122, 219, 220, 221, 222, 0, 223, - 224, 225, 226, 227, 228, 0, 229, 1123, 0, 232, - 233, 234, 235, 1124, 1125, 0, 1126, 0, 239, 1127, - 1128, 242, 1129, 244, 245, 246, 247, 248, 0, 0, - 249, 1130, 251, 1131, 0, 253, 254, 255, 0, 0, - 256, 257, 258, 259, 260, 1132, 262, 263, 264, 265, - 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, - 276, 277, 278, 279, 
1133, 1134, 1135, 283, 284, 285, - 0, 0, 287, 288, 1136, 290, 0, 0, 292, 1137, - 294, 295, 296, 0, 297, 298, 0, 0, 299, 300, - 301, 0, 0, 302, 0, 1138, 305, 1139, 0, 308, - 309, 310, 311, 312, 313, 314, 315, 316, 317, 0, - 318, 319, 0, 321, 322, 0, 324, 325, 326, 0, - 327, 328, 329, 330, 331, 332, 333, 334, 335, 1140, - 337, 338, 339, 340, 0, 341, 342, 343, 344, 345, - 346, 347, 348, 349, 350, 351, 352, 0, 353, 354, - 1141, 356, 357, 358, 1142, 360, 361, 362, 363, 364, - 365, 366, 367, 368, 369, 370, 0, 371, 372, 373, - 374, 375, 0, 376, 1143, 378, 379, 380, 1144, 382, - 383, 1145, 385, 0, 386, 387, 388, 389, 390, 391, - 392, 393, 394, 395, 396, 1146, 398, 0, 0, 400, - 401, 0, 402, 1147, 404, 405, 406, 407, 408, 0, - 1148, 1149, 0, 0, 411, 412, 0, 414, 0, 0, - 416, 417, 1150, 419, 420, 421, 422, 423, 0, 0, - 424, 425, 426, 427, 428, 0, 0, 429, 430, 431, - 432, 433, 0, 1151, 0, 436, 1152, 438, 439, 440, - 441, 0, 0, 442, 0, 0, 443, 444, 445, 446, - 447, 448, 488, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 460, 461, 462, 463, 0, 90, 91, - 92, 93, 94, 95, 96, 97, 0, 98, 99, 100, - 0, 0, 0, 0, 0, 0, 0, 101, 102, 0, - 103, 104, 105, 0, 107, 108, 109, 110, 111, 0, - 113, 114, 0, 115, 116, 117, 118, 119, 120, 0, - 0, 121, 122, 123, 124, 125, 0, 126, 127, 128, - 129, 130, 0, 0, 0, 132, 133, 134, 135, 136, - 0, 138, 139, 140, 0, 141, 142, 143, 144, 145, - 146, 0, 0, 148, 149, 150, 0, 0, 0, 0, - 0, 0, 0, 152, 153, 154, 155, 156, 157, 158, - 159, 160, 161, 0, 162, 0, 163, 164, 165, 166, - 167, 168, 0, 169, 170, 171, 172, 173, 0, 0, - 174, 175, 176, 177, 178, 0, 179, 180, 181, 0, - 182, 183, 184, 0, 185, 186, 187, 188, 189, 190, - 191, 192, 193, 194, 0, 195, 0, 196, 197, 198, - 199, 0, 200, 0, 201, 0, 0, 0, 204, 205, - 206, 0, 208, 0, 209, 0, 210, 211, 0, 212, - 213, 214, 215, 216, 217, 0, 219, 220, 221, 222, - 0, 223, 224, 225, 226, 227, 228, 0, 229, 0, - 231, 232, 233, 234, 235, 236, 237, 0, 238, 0, - 239, 0, 0, 242, 0, 244, 245, 246, 247, 248, - 0, 0, 249, 0, 251, 0, 0, 253, 254, 255, 
- 0, 0, 256, 257, 258, 259, 260, 490, 262, 263, - 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, - 274, 275, 276, 277, 278, 279, 280, 0, 282, 283, - 284, 285, 286, 0, 287, 288, 0, 290, 0, 291, - 292, 293, 294, 295, 296, 0, 297, 298, 0, 0, - 299, 300, 301, 0, 0, 302, 303, 0, 305, 0, - 307, 308, 309, 310, 311, 312, 313, 0, 315, 316, - 317, 0, 318, 319, 320, 321, 322, 323, 324, 325, - 326, 0, 327, 328, 329, 330, 331, 332, 333, 0, - 335, 336, 337, 338, 339, 340, 0, 341, 342, 343, - 344, 345, 346, 347, 348, 349, 350, 351, 352, 0, - 353, 354, 0, 356, 357, 358, 359, 0, 361, 362, - 363, 364, 365, 366, 367, 368, 369, 370, 0, 371, - 372, 373, 374, 375, 0, 376, 377, 378, 379, 380, - 0, 382, 383, 384, 385, 0, 386, 387, 388, 389, - 390, 391, 392, 393, 394, 395, 396, 492, 398, 399, - 0, 400, 401, 0, 402, 0, 404, 405, 406, 407, - 408, 0, 409, 410, 0, 0, 411, 412, 413, 414, - 415, 0, 416, 417, 418, 419, 420, 421, 422, 423, - 0, 0, 424, 425, 426, 427, 428, 0, 0, 429, - 430, 431, 432, 433, 434, 435, 0, 436, 0, 438, - 439, 440, 441, 0, 0, 442, 0, 0, 443, 444, - 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, - 455, 456, 457, 458, 459, 460, 461, 462, 463, 2296, + 428, 0, 0, 429, 430, 431, 432, 433, 0, 1153, + 0, 436, 1154, 438, 439, 440, 441, 0, 0, 442, + 0, 0, 443, 444, 445, 446, 447, 448, 488, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 460, + 461, 462, 463, 0, 90, 91, 92, 93, 94, 95, + 96, 97, 0, 98, 99, 100, 0, 0, 0, 0, + 0, 0, 0, 101, 102, 0, 103, 104, 105, 0, + 107, 108, 109, 110, 111, 0, 113, 114, 0, 115, + 116, 117, 118, 119, 120, 0, 0, 121, 122, 123, + 124, 125, 0, 126, 127, 128, 129, 130, 0, 0, + 0, 132, 133, 134, 135, 136, 0, 138, 139, 140, + 0, 141, 142, 143, 144, 145, 146, 0, 0, 148, + 149, 150, 0, 0, 0, 0, 0, 0, 0, 152, + 153, 154, 155, 156, 157, 158, 159, 160, 161, 0, + 162, 0, 163, 164, 165, 166, 167, 168, 0, 169, + 170, 171, 172, 173, 0, 0, 174, 175, 176, 177, + 178, 0, 179, 180, 181, 0, 182, 183, 184, 0, + 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, + 0, 195, 0, 196, 
197, 198, 199, 0, 200, 0, + 201, 0, 0, 0, 204, 205, 206, 0, 208, 0, + 209, 0, 210, 211, 0, 212, 213, 214, 215, 216, + 217, 0, 219, 220, 221, 222, 0, 223, 224, 225, + 226, 227, 228, 0, 229, 0, 231, 232, 233, 234, + 235, 236, 237, 0, 238, 0, 239, 0, 0, 242, + 0, 244, 245, 246, 247, 248, 0, 0, 249, 0, + 251, 0, 0, 253, 254, 255, 0, 0, 256, 257, + 258, 259, 260, 490, 262, 263, 264, 265, 266, 267, + 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, + 278, 279, 280, 0, 282, 283, 284, 285, 286, 0, + 287, 288, 0, 290, 0, 291, 292, 293, 294, 295, + 296, 0, 297, 298, 0, 0, 299, 300, 301, 0, + 0, 302, 303, 0, 305, 0, 307, 308, 309, 310, + 311, 312, 313, 0, 315, 316, 317, 0, 318, 319, + 320, 321, 322, 323, 324, 325, 326, 0, 327, 328, + 329, 330, 331, 332, 333, 0, 335, 336, 337, 338, + 339, 340, 0, 341, 342, 343, 344, 345, 346, 347, + 348, 349, 350, 351, 352, 0, 353, 354, 0, 356, + 357, 358, 359, 0, 361, 362, 363, 364, 365, 366, + 367, 368, 369, 370, 0, 371, 372, 373, 374, 375, + 0, 376, 377, 378, 379, 380, 0, 382, 383, 384, + 385, 0, 386, 387, 388, 389, 390, 391, 392, 393, + 394, 395, 396, 492, 398, 399, 0, 400, 401, 0, + 402, 0, 404, 405, 406, 407, 408, 0, 409, 410, + 0, 0, 411, 412, 413, 414, 415, 0, 416, 417, + 418, 419, 420, 421, 422, 423, 0, 0, 424, 425, + 426, 427, 428, 0, 0, 429, 430, 431, 432, 433, + 434, 435, 0, 436, 0, 438, 439, 440, 441, 0, + 0, 442, 0, 0, 443, 444, 445, 446, 447, 448, + 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, + 459, 460, 461, 462, 463, 2309, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, - 95, 96, 97, 0, 98, 99, 100, 0, 0, 0, - 0, 0, 0, 0, 101, 102, 0, 103, 104, 105, - 2297, 107, 108, 109, 0, 1106, 2298, 1108, 1109, 0, - 115, 116, 117, 118, 119, 120, 0, 0, 121, 122, - 1110, 1111, 125, 0, 126, 127, 128, 129, 0, 0, - 2299, 0, 132, 133, 134, 135, 136, 2300, 138, 139, - 140, 0, 141, 142, 143, 144, 145, 146, 0, 2301, - 148, 149, 150, 0, 0, 0, 2302, 0, 0, 0, - 152, 153, 154, 155, 156, 157, 158, 1116, 1117, 161, - 
0, 162, 0, 163, 164, 165, 166, 167, 168, 0, - 169, 170, 171, 172, 173, 0, 0, 174, 175, 751, - 177, 178, 0, 179, 180, 181, 0, 182, 183, 184, - 0, 185, 186, 187, 188, 0, 190, 191, 192, 193, - 0, 0, 195, 0, 196, 197, 1118, 199, 0, 200, - 0, 201, 2303, 0, 2304, 204, 205, 206, 2305, 208, - 0, 209, 0, 0, 211, 0, 212, 213, 214, 215, - 216, 217, 2306, 219, 220, 221, 222, 0, 223, 224, - 225, 226, 227, 228, 0, 229, 2307, 0, 232, 233, - 234, 235, 1124, 1125, 0, 1126, 0, 239, 2308, 2309, - 242, 2310, 244, 245, 246, 247, 248, 0, 0, 249, - 2311, 251, 2312, 0, 253, 254, 255, 0, 0, 256, - 257, 258, 259, 260, 2498, 262, 263, 264, 265, 266, - 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, - 277, 278, 279, 1133, 2314, 1135, 283, 284, 285, 0, - 0, 287, 288, 2316, 290, 0, 0, 292, 1137, 294, - 295, 296, 0, 297, 298, 0, 0, 299, 300, 301, - 0, 0, 302, 0, 2318, 305, 2319, 0, 308, 309, - 310, 311, 312, 313, 314, 315, 316, 317, 0, 318, - 319, 0, 321, 322, 0, 324, 325, 326, 0, 327, - 328, 329, 330, 331, 332, 333, 334, 335, 1140, 337, - 338, 339, 340, 0, 341, 342, 343, 344, 345, 346, - 347, 348, 349, 350, 351, 352, 0, 353, 354, 2320, - 356, 357, 358, 0, 360, 361, 362, 363, 364, 365, - 366, 367, 368, 369, 370, 0, 371, 372, 373, 374, - 375, 0, 376, 0, 378, 379, 380, 2322, 382, 383, - 1145, 385, 0, 386, 387, 388, 389, 390, 391, 392, - 393, 394, 395, 396, 2499, 398, 0, 0, 400, 401, - 0, 402, 2324, 404, 405, 406, 407, 408, 0, 1148, - 1149, 0, 0, 411, 412, 0, 414, 0, 0, 416, - 417, 2325, 419, 420, 421, 422, 423, 0, 0, 424, - 425, 426, 427, 428, 0, 0, 429, 430, 431, 432, - 433, 0, 1151, 0, 436, 2326, 438, 439, 440, 441, - 0, 0, 442, 0, 0, 443, 444, 445, 446, 447, - 448, 1104, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 460, 461, 462, 463, 0, 90, 91, 92, - 93, 94, 95, 96, 97, 0, 98, 99, 100, 0, - 0, 0, 0, 0, 0, 0, 101, 102, 0, 103, - 104, 105, 1105, 107, 108, 109, 0, 1106, 1107, 1108, - 1109, 0, 115, 116, 117, 118, 119, 120, 0, 0, - 121, 122, 1110, 1111, 125, 0, 126, 127, 128, 129, - 0, 0, 1112, 0, 132, 133, 
134, 135, 136, 1113, - 138, 139, 140, 0, 141, 142, 143, 144, 145, 146, - 0, 1114, 148, 149, 150, 0, 0, 0, 1115, 0, - 0, 0, 152, 153, 154, 155, 156, 157, 158, 1116, - 1117, 161, 0, 162, 0, 163, 164, 165, 166, 167, - 168, 0, 169, 170, 171, 172, 173, 0, 0, 174, - 175, 751, 177, 178, 0, 179, 180, 181, 0, 182, - 183, 184, 0, 185, 186, 187, 188, 0, 190, 191, - 192, 193, 0, 0, 195, 0, 196, 197, 1118, 199, - 0, 200, 0, 201, 1119, 0, 1120, 204, 205, 206, - 1121, 208, 0, 209, 0, 0, 211, 0, 212, 213, - 214, 215, 216, 217, 1122, 219, 220, 221, 222, 0, - 223, 224, 225, 226, 227, 228, 0, 229, 1123, 0, - 232, 233, 234, 235, 1124, 1125, 0, 1126, 0, 239, - 1127, 1128, 242, 1129, 244, 245, 246, 247, 248, 0, - 0, 249, 1130, 251, 1131, 0, 253, 254, 255, 0, - 0, 256, 257, 258, 259, 260, 0, 262, 263, 264, - 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, - 275, 276, 277, 278, 279, 1133, 1134, 1135, 283, 284, - 285, 0, 0, 287, 288, 1136, 290, 0, 0, 292, - 1137, 294, 295, 296, 0, 297, 298, 0, 0, 299, - 300, 301, 0, 0, 302, 0, 1138, 305, 1139, 0, - 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, - 0, 318, 319, 0, 321, 322, 0, 324, 325, 326, - 0, 327, 328, 329, 330, 331, 332, 333, 334, 335, - 1140, 337, 338, 339, 340, 0, 341, 342, 343, 344, - 345, 346, 347, 348, 349, 350, 351, 352, 0, 353, - 354, 1141, 356, 357, 358, 0, 360, 361, 362, 363, - 364, 365, 366, 367, 368, 369, 370, 0, 371, 372, - 373, 374, 375, 0, 376, 0, 378, 379, 380, 1144, - 382, 383, 1145, 385, 0, 386, 387, 388, 389, 390, - 391, 392, 393, 394, 395, 396, 0, 398, 0, 0, - 400, 401, 0, 402, 1147, 404, 405, 406, 407, 408, - 0, 1148, 1149, 0, 0, 411, 412, 0, 414, 0, - 0, 416, 417, 1150, 419, 420, 421, 422, 423, 0, - 0, 424, 425, 426, 427, 428, 0, 0, 429, 430, - 431, 432, 433, 0, 1151, 0, 436, 1152, 438, 439, - 440, 441, 0, 0, 442, 0, 0, 443, 444, 445, - 446, 447, 448, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 460, 461, 462, 463 + 0, 90, 91, 92, 93, 94, 95, 96, 97, 0, + 98, 99, 100, 0, 0, 0, 0, 0, 0, 0, + 101, 102, 0, 103, 104, 105, 
2310, 107, 108, 109, + 0, 1108, 2311, 1110, 1111, 0, 115, 116, 117, 118, + 119, 120, 0, 0, 121, 122, 1112, 1113, 125, 0, + 126, 127, 128, 129, 0, 0, 2312, 0, 132, 133, + 134, 135, 136, 2313, 138, 139, 140, 0, 141, 142, + 143, 144, 145, 146, 0, 2314, 148, 149, 150, 0, + 0, 0, 2315, 0, 0, 0, 152, 153, 154, 155, + 156, 157, 158, 1118, 1119, 161, 0, 162, 0, 163, + 164, 165, 166, 167, 168, 0, 169, 170, 171, 172, + 173, 0, 0, 174, 175, 751, 177, 178, 0, 179, + 180, 181, 0, 182, 183, 184, 0, 185, 186, 187, + 188, 0, 190, 191, 192, 193, 0, 0, 195, 0, + 196, 197, 1120, 199, 0, 200, 0, 201, 2316, 0, + 2317, 204, 205, 206, 2318, 208, 0, 209, 0, 0, + 211, 0, 212, 213, 214, 215, 216, 217, 2319, 219, + 220, 221, 222, 0, 223, 224, 225, 226, 227, 228, + 0, 229, 2320, 0, 232, 233, 234, 235, 1126, 1127, + 0, 1128, 0, 239, 2321, 2322, 242, 2323, 244, 245, + 246, 247, 248, 0, 0, 249, 2324, 251, 2325, 0, + 253, 254, 255, 0, 0, 256, 257, 258, 259, 260, + 2513, 262, 263, 264, 265, 266, 267, 268, 269, 270, + 271, 272, 273, 274, 275, 276, 277, 278, 279, 1135, + 2327, 1137, 283, 284, 285, 0, 0, 287, 288, 2329, + 290, 0, 0, 292, 1139, 294, 295, 296, 0, 297, + 298, 0, 0, 299, 300, 301, 0, 0, 302, 0, + 2331, 305, 2332, 0, 308, 309, 310, 311, 312, 313, + 314, 315, 316, 317, 0, 318, 319, 0, 321, 322, + 0, 324, 325, 326, 0, 327, 328, 329, 330, 331, + 332, 333, 334, 335, 1142, 337, 338, 339, 340, 0, + 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, + 351, 352, 0, 353, 354, 2333, 356, 357, 358, 0, + 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, + 370, 0, 371, 372, 373, 374, 375, 0, 376, 0, + 378, 379, 380, 2335, 382, 383, 1147, 385, 0, 386, + 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, + 2514, 398, 0, 0, 400, 401, 0, 402, 2337, 404, + 405, 406, 407, 408, 0, 1150, 1151, 0, 0, 411, + 412, 0, 414, 0, 0, 416, 417, 2338, 419, 420, + 421, 422, 423, 0, 0, 424, 425, 426, 427, 428, + 0, 0, 429, 430, 431, 432, 433, 0, 1153, 0, + 436, 2339, 438, 439, 440, 441, 0, 0, 442, 0, + 0, 443, 444, 445, 
446, 447, 448, 1106, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 460, 461, + 462, 463, 0, 90, 91, 92, 93, 94, 95, 96, + 97, 0, 98, 99, 100, 0, 0, 0, 0, 0, + 0, 0, 101, 102, 0, 103, 104, 105, 1107, 107, + 108, 109, 0, 1108, 1109, 1110, 1111, 0, 115, 116, + 117, 118, 119, 120, 0, 0, 121, 122, 1112, 1113, + 125, 0, 126, 127, 128, 129, 0, 0, 1114, 0, + 132, 133, 134, 135, 136, 1115, 138, 139, 140, 0, + 141, 142, 143, 144, 145, 146, 0, 1116, 148, 149, + 150, 0, 0, 0, 1117, 0, 0, 0, 152, 153, + 154, 155, 156, 157, 158, 1118, 1119, 161, 0, 162, + 0, 163, 164, 165, 166, 167, 168, 0, 169, 170, + 171, 172, 173, 0, 0, 174, 175, 751, 177, 178, + 0, 179, 180, 181, 0, 182, 183, 184, 0, 185, + 186, 187, 188, 0, 190, 191, 192, 193, 0, 0, + 195, 0, 196, 197, 1120, 199, 0, 200, 0, 201, + 1121, 0, 1122, 204, 205, 206, 1123, 208, 0, 209, + 0, 0, 211, 0, 212, 213, 214, 215, 216, 217, + 1124, 219, 220, 221, 222, 0, 223, 224, 225, 226, + 227, 228, 0, 229, 1125, 0, 232, 233, 234, 235, + 1126, 1127, 0, 1128, 0, 239, 1129, 1130, 242, 1131, + 244, 245, 246, 247, 248, 0, 0, 249, 1132, 251, + 1133, 0, 253, 254, 255, 0, 0, 256, 257, 258, + 259, 260, 0, 262, 263, 264, 265, 266, 267, 268, + 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, + 279, 1135, 1136, 1137, 283, 284, 285, 0, 0, 287, + 288, 1138, 290, 0, 0, 292, 1139, 294, 295, 296, + 0, 297, 298, 0, 0, 299, 300, 301, 0, 0, + 302, 0, 1140, 305, 1141, 0, 308, 309, 310, 311, + 312, 313, 314, 315, 316, 317, 0, 318, 319, 0, + 321, 322, 0, 324, 325, 326, 0, 327, 328, 329, + 330, 331, 332, 333, 334, 335, 1142, 337, 338, 339, + 340, 0, 341, 342, 343, 344, 345, 346, 347, 348, + 349, 350, 351, 352, 0, 353, 354, 1143, 356, 357, + 358, 0, 360, 361, 362, 363, 364, 365, 366, 367, + 368, 369, 370, 0, 371, 372, 373, 374, 375, 0, + 376, 0, 378, 379, 380, 1146, 382, 383, 1147, 385, + 0, 386, 387, 388, 389, 390, 391, 392, 393, 394, + 395, 396, 0, 398, 0, 0, 400, 401, 0, 402, + 1149, 404, 405, 406, 407, 408, 0, 1150, 1151, 0, + 0, 411, 412, 0, 414, 0, 0, 416, 417, 1152, 
+ 419, 420, 421, 422, 423, 0, 0, 424, 425, 426, + 427, 428, 0, 0, 429, 430, 431, 432, 433, 0, + 1153, 0, 436, 1154, 438, 439, 440, 441, 0, 0, + 442, 0, 0, 443, 444, 445, 446, 447, 448, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 460, 461, 462, 463 }; static const yytype_int16 yycheck[] = -{ - 6, 605, 620, 34, 518, 612, 711, 905, 56, 517, - 551, 590, 6, 524, 0, 0, 903, 0, 6, 569, - 1203, 0, 891, 1479, 711, 614, 564, 878, 522, 789, - 16, 1735, 16, 776, 1048, 1048, 1048, 1078, 883, 880, - 1048, 944, 29, 58, 1741, 805, 1743, 58, 2080, 2056, - 0, 874, 2008, 2056, 1564, 1565, 1192, 817, 1732, 1569, - 1192, 1192, 1229, 1230, 1623, 1967, 16, 908, 0, 1686, - 0, 1192, 523, 2088, 524, 647, 33, 11, 1170, 1666, - 718, 50, 58, 2095, 34, 9, 5, 749, 58, 5, - 1233, 13, 14, 0, 11, 73, 1606, 1607, 2358, 86, - 1281, 651, 5, 1070, 766, 5, 5, 5, 58, 1973, - 5, 9, 1329, 1327, 13, 14, 67, 5, 5, 824, - 2361, 9, 5, 168, 41, 863, 76, 1801, 75, 167, - 605, 625, 607, 0, 609, 2412, 5, 529, 85, 9, - 5, 116, 82, 5, 13, 14, 76, 27, 13, 14, - 106, 13, 14, 5, 34, 48, 73, 5, 40, 5, - 5, 13, 14, 5, 119, 5, 2434, 5, 1842, 1843, - 587, 2448, 5, 5, 40, 2350, 2364, 210, 75, 58, - 93, 587, 236, 2531, 1972, 92, 126, 892, 85, 36, - 895, 896, 3, 4, 5, 15, 4, 1997, 9, 2502, - 711, 9, 27, 27, 737, 738, 717, 279, 55, 34, - 34, 4, 265, 164, 1909, 1358, 9, 277, 285, 285, - 216, 2283, 301, 168, 167, 92, 347, 236, 119, 762, - 136, 121, 11, 301, 325, 2335, 3, 787, 167, 114, - 2688, 2587, 831, 134, 2771, 27, 11, 119, 106, 204, - 1205, 161, 790, 27, 2531, 168, 116, 2623, 105, 116, - 64, 65, 41, 714, 31, 32, 262, 37, 157, 348, - 144, 11, 20, 59, 1143, 2457, 41, 268, 916, 172, - 818, 67, 11, 11, 2027, 923, 2746, 200, 2748, 216, - 117, 146, 11, 186, 73, 445, 2478, 393, 191, 338, - 75, 2569, 215, 114, 13, 14, 2833, 429, 73, 107, - 85, 224, 41, 2215, 1663, 826, 827, 2219, 2321, 129, - 159, 1880, 2026, 873, 2356, 321, 429, 366, 1677, 2517, - 5, 845, 428, 226, 208, 262, 68, 192, 227, 2787, - 490, 198, 2206, 352, 73, 258, 193, 129, 
2714, 2023, - 441, 2025, 297, 123, 73, 129, 296, 2100, 2818, 117, - 187, 312, 1317, 339, 486, 486, 279, 105, 1511, 339, - 949, 425, 279, 369, 427, 2652, 1658, 1659, 1888, 1889, - 1890, 1891, 373, 486, 1894, 1895, 1896, 1897, 1898, 1899, - 1900, 1901, 1902, 1903, 269, 242, 2742, 307, 209, 166, - 2500, 168, 481, 250, 262, 262, 1149, 264, 2685, 269, - 2472, 285, 279, 481, 486, 262, 425, 2205, 1056, 230, - 311, 2769, 261, 490, 490, 2607, 486, 372, 107, 187, - 343, 427, 486, 340, 1944, 1945, 2110, 1055, 394, 2805, - 1623, 486, 369, 486, 230, 2620, 293, 2142, 840, 489, - 2753, 2251, 490, 162, 871, 1117, 431, 490, 405, 399, - 339, 2729, 405, 1070, 274, 871, 448, 1205, 450, 320, - 290, 422, 427, 340, 2715, 1137, 405, 486, 371, 1446, - 383, 384, 484, 1700, 1940, 1699, 488, 473, 473, 1096, - 473, 338, 2769, 524, 473, 2502, 376, 348, 1349, 2502, - 427, 1668, 1405, 2763, 242, 1167, 484, 1688, 405, 391, - 488, 1694, 250, 360, 490, 23, 429, 486, 524, 2421, - 490, 29, 438, 473, 2426, 391, 8, 2429, 2716, 11, - 58, 450, 1430, 380, 16, 17, 18, 523, 444, 429, - 524, 448, 590, 473, 550, 551, 475, 421, 482, 2413, - 450, 376, 376, 472, 0, 427, 2260, 1065, 1072, 488, - 1068, 485, 577, 569, 486, 482, 577, 486, 1076, 1080, - 486, 439, 472, 523, 524, 1478, 2273, 1110, 1111, 1317, - 487, 448, 489, 567, 1676, 1174, 486, 486, 486, 23, - 2622, 486, 486, 579, 579, 582, 579, 484, 486, 486, - 579, 488, 1135, 486, 610, 611, 612, 477, 478, 1564, - 1565, 2626, 490, 570, 1569, 2627, 1965, 486, 1967, 407, - 487, 486, 489, 1173, 486, 1175, 134, 577, 1079, 579, - 1080, 1238, 2249, 1183, 486, 1224, 1225, 1187, 486, 1277, - 486, 486, 76, 194, 486, 651, 486, 579, 486, 579, - 129, 1606, 1607, 486, 486, 448, 1206, 1207, 248, 249, - 1882, 335, 2564, 451, 1197, 1198, 477, 478, 1811, 477, - 478, 448, 439, 1378, 1956, 1957, 1958, 1959, 2352, 472, - 1269, 194, 1220, 1388, 477, 478, 1391, 159, 1910, 2721, - 1279, 1280, 359, 157, 2253, 472, 23, 1880, 477, 478, - 479, 480, 481, 482, 92, 711, 243, 30, 1851, 191, - 
410, 717, 718, 719, 479, 480, 481, 482, 441, 129, - 1856, 1857, 1858, 1855, 1867, 1856, 2182, 394, 714, 735, - 212, 54, 173, 1241, 262, 1856, 1857, 477, 478, 479, - 480, 481, 482, 114, 1887, 155, 2753, 27, 220, 355, - 2753, 479, 480, 481, 482, 2459, 787, 763, 1463, 1366, - 1927, 486, 1803, 227, 714, 450, 356, 357, 435, 475, - 1374, 129, 450, 1916, 450, 283, 1463, 783, 784, 785, - 1923, 787, 488, 157, 532, 264, 534, 472, 162, 230, - 93, 398, 236, 400, 472, 274, 472, 155, 78, 281, - 2832, 352, 752, 809, 168, 553, 86, 134, 486, 360, - 486, 339, 276, 184, 2211, 1582, 486, 1584, 1585, 486, - 826, 827, 1401, 1402, 1403, 333, 1564, 1565, 2225, 337, - 1590, 1569, 2788, 2789, 469, 115, 200, 787, 366, 1446, - 488, 856, 857, 394, 859, 856, 857, 1670, 859, 67, - 264, 215, 858, 227, 264, 269, 862, 863, 366, 269, - 224, 1759, 214, 850, 274, 168, 2215, 873, 1606, 1607, - 2219, 1443, 1347, 2056, 2384, 1447, 1384, 75, 1450, 1782, - 2836, 1389, 279, 23, 475, 891, 4, 85, 2592, 29, - 441, 9, 236, 264, 359, 486, 114, 200, 4, 1374, - 2597, 469, 276, 9, 184, 319, 856, 857, 352, 859, - 916, 269, 215, 2056, 4, 195, 274, 923, 1393, 9, - 907, 224, 479, 1398, 432, 236, 1767, 1421, 236, 394, - 1690, 2387, 4, 1888, 1889, 1890, 1891, 9, 944, 1894, - 1895, 1896, 1897, 1898, 1899, 1900, 1901, 1902, 1903, 356, - 357, 481, 1463, 171, 481, 258, 283, 398, 488, 400, - 2470, 488, 484, 711, 486, 1476, 488, 1812, 1518, 717, - 435, 359, 1817, 13, 14, 487, 31, 32, 490, 343, - 166, 425, 487, 448, 425, 490, 347, 348, 325, 1944, - 1945, 473, 474, 475, 134, 477, 478, 479, 480, 481, - 482, 487, 412, 398, 490, 400, 394, 472, 352, 8, - 337, 338, 11, 423, 398, 490, 400, 16, 17, 18, - 1468, 486, 236, 360, 1472, 487, 1474, 2537, 490, 23, - 584, 486, 586, 1583, 33, 29, 2220, 92, 2222, 366, - 343, 352, 486, 791, 352, 487, 264, 435, 490, 487, - 1056, 269, 490, 801, 412, 8, 487, 394, 11, 490, - 448, 1611, 2103, 2412, 1070, 423, 814, 2081, 2081, 2081, - 2253, 487, 2421, 2081, 1080, 487, 824, 2426, 826, 827, - 2429, 425, 
2538, 398, 472, 400, 1665, 114, 41, 487, - 1096, 167, 490, 1079, 312, 48, 1080, 2684, 486, 2448, - 487, 1651, 382, 490, 441, 385, 157, 5, 54, 327, - 8, 162, 487, 210, 425, 490, 14, 425, 2822, 487, - 73, 487, 490, 11, 490, 487, 24, 15, 490, 1079, - 28, 469, 487, 487, 22, 490, 490, 1143, 352, 2037, - 134, 486, 486, 283, 32, 33, 1833, 214, 2291, 2036, - 1888, 1889, 1890, 1891, 13, 14, 1894, 1895, 1896, 1897, - 1898, 1899, 1900, 1901, 1902, 1903, 1738, 1173, 1740, 1175, - 149, 1914, 1915, 2024, 1180, 486, 227, 1183, 486, 333, - 1798, 1187, 2531, 2770, 1170, 1170, 149, 1170, 2075, 142, - 2035, 1170, 2037, 333, 149, 1201, 84, 337, 1204, 1205, - 1206, 1207, 487, 2790, 422, 490, 1944, 1945, 1214, 1215, - 1216, 425, 149, 212, 487, 2564, 148, 490, 487, 172, - 1170, 490, 1228, 487, 1831, 276, 366, 1233, 1234, 1235, - 1236, 1237, 1238, 186, 2377, 1995, 1242, 1243, 191, 292, - 1170, 1247, 13, 14, 487, 1251, 325, 490, 1254, 1255, - 1256, 1257, 1258, 1259, 1260, 1261, 1262, 2844, 487, 1265, - 487, 490, 486, 490, 1270, 292, 169, 1273, 264, 1275, - 173, 1277, 486, 226, 487, 487, 487, 490, 490, 490, - 487, 360, 281, 490, 552, 487, 554, 148, 490, 283, - 441, 194, 432, 1299, 1244, 487, 487, 487, 490, 490, - 490, 81, 487, 2652, 359, 490, 359, 487, 13, 14, - 490, 1317, 1296, 8, 487, 394, 11, 490, 1826, 2502, - 1326, 1327, 1833, 487, 13, 14, 490, 230, 281, 452, - 487, 486, 359, 490, 237, 1924, 2685, 487, 149, 394, - 490, 394, 36, 337, 0, 487, 41, 2224, 490, 23, - 2227, 2056, 1358, 48, 1304, 29, 13, 14, 1364, 2502, - 1366, 55, 441, 487, 13, 14, 490, 394, 182, 183, - 487, 1921, 366, 490, 1304, 488, 487, 149, 73, 490, - 435, 284, 435, 487, 439, 487, 490, 487, 490, 487, - 490, 2289, 490, 448, 487, 448, 149, 490, 23, 1405, - 13, 14, 1408, 1409, 29, 1411, 487, 279, 435, 490, - 2261, 105, 394, 487, 36, 2004, 490, 472, 371, 472, - 2769, 448, 225, 487, 486, 2568, 490, 359, 2571, 2384, - 486, 486, 1438, 486, 248, 249, 92, 484, 432, 1977, - 1446, 13, 14, 5, 1192, 472, 5, 142, 2055, 352, - 2057, 
486, 2321, 13, 14, 13, 14, 1463, 486, 486, - 134, 162, 394, 13, 14, 13, 14, 13, 14, 486, - 1476, 486, 1478, 1479, 473, 474, 475, 172, 477, 478, - 479, 480, 481, 482, 1434, 13, 14, 308, 144, 13, - 14, 186, 2071, 13, 14, 398, 191, 400, 359, 193, - 486, 157, 486, 435, 1434, 1511, 162, 13, 14, 134, - 5, 167, 1518, 1519, 5, 2470, 448, 470, 1524, 422, - 176, 486, 425, 179, 477, 478, 479, 480, 481, 482, - 2102, 226, 486, 394, 486, 1566, 8, 486, 1286, 11, - 472, 486, 356, 357, 16, 17, 18, 486, 242, 13, - 14, 5, 208, 486, 486, 5, 250, 486, 1564, 1565, - 1566, 33, 486, 1569, 13, 14, 13, 14, 262, 486, - 2753, 227, 13, 14, 435, 2280, 1582, 1583, 1584, 1585, - 1611, 486, 2537, 13, 14, 1591, 281, 448, 347, 348, - 347, 348, 9, 83, 486, 85, 449, 87, 210, 293, - 1606, 1607, 15, 491, 490, 1611, 347, 348, 96, 283, - 2753, 472, 252, 253, 36, 366, 1566, 826, 827, 1625, - 276, 2508, 1628, 279, 1630, 486, 162, 162, 276, 285, - 274, 519, 225, 85, 486, 405, 486, 405, 54, 490, - 54, 405, 2231, 405, 338, 1651, 2384, 144, 23, 255, - 1681, 719, 487, 493, 29, 405, 2245, 1663, 283, 333, - 157, 1611, 448, 337, 8, 162, 360, 11, 149, 1663, - 264, 1677, 16, 17, 18, 1663, 371, 92, 264, 1629, - 486, 36, 570, 1677, 340, 1691, 380, 2374, 144, 1677, - 1676, 1676, 366, 1676, 36, 1645, 488, 1676, 438, 2240, - 1706, 157, 490, 108, 109, 486, 162, 486, 333, 486, - 23, 208, 337, 1663, 35, 1463, 29, 486, 1724, 1467, - 2292, 2293, 444, 486, 9, 486, 1676, 1677, 1476, 11, - 227, 1681, 2470, 403, 6, 1741, 1742, 1743, 394, 11, - 212, 366, 403, 15, 490, 403, 1676, 486, 269, 21, - 22, 23, 208, 486, 26, 405, 824, 29, 432, 134, - 32, 33, 158, 167, 176, 421, 487, 486, 36, 429, - 208, 227, 490, 429, 369, 470, 1782, 182, 183, 276, - 216, 280, 477, 478, 479, 480, 481, 482, 285, 445, - 490, 447, 448, 487, 486, 216, 264, 2502, 216, 2537, - 285, 487, 317, 277, 76, 1811, 441, 432, 486, 281, - 149, 83, 84, 85, 86, 87, 149, 448, 264, 279, - 276, 134, 264, 891, 279, 1831, 36, 1833, 484, 285, - 36, 487, 488, 489, 35, 405, 405, 484, 
484, 244, - 245, 246, 247, 248, 249, 1851, 487, 252, 253, 405, - 405, 490, 485, 719, 488, 487, 167, 173, 1864, 487, - 487, 1867, 469, 2374, 487, 469, 487, 487, 1874, 487, - 469, 487, 1878, 487, 167, 1881, 487, 487, 194, 1829, - 405, 1887, 1888, 1889, 1890, 1891, 487, 487, 1894, 1895, - 1896, 1897, 1898, 1899, 1900, 1901, 1902, 1903, 486, 1829, - 289, 1907, 1908, 486, 1952, 486, 2804, 486, 283, 435, - 1916, 445, 445, 279, 230, 1921, 280, 1923, 475, 1950, - 471, 280, 1928, 280, 421, 1931, 427, 1933, 235, 490, - 405, 149, 264, 486, 1940, 1941, 1967, 281, 1944, 1945, - 194, 149, 283, 1949, 1950, 149, 405, 405, 487, 487, - 487, 356, 357, 485, 405, 277, 844, 405, 333, 1965, - 2468, 1967, 337, 490, 487, 421, 36, 2546, 284, 279, - 283, 1965, 1978, 1967, 149, 448, 149, 1965, 269, 1967, - 484, 1987, 487, 140, 474, 167, 2593, 11, 162, 487, - 2608, 366, 445, 487, 487, 167, 486, 2003, 487, 1747, - 1950, 473, 474, 475, 394, 477, 478, 479, 480, 481, - 482, 167, 490, 176, 340, 1965, 486, 1967, 83, 486, - 333, 67, 2572, 434, 337, 891, 487, 73, 484, 472, - 50, 487, 488, 280, 149, 171, 416, 2551, 36, 85, - 79, 486, 1790, 487, 429, 2051, 485, 487, 2753, 2055, - 2056, 2057, 486, 366, 486, 2569, 485, 432, 167, 490, - 487, 466, 467, 486, 215, 396, 288, 487, 114, 215, - 116, 561, 286, 2691, 94, 1143, 486, 8, 487, 487, - 11, 486, 398, 486, 400, 1833, 487, 486, 54, 179, - 1838, 475, 1840, 196, 448, 36, 1844, 1845, 185, 2649, - 120, 269, 719, 269, 594, 2613, 422, 488, 487, 425, - 41, 488, 488, 405, 55, 488, 488, 48, 138, 432, - 488, 488, 142, 488, 475, 171, 488, 2133, 488, 473, - 474, 475, 488, 477, 478, 479, 480, 481, 482, 629, - 630, 631, 73, 163, 405, 2752, 166, 36, 488, 2099, - 488, 2665, 198, 488, 2758, 106, 264, 279, 488, 488, - 180, 488, 486, 488, 105, 1233, 36, 448, 488, 2099, - 2176, 2177, 167, 488, 488, 2181, 2182, 485, 2686, 488, - 2186, 486, 488, 2189, 2190, 1073, 488, 1075, 2194, 488, - 488, 488, 488, 298, 215, 447, 486, 486, 486, 85, - 130, 487, 474, 327, 216, 114, 486, 486, 
480, 2215, - 36, 142, 149, 2219, 486, 73, 121, 149, 264, 491, - 36, 2215, 487, 269, 54, 2219, 36, 2215, 348, 711, - 171, 2219, 348, 486, 2240, 717, 486, 486, 429, 490, - 434, 172, 73, 36, 184, 517, 518, 519, 429, 269, - 719, 416, 193, 67, 282, 186, 67, 277, 486, 490, - 191, 36, 719, 475, 350, 2215, 312, 2273, 366, 2219, - 269, 184, 240, 486, 891, 416, 2282, 1143, 36, 486, - 552, 327, 554, 486, 486, 2291, 282, 487, 282, 561, - 1358, 487, 2242, 2243, 279, 226, 347, 262, 570, 319, - 9, 242, 334, 196, 279, 279, 33, 487, 2056, 250, - 582, 119, 2242, 2243, 427, 2321, 486, 9, 22, 801, - 487, 262, 594, 579, 2038, 2051, 1372, 1736, 2001, 2244, - 2078, 2685, 2444, 2749, 2084, 2755, 2285, 2825, 828, 2600, - 2803, 2519, 824, 1742, 826, 827, 2791, 878, 1730, 2752, - 281, 2055, 293, 2089, 2801, 1792, 2750, 629, 630, 631, - 1727, 851, 1065, 1796, 8, 1831, 2374, 1233, 2374, 1691, - 2078, 2377, 16, 17, 18, 1343, 422, 2662, 2384, 844, - 866, 2387, 819, 2580, 2740, 865, 1706, 877, 1981, 1317, - 2396, 2397, 2674, 2236, 2400, 1342, 719, 338, 1677, 2206, - 890, 1967, 868, 2221, 2565, 1965, 2412, 2652, 2651, 2359, - 892, 1479, 2669, 895, 896, 2421, 2670, 1480, 2412, 360, - 2426, 911, 891, 2429, 2412, 1192, 1192, 2421, 2440, 2359, - 2436, 2437, 2426, 2421, 891, 2429, 1192, 1192, 2426, 380, - 371, 2429, 2448, 1511, 2450, 2738, 2725, 8, 2739, 1271, - 11, 723, 1694, 1629, 2448, 16, 17, 18, 1626, 1520, - 2448, 1411, 1350, 1275, 2470, 1799, 1476, 2644, 1296, 2002, - 1297, 2421, 1360, -1, 1362, 1080, 2426, 1365, -1, 2429, - 850, -1, -1, 1371, -1, 1373, -1, 2235, 8, 2237, - -1, 11, 1358, -1, -1, -1, 2502, 1385, -1, -1, - -1, -1, 1390, -1, -1, -1, 1394, 1395, 1396, 1397, - -1, 1399, 1400, -1, -1, -1, -1, -1, -1, -1, - -1, 41, -1, -1, -1, 2531, 1143, -1, 48, -1, - -1, 2537, 2538, -1, -1, -1, -1, 2531, -1, 470, - -1, 2572, -1, 2531, -1, -1, 477, 478, 479, 480, - 481, 482, -1, 73, 2560, -1, 828, -1, 2564, 1049, - -1, 2511, 2568, -1, -1, 2571, 2572, -1, 891, -1, - 2564, -1, 844, 845, -1, -1, 2564, 
2583, 850, 851, - 852, 2511, 2532, -1, -1, -1, -1, 2593, 1078, -1, - -1, 2597, 2598, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 2532, 1085, 1086, 877, 878, -1, -1, -1, - -1, -1, -1, 1479, 2564, -1, 1233, -1, 890, -1, - -1, -1, 2572, -1, -1, -1, 2374, -1, -1, -1, - -1, -1, -1, -1, -1, 907, -1, 281, -1, 911, - -1, -1, -1, 2649, -1, 1511, 2652, -1, -1, -1, - -1, 212, 172, -1, -1, -1, -1, -1, 2652, -1, - -1, -1, -1, -1, 2652, -1, 186, -1, 2674, -1, - -1, 191, -1, -1, 1143, 2625, -1, -1, -1, 2685, - -1, -1, -1, -1, -1, -1, 1143, -1, -1, -1, - -1, 2685, -1, -1, -1, 2625, -1, 2685, -1, 2705, - -1, -1, -1, 1185, 1186, -1, 226, -1, -1, -1, - -1, 1193, -1, 1195, 1196, -1, -1, -1, 2724, 2725, - 281, -1, -1, -1, -1, -1, -1, -1, 1210, -1, - 1212, 2681, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 1358, -1, 1811, -1, 1227, 2752, 2753, -1, -1, - -1, 2681, -1, -1, 2502, -1, -1, -1, -1, -1, - -1, 281, -1, 2769, 1233, 2771, -1, -1, -1, -1, - -1, -1, 1044, -1, -1, 2769, 1233, 1049, -1, -1, - -1, 2769, -1, 1851, 1274, -1, -1, 1059, -1, -1, - -1, -1, -1, 1065, -1, -1, 1068, -1, -1, 1867, - 1072, 1073, -1, 1075, 1076, 1295, 1078, 1297, -1, -1, - -1, -1, 2762, -1, -1, -1, -1, -1, -1, 1887, - 1143, -1, -1, -1, -1, -1, -1, 2833, -1, 473, - 474, 475, 2762, 477, 478, 479, 480, 481, 482, -1, - -1, -1, -1, -1, 1326, 1327, -1, -1, 1916, -1, - 1340, 371, -1, -1, 719, 1923, -1, -1, -1, -1, - -1, 3, 1479, 5, -1, -1, -1, -1, -1, -1, - -1, -1, 1940, 2621, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 1358, - -1, -1, -1, -1, 1511, -1, 1378, -1, -1, -1, - -1, 1358, -1, -1, -1, -1, 1388, -1, 0, 1391, - 1233, -1, 473, 474, 475, -1, 477, 478, 479, 480, - 481, 482, 23, -1, -1, -1, -1, 1815, 29, 1419, - -1, 1203, -1, -1, -1, 36, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 1811, -1, -1, -1, -1, - 470, -1, -1, -1, 55, -1, -1, 477, 478, 479, - 480, 481, 482, -1, -1, -1, 108, 109, -1, 1241, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 1463, -1, -1, -1, 1851, -1, -1, 2056, -1, - -1, -1, -1, -1, 1476, -1, -1, 
-1, -1, -1, - 92, 1867, 1274, -1, 105, 2753, -1, -1, -1, -1, - 1479, -1, -1, -1, -1, -1, -1, -1, 1500, -1, - -1, 1887, 1479, 1295, -1, 1297, 891, -1, -1, -1, - -1, -1, -1, 134, 1306, 1358, 1308, -1, -1, -1, - 182, 183, 1511, -1, -1, -1, -1, -1, -1, -1, - 1916, -1, 144, -1, 1511, -1, -1, 1923, -1, -1, - -1, -1, -1, -1, -1, 157, -1, -1, 1340, -1, - 162, -1, -1, -1, 1940, 167, -1, 1349, 1350, -1, - -1, -1, -1, -1, 176, -1, -1, 179, 1360, 1361, - 1362, 1363, 193, 1365, -1, -1, -1, -1, -1, 1371, - -1, 1373, 244, 245, 246, 247, 248, 249, -1, -1, - 252, 253, 1384, 1385, 2182, -1, 208, 1389, 1390, -1, - -1, -1, 1394, 1395, 1396, 1397, -1, 1399, 1400, -1, - -1, 2019, -1, -1, -1, 227, -1, -1, -1, -1, - -1, 242, -1, -1, -1, -1, -1, 1419, 1420, 250, - -1, -1, -1, -1, -1, 2043, 1479, -1, -1, -1, - 1432, 262, -1, -1, -1, -1, 1656, -1, -1, -1, - 2058, 2059, 2060, 2061, 2062, 2063, 2064, 2065, 2066, 2067, - -1, -1, 283, -1, 276, 1667, -1, 279, 1511, -1, - 2056, -1, 293, 285, 1811, 8, -1, -1, 11, -1, - 0, -1, -1, 16, 17, 18, -1, -1, -1, 1481, - -1, -1, -1, -1, 356, 357, 1698, 1699, -1, -1, - -1, -1, -1, 2291, -1, -1, -1, -1, 41, -1, - -1, -1, 333, -1, 1851, 48, 337, 338, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 340, -1, - 1867, -1, -1, 2321, -1, -1, -1, -1, -1, 360, - 73, -1, -1, -1, -1, 366, -1, -1, 8, -1, - 1887, 11, -1, -1, -1, -1, -1, -1, 1143, 380, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 92, -1, -1, -1, -1, -1, -1, 1916, - -1, 41, 394, -1, -1, -1, 1923, 1797, 48, 2377, - -1, -1, 1802, 1803, 1804, -1, 2182, -1, -1, 2387, - -1, -1, -1, 1940, 466, 467, -1, -1, -1, 421, - -1, 432, 1822, 73, -1, -1, -1, 429, -1, -1, - -1, -1, 1811, -1, 144, -1, -1, -1, -1, -1, - -1, 1623, -1, 445, 1811, 447, 448, 157, -1, 172, - -1, -1, 162, -1, -1, -1, -1, 167, 1233, -1, - 1852, 1853, -1, 186, -1, -1, 176, -1, 191, 179, - -1, -1, 1851, -1, 1656, -1, -1, -1, -1, -1, - -1, 1663, 484, -1, 1851, 487, 488, 489, 1867, -1, - -1, -1, 142, -1, -1, 1677, -1, -1, 208, 1681, - 1867, -1, 1684, 
226, 1686, -1, -1, -1, 1887, -1, - -1, -1, -1, -1, -1, 2291, -1, 227, -1, -1, - 1887, -1, 172, 8, 2502, -1, 11, -1, -1, 2056, - -1, 16, 17, 18, -1, -1, 186, 1916, -1, -1, - -1, 191, -1, -1, 1923, 2321, -1, -1, 33, 1916, - 1732, -1, -1, -1, -1, -1, 1923, -1, 281, -1, - 2538, 1940, 1962, 1963, 1964, -1, 276, -1, -1, 279, - -1, -1, -1, 1940, -1, 285, 226, -1, 1811, -1, - -1, -1, -1, 1358, -1, -1, -1, -1, -1, -1, - 2568, -1, -1, 2571, -1, -1, -1, -1, -1, -1, - -1, 2377, -1, -1, -1, -1, -1, -1, -1, -1, - 1792, 2387, -1, -1, -1, 1797, -1, 2017, 1851, 1801, - 1802, 1803, 1804, -1, -1, -1, -1, -1, -1, -1, - 340, 281, -1, 1815, 1867, -1, -1, -1, -1, -1, - 1822, -1, -1, -1, 1826, -1, -1, -1, 371, -1, - -1, -1, -1, -1, 1887, 2182, -1, -1, -1, -1, - 1842, 1843, -1, -1, 2056, -1, -1, -1, -1, -1, - -1, 2469, -1, -1, -1, -1, -1, 2056, -1, -1, - -1, -1, 2082, 1916, 394, -1, -1, -1, -1, 2056, - 1923, -1, -1, -1, -1, -1, -1, -1, 1880, -1, - -1, -1, -1, 2103, 1479, 2503, 2504, 1940, -1, -1, - -1, 421, -1, -1, -1, -1, -1, -1, 2516, 429, - -1, 371, -1, -1, -1, 2117, 2502, 212, -1, -1, - -1, -1, -1, -1, -1, 445, 1511, 447, 448, -1, - -1, -1, -1, -1, -1, -1, -1, 470, 550, 551, - 473, 474, 475, -1, 477, 478, 479, 480, 481, 482, - -1, -1, 2538, -1, 2291, -1, -1, -1, -1, -1, - -1, -1, 1954, -1, 484, 2753, -1, 487, 488, 489, - 1962, 1963, 1964, 1965, 2582, 1967, 1968, -1, -1, -1, - 1972, 1973, 2568, -1, 2321, 2571, 281, -1, 2198, 2199, - 2200, 2201, -1, 2182, -1, -1, -1, -1, 610, 611, - -1, -1, -1, -1, -1, 2182, -1, -1, -1, 2001, - 470, -1, -1, 2056, -1, -1, -1, 477, 478, 479, - 480, 481, 482, -1, -1, 2017, -1, 2019, -1, -1, - -1, 2023, 2024, 2025, -1, 2027, -1, -1, -1, -1, - 2377, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 2387, 2043, -1, 2045, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 2058, 2059, 2060, 2061, - 2062, 2063, 2064, 2065, 2066, 2067, -1, -1, 2280, -1, - -1, -1, -1, 2285, -1, -1, -1, -1, -1, -1, - 2082, -1, -1, -1, -1, -1, -1, 2089, -1, -1, - -1, -1, 2291, -1, -1, -1, 718, 
-1, 2100, -1, - -1, 2103, -1, -1, 2291, -1, -1, -1, 2110, -1, - -1, -1, -1, 735, -1, 2327, 2328, 2119, -1, -1, - -1, -1, 2321, -1, -1, -1, -1, -1, -1, 2182, - -1, -1, -1, -1, 2321, -1, -1, -1, -1, -1, - -1, 763, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 2502, -1, 2753, -1, -1, - -1, 783, 784, 785, -1, 787, -1, -1, 473, 474, - 475, -1, 477, 478, 479, 480, 481, 482, 2377, -1, - -1, -1, -1, -1, -1, -1, -1, 809, 2387, -1, - 2377, 2538, -1, 2195, -1, -1, 2198, 2199, 2200, 2201, - 2387, -1, -1, 2205, 2206, -1, 2208, 36, -1, 2211, - -1, -1, -1, 2215, -1, -1, 1811, 2219, -1, -1, - -1, 2568, -1, 2225, 2571, -1, 55, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 858, -1, 2291, -1, - 862, 863, 2244, -1, -1, -1, -1, 2249, -1, -1, - -1, 2253, -1, -1, -1, -1, 1851, 2477, -1, 2261, - -1, -1, -1, -1, -1, -1, -1, -1, 2321, -1, - -1, -1, 1867, -1, -1, -1, 105, 106, -1, 2491, - 2492, -1, -1, -1, 2286, 114, -1, -1, -1, -1, - 2502, -1, 1887, -1, 916, -1, -1, -1, -1, -1, - -1, 923, -1, 2502, -1, -1, -1, -1, 2528, -1, - -1, -1, -1, -1, -1, 2502, -1, -1, -1, -1, - -1, 1916, 944, -1, 2377, -1, -1, -1, 1923, -1, - -1, -1, -1, -1, 2387, -1, -1, -1, -1, 2538, - -1, -1, 171, -1, -1, 1940, -1, -1, -1, -1, - 2352, 2538, -1, -1, -1, 8, 2358, -1, 11, -1, - -1, -1, -1, -1, 193, -1, -1, 2369, -1, 2568, - -1, 2591, 2571, -1, -1, -1, -1, -1, -1, -1, - -1, 2568, -1, -1, 2571, -1, -1, -1, 41, -1, - -1, -1, -1, -1, -1, 48, -1, -1, -1, -1, - -1, -1, -1, -1, 2624, -1, 2753, -1, -1, -1, - 2412, 2413, -1, 242, 2416, -1, -1, 2419, -1, 2421, - 73, 250, -1, -1, 2426, -1, -1, 2429, -1, 2431, - -1, -1, 2434, 262, 1056, 264, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 2448, -1, -1, 2502, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 2056, -1, -1, 293, -1, 2468, 2469, -1, -1, - -1, -1, -1, -1, -1, 2477, -1, -1, -1, -1, - -1, -1, -1, 8, -1, 2538, 11, -1, -1, 142, - -1, 16, 17, 18, -1, -1, -1, -1, -1, -1, - -1, 2503, 2504, -1, -1, -1, -1, -1, 33, 338, - -1, 2513, -1, -1, 2516, 2568, 41, -1, 2571, 172, - -1, -1, 
-1, 48, -1, -1, 2528, -1, -1, 2531, - -1, 360, -1, 186, -1, -1, -1, -1, 191, -1, - -1, 2753, -1, -1, -1, -1, -1, -1, 73, 2551, - -1, 380, -1, 382, 2753, -1, 385, -1, 1180, -1, - -1, -1, 2564, -1, -1, -1, 2753, 2569, -1, -1, - 108, 109, -1, 226, -1, -1, 2578, -1, -1, 1201, - 2582, -1, 1204, -1, -1, 1207, -1, 2182, -1, 2591, - -1, -1, 1214, 1215, 1216, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 1228, -1, -1, -1, - -1, 2613, 1234, 1235, 1236, 1237, -1, -1, -1, -1, - 1242, 1243, 2624, -1, -1, 1247, -1, -1, 281, 1251, - -1, -1, 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, - 1262, -1, -1, 1265, 182, 183, -1, 172, 1270, -1, - 2652, 1273, -1, 1275, -1, 1277, 802, 486, -1, 2661, - -1, 186, -1, 2665, -1, -1, 191, 2669, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 1299, -1, -1, - -1, -1, -1, 2685, 2686, 2687, 2688, 212, 213, -1, - -1, -1, -1, -1, -1, 841, 2291, -1, -1, -1, - 2753, 226, -1, -1, 1326, 1327, 244, 245, 246, 247, - 248, 249, -1, -1, 252, 253, -1, -1, 371, -1, - -1, -1, -1, -1, -1, -1, 2321, 2729, -1, -1, - -1, -1, -1, -1, -1, -1, 2738, -1, 2740, 264, - -1, -1, 267, -1, -1, -1, -1, 2749, -1, -1, - -1, -1, -1, -1, -1, -1, 281, -1, -1, 284, - 8, 2763, -1, 11, -1, -1, -1, 2769, 16, 17, - 18, -1, -1, -1, -1, -1, 0, -1, -1, -1, - -1, -1, 2377, 1405, -1, 2787, 1408, 1409, -1, 1411, - -1, -1, 2387, -1, -1, 19, -1, -1, -1, -1, - -1, -1, 948, -1, -1, 29, -1, 31, 32, 955, - -1, -1, -1, -1, -1, -1, 1438, 470, 356, 357, - -1, -1, -1, 47, 477, 478, 479, 480, 481, 482, - -1, 8, 56, -1, 11, -1, -1, -1, -1, 16, - 17, 18, -1, -1, 68, -1, 371, -1, -1, -1, - -1, 8, -1, -1, 11, 79, 1478, -1, -1, 16, - 17, 18, -1, -1, 41, -1, 90, -1, 92, -1, - -1, 48, -1, -1, -1, 8, 33, -1, 11, -1, - -1, -1, -1, 16, 17, 18, 110, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 73, 1519, -1, -1, - 124, -1, 1524, -1, -1, -1, 23, 2502, 41, -1, - 134, -1, 29, -1, -1, 48, 140, -1, -1, 36, - -1, -1, -1, -1, 148, -1, 150, 151, 466, 467, - -1, -1, -1, -1, -1, -1, -1, -1, 55, 163, - 73, -1, -1, 2538, 1566, 470, -1, -1, 
473, 474, - 475, -1, 477, 478, 479, 480, 481, 482, -1, -1, - 1582, -1, 1584, 1585, 212, 490, 190, -1, -1, 1591, - -1, -1, -1, 2568, -1, -1, 2571, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 105, -1, - -1, -1, -1, -1, -1, 172, -1, 8, -1, -1, - 11, -1, -1, 1625, -1, 229, 1628, -1, 1630, 186, - -1, -1, -1, -1, 191, -1, -1, 134, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 41, -1, 1178, 281, -1, 212, 213, 48, -1, 172, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 226, - -1, -1, -1, 186, 1200, 212, -1, -1, 191, 1205, - -1, -1, 73, -1, -1, -1, -1, -1, -1, 1691, - -1, 1217, 1218, 1219, -1, 1221, 193, -1, -1, 212, - 213, -1, 306, -1, 1706, 309, -1, 264, -1, -1, - 267, -1, -1, 226, -1, -1, -1, -1, -1, -1, - -1, -1, 1724, -1, 281, -1, -1, -1, -1, -1, - -1, -1, -1, 337, -1, -1, -1, -1, -1, -1, - -1, 345, -1, -1, 281, 242, 1272, -1, -1, -1, - -1, 142, -1, 250, 267, 359, -1, -1, -1, -1, - -1, -1, 366, -1, -1, 262, 370, -1, 281, -1, - -1, -1, -1, -1, -1, -1, 380, -1, 2753, -1, - 1782, 172, -1, -1, -1, -1, 283, 1313, -1, -1, - 394, -1, -1, -1, -1, 186, 293, -1, -1, 1325, - 191, -1, -1, -1, 1330, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 371, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 430, -1, -1, -1, - -1, 435, -1, -1, -1, 226, 333, -1, -1, -1, - 337, 338, -1, -1, 448, 473, 474, 475, -1, 477, - 478, 479, 480, 481, 482, -1, -1, -1, 371, -1, - -1, -1, 1864, 360, -1, -1, -1, -1, 472, 366, - -1, -1, 1874, -1, -1, -1, 1878, -1, -1, 1881, - -1, -1, 486, 380, -1, 489, -1, -1, -1, -1, - 281, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 1907, 1908, -1, -1, -1, - -1, -1, -1, 470, -1, -1, 473, 474, 475, -1, - 477, 478, 479, 480, 481, 482, 1928, -1, -1, 1931, - -1, 1933, -1, -1, -1, 432, 473, 474, 475, 1941, - 477, 478, 479, 480, 481, 482, -1, 1949, 1950, -1, - -1, -1, -1, -1, -1, -1, -1, 470, -1, -1, - 473, 474, 475, -1, 477, 478, 479, 480, 481, 482, - -1, -1, -1, -1, -1, -1, 1978, -1, -1, -1, - 371, -1, -1, -1, 3, 1987, -1, -1, -1, 8, - -1, -1, 11, -1, -1, -1, 1522, 16, 17, 
18, - -1, 2003, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 33, -1, -1, 36, -1, 1545, - -1, -1, 41, -1, -1, -1, -1, -1, -1, 48, - -1, -1, -1, -1, -1, -1, 1562, 8, 1564, 1565, - 11, 1567, -1, 1569, -1, 16, 17, 18, 1574, -1, - -1, -1, -1, -1, 73, 1581, -1, -1, -1, -1, - 1586, -1, 33, 1589, -1, -1, -1, 1593, -1, -1, - 41, 1597, 1598, 1599, 1600, -1, -1, 48, -1, 470, - 1606, 1607, -1, 1609, 1610, -1, 477, 478, 479, 480, - 481, 482, -1, -1, -1, 1621, -1, -1, 1624, -1, - -1, -1, 73, -1, -1, -1, 1632, 1633, 1634, 1635, - 1636, 1637, 1638, 1639, 1640, 1641, -1, -1, -1, -1, - -1, -1, -1, 1649, -1, -1, -1, 1653, -1, -1, - -1, 2133, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 1674, -1, - -1, -1, -1, 172, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 186, -1, -1, - -1, -1, 191, -1, 2176, 2177, -1, -1, -1, 2181, - -1, -1, -1, -1, 2186, -1, -1, 2189, 2190, -1, - -1, -1, 2194, 212, 213, -1, -1, -1, -1, -1, - -1, 172, -1, -1, -1, -1, -1, 226, -1, -1, - -1, -1, -1, -1, -1, 186, -1, -1, -1, -1, - 191, -1, -1, -1, 8, -1, -1, 11, -1, -1, - -1, -1, 16, 17, 18, -1, -1, -1, 2240, -1, - -1, 212, 213, -1, -1, 264, -1, -1, 267, 33, - -1, -1, -1, -1, -1, 226, -1, 41, -1, 1785, - 1786, 1787, 281, -1, 48, 284, -1, -1, -1, -1, - -1, -1, -1, -1, 8, -1, -1, 11, -1, -1, - 2282, -1, 16, 17, 18, -1, -1, -1, -1, 73, - -1, -1, -1, 264, 1820, -1, 267, -1, -1, 33, - -1, -1, -1, -1, -1, -1, -1, 41, -1, -1, - 281, -1, -1, 284, 48, -1, -1, 8, -1, -1, - 11, -1, -1, -1, -1, 16, 17, 18, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 1863, -1, 73, - -1, -1, 33, 1869, -1, -1, -1, -1, -1, -1, - 41, -1, 371, -1, -1, -1, -1, 48, 1884, 1885, - 1886, -1, 1888, 1889, 1890, 1891, -1, -1, 1894, 1895, - 1896, 1897, 1898, 1899, 1900, 1901, 1902, 1903, 1904, -1, - -1, -1, 73, -1, -1, -1, -1, -1, 172, -1, - -1, -1, -1, -1, 2396, 2397, -1, -1, 2400, 1925, - 371, -1, 186, 1929, 1930, -1, -1, 191, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 1943, 1944, 1945, - 1946, -1, 1948, -1, -1, -1, -1, -1, 212, 213, 
- -1, -1, -1, -1, 2436, 2437, -1, -1, 172, -1, - -1, -1, 226, -1, -1, -1, -1, -1, 2450, -1, - -1, 470, 186, -1, 473, 474, 475, 191, 477, 478, - 479, 480, 481, 482, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 212, 213, - 264, 172, -1, 267, -1, -1, -1, -1, -1, -1, - -1, -1, 226, -1, 2020, 186, -1, 281, -1, 470, - 191, -1, 473, 474, 475, -1, 477, 478, 479, 480, - 481, 482, -1, -1, -1, -1, 487, -1, -1, -1, - -1, 212, 213, -1, -1, -1, -1, -1, -1, -1, - 264, -1, -1, 267, -1, 226, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 281, -1, -1, - 284, -1, -1, -1, -1, -1, 21, -1, 2560, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 2572, -1, -1, 264, -1, -1, 267, -1, -1, -1, - -1, 2583, -1, -1, -1, -1, -1, 371, -1, -1, - 281, -1, -1, 284, -1, -1, 2598, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 2134, 74, - -1, -1, 2138, 2139, -1, 2141, -1, -1, 2144, 2145, - 2146, 2147, -1, -1, 89, 2151, 2152, 2153, 2154, 2155, - 2156, 2157, 2158, 2159, 2160, 2161, 2162, 371, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 2175, - -1, -1, 2178, -1, 2180, -1, -1, -1, 2184, -1, - -1, 2187, 2188, -1, -1, 2191, 2192, -1, -1, -1, - -1, -1, 2674, -1, -1, -1, -1, -1, 143, -1, - 371, -1, -1, -1, -1, -1, 470, 152, -1, 473, - 474, 475, -1, 477, 478, 479, 480, 481, 482, 164, - -1, -1, -1, 2705, 169, -1, 2232, -1, -1, -1, - -1, -1, -1, 2239, -1, -1, -1, -1, -1, -1, - -1, -1, 2724, 2725, -1, -1, 2252, -1, -1, 194, - -1, -1, -1, -1, -1, -1, 470, -1, -1, 473, - 474, 475, -1, 477, 478, 479, 480, 481, 482, -1, - -1, -1, -1, 487, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 2771, - -1, -1, 237, -1, -1, -1, 241, -1, -1, 470, - -1, -1, 473, 474, 475, -1, 477, 478, 479, 480, - 481, 482, -1, -1, -1, -1, 487, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 2833, -1, -1, -1, -1, -1, -1, -1, 304, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 318, 2380, -1, -1, -1, 2384, -1, - -1, -1, 
-1, -1, 2390, 2391, 2392, -1, -1, 2395, - -1, -1, 2398, 2399, -1, -1, -1, 2403, -1, -1, - -1, -1, -1, -1, 349, -1, -1, 352, -1, -1, - -1, -1, -1, -1, -1, 360, -1, -1, 363, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 382, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 394, - -1, -1, -1, -1, -1, -1, 401, -1, -1, -1, - -1, -1, -1, -1, 2470, 410, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 2488, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 441, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 2536, 2537, -1, -1, -1, 2541, 2542, 2543, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 2576, 2577, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 2589, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 2601, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 3, 4, 5, - 6, 7, 8, 9, 10, -1, -1, -1, -1, -1, - 2646, -1, -1, 19, 20, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, -1, -1, -1, 2664, 35, - -1, -1, 38, 39, -1, 41, 42, 43, 44, 45, - 46, 47, 48, 49, 50, 51, 52, 2683, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, 2697, 68, 69, 70, 71, 72, -1, 74, -1, - 76, 77, 78, 79, 80, 81, 82, 83, 84, -1, - 86, 87, 88, 89, 90, 91, -1, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, - 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, - -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, - 126, 127, 128, 129, 2760, 131, 132, 133, 134, 135, - -1, 137, 138, 139, -1, 141, 142, 143, 2774, 145, - 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, - 156, -1, 158, 159, 160, 161, -1, 163, -1, 165, - 166, -1, 168, 169, 170, 171, 172, 173, -1, 175, - -1, 177, 178, -1, 180, 181, 182, 183, 184, 185, - 186, 187, 188, 189, 190, -1, 192, 193, 194, 195, - 196, 197, -1, 199, 200, 
201, 202, 203, 204, 205, - 206, 207, -1, 209, -1, 211, 212, 213, 214, 215, - 216, 217, 218, 219, 220, -1, -1, 223, 224, 225, - 226, -1, 228, 229, 230, 231, 232, 233, 234, 235, - 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, - 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, - 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, - 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, - -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, - 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, - 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, - 306, 307, 308, 309, 310, 311, -1, 313, 314, 315, - 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, - 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, - 336, 337, 338, 339, -1, 341, 342, 343, 344, 345, - 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, - 356, 357, 358, -1, 360, 361, 362, 363, 364, 365, - 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, - -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, - 386, 387, 388, 389, 390, -1, 392, 393, -1, 395, - 396, 397, 398, 399, 400, 401, -1, 403, 404, -1, - -1, 407, 408, 409, 410, 411, 412, 413, 414, 415, - 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, - 426, 427, 428, -1, 430, 431, 432, 433, 434, 435, - 436, 437, 438, 439, 440, 441, 442, 443, -1, -1, - 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, - 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, - 466, 467, 468, 469, 470, -1, -1, -1, -1, -1, - -1, 477, 478, 479, -1, -1, -1, -1, -1, -1, - 486, 487, -1, -1, -1, 491, 492, 3, 4, 5, - 6, 7, 8, 9, 10, -1, -1, -1, -1, -1, - -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, - 26, -1, 28, 29, 30, -1, -1, -1, -1, 35, - -1, -1, 38, 39, -1, 41, 42, 43, 44, 45, - 46, 47, 48, 49, 50, 51, 52, -1, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, -1, 68, 69, 70, 71, 72, -1, 74, -1, - 76, 77, 78, 79, 80, 81, 82, 83, 84, -1, - 86, 87, 88, 89, 90, 91, -1, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, - 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, - -1, 117, 118, 119, 120, 121, 122, -1, 
124, 125, - 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, - -1, 137, 138, 139, -1, 141, 142, 143, -1, 145, - 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, - 156, -1, 158, 159, 160, 161, -1, 163, -1, 165, - 166, -1, 168, 169, 170, 171, 172, 173, -1, 175, - -1, 177, 178, -1, 180, 181, 182, 183, 184, 185, - 186, 187, 188, 189, 190, -1, 192, 193, 194, 195, - 196, 197, -1, 199, 200, 201, 202, 203, 204, 205, - 206, 207, -1, 209, -1, 211, 212, 213, 214, 215, - 216, 217, 218, 219, 220, -1, -1, 223, 224, 225, - 226, -1, 228, 229, 230, 231, 232, 233, 234, 235, - 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, - 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, - 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, - 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, - -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, - 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, - 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, - 306, 307, 308, 309, 310, 311, -1, 313, 314, 315, - 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, - 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, - 336, 337, 338, 339, -1, 341, 342, 343, 344, 345, - 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, - 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, - 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, - -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, - 386, 387, 388, 389, 390, -1, 392, 393, 394, 395, - 396, 397, 398, 399, 400, 401, -1, 403, 404, -1, - -1, 407, 408, 409, 410, 411, 412, 413, 414, 415, - 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, - 426, 427, 428, -1, 430, 431, 432, 433, 434, 435, - 436, -1, 438, 439, 440, 441, 442, 443, -1, -1, - 446, -1, 448, 449, 450, 451, 452, 453, 454, 455, - 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, - 466, 467, 468, 469, 470, -1, 472, -1, -1, -1, - -1, 477, 478, -1, -1, -1, -1, -1, -1, -1, - 486, 487, -1, -1, -1, 491, 492, 3, 4, 5, - 6, 7, 8, 9, 10, -1, -1, -1, -1, -1, - -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, - 26, -1, 28, 29, 30, -1, -1, -1, -1, 35, - 
-1, -1, 38, 39, -1, 41, 42, 43, 44, 45, - 46, 47, 48, 49, 50, 51, 52, -1, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, -1, 68, 69, 70, 71, 72, -1, 74, -1, - 76, 77, 78, 79, 80, 81, 82, 83, 84, -1, - 86, 87, 88, 89, 90, 91, -1, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, - 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, - -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, - 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, - -1, 137, 138, 139, -1, 141, 142, 143, -1, 145, - 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, - 156, -1, 158, 159, 160, 161, -1, 163, -1, 165, - 166, -1, 168, 169, 170, 171, 172, 173, -1, 175, - -1, 177, 178, -1, 180, 181, 182, 183, 184, 185, - 186, 187, 188, 189, 190, -1, 192, 193, 194, 195, - 196, 197, -1, 199, 200, 201, 202, 203, 204, 205, - 206, 207, -1, 209, -1, 211, 212, 213, 214, 215, - 216, 217, 218, 219, 220, -1, -1, 223, 224, 225, - 226, -1, 228, 229, 230, 231, 232, 233, 234, 235, - 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, - 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, - 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, - 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, - -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, - 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, - 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, - 306, 307, 308, 309, 310, 311, -1, 313, 314, 315, - 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, - 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, - 336, 337, 338, 339, -1, 341, 342, 343, 344, 345, - 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, - 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, - 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, - -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, - 386, 387, 388, 389, 390, -1, 392, 393, 394, 395, - 396, 397, 398, 399, 400, 401, -1, 403, 404, -1, - -1, 407, 408, 409, 410, 411, 412, 413, 414, 415, - 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, - 426, 427, 428, -1, 430, 431, 432, 433, 434, 435, - 436, -1, 438, 439, 
440, 441, 442, 443, -1, -1, - 446, -1, 448, 449, 450, 451, 452, 453, 454, 455, - 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, - 466, 467, 468, 469, 470, -1, 472, -1, -1, -1, - -1, 477, 478, -1, -1, -1, -1, -1, -1, -1, - 486, -1, -1, -1, -1, 491, 492, 3, 4, 5, - 6, 7, 8, 9, 10, -1, -1, -1, -1, -1, - -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, - 26, -1, 28, 29, 30, -1, -1, -1, -1, 35, - -1, -1, 38, 39, -1, 41, 42, 43, 44, 45, - 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, -1, 68, 69, 70, 71, 72, -1, 74, -1, - 76, 77, 78, 79, 80, 81, 82, 83, 84, -1, - 86, 87, 88, 89, 90, 91, -1, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, - 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, - -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, - 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, - -1, 137, 138, 139, -1, 141, 142, 143, -1, 145, - 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, - 156, -1, 158, 159, 160, 161, -1, 163, -1, 165, - 166, 167, 168, 169, 170, 171, 172, 173, -1, 175, - -1, 177, 178, -1, 180, 181, 182, 183, 184, 185, - 186, 187, 188, 189, 190, -1, 192, 193, 194, 195, - 196, 197, -1, 199, 200, 201, 202, 203, 204, 205, - 206, 207, -1, 209, -1, 211, 212, 213, 214, 215, - 216, 217, 218, 219, 220, -1, 222, 223, 224, 225, - 226, -1, 228, 229, 230, 231, 232, 233, 234, 235, - 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, - 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, - 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, - 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, - -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, - 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, - 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, - 306, 307, 308, 309, 310, 311, -1, 313, 314, 315, - 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, - 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, - 336, 337, 338, 339, -1, 341, 342, 343, 344, 345, - 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, - 356, 357, 358, -1, 360, 361, 362, 363, 
364, 365, - 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, - -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, - 386, 387, 388, 389, 390, -1, 392, 393, -1, 395, - 396, 397, 398, 399, 400, 401, -1, 403, 404, -1, - 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, - 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, - 426, 427, 428, -1, 430, 431, 432, 433, 434, 435, - 436, -1, 438, 439, 440, 441, 442, 443, -1, -1, - 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, - 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, - 466, 467, 468, 469, 470, -1, -1, -1, -1, -1, - -1, 477, 478, -1, -1, -1, -1, -1, -1, -1, - 486, -1, -1, -1, -1, 491, 492, 3, 4, 5, - 6, 7, 8, 9, 10, -1, -1, -1, -1, -1, - -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, - 26, -1, 28, 29, 30, -1, -1, -1, -1, 35, - -1, -1, 38, 39, -1, 41, 42, 43, 44, 45, - 46, 47, 48, 49, 50, 51, 52, -1, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, -1, 68, 69, 70, 71, 72, -1, 74, -1, - 76, 77, 78, 79, 80, 81, 82, 83, 84, -1, - 86, 87, 88, 89, 90, 91, -1, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, - 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, - -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, - 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, - -1, 137, 138, 139, -1, 141, 142, 143, -1, 145, - 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, - 156, -1, 158, 159, 160, 161, -1, 163, -1, 165, - 166, -1, 168, 169, 170, 171, 172, 173, -1, 175, - -1, 177, 178, -1, 180, 181, 182, 183, 184, 185, - 186, 187, 188, 189, 190, -1, 192, 193, 194, 195, - 196, 197, -1, 199, 200, 201, 202, 203, 204, 205, - 206, 207, -1, 209, -1, 211, 212, 213, 214, 215, - 216, 217, 218, 219, 220, -1, -1, 223, 224, 225, - 226, -1, 228, 229, 230, 231, 232, 233, 234, 235, - 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, - 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, - 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, - 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, - -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, - 286, 287, 288, 
289, 290, 291, 292, 293, 294, 295, - 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, - 306, 307, 308, 309, 310, 311, -1, 313, 314, 315, - 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, - 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, - 336, 337, 338, 339, -1, 341, 342, 343, 344, 345, - 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, - 356, 357, 358, -1, 360, 361, 362, 363, 364, 365, - 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, - -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, - 386, 387, 388, 389, 390, -1, 392, 393, -1, 395, - 396, 397, 398, 399, 400, 401, -1, 403, 404, -1, - -1, 407, 408, 409, 410, 411, 412, 413, 414, 415, - 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, - 426, 427, 428, -1, 430, 431, 432, 433, 434, 435, - 436, -1, 438, 439, 440, 441, 442, 443, -1, -1, - 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, - 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, - 466, 467, 468, 469, 470, -1, -1, -1, -1, -1, - -1, 477, 478, 479, -1, -1, -1, -1, -1, -1, - 486, -1, -1, -1, -1, 491, 492, 3, 4, 5, - 6, 7, 8, 9, 10, -1, -1, -1, -1, -1, - -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, -1, -1, -1, -1, 35, - -1, -1, 38, 39, -1, 41, 42, 43, 44, 45, - 46, 47, 48, 49, 50, 51, 52, -1, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, -1, 68, 69, 70, 71, 72, -1, 74, -1, - 76, 77, 78, 79, 80, 81, 82, 83, 84, -1, - 86, 87, 88, 89, 90, 91, -1, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, - 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, - -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, - 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, - -1, 137, 138, 139, -1, 141, 142, 143, -1, 145, - 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, - 156, -1, 158, 159, 160, 161, -1, 163, -1, 165, - 166, -1, 168, 169, 170, 171, 172, 173, -1, 175, - -1, 177, 178, -1, 180, 181, 182, 183, 184, 185, - 186, 187, 188, 189, 190, -1, 192, 193, 194, 195, - 196, 197, -1, 199, 200, 201, 202, 203, 204, 205, - 206, 207, -1, 209, -1, 211, 212, 213, 
214, 215, - 216, 217, 218, 219, 220, -1, -1, 223, 224, 225, - 226, -1, 228, 229, 230, 231, 232, 233, 234, 235, - 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, - 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, - 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, - 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, - -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, - 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, - 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, - 306, 307, 308, 309, 310, 311, -1, 313, 314, 315, - 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, - 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, - 336, 337, 338, 339, -1, 341, 342, 343, 344, 345, - 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, - 356, 357, 358, -1, 360, 361, 362, 363, 364, 365, - 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, - -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, - 386, 387, 388, 389, 390, -1, 392, 393, -1, 395, - 396, 397, 398, 399, 400, 401, -1, 403, 404, -1, - -1, 407, 408, 409, 410, 411, 412, 413, 414, 415, - 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, - 426, 427, 428, -1, 430, 431, 432, 433, 434, 435, - 436, -1, 438, 439, 440, 441, 442, 443, -1, -1, - 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, - 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, - 466, 467, 468, 469, 470, -1, -1, -1, -1, -1, - -1, 477, 478, -1, -1, -1, -1, -1, -1, -1, - 486, -1, -1, -1, -1, 491, 492, 3, 4, 5, - 6, 7, 8, 9, 10, -1, -1, -1, -1, -1, - -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, - 26, -1, 28, 29, 30, -1, -1, -1, -1, 35, - -1, -1, 38, 39, -1, 41, 42, 43, 44, 45, - 46, 47, 48, 49, 50, 51, 52, -1, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, -1, 68, 69, 70, 71, 72, -1, 74, -1, - 76, 77, 78, 79, 80, 81, 82, 83, 84, -1, - 86, 87, 88, 89, 90, 91, -1, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, - 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, - -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, - 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, - -1, 
137, 138, 139, -1, 141, 142, 143, -1, 145, - 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, - 156, -1, 158, 159, 160, 161, -1, 163, -1, 165, - 166, -1, 168, 169, 170, 171, 172, 173, -1, 175, - -1, 177, 178, -1, 180, 181, 182, 183, 184, 185, - 186, 187, 188, 189, 190, -1, 192, 193, 194, 195, - 196, 197, -1, 199, 200, 201, 202, 203, 204, 205, - 206, 207, -1, 209, -1, 211, 212, 213, 214, 215, - 216, 217, 218, 219, 220, -1, -1, 223, 224, 225, - 226, -1, 228, 229, 230, 231, 232, 233, 234, 235, - 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, - 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, - 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, - 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, - -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, - 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, - 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, - 306, 307, 308, 309, 310, 311, -1, 313, 314, 315, - 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, - 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, - 336, 337, 338, 339, -1, 341, 342, 343, 344, 345, - 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, - 356, 357, 358, -1, 360, 361, 362, 363, 364, 365, - 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, - -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, - 386, 387, 388, 389, 390, -1, 392, 393, -1, 395, - 396, 397, 398, 399, 400, 401, -1, 403, 404, -1, - -1, 407, 408, 409, 410, 411, 412, 413, 414, 415, - 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, - 426, 427, 428, -1, 430, 431, 432, 433, 434, 435, - 436, -1, 438, 439, 440, 441, 442, 443, -1, -1, - 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, - 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, - 466, 467, 468, 469, 470, -1, -1, -1, -1, -1, - -1, 477, 478, -1, -1, -1, -1, -1, -1, -1, - 486, 487, -1, -1, -1, 491, 492, 3, 4, 5, +{ + 6, 518, 56, 605, 34, 0, 0, 907, 551, 620, + 569, 517, 711, 612, 711, 590, 0, 0, 0, 893, + 16, 524, 647, 564, 1205, 880, 1677, 789, 1746, 885, + 6, 6, 16, 791, 1487, 776, 1050, 905, 1050, 
1050, + 1050, 614, 1080, 1571, 1572, 522, 882, 876, 1576, 807, + 58, 1194, 946, 1752, 1194, 1754, 1634, 29, 1194, 1980, + 2021, 819, 2069, 2093, 1237, 1238, 2069, 1194, 718, 1743, + 523, 1697, 58, 0, 910, 1488, 524, 2101, 33, 2108, + 9, 76, 73, 1172, 5, 1986, 5, 5, 9, 1617, + 1618, 1289, 651, 5, 1207, 5, 5, 5, 50, 1072, + 5, 13, 14, 1337, 13, 14, 2371, 5, 13, 14, + 13, 14, 1674, 5, 86, 13, 14, 58, 5, 5, + 5, 13, 14, 9, 3, 0, 1688, 826, 5, 5, + 2374, 605, 0, 607, 1335, 609, 168, 11, 1812, 5, + 106, 5, 5, 5, 5, 40, 5, 40, 625, 167, + 5, 167, 31, 32, 58, 529, 67, 2449, 75, 48, + 116, 58, 2377, 3, 4, 5, 4, 27, 85, 9, + 1241, 9, 11, 9, 587, 82, 136, 27, 2363, 1853, + 1854, 587, 210, 27, 34, 58, 15, 20, 4, 2010, + 34, 2427, 194, 9, 167, 894, 2517, 27, 897, 898, + 2546, 1920, 41, 216, 34, 11, 144, 285, 711, 168, + 11, 277, 1325, 119, 717, 279, 285, 92, 78, 126, + 301, 121, 1985, 119, 92, 11, 86, 2463, 787, 788, + 2348, 265, 2296, 114, 73, 41, 2602, 93, 134, 119, + 41, 325, 2638, 359, 2472, 114, 116, 865, 59, 262, + 2040, 792, 75, 164, 301, 115, 67, 13, 14, 161, + 833, 714, 85, 429, 129, 2493, 11, 73, 918, 106, + 208, 1145, 73, 27, 114, 925, 117, 37, 394, 820, + 11, 23, 11, 172, 64, 65, 15, 166, 347, 168, + 157, 236, 2584, 22, 27, 1366, 168, 186, 204, 11, + 445, 348, 191, 32, 33, 159, 68, 2228, 321, 407, + 2546, 2232, 168, 1891, 107, 184, 875, 2532, 2219, 435, + 486, 2039, 107, 2113, 184, 828, 829, 2334, 200, 41, + 847, 159, 749, 2729, 76, 195, 268, 226, 297, 2369, + 475, 2786, 73, 215, 200, 490, 187, 285, 5, 766, + 352, 486, 224, 451, 486, 84, 369, 441, 360, 215, + 227, 73, 2036, 123, 2038, 262, 220, 117, 224, 209, + 486, 1899, 1900, 1901, 1902, 129, 951, 1905, 1906, 1907, + 1908, 1909, 1910, 1911, 1912, 1913, 1914, 405, 269, 296, + 230, 2757, 394, 427, 2622, 264, 129, 2515, 339, 269, + 481, 312, 258, 2848, 279, 307, 162, 352, 1058, 274, + 1151, 279, 490, 372, 427, 311, 1978, 0, 1980, 230, + 486, 490, 486, 2487, 2820, 262, 23, 486, 394, 1957, + 1958, 2667, 405, 261, 481, 
339, 2155, 187, 2784, 441, + 236, 373, 339, 2264, 1669, 1670, 1057, 2768, 1519, 2123, + 2635, 2703, 2744, 1634, 486, 2218, 489, 475, 1571, 1572, + 873, 290, 490, 1576, 2700, 340, 339, 873, 842, 366, + 488, 343, 340, 1072, 490, 431, 490, 1711, 473, 473, + 425, 1454, 371, 421, 444, 429, 2730, 343, 405, 473, + 473, 473, 399, 484, 524, 194, 391, 488, 391, 1098, + 1953, 1699, 1357, 439, 1617, 1618, 1679, 383, 384, 1710, + 2517, 422, 486, 2778, 2517, 2436, 2731, 0, 524, 1413, + 2441, 427, 382, 2444, 1705, 385, 376, 2428, 524, 429, + 438, 450, 376, 16, 486, 448, 590, 134, 1438, 523, + 450, 486, 486, 335, 550, 551, 376, 427, 2784, 490, + 2802, 34, 23, 472, 243, 2273, 485, 1074, 29, 472, + 439, 1067, 472, 569, 1070, 486, 484, 486, 486, 577, + 488, 567, 1078, 448, 486, 58, 486, 486, 486, 1082, + 448, 486, 405, 486, 579, 579, 490, 2286, 486, 1207, + 410, 577, 1486, 76, 486, 579, 579, 579, 1687, 486, + 486, 486, 116, 1176, 610, 611, 612, 2637, 482, 486, + 582, 355, 487, 2642, 489, 570, 1175, 2641, 1177, 487, + 486, 489, 486, 486, 486, 486, 1185, 486, 1081, 425, + 1189, 486, 448, 450, 1082, 1285, 2262, 477, 478, 477, + 478, 477, 478, 482, 75, 651, 450, 1246, 92, 1208, + 1209, 157, 579, 114, 85, 472, 472, 236, 2579, 1232, + 1233, 477, 478, 134, 13, 14, 2228, 450, 472, 105, + 2232, 477, 478, 479, 480, 481, 482, 146, 479, 480, + 481, 482, 486, 1224, 198, 30, 283, 1386, 2266, 472, + 486, 2365, 148, 479, 480, 481, 482, 1396, 75, 532, + 1399, 534, 1119, 486, 1277, 711, 2736, 1325, 85, 54, + 1891, 717, 718, 719, 1287, 1288, 2129, 398, 2131, 400, + 553, 227, 1139, 192, 1867, 1868, 1869, 1867, 1868, 735, + 714, 1867, 477, 478, 479, 480, 481, 482, 338, 1866, + 337, 338, 2195, 1249, 1969, 1970, 1971, 1972, 262, 157, + 264, 1822, 1169, 173, 162, 320, 2474, 763, 788, 4, + 486, 2768, 491, 264, 9, 2768, 366, 1940, 269, 366, + 276, 129, 1471, 352, 1471, 1374, 1814, 783, 784, 785, + 1382, 787, 788, 348, 486, 393, 1899, 1900, 1901, 1902, + 519, 1862, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 
1912, + 1913, 1914, 216, 475, 441, 811, 242, 1878, 481, 1893, + 230, 2761, 283, 2763, 250, 488, 488, 2847, 319, 227, + 428, 481, 828, 829, 4, 469, 1451, 1898, 488, 9, + 1455, 292, 214, 1458, 1409, 1410, 1411, 1921, 1600, 488, + 1598, 570, 2803, 2804, 1957, 1958, 425, 469, 262, 236, + 858, 859, 1681, 861, 860, 1454, 1927, 236, 864, 865, + 236, 236, 333, 1934, 487, 2427, 337, 490, 276, 875, + 1770, 2399, 858, 859, 2436, 861, 13, 14, 711, 2441, + 852, 1355, 2444, 2833, 717, 279, 1392, 893, 2069, 2607, + 2851, 1397, 129, 325, 487, 366, 484, 490, 359, 1793, + 488, 2463, 36, 359, 23, 1476, 264, 486, 1382, 1480, + 29, 1482, 918, 2612, 8, 359, 274, 11, 155, 925, + 4, 55, 16, 17, 18, 9, 8, 1401, 360, 11, + 129, 479, 1406, 394, 16, 17, 18, 909, 394, 33, + 946, 4, 1778, 1701, 166, 1589, 9, 1591, 1592, 2402, + 394, 490, 1429, 1571, 1572, 369, 155, 2485, 1576, 486, + 793, 432, 394, 13, 14, 352, 1525, 1823, 1471, 487, + 803, 105, 1828, 352, 435, 325, 352, 352, 398, 435, + 400, 1484, 487, 816, 2546, 490, 2237, 448, 487, 2240, + 473, 435, 448, 826, 487, 828, 829, 490, 2069, 1617, + 1618, 490, 210, 492, 448, 425, 719, 13, 14, 441, + 360, 472, 398, 427, 400, 134, 472, 2579, 13, 14, + 487, 182, 183, 490, 2552, 486, 8, 264, 472, 11, + 486, 1590, 269, 54, 16, 17, 18, 274, 425, 486, + 523, 524, 486, 484, 394, 486, 425, 488, 2699, 425, + 425, 157, 1058, 167, 737, 738, 162, 487, 487, 193, + 490, 490, 487, 1622, 8, 490, 1072, 11, 487, 469, + 269, 490, 16, 17, 18, 274, 1082, 149, 2116, 762, + 2094, 214, 2094, 2094, 2094, 2266, 1082, 248, 249, 33, + 487, 441, 1098, 490, 577, 487, 579, 1081, 490, 486, + 2553, 1676, 448, 1662, 450, 2667, 333, 486, 242, 191, + 486, 486, 486, 826, 23, 149, 250, 846, 212, 2837, + 29, 227, 487, 149, 1749, 490, 1751, 149, 262, 398, + 212, 400, 487, 487, 2785, 8, 490, 264, 2700, 1145, + 2233, 441, 2235, 16, 17, 18, 487, 1844, 8, 490, + 2050, 11, 487, 486, 2805, 490, 16, 17, 18, 293, + 23, 584, 487, 586, 283, 490, 29, 13, 14, 1175, + 276, 1177, 398, 33, 400, 412, 
1182, 1172, 1172, 1185, + 893, 2049, 2037, 1189, 1925, 1926, 423, 281, 1172, 1172, + 1172, 552, 2048, 554, 2050, 356, 357, 1203, 1809, 281, + 1206, 1207, 1208, 1209, 338, 81, 487, 394, 2859, 490, + 1216, 1217, 1218, 412, 333, 149, 487, 149, 337, 490, + 2088, 452, 2784, 488, 423, 134, 360, 13, 14, 487, + 1236, 714, 490, 2304, 149, 1241, 1242, 1243, 1244, 1245, + 1246, 248, 249, 1842, 1250, 1251, 380, 366, 23, 1255, + 2008, 279, 487, 1259, 29, 490, 1262, 1263, 1264, 1265, + 1266, 1267, 1268, 1269, 1270, 13, 14, 1273, 212, 752, + 487, 134, 1278, 490, 83, 1281, 85, 1283, 87, 1285, + 486, 1899, 1900, 1901, 1902, 36, 2399, 1905, 1906, 1907, + 1908, 1909, 1910, 1911, 1912, 1913, 1914, 487, 225, 281, + 490, 1307, 13, 14, 487, 788, 8, 490, 1304, 11, + 13, 14, 486, 432, 16, 17, 18, 1312, 484, 1325, + 487, 2392, 487, 490, 8, 490, 5, 11, 1334, 1335, + 5, 1837, 16, 17, 18, 5, 2517, 281, 8, 1957, + 1958, 1844, 487, 486, 14, 490, 1075, 486, 1077, 356, + 357, 162, 212, 487, 24, 13, 14, 41, 28, 134, + 1366, 1194, 1935, 486, 48, 486, 1372, 5, 1374, 308, + 2069, 486, 2485, 1932, 283, 858, 859, 486, 861, 473, + 474, 475, 5, 477, 478, 479, 480, 481, 482, 73, + 486, 473, 474, 475, 486, 477, 478, 479, 480, 481, + 482, 487, 2302, 486, 490, 5, 487, 1413, 281, 490, + 1416, 1417, 487, 1419, 487, 490, 486, 490, 144, 2274, + 283, 281, 487, 487, 333, 490, 490, 486, 337, 1112, + 1113, 157, 1145, 5, 487, 487, 162, 490, 490, 2552, + 1446, 9, 487, 486, 2017, 490, 2517, 1442, 1454, 1990, + 487, 359, 487, 490, 1137, 490, 487, 366, 449, 490, + 2334, 1294, 2224, 487, 487, 1471, 490, 490, 486, 2068, + 333, 2070, 487, 486, 337, 490, 2238, 486, 1484, 15, + 1486, 1487, 208, 210, 487, 486, 394, 490, 172, 486, + 2115, 473, 474, 475, 486, 477, 478, 479, 480, 481, + 482, 227, 186, 366, 13, 14, 490, 191, 283, 2084, + 212, 487, 2583, 1519, 490, 2586, 1199, 1200, 487, 1525, + 1526, 490, 96, 432, 36, 1531, 366, 435, 1241, 473, + 474, 475, 162, 477, 478, 479, 480, 481, 482, 276, + 448, 487, 226, 1573, 490, 487, 
487, 162, 490, 490, + 276, 13, 14, 148, 13, 14, 13, 14, 333, 285, + 13, 14, 337, 225, 472, 1571, 1572, 1573, 274, 432, + 1576, 13, 14, 13, 14, 23, 13, 14, 486, 281, + 486, 29, 405, 1589, 1590, 1591, 1592, 2768, 13, 14, + 486, 366, 1622, 1599, 2293, 85, 1602, 281, 1081, 490, + 473, 474, 475, 405, 477, 478, 479, 480, 481, 482, + 54, 1617, 1618, 473, 474, 475, 1622, 477, 478, 479, + 480, 481, 482, 13, 14, 13, 14, 54, 23, 1358, + 1636, 13, 14, 1639, 29, 1641, 252, 253, 1471, 1368, + 405, 1370, 1475, 255, 1373, 405, 347, 348, 347, 348, + 1379, 1484, 1381, 1366, 487, 2523, 1662, 432, 347, 348, + 347, 348, 1692, 495, 1393, 474, 356, 357, 1674, 1398, + 405, 2244, 448, 1402, 1403, 1404, 1405, 486, 1407, 1408, + 2305, 2306, 1688, 149, 92, 2258, 134, 371, 264, 1172, + 2387, 264, 1687, 1687, 486, 421, 1702, 2768, 1674, 1674, + 828, 829, 36, 1687, 1687, 1687, 36, 488, 719, 438, + 2253, 1717, 1688, 1688, 486, 486, 490, 486, 486, 35, + 67, 444, 486, 486, 9, 403, 403, 11, 490, 1735, + 485, 490, 403, 486, 495, 269, 486, 405, 176, 134, + 158, 167, 487, 486, 429, 36, 1752, 1753, 1754, 490, + 208, 369, 561, 216, 280, 486, 490, 264, 31, 32, + 216, 216, 285, 487, 359, 317, 277, 114, 441, 1252, + 486, 473, 474, 475, 1487, 477, 478, 479, 480, 481, + 482, 2399, 149, 149, 448, 594, 470, 1793, 264, 473, + 474, 475, 279, 477, 478, 479, 480, 481, 482, 394, + 279, 36, 36, 264, 35, 485, 1519, 405, 484, 405, + 484, 405, 405, 167, 167, 487, 1822, 469, 2517, 92, + 629, 630, 631, 488, 171, 469, 487, 487, 487, 1312, + 487, 487, 487, 487, 487, 283, 1842, 487, 1844, 487, + 435, 8, 487, 469, 11, 1840, 487, 487, 405, 16, + 17, 18, 719, 448, 486, 486, 1862, 486, 486, 289, + 445, 445, 36, 279, 435, 475, 33, 2485, 36, 1875, + 471, 280, 1878, 280, 280, 235, 427, 472, 405, 1885, + 264, 55, 893, 1889, 2387, 333, 1892, 55, 283, 337, + 149, 486, 1898, 1899, 1900, 1901, 1902, 719, 490, 1905, + 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 486, + 283, 1965, 1918, 1919, 194, 485, 149, 264, 366, 2819, + 149, 
1927, 269, 405, 405, 1758, 1932, 405, 1934, 487, + 405, 105, 487, 1963, 2552, 1941, 490, 105, 1944, 487, + 1946, 487, 337, 279, 277, 448, 36, 1953, 1954, 149, + 1980, 1957, 1958, 149, 269, 484, 1962, 1963, 140, 1442, + 167, 11, 487, 162, 167, 312, 487, 487, 1801, 487, + 486, 366, 1978, 6, 1980, 292, 445, 2483, 11, 487, + 327, 394, 15, 167, 432, 1991, 2561, 472, 21, 22, + 23, 490, 487, 26, 2000, 144, 29, 171, 176, 32, + 33, 487, 1978, 1978, 1980, 1980, 108, 109, 157, 2608, + 2016, 1844, 2623, 162, 487, 340, 1849, 434, 1851, 193, + 83, 830, 1855, 1856, 280, 193, 893, 149, 2587, 171, + 416, 36, 486, 79, 486, 486, 486, 432, 429, 485, + 485, 490, 359, 76, 853, 212, 487, 167, 487, 2566, + 83, 84, 85, 86, 87, 486, 396, 486, 2064, 208, + 215, 288, 2068, 2069, 2070, 487, 487, 2584, 242, 2768, + 879, 893, 487, 286, 242, 422, 250, 394, 227, 215, + 182, 183, 250, 892, 486, 486, 359, 54, 262, 448, + 1573, 179, 475, 196, 262, 2706, 185, 1826, 269, 269, + 405, 488, 405, 488, 913, 2664, 475, 488, 488, 1822, + 488, 488, 488, 488, 281, 36, 488, 2112, 435, 293, + 488, 394, 2628, 488, 488, 293, 488, 276, 106, 264, + 488, 448, 488, 488, 1145, 487, 285, 488, 488, 1622, + 2146, 486, 244, 245, 246, 247, 248, 249, 448, 1862, + 252, 253, 279, 488, 36, 472, 488, 1640, 167, 488, + 488, 488, 435, 2680, 338, 1878, 439, 488, 2767, 486, + 338, 2773, 485, 1656, 488, 448, 488, 488, 488, 486, + 486, 298, 486, 2189, 2190, 1898, 360, 215, 2194, 2195, + 486, 1674, 360, 2199, 93, 2701, 2202, 2203, 447, 472, + 85, 2207, 130, 327, 1687, 1688, 380, 114, 487, 1692, + 486, 486, 380, 486, 1927, 216, 36, 149, 73, 121, + 149, 1934, 2228, 36, 487, 348, 2232, 348, 0, 54, + 1241, 36, 67, 486, 486, 486, 2069, 429, 73, 434, + 1953, 73, 1051, 490, 36, 184, 429, 2253, 67, 67, + 85, 21, 2228, 2228, 356, 357, 2232, 2232, 2091, 416, + 2255, 2256, 282, 486, 490, 36, 350, 366, 184, 168, + 475, 1080, 421, 269, 240, 486, 416, 36, 1145, 114, + 2286, 116, 486, 282, 282, 486, 262, 279, 486, 2295, + 487, 347, 9, 487, 334, 196, 
279, 279, 2304, 33, + 487, 200, 119, 2032, 74, 486, 473, 474, 475, 427, + 477, 478, 479, 480, 481, 482, 215, 9, 22, 89, + 92, 487, 579, 1145, 2051, 224, 1380, 2056, 2334, 2064, + 1747, 2257, 2014, 2700, 2459, 484, 171, 2764, 487, 488, + 2097, 2298, 2071, 2072, 2073, 2074, 2075, 2076, 2077, 2078, + 2079, 2080, 2770, 2840, 2615, 1366, 2069, 1840, 2818, 258, + 2806, 880, 2534, 198, 466, 467, 2102, 2767, 1741, 1753, + 1803, 2816, 144, 143, 1241, 2068, 1738, 2372, 2765, 1067, + 279, 2387, 152, 1807, 486, 157, 2392, 1842, 2387, 821, + 162, 2091, 173, 2399, 164, 167, 2402, 1702, 1351, 169, + 868, 2677, 867, 846, 176, 2411, 2412, 179, 2595, 2415, + 2755, 1717, 1325, 194, 1994, 2248, 2689, 2250, 2249, 1241, + 1350, 2427, 1688, 870, 194, 1980, 2234, 2580, 2219, 264, + 2436, 1978, 2667, 2666, 269, 2441, 208, 2684, 2444, 2685, + 1194, 474, 1194, 2455, 343, 2451, 2452, 480, 1194, 230, + 1194, 2427, 2427, 486, 2753, 227, 2754, 2463, 491, 2465, + 2436, 2436, 2740, 1279, 1600, 2441, 2441, 237, 2444, 2444, + 1177, 241, 1705, 1282, 1419, 1283, 1487, 312, 1640, 2485, + 1963, 1527, 2195, 1637, 517, 518, 519, 2463, 2463, 1810, + 2659, 1082, 327, 2015, 1303, 1978, 1305, 1980, 1484, 1366, + 1304, -1, 8, 284, 276, 11, 852, 279, 1519, 1305, + -1, 2517, -1, 285, -1, -1, -1, -1, -1, 552, + -1, 554, -1, -1, -1, -1, -1, -1, 561, -1, + 429, 2526, -1, -1, 304, 41, -1, 570, -1, 1348, + 2546, -1, 48, -1, 1366, -1, 2552, 2553, 318, 582, + -1, -1, 2547, -1, 2387, -1, -1, 2587, -1, -1, + -1, 594, -1, -1, -1, -1, -1, 73, 340, 2575, + 2546, 2546, -1, 2579, -1, -1, -1, 2583, -1, 349, + 2586, 2587, 352, -1, -1, -1, -1, 422, -1, -1, + 360, 2304, 2598, 363, -1, -1, 629, 630, 631, -1, + -1, -1, 2608, 2579, 2579, -1, 2612, 2613, -1, -1, + -1, -1, 382, -1, -1, -1, -1, 398, 1427, 400, + 1487, 2334, 394, -1, 394, -1, -1, -1, -1, 2112, + -1, 401, -1, -1, -1, 169, 142, -1, -1, 173, + 410, 422, -1, -1, 425, 2640, -1, -1, -1, 421, + -1, -1, 1519, 8, -1, -1, 11, 429, 2664, -1, + 194, 2667, -1, -1, -1, 1487, 172, -1, 
-1, -1, + -1, 441, -1, 445, -1, 447, 448, -1, -1, 2392, + 186, -1, -1, 2689, 2517, 191, 41, -1, -1, 2402, + 723, 2667, 2667, 48, 2700, -1, 230, 1519, -1, -1, + -1, 2696, -1, 237, -1, -1, -1, 719, -1, -1, + -1, -1, 484, -1, 2720, 487, 488, 489, 73, 8, + 226, -1, 11, -1, 2700, 2700, -1, -1, -1, -1, + -1, -1, -1, 2739, 2740, -1, 8, -1, -1, 11, + -1, -1, -1, -1, -1, 2228, -1, -1, -1, 2232, + 284, 719, 41, -1, -1, 2484, 789, -1, -1, 48, + -1, 2767, 2768, -1, -1, 8, -1, -1, 11, 41, + -1, -1, 2255, 2256, -1, 281, 48, -1, 2784, -1, + 2786, -1, 2777, -1, 73, -1, -1, 142, -1, 2518, + 2519, -1, -1, -1, -1, -1, -1, 830, 41, -1, + -1, 73, 2531, 2636, 2517, 48, -1, -1, 2784, 2784, + -1, 1822, -1, 846, 847, -1, -1, 172, 352, 852, + 853, 854, -1, -1, -1, -1, -1, -1, -1, -1, + 73, 186, -1, -1, -1, -1, 191, -1, -1, -1, + 2553, -1, 2848, -1, -1, -1, 879, 880, -1, -1, + -1, 1862, 50, 142, -1, -1, -1, -1, 1667, 892, + -1, -1, -1, -1, 398, 371, 400, 1878, 2597, -1, + 2583, 226, -1, 2586, -1, -1, 909, -1, -1, -1, + 913, 893, -1, 172, -1, -1, -1, 1898, 422, 2372, + -1, 425, -1, -1, -1, -1, 94, 186, -1, 142, + 172, -1, 191, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 186, -1, 1927, -1, -1, 191, + -1, -1, 120, 1934, -1, 893, 281, -1, -1, 172, + -1, -1, -1, 711, -1, 2768, -1, 226, -1, 717, + 138, -1, 1953, 186, 142, -1, -1, -1, 191, -1, + -1, -1, -1, 2436, 226, 1822, -1, -1, 2441, -1, + -1, 2444, -1, -1, 470, 163, -1, -1, 166, -1, + -1, 477, 478, 479, 480, 481, 482, -1, -1, -1, + -1, -1, 180, 226, -1, -1, -1, -1, -1, -1, + -1, -1, 281, -1, -1, 1862, -1, -1, -1, 1808, + 1822, -1, -1, 0, 1813, 1814, 1815, -1, -1, 281, + -1, 1878, -1, 1046, -1, -1, 371, -1, 1051, -1, + -1, -1, -1, -1, 1833, 803, -1, -1, 1061, -1, + -1, 1898, -1, -1, 1067, -1, -1, 1070, 281, -1, + 1862, 1074, 1075, 2526, 1077, 1078, -1, 1080, 826, -1, + 828, 829, -1, -1, -1, 2768, 1878, -1, 2069, -1, + 1927, -1, -1, -1, 2547, -1, -1, 1934, -1, -1, + -1, 269, -1, -1, -1, -1, 1898, -1, -1, 277, + -1, -1, 371, -1, -1, -1, 1953, 
-1, -1, -1, + -1, 719, -1, -1, -1, 92, 2579, -1, -1, 371, + -1, -1, -1, -1, 2587, 1927, -1, -1, -1, -1, + -1, -1, 1934, -1, -1, 470, 894, -1, -1, 897, + 898, 319, 477, 478, 479, 480, 481, 482, 371, -1, + -1, 1953, -1, 1145, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 144, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 2640, -1, -1, + 157, -1, -1, -1, -1, 162, 1975, 1976, 1977, -1, + 167, -1, 1205, -1, -1, -1, -1, 1145, -1, 176, + -1, 470, 179, -1, 2195, -1, -1, -1, 477, 478, + 479, 480, 481, 482, -1, -1, -1, -1, 470, -1, + -1, -1, 2069, -1, -1, 477, 478, 479, 480, 481, + 482, 208, -1, 2696, -1, -1, 1249, -1, -1, -1, + -1, 2030, -1, -1, -1, -1, -1, 470, -1, 1241, + 227, -1, -1, -1, 477, 478, 479, 480, 481, 482, + -1, -1, -1, -1, -1, -1, -1, 2069, -1, 1282, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 893, -1, -1, -1, -1, + 1303, -1, 1305, 1241, -1, -1, -1, -1, -1, 276, + -1, 1314, 279, 1316, -1, -1, 2095, -1, 285, -1, + -1, -1, -1, 2304, 2777, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 2116, -1, 1087, + 1088, -1, -1, -1, -1, 1348, -1, -1, -1, -1, + -1, -1, -1, 2334, 1357, 1358, -1, -1, 2195, -1, + -1, -1, -1, -1, -1, 1368, 1369, 1370, 1371, -1, + 1373, -1, -1, 340, -1, -1, 1379, -1, 1381, -1, + -1, -1, -1, -1, 1366, -1, -1, -1, -1, 1392, + 1393, -1, -1, -1, 1397, 1398, -1, -1, -1, 1402, + 1403, 1404, 1405, 2195, 1407, 1408, -1, -1, -1, -1, + -1, 2392, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 2402, -1, -1, 1427, 1428, 36, 394, 1366, -1, + -1, -1, 2211, 2212, 2213, 2214, -1, 1440, -1, 1187, + 1188, -1, -1, -1, -1, 55, -1, 1195, -1, 1197, + 1198, -1, -1, -1, 421, -1, -1, -1, -1, -1, + -1, -1, 429, -1, 1212, -1, 1214, 2304, -1, -1, + -1, 550, 551, -1, -1, -1, -1, -1, 445, -1, + 447, 448, -1, -1, -1, 8, 1489, 1235, 11, -1, + -1, -1, -1, -1, -1, 105, 106, 2334, -1, -1, + -1, -1, -1, -1, 114, 1487, -1, -1, -1, -1, + -1, -1, 2304, -1, -1, -1, -1, 484, 41, -1, + 487, 488, 489, -1, -1, 48, -1, -1, -1, -1, + -1, 610, 611, -1, -1, 8, 2517, 1519, 11, -1, 
+ -1, -1, 2334, 16, 17, 18, -1, 1145, -1, 1487, + 73, -1, -1, -1, -1, 2392, -1, -1, -1, -1, + -1, 171, -1, -1, -1, 2402, -1, -1, -1, -1, + -1, -1, 2553, -1, -1, -1, -1, -1, -1, -1, + -1, 1519, -1, 193, -1, -1, 1334, 1335, 8, -1, + -1, 11, -1, -1, -1, -1, -1, 1600, -1, -1, + 2392, -1, 2583, -1, -1, 2586, -1, -1, -1, -1, + 2402, -1, -1, -1, -1, -1, -1, -1, -1, 142, + -1, 41, -1, -1, -1, -1, -1, -1, 48, -1, + -1, 1634, 242, -1, -1, -1, -1, -1, 1386, 718, + 250, -1, -1, 1241, -1, -1, -1, -1, 1396, 172, + -1, 1399, 262, 73, 264, -1, 735, -1, -1, -1, + -1, -1, -1, 186, 1667, -1, -1, -1, 191, -1, + -1, 1674, -1, -1, -1, -1, -1, -1, -1, -1, + 2517, -1, -1, 293, 763, 1688, -1, -1, -1, 1692, + -1, -1, 1695, -1, 1697, -1, -1, -1, -1, -1, + -1, -1, -1, 226, 783, 784, 785, -1, -1, 788, + -1, -1, -1, 2492, -1, -1, 2553, -1, -1, -1, + -1, -1, 142, 1471, -1, 2517, -1, -1, 338, -1, + -1, -1, 811, -1, -1, -1, 1484, -1, -1, 212, + 1743, -1, -1, -1, -1, -1, 2583, -1, -1, 2586, + 360, -1, 172, -1, -1, -1, -1, -1, 281, -1, + 1508, 2553, -1, -1, 2543, -1, 186, -1, 1366, -1, + 380, 191, 382, -1, -1, 385, -1, -1, -1, -1, + -1, 860, -1, -1, -1, 864, 865, 2768, -1, -1, + -1, 2583, -1, -1, 2586, -1, -1, 108, 109, -1, + 1803, -1, -1, -1, -1, 1808, 226, -1, 281, 1812, + 1813, 1814, 1815, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 1826, -1, -1, -1, 2606, -1, -1, + 1833, -1, -1, -1, 1837, -1, -1, -1, -1, 918, + 1822, -1, -1, -1, -1, -1, 925, -1, 371, -1, + 1853, 1854, -1, -1, -1, -1, -1, -1, -1, -1, + 2639, 281, -1, -1, -1, -1, -1, 946, -1, -1, + -1, 182, 183, -1, -1, -1, 486, -1, -1, -1, + 1862, -1, -1, -1, 1822, -1, -1, -1, 1891, 1487, + -1, -1, -1, -1, -1, -1, 1878, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 1898, -1, -1, -1, + -1, 1519, -1, -1, 1862, -1, -1, -1, -1, -1, + 1678, 2768, -1, 244, 245, 246, 247, 248, 249, -1, + 1878, 252, 253, -1, -1, 1927, -1, 470, -1, -1, + -1, 371, 1934, -1, 477, 478, 479, 480, 481, 482, + 1898, 1709, 1710, -1, 1967, -1, -1, 
-1, -1, -1, + -1, 1953, 1975, 1976, 1977, 1978, 2768, 1980, 1981, 1058, + -1, -1, 1985, 1986, -1, -1, -1, -1, -1, 1927, + -1, -1, -1, -1, -1, 3, 1934, 5, -1, -1, + 473, 474, 475, -1, 477, 478, 479, 480, 481, 482, + -1, 2014, -1, -1, -1, 1953, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 2030, -1, 2032, + -1, -1, -1, 2036, 2037, 2038, -1, 2040, -1, -1, + -1, -1, -1, -1, -1, 356, 357, -1, -1, -1, + 470, -1, -1, 2056, -1, 2058, -1, 477, 478, 479, + 480, 481, 482, -1, -1, -1, -1, -1, 2071, 2072, + 2073, 2074, 2075, 2076, 2077, 2078, 2079, 2080, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 2069, -1, -1, + -1, -1, 2095, -1, -1, -1, -1, -1, -1, 2102, + 108, 109, -1, 1182, -1, -1, -1, -1, -1, -1, + 2113, -1, -1, 2116, -1, 1863, 1864, -1, -1, -1, + 2123, -1, -1, -1, 1203, -1, -1, 1206, -1, 2132, + 1209, 2069, -1, -1, -1, -1, -1, 1216, 1217, 1218, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 466, 467, 1236, -1, -1, + -1, -1, -1, 1242, 1243, 1244, 1245, -1, -1, -1, + -1, 1250, 1251, -1, 182, 183, 1255, -1, -1, -1, + 1259, -1, -1, 1262, 1263, 1264, 1265, 1266, 1267, 1268, + 1269, 1270, -1, -1, 1273, -1, -1, -1, -1, 1278, + -1, -1, 1281, -1, 1283, 2208, 1285, -1, 2211, 2212, + 2213, 2214, -1, 2195, -1, 2218, 2219, -1, 2221, -1, + -1, 2224, -1, -1, 1822, 2228, -1, -1, 1307, 2232, + -1, -1, -1, -1, -1, 2238, 244, 245, 246, 247, + 248, 249, -1, -1, 252, 253, -1, -1, -1, -1, + -1, -1, -1, -1, 2257, 1334, 1335, 2195, -1, 2262, + -1, -1, -1, 2266, 1862, -1, -1, -1, -1, -1, + -1, 2274, -1, -1, -1, -1, -1, -1, -1, -1, + 1878, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 2299, -1, -1, -1, + 1898, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 23, -1, -1, -1, -1, -1, 29, -1, + -1, 2069, 2304, -1, -1, 36, -1, -1, -1, 1927, + -1, -1, -1, -1, 1413, -1, 1934, 1416, 1417, -1, + 1419, -1, -1, -1, 55, -1, -1, -1, 356, 357, + -1, -1, 2334, -1, -1, 1953, -1, -1, -1, -1, + -1, -1, 2365, -1, -1, -1, 2304, 1446, 2371, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 2382, + -1, 
-1, 2130, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 105, -1, 2334, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 1486, 0, -1, + 2392, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 2402, -1, -1, 134, 2427, 2428, -1, -1, 2431, -1, + -1, 2434, -1, 2436, -1, -1, -1, -1, 2441, -1, + -1, 2444, -1, 2446, -1, -1, 2449, 1526, -1, -1, + -1, -1, 1531, -1, 2392, -1, -1, -1, 466, 467, + 2463, -1, -1, -1, 2402, 804, -1, -1, -1, -1, + -1, 2069, -1, -1, -1, -1, -1, -1, -1, -1, + 2483, 2484, 193, -1, -1, -1, -1, -1, -1, 2492, + -1, -1, -1, -1, 1573, -1, -1, -1, -1, -1, + 92, -1, -1, -1, 843, -1, -1, -1, -1, -1, + 1589, -1, 1591, 1592, -1, 2518, 2519, -1, -1, -1, + 1599, -1, -1, 1602, -1, 2528, -1, -1, 2531, -1, + -1, 242, -1, -1, -1, 2517, -1, -1, -1, 250, + 2543, -1, -1, 2546, -1, 2293, -1, -1, -1, -1, + 2298, 262, 144, -1, -1, -1, -1, 1636, -1, -1, + 1639, -1, 1641, 2566, -1, 157, -1, -1, -1, -1, + 162, 2553, 283, -1, -1, 167, 2579, -1, -1, 2517, + -1, 2584, 293, -1, 176, -1, -1, 179, -1, -1, + 2593, -1, 2340, 2341, 2597, -1, -1, 2195, -1, -1, + -1, 2583, -1, 2606, 2586, -1, -1, -1, -1, -1, + -1, 950, -1, -1, -1, 2553, 208, -1, 957, -1, + -1, -1, 333, 1702, -1, 2628, 337, 338, -1, -1, + -1, -1, -1, -1, -1, 227, 2639, -1, 1717, -1, + -1, -1, -1, -1, -1, 2583, -1, -1, 2586, 360, + -1, -1, -1, -1, -1, 366, 1735, -1, -1, -1, + -1, -1, -1, -1, 2667, -1, -1, -1, -1, 380, + -1, -1, -1, 2676, -1, -1, -1, 2680, -1, -1, + -1, 2684, -1, -1, 276, -1, -1, 279, -1, -1, + -1, -1, -1, 285, -1, -1, -1, 2700, 2701, 2702, + 2703, -1, -1, -1, -1, -1, 2304, -1, -1, -1, + -1, -1, -1, -1, 1793, -1, -1, -1, -1, -1, + -1, 432, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 23, -1, -1, -1, 2334, -1, 29, -1, + -1, 2744, -1, -1, -1, 36, -1, -1, 340, -1, + 2753, -1, 2755, -1, -1, -1, -1, -1, 2506, 2507, + -1, 2764, -1, -1, 55, -1, -1, -1, -1, 2517, + -1, -1, -1, -1, -1, 2778, -1, -1, -1, -1, + -1, 2784, -1, -1, -1, -1, 2768, -1, -1, -1, + -1, -1, -1, -1, 2392, -1, 1875, -1, -1, 2802, + -1, -1, 394, -1, 2402, -1, 1885, 
-1, -1, -1, + 1889, -1, -1, 1892, 105, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 421, + 2768, -1, -1, -1, -1, -1, -1, 429, -1, 1918, + 1919, 1180, -1, 134, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 445, -1, 447, 448, -1, -1, -1, + -1, -1, 1941, 1202, -1, 1944, -1, 1946, 1207, -1, + -1, -1, -1, -1, -1, 1954, -1, -1, -1, -1, + 1219, 1220, 1221, 1962, 1963, -1, 1225, -1, -1, -1, + -1, -1, 484, -1, -1, 487, 488, 489, -1, -1, + -1, -1, 193, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 1991, -1, -1, -1, -1, -1, -1, 2517, + -1, 2000, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 2016, -1, -1, + -1, 1280, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 242, -1, 8, -1, 2553, 11, -1, 0, 250, + -1, 16, 17, 18, -1, -1, -1, -1, -1, -1, + -1, 262, -1, -1, -1, -1, -1, 19, 33, -1, + -1, -1, 1321, -1, -1, 2583, 41, 29, 2586, 31, + 32, -1, 283, 48, 1333, 3, -1, -1, -1, 1338, + 8, -1, 293, 11, -1, 47, -1, -1, 16, 17, + 18, -1, -1, -1, 56, -1, -1, -1, 73, -1, + 2768, -1, -1, -1, -1, 33, 68, -1, 36, -1, + -1, -1, -1, 41, -1, -1, -1, 79, -1, -1, + 48, -1, 333, -1, -1, -1, 337, 338, 90, -1, + 92, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 73, -1, 2146, 110, 360, + -1, -1, -1, -1, -1, 366, -1, -1, -1, -1, + -1, -1, 124, -1, -1, -1, -1, -1, -1, 380, + -1, -1, 134, -1, -1, -1, -1, -1, 140, -1, + -1, -1, -1, -1, -1, -1, 148, -1, 150, 151, + 2189, 2190, -1, -1, -1, 2194, -1, 172, -1, -1, + 2199, 163, -1, 2202, 2203, -1, -1, -1, 2207, -1, + -1, 186, -1, -1, -1, -1, 191, -1, -1, -1, + -1, 432, -1, -1, -1, -1, -1, -1, 190, -1, + -1, -1, -1, -1, -1, -1, -1, 212, 213, -1, + -1, -1, -1, -1, 172, -1, -1, -1, -1, -1, + 2768, 226, -1, -1, 2253, -1, -1, -1, 186, -1, + -1, -1, -1, 191, -1, -1, -1, 229, -1, -1, + 1529, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 212, 213, -1, -1, -1, 264, + -1, -1, 267, 1552, -1, -1, 2295, -1, 226, -1, + -1, -1, -1, -1, -1, -1, 281, -1, -1, 284, + 1569, -1, 1571, 1572, -1, 1574, -1, 1576, -1, -1, + -1, -1, 1581, -1, -1, -1, -1, -1, -1, 
1588, + -1, -1, -1, -1, 1593, -1, 264, 1596, -1, 267, + -1, -1, -1, -1, 306, 1604, -1, 309, -1, 1608, + 1609, 1610, 1611, 281, -1, -1, 284, -1, 1617, 1618, + -1, 1620, 1621, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 1632, -1, 337, 1635, -1, -1, -1, + -1, -1, -1, 345, 1643, 1644, 1645, 1646, 1647, 1648, + 1649, 1650, 1651, 1652, -1, -1, 371, 359, -1, -1, + -1, 1660, -1, -1, 366, 1664, -1, -1, 370, -1, + -1, -1, 2411, 2412, -1, -1, 2415, -1, 380, -1, + -1, -1, -1, -1, -1, -1, 1685, -1, -1, -1, + -1, -1, 394, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 371, -1, -1, -1, -1, -1, -1, + -1, -1, 2451, 2452, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 2465, -1, 430, -1, + -1, -1, -1, 435, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 448, -1, -1, -1, + -1, -1, -1, -1, -1, 470, -1, -1, 473, 474, + 475, -1, 477, 478, 479, 480, 481, 482, 8, -1, + 472, 11, -1, -1, -1, 490, 16, 17, 18, -1, + -1, -1, -1, -1, 486, -1, -1, 489, -1, -1, + -1, -1, -1, 33, -1, -1, -1, 1796, 1797, 1798, + -1, 41, 470, -1, -1, 473, 474, 475, 48, 477, + 478, 479, 480, 481, 482, -1, -1, -1, -1, -1, + -1, 8, -1, -1, 11, -1, -1, -1, -1, 16, + 17, 18, 1831, 73, -1, -1, 2575, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 33, -1, 2587, -1, + -1, -1, -1, -1, 41, -1, -1, -1, -1, 2598, + -1, 48, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 2613, 1874, -1, -1, -1, -1, + -1, 1880, -1, -1, -1, -1, 73, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 1895, 1896, 1897, -1, + 1899, 1900, 1901, 1902, -1, -1, 1905, 1906, 1907, 1908, + 1909, 1910, 1911, 1912, 1913, 1914, 1915, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 172, -1, -1, -1, -1, 1936, -1, 1938, + -1, -1, -1, 1942, 1943, -1, 186, -1, -1, -1, + 2689, 191, -1, -1, -1, -1, -1, 1956, 1957, 1958, + 1959, -1, 1961, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 212, 213, -1, -1, -1, -1, -1, -1, + -1, 2720, -1, -1, -1, 172, 226, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 186, + 2739, 2740, -1, -1, 191, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, + -1, -1, -1, -1, 264, 212, 213, 267, -1, -1, + -1, -1, -1, -1, 2033, -1, -1, -1, -1, 226, + -1, 281, -1, -1, 284, -1, 8, 2786, -1, 11, + -1, -1, -1, -1, 16, 17, 18, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 33, -1, -1, -1, -1, -1, 264, -1, 41, + 267, -1, -1, -1, -1, -1, 48, -1, -1, -1, + -1, -1, -1, -1, 281, -1, -1, 284, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 2848, + -1, 73, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 371, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 2147, -1, + -1, -1, 2151, 2152, -1, 2154, -1, -1, 2157, 2158, + 2159, 2160, -1, -1, -1, 2164, 2165, 2166, 2167, 2168, + 2169, 2170, 2171, 2172, 2173, 2174, 2175, -1, -1, -1, + -1, -1, -1, -1, 371, -1, -1, -1, -1, 2188, + -1, -1, 2191, -1, 2193, -1, -1, -1, 2197, -1, + -1, 2200, 2201, -1, -1, 2204, 2205, -1, -1, -1, + 172, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 186, -1, -1, -1, -1, 191, + 470, -1, -1, 473, 474, 475, -1, 477, 478, 479, + 480, 481, 482, -1, -1, -1, 2245, 487, -1, -1, + 212, 213, -1, 2252, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 226, -1, 2265, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 470, -1, -1, 473, 474, 475, -1, + 477, 478, 479, 480, 481, 482, -1, -1, -1, -1, + 487, -1, 264, -1, -1, 267, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 281, + -1, -1, 284, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 2395, -1, -1, -1, + 2399, -1, -1, -1, -1, -1, 2405, 2406, 2407, 371, + -1, 2410, -1, -1, 2413, 2414, -1, -1, -1, 2418, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 2485, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 2503, -1, -1, -1, 470, -1, + -1, 473, 474, 475, -1, 477, 478, 479, 480, 481, + 482, -1, -1, -1, -1, 487, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 2551, 2552, -1, -1, -1, 2556, 2557, 2558, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 2591, 2592, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 2604, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 2616, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, + 4, 5, 6, 7, 8, 9, 10, -1, -1, -1, + -1, -1, 2661, -1, -1, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, -1, -1, -1, + 2679, 35, -1, -1, 38, 39, -1, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, 52, 2698, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 66, 2712, 68, 69, 70, 71, 72, -1, + 74, -1, 76, 77, 78, 79, 80, 81, 82, 83, + 84, -1, 86, 87, 88, 89, 90, 91, -1, 93, + 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, + 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, + 114, 115, -1, 117, 118, 119, 120, 121, 122, -1, + 124, 125, 126, 127, 128, 129, 2775, 131, 132, 133, + 134, 135, -1, 137, 138, 139, -1, 141, 142, 143, + 2789, 145, 146, 147, 148, 149, 150, 151, 152, 153, + 154, 155, 156, -1, 158, 159, 160, 161, -1, 163, + -1, 165, 166, -1, 168, 169, 170, 171, 172, 173, + -1, 175, -1, 177, 178, -1, 180, 181, 182, 183, + 184, 185, 186, 187, 188, 189, 190, -1, 192, 193, + 194, 195, 196, 197, -1, 199, 200, 201, 202, 203, + 204, 205, 206, 207, -1, 209, -1, 211, 212, 213, + 214, 215, 216, 217, 218, 219, 220, -1, -1, 223, + 224, 225, 226, -1, 228, 229, 230, 231, 232, 233, + 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, + 244, 245, 246, 247, 248, 249, 250, 251, 
252, 253, + 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, + 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, + 274, 275, -1, 277, 278, -1, -1, 281, 282, 283, + -1, -1, 286, 287, 288, 289, 290, 291, 292, 293, + 294, 295, 296, 297, 298, 299, 300, 301, -1, 303, + 304, 305, 306, 307, 308, 309, 310, 311, -1, 313, + 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, + 324, 325, 326, -1, 328, 329, 330, 331, 332, 333, + 334, 335, 336, 337, 338, 339, -1, 341, 342, 343, + 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, + 354, 355, 356, 357, 358, -1, 360, 361, 362, 363, + 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, + 374, 375, -1, 377, 378, 379, 380, 381, 382, 383, + 384, 385, 386, 387, 388, 389, 390, -1, 392, 393, + -1, 395, 396, 397, 398, 399, 400, 401, -1, 403, + 404, -1, -1, 407, 408, 409, 410, 411, 412, 413, + 414, 415, 416, 417, 418, 419, 420, -1, -1, 423, + 424, 425, 426, 427, 428, -1, 430, 431, 432, 433, + 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, + -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, + 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, + 464, 465, 466, 467, 468, 469, 470, -1, -1, -1, + -1, -1, -1, 477, 478, 479, -1, -1, -1, -1, + 484, -1, 486, 487, -1, -1, -1, 491, -1, 493, + 494, 3, 4, 5, 6, 7, 8, 9, 10, -1, + -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, + 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, + -1, -1, -1, 35, -1, -1, 38, 39, -1, 41, + 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, + 52, -1, 54, 55, 56, 57, 58, 59, 60, 61, + 62, 63, 64, 65, 66, -1, 68, 69, 70, 71, + 72, -1, 74, -1, 76, 77, 78, 79, 80, 81, + 82, 83, 84, -1, 86, 87, 88, 89, 90, 91, + -1, 93, 94, 95, 96, 97, 98, 99, 100, 101, + 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, -1, 117, 118, 119, 120, 121, + 122, -1, 124, 125, 126, 127, 128, -1, -1, 131, + 132, 133, 134, 135, -1, 137, 138, 139, -1, 141, + 142, 143, -1, 145, 146, 147, 148, 149, 150, 151, + 152, 153, 154, 155, 156, -1, 158, 159, 160, 161, + -1, 163, -1, 165, 166, -1, 168, 169, 170, 171, + 172, 
173, -1, 175, -1, 177, 178, -1, 180, 181, + 182, 183, 184, 185, 186, 187, 188, 189, 190, -1, + 192, 193, 194, 195, 196, 197, -1, 199, 200, 201, + 202, 203, 204, 205, 206, 207, -1, 209, -1, 211, + 212, 213, 214, 215, 216, 217, 218, 219, 220, -1, + -1, 223, 224, 225, 226, -1, 228, 229, 230, 231, + 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, + 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, + 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, + 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, + 272, 273, 274, 275, -1, 277, 278, -1, -1, 281, + 282, 283, -1, -1, 286, 287, 288, 289, 290, 291, + 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, + -1, 303, 304, 305, 306, 307, 308, 309, 310, 311, + -1, 313, 314, 315, 316, 317, 318, 319, 320, 321, + 322, 323, 324, 325, 326, -1, 328, 329, 330, 331, + 332, 333, 334, 335, 336, 337, 338, 339, -1, 341, + 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, + 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, + 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, + 372, 373, 374, 375, -1, 377, 378, 379, 380, 381, + 382, 383, 384, 385, 386, 387, 388, 389, 390, -1, + 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, + -1, 403, 404, -1, -1, 407, 408, 409, 410, 411, + 412, 413, 414, 415, 416, 417, 418, 419, 420, -1, + -1, 423, 424, 425, 426, 427, 428, -1, 430, 431, + 432, 433, 434, 435, 436, -1, 438, 439, 440, 441, + 442, 443, -1, -1, 446, -1, 448, 449, 450, 451, + 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, + 462, 463, 464, 465, 466, 467, 468, 469, 470, -1, + 472, -1, -1, -1, -1, 477, 478, -1, -1, -1, + -1, -1, 484, -1, 486, 487, -1, -1, -1, 491, + -1, 493, 494, 3, 4, 5, 6, 7, 8, 9, + 10, -1, -1, -1, -1, -1, -1, -1, -1, 19, + 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, + 30, -1, -1, -1, -1, 35, -1, -1, 38, 39, + -1, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, -1, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, -1, 68, 69, + 70, 71, 72, -1, 74, -1, 76, 77, 78, 79, + 80, 81, 82, 83, 84, -1, 86, 87, 88, 89, + 90, 91, -1, 
93, 94, 95, 96, 97, 98, 99, + 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 115, -1, 117, 118, 119, + 120, 121, 122, -1, 124, 125, 126, 127, 128, -1, + -1, 131, 132, 133, 134, 135, -1, 137, 138, 139, + -1, 141, 142, 143, -1, 145, 146, 147, 148, 149, + 150, 151, 152, 153, 154, 155, 156, -1, 158, 159, + 160, 161, -1, 163, -1, 165, 166, -1, 168, 169, + 170, 171, 172, 173, -1, 175, -1, 177, 178, -1, + 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, + 190, -1, 192, 193, 194, 195, 196, 197, -1, 199, + 200, 201, 202, 203, 204, 205, 206, 207, -1, 209, + -1, 211, 212, 213, 214, 215, 216, 217, 218, 219, + 220, -1, -1, 223, 224, 225, 226, -1, 228, 229, + 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, + 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, + 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, + 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, + 270, 271, 272, 273, 274, 275, -1, 277, 278, -1, + -1, 281, 282, 283, -1, -1, 286, 287, 288, 289, + 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, + 300, 301, -1, 303, 304, 305, 306, 307, 308, 309, + 310, 311, -1, 313, 314, 315, 316, 317, 318, 319, + 320, 321, 322, 323, 324, 325, 326, -1, 328, 329, + 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, + -1, 341, 342, 343, 344, 345, 346, 347, 348, 349, + 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, + 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, + 370, 371, 372, 373, 374, 375, -1, 377, 378, 379, + 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, + 390, -1, 392, 393, 394, 395, 396, 397, 398, 399, + 400, 401, -1, 403, 404, -1, -1, 407, 408, 409, + 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, + 420, -1, -1, 423, 424, 425, 426, 427, 428, -1, + 430, 431, 432, 433, 434, 435, 436, -1, 438, 439, + 440, 441, 442, 443, -1, -1, 446, -1, 448, 449, + 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, + 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, + 470, -1, 472, -1, -1, -1, -1, 477, 478, -1, + -1, -1, -1, -1, 484, -1, 486, -1, -1, -1, 
+ -1, 491, -1, 493, 494, 3, 4, 5, 6, 7, + 8, 9, 10, -1, -1, -1, -1, -1, -1, -1, + -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, + 28, 29, 30, -1, -1, -1, -1, 35, -1, -1, + 38, 39, -1, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, + 58, 59, 60, 61, 62, 63, 64, 65, 66, -1, + 68, 69, 70, 71, 72, -1, 74, -1, 76, 77, + 78, 79, 80, 81, 82, 83, 84, -1, 86, 87, + 88, 89, 90, 91, -1, 93, 94, 95, 96, 97, + 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115, -1, 117, + 118, 119, 120, 121, 122, -1, 124, 125, 126, 127, + 128, -1, -1, 131, 132, 133, 134, 135, -1, 137, + 138, 139, -1, 141, 142, 143, -1, 145, 146, 147, + 148, 149, 150, 151, 152, 153, 154, 155, 156, -1, + 158, 159, 160, 161, -1, 163, -1, 165, 166, 167, + 168, 169, 170, 171, 172, 173, -1, 175, -1, 177, + 178, -1, 180, 181, 182, 183, 184, 185, 186, 187, + 188, 189, 190, -1, 192, 193, 194, 195, 196, 197, + -1, 199, 200, 201, 202, 203, 204, 205, 206, 207, + -1, 209, -1, 211, 212, 213, 214, 215, 216, 217, + 218, 219, 220, -1, 222, 223, 224, 225, 226, -1, + 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, + 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, + 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, + 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, + 268, 269, 270, 271, 272, 273, 274, 275, -1, 277, + 278, -1, -1, 281, 282, 283, -1, -1, 286, 287, + 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, + 298, 299, 300, 301, -1, 303, 304, 305, 306, 307, + 308, 309, 310, 311, -1, 313, 314, 315, 316, 317, + 318, 319, 320, 321, 322, 323, 324, 325, 326, -1, + 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, + 338, 339, -1, 341, 342, 343, 344, 345, 346, 347, + 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, + 358, -1, 360, 361, 362, 363, 364, 365, 366, 367, + 368, 369, 370, 371, 372, 373, 374, 375, -1, 377, + 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, + 388, 389, 390, -1, 392, 393, -1, 395, 396, 397, + 398, 399, 400, 401, -1, 403, 404, -1, 406, 407, + 408, 
409, 410, 411, 412, 413, 414, 415, 416, 417, + 418, 419, 420, -1, -1, 423, 424, 425, 426, 427, + 428, -1, 430, 431, 432, 433, 434, 435, 436, -1, + 438, 439, 440, 441, 442, 443, -1, -1, 446, -1, + -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, + 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, + 468, 469, 470, -1, -1, -1, -1, -1, -1, 477, + 478, -1, -1, -1, -1, -1, 484, -1, 486, -1, + -1, -1, -1, 491, -1, 493, 494, 3, 4, 5, 6, 7, 8, 9, 10, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, 35, @@ -194140,57 +207821,205 @@ static const yytype_int16 yycheck[] = 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, -1, -1, -1, -1, -1, - -1, 477, 478, -1, -1, -1, -1, -1, -1, -1, - 486, 487, -1, -1, -1, 491, 492, 3, 4, 5, - 6, 7, 8, 9, 10, -1, -1, -1, -1, -1, - -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, - 26, -1, 28, 29, 30, -1, -1, -1, -1, 35, - -1, -1, 38, 39, -1, 41, 42, 43, 44, 45, - 46, 47, 48, 49, 50, 51, 52, -1, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, -1, 68, 69, 70, 71, 72, -1, 74, -1, - 76, 77, 78, 79, 80, 81, 82, 83, 84, -1, - 86, 87, 88, 89, 90, 91, -1, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, - 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, - -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, - 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, - -1, 137, 138, 139, -1, 141, 142, 143, -1, 145, - 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, - 156, -1, 158, 159, 160, 161, -1, 163, -1, 165, - 166, -1, 168, 169, 170, 171, 172, 173, -1, 175, - -1, 177, 178, -1, 180, 181, 182, 183, 184, 185, - 186, 187, 188, 189, 190, -1, 192, 193, 194, 195, - 196, 197, -1, 199, 200, 201, 202, 203, 204, 205, - 206, 207, -1, 209, -1, 211, 212, 213, 214, 215, - 216, 217, 218, 219, 220, -1, -1, 223, 224, 225, - 226, -1, 228, 229, 230, 231, 232, 233, 234, 235, - 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, - 246, 247, 248, 249, 250, 
251, 252, 253, 254, 255, - 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, - 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, - -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, - 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, - 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, - 306, 307, 308, 309, 310, 311, -1, 313, 314, 315, - 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, - 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, - 336, 337, 338, 339, -1, 341, 342, 343, 344, 345, - 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, - 356, 357, 358, -1, 360, 361, 362, 363, 364, 365, - 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, - -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, - 386, 387, 388, 389, 390, -1, 392, 393, -1, 395, - 396, 397, 398, 399, 400, 401, -1, 403, 404, -1, - -1, 407, 408, 409, 410, 411, 412, 413, 414, 415, - 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, - 426, 427, 428, -1, 430, 431, 432, 433, 434, 435, - 436, 437, 438, 439, 440, 441, 442, 443, -1, -1, - 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, - 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, - 466, 467, 468, 469, 470, -1, -1, -1, -1, -1, - -1, 477, 478, -1, -1, -1, -1, -1, -1, -1, - 486, -1, -1, -1, -1, 491, 492, 3, 4, 5, + -1, 477, 478, 479, -1, -1, -1, -1, 484, -1, + 486, -1, -1, -1, -1, 491, -1, 493, 494, 3, + 4, 5, 6, 7, 8, 9, 10, -1, -1, -1, + -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, -1, -1, -1, + -1, 35, -1, -1, 38, 39, -1, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, 52, -1, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 66, -1, 68, 69, 70, 71, 72, -1, + 74, -1, 76, 77, 78, 79, 80, 81, 82, 83, + 84, -1, 86, 87, 88, 89, 90, 91, -1, 93, + 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, + 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, + 114, 115, -1, 117, 118, 119, 120, 121, 122, -1, + 124, 125, 126, 127, 128, -1, -1, 131, 132, 133, + 134, 135, -1, 137, 138, 139, -1, 141, 142, 143, + -1, 145, 146, 147, 148, 149, 150, 151, 152, 153, + 154, 
155, 156, -1, 158, 159, 160, 161, -1, 163, + -1, 165, 166, -1, 168, 169, 170, 171, 172, 173, + -1, 175, -1, 177, 178, -1, 180, 181, 182, 183, + 184, 185, 186, 187, 188, 189, 190, -1, 192, 193, + 194, 195, 196, 197, -1, 199, 200, 201, 202, 203, + 204, 205, 206, 207, -1, 209, -1, 211, 212, 213, + 214, 215, 216, 217, 218, 219, 220, -1, -1, 223, + 224, 225, 226, -1, 228, 229, 230, 231, 232, 233, + 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, + 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, + 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, + 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, + 274, 275, -1, 277, 278, -1, -1, 281, 282, 283, + -1, -1, 286, 287, 288, 289, 290, 291, 292, 293, + 294, 295, 296, 297, 298, 299, 300, 301, -1, 303, + 304, 305, 306, 307, 308, 309, 310, 311, -1, 313, + 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, + 324, 325, 326, -1, 328, 329, 330, 331, 332, 333, + 334, 335, 336, 337, 338, 339, -1, 341, 342, 343, + 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, + 354, 355, 356, 357, 358, -1, 360, 361, 362, 363, + 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, + 374, 375, -1, 377, 378, 379, 380, 381, 382, 383, + 384, 385, 386, 387, 388, 389, 390, -1, 392, 393, + -1, 395, 396, 397, 398, 399, 400, 401, -1, 403, + 404, -1, -1, 407, 408, 409, 410, 411, 412, 413, + 414, 415, 416, 417, 418, 419, 420, -1, -1, 423, + 424, 425, 426, 427, 428, -1, 430, 431, 432, 433, + 434, 435, 436, -1, 438, 439, 440, 441, 442, 443, + -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, + 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, + 464, 465, 466, 467, 468, 469, 470, -1, -1, -1, + -1, -1, -1, 477, 478, -1, -1, -1, -1, -1, + 484, -1, 486, -1, -1, -1, -1, 491, -1, 493, + 494, 3, 4, 5, 6, 7, 8, 9, 10, -1, + -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, + 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, + -1, -1, -1, 35, -1, -1, 38, 39, -1, 41, + 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, + 52, -1, 54, 55, 56, 57, 58, 59, 60, 61, + 62, 63, 64, 65, 66, -1, 68, 69, 70, 71, + 72, 
-1, 74, -1, 76, 77, 78, 79, 80, 81, + 82, 83, 84, -1, 86, 87, 88, 89, 90, 91, + -1, 93, 94, 95, 96, 97, 98, 99, 100, 101, + 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, -1, 117, 118, 119, 120, 121, + 122, -1, 124, 125, 126, 127, 128, -1, -1, 131, + 132, 133, 134, 135, -1, 137, 138, 139, -1, 141, + 142, 143, -1, 145, 146, 147, 148, 149, 150, 151, + 152, 153, 154, 155, 156, -1, 158, 159, 160, 161, + -1, 163, -1, 165, 166, -1, 168, 169, 170, 171, + 172, 173, -1, 175, -1, 177, 178, -1, 180, 181, + 182, 183, 184, 185, 186, 187, 188, 189, 190, -1, + 192, 193, 194, 195, 196, 197, -1, 199, 200, 201, + 202, 203, 204, 205, 206, 207, -1, 209, -1, 211, + 212, 213, 214, 215, 216, 217, 218, 219, 220, -1, + -1, 223, 224, 225, 226, -1, 228, 229, 230, 231, + 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, + 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, + 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, + 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, + 272, 273, 274, 275, -1, 277, 278, -1, -1, 281, + 282, 283, -1, -1, 286, 287, 288, 289, 290, 291, + 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, + -1, 303, 304, 305, 306, 307, 308, 309, 310, 311, + -1, 313, 314, 315, 316, 317, 318, 319, 320, 321, + 322, 323, 324, 325, 326, -1, 328, 329, 330, 331, + 332, 333, 334, 335, 336, 337, 338, 339, -1, 341, + 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, + 352, 353, 354, 355, 356, 357, 358, -1, 360, 361, + 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, + 372, 373, 374, 375, -1, 377, 378, 379, 380, 381, + 382, 383, 384, 385, 386, 387, 388, 389, 390, -1, + 392, 393, -1, 395, 396, 397, 398, 399, 400, 401, + -1, 403, 404, -1, -1, 407, 408, 409, 410, 411, + 412, 413, 414, 415, 416, 417, 418, 419, 420, -1, + -1, 423, 424, 425, 426, 427, 428, -1, 430, 431, + 432, 433, 434, 435, 436, -1, 438, 439, 440, 441, + 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, + 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, + 462, 463, 464, 465, 466, 467, 468, 469, 470, -1, 
+ -1, -1, -1, -1, -1, 477, 478, -1, -1, -1, + -1, -1, 484, -1, 486, 487, -1, -1, -1, 491, + -1, 493, 494, 3, 4, 5, 6, 7, 8, 9, + 10, -1, -1, -1, -1, -1, -1, -1, -1, 19, + 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, + 30, -1, -1, -1, -1, 35, -1, -1, 38, 39, + -1, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, -1, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, -1, 68, 69, + 70, 71, 72, -1, 74, -1, 76, 77, 78, 79, + 80, 81, 82, 83, 84, -1, 86, 87, 88, 89, + 90, 91, -1, 93, 94, 95, 96, 97, 98, 99, + 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 115, -1, 117, 118, 119, + 120, 121, 122, -1, 124, 125, 126, 127, 128, -1, + -1, 131, 132, 133, 134, 135, -1, 137, 138, 139, + -1, 141, 142, 143, -1, 145, 146, 147, 148, 149, + 150, 151, 152, 153, 154, 155, 156, -1, 158, 159, + 160, 161, -1, 163, -1, 165, 166, -1, 168, 169, + 170, 171, 172, 173, -1, 175, -1, 177, 178, -1, + 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, + 190, -1, 192, 193, 194, 195, 196, 197, -1, 199, + 200, 201, 202, 203, 204, 205, 206, 207, -1, 209, + -1, 211, 212, 213, 214, 215, 216, 217, 218, 219, + 220, -1, -1, 223, 224, 225, 226, -1, 228, 229, + 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, + 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, + 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, + 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, + 270, 271, 272, 273, 274, 275, -1, 277, 278, -1, + -1, 281, 282, 283, -1, -1, 286, 287, 288, 289, + 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, + 300, 301, -1, 303, 304, 305, 306, 307, 308, 309, + 310, 311, -1, 313, 314, 315, 316, 317, 318, 319, + 320, 321, 322, 323, 324, 325, 326, -1, 328, 329, + 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, + -1, 341, 342, 343, 344, 345, 346, 347, 348, 349, + 350, 351, 352, 353, 354, 355, 356, 357, 358, -1, + 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, + 370, 371, 372, 373, 374, 375, -1, 377, 378, 379, + 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, + 390, -1, 392, 
393, -1, 395, 396, 397, 398, 399, + 400, 401, -1, 403, 404, -1, -1, 407, 408, 409, + 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, + 420, -1, -1, 423, 424, 425, 426, 427, 428, -1, + 430, 431, 432, 433, 434, 435, 436, -1, 438, 439, + 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, + 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, + 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, + 470, -1, -1, -1, -1, -1, -1, 477, 478, -1, + -1, -1, -1, -1, 484, -1, 486, 487, -1, -1, + -1, 491, -1, 493, 494, 3, 4, 5, 6, 7, + 8, 9, 10, -1, -1, -1, -1, -1, -1, -1, + -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, + 28, 29, 30, -1, -1, -1, -1, 35, -1, -1, + 38, 39, -1, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, -1, 54, 55, 56, 57, + 58, 59, 60, 61, 62, 63, 64, 65, 66, -1, + 68, 69, 70, 71, 72, -1, 74, -1, 76, 77, + 78, 79, 80, 81, 82, 83, 84, -1, 86, 87, + 88, 89, 90, 91, -1, 93, 94, 95, 96, 97, + 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115, -1, 117, + 118, 119, 120, 121, 122, -1, 124, 125, 126, 127, + 128, -1, -1, 131, 132, 133, 134, 135, -1, 137, + 138, 139, -1, 141, 142, 143, -1, 145, 146, 147, + 148, 149, 150, 151, 152, 153, 154, 155, 156, -1, + 158, 159, 160, 161, -1, 163, -1, 165, 166, -1, + 168, 169, 170, 171, 172, 173, -1, 175, -1, 177, + 178, -1, 180, 181, 182, 183, 184, 185, 186, 187, + 188, 189, 190, -1, 192, 193, 194, 195, 196, 197, + -1, 199, 200, 201, 202, 203, 204, 205, 206, 207, + -1, 209, -1, 211, 212, 213, 214, 215, 216, 217, + 218, 219, 220, -1, -1, 223, 224, 225, 226, -1, + 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, + 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, + 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, + 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, + 268, 269, 270, 271, 272, 273, 274, 275, -1, 277, + 278, -1, -1, 281, 282, 283, -1, -1, 286, 287, + 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, + 298, 299, 300, 301, -1, 303, 304, 305, 306, 307, + 308, 309, 310, 311, -1, 313, 314, 315, 
316, 317, + 318, 319, 320, 321, 322, 323, 324, 325, 326, -1, + 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, + 338, 339, -1, 341, 342, 343, 344, 345, 346, 347, + 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, + 358, -1, 360, 361, 362, 363, 364, 365, 366, 367, + 368, 369, 370, 371, 372, 373, 374, 375, -1, 377, + 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, + 388, 389, 390, -1, 392, 393, -1, 395, 396, 397, + 398, 399, 400, 401, -1, 403, 404, -1, -1, 407, + 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, + 418, 419, 420, -1, -1, 423, 424, 425, 426, 427, + 428, -1, 430, 431, 432, 433, 434, 435, 436, 437, + 438, 439, 440, 441, 442, 443, -1, -1, 446, -1, + -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, + 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, + 468, 469, 470, -1, -1, -1, -1, -1, -1, 477, + 478, -1, -1, -1, -1, -1, 484, -1, 486, -1, + -1, -1, -1, 491, -1, 493, 494, 3, 4, 5, 6, 7, 8, 9, 10, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, 35, @@ -194238,302 +208067,205 @@ static const yytype_int16 yycheck[] = 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, -1, -1, -1, -1, -1, - -1, 477, 478, -1, -1, -1, -1, -1, -1, -1, - 486, -1, -1, -1, -1, 491, 492, 3, 4, 5, - 6, 7, 8, 9, 10, -1, -1, -1, -1, -1, - -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, - 26, -1, 28, 29, 30, -1, -1, -1, -1, 35, - -1, -1, 38, 39, -1, 41, 42, 43, 44, 45, - 46, 47, 48, 49, 50, 51, 52, -1, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, -1, 68, 69, 70, 71, 72, -1, 74, -1, - 76, 77, 78, 79, 80, 81, 82, 83, 84, -1, - 86, 87, 88, 89, 90, 91, -1, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, - 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, - -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, - 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, - -1, 137, 138, 139, -1, 141, 142, 143, -1, 145, - 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, - 
156, -1, 158, 159, 160, 161, -1, 163, -1, 165, - 166, -1, 168, 169, 170, 171, 172, 173, -1, 175, - -1, 177, 178, -1, 180, 181, 182, 183, 184, 185, - 186, 187, 188, 189, 190, -1, 192, 193, 194, 195, - 196, 197, -1, 199, 200, 201, 202, 203, 204, 205, - 206, 207, -1, 209, -1, 211, 212, 213, 214, 215, - 216, 217, 218, 219, 220, -1, -1, 223, 224, 225, - 226, -1, 228, 229, 230, 231, 232, 233, 234, 235, - 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, - 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, - 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, - 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, - -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, - 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, - 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, - 306, 307, 308, 309, 310, 311, -1, 313, 314, 315, - 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, - 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, - 336, 337, 338, 339, -1, 341, 342, 343, 344, 345, - 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, - 356, 357, 358, -1, 360, 361, 362, 363, 364, 365, - 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, - -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, - 386, 387, 388, 389, 390, -1, 392, 393, -1, 395, - 396, 397, 398, 399, 400, 401, -1, 403, 404, -1, - -1, 407, 408, 409, 410, 411, 412, 413, 414, 415, - 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, - 426, 427, 428, -1, 430, 431, 432, 433, 434, 435, - 436, -1, 438, 439, 440, 441, 442, 443, -1, -1, - 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, - 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, - 466, 467, 468, 469, 470, -1, -1, -1, -1, -1, - -1, 477, 478, -1, -1, -1, -1, -1, -1, -1, - 486, -1, -1, -1, -1, 491, 492, 3, 4, 5, - 6, 7, 8, 9, 10, -1, -1, -1, -1, -1, - -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, - 26, -1, 28, 29, 30, -1, -1, -1, -1, 35, - -1, -1, 38, 39, -1, 41, 42, 43, 44, 45, - 46, 47, 48, 49, 50, 51, 52, -1, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, -1, 68, 69, 70, 71, 72, -1, 74, -1, - 
76, 77, 78, 79, 80, 81, 82, 83, 84, -1, - 86, 87, 88, 89, 90, 91, -1, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, - 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, - -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, - 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, - -1, 137, 138, 139, -1, 141, 142, 143, -1, 145, - 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, - 156, -1, 158, 159, 160, 161, -1, 163, -1, 165, - 166, -1, 168, 169, 170, 171, 172, 173, -1, 175, - -1, 177, 178, -1, 180, 181, 182, 183, 184, 185, - 186, 187, 188, 189, 190, -1, 192, 193, 194, 195, - 196, 197, -1, 199, 200, 201, 202, 203, 204, 205, - 206, 207, -1, 209, -1, 211, 212, 213, 214, 215, - 216, 217, 218, 219, 220, -1, -1, 223, 224, 225, - 226, -1, 228, 229, 230, 231, 232, 233, 234, 235, - 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, - 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, - 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, - 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, - -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, - 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, - 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, - 306, 307, 308, 309, 310, 311, -1, 313, 314, 315, - 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, - 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, - 336, 337, 338, 339, -1, 341, 342, 343, 344, 345, - 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, - 356, 357, 358, -1, 360, 361, 362, 363, 364, 365, - 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, - -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, - 386, 387, 388, 389, 390, -1, 392, 393, -1, 395, - 396, 397, 398, 399, 400, 401, -1, 403, 404, -1, - -1, 407, 408, 409, 410, 411, 412, 413, 414, 415, - 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, - 426, 427, 428, -1, 430, 431, 432, 433, 434, 435, - 436, -1, 438, 439, 440, 441, 442, 443, -1, -1, - 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, - 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, - 466, 467, 468, 469, 470, -1, -1, -1, -1, 
-1, - -1, 477, 478, -1, -1, -1, -1, -1, -1, -1, - 486, -1, -1, -1, -1, 491, 492, 3, 4, 5, - 6, 7, 8, 9, 10, -1, -1, -1, -1, -1, - -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, - 26, -1, 28, 29, 30, -1, -1, -1, -1, 35, - -1, -1, 38, 39, -1, 41, 42, 43, 44, 45, - 46, 47, 48, 49, 50, 51, 52, -1, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, -1, 68, 69, 70, 71, 72, -1, 74, -1, - 76, 77, 78, 79, 80, 81, 82, 83, 84, -1, - 86, 87, 88, 89, 90, 91, -1, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, - 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, - -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, - 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, - -1, 137, 138, 139, -1, 141, 142, 143, -1, 145, - 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, - 156, -1, 158, 159, 160, 161, -1, 163, -1, 165, - 166, -1, 168, 169, 170, 171, 172, 173, -1, 175, - -1, 177, 178, -1, 180, 181, 182, 183, 184, 185, - 186, 187, 188, 189, 190, -1, 192, 193, 194, 195, - 196, 197, -1, 199, 200, 201, 202, 203, 204, 205, - 206, 207, -1, 209, -1, 211, 212, 213, 214, 215, - 216, 217, 218, 219, 220, -1, -1, 223, 224, 225, - 226, -1, 228, 229, 230, 231, 232, 233, 234, 235, - 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, - 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, - 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, - 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, - -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, - 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, - 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, - 306, 307, 308, 309, 310, 311, -1, 313, 314, 315, - 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, - 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, - 336, 337, 338, 339, -1, 341, 342, 343, 344, 345, - 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, - 356, 357, 358, -1, 360, 361, 362, 363, 364, 365, - 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, - -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, - 386, 387, 388, 389, 390, -1, 392, 393, -1, 395, - 396, 397, 
398, 399, 400, 401, -1, 403, 404, -1, - -1, 407, 408, 409, 410, 411, 412, 413, 414, 415, - 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, - 426, 427, 428, -1, 430, 431, 432, 433, 434, 435, - 436, -1, 438, 439, 440, 441, 442, 443, -1, -1, - 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, - 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, - 466, 467, 468, 469, 470, -1, -1, -1, -1, -1, - -1, 477, 478, -1, -1, -1, -1, -1, -1, -1, - 486, -1, -1, -1, -1, 491, 492, 3, 4, 5, - 6, 7, 8, 9, 10, -1, -1, -1, -1, -1, - -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, - 26, -1, 28, 29, 30, -1, -1, -1, -1, 35, - -1, -1, 38, 39, -1, 41, 42, 43, 44, 45, - 46, 47, 48, 49, 50, 51, 52, -1, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, -1, 68, 69, 70, 71, 72, -1, 74, -1, - 76, 77, 78, 79, 80, 81, 82, 83, 84, -1, - 86, 87, 88, 89, 90, 91, -1, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, - 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, - -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, - 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, - -1, 137, 138, 139, -1, 141, 142, 143, -1, 145, - 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, - 156, -1, 158, 159, 160, 161, -1, 163, -1, 165, - 166, -1, 168, 169, 170, 171, 172, 173, -1, 175, - -1, 177, 178, -1, 180, 181, 182, 183, 184, 185, - 186, 187, 188, 189, 190, -1, 192, 193, 194, 195, - 196, 197, -1, 199, 200, 201, 202, 203, 204, 205, - 206, 207, -1, 209, -1, 211, 212, 213, 214, 215, - 216, 217, 218, 219, 220, -1, -1, 223, 224, 225, - 226, -1, 228, 229, 230, 231, 232, 233, 234, 235, - 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, - 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, - 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, - 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, - -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, - 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, - 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, - 306, 307, 308, 309, 310, 311, -1, 313, 314, 315, - 316, 317, 318, 319, 320, 321, 322, 
323, 324, 325, - 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, - 336, 337, 338, 339, -1, 341, 342, 343, 344, 345, - 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, - 356, 357, 358, -1, 360, 361, 362, 363, 364, 365, - 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, - -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, - 386, 387, 388, 389, 390, -1, 392, 393, -1, 395, - 396, 397, 398, 399, 400, 401, -1, 403, 404, -1, - -1, 407, 408, 409, 410, 411, 412, 413, 414, 415, - 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, - 426, 427, 428, -1, 430, 431, 432, 433, 434, 435, - 436, -1, 438, 439, 440, 441, 442, 443, -1, -1, - 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, - 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, - 466, 467, 468, 469, 470, -1, -1, -1, -1, -1, - -1, 477, 478, -1, -1, -1, -1, -1, -1, -1, - 486, -1, -1, -1, -1, 491, 492, 3, 4, 5, - 6, 7, 8, 9, 10, -1, -1, -1, -1, -1, - -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, - 26, -1, 28, 29, 30, -1, -1, -1, -1, 35, - -1, -1, 38, 39, -1, 41, 42, 43, 44, 45, - 46, 47, 48, 49, 50, 51, 52, -1, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, -1, 68, 69, 70, 71, 72, -1, 74, -1, - 76, 77, 78, 79, 80, 81, 82, 83, 84, -1, - 86, 87, 88, 89, 90, 91, -1, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, - 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, - -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, - 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, - -1, 137, 138, 139, -1, 141, 142, 143, -1, 145, - 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, - 156, -1, 158, 159, 160, 161, -1, 163, -1, 165, - 166, -1, 168, 169, 170, 171, 172, 173, -1, 175, - -1, 177, 178, -1, 180, 181, 182, 183, 184, 185, - 186, 187, 188, 189, 190, -1, 192, 193, 194, 195, - 196, 197, -1, 199, 200, 201, 202, 203, 204, 205, - 206, 207, -1, 209, -1, 211, 212, 213, 214, 215, - 216, 217, 218, 219, 220, -1, -1, 223, 224, 225, - 226, -1, 228, 229, 230, 231, 232, 233, 234, 235, - 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, - 246, 247, 
248, 249, 250, 251, 252, 253, 254, 255, - 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, - 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, - -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, - 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, - 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, - 306, 307, 308, 309, 310, 311, -1, 313, 314, 315, - 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, - 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, - 336, 337, 338, 339, -1, 341, 342, 343, 344, 345, - 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, - 356, 357, 358, -1, 360, 361, 362, 363, 364, 365, - 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, - -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, - 386, 387, 388, 389, 390, -1, 392, 393, -1, 395, - 396, 397, 398, 399, 400, 401, -1, 403, 404, -1, - -1, 407, 408, 409, 410, 411, 412, 413, 414, 415, - 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, - 426, 427, 428, -1, 430, 431, 432, 433, 434, 435, - 436, -1, 438, 439, 440, 441, 442, 443, -1, -1, - 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, - 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, - 466, 467, 468, 469, 470, -1, -1, -1, -1, -1, - -1, 477, 478, -1, -1, -1, -1, -1, -1, -1, - 486, -1, -1, -1, -1, 491, 492, 3, 4, 5, - 6, 7, 8, 9, 10, -1, -1, -1, -1, -1, - -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, - 26, -1, 28, 29, 30, -1, -1, -1, -1, 35, - -1, -1, 38, 39, -1, 41, 42, 43, 44, 45, - 46, 47, 48, 49, 50, 51, 52, -1, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, -1, 68, 69, 70, 71, 72, -1, 74, -1, - 76, 77, 78, 79, 80, 81, 82, 83, 84, -1, - 86, 87, 88, 89, 90, 91, -1, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, - 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, - -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, - 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, - -1, 137, 138, 139, -1, 141, 142, 143, -1, 145, - 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, - 156, -1, 158, 159, 160, 161, -1, 163, -1, 165, - 166, -1, 168, 169, 170, 171, 
172, 173, -1, 175, - -1, 177, 178, -1, 180, 181, 182, 183, 184, 185, - 186, 187, 188, 189, 190, -1, 192, 193, 194, 195, - 196, 197, -1, 199, 200, 201, 202, 203, 204, 205, - 206, 207, -1, 209, -1, 211, 212, 213, 214, 215, - 216, 217, 218, 219, 220, -1, -1, 223, 224, 225, - 226, -1, 228, 229, 230, 231, 232, 233, 234, 235, - 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, - 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, - 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, - 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, - -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, - 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, - 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, - 306, 307, 308, 309, 310, 311, -1, 313, 314, 315, - 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, - 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, - 336, 337, 338, 339, -1, 341, 342, 343, 344, 345, - 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, - 356, 357, 358, -1, 360, 361, 362, 363, 364, 365, - 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, - -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, - 386, 387, 388, 389, 390, -1, 392, 393, -1, 395, - 396, 397, 398, 399, 400, 401, -1, 403, 404, -1, - -1, 407, 408, 409, 410, 411, 412, 413, 414, 415, - 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, - 426, 427, 428, -1, 430, 431, 432, 433, 434, 435, - 436, -1, 438, 439, 440, 441, 442, 443, -1, -1, - 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, - 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, - 466, 467, 468, 469, 470, -1, -1, -1, -1, -1, - -1, 477, 478, -1, -1, -1, -1, -1, -1, -1, - 486, -1, -1, -1, -1, 491, 492, 3, 4, 5, + -1, 477, 478, -1, -1, -1, -1, -1, 484, -1, + 486, -1, -1, -1, -1, 491, -1, 493, 494, 3, + 4, 5, 6, 7, 8, 9, 10, -1, -1, -1, + -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, + 24, 25, 26, -1, 28, 29, 30, -1, -1, -1, + -1, 35, -1, -1, 38, 39, -1, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, 52, -1, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 66, -1, 68, 69, 70, 71, 
72, -1, + 74, -1, 76, 77, 78, 79, 80, 81, 82, 83, + 84, -1, 86, 87, 88, 89, 90, 91, -1, 93, + 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, + 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, + 114, 115, -1, 117, 118, 119, 120, 121, 122, -1, + 124, 125, 126, 127, 128, -1, -1, 131, 132, 133, + 134, 135, -1, 137, 138, 139, -1, 141, 142, 143, + -1, 145, 146, 147, 148, 149, 150, 151, 152, 153, + 154, 155, 156, -1, 158, 159, 160, 161, -1, 163, + -1, 165, 166, -1, 168, 169, 170, 171, 172, 173, + -1, 175, -1, 177, 178, -1, 180, 181, 182, 183, + 184, 185, 186, 187, 188, 189, 190, -1, 192, 193, + 194, 195, 196, 197, -1, 199, 200, 201, 202, 203, + 204, 205, 206, 207, -1, 209, -1, 211, 212, 213, + 214, 215, 216, 217, 218, 219, 220, -1, -1, 223, + 224, 225, 226, -1, 228, 229, 230, 231, 232, 233, + 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, + 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, + 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, + 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, + 274, 275, -1, 277, 278, -1, -1, 281, 282, 283, + -1, -1, 286, 287, 288, 289, 290, 291, 292, 293, + 294, 295, 296, 297, 298, 299, 300, 301, -1, 303, + 304, 305, 306, 307, 308, 309, 310, 311, -1, 313, + 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, + 324, 325, 326, -1, 328, 329, 330, 331, 332, 333, + 334, 335, 336, 337, 338, 339, -1, 341, 342, 343, + 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, + 354, 355, 356, 357, 358, -1, 360, 361, 362, 363, + 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, + 374, 375, -1, 377, 378, 379, 380, 381, 382, 383, + 384, 385, 386, 387, 388, 389, 390, -1, 392, 393, + -1, 395, 396, 397, 398, 399, 400, 401, -1, 403, + 404, -1, -1, 407, 408, 409, 410, 411, 412, 413, + 414, 415, 416, 417, 418, 419, 420, -1, -1, 423, + 424, 425, 426, 427, 428, -1, 430, 431, 432, 433, + 434, 435, 436, -1, 438, 439, 440, 441, 442, 443, + -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, + 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, + 464, 465, 466, 467, 468, 469, 
470, -1, -1, -1, + -1, -1, -1, 477, 478, -1, -1, -1, -1, -1, + 484, -1, 486, -1, -1, -1, -1, 491, -1, 493, + 494, 3, 4, 5, 6, 7, 8, 9, 10, -1, + -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, + 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, + -1, -1, -1, 35, -1, -1, 38, 39, -1, 41, + 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, + 52, -1, 54, 55, 56, 57, 58, 59, 60, 61, + 62, 63, 64, 65, 66, -1, 68, 69, 70, 71, + 72, -1, 74, -1, 76, 77, 78, 79, 80, 81, + 82, 83, 84, -1, 86, 87, 88, 89, 90, 91, + -1, 93, 94, 95, 96, 97, 98, 99, 100, 101, + 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, -1, 117, 118, 119, 120, 121, + 122, -1, 124, 125, 126, 127, 128, -1, -1, 131, + 132, 133, 134, 135, -1, 137, 138, 139, -1, 141, + 142, 143, -1, 145, 146, 147, 148, 149, 150, 151, + 152, 153, 154, 155, 156, -1, 158, 159, 160, 161, + -1, 163, -1, 165, 166, -1, 168, 169, 170, 171, + 172, 173, -1, 175, -1, 177, 178, -1, 180, 181, + 182, 183, 184, 185, 186, 187, 188, 189, 190, -1, + 192, 193, 194, 195, 196, 197, -1, 199, 200, 201, + 202, 203, 204, 205, 206, 207, -1, 209, -1, 211, + 212, 213, 214, 215, 216, 217, 218, 219, 220, -1, + -1, 223, 224, 225, 226, -1, 228, 229, 230, 231, + 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, + 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, + 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, + 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, + 272, 273, 274, 275, -1, 277, 278, -1, -1, 281, + 282, 283, -1, -1, 286, 287, 288, 289, 290, 291, + 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, + -1, 303, 304, 305, 306, 307, 308, 309, 310, 311, + -1, 313, 314, 315, 316, 317, 318, 319, 320, 321, + 322, 323, 324, 325, 326, -1, 328, 329, 330, 331, + 332, 333, 334, 335, 336, 337, 338, 339, -1, 341, + 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, + 352, 353, 354, 355, 356, 357, 358, -1, 360, 361, + 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, + 372, 373, 374, 375, -1, 377, 378, 379, 380, 381, + 382, 383, 384, 385, 386, 387, 388, 389, 390, -1, + 
392, 393, -1, 395, 396, 397, 398, 399, 400, 401, + -1, 403, 404, -1, -1, 407, 408, 409, 410, 411, + 412, 413, 414, 415, 416, 417, 418, 419, 420, -1, + -1, 423, 424, 425, 426, 427, 428, -1, 430, 431, + 432, 433, 434, 435, 436, -1, 438, 439, 440, 441, + 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, + 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, + 462, 463, 464, 465, 466, 467, 468, 469, 470, -1, + -1, -1, -1, -1, -1, 477, 478, -1, -1, -1, + -1, -1, 484, -1, 486, -1, -1, -1, -1, 491, + -1, 493, 494, 3, 4, 5, 6, 7, 8, 9, + 10, -1, -1, -1, -1, -1, -1, -1, -1, 19, + 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, + 30, -1, -1, -1, -1, 35, -1, -1, 38, 39, + -1, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, -1, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, -1, 68, 69, + 70, 71, 72, -1, 74, -1, 76, 77, 78, 79, + 80, 81, 82, 83, 84, -1, 86, 87, 88, 89, + 90, 91, -1, 93, 94, 95, 96, 97, 98, 99, + 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 115, -1, 117, 118, 119, + 120, 121, 122, -1, 124, 125, 126, 127, 128, -1, + -1, 131, 132, 133, 134, 135, -1, 137, 138, 139, + -1, 141, 142, 143, -1, 145, 146, 147, 148, 149, + 150, 151, 152, 153, 154, 155, 156, -1, 158, 159, + 160, 161, -1, 163, -1, 165, 166, -1, 168, 169, + 170, 171, 172, 173, -1, 175, -1, 177, 178, -1, + 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, + 190, -1, 192, 193, 194, 195, 196, 197, -1, 199, + 200, 201, 202, 203, 204, 205, 206, 207, -1, 209, + -1, 211, 212, 213, 214, 215, 216, 217, 218, 219, + 220, -1, -1, 223, 224, 225, 226, -1, 228, 229, + 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, + 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, + 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, + 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, + 270, 271, 272, 273, 274, 275, -1, 277, 278, -1, + -1, 281, 282, 283, -1, -1, 286, 287, 288, 289, + 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, + 300, 301, -1, 303, 304, 305, 306, 307, 308, 309, + 310, 311, -1, 313, 314, 
315, 316, 317, 318, 319, + 320, 321, 322, 323, 324, 325, 326, -1, 328, 329, + 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, + -1, 341, 342, 343, 344, 345, 346, 347, 348, 349, + 350, 351, 352, 353, 354, 355, 356, 357, 358, -1, + 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, + 370, 371, 372, 373, 374, 375, -1, 377, 378, 379, + 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, + 390, -1, 392, 393, -1, 395, 396, 397, 398, 399, + 400, 401, -1, 403, 404, -1, -1, 407, 408, 409, + 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, + 420, -1, -1, 423, 424, 425, 426, 427, 428, -1, + 430, 431, 432, 433, 434, 435, 436, -1, 438, 439, + 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, + 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, + 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, + 470, -1, -1, -1, -1, -1, -1, 477, 478, -1, + -1, -1, -1, -1, 484, -1, 486, -1, -1, -1, + -1, 491, -1, 493, 494, 3, 4, 5, 6, 7, + 8, 9, 10, -1, -1, -1, -1, -1, -1, -1, + -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, + 28, 29, 30, -1, -1, -1, -1, 35, -1, -1, + 38, 39, -1, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, -1, 54, 55, 56, 57, + 58, 59, 60, 61, 62, 63, 64, 65, 66, -1, + 68, 69, 70, 71, 72, -1, 74, -1, 76, 77, + 78, 79, 80, 81, 82, 83, 84, -1, 86, 87, + 88, 89, 90, 91, -1, 93, 94, 95, 96, 97, + 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115, -1, 117, + 118, 119, 120, 121, 122, -1, 124, 125, 126, 127, + 128, -1, -1, 131, 132, 133, 134, 135, -1, 137, + 138, 139, -1, 141, 142, 143, -1, 145, 146, 147, + 148, 149, 150, 151, 152, 153, 154, 155, 156, -1, + 158, 159, 160, 161, -1, 163, -1, 165, 166, -1, + 168, 169, 170, 171, 172, 173, -1, 175, -1, 177, + 178, -1, 180, 181, 182, 183, 184, 185, 186, 187, + 188, 189, 190, -1, 192, 193, 194, 195, 196, 197, + -1, 199, 200, 201, 202, 203, 204, 205, 206, 207, + -1, 209, -1, 211, 212, 213, 214, 215, 216, 217, + 218, 219, 220, -1, -1, 223, 224, 225, 226, -1, + 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, + 
238, 239, 240, 241, 242, 243, 244, 245, 246, 247, + 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, + 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, + 268, 269, 270, 271, 272, 273, 274, 275, -1, 277, + 278, -1, -1, 281, 282, 283, -1, -1, 286, 287, + 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, + 298, 299, 300, 301, -1, 303, 304, 305, 306, 307, + 308, 309, 310, 311, -1, 313, 314, 315, 316, 317, + 318, 319, 320, 321, 322, 323, 324, 325, 326, -1, + 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, + 338, 339, -1, 341, 342, 343, 344, 345, 346, 347, + 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, + 358, -1, 360, 361, 362, 363, 364, 365, 366, 367, + 368, 369, 370, 371, 372, 373, 374, 375, -1, 377, + 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, + 388, 389, 390, -1, 392, 393, -1, 395, 396, 397, + 398, 399, 400, 401, -1, 403, 404, -1, -1, 407, + 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, + 418, 419, 420, -1, -1, 423, 424, 425, 426, 427, + 428, -1, 430, 431, 432, 433, 434, 435, 436, -1, + 438, 439, 440, 441, 442, 443, -1, -1, 446, -1, + -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, + 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, + 468, 469, 470, -1, -1, -1, -1, -1, -1, 477, + 478, -1, -1, -1, -1, -1, 484, -1, 486, -1, + -1, -1, -1, 491, -1, 493, 494, 3, 4, 5, 6, 7, 8, 9, 10, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, 35, @@ -194581,154 +208313,156 @@ static const yytype_int16 yycheck[] = 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, -1, -1, -1, -1, -1, - -1, 477, 478, -1, -1, -1, -1, -1, -1, -1, - 486, -1, -1, -1, -1, 491, 492, 3, 4, 5, - 6, 7, 8, 9, 10, -1, -1, -1, -1, -1, - -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, - 26, -1, 28, 29, 30, -1, -1, -1, -1, -1, - -1, -1, 38, 39, -1, 41, 42, 43, 44, 45, - 46, 47, 48, 49, 50, 51, 52, -1, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, -1, 68, 69, 70, 71, 72, -1, 
74, -1, - 76, 77, 78, 79, 80, 81, 82, 83, 84, -1, - 86, 87, 88, 89, 90, 91, -1, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, - 106, 107, 108, 109, 110, 111, 112, 113, -1, 115, - -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, - 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, - -1, 137, 138, 139, -1, 141, 142, 143, -1, 145, - 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, - 156, -1, 158, 159, 160, 161, -1, 163, -1, 165, - 166, -1, 168, 169, 170, 171, 172, 173, -1, 175, - -1, 177, 178, -1, 180, 181, 182, 183, 184, 185, - 186, 187, 188, 189, 190, -1, 192, 193, 194, 195, - 196, 197, -1, 199, 200, 201, 202, 203, 204, 205, - 206, 207, -1, 209, -1, 211, 212, 213, 214, 215, - 216, 217, 218, 219, 220, -1, -1, 223, 224, 225, - 226, -1, 228, 229, 230, 231, 232, 233, 234, 235, - 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, - 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, - 256, 257, 258, 259, 260, 261, 262, 263, -1, 265, - 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, - -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, - 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, - 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, - 306, 307, 308, 309, 310, 311, -1, 313, 314, 315, - 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, - 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, - 336, 337, 338, 339, -1, 341, 342, 343, 344, 345, - 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, - 356, 357, 358, -1, 360, 361, 362, 363, 364, 365, - 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, - -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, - 386, 387, 388, 389, 390, -1, 392, 393, -1, 395, - 396, 397, 398, 399, 400, 401, -1, 403, 404, -1, - -1, 407, 408, 409, 410, 411, 412, 413, 414, 415, - 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, - 426, 427, 428, -1, 430, 431, 432, 433, 434, 435, - 436, -1, 438, 439, 440, 441, 442, 443, -1, -1, - 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, - 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, - 466, 467, 468, 469, -1, -1, -1, 
-1, -1, -1, - -1, 477, 478, -1, -1, -1, -1, -1, -1, -1, - 486, -1, -1, -1, -1, 491, 492, 3, 4, 5, - 6, 7, 8, 9, 10, -1, -1, -1, -1, -1, - -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, - 26, -1, 28, 29, 30, -1, -1, -1, -1, 35, - -1, -1, 38, 39, -1, 41, 42, 43, 44, 45, - 46, 47, 48, 49, 50, 51, 52, -1, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, -1, 68, 69, 70, 71, 72, -1, 74, -1, - 76, 77, 78, 79, 80, 81, 82, 83, 84, -1, - 86, 87, 88, 89, 90, 91, -1, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, - 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, - -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, - 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, - -1, 137, 138, 139, -1, 141, 142, 143, -1, 145, - 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, - 156, -1, 158, 159, 160, 161, -1, 163, -1, 165, - 166, -1, 168, 169, 170, 171, 172, 173, -1, 175, - -1, 177, 178, -1, 180, 181, 182, 183, 184, 185, - 186, 187, 188, 189, 190, -1, 192, 193, 194, 195, - 196, 197, -1, 199, 200, 201, 202, 203, 204, 205, - 206, 207, -1, 209, -1, 211, -1, -1, 214, 215, - 216, 217, 218, 219, 220, -1, -1, 223, 224, 225, - 226, -1, 228, 229, 230, 231, 232, 233, 234, 235, - 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, - 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, - 256, 257, 258, 259, 260, 261, 262, 263, -1, 265, - 266, -1, 268, 269, 270, 271, 272, 273, 274, 275, - -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, - 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, - 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, - 306, 307, 308, 309, 310, 311, -1, 313, 314, 315, - 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, - 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, - 336, 337, 338, 339, -1, 341, 342, 343, 344, 345, - 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, - 356, 357, 358, -1, 360, 361, 362, 363, 364, 365, - 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, - -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, - 386, 387, 388, 389, 390, -1, 392, 393, -1, 395, - 396, 
397, 398, 399, 400, 401, -1, 403, 404, -1, - -1, 407, 408, 409, 410, 411, 412, 413, 414, 415, - 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, - 426, 427, 428, -1, 430, 431, 432, 433, 434, 435, - 436, -1, 438, 439, 440, 441, 442, 443, -1, -1, - 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, - 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, - 466, 467, 468, 469, 470, -1, -1, -1, -1, -1, - -1, 477, 478, -1, -1, 3, 4, 5, 6, 7, - 486, 9, 10, -1, -1, 491, 492, -1, -1, -1, - -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, - 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, - 38, 39, -1, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, -1, 54, 55, 56, 57, - 58, 59, 60, 61, 62, 63, 64, 65, 66, -1, - 68, 69, 70, 71, 72, -1, 74, -1, 76, 77, - 78, 79, 80, 81, 82, 83, 84, -1, 86, 87, - 88, 89, 90, 91, -1, 93, 94, 95, 96, 97, - 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, - 108, 109, 110, 111, 112, 113, -1, 115, -1, 117, - 118, 119, 120, 121, 122, -1, 124, 125, 126, 127, - 128, -1, -1, 131, 132, 133, 134, 135, -1, 137, - 138, 139, -1, 141, 142, 143, -1, 145, 146, 147, - 148, 149, 150, 151, 152, 153, 154, 155, 156, -1, - 158, 159, 160, 161, -1, 163, -1, 165, 166, -1, - 168, 169, 170, 171, 172, 173, -1, 175, -1, 177, - 178, -1, 180, 181, 182, 183, 184, 185, 186, 187, - 188, 189, 190, -1, 192, 193, 194, 195, 196, 197, - -1, 199, 200, 201, 202, 203, 204, 205, 206, 207, - -1, 209, -1, 211, 212, 213, 214, 215, 216, 217, - 218, 219, 220, -1, -1, 223, 224, 225, 226, -1, - 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, - 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, - 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, - 258, 259, 260, 261, 262, 263, -1, 265, 266, 267, - 268, 269, 270, 271, 272, 273, 274, 275, -1, 277, - 278, -1, -1, 281, 282, 283, -1, -1, 286, 287, - 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, - 298, 299, 300, 301, -1, 303, 304, 305, 306, 307, - 308, 309, 310, 311, -1, 313, 314, 315, 316, 317, - 318, 319, 320, 321, 322, 323, 324, 325, 326, -1, - 328, 329, 330, 331, 
332, 333, 334, 335, 336, 337, - 338, 339, -1, 341, 342, 343, 344, 345, 346, 347, - 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, - 358, -1, 360, 361, 362, 363, 364, 365, 366, 367, - 368, 369, 370, 371, 372, 373, 374, 375, -1, 377, - 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, - 388, 389, 390, -1, 392, 393, -1, 395, 396, 397, - 398, 399, 400, 401, -1, 403, 404, -1, -1, 407, - 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, - 418, 419, 420, -1, -1, 423, 424, 425, 426, 427, - 428, -1, 430, 431, 432, 433, 434, 435, 436, -1, - 438, 439, 440, 441, 442, 443, -1, -1, 446, -1, - -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, - 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, - 468, 469, -1, -1, -1, -1, -1, -1, -1, 477, - 478, -1, -1, -1, -1, -1, -1, -1, 486, -1, - -1, -1, -1, 491, 492, 3, 4, 5, 6, 7, + -1, 477, 478, -1, -1, -1, -1, -1, 484, -1, + 486, -1, -1, -1, -1, 491, -1, 493, 494, 3, + 4, 5, 6, 7, 8, 9, 10, -1, -1, -1, + -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, + 24, 25, 26, -1, 28, 29, 30, -1, -1, -1, + -1, 35, -1, -1, 38, 39, -1, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, 52, -1, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 66, -1, 68, 69, 70, 71, 72, -1, + 74, -1, 76, 77, 78, 79, 80, 81, 82, 83, + 84, -1, 86, 87, 88, 89, 90, 91, -1, 93, + 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, + 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, + 114, 115, -1, 117, 118, 119, 120, 121, 122, -1, + 124, 125, 126, 127, 128, -1, -1, 131, 132, 133, + 134, 135, -1, 137, 138, 139, -1, 141, 142, 143, + -1, 145, 146, 147, 148, 149, 150, 151, 152, 153, + 154, 155, 156, -1, 158, 159, 160, 161, -1, 163, + -1, 165, 166, -1, 168, 169, 170, 171, 172, 173, + -1, 175, -1, 177, 178, -1, 180, 181, 182, 183, + 184, 185, 186, 187, 188, 189, 190, -1, 192, 193, + 194, 195, 196, 197, -1, 199, 200, 201, 202, 203, + 204, 205, 206, 207, -1, 209, -1, 211, 212, 213, + 214, 215, 216, 217, 218, 219, 220, -1, -1, 223, + 224, 225, 226, -1, 228, 229, 230, 231, 232, 233, + 234, 235, 236, 
237, 238, 239, 240, 241, 242, 243, + 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, + 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, + 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, + 274, 275, -1, 277, 278, -1, -1, 281, 282, 283, + -1, -1, 286, 287, 288, 289, 290, 291, 292, 293, + 294, 295, 296, 297, 298, 299, 300, 301, -1, 303, + 304, 305, 306, 307, 308, 309, 310, 311, -1, 313, + 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, + 324, 325, 326, -1, 328, 329, 330, 331, 332, 333, + 334, 335, 336, 337, 338, 339, -1, 341, 342, 343, + 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, + 354, 355, 356, 357, 358, -1, 360, 361, 362, 363, + 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, + 374, 375, -1, 377, 378, 379, 380, 381, 382, 383, + 384, 385, 386, 387, 388, 389, 390, -1, 392, 393, + -1, 395, 396, 397, 398, 399, 400, 401, -1, 403, + 404, -1, -1, 407, 408, 409, 410, 411, 412, 413, + 414, 415, 416, 417, 418, 419, 420, -1, -1, 423, + 424, 425, 426, 427, 428, -1, 430, 431, 432, 433, + 434, 435, 436, -1, 438, 439, 440, 441, 442, 443, + -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, + 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, + 464, 465, 466, 467, 468, 469, 470, -1, -1, -1, + -1, -1, -1, 477, 478, -1, -1, -1, -1, -1, + 484, -1, 486, -1, -1, -1, -1, 491, -1, 493, + 494, 3, 4, 5, 6, 7, 8, 9, 10, -1, + -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, + 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, + -1, -1, -1, 35, -1, -1, 38, 39, -1, 41, + 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, + 52, -1, 54, 55, 56, 57, 58, 59, 60, 61, + 62, 63, 64, 65, 66, -1, 68, 69, 70, 71, + 72, -1, 74, -1, 76, 77, 78, 79, 80, 81, + 82, 83, 84, -1, 86, 87, 88, 89, 90, 91, + -1, 93, 94, 95, 96, 97, 98, 99, 100, 101, + 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, -1, 117, 118, 119, 120, 121, + 122, -1, 124, 125, 126, 127, 128, -1, -1, 131, + 132, 133, 134, 135, -1, 137, 138, 139, -1, 141, + 142, 143, -1, 145, 146, 147, 148, 149, 150, 151, + 152, 153, 154, 155, 156, -1, 158, 
159, 160, 161, + -1, 163, -1, 165, 166, -1, 168, 169, 170, 171, + 172, 173, -1, 175, -1, 177, 178, -1, 180, 181, + 182, 183, 184, 185, 186, 187, 188, 189, 190, -1, + 192, 193, 194, 195, 196, 197, -1, 199, 200, 201, + 202, 203, 204, 205, 206, 207, -1, 209, -1, 211, + 212, 213, 214, 215, 216, 217, 218, 219, 220, -1, + -1, 223, 224, 225, 226, -1, 228, 229, 230, 231, + 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, + 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, + 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, + 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, + 272, 273, 274, 275, -1, 277, 278, -1, -1, 281, + 282, 283, -1, -1, 286, 287, 288, 289, 290, 291, + 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, + -1, 303, 304, 305, 306, 307, 308, 309, 310, 311, + -1, 313, 314, 315, 316, 317, 318, 319, 320, 321, + 322, 323, 324, 325, 326, -1, 328, 329, 330, 331, + 332, 333, 334, 335, 336, 337, 338, 339, -1, 341, + 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, + 352, 353, 354, 355, 356, 357, 358, -1, 360, 361, + 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, + 372, 373, 374, 375, -1, 377, 378, 379, 380, 381, + 382, 383, 384, 385, 386, 387, 388, 389, 390, -1, + 392, 393, -1, 395, 396, 397, 398, 399, 400, 401, + -1, 403, 404, -1, -1, 407, 408, 409, 410, 411, + 412, 413, 414, 415, 416, 417, 418, 419, 420, -1, + -1, 423, 424, 425, 426, 427, 428, -1, 430, 431, + 432, 433, 434, 435, 436, -1, 438, 439, 440, 441, + 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, + 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, + 462, 463, 464, 465, 466, 467, 468, 469, 470, -1, + -1, -1, -1, -1, -1, 477, 478, -1, -1, -1, + -1, -1, 484, -1, 486, -1, -1, -1, -1, 491, + -1, 493, 494, 3, 4, 5, 6, 7, 8, 9, + 10, -1, -1, -1, -1, -1, -1, -1, -1, 19, + 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, + 30, -1, -1, -1, -1, 35, -1, -1, 38, 39, + -1, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, -1, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, -1, 68, 69, + 70, 71, 72, -1, 74, -1, 76, 
77, 78, 79, + 80, 81, 82, 83, 84, -1, 86, 87, 88, 89, + 90, 91, -1, 93, 94, 95, 96, 97, 98, 99, + 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 115, -1, 117, 118, 119, + 120, 121, 122, -1, 124, 125, 126, 127, 128, -1, + -1, 131, 132, 133, 134, 135, -1, 137, 138, 139, + -1, 141, 142, 143, -1, 145, 146, 147, 148, 149, + 150, 151, 152, 153, 154, 155, 156, -1, 158, 159, + 160, 161, -1, 163, -1, 165, 166, -1, 168, 169, + 170, 171, 172, 173, -1, 175, -1, 177, 178, -1, + 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, + 190, -1, 192, 193, 194, 195, 196, 197, -1, 199, + 200, 201, 202, 203, 204, 205, 206, 207, -1, 209, + -1, 211, -1, -1, 214, 215, 216, 217, 218, 219, + 220, -1, -1, 223, 224, 225, 226, -1, 228, 229, + 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, + 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, + 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, + 260, 261, 262, 263, -1, 265, 266, -1, 268, 269, + 270, 271, 272, 273, 274, 275, -1, 277, 278, -1, + -1, 281, 282, 283, -1, -1, 286, 287, 288, 289, + 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, + 300, 301, -1, 303, 304, 305, 306, 307, 308, 309, + 310, 311, -1, 313, 314, 315, 316, 317, 318, 319, + 320, 321, 322, 323, 324, 325, 326, -1, 328, 329, + 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, + -1, 341, 342, 343, 344, 345, 346, 347, 348, 349, + 350, 351, 352, 353, 354, 355, 356, 357, 358, -1, + 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, + 370, 371, 372, 373, 374, 375, -1, 377, 378, 379, + 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, + 390, -1, 392, 393, -1, 395, 396, 397, 398, 399, + 400, 401, -1, 403, 404, -1, -1, 407, 408, 409, + 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, + 420, -1, -1, 423, 424, 425, 426, 427, 428, -1, + 430, 431, 432, 433, 434, 435, 436, -1, 438, 439, + 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, + 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, + 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, + 470, -1, -1, -1, -1, -1, 
-1, 477, 478, -1, + -1, -1, -1, -1, 484, -1, 486, -1, -1, -1, + -1, 491, -1, 493, 494, 3, 4, 5, 6, 7, 8, 9, 10, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, @@ -194749,7 +208483,7 @@ static const yytype_int16 yycheck[] = 178, -1, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, -1, 192, 193, 194, 195, 196, 197, -1, 199, 200, 201, 202, 203, 204, 205, 206, 207, - -1, 209, -1, 211, -1, 213, 214, 215, 216, 217, + -1, 209, -1, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, -1, -1, 223, 224, 225, 226, -1, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, @@ -194776,34 +208510,82 @@ static const yytype_int16 yycheck[] = -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, -1, -1, -1, -1, -1, -1, -1, 477, - 478, -1, -1, -1, -1, -1, -1, -1, 486, -1, - -1, -1, -1, 491, 492, 3, 4, 5, 6, 7, + 478, -1, -1, 3, 4, 5, 6, 7, 486, 9, + 10, -1, -1, -1, -1, 493, 494, -1, -1, 19, + 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, + 30, -1, -1, -1, -1, -1, -1, -1, 38, 39, + -1, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, -1, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, -1, 68, 69, + 70, 71, 72, -1, 74, -1, 76, 77, 78, 79, + 80, 81, 82, 83, 84, -1, 86, 87, 88, 89, + 90, 91, -1, 93, 94, 95, 96, 97, 98, 99, + 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, -1, 115, -1, 117, 118, 119, + 120, 121, 122, -1, 124, 125, 126, 127, 128, -1, + -1, 131, 132, 133, 134, 135, -1, 137, 138, 139, + -1, 141, 142, 143, -1, 145, 146, 147, 148, 149, + 150, 151, 152, 153, 154, 155, 156, -1, 158, 159, + 160, 161, -1, 163, -1, 165, 166, -1, 168, 169, + 170, 171, 172, 173, -1, 175, -1, 177, 178, -1, + 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, + 190, -1, 192, 193, 194, 195, 196, 197, -1, 199, + 200, 201, 202, 203, 204, 205, 206, 207, -1, 209, + -1, 211, 212, 213, 214, 215, 216, 217, 218, 219, + 220, 
-1, -1, 223, 224, 225, 226, -1, 228, 229, + 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, + 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, + 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, + 260, 261, 262, 263, -1, 265, 266, 267, 268, 269, + 270, 271, 272, 273, 274, 275, -1, 277, 278, -1, + -1, 281, 282, 283, -1, -1, 286, 287, 288, 289, + 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, + 300, 301, -1, 303, 304, 305, 306, 307, 308, 309, + 310, 311, -1, 313, 314, 315, 316, 317, 318, 319, + 320, 321, 322, 323, 324, 325, 326, -1, 328, 329, + 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, + -1, 341, 342, 343, 344, 345, 346, 347, 348, 349, + 350, 351, 352, 353, 354, 355, 356, 357, 358, -1, + 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, + 370, 371, 372, 373, 374, 375, -1, 377, 378, 379, + 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, + 390, -1, 392, 393, -1, 395, 396, 397, 398, 399, + 400, 401, -1, 403, 404, -1, -1, 407, 408, 409, + 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, + 420, -1, -1, 423, 424, 425, 426, 427, 428, -1, + 430, 431, 432, 433, 434, 435, 436, -1, 438, 439, + 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, + 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, + 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, + -1, -1, -1, -1, -1, -1, -1, 477, 478, -1, + -1, -1, -1, -1, -1, -1, 486, -1, -1, -1, + -1, -1, -1, 493, 494, 3, 4, 5, 6, 7, 8, 9, 10, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, - 28, 29, 30, -1, -1, -1, -1, 35, -1, -1, + 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, 41, 42, 43, 44, 45, 46, 47, - -1, 49, 50, 51, 52, -1, 54, 55, 56, 57, + 48, 49, 50, 51, 52, -1, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, -1, 68, 69, 70, 71, 72, -1, 74, -1, 76, 77, 78, 79, 80, 81, 82, 83, 84, -1, 86, 87, 88, 89, 90, 91, -1, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, - 108, 109, 110, 111, 112, 113, 114, 115, -1, 117, + 108, 109, 110, 111, 112, 113, -1, 115, -1, 117, 118, 119, 
120, 121, 122, -1, 124, 125, 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, -1, 137, - 138, 139, -1, 141, -1, 143, -1, 145, 146, 147, + 138, 139, -1, 141, 142, 143, -1, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, -1, 158, 159, 160, 161, -1, 163, -1, 165, 166, -1, - 168, 169, 170, 171, -1, 173, -1, 175, -1, 177, - 178, -1, 180, 181, 182, 183, 184, 185, -1, 187, + 168, 169, 170, 171, 172, 173, -1, 175, -1, 177, + 178, -1, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, -1, 192, 193, 194, 195, 196, 197, -1, 199, 200, 201, 202, 203, 204, 205, 206, 207, - -1, 209, -1, 211, -1, -1, 214, 215, 216, 217, - 218, 219, 220, -1, -1, 223, 224, 225, -1, -1, + -1, 209, -1, 211, -1, 213, 214, 215, 216, 217, + 218, 219, 220, -1, -1, 223, 224, 225, 226, -1, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, - 258, 259, 260, 261, 262, 263, -1, 265, 266, -1, + 258, 259, 260, 261, 262, 263, -1, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, @@ -194814,7 +208596,7 @@ static const yytype_int16 yycheck[] = 338, 339, -1, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, -1, 360, 361, 362, 363, 364, 365, 366, 367, - 368, 369, 370, -1, 372, 373, 374, 375, -1, 377, + 368, 369, 370, 371, 372, 373, 374, 375, -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, -1, 392, 393, -1, 395, 396, 397, 398, 399, 400, 401, -1, 403, 404, -1, -1, 407, @@ -194825,55 +208607,151 @@ static const yytype_int16 yycheck[] = -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, -1, -1, -1, -1, -1, -1, -1, 477, - 478, 3, -1, -1, -1, -1, -1, -1, 486, -1, - -1, -1, -1, 491, 492, -1, -1, 19, 20, 21, - 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, - -1, -1, -1, -1, 36, -1, 38, 39, 
-1, 41, - 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, - 52, -1, 54, 55, 56, 57, 58, 59, -1, -1, - 62, 63, 64, 65, 66, -1, 68, 69, 70, 71, - 72, -1, -1, -1, 76, 77, 78, 79, 80, -1, - 82, 83, 84, -1, 86, 87, 88, 89, 90, 91, - -1, 93, 94, 95, 96, -1, -1, -1, -1, -1, - -1, -1, 104, 105, 106, 107, 108, 109, 110, 111, - 112, 113, -1, 115, -1, 117, 118, 119, 120, 121, - 122, -1, 124, 125, 126, 127, 128, -1, -1, 131, - 132, 133, 134, 135, -1, 137, 138, 139, -1, 141, - 142, 143, -1, 145, 146, 147, 148, 149, 150, 151, - 152, 153, 154, -1, 156, -1, 158, 159, 160, 161, - -1, 163, -1, 165, -1, -1, 168, 169, 170, 171, - -1, 173, -1, 175, -1, 177, 178, -1, 180, 181, - 182, 183, 184, 185, -1, 187, 188, 189, 190, -1, - 192, 193, 194, 195, 196, 197, -1, 199, 200, 201, - 202, 203, 204, 205, 206, 207, -1, 209, -1, 211, - -1, -1, 214, 215, 216, 217, 218, 219, 220, -1, - -1, 223, 224, 225, -1, -1, 228, 229, 230, -1, - -1, 233, 234, 235, 236, 237, 238, 239, 240, 241, - 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, - 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, - 262, 263, -1, 265, 266, -1, 268, -1, 270, 271, - 272, 273, 274, 275, -1, 277, 278, -1, -1, 281, - 282, 283, -1, -1, 286, 287, -1, 289, -1, 291, - 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, - -1, 303, 304, 305, 306, 307, 308, 309, 310, 311, - -1, 313, 314, 315, 316, 317, 318, 319, 320, 321, - 322, 323, 324, 325, 326, -1, 328, 329, 330, 331, - 332, 333, 334, 335, 336, 337, 338, 339, -1, 341, - 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, - 352, 353, 354, 355, 356, 357, 358, -1, 360, 361, - 362, 363, 364, -1, 366, 367, 368, 369, 370, -1, - 372, 373, 374, 375, -1, 377, 378, 379, 380, 381, - 382, 383, 384, 385, 386, 387, 388, 389, 390, -1, - 392, 393, -1, 395, 396, 397, 398, 399, 400, 401, - -1, 403, 404, -1, -1, 407, 408, 409, 410, 411, - -1, 413, 414, 415, 416, 417, 418, 419, 420, -1, - -1, 423, 424, 425, 426, 427, -1, -1, 430, 431, - 432, 433, 434, 435, 436, -1, 438, -1, 440, 441, - 442, 443, -1, -1, 446, -1, -1, 
449, 450, 451, - 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, - 462, 463, 464, 465, 466, 467, 468, 469, -1, -1, - 3, -1, 5, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 487, 19, 20, 21, 22, + 478, -1, -1, -1, -1, -1, -1, -1, 486, -1, + -1, -1, -1, -1, -1, 493, 494, 3, 4, 5, + 6, 7, 8, 9, 10, -1, -1, -1, -1, -1, + -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, + 26, -1, 28, 29, 30, -1, -1, -1, -1, 35, + -1, -1, 38, 39, -1, 41, 42, 43, 44, 45, + 46, 47, -1, 49, 50, 51, 52, -1, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, + 66, -1, 68, 69, 70, 71, 72, -1, 74, -1, + 76, 77, 78, 79, 80, 81, 82, 83, 84, -1, + 86, 87, 88, 89, 90, 91, -1, 93, 94, 95, + 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, + 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, + -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, + 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, + -1, 137, 138, 139, -1, 141, -1, 143, -1, 145, + 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, + 156, -1, 158, 159, 160, 161, -1, 163, -1, 165, + 166, -1, 168, 169, 170, 171, -1, 173, -1, 175, + -1, 177, 178, -1, 180, 181, 182, 183, 184, 185, + -1, 187, 188, 189, 190, -1, 192, 193, 194, 195, + 196, 197, -1, 199, 200, 201, 202, 203, 204, 205, + 206, 207, -1, 209, -1, 211, -1, -1, 214, 215, + 216, 217, 218, 219, 220, -1, -1, 223, 224, 225, + -1, -1, 228, 229, 230, 231, 232, 233, 234, 235, + 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, + 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, + 256, 257, 258, 259, 260, 261, 262, 263, -1, 265, + 266, -1, 268, 269, 270, 271, 272, 273, 274, 275, + -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, + 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, + 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, + 306, 307, 308, 309, 310, 311, -1, 313, 314, 315, + 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, + 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, + 336, 337, 338, 339, -1, 341, 342, 343, 344, 345, + 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, + 356, 357, 358, -1, 360, 361, 
362, 363, 364, 365, + 366, 367, 368, 369, 370, -1, 372, 373, 374, 375, + -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, + 386, 387, 388, 389, 390, -1, 392, 393, -1, 395, + 396, 397, 398, 399, 400, 401, -1, 403, 404, -1, + -1, 407, 408, 409, 410, 411, 412, 413, 414, 415, + 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, + 426, 427, 428, -1, 430, 431, 432, 433, 434, 435, + 436, -1, 438, 439, 440, 441, 442, 443, -1, -1, + 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, + 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, + 466, 467, 468, 469, -1, -1, -1, -1, -1, -1, + -1, 477, 478, 3, -1, -1, -1, -1, 484, -1, + 486, -1, -1, -1, -1, 491, -1, 493, 494, 19, + 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, + 30, -1, -1, -1, -1, -1, 36, -1, 38, 39, + -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, + -1, 51, 52, -1, 54, 55, 56, 57, 58, 59, + -1, -1, 62, 63, 64, 65, 66, -1, 68, 69, + 70, 71, 72, -1, -1, -1, 76, 77, 78, 79, + 80, -1, 82, 83, 84, -1, 86, 87, 88, 89, + 90, 91, -1, 93, 94, 95, 96, -1, -1, -1, + -1, -1, -1, -1, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, -1, 115, -1, 117, 118, 119, + 120, 121, 122, -1, 124, 125, 126, 127, 128, -1, + -1, 131, 132, 133, 134, 135, -1, 137, 138, 139, + -1, 141, 142, 143, -1, 145, 146, 147, 148, 149, + 150, 151, 152, 153, 154, -1, 156, -1, 158, 159, + 160, 161, -1, 163, -1, 165, -1, -1, 168, 169, + 170, 171, -1, 173, -1, 175, -1, 177, 178, -1, + 180, 181, 182, 183, 184, 185, -1, 187, 188, 189, + 190, -1, 192, 193, 194, 195, 196, 197, -1, 199, + 200, 201, 202, 203, 204, 205, 206, 207, -1, 209, + -1, 211, -1, -1, 214, 215, 216, 217, 218, 219, + 220, -1, -1, 223, 224, 225, -1, -1, 228, 229, + 230, -1, -1, 233, 234, 235, 236, 237, 238, 239, + 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, + 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, + 260, 261, 262, 263, -1, 265, 266, -1, 268, -1, + 270, 271, 272, 273, 274, 275, -1, 277, 278, -1, + -1, 281, 282, 283, -1, -1, 286, 287, -1, 289, + -1, 291, 292, 293, 294, 295, 296, 297, 298, 299, + 300, 
301, -1, 303, 304, 305, 306, 307, 308, 309, + 310, 311, -1, 313, 314, 315, 316, 317, 318, 319, + 320, 321, 322, 323, 324, 325, 326, -1, 328, 329, + 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, + -1, 341, 342, 343, 344, 345, 346, 347, 348, 349, + 350, 351, 352, 353, 354, 355, 356, 357, 358, -1, + 360, 361, 362, 363, 364, -1, 366, 367, 368, 369, + 370, -1, 372, 373, 374, 375, -1, 377, 378, 379, + 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, + 390, -1, 392, 393, -1, 395, 396, 397, 398, 399, + 400, 401, -1, 403, 404, -1, -1, 407, 408, 409, + 410, 411, -1, 413, 414, 415, 416, 417, 418, 419, + 420, -1, -1, 423, 424, 425, 426, 427, -1, -1, + 430, 431, 432, 433, 434, 435, 436, -1, 438, -1, + 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, + 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, + 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, + -1, -1, 3, -1, 5, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 487, 19, 20, + 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, + -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, -1, 54, 55, 56, 57, 58, 59, -1, + 61, 62, 63, 64, 65, 66, -1, 68, 69, 70, + 71, 72, -1, 74, -1, 76, 77, 78, 79, 80, + 81, 82, 83, 84, -1, 86, 87, 88, 89, 90, + 91, -1, 93, 94, 95, 96, 97, 98, 99, 100, + 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, + 111, 112, 113, -1, 115, -1, 117, 118, 119, 120, + 121, 122, -1, 124, 125, 126, 127, 128, -1, -1, + 131, 132, 133, 134, 135, -1, 137, 138, 139, -1, + 141, 142, 143, -1, 145, 146, 147, 148, 149, 150, + 151, 152, 153, 154, -1, 156, -1, 158, 159, 160, + 161, -1, 163, -1, 165, 166, -1, 168, 169, 170, + 171, 172, 173, -1, 175, -1, 177, 178, -1, 180, + 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, + -1, 192, 193, 194, 195, 196, 197, -1, 199, 200, + 201, 202, 203, 204, 205, 206, 207, -1, 209, -1, + 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, + 221, -1, 223, 224, 225, 226, -1, 228, 229, 230, + 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, + 241, 242, 243, 
244, 245, 246, 247, 248, 249, 250, + 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, + 261, 262, 263, -1, 265, 266, 267, 268, -1, 270, + 271, 272, 273, 274, 275, -1, 277, 278, -1, 280, + 281, 282, 283, -1, -1, 286, 287, 288, 289, 290, + 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, + 301, -1, 303, 304, 305, 306, 307, 308, 309, 310, + 311, -1, 313, 314, 315, 316, 317, 318, 319, 320, + 321, 322, 323, 324, 325, 326, -1, 328, 329, 330, + 331, 332, 333, 334, 335, 336, 337, 338, 339, -1, + 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, + 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, + 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, + 371, 372, 373, 374, 375, -1, 377, 378, 379, 380, + 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, + -1, 392, 393, 394, 395, 396, 397, 398, 399, 400, + 401, -1, 403, 404, -1, -1, 407, 408, 409, 410, + 411, -1, 413, 414, 415, 416, 417, 418, 419, 420, + -1, -1, 423, 424, 425, 426, 427, 428, -1, 430, + 431, 432, 433, 434, 435, 436, -1, 438, 439, 440, + 441, 442, 443, -1, -1, 446, -1, 448, 449, 450, + 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, + 461, 462, 463, 464, 465, 466, 467, 468, 469, -1, + 3, 472, 5, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 486, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, @@ -194907,19 +208785,19 @@ static const yytype_int16 yycheck[] = 323, 324, 325, 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, -1, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, - 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, + 353, 354, 355, 356, 357, 358, -1, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, -1, 392, - 393, 394, 395, 396, 397, 398, 399, 400, 401, -1, + 393, -1, 395, 396, 397, 398, 399, 400, 401, -1, 403, 404, -1, -1, 407, 408, 409, 410, 411, -1, 413, 414, 415, 416, 417, 
418, 419, 420, -1, -1, 423, 424, 425, 426, 427, 428, -1, 430, 431, 432, 433, 434, 435, 436, -1, 438, 439, 440, 441, 442, - 443, -1, -1, 446, -1, 448, 449, 450, 451, 452, + 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, - 463, 464, 465, 466, 467, 468, 469, -1, 3, 472, - 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 463, 464, 465, 466, 467, 468, 469, -1, 3, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 486, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, 41, 42, 43, 44, @@ -194940,13 +208818,13 @@ static const yytype_int16 yycheck[] = 185, 186, 187, 188, 189, 190, -1, 192, 193, 194, 195, 196, 197, -1, 199, 200, 201, 202, 203, 204, 205, 206, 207, -1, 209, -1, 211, 212, 213, 214, - 215, 216, 217, 218, 219, 220, 221, -1, 223, 224, + 215, 216, 217, 218, 219, 220, -1, -1, 223, 224, 225, 226, -1, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, -1, 265, 266, 267, 268, -1, 270, 271, 272, 273, 274, - 275, -1, 277, 278, -1, 280, 281, 282, 283, -1, + 275, -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, 306, 307, 308, 309, 310, 311, -1, 313, 314, @@ -195012,57 +208890,57 @@ static const yytype_int16 yycheck[] = -1, 438, 439, 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, - 467, 468, 469, -1, 3, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 486, + 467, 468, 469, -1, 3, 4, 5, -1, -1, -1, + 9, -1, -1, -1, -1, -1, -1, -1, -1, 486, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, - 39, -1, 41, 42, 43, 44, 45, 46, 47, 48, - 49, 50, 51, 52, -1, 54, 55, 56, 57, 58, - 59, -1, 61, 62, 63, 64, 65, 66, -1, 68, - 69, 70, 71, 72, -1, 74, -1, 76, 77, 
78, - 79, 80, 81, 82, 83, 84, -1, 86, 87, 88, - 89, 90, 91, -1, 93, 94, 95, 96, 97, 98, - 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, + 39, -1, 41, 42, 43, -1, 45, 46, 47, 48, + 49, -1, 51, 52, -1, 54, 55, 56, 57, 58, + 59, -1, -1, 62, 63, 64, 65, 66, -1, 68, + 69, 70, 71, 72, -1, -1, -1, 76, 77, 78, + 79, 80, -1, 82, 83, 84, -1, 86, 87, 88, + 89, 90, 91, -1, -1, 94, 95, 96, -1, -1, + -1, -1, -1, -1, -1, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, -1, 115, -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, -1, 137, 138, 139, -1, 141, 142, 143, -1, 145, 146, 147, 148, - 149, 150, 151, 152, 153, 154, -1, 156, -1, 158, - 159, 160, 161, -1, 163, -1, 165, 166, -1, 168, - 169, 170, 171, 172, 173, -1, 175, -1, 177, 178, - -1, 180, 181, 182, 183, 184, 185, 186, 187, 188, + 149, 150, 151, 152, 153, 154, 155, 156, -1, 158, + 159, 160, 161, -1, 163, -1, 165, -1, -1, -1, + 169, 170, 171, -1, 173, -1, 175, -1, 177, 178, + -1, 180, 181, 182, 183, 184, 185, -1, 187, 188, 189, 190, -1, 192, 193, 194, 195, 196, 197, -1, - 199, 200, 201, 202, 203, 204, 205, 206, 207, -1, - 209, -1, 211, 212, 213, 214, 215, 216, 217, 218, - 219, 220, -1, -1, 223, 224, 225, 226, -1, 228, - 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, + 199, -1, 201, 202, 203, 204, 205, 206, 207, -1, + 209, -1, 211, -1, -1, 214, -1, 216, 217, 218, + 219, 220, -1, -1, 223, -1, 225, -1, -1, 228, + 229, 230, -1, -1, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, - 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, - 259, 260, 261, 262, 263, -1, 265, 266, 267, 268, + 249, 250, 251, 252, 253, 254, 255, 256, 257, -1, + 259, 260, 261, 262, 263, -1, 265, 266, -1, 268, -1, 270, 271, 272, 273, 274, 275, -1, 277, 278, - -1, -1, 281, 282, 283, -1, -1, 286, 287, 288, - 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, + 279, -1, 281, 282, 283, -1, -1, 286, 287, -1, + 289, -1, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, -1, 303, 
304, 305, 306, 307, 308, 309, 310, 311, -1, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, - 339, -1, 341, 342, 343, 344, 345, 346, 347, 348, + 339, -1, 341, 342, -1, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, - -1, 360, 361, 362, 363, 364, 365, 366, 367, 368, - 369, 370, 371, 372, 373, 374, 375, -1, 377, 378, + -1, 360, 361, 362, 363, 364, -1, 366, 367, 368, + 369, 370, -1, 372, 373, 374, 375, -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, - 389, 390, -1, 392, 393, -1, 395, 396, 397, 398, + 389, 390, -1, 392, 393, -1, 395, -1, 397, 398, 399, 400, 401, -1, 403, 404, -1, -1, 407, 408, - 409, 410, 411, -1, 413, 414, 415, 416, 417, 418, - 419, 420, -1, -1, 423, 424, 425, 426, 427, 428, + 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, + 419, 420, -1, -1, 423, 424, 425, 426, 427, -1, -1, 430, 431, 432, 433, 434, 435, 436, -1, 438, - 439, 440, 441, 442, 443, -1, -1, 446, -1, -1, + -1, 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, - 469, -1, 3, 4, 5, -1, -1, -1, 9, -1, - -1, -1, -1, -1, -1, -1, -1, 486, 19, 20, + 469, -1, 3, -1, -1, -1, -1, -1, 477, 478, + 479, -1, -1, -1, -1, -1, -1, 486, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, - -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, + 31, 32, -1, -1, -1, -1, -1, 38, 39, -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, 57, 58, 59, -1, -1, 62, 63, 64, 65, 66, -1, 68, 69, 70, @@ -195074,7 +208952,7 @@ static const yytype_int16 yycheck[] = 121, 122, -1, 124, 125, 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, -1, 137, 138, 139, -1, 141, 142, 143, -1, 145, 146, 147, 148, 149, 150, - 151, 152, 153, 154, 155, 156, -1, 158, 159, 160, + 151, 152, 153, 154, -1, 156, -1, 158, 159, 160, 161, -1, 163, -1, 165, -1, -1, -1, 169, 170, 171, -1, 173, -1, 175, -1, 177, 178, -1, 180, 181, 182, 183, 
184, 185, -1, 187, 188, 189, 190, @@ -195086,7 +208964,7 @@ static const yytype_int16 yycheck[] = 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, -1, 259, 260, 261, 262, 263, -1, 265, 266, -1, 268, -1, 270, - 271, 272, 273, 274, 275, -1, 277, 278, 279, -1, + 271, 272, 273, 274, 275, -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, 286, 287, -1, 289, -1, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, 306, 307, 308, 309, 310, @@ -195094,21 +208972,21 @@ static const yytype_int16 yycheck[] = 321, 322, 323, 324, 325, 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, -1, 341, 342, -1, 344, 345, 346, 347, 348, 349, 350, - 351, 352, 353, 354, 355, 356, 357, 358, -1, 360, + 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, -1, 366, 367, 368, 369, 370, -1, 372, 373, 374, 375, -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, - -1, 392, 393, -1, 395, -1, 397, 398, 399, 400, + -1, 392, 393, 394, 395, -1, 397, 398, 399, 400, 401, -1, 403, 404, -1, -1, 407, 408, 409, 410, - 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, + 411, -1, 413, 414, 415, 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, 426, 427, -1, -1, 430, 431, 432, 433, 434, 435, 436, -1, 438, -1, 440, - 441, 442, 443, -1, -1, 446, -1, -1, 449, 450, + 441, 442, 443, -1, -1, 446, -1, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, -1, - 3, -1, -1, -1, -1, -1, 477, 478, 479, -1, + 3, 472, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 486, 19, 20, 21, 22, - 23, 24, 25, 26, -1, 28, 29, 30, 31, 32, + 23, 24, 25, 26, 27, 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, 57, 58, 59, -1, -1, 62, @@ -195155,7 +209033,7 @@ static const yytype_int16 yycheck[] = 463, 464, 465, 466, 467, 468, 469, -1, 3, 472, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 486, 19, 20, 
21, 22, 23, 24, - 25, 26, 27, 28, 29, 30, -1, -1, -1, -1, + 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, 57, 58, 59, -1, -1, 62, 63, 64, @@ -195199,7 +209077,7 @@ static const yytype_int16 yycheck[] = 435, 436, -1, 438, -1, 440, 441, 442, 443, -1, -1, 446, -1, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, - 465, 466, 467, 468, 469, -1, 3, 472, -1, -1, + 465, 466, 467, 468, 469, -1, 3, 472, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 486, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, -1, -1, @@ -195235,18 +209113,18 @@ static const yytype_int16 yycheck[] = -1, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, -1, 341, 342, -1, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, - 357, 358, 359, 360, 361, 362, 363, 364, -1, 366, + 357, 358, -1, 360, 361, 362, 363, 364, -1, 366, 367, 368, 369, 370, -1, 372, 373, 374, 375, -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, - 387, 388, 389, 390, -1, 392, 393, 394, 395, -1, + 387, 388, 389, 390, -1, 392, 393, -1, 395, -1, 397, 398, 399, 400, 401, -1, 403, 404, -1, -1, 407, 408, 409, 410, 411, -1, 413, 414, 415, 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, 426, 427, -1, -1, 430, 431, 432, 433, 434, 435, 436, -1, 438, -1, 440, 441, 442, 443, -1, -1, 446, - -1, 448, 449, 450, 451, 452, 453, 454, 455, 456, + -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, - 467, 468, 469, -1, 3, 472, 5, -1, -1, -1, + 467, 468, 469, -1, 3, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 486, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, @@ -195293,7 +209171,7 @@ static const yytype_int16 yycheck[] = -1, 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, - 469, -1, 3, -1, 5, -1, -1, -1, -1, 
-1, + 469, -1, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 486, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, @@ -195386,103 +209264,395 @@ static const yytype_int16 yycheck[] = 433, 434, 435, 436, -1, 438, -1, 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, - 463, 464, 465, 466, 467, 468, 469, -1, 3, -1, + 463, 464, 465, 466, 467, 468, 469, -1, -1, -1, + -1, 3, 4, 5, -1, -1, 8, 9, -1, -1, + -1, -1, -1, 486, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + 42, 43, 44, 45, 46, 47, -1, 49, 50, 51, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, + -1, 73, 74, 75, 76, 77, 78, 79, 80, 81, + 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, + 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, + 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, + 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, + 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, + 142, 143, 144, 145, 146, 147, 148, -1, 150, 151, + 152, 153, -1, 155, 156, 157, 158, 159, 160, 161, + 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, + 172, 173, 174, 175, 176, -1, 178, 179, 180, 181, + 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, + 192, 193, 194, 195, 196, 197, 198, 199, 200, -1, + 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, + 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, + 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, + 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, + 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, + 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, + 262, 263, 264, 265, 266, 267, 268, 269, -1, 271, + 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, + 282, 283, 284, 285, 286, -1, 288, 289, 290, -1, + 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, + 302, 303, 304, -1, 306, 307, -1, 309, 310, 311, + 312, 313, 314, 315, 
316, 317, 318, 319, 320, 321, + 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, + 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, + 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, + 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, + 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, + 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, + 382, 383, 384, 385, 386, 387, 388, 389, -1, 391, + 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, + 402, 403, 404, 405, 406, 407, 408, -1, 410, -1, + 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, + 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, + 432, 433, 434, -1, 436, 437, 438, 439, 440, 441, + 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, + 452, 453, 454, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 466, 467, 468, 469, -1, 3, + -1, 473, 474, 475, 8, 477, 478, 479, 480, 481, + 482, -1, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, -1, 28, 29, 30, -1, -1, -1, + -1, -1, -1, -1, 38, 39, -1, 41, 42, 43, + -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, + 54, 55, 56, 57, 58, 59, -1, -1, 62, 63, + 64, 65, 66, -1, 68, 69, 70, 71, 72, -1, + -1, -1, 76, 77, 78, 79, 80, -1, 82, 83, + 84, -1, 86, 87, 88, 89, 90, 91, -1, -1, + 94, 95, 96, -1, -1, -1, -1, -1, -1, -1, + 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, + -1, 115, -1, 117, 118, 119, 120, 121, 122, -1, + 124, 125, 126, 127, 128, -1, -1, 131, 132, 133, + 134, 135, -1, 137, 138, 139, -1, 141, 142, 143, + -1, 145, 146, 147, 148, 149, 150, 151, 152, 153, + 154, -1, 156, -1, 158, 159, 160, 161, -1, 163, + -1, 165, -1, -1, -1, 169, 170, 171, -1, 173, + -1, 175, -1, 177, 178, -1, 180, 181, 182, 183, + 184, 185, -1, 187, 188, 189, 190, -1, 192, 193, + 194, 195, 196, 197, -1, 199, -1, 201, 202, 203, + 204, 205, 206, 207, -1, 209, -1, 211, -1, -1, + 214, -1, 216, 217, 218, 219, 220, -1, -1, 223, + -1, 225, -1, -1, 228, 229, 230, -1, -1, 233, + 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, + 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, + 254, 255, 256, 257, -1, 259, 
260, 261, 262, 263, + -1, 265, 266, -1, 268, -1, 270, 271, 272, 273, + 274, 275, -1, 277, 278, -1, -1, 281, 282, 283, + -1, -1, 286, 287, -1, 289, -1, 291, 292, 293, + 294, 295, 296, 297, 298, 299, 300, 301, -1, 303, + 304, 305, 306, 307, 308, 309, 310, 311, -1, 313, + 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, + 324, 325, 326, -1, 328, 329, 330, 331, 332, 333, + 334, 335, 336, 337, 338, 339, -1, 341, 342, -1, + 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, + 354, 355, 356, 357, 358, -1, 360, 361, 362, 363, + 364, -1, 366, 367, 368, 369, 370, -1, 372, 373, + 374, 375, -1, 377, 378, 379, 380, 381, 382, 383, + 384, 385, 386, 387, 388, 389, 390, -1, 392, 393, + -1, 395, -1, 397, 398, 399, 400, 401, -1, 403, + 404, -1, -1, 407, 408, 409, 410, 411, -1, 413, + 414, 415, 416, 417, 418, 419, 420, -1, -1, 423, + 424, 425, 426, 427, -1, -1, 430, 431, 432, 433, + 434, 435, 436, -1, 438, -1, 440, 441, 442, 443, + -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, + 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, + 464, 465, 466, 467, 468, 469, -1, -1, -1, 473, + 474, 475, -1, 477, 478, 479, 480, 481, 482, 8, + -1, -1, 11, -1, -1, -1, -1, 16, 17, 18, + -1, -1, -1, -1, -1, -1, -1, 8, -1, -1, + 11, -1, -1, -1, 33, 16, 17, 18, -1, -1, + -1, -1, 41, -1, -1, 8, -1, -1, 11, 48, + -1, -1, 33, 16, 17, 18, -1, -1, -1, -1, + 41, -1, -1, -1, -1, -1, -1, 48, -1, -1, + 33, -1, -1, 8, 73, -1, 11, -1, 41, -1, + -1, 16, 17, 18, -1, 48, -1, -1, -1, -1, + -1, 8, 73, -1, 11, -1, -1, -1, 33, 16, + 17, 18, -1, -1, -1, -1, 41, -1, -1, 8, + 73, -1, 11, 48, -1, -1, 33, 16, 17, 18, + -1, -1, -1, -1, 41, -1, -1, -1, -1, -1, + -1, 48, -1, -1, 33, -1, -1, -1, 73, -1, + -1, -1, 41, -1, -1, -1, -1, -1, -1, 48, + -1, -1, -1, -1, -1, -1, 73, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 486, 19, 20, 21, 22, 23, 24, - 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, - -1, -1, -1, 38, 39, -1, 41, 42, 43, -1, - 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, - 55, 56, 57, 58, 59, -1, -1, 62, 63, 64, - 
65, 66, -1, 68, 69, 70, 71, 72, -1, -1, - -1, 76, 77, 78, 79, 80, -1, 82, 83, 84, - -1, 86, 87, 88, 89, 90, 91, -1, -1, 94, - 95, 96, -1, -1, -1, -1, -1, -1, -1, 104, - 105, 106, 107, 108, 109, 110, 111, 112, 113, -1, - 115, -1, 117, 118, 119, 120, 121, 122, -1, 124, - 125, 126, 127, 128, -1, -1, 131, 132, 133, 134, - 135, -1, 137, 138, 139, -1, 141, 142, 143, -1, - 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, - -1, 156, -1, 158, 159, 160, 161, -1, 163, -1, - 165, -1, -1, -1, 169, 170, 171, -1, 173, -1, - 175, -1, 177, 178, -1, 180, 181, 182, 183, 184, - 185, -1, 187, 188, 189, 190, -1, 192, 193, 194, - 195, 196, 197, -1, 199, -1, 201, 202, 203, 204, - 205, 206, 207, -1, 209, -1, 211, -1, -1, 214, - -1, 216, 217, 218, 219, 220, -1, -1, 223, -1, - 225, -1, -1, 228, 229, 230, -1, -1, 233, 234, - 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, - 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, - 255, 256, 257, -1, 259, 260, 261, 262, 263, -1, - 265, 266, -1, 268, -1, 270, 271, 272, 273, 274, - 275, -1, 277, 278, -1, -1, 281, 282, 283, -1, - -1, 286, 287, -1, 289, -1, 291, 292, 293, 294, - 295, 296, 297, 298, 299, 300, 301, -1, 303, 304, - 305, 306, 307, 308, 309, 310, 311, -1, 313, 314, - 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, - 325, 326, -1, 328, 329, 330, 331, 332, 333, 334, - 335, 336, 337, 338, 339, -1, 341, 342, -1, 344, - 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, - 355, 356, 357, 358, -1, 360, 361, 362, 363, 364, - -1, 366, 367, 368, 369, 370, -1, 372, 373, 374, - 375, -1, 377, 378, 379, 380, 381, 382, 383, 384, - 385, 386, 387, 388, 389, 390, -1, 392, 393, -1, - 395, -1, 397, 398, 399, 400, 401, -1, 403, 404, - -1, -1, 407, 408, 409, 410, 411, -1, 413, 414, - 415, 416, 417, 418, 419, 420, -1, -1, 423, 424, - 425, 426, 427, -1, -1, 430, 431, 432, 433, 434, - 435, 436, -1, 438, -1, 440, 441, 442, 443, -1, - -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, - 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, - 465, 466, 467, 468, 469, -1, 
-1, -1, -1, 3, - 4, 5, -1, -1, 8, 9, -1, -1, -1, -1, - -1, 486, 16, 17, 18, 19, 20, 21, 22, 23, + -1, -1, -1, 172, 73, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 186, -1, -1, + -1, 172, 191, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 186, -1, -1, -1, 172, + 191, -1, -1, 212, 213, -1, -1, -1, -1, -1, + -1, -1, -1, 186, -1, -1, -1, 226, 191, -1, + -1, 212, 213, -1, -1, -1, -1, 172, -1, -1, + -1, -1, -1, -1, -1, 226, -1, -1, -1, 212, + 213, 186, -1, -1, -1, 172, 191, -1, -1, -1, + -1, -1, -1, 226, -1, 264, -1, -1, 267, 186, + -1, -1, -1, 172, 191, -1, -1, 212, 213, -1, + -1, -1, 281, 264, -1, 284, 267, 186, -1, -1, + -1, 226, 191, -1, -1, 212, 213, -1, -1, -1, + 281, 264, -1, 284, 267, -1, -1, -1, -1, 226, + -1, -1, -1, 212, 213, -1, -1, -1, 281, -1, + -1, 284, -1, -1, -1, -1, -1, 226, -1, 264, + -1, -1, 267, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 281, 264, -1, 284, + 267, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 281, 264, -1, 284, 267, -1, + -1, -1, 371, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 281, -1, -1, 284, -1, -1, -1, -1, + 371, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 371, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 8, -1, -1, 11, -1, -1, -1, -1, 16, 17, + 18, -1, -1, -1, -1, -1, 371, -1, -1, -1, + -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, + -1, -1, -1, 41, 371, -1, -1, -1, -1, -1, + 48, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 470, 371, -1, 473, 474, 475, -1, 477, 478, + 479, 480, 481, 482, -1, 73, -1, -1, 487, 470, + -1, -1, 473, 474, 475, -1, 477, 478, 479, 480, + 481, 482, -1, -1, -1, -1, 487, 470, -1, -1, + 473, 474, 475, -1, 477, 478, 479, 480, 481, 482, + -1, -1, -1, -1, 487, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 470, -1, -1, 473, 474, + 475, -1, 477, 478, 479, 480, 481, 482, -1, -1, + -1, -1, 487, 470, -1, -1, 473, 474, 475, -1, + 477, 478, 479, 480, 481, 482, -1, -1, -1, -1, + 487, 470, -1, -1, 473, 474, 475, -1, 477, 478, + 479, 480, 481, 482, 172, -1, 8, -1, 487, 11, + -1, 
-1, -1, -1, 16, 17, 18, -1, 186, -1, + -1, -1, -1, 191, -1, -1, -1, -1, -1, -1, + -1, 33, -1, -1, -1, -1, -1, -1, -1, 41, + -1, -1, -1, -1, 212, 213, 48, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 226, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 73, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 264, -1, -1, 267, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 281, -1, -1, 284, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 172, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 186, -1, -1, -1, -1, 191, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 371, -1, -1, -1, -1, -1, -1, + 212, 213, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 226, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 264, -1, -1, 267, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 281, + -1, -1, 284, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 470, -1, -1, 473, 474, 475, -1, 477, + 478, 479, 480, 481, 482, -1, -1, -1, -1, 487, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 371, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 470, 3, + -1, 473, 474, 475, -1, 477, 478, 479, 
480, 481, + 482, -1, -1, 485, -1, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, - 44, 45, 46, 47, -1, 49, 50, 51, 52, 53, + -1, 45, 46, 47, 48, 49, -1, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 66, 67, 68, 69, 70, 71, -1, 73, - 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, - 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, - 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, + 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, + -1, 75, 76, 77, 78, 79, 80, -1, 82, 83, + 84, 85, 86, 87, 88, 89, 90, 91, 92, -1, + 94, 95, 96, 97, 98, 99, -1, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, - 144, 145, 146, 147, 148, -1, 150, 151, 152, 153, - -1, 155, 156, 157, 158, 159, 160, 161, 162, 163, - 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, - 174, 175, 176, -1, 178, 179, 180, 181, 182, 183, - 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, - 194, 195, 196, 197, 198, 199, 200, -1, 202, 203, - 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, - 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, - 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, + 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, + 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, + 164, 165, -1, 167, -1, 169, 170, 171, -1, 173, + 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, + 184, 185, -1, 187, 188, 189, 190, 191, 192, 193, + 194, 195, 196, 197, 198, 199, -1, 201, 202, 203, + 204, 205, 206, 207, 208, 209, 210, 211, -1, -1, + 214, -1, 216, 217, 218, 219, 220, 221, 222, 223, + -1, 225, -1, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, - 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, - 264, 265, 266, 267, 268, 269, -1, 271, 272, 273, + 254, 255, 256, 257, -1, 259, 260, 261, 262, 263, + 264, 265, 266, -1, 
268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, - 284, 285, 286, -1, 288, 289, 290, -1, 292, 293, + 284, 285, 286, 287, -1, 289, -1, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, - 304, -1, 306, 307, -1, 309, 310, 311, 312, 313, + 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, - 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, + 334, 335, 336, 337, 338, 339, 340, 341, 342, -1, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, - 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, + 364, 365, 366, 367, 368, 369, 370, -1, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, - 384, 385, 386, 387, 388, 389, -1, 391, 392, 393, - 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, - 404, 405, 406, 407, 408, -1, 410, -1, 412, 413, + 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, + 394, 395, -1, 397, 398, 399, 400, 401, 402, 403, + 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, - 434, -1, 436, 437, 438, 439, 440, 441, 442, 443, + 434, 435, 436, 437, 438, -1, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, - 454, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 466, 467, 468, 469, -1, 3, -1, 473, - 474, 475, 8, 477, 478, 479, 480, 481, 482, -1, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, + 464, 465, 466, 467, 468, 469, 3, -1, -1, -1, + -1, -1, -1, -1, -1, 479, -1, -1, -1, -1, + -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, + -1, 28, 29, 30, -1, -1, -1, -1, -1, -1, + -1, 38, 39, -1, 41, 42, 43, -1, 45, 46, + 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, + 57, 58, 59, -1, -1, 62, 63, 64, 65, 66, + -1, 68, 69, 70, 71, 72, -1, -1, -1, 76, + 77, 78, 79, 80, -1, 82, 83, 84, -1, 86, + 87, 88, 89, 90, 91, -1, -1, 94, 
95, 96, + -1, -1, -1, -1, -1, -1, -1, 104, 105, 106, + 107, 108, 109, 110, 111, 112, 113, -1, 115, -1, + 117, 118, 119, 120, 121, 122, -1, 124, 125, 126, + 127, 128, -1, -1, 131, 132, 133, 134, 135, -1, + 137, 138, 139, -1, 141, 142, 143, -1, 145, 146, + 147, 148, 149, 150, 151, 152, 153, 154, -1, 156, + -1, 158, 159, 160, 161, -1, 163, -1, 165, -1, + -1, -1, 169, 170, 171, -1, 173, -1, 175, -1, + 177, 178, -1, 180, 181, 182, 183, 184, 185, -1, + 187, 188, 189, 190, -1, 192, 193, 194, 195, 196, + 197, -1, 199, -1, 201, 202, 203, 204, 205, 206, + 207, -1, 209, -1, 211, -1, -1, 214, -1, 216, + 217, 218, 219, 220, -1, -1, 223, -1, 225, -1, + -1, 228, 229, 230, -1, -1, 233, 234, 235, 236, + 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, + 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, + 257, -1, 259, 260, 261, 262, 263, -1, 265, 266, + -1, 268, -1, 270, 271, 272, 273, 274, 275, -1, + 277, 278, -1, -1, 281, 282, 283, -1, -1, 286, + 287, -1, 289, -1, 291, 292, 293, 294, 295, 296, + 297, 298, 299, 300, 301, -1, 303, 304, 305, 306, + 307, 308, 309, 310, 311, -1, 313, 314, 315, 316, + 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, + -1, 328, 329, 330, 331, 332, 333, 334, 335, 336, + 337, 338, 339, -1, 341, 342, -1, 344, 345, 346, + 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, + 357, 358, -1, 360, 361, 362, 363, 364, -1, 366, + 367, 368, 369, 370, -1, 372, 373, 374, 375, -1, + 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, + 387, 388, 389, 390, -1, 392, 393, -1, 395, -1, + 397, 398, 399, 400, 401, -1, 403, 404, -1, -1, + 407, 408, 409, 410, 411, -1, 413, 414, 415, 416, + 417, 418, 419, 420, -1, -1, 423, 424, 425, 426, + 427, -1, -1, 430, 431, 432, 433, 434, 435, 436, + -1, 438, -1, 440, 441, 442, 443, -1, -1, 446, + -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, + 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, + 467, 468, 469, 3, 4, 5, -1, -1, -1, 9, + -1, -1, 479, -1, -1, -1, -1, -1, -1, 19, + 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, + 30, -1, -1, -1, 
-1, -1, -1, -1, 38, 39, + -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, + -1, 51, 52, -1, 54, 55, 56, 57, 58, 59, + -1, -1, 62, 63, 64, 65, 66, -1, 68, 69, + 70, 71, 72, -1, -1, -1, 76, 77, 78, 79, + 80, -1, 82, 83, 84, -1, 86, 87, 88, 89, + 90, 91, -1, -1, 94, 95, 96, -1, -1, -1, + -1, -1, -1, -1, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 115, -1, 117, 118, 119, + 120, 121, 122, -1, 124, 125, 126, 127, 128, -1, + -1, 131, 132, 133, 134, 135, -1, 137, 138, 139, + -1, 141, 142, 143, -1, 145, 146, 147, 148, 149, + 150, 151, 152, 153, 154, 155, 156, -1, 158, 159, + 160, 161, -1, 163, -1, 165, -1, -1, -1, 169, + 170, 171, -1, 173, -1, 175, -1, 177, 178, -1, + 180, 181, 182, 183, 184, 185, -1, 187, 188, 189, + 190, -1, 192, 193, 194, 195, 196, 197, -1, 199, + -1, 201, 202, 203, 204, 205, 206, 207, -1, 209, + -1, 211, -1, -1, 214, -1, 216, 217, 218, 219, + 220, -1, -1, 223, -1, 225, -1, -1, 228, 229, + 230, -1, -1, 233, 234, 235, 236, 237, 238, 239, + 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, + 250, 251, 252, 253, 254, 255, 256, 257, -1, 259, + 260, 261, 262, 263, -1, 265, 266, -1, 268, -1, + 270, 271, 272, 273, 274, 275, -1, 277, 278, 279, + -1, 281, 282, 283, -1, -1, 286, 287, -1, 289, + -1, 291, 292, 293, 294, 295, 296, 297, 298, 299, + 300, 301, -1, 303, 304, 305, 306, 307, 308, 309, + 310, 311, -1, 313, 314, 315, 316, 317, 318, 319, + 320, 321, 322, 323, 324, 325, 326, -1, 328, 329, + 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, + -1, 341, 342, -1, 344, 345, 346, 347, 348, 349, + 350, 351, 352, 353, 354, 355, 356, 357, 358, -1, + 360, 361, 362, 363, 364, -1, 366, 367, 368, 369, + 370, -1, 372, 373, 374, 375, -1, 377, 378, 379, + 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, + 390, -1, 392, 393, -1, 395, -1, 397, 398, 399, + 400, 401, -1, 403, 404, -1, -1, 407, 408, 409, + 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, + 420, -1, -1, 423, 424, 425, 426, 427, -1, -1, + 430, 431, 432, 433, 434, 435, 436, -1, 438, -1, + 440, 441, 442, 443, 
-1, -1, 446, -1, -1, 449, + 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, + 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, + 3, 4, 5, -1, -1, -1, 9, 477, 478, -1, + -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, + 23, 24, 25, 26, -1, 28, 29, 30, -1, -1, + -1, -1, -1, -1, -1, 38, 39, -1, 41, 42, + 43, -1, 45, 46, 47, 48, 49, -1, 51, 52, + -1, 54, 55, 56, 57, 58, 59, -1, -1, 62, + 63, 64, 65, 66, -1, 68, 69, 70, 71, 72, + -1, -1, -1, 76, 77, 78, 79, 80, -1, 82, + 83, 84, -1, 86, 87, 88, 89, 90, 91, -1, + -1, 94, 95, 96, -1, -1, -1, -1, -1, -1, + -1, 104, 105, 106, 107, 108, 109, 110, 111, 112, + 113, 114, 115, -1, 117, 118, 119, 120, 121, 122, + -1, 124, 125, 126, 127, 128, -1, -1, 131, 132, + 133, 134, 135, -1, 137, 138, 139, -1, 141, 142, + 143, -1, 145, 146, 147, 148, 149, 150, 151, 152, + 153, 154, 155, 156, -1, 158, 159, 160, 161, -1, + 163, -1, 165, -1, -1, -1, 169, 170, 171, -1, + 173, -1, 175, -1, 177, 178, -1, 180, 181, 182, + 183, 184, 185, -1, 187, 188, 189, 190, -1, 192, + 193, 194, 195, 196, 197, -1, 199, -1, 201, 202, + 203, 204, 205, 206, 207, -1, 209, -1, 211, -1, + -1, 214, -1, 216, 217, 218, 219, 220, -1, -1, + 223, -1, 225, -1, -1, 228, 229, 230, -1, -1, + 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, + 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, + 253, 254, 255, 256, 257, -1, 259, 260, 261, 262, + 263, -1, 265, 266, -1, 268, -1, 270, 271, 272, + 273, 274, 275, -1, 277, 278, 279, -1, 281, 282, + 283, -1, -1, 286, 287, -1, 289, -1, 291, 292, + 293, 294, 295, 296, 297, 298, 299, 300, 301, -1, + 303, 304, 305, 306, 307, 308, 309, 310, 311, -1, + 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, + 323, 324, 325, 326, -1, 328, 329, 330, 331, 332, + 333, 334, 335, 336, 337, 338, 339, -1, 341, 342, + -1, 344, 345, 346, 347, 348, 349, 350, 351, 352, + 353, 354, 355, 356, 357, 358, -1, 360, 361, 362, + 363, 364, -1, 366, 367, 368, 369, 370, -1, 372, + 373, 374, 375, -1, 377, 378, 379, 380, 381, 382, + 383, 384, 385, 386, 387, 388, 389, 390, -1, 
392, + 393, -1, 395, -1, 397, 398, 399, 400, 401, -1, + 403, 404, -1, -1, 407, 408, 409, 410, 411, 412, + 413, 414, 415, 416, 417, 418, 419, 420, -1, -1, + 423, 424, 425, 426, 427, -1, -1, 430, 431, 432, + 433, 434, 435, 436, -1, 438, -1, 440, 441, 442, + 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, + 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, + 463, 464, 465, 466, 467, 468, 469, 3, 4, 5, + -1, -1, -1, 9, 477, 478, -1, -1, -1, -1, + -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, 55, @@ -195495,7 +209665,7 @@ static const yytype_int16 yycheck[] = -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, -1, 137, 138, 139, -1, 141, 142, 143, -1, 145, - 146, 147, 148, 149, 150, 151, 152, 153, 154, -1, + 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, -1, 158, 159, 160, 161, -1, 163, -1, 165, -1, -1, -1, 169, 170, 171, -1, 173, -1, 175, -1, 177, 178, -1, 180, 181, 182, 183, 184, 185, @@ -195508,7 +209678,7 @@ static const yytype_int16 yycheck[] = 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, -1, 259, 260, 261, 262, 263, -1, 265, 266, -1, 268, -1, 270, 271, 272, 273, 274, 275, - -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, + -1, 277, 278, 279, -1, 281, 282, 283, -1, -1, 286, 287, -1, 289, -1, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, 306, 307, 308, 309, 310, 311, -1, 313, 314, 315, @@ -195521,468 +209691,237 @@ static const yytype_int16 yycheck[] = -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, -1, 392, 393, -1, 395, -1, 397, 398, 399, 400, 401, -1, 403, 404, -1, - -1, 407, 408, 409, 410, 411, -1, 413, 414, 415, + -1, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, 426, 427, -1, -1, 430, 431, 432, 433, 434, 435, 436, -1, 438, -1, 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, 
454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, - 466, 467, 468, 469, -1, -1, -1, 473, 474, 475, - -1, 477, 478, 479, 480, 481, 482, 8, -1, -1, - 11, -1, -1, -1, -1, 16, 17, 18, -1, -1, - -1, -1, -1, -1, -1, 8, -1, -1, 11, -1, - -1, -1, 33, 16, 17, 18, -1, -1, -1, -1, - 41, -1, -1, 8, -1, -1, 11, 48, -1, -1, - 33, 16, 17, 18, -1, -1, -1, -1, 41, -1, - -1, -1, -1, -1, -1, 48, -1, -1, 33, -1, - -1, 8, 73, -1, 11, -1, 41, -1, -1, 16, - 17, 18, -1, 48, -1, -1, -1, -1, -1, 8, - 73, -1, 11, -1, -1, -1, 33, 16, 17, 18, - -1, -1, -1, -1, 41, -1, -1, 8, 73, -1, - 11, 48, -1, -1, 33, 16, 17, 18, -1, -1, - -1, -1, 41, -1, -1, -1, -1, -1, -1, 48, - -1, -1, 33, -1, -1, -1, 73, -1, -1, -1, - 41, -1, -1, -1, -1, -1, -1, 48, -1, -1, - -1, -1, -1, -1, 73, -1, -1, -1, -1, -1, + 466, 467, 468, 469, -1, 8, -1, -1, 11, -1, + -1, 477, 478, 16, 17, 18, -1, -1, -1, -1, + -1, -1, -1, 8, -1, -1, 11, -1, -1, -1, + 33, 16, 17, 18, 37, -1, -1, -1, 41, -1, + -1, 8, -1, -1, 11, 48, -1, -1, 33, 16, + 17, 18, -1, -1, -1, -1, 41, -1, -1, -1, + -1, -1, -1, 48, -1, -1, 33, -1, -1, 8, + 73, -1, 11, -1, 41, -1, -1, 16, 17, 18, + -1, 48, -1, -1, -1, -1, -1, -1, 73, -1, + -1, -1, -1, -1, 33, -1, -1, 36, -1, -1, + -1, -1, 41, -1, -1, 8, 73, -1, 11, 48, + -1, -1, -1, 16, 17, 18, -1, -1, -1, -1, + 123, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 33, -1, -1, -1, 73, -1, -1, -1, 41, -1, + -1, -1, -1, -1, -1, 48, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 172, 73, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 186, -1, -1, -1, 172, - 191, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 186, -1, -1, -1, 172, 191, -1, - -1, 212, 213, -1, -1, -1, -1, -1, -1, -1, - -1, 186, -1, -1, -1, 226, 191, -1, -1, 212, - 213, -1, -1, -1, -1, 172, -1, -1, -1, -1, - -1, -1, -1, 226, -1, -1, -1, 212, 213, 186, - -1, -1, -1, 172, 191, -1, -1, -1, -1, -1, - -1, 226, -1, 264, -1, -1, 267, 186, -1, -1, - -1, 172, 191, -1, -1, 212, 213, -1, -1, -1, - 281, 264, -1, 284, 267, 186, -1, -1, -1, 226, - 191, 
-1, -1, 212, 213, -1, -1, -1, 281, 264, - -1, 284, 267, -1, -1, -1, -1, 226, -1, -1, - -1, 212, 213, -1, -1, -1, 281, -1, -1, 284, - -1, -1, -1, -1, -1, 226, -1, 264, -1, -1, - 267, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 281, 264, -1, 284, 267, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 172, + 73, -1, -1, -1, -1, -1, -1, 162, -1, -1, + -1, -1, 167, 186, -1, -1, -1, 172, 191, 8, + -1, -1, 11, -1, 161, -1, -1, 16, 17, 18, + -1, 186, -1, -1, -1, 172, 191, -1, -1, 212, + 213, -1, -1, -1, 33, -1, -1, 36, -1, 186, + -1, -1, 41, 226, 191, -1, -1, 212, 213, 48, + -1, -1, -1, 172, -1, -1, -1, -1, -1, -1, + -1, 226, -1, -1, -1, 212, 213, 186, -1, -1, + -1, -1, 191, -1, 73, -1, -1, -1, -1, 226, + -1, 264, -1, -1, 267, -1, -1, -1, -1, 172, + -1, -1, -1, 212, 213, -1, -1, -1, 281, 264, + -1, 284, 267, 186, -1, -1, -1, 226, 191, -1, + -1, -1, -1, -1, -1, -1, 281, 264, -1, 284, + 267, -1, -1, -1, -1, -1, -1, -1, -1, 212, + 213, -1, -1, -1, 281, -1, -1, 284, -1, -1, + -1, -1, -1, 226, -1, 264, -1, -1, 267, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 281, 264, -1, 284, 267, -1, -1, -1, - 371, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 281, -1, -1, 284, -1, -1, -1, -1, 371, -1, + 307, -1, 281, -1, -1, 284, -1, -1, -1, -1, + -1, -1, -1, 172, -1, -1, -1, -1, -1, -1, + -1, 264, -1, -1, 267, -1, -1, 186, 371, -1, + -1, -1, 191, -1, -1, -1, -1, -1, 281, -1, + -1, 284, -1, -1, -1, -1, 371, -1, -1, -1, + -1, -1, -1, 212, 213, -1, -1, -1, -1, 302, + -1, -1, -1, -1, 371, -1, -1, 226, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 371, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 8, -1, - -1, 11, -1, -1, -1, -1, 16, 17, 18, -1, - -1, -1, -1, -1, 371, -1, -1, -1, -1, -1, - -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, - -1, 41, 371, -1, -1, -1, -1, -1, 48, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 470, - 371, -1, 473, 474, 475, -1, 477, 478, 479, 480, - 481, 482, -1, 73, -1, -1, 487, 470, -1, -1, + -1, 8, -1, -1, 11, -1, 429, -1, -1, 16, + 17, 18, 371, 
-1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 264, 33, -1, 267, -1, + -1, -1, -1, -1, 41, -1, -1, -1, -1, -1, + -1, 48, 281, -1, -1, 284, -1, 470, 371, -1, 473, 474, 475, -1, 477, 478, 479, 480, 481, 482, - -1, -1, -1, -1, 487, 470, -1, -1, 473, 474, + -1, -1, -1, -1, -1, 470, 73, -1, 473, 474, 475, -1, 477, 478, 479, 480, 481, 482, -1, -1, - -1, -1, 487, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 470, -1, -1, 473, 474, 475, -1, 477, 478, 479, 480, 481, 482, -1, -1, -1, -1, - 487, 470, -1, -1, 473, 474, 475, -1, 477, 478, - 479, 480, 481, 482, -1, -1, -1, -1, 487, 470, - -1, -1, 473, 474, 475, -1, 477, 478, 479, 480, - 481, 482, 172, -1, 8, -1, 487, 11, -1, -1, - -1, -1, 16, 17, 18, -1, 186, -1, -1, -1, - -1, 191, -1, -1, -1, -1, -1, -1, -1, 33, - -1, -1, -1, -1, -1, -1, -1, 41, -1, -1, - -1, -1, 212, 213, 48, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 226, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 73, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 264, -1, -1, 267, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 281, -1, -1, 284, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 172, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 186, -1, -1, -1, -1, 191, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 371, -1, -1, -1, -1, -1, -1, 212, 213, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 226, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 264, -1, -1, 267, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 281, -1, -1, - 284, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 470, -1, -1, 473, 474, 475, -1, 477, 478, 479, - 480, 481, 482, -1, -1, -1, -1, 487, -1, 
-1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 371, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 470, 3, -1, 473, - 474, 475, -1, 477, 478, 479, 480, 481, 482, -1, - -1, 485, -1, 19, 20, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, - 36, 37, 38, 39, 40, 41, 42, 43, -1, 45, - 46, 47, 48, 49, -1, 51, 52, 53, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, 67, 68, 69, 70, 71, 72, 73, -1, 75, - 76, 77, 78, 79, 80, -1, 82, 83, 84, 85, - 86, 87, 88, 89, 90, 91, 92, -1, 94, 95, - 96, 97, 98, 99, -1, 101, 102, 103, 104, 105, - 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, - 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, - 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, - 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, - 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, - 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, - -1, 167, -1, 169, 170, 171, -1, 173, 174, 175, - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, - -1, 187, 188, 189, 190, 191, 192, 193, 194, 195, - 196, 197, 198, 199, -1, 201, 202, 203, 204, 205, - 206, 207, 208, 209, 210, 211, -1, -1, 214, -1, - 216, 217, 218, 219, 220, 221, 222, 223, -1, 225, - -1, 227, 228, 229, 230, 231, 232, 233, 234, 235, - 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, - 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, - 256, 257, -1, 259, 260, 261, 262, 263, 264, 265, - 266, -1, 268, 269, 270, 271, 272, 273, 274, 275, - 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, - 
286, 287, -1, 289, -1, 291, 292, 293, 294, 295, - 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, - 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, - 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, - 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, - 336, 337, 338, 339, 340, 341, 342, -1, 344, 345, - 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, - 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, - 366, 367, 368, 369, 370, -1, 372, 373, 374, 375, - 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, - 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, - -1, 397, 398, 399, 400, 401, 402, 403, 404, 405, - 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, - 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, - 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, - 436, 437, 438, -1, 440, 441, 442, 443, 444, 445, - 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, - 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, - 466, 467, 468, 469, 3, -1, -1, -1, -1, -1, - -1, -1, -1, 479, -1, -1, -1, -1, -1, -1, - 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, - 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, - 39, -1, 41, 42, 43, -1, 45, 46, 47, 48, - 49, -1, 51, 52, -1, 54, 55, 56, 57, 58, - 59, -1, -1, 62, 63, 64, 65, 66, -1, 68, - 69, 70, 71, 72, -1, -1, -1, 76, 77, 78, - 79, 80, -1, 82, 83, 84, -1, 86, 87, 88, - 89, 90, 91, -1, -1, 94, 95, 96, -1, -1, - -1, -1, -1, -1, -1, 104, 105, 106, 107, 108, - 109, 110, 111, 112, 113, -1, 115, -1, 117, 118, - 119, 120, 121, 122, -1, 124, 125, 126, 127, 128, - -1, -1, 131, 132, 133, 134, 135, -1, 137, 138, - 139, -1, 141, 142, 143, -1, 145, 146, 147, 148, - 149, 150, 151, 152, 153, 154, -1, 156, -1, 158, - 159, 160, 161, -1, 163, -1, 165, -1, -1, -1, - 169, 170, 171, -1, 173, -1, 175, -1, 177, 178, - -1, 180, 181, 182, 183, 184, 185, -1, 187, 188, - 189, 190, -1, 192, 193, 194, 195, 196, 197, -1, - 199, -1, 201, 202, 203, 204, 205, 206, 207, -1, - 209, -1, 211, -1, -1, 214, -1, 216, 217, 218, - 219, 220, -1, -1, 223, -1, 225, -1, -1, 228, - 229, 
230, -1, -1, 233, 234, 235, 236, 237, 238, - 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, - 249, 250, 251, 252, 253, 254, 255, 256, 257, -1, - 259, 260, 261, 262, 263, -1, 265, 266, -1, 268, - -1, 270, 271, 272, 273, 274, 275, -1, 277, 278, - -1, -1, 281, 282, 283, -1, -1, 286, 287, -1, - 289, -1, 291, 292, 293, 294, 295, 296, 297, 298, - 299, 300, 301, -1, 303, 304, 305, 306, 307, 308, - 309, 310, 311, -1, 313, 314, 315, 316, 317, 318, - 319, 320, 321, 322, 323, 324, 325, 326, -1, 328, - 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, - 339, -1, 341, 342, -1, 344, 345, 346, 347, 348, - 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, - -1, 360, 361, 362, 363, 364, -1, 366, 367, 368, - 369, 370, -1, 372, 373, 374, 375, -1, 377, 378, - 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, - 389, 390, -1, 392, 393, -1, 395, -1, 397, 398, - 399, 400, 401, -1, 403, 404, -1, -1, 407, 408, - 409, 410, 411, -1, 413, 414, 415, 416, 417, 418, - 419, 420, -1, -1, 423, 424, 425, 426, 427, -1, - -1, 430, 431, 432, 433, 434, 435, 436, -1, 438, - -1, 440, 441, 442, 443, -1, -1, 446, -1, -1, - 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, - 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, - 469, 3, 4, 5, -1, -1, -1, 9, -1, -1, - 479, -1, -1, -1, -1, -1, -1, 19, 20, 21, - 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, - -1, -1, -1, -1, -1, -1, 38, 39, -1, 41, - 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, - 52, -1, 54, 55, 56, 57, 58, 59, -1, -1, - 62, 63, 64, 65, 66, -1, 68, 69, 70, 71, - 72, -1, -1, -1, 76, 77, 78, 79, 80, -1, - 82, 83, 84, -1, 86, 87, 88, 89, 90, 91, - -1, -1, 94, 95, 96, -1, -1, -1, -1, -1, - -1, -1, 104, 105, 106, 107, 108, 109, 110, 111, - 112, 113, 114, 115, -1, 117, 118, 119, 120, 121, - 122, -1, 124, 125, 126, 127, 128, -1, -1, 131, - 132, 133, 134, 135, -1, 137, 138, 139, -1, 141, - 142, 143, -1, 145, 146, 147, 148, 149, 150, 151, - 152, 153, 154, 155, 156, -1, 158, 159, 160, 161, - -1, 163, -1, 165, -1, -1, -1, 169, 170, 171, - -1, 173, -1, 175, -1, 177, 
178, -1, 180, 181, - 182, 183, 184, 185, -1, 187, 188, 189, 190, -1, - 192, 193, 194, 195, 196, 197, -1, 199, -1, 201, - 202, 203, 204, 205, 206, 207, -1, 209, -1, 211, - -1, -1, 214, -1, 216, 217, 218, 219, 220, -1, - -1, 223, -1, 225, -1, -1, 228, 229, 230, -1, - -1, 233, 234, 235, 236, 237, 238, 239, 240, 241, - 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, - 252, 253, 254, 255, 256, 257, -1, 259, 260, 261, - 262, 263, -1, 265, 266, -1, 268, -1, 270, 271, - 272, 273, 274, 275, -1, 277, 278, 279, -1, 281, - 282, 283, -1, -1, 286, 287, -1, 289, -1, 291, - 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, - -1, 303, 304, 305, 306, 307, 308, 309, 310, 311, - -1, 313, 314, 315, 316, 317, 318, 319, 320, 321, - 322, 323, 324, 325, 326, -1, 328, 329, 330, 331, - 332, 333, 334, 335, 336, 337, 338, 339, -1, 341, - 342, -1, 344, 345, 346, 347, 348, 349, 350, 351, - 352, 353, 354, 355, 356, 357, 358, -1, 360, 361, - 362, 363, 364, -1, 366, 367, 368, 369, 370, -1, - 372, 373, 374, 375, -1, 377, 378, 379, 380, 381, - 382, 383, 384, 385, 386, 387, 388, 389, 390, -1, - 392, 393, -1, 395, -1, 397, 398, 399, 400, 401, - -1, 403, 404, -1, -1, 407, 408, 409, 410, 411, - 412, 413, 414, 415, 416, 417, 418, 419, 420, -1, - -1, 423, 424, 425, 426, 427, -1, -1, 430, 431, - 432, 433, 434, 435, 436, -1, 438, -1, 440, 441, - 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, - 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, - 462, 463, 464, 465, 466, 467, 468, 469, 3, 4, - 5, -1, -1, -1, 9, 477, 478, -1, -1, -1, - -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, - 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, - -1, -1, -1, 38, 39, -1, 41, 42, 43, -1, - 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, - 55, 56, 57, 58, 59, -1, -1, 62, 63, 64, - 65, 66, -1, 68, 69, 70, 71, 72, -1, -1, - -1, 76, 77, 78, 79, 80, -1, 82, 83, 84, - -1, 86, 87, 88, 89, 90, 91, -1, -1, 94, - 95, 96, -1, -1, -1, -1, -1, -1, -1, 104, - 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, - 115, -1, 117, 118, 119, 120, 121, 122, -1, 124, - 
125, 126, 127, 128, -1, -1, 131, 132, 133, 134, - 135, -1, 137, 138, 139, -1, 141, 142, 143, -1, - 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, - 155, 156, -1, 158, 159, 160, 161, -1, 163, -1, - 165, -1, -1, -1, 169, 170, 171, -1, 173, -1, - 175, -1, 177, 178, -1, 180, 181, 182, 183, 184, - 185, -1, 187, 188, 189, 190, -1, 192, 193, 194, - 195, 196, 197, -1, 199, -1, 201, 202, 203, 204, - 205, 206, 207, -1, 209, -1, 211, -1, -1, 214, - -1, 216, 217, 218, 219, 220, -1, -1, 223, -1, - 225, -1, -1, 228, 229, 230, -1, -1, 233, 234, - 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, - 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, - 255, 256, 257, -1, 259, 260, 261, 262, 263, -1, - 265, 266, -1, 268, -1, 270, 271, 272, 273, 274, - 275, -1, 277, 278, 279, -1, 281, 282, 283, -1, - -1, 286, 287, -1, 289, -1, 291, 292, 293, 294, - 295, 296, 297, 298, 299, 300, 301, -1, 303, 304, - 305, 306, 307, 308, 309, 310, 311, -1, 313, 314, - 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, - 325, 326, -1, 328, 329, 330, 331, 332, 333, 334, - 335, 336, 337, 338, 339, -1, 341, 342, -1, 344, - 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, - 355, 356, 357, 358, -1, 360, 361, 362, 363, 364, - -1, 366, 367, 368, 369, 370, -1, 372, 373, 374, - 375, -1, 377, 378, 379, 380, 381, 382, 383, 384, - 385, 386, 387, 388, 389, 390, -1, 392, 393, -1, - 395, -1, 397, 398, 399, 400, 401, -1, 403, 404, - -1, -1, 407, 408, 409, 410, 411, 412, 413, 414, - 415, 416, 417, 418, 419, 420, -1, -1, 423, 424, - 425, 426, 427, -1, -1, 430, 431, 432, 433, 434, - 435, 436, -1, 438, -1, 440, 441, 442, 443, -1, - -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, - 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, - 465, 466, 467, 468, 469, 3, 4, 5, -1, -1, - -1, 9, 477, 478, -1, -1, -1, -1, -1, -1, - -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, - 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, - 38, 39, -1, 41, 42, 43, -1, 45, 46, 47, - 48, 49, -1, 51, 52, -1, 54, 55, 56, 57, - 58, 59, -1, -1, 62, 63, 64, 65, 66, -1, - 
68, 69, 70, 71, 72, -1, -1, -1, 76, 77, - 78, 79, 80, -1, 82, 83, 84, -1, 86, 87, - 88, 89, 90, 91, -1, -1, 94, 95, 96, -1, - -1, -1, -1, -1, -1, -1, 104, 105, 106, 107, - 108, 109, 110, 111, 112, 113, -1, 115, -1, 117, - 118, 119, 120, 121, 122, -1, 124, 125, 126, 127, - 128, -1, -1, 131, 132, 133, 134, 135, -1, 137, - 138, 139, -1, 141, 142, 143, -1, 145, 146, 147, - 148, 149, 150, 151, 152, 153, 154, 155, 156, -1, - 158, 159, 160, 161, -1, 163, -1, 165, -1, -1, - -1, 169, 170, 171, -1, 173, -1, 175, -1, 177, - 178, -1, 180, 181, 182, 183, 184, 185, -1, 187, - 188, 189, 190, -1, 192, 193, 194, 195, 196, 197, - -1, 199, -1, 201, 202, 203, 204, 205, 206, 207, - -1, 209, -1, 211, -1, -1, 214, -1, 216, 217, - 218, 219, 220, -1, -1, 223, -1, 225, -1, -1, - 228, 229, 230, -1, -1, 233, 234, 235, 236, 237, - 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, - 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, - -1, 259, 260, 261, 262, 263, -1, 265, 266, -1, - 268, -1, 270, 271, 272, 273, 274, 275, -1, 277, - 278, 279, -1, 281, 282, 283, -1, -1, 286, 287, - -1, 289, -1, 291, 292, 293, 294, 295, 296, 297, - 298, 299, 300, 301, -1, 303, 304, 305, 306, 307, - 308, 309, 310, 311, -1, 313, 314, 315, 316, 317, - 318, 319, 320, 321, 322, 323, 324, 325, 326, -1, - 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, - 338, 339, -1, 341, 342, -1, 344, 345, 346, 347, - 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, - 358, -1, 360, 361, 362, 363, 364, -1, 366, 367, - 368, 369, 370, -1, 372, 373, 374, 375, -1, 377, - 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, - 388, 389, 390, -1, 392, 393, -1, 395, -1, 397, - 398, 399, 400, 401, -1, 403, 404, -1, -1, 407, - 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, - 418, 419, 420, -1, -1, 423, 424, 425, 426, 427, - -1, -1, 430, 431, 432, 433, 434, 435, 436, -1, - 438, -1, 440, 441, 442, 443, -1, -1, 446, -1, - -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, - 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, - 468, 469, -1, 8, -1, -1, 
11, -1, -1, 477, - 478, 16, 17, 18, -1, -1, -1, -1, -1, -1, - -1, 8, -1, -1, 11, -1, -1, -1, 33, 16, - 17, 18, 37, -1, -1, -1, 41, -1, -1, 8, - -1, -1, 11, 48, -1, -1, 33, 16, 17, 18, - -1, -1, -1, -1, 41, -1, -1, -1, -1, -1, - -1, 48, -1, -1, 33, -1, -1, 8, 73, -1, - 11, -1, 41, -1, -1, 16, 17, 18, -1, 48, - -1, -1, -1, -1, -1, -1, 73, -1, -1, -1, - -1, -1, 33, -1, -1, 36, -1, -1, -1, -1, - 41, -1, -1, 8, 73, -1, 11, 48, -1, -1, - -1, 16, 17, 18, -1, -1, -1, -1, 123, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 33, -1, - -1, -1, 73, -1, -1, -1, 41, -1, -1, -1, - -1, -1, -1, 48, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 172, 73, -1, - -1, -1, -1, -1, -1, 162, -1, -1, -1, -1, - 167, 186, -1, -1, -1, 172, 191, 8, -1, -1, - 11, -1, 161, -1, -1, 16, 17, 18, -1, 186, - -1, -1, -1, 172, 191, -1, -1, 212, 213, -1, - -1, -1, 33, -1, -1, 36, -1, 186, -1, -1, - 41, 226, 191, -1, -1, 212, 213, 48, -1, -1, - -1, 172, -1, -1, -1, -1, -1, -1, -1, 226, - -1, -1, -1, 212, 213, 186, -1, -1, -1, -1, - 191, -1, 73, -1, -1, -1, -1, 226, -1, 264, - -1, -1, 267, -1, -1, -1, -1, 172, -1, -1, - -1, 212, 213, -1, -1, -1, 281, 264, -1, 284, - 267, 186, -1, -1, -1, 226, 191, -1, -1, -1, - -1, -1, -1, -1, 281, 264, -1, 284, 267, -1, - -1, -1, -1, -1, -1, -1, -1, 212, 213, -1, - -1, -1, 281, -1, -1, 284, -1, -1, -1, -1, - -1, 226, -1, 264, -1, -1, 267, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 307, -1, - 281, -1, -1, 284, -1, -1, -1, -1, -1, -1, - -1, 172, -1, -1, -1, -1, -1, -1, -1, 264, - -1, -1, 267, -1, -1, 186, 371, -1, -1, -1, - 191, -1, -1, -1, -1, -1, 281, -1, -1, 284, - -1, -1, -1, -1, 371, -1, -1, -1, -1, -1, - -1, 212, 213, -1, -1, -1, -1, 302, -1, -1, - -1, -1, 371, -1, -1, 226, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 8, - -1, -1, 11, -1, 429, -1, -1, 16, 17, 18, - 371, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 264, 33, -1, 267, -1, -1, -1, - -1, -1, 41, -1, -1, -1, -1, -1, -1, 48, - 281, -1, -1, 284, -1, 470, 
371, -1, 473, 474, - 475, -1, 477, 478, 479, 480, 481, 482, -1, -1, - -1, -1, -1, 470, 73, -1, 473, 474, 475, -1, - 477, 478, 479, 480, 481, 482, -1, -1, -1, -1, -1, 470, -1, -1, 473, 474, 475, -1, 477, 478, 479, 480, 481, 482, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 470, - -1, -1, 473, 474, 475, -1, 477, 478, 479, 480, - 481, 482, -1, -1, -1, -1, -1, -1, -1, -1, - 371, -1, -1, -1, 8, -1, -1, 11, -1, -1, - -1, -1, 16, 17, 18, 470, -1, -1, 473, 474, - 475, -1, 477, 478, 479, 480, 481, 482, 167, 33, - -1, -1, 36, 172, -1, -1, -1, 41, -1, -1, - -1, -1, -1, -1, 48, -1, -1, 186, -1, -1, - 8, -1, 191, 11, -1, -1, -1, -1, 16, 17, - 18, -1, -1, -1, -1, -1, -1, -1, -1, 73, - -1, -1, -1, 212, 213, 33, -1, -1, -1, -1, - -1, -1, -1, 41, -1, -1, -1, 226, -1, -1, - 48, -1, -1, -1, -1, -1, -1, -1, -1, 470, - -1, -1, 473, 474, 475, -1, 477, 478, 479, 480, - 481, 482, -1, -1, -1, 73, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 264, -1, -1, 267, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 8, 281, -1, 11, 284, -1, -1, -1, 16, - 17, 18, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 33, -1, 172, -1, - -1, -1, -1, -1, 41, -1, -1, -1, -1, -1, - -1, 48, 186, -1, -1, -1, -1, 191, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 73, -1, 212, 213, - -1, -1, -1, -1, 172, -1, -1, -1, -1, -1, - -1, -1, 226, -1, -1, -1, -1, -1, 186, -1, - -1, -1, 371, 191, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 212, 213, -1, -1, -1, -1, - 264, -1, -1, 267, -1, -1, -1, -1, 226, -1, - -1, -1, -1, -1, -1, -1, -1, 281, -1, -1, - 284, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 172, 264, -1, -1, 267, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 186, - -1, -1, -1, 281, 191, -1, 284, -1, -1, -1, + -1, -1, 371, -1, -1, -1, 8, -1, -1, 11, + -1, -1, -1, -1, 16, 17, 18, 470, -1, -1, + 473, 474, 475, -1, 477, 478, 479, 480, 481, 482, + 167, 33, -1, -1, 36, 172, -1, -1, 
-1, 41, + -1, -1, -1, -1, -1, -1, 48, -1, -1, 186, + -1, -1, 8, -1, 191, 11, -1, -1, -1, -1, + 16, 17, 18, -1, -1, -1, -1, -1, -1, -1, + -1, 73, -1, -1, -1, 212, 213, 33, -1, -1, + -1, -1, -1, -1, -1, 41, -1, -1, -1, 226, + -1, -1, 48, -1, -1, -1, -1, -1, -1, -1, -1, 470, -1, -1, 473, 474, 475, -1, 477, 478, - 479, 480, 481, 482, -1, 212, 213, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 226, - -1, -1, -1, -1, -1, -1, -1, 371, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 479, 480, 481, 482, -1, -1, -1, 73, -1, -1, -1, -1, -1, -1, -1, -1, -1, 264, -1, -1, 267, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 371, 281, -1, -1, 284, -1, -1, + -1, -1, -1, 8, 281, -1, 11, 284, -1, -1, + -1, 16, 17, 18, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 33, -1, + 172, -1, -1, -1, -1, -1, 41, -1, -1, -1, + -1, -1, 8, 48, 186, 11, -1, -1, -1, 191, + 16, 17, 18, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 33, 73, -1, + 212, 213, -1, -1, -1, 41, 172, -1, -1, -1, + -1, -1, 48, -1, 226, -1, -1, -1, -1, -1, + 186, -1, -1, -1, 371, 191, -1, -1, -1, -1, + 8, -1, -1, 11, -1, -1, -1, 73, 16, 17, + 18, -1, -1, -1, -1, -1, 212, 213, -1, -1, + -1, -1, 264, -1, -1, 267, -1, -1, -1, -1, + 226, -1, -1, 41, -1, -1, -1, -1, -1, 281, + 48, -1, 284, -1, -1, -1, -1, 8, -1, -1, + 11, -1, -1, -1, -1, 16, 17, 18, -1, -1, + -1, -1, -1, -1, -1, 73, -1, 172, 264, -1, + -1, 267, -1, -1, -1, -1, -1, -1, -1, -1, + 41, 186, -1, -1, -1, 281, 191, 48, 284, -1, + -1, -1, -1, 470, -1, -1, 473, 474, 475, -1, + 477, 478, 479, 480, 481, 482, 172, 212, 213, -1, + -1, -1, 73, -1, -1, -1, -1, -1, -1, -1, + 186, 226, -1, -1, -1, 191, -1, -1, -1, 371, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 212, 213, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 264, + 226, -1, 267, -1, 172, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 371, 281, -1, 186, 284, + -1, -1, -1, 191, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 
-1, 264, -1, + -1, 267, -1, -1, 212, 213, 402, -1, -1, -1, + -1, 172, -1, -1, -1, 281, -1, -1, 226, -1, + -1, -1, -1, -1, -1, 186, -1, -1, 470, -1, + 191, 473, 474, 475, -1, 477, 478, 479, 480, 481, + 482, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 212, 213, -1, -1, -1, 264, -1, -1, 267, + -1, -1, -1, -1, -1, 226, 371, -1, -1, -1, + -1, -1, -1, 281, 470, -1, -1, 473, 474, 475, + -1, 477, 478, 479, 480, 481, 482, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 402, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 371, 267, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 470, -1, -1, 473, - 474, 475, -1, 477, 478, 479, 480, 481, 482, -1, + 281, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 371, -1, -1, -1, -1, -1, - -1, -1, 470, -1, -1, 473, 474, 475, -1, 477, - 478, 479, 480, 481, 482, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 371, -1, 470, -1, -1, 473, 474, + 475, -1, 477, 478, 479, 480, 481, 482, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 470, -1, -1, 473, 474, 475, + 371, 477, 478, 479, 480, 481, 482, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 470, -1, -1, 473, 474, 475, -1, 477, + 478, 479, 480, 481, 482, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 3, -1, 5, 470, -1, -1, 473, 474, 475, -1, - 477, 478, 479, 480, 481, 482, 19, 20, 21, 22, + -1, -1, -1, -1, -1, -1, 3, -1, 5, 470, + -1, -1, 473, 474, 475, -1, 477, 478, 479, 480, + 481, 482, 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, + 37, 38, 39, 40, 41, 42, 43, -1, 45, 46, + 47, 48, 49, -1, 51, 52, 53, 54, 55, 56, + 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, + 67, 68, 
69, 70, 71, 72, 73, -1, 75, 76, + 77, 78, 79, 80, -1, 82, 83, 84, 85, 86, + 87, 88, 89, 90, 91, 92, -1, 94, 95, 96, + 97, 98, 99, -1, 101, 102, 103, 104, 105, 106, + 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, + 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, + 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, + 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, + 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, + 157, 158, 159, 160, 161, 162, 163, 164, 165, -1, + 167, -1, 169, 170, 171, -1, 173, 174, 175, 176, + 177, 178, 179, 180, 181, 182, 183, 184, 185, -1, + 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, + 197, 198, 199, -1, 201, 202, 203, 204, 205, 206, + 207, 208, 209, 210, 211, -1, -1, 214, -1, 216, + 217, 218, 219, 220, 221, 222, 223, -1, 225, -1, + 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, + 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, + 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, + 257, -1, 259, 260, 261, 262, 263, 264, 265, 266, + -1, 268, 269, 270, 271, 272, 273, 274, 275, 276, + 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, + 287, -1, 289, -1, 291, 292, 293, 294, 295, 296, + 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, + 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, + 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, + 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, + 337, 338, 339, 340, 341, 342, -1, 344, 345, 346, + 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, + 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, + 367, 368, 369, 370, -1, 372, 373, 374, 375, 376, + 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, + 387, 388, 389, 390, 391, 392, 393, 394, 395, -1, + 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, + 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, + 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, + 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, + 437, 438, -1, 440, 441, 442, 443, 444, 445, 446, + 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, + 457, 458, 459, 460, 461, 462, 
463, 464, 465, 466, + 467, 468, 469, 3, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, -1, 45, 46, 47, 48, 49, + -1, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, -1, 75, 76, 77, 78, 79, + 80, -1, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, -1, 94, 95, 96, 97, 98, 99, + -1, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, + 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, + 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, + 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, + 160, 161, 162, 163, 164, 165, -1, 167, -1, 169, + 170, 171, -1, 173, 174, 175, 176, 177, 178, 179, + 180, 181, 182, 183, 184, 185, -1, 187, 188, 189, + 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, + -1, 201, 202, 203, 204, 205, 206, 207, 208, 209, + 210, 211, -1, -1, 214, -1, 216, 217, 218, 219, + 220, 221, 222, 223, -1, 225, -1, 227, 228, 229, + 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, + 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, + 250, 251, 252, 253, 254, 255, 256, 257, -1, 259, + 260, 261, 262, 263, 264, 265, 266, -1, 268, 269, + 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, + 280, 281, 282, 283, 284, 285, 286, 287, -1, 289, + -1, 291, 292, 293, 294, 295, 296, 297, 298, 299, + 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, + 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, + 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, + 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, + 340, 341, 342, -1, 344, 345, 346, 347, 348, 349, + 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, + 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, + 370, -1, 372, 373, 374, 375, 376, 377, 378, 379, + 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, + 390, 391, 392, 393, 394, 395, -1, 397, 398, 399, + 400, 401, 
402, 403, 404, 405, 406, 407, 408, 409, + 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, + 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, + 430, 431, 432, 433, 434, 435, 436, 437, 438, -1, + 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, + 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, + 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, + 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, 52, @@ -196030,207 +209969,207 @@ static const yytype_int16 yycheck[] = 463, 464, 465, 466, 467, 468, 469, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, - 36, 37, 38, 39, 40, 41, 42, 43, -1, 45, - 46, 47, 48, 49, -1, 51, 52, 53, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, 67, 68, 69, 70, 71, 72, 73, -1, 75, - 76, 77, 78, 79, 80, -1, 82, 83, 84, 85, - 86, 87, 88, 89, 90, 91, 92, -1, 94, 95, - 96, 97, 98, 99, -1, 101, 102, 103, 104, 105, - 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, - 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, - 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, - 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, - 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, - 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, - -1, 167, -1, 169, 170, 171, -1, 173, 174, 175, - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, - -1, 187, 188, 189, 190, 191, 192, 193, 194, 195, - 196, 197, 198, 199, -1, 201, 202, 203, 204, 205, - 206, 207, 208, 209, 210, 211, -1, -1, 214, -1, - 216, 217, 218, 219, 220, 221, 222, 223, -1, 225, - -1, 227, 228, 229, 230, 231, 232, 233, 234, 235, + 26, -1, 28, 29, 30, -1, -1, -1, -1, -1, + -1, -1, 38, 39, -1, 41, 42, 43, 44, 45, + 46, 47, 48, 49, 50, 51, 52, -1, 54, 55, + 56, 57, 58, 59, -1, 61, 62, 63, 64, 65, + 66, -1, 68, 69, 70, 71, 72, -1, 74, -1, + 76, 77, 78, 79, 80, 81, 82, 83, 
84, -1, + 86, 87, 88, 89, 90, 91, -1, 93, 94, 95, + 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, + 106, 107, 108, 109, 110, 111, 112, 113, -1, 115, + -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, + 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, + -1, 137, 138, 139, -1, 141, 142, 143, -1, 145, + 146, 147, 148, 149, 150, 151, 152, 153, 154, -1, + 156, -1, 158, 159, 160, 161, -1, 163, -1, 165, + 166, -1, 168, 169, 170, 171, 172, 173, -1, 175, + -1, 177, 178, -1, 180, 181, 182, 183, 184, 185, + 186, 187, 188, 189, 190, -1, 192, 193, 194, 195, + 196, 197, -1, 199, 200, 201, 202, 203, 204, 205, + 206, 207, -1, 209, -1, 211, 212, 213, 214, 215, + 216, 217, 218, 219, 220, -1, -1, 223, 224, 225, + 226, -1, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, - 256, 257, -1, 259, 260, 261, 262, 263, 264, 265, - 266, -1, 268, 269, 270, 271, 272, 273, 274, 275, - 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, - 286, 287, -1, 289, -1, 291, 292, 293, 294, 295, - 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, - 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, + 256, 257, 258, 259, 260, 261, 262, 263, -1, 265, + 266, 267, 268, -1, 270, 271, 272, 273, 274, 275, + -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, + 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, + 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, + 306, 307, 308, 309, 310, 311, -1, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, - 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, - 336, 337, 338, 339, 340, 341, 342, -1, 344, 345, + 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, + 336, 337, 338, 339, -1, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, - 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, - 366, 367, 368, 369, 370, -1, 372, 373, 374, 375, - 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, - 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, - -1, 397, 398, 399, 400, 
401, 402, 403, 404, 405, - 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, - 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, - 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, - 436, 437, 438, -1, 440, 441, 442, 443, 444, 445, - 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, + 356, 357, 358, -1, 360, 361, 362, 363, 364, 365, + 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, + -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, + 386, 387, 388, 389, 390, -1, 392, 393, -1, 395, + 396, 397, 398, 399, 400, 401, -1, 403, 404, -1, + -1, 407, 408, 409, 410, 411, -1, 413, 414, 415, + 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, + 426, 427, 428, -1, 430, 431, 432, 433, 434, 435, + 436, -1, 438, 439, 440, 441, 442, 443, -1, -1, + 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, - 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, - 39, 40, 41, 42, 43, -1, 45, 46, 47, 48, - 49, -1, 51, 52, 53, 54, 55, 56, 57, 58, - 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, - 69, 70, 71, 72, 73, -1, 75, 76, 77, 78, - 79, 80, -1, 82, 83, 84, 85, 86, 87, 88, - 89, 90, 91, 92, -1, 94, 95, 96, 97, 98, - 99, -1, 101, 102, 103, 104, 105, 106, 107, 108, - 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, - 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, - 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, - 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, - 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, - 159, 160, 161, 162, 163, 164, 165, -1, 167, -1, - 169, 170, 171, -1, 173, 174, 175, 176, 177, 178, - 179, 180, 181, 182, 183, 184, 185, -1, 187, 188, - 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, - 199, -1, 201, 202, 203, 204, 205, 206, 207, 208, - 209, 210, 211, -1, -1, 214, -1, 216, 217, 218, - 219, 220, 221, 222, 223, -1, 225, -1, 227, 228, - 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, + 19, 20, 21, 22, 23, 24, 
25, 26, -1, 28, + 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, + 39, -1, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, -1, 54, 55, 56, 57, 58, + 59, -1, -1, 62, 63, 64, 65, 66, -1, 68, + 69, 70, 71, 72, -1, 74, -1, 76, 77, 78, + 79, 80, 81, 82, 83, 84, -1, 86, 87, 88, + 89, 90, 91, -1, 93, 94, 95, 96, -1, -1, + -1, 100, -1, -1, -1, 104, 105, 106, 107, 108, + 109, 110, 111, 112, 113, -1, 115, -1, 117, 118, + 119, 120, 121, 122, -1, 124, 125, 126, 127, 128, + -1, -1, 131, 132, 133, 134, 135, -1, 137, 138, + 139, -1, 141, 142, 143, -1, 145, 146, 147, 148, + 149, 150, 151, 152, 153, 154, -1, 156, -1, 158, + 159, 160, 161, -1, 163, -1, 165, 166, -1, 168, + 169, 170, 171, 172, 173, -1, 175, -1, 177, 178, + -1, 180, 181, 182, 183, 184, 185, 186, 187, 188, + 189, 190, -1, 192, 193, 194, 195, 196, 197, -1, + 199, 200, 201, 202, 203, 204, 205, 206, 207, -1, + 209, -1, 211, 212, 213, 214, 215, 216, 217, 218, + 219, 220, -1, -1, 223, 224, 225, 226, -1, 228, + 229, 230, -1, -1, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, - 249, 250, 251, 252, 253, 254, 255, 256, 257, -1, - 259, 260, 261, 262, 263, 264, 265, 266, -1, 268, - 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, - 279, 280, 281, 282, 283, 284, 285, 286, 287, -1, - 289, -1, 291, 292, 293, 294, 295, 296, 297, 298, - 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, - 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, - 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, + 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, + 259, 260, 261, 262, 263, -1, 265, 266, 267, 268, + -1, 270, 271, 272, 273, 274, 275, -1, 277, 278, + -1, -1, 281, 282, 283, -1, -1, 286, 287, 288, + 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, + 299, 300, 301, -1, 303, 304, 305, 306, 307, 308, + 309, 310, 311, -1, 313, 314, 315, 316, 317, 318, + 319, 320, 321, 322, 323, 324, 325, 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, - 339, 340, 341, 342, -1, 344, 345, 346, 347, 348, + 339, -1, 341, 342, 
343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, - 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, - 369, 370, -1, 372, 373, 374, 375, 376, 377, 378, + -1, 360, 361, 362, 363, 364, -1, 366, 367, 368, + 369, 370, 371, 372, 373, 374, 375, -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, - 389, 390, 391, 392, 393, 394, 395, -1, 397, 398, - 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, - 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, - 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, - 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, - -1, 440, 441, 442, 443, 444, 445, 446, 447, 448, + 389, 390, -1, 392, 393, -1, 395, 396, 397, 398, + 399, 400, 401, -1, 403, 404, -1, -1, 407, 408, + 409, 410, 411, -1, 413, 414, 415, 416, 417, 418, + 419, 420, -1, -1, 423, 424, 425, 426, 427, -1, + -1, 430, 431, 432, 433, 434, 435, 436, -1, 438, + 439, 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, 41, - 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, - 52, -1, 54, 55, 56, 57, 58, 59, -1, 61, - 62, 63, 64, 65, 66, -1, 68, 69, 70, 71, - 72, -1, 74, -1, 76, 77, 78, 79, 80, 81, - 82, 83, 84, -1, 86, 87, 88, 89, 90, 91, - -1, 93, 94, 95, 96, 97, 98, 99, 100, 101, - 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, + 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, + 52, -1, 54, 55, 56, 57, 58, 59, -1, -1, + 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, + 72, -1, -1, 75, 76, 77, 78, 79, 80, -1, + 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, + -1, -1, 94, 95, 96, -1, -1, -1, -1, -1, + -1, -1, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, -1, 115, -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, -1, 137, 138, 139, -1, 141, 142, 143, -1, 145, 146, 147, 148, 149, 150, 
151, 152, 153, 154, -1, 156, -1, 158, 159, 160, 161, - -1, 163, -1, 165, 166, -1, 168, 169, 170, 171, - 172, 173, -1, 175, -1, 177, 178, -1, 180, 181, - 182, 183, 184, 185, 186, 187, 188, 189, 190, -1, - 192, 193, 194, 195, 196, 197, -1, 199, 200, 201, + -1, 163, 164, 165, -1, -1, -1, 169, 170, 171, + -1, 173, -1, 175, -1, 177, 178, -1, 180, 181, + 182, 183, 184, 185, -1, 187, 188, 189, 190, -1, + 192, 193, 194, 195, 196, 197, -1, 199, -1, 201, 202, 203, 204, 205, 206, 207, -1, 209, -1, 211, - 212, 213, 214, 215, 216, 217, 218, 219, 220, -1, - -1, 223, 224, 225, 226, -1, 228, 229, 230, 231, - 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, + -1, -1, 214, -1, 216, 217, 218, 219, 220, -1, + -1, 223, -1, 225, -1, -1, 228, 229, 230, -1, + -1, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, - 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, - 262, 263, -1, 265, 266, 267, 268, -1, 270, 271, + 252, 253, 254, 255, 256, 257, -1, 259, 260, 261, + 262, 263, -1, 265, 266, -1, 268, -1, 270, 271, 272, 273, 274, 275, -1, 277, 278, -1, -1, 281, - 282, 283, -1, -1, 286, 287, 288, 289, 290, 291, + 282, 283, -1, -1, 286, 287, -1, 289, -1, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, 306, 307, 308, 309, 310, 311, - -1, 313, 314, 315, 316, 317, 318, 319, 320, 321, + 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, -1, 341, - 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, + 342, -1, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, -1, 360, 361, - 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, + 362, 363, 364, -1, 366, 367, 368, 369, 370, -1, 372, 373, 374, 375, -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, -1, - 392, 393, -1, 395, 396, 397, 398, 399, 400, 401, + 392, 393, -1, 395, -1, 397, 398, 399, 400, 401, -1, 403, 404, -1, -1, 407, 408, 409, 410, 411, -1, 
413, 414, 415, 416, 417, 418, 419, 420, -1, - -1, 423, 424, 425, 426, 427, 428, -1, 430, 431, - 432, 433, 434, 435, 436, -1, 438, 439, 440, 441, + 422, 423, 424, 425, 426, 427, -1, -1, 430, 431, + 432, 433, 434, 435, 436, -1, 438, -1, 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, - -1, -1, -1, 38, 39, -1, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 51, 52, -1, 54, + -1, -1, -1, 38, 39, -1, 41, 42, 43, -1, + 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, 57, 58, 59, -1, -1, 62, 63, 64, - 65, 66, -1, 68, 69, 70, 71, 72, -1, 74, - -1, 76, 77, 78, 79, 80, 81, 82, 83, 84, - -1, 86, 87, 88, 89, 90, 91, -1, 93, 94, - 95, 96, -1, -1, -1, 100, -1, -1, -1, 104, + 65, 66, 67, 68, 69, 70, 71, 72, -1, -1, + -1, 76, 77, 78, 79, 80, -1, 82, 83, 84, + 85, 86, 87, 88, 89, 90, 91, -1, -1, 94, + 95, 96, -1, -1, -1, -1, -1, -1, -1, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, -1, 115, -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, -1, 137, 138, 139, -1, 141, 142, 143, -1, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, - -1, 156, -1, 158, 159, 160, 161, -1, 163, -1, - 165, 166, -1, 168, 169, 170, 171, 172, 173, -1, + -1, 156, -1, 158, 159, 160, 161, -1, 163, 164, + 165, -1, -1, -1, 169, 170, 171, -1, 173, -1, 175, -1, 177, 178, -1, 180, 181, 182, 183, 184, - 185, 186, 187, 188, 189, 190, -1, 192, 193, 194, - 195, 196, 197, -1, 199, 200, 201, 202, 203, 204, - 205, 206, 207, -1, 209, -1, 211, 212, 213, 214, - 215, 216, 217, 218, 219, 220, -1, -1, 223, 224, + 185, -1, 187, 188, 189, 190, -1, 192, 193, 194, + 195, 196, 197, -1, 199, -1, 201, 202, 203, 204, + 205, 206, 207, -1, 209, -1, 211, -1, -1, 214, + -1, 216, 217, 218, 219, 220, -1, -1, 223, -1, 225, 226, -1, 228, 229, 230, -1, -1, 233, 234, 235, 236, 237, 
238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, - 255, 256, 257, 258, 259, 260, 261, 262, 263, -1, - 265, 266, 267, 268, -1, 270, 271, 272, 273, 274, + 255, 256, 257, -1, 259, 260, 261, 262, 263, -1, + 265, 266, -1, 268, -1, 270, 271, 272, 273, 274, 275, -1, 277, 278, -1, -1, 281, 282, 283, -1, - -1, 286, 287, 288, 289, 290, 291, 292, 293, 294, + -1, 286, 287, -1, 289, -1, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, -1, 303, 304, - 305, 306, 307, 308, 309, 310, 311, -1, 313, 314, + 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, -1, 328, 329, 330, 331, 332, 333, 334, - 335, 336, 337, 338, 339, -1, 341, 342, 343, 344, + 335, 336, 337, 338, 339, -1, 341, 342, -1, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, -1, 360, 361, 362, 363, 364, - -1, 366, 367, 368, 369, 370, 371, 372, 373, 374, + -1, 366, 367, 368, 369, 370, -1, 372, 373, 374, 375, -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, -1, 392, 393, -1, - 395, 396, 397, 398, 399, 400, 401, -1, 403, 404, + 395, -1, 397, 398, 399, 400, 401, -1, 403, 404, -1, -1, 407, 408, 409, 410, 411, -1, 413, 414, - 415, 416, 417, 418, 419, 420, -1, -1, 423, 424, + 415, 416, 417, 418, 419, 420, -1, 422, 423, 424, 425, 426, 427, -1, -1, 430, 431, 432, 433, 434, - 435, 436, -1, 438, 439, 440, 441, 442, 443, -1, + 435, 436, -1, 438, -1, 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, - 465, 466, 467, 468, 469, 3, -1, -1, -1, -1, + 465, 466, 467, 468, 469, 3, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, 57, - 58, 59, -1, -1, 62, 63, 64, 65, 66, 67, - 68, 69, 70, 71, 72, -1, -1, 75, 76, 77, - 78, 79, 80, -1, 82, 83, 84, 85, 86, 87, + 58, 
59, -1, -1, 62, 63, 64, 65, 66, -1, + 68, 69, 70, 71, 72, -1, -1, -1, 76, 77, + 78, 79, 80, -1, 82, 83, 84, -1, 86, 87, 88, 89, 90, 91, -1, -1, 94, 95, 96, -1, -1, -1, -1, -1, -1, -1, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, -1, 115, -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, -1, 137, 138, 139, -1, 141, 142, 143, -1, 145, 146, 147, - 148, 149, 150, 151, 152, 153, 154, -1, 156, -1, - 158, 159, 160, 161, -1, 163, 164, 165, -1, -1, + 148, 149, 150, 151, 152, 153, 154, 155, 156, -1, + 158, 159, 160, 161, -1, 163, -1, 165, -1, -1, -1, 169, 170, 171, -1, 173, -1, 175, -1, 177, 178, -1, 180, 181, 182, 183, 184, 185, -1, 187, 188, 189, 190, -1, 192, 193, 194, 195, 196, 197, @@ -196242,10 +210181,10 @@ static const yytype_int16 yycheck[] = 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, -1, 259, 260, 261, 262, 263, -1, 265, 266, -1, 268, -1, 270, 271, 272, 273, 274, 275, -1, 277, - 278, -1, -1, 281, 282, 283, -1, -1, 286, 287, + 278, 279, -1, 281, 282, 283, -1, -1, 286, 287, -1, 289, -1, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, 306, 307, - 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, + 308, 309, 310, 311, -1, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, -1, 341, 342, -1, 344, 345, 346, 347, @@ -196255,21 +210194,21 @@ static const yytype_int16 yycheck[] = 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, -1, 392, 393, -1, 395, -1, 397, 398, 399, 400, 401, -1, 403, 404, -1, -1, 407, - 408, 409, 410, 411, -1, 413, 414, 415, 416, 417, - 418, 419, 420, -1, 422, 423, 424, 425, 426, 427, + 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, + 418, 419, 420, -1, -1, 423, 424, 425, 426, 427, -1, -1, 430, 431, 432, 433, 434, 435, 436, -1, 438, -1, 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, - 
468, 469, 3, -1, -1, -1, -1, -1, -1, -1, + 468, 469, 3, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, - -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, + 31, 32, -1, -1, -1, -1, -1, 38, 39, -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, 57, 58, 59, -1, - -1, 62, 63, 64, 65, 66, 67, 68, 69, 70, + -1, 62, 63, 64, 65, 66, -1, 68, 69, 70, 71, 72, -1, -1, -1, 76, 77, 78, 79, 80, - -1, 82, 83, 84, 85, 86, 87, 88, 89, 90, + -1, 82, 83, 84, -1, 86, 87, 88, 89, 90, 91, -1, -1, 94, 95, 96, -1, -1, -1, -1, -1, -1, -1, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, -1, 115, -1, 117, 118, 119, 120, @@ -196277,13 +210216,13 @@ static const yytype_int16 yycheck[] = 131, 132, 133, 134, 135, -1, 137, 138, 139, -1, 141, 142, 143, -1, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, -1, 156, -1, 158, 159, 160, - 161, -1, 163, 164, 165, -1, -1, -1, 169, 170, + 161, -1, 163, -1, 165, -1, -1, -1, 169, 170, 171, -1, 173, -1, 175, -1, 177, 178, -1, 180, 181, 182, 183, 184, 185, -1, 187, 188, 189, 190, -1, 192, 193, 194, 195, 196, 197, -1, 199, -1, 201, 202, 203, 204, 205, 206, 207, -1, 209, -1, 211, -1, -1, 214, -1, 216, 217, 218, 219, 220, - -1, -1, 223, -1, 225, 226, -1, 228, 229, 230, + -1, -1, 223, -1, 225, -1, -1, 228, 229, 230, -1, -1, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, -1, 259, 260, @@ -196292,7 +210231,7 @@ static const yytype_int16 yycheck[] = 281, 282, 283, -1, -1, 286, 287, -1, 289, -1, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, 306, 307, 308, 309, 310, - 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, + 311, -1, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, -1, 341, 342, -1, 344, 345, 346, 347, 348, 349, 350, @@ -196303,7 +210242,7 @@ static const yytype_int16 yycheck[] = -1, 392, 393, -1, 
395, -1, 397, 398, 399, 400, 401, -1, 403, 404, -1, -1, 407, 408, 409, 410, 411, -1, 413, 414, 415, 416, 417, 418, 419, 420, - -1, 422, 423, 424, 425, 426, 427, -1, -1, 430, + -1, -1, 423, 424, 425, 426, 427, -1, -1, 430, 431, 432, 433, 434, 435, 436, -1, 438, -1, 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, @@ -196323,7 +210262,7 @@ static const yytype_int16 yycheck[] = 124, 125, 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, -1, 137, 138, 139, -1, 141, 142, 143, -1, 145, 146, 147, 148, 149, 150, 151, 152, 153, - 154, 155, 156, -1, 158, 159, 160, 161, -1, 163, + 154, -1, 156, -1, 158, 159, 160, 161, -1, 163, -1, 165, -1, -1, -1, 169, 170, 171, -1, 173, -1, 175, -1, 177, 178, -1, 180, 181, 182, 183, 184, 185, -1, 187, 188, 189, 190, -1, 192, 193, @@ -196335,7 +210274,7 @@ static const yytype_int16 yycheck[] = 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, -1, 259, 260, 261, 262, 263, -1, 265, 266, -1, 268, -1, 270, 271, 272, 273, - 274, 275, -1, 277, 278, 279, -1, 281, 282, 283, + 274, 275, -1, 277, 278, -1, 280, 281, 282, 283, -1, -1, 286, 287, -1, 289, -1, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, 306, 307, 308, 309, 310, 311, -1, 313, @@ -196348,7 +210287,7 @@ static const yytype_int16 yycheck[] = 374, 375, -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, -1, 392, 393, -1, 395, -1, 397, 398, 399, 400, 401, -1, 403, - 404, -1, -1, 407, 408, 409, 410, 411, 412, 413, + 404, -1, -1, 407, 408, 409, 410, 411, -1, 413, 414, 415, 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, 426, 427, -1, -1, 430, 431, 432, 433, 434, 435, 436, -1, 438, -1, 440, 441, 442, 443, @@ -196357,7 +210296,7 @@ static const yytype_int16 yycheck[] = 464, 465, 466, 467, 468, 469, 3, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, - -1, 28, 29, 30, 31, 32, -1, -1, -1, -1, + -1, 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, 
39, -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, 57, 58, 59, -1, -1, 62, 63, 64, 65, 66, @@ -196382,7 +210321,7 @@ static const yytype_int16 yycheck[] = 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, -1, 259, 260, 261, 262, 263, -1, 265, 266, -1, 268, -1, 270, 271, 272, 273, 274, 275, -1, - 277, 278, -1, -1, 281, 282, 283, -1, -1, 286, + 277, 278, -1, 280, 281, 282, 283, -1, -1, 286, 287, -1, 289, -1, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, 306, 307, 308, 309, 310, 311, -1, 313, 314, 315, 316, @@ -196401,10 +210340,10 @@ static const yytype_int16 yycheck[] = -1, 438, -1, 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, - 467, 468, 469, 3, -1, 5, -1, -1, -1, -1, + 467, 468, 469, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, - 30, -1, -1, -1, -1, -1, -1, -1, 38, 39, + 30, 31, 32, -1, -1, -1, -1, -1, 38, 39, -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, 57, 58, 59, -1, -1, 62, 63, 64, 65, 66, -1, 68, 69, @@ -196429,7 +210368,7 @@ static const yytype_int16 yycheck[] = 250, 251, 252, 253, 254, 255, 256, 257, -1, 259, 260, 261, 262, 263, -1, 265, 266, -1, 268, -1, 270, 271, 272, 273, 274, 275, -1, 277, 278, -1, - 280, 281, 282, 283, -1, -1, 286, 287, -1, 289, + -1, 281, 282, 283, -1, -1, 286, 287, -1, 289, -1, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, 306, 307, 308, 309, 310, 311, -1, 313, 314, 315, 316, 317, 318, 319, @@ -196475,7 +210414,7 @@ static const yytype_int16 yycheck[] = 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, -1, 259, 260, 261, 262, 263, -1, 265, 266, -1, 268, -1, 270, 271, 272, - 273, 274, 275, -1, 277, 278, -1, 280, 281, 282, + 273, 274, 275, -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, 286, 287, -1, 289, -1, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 
-1, 303, 304, 305, 306, 307, 308, 309, 310, 311, -1, @@ -196487,17 +210426,17 @@ static const yytype_int16 yycheck[] = 363, 364, -1, 366, 367, 368, 369, 370, -1, 372, 373, 374, 375, -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, -1, 392, - 393, -1, 395, -1, 397, 398, 399, 400, 401, -1, + 393, 394, 395, -1, 397, 398, 399, 400, 401, -1, 403, 404, -1, -1, 407, 408, 409, 410, 411, -1, 413, 414, 415, 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, 426, 427, -1, -1, 430, 431, 432, 433, 434, 435, 436, -1, 438, -1, 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, - 463, 464, 465, 466, 467, 468, 469, 3, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 463, 464, 465, 466, 467, 468, 469, 3, 4, -1, + -1, -1, -1, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, - 26, -1, 28, 29, 30, 31, 32, -1, -1, -1, + 26, -1, 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, 57, 58, 59, -1, -1, 62, 63, 64, 65, @@ -196541,9 +210480,9 @@ static const yytype_int16 yycheck[] = 436, -1, 438, -1, 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, - 466, 467, 468, 469, 3, -1, 5, -1, -1, -1, + 466, 467, 468, 469, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, 57, 58, @@ -196580,7 +210519,7 @@ static const yytype_int16 yycheck[] = -1, 360, 361, 362, 363, 364, -1, 366, 367, 368, 369, 370, -1, 372, 373, 374, 375, -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, - 389, 390, -1, 392, 393, 394, 395, -1, 397, 398, + 389, 390, -1, 392, 393, -1, 395, -1, 397, 398, 399, 400, 401, -1, 403, 404, -1, -1, 407, 408, 409, 410, 411, -1, 413, 414, 
415, 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, 426, 427, -1, @@ -196588,9 +210527,9 @@ static const yytype_int16 yycheck[] = -1, 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, - 469, 3, 4, -1, -1, -1, -1, 9, -1, -1, + 469, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, - 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, + 22, 23, 24, 25, 26, 27, 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, 57, 58, 59, -1, -1, @@ -196635,9 +210574,9 @@ static const yytype_int16 yycheck[] = 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 3, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, - 25, 26, 27, 28, 29, 30, -1, -1, -1, -1, + 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, 57, 58, 59, -1, -1, 62, 63, 64, @@ -196681,9 +210620,9 @@ static const yytype_int16 yycheck[] = 435, 436, -1, 438, -1, 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, - 465, 466, 467, 468, 469, 3, -1, -1, -1, -1, + 465, 466, 467, 468, 469, 3, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 19, 20, 21, 22, 23, 24, 25, 26, 27, + -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, 57, @@ -196821,10 +210760,10 @@ static const yytype_int16 yycheck[] = 434, 435, 436, -1, 438, -1, 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, - 464, 465, 466, 467, 468, 469, 3, -1, 5, -1, + 464, 465, 466, 467, 468, 469, 3, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, - -1, 28, 29, 30, -1, -1, -1, -1, -1, -1, + 27, 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, 57, 58, 59, -1, -1, 62, 63, 64, 65, 66, @@ -196915,9 +210854,9 @@ static const yytype_int16 yycheck[] = 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, - 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 3, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, - 23, 24, 25, 26, 27, 28, 29, 30, -1, -1, + 23, 24, 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, 57, 58, 59, -1, -1, 62, @@ -197008,10 +210947,10 @@ static const yytype_int16 yycheck[] = 436, -1, 438, -1, 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, - 466, 467, 468, 469, 3, -1, 5, -1, -1, -1, + 466, 467, 468, 469, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, - 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, + 29, 30, -1, -1, -1, -1, -1, 36, -1, 38, 39, -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, 57, 58, 59, -1, -1, 62, 63, 64, 65, 66, -1, 68, @@ -197055,10 +210994,10 @@ static const yytype_int16 yycheck[] = -1, 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, - 469, 3, -1, 5, -1, -1, -1, -1, -1, -1, + 469, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, - -1, -1, -1, -1, -1, -1, 38, 39, -1, 41, + -1, -1, -1, -1, 36, -1, 38, 39, -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, 57, 58, 59, -1, -1, 62, 63, 64, 65, 66, -1, 68, 69, 70, 71, @@ -197102,10 +211041,10 @@ static 
const yytype_int16 yycheck[] = 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 3, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, - -1, 36, -1, 38, 39, -1, 41, 42, 43, -1, + -1, -1, -1, 38, 39, -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, 57, 58, 59, -1, -1, 62, 63, 64, 65, 66, -1, 68, 69, 70, 71, 72, -1, -1, @@ -197151,7 +211090,7 @@ static const yytype_int16 yycheck[] = 465, 466, 467, 468, 469, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, - 28, 29, 30, -1, -1, -1, -1, -1, 36, -1, + 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, 57, 58, 59, -1, -1, 62, 63, 64, 65, 66, -1, @@ -197195,7 +211134,7 @@ static const yytype_int16 yycheck[] = 438, -1, 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, - 468, 469, 3, -1, 5, -1, -1, -1, -1, -1, + 468, 469, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, @@ -197525,7 +211464,7 @@ static const yytype_int16 yycheck[] = 469, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, - -1, -1, -1, -1, -1, -1, 38, 39, -1, 41, + -1, -1, -1, -1, 36, -1, 38, 39, -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, 57, 58, 59, -1, -1, 62, 63, 64, 65, 66, -1, 68, 69, 70, 71, @@ -197558,7 +211497,7 @@ static const yytype_int16 yycheck[] = 332, 333, 334, 335, 336, 337, 338, 339, -1, 341, 342, -1, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, -1, 360, 361, - 362, 363, 364, -1, 366, 367, 368, 369, 370, -1, + 
362, 363, 364, -1, -1, 367, 368, 369, 370, -1, 372, 373, 374, 375, -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, -1, 392, 393, -1, 395, -1, 397, 398, 399, 400, 401, @@ -197618,7 +211557,7 @@ static const yytype_int16 yycheck[] = 465, 466, 467, 468, 469, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, - 28, 29, 30, -1, -1, -1, -1, -1, 36, -1, + 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, 57, 58, 59, -1, -1, 62, 63, 64, 65, 66, -1, @@ -197651,7 +211590,7 @@ static const yytype_int16 yycheck[] = 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, -1, 341, 342, -1, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, - 358, -1, 360, 361, 362, 363, 364, -1, -1, 367, + 358, -1, 360, 361, 362, 363, 364, -1, 366, 367, 368, 369, 370, -1, 372, 373, 374, 375, -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, -1, 392, 393, -1, 395, -1, 397, @@ -197691,244 +211630,12 @@ static const yytype_int16 yycheck[] = 261, 262, 263, -1, 265, 266, -1, 268, -1, 270, 271, 272, 273, 274, 275, -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, 286, 287, -1, 289, -1, - 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, - 301, -1, 303, 304, 305, 306, 307, 308, 309, 310, - 311, -1, 313, 314, 315, 316, 317, 318, 319, 320, - 321, 322, 323, 324, 325, 326, -1, 328, 329, 330, - 331, 332, 333, 334, 335, 336, 337, 338, 339, -1, - 341, 342, -1, 344, 345, 346, 347, 348, 349, 350, - 351, 352, 353, 354, 355, 356, 357, 358, -1, 360, - 361, 362, 363, 364, -1, 366, 367, 368, 369, 370, - -1, 372, 373, 374, 375, -1, 377, 378, 379, 380, - 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, - -1, 392, 393, -1, 395, -1, 397, 398, 399, 400, - 401, -1, 403, 404, -1, -1, 407, 408, 409, 410, - 411, -1, 413, 414, 415, 416, 417, 418, 419, 420, - -1, -1, 423, 424, 425, 426, 427, -1, -1, 430, - 431, 432, 433, 434, 435, 436, -1, 438, 
-1, 440, - 441, 442, 443, -1, -1, 446, -1, -1, 449, 450, - 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, - 461, 462, 463, 464, 465, 466, 467, 468, 469, 3, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, - 24, 25, 26, -1, 28, 29, 30, -1, -1, -1, - -1, -1, -1, -1, 38, 39, -1, 41, 42, 43, - -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, - 54, 55, 56, 57, 58, 59, -1, -1, 62, 63, - 64, 65, 66, -1, 68, 69, 70, 71, 72, -1, - -1, -1, 76, 77, 78, 79, 80, -1, 82, 83, - 84, -1, 86, 87, 88, 89, 90, 91, -1, -1, - 94, 95, 96, -1, -1, -1, -1, -1, -1, -1, - 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, - -1, 115, -1, 117, 118, 119, 120, 121, 122, -1, - 124, 125, 126, 127, 128, -1, -1, 131, 132, 133, - 134, 135, -1, 137, 138, 139, -1, 141, 142, 143, - -1, 145, 146, 147, 148, 149, 150, 151, 152, 153, - 154, -1, 156, -1, 158, 159, 160, 161, -1, 163, - -1, 165, -1, -1, -1, 169, 170, 171, -1, 173, - -1, 175, -1, 177, 178, -1, 180, 181, 182, 183, - 184, 185, -1, 187, 188, 189, 190, -1, 192, 193, - 194, 195, 196, 197, -1, 199, -1, 201, 202, 203, - 204, 205, 206, 207, -1, 209, -1, 211, -1, -1, - 214, -1, 216, 217, 218, 219, 220, -1, -1, 223, - -1, 225, -1, -1, 228, 229, 230, -1, -1, 233, - 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, - 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, - 254, 255, 256, 257, -1, 259, 260, 261, 262, 263, - -1, 265, 266, -1, 268, -1, 270, 271, 272, 273, - 274, 275, -1, 277, 278, -1, -1, 281, 282, 283, - -1, -1, 286, 287, -1, 289, -1, 291, 292, 293, - 294, 295, 296, 297, 298, 299, 300, 301, -1, 303, - 304, 305, 306, 307, 308, 309, 310, 311, -1, 313, - 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, - 324, 325, 326, -1, 328, 329, 330, 331, 332, 333, - 334, 335, 336, 337, 338, 339, -1, 341, 342, -1, - 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, - 354, 355, 356, 357, 358, -1, 360, 361, 362, 363, - 364, -1, 366, 367, 368, 369, 370, -1, 372, 373, - 374, 375, -1, 377, 378, 379, 380, 381, 382, 383, - 384, 385, 386, 
387, 388, 389, 390, -1, 392, 393, - -1, 395, -1, 397, 398, 399, 400, 401, -1, 403, - 404, -1, -1, 407, 408, 409, 410, 411, -1, 413, - 414, 415, 416, 417, 418, 419, 420, -1, -1, 423, - 424, 425, 426, 427, -1, -1, 430, 431, 432, 433, - 434, 435, 436, -1, 438, -1, 440, 441, 442, 443, - -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, - 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, - 464, 465, 466, 467, 468, 469, 3, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, - -1, 28, 29, 30, -1, -1, -1, -1, -1, -1, - -1, 38, 39, -1, 41, 42, 43, -1, 45, 46, - 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, - 57, 58, 59, -1, -1, 62, 63, 64, 65, 66, - -1, 68, 69, 70, 71, 72, -1, -1, -1, 76, - 77, 78, 79, 80, -1, 82, 83, 84, -1, 86, - 87, 88, 89, 90, 91, -1, -1, 94, 95, 96, - -1, -1, -1, -1, -1, -1, -1, 104, 105, 106, - 107, 108, 109, 110, 111, 112, 113, -1, 115, -1, - 117, 118, 119, 120, 121, 122, -1, 124, 125, 126, - 127, 128, -1, -1, 131, 132, 133, 134, 135, -1, - 137, 138, 139, -1, 141, 142, 143, -1, 145, 146, - 147, 148, 149, 150, 151, 152, 153, 154, -1, 156, - -1, 158, 159, 160, 161, -1, 163, -1, 165, -1, - -1, -1, 169, 170, 171, -1, 173, -1, 175, -1, - 177, 178, -1, 180, 181, 182, 183, 184, 185, -1, - 187, 188, 189, 190, -1, 192, 193, 194, 195, 196, - 197, -1, 199, -1, 201, 202, 203, 204, 205, 206, - 207, -1, 209, -1, 211, -1, -1, 214, -1, 216, - 217, 218, 219, 220, -1, -1, 223, -1, 225, -1, - -1, 228, 229, 230, -1, -1, 233, 234, 235, 236, - 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, - 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, - 257, -1, 259, 260, 261, 262, 263, -1, 265, 266, - -1, 268, -1, 270, 271, 272, 273, 274, 275, -1, - 277, 278, -1, -1, 281, 282, 283, -1, -1, 286, - 287, -1, 289, -1, 291, 292, 293, 294, 295, 296, - 297, 298, 299, 300, 301, -1, 303, 304, 305, 306, - 307, 308, 309, 310, 311, -1, 313, 314, 315, 316, - 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, - -1, 328, 329, 330, 331, 332, 333, 334, 335, 336, 
- 337, 338, 339, -1, 341, 342, -1, 344, 345, 346, - 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, - 357, 358, -1, 360, 361, 362, 363, 364, -1, 366, - 367, 368, 369, 370, -1, 372, 373, 374, 375, -1, - 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, - 387, 388, 389, 390, -1, 392, 393, -1, 395, -1, - 397, 398, 399, 400, 401, -1, 403, 404, -1, -1, - 407, 408, 409, 410, 411, -1, 413, 414, 415, 416, - 417, 418, 419, 420, -1, -1, 423, 424, 425, 426, - 427, -1, -1, 430, 431, 432, 433, 434, 435, 436, - -1, 438, -1, 440, 441, 442, 443, -1, -1, 446, - -1, -1, 449, 450, 451, 452, 453, 454, 455, 456, - 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, - 467, 468, 469, 3, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, - 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, - 30, -1, -1, -1, -1, -1, -1, -1, 38, 39, - -1, 41, 42, 43, -1, 45, 46, 47, 48, 49, - -1, 51, 52, -1, 54, 55, 56, 57, 58, 59, - -1, -1, 62, 63, 64, 65, 66, -1, 68, 69, - 70, 71, 72, -1, -1, -1, 76, 77, 78, 79, - 80, -1, 82, 83, 84, -1, 86, 87, 88, 89, - 90, 91, -1, -1, 94, 95, 96, -1, -1, -1, - -1, -1, -1, -1, 104, 105, 106, 107, 108, 109, - 110, 111, 112, 113, -1, 115, -1, 117, 118, 119, - 120, 121, 122, -1, 124, 125, 126, 127, 128, -1, - -1, 131, 132, 133, 134, 135, -1, 137, 138, 139, - -1, 141, 142, 143, -1, 145, 146, 147, 148, 149, - 150, 151, 152, 153, 154, -1, 156, -1, 158, 159, - 160, 161, -1, 163, -1, 165, -1, -1, -1, 169, - 170, 171, -1, 173, -1, 175, -1, 177, 178, -1, - 180, 181, 182, 183, 184, 185, -1, 187, 188, 189, - 190, -1, 192, 193, 194, 195, 196, 197, -1, 199, - -1, 201, 202, 203, 204, 205, 206, 207, -1, 209, - -1, 211, -1, -1, 214, -1, 216, 217, 218, 219, - 220, -1, -1, 223, -1, 225, -1, -1, 228, 229, - 230, -1, -1, 233, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, - 250, 251, 252, 253, 254, 255, 256, 257, -1, 259, - 260, 261, 262, 263, -1, 265, 266, -1, 268, -1, - 270, 271, 272, 273, 274, 275, -1, 277, 278, -1, - -1, 281, 282, 283, -1, -1, 286, 
287, -1, 289, - -1, 291, 292, 293, 294, 295, 296, 297, 298, 299, - 300, 301, -1, 303, 304, 305, 306, 307, 308, 309, - 310, 311, -1, 313, 314, 315, 316, 317, 318, 319, - 320, 321, 322, 323, 324, 325, 326, -1, 328, 329, - 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, - -1, 341, 342, -1, 344, 345, 346, 347, 348, 349, - 350, 351, 352, 353, 354, 355, 356, 357, 358, -1, - 360, 361, 362, 363, 364, -1, 366, 367, 368, 369, - 370, -1, 372, 373, 374, 375, -1, 377, 378, 379, - 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, - 390, -1, 392, 393, -1, 395, -1, 397, 398, 399, - 400, 401, -1, 403, 404, -1, -1, 407, 408, 409, - 410, 411, -1, 413, 414, 415, 416, 417, 418, 419, - 420, -1, -1, 423, 424, 425, 426, 427, -1, -1, - 430, 431, 432, 433, 434, 435, 436, -1, 438, -1, - 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, - 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, - 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, - 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 19, 20, 21, 22, - 23, 24, 25, 26, -1, 28, 29, 30, -1, -1, - -1, -1, -1, -1, -1, 38, 39, -1, 41, 42, - 43, 44, 45, 46, 47, -1, 49, 50, 51, 52, - -1, 54, 55, 56, 57, 58, 59, -1, -1, 62, - 63, 64, 65, 66, -1, 68, 69, 70, 71, -1, - -1, 74, -1, 76, 77, 78, 79, 80, 81, 82, - 83, 84, -1, 86, 87, 88, 89, 90, 91, -1, - 93, 94, 95, 96, -1, -1, -1, 100, -1, -1, - -1, 104, 105, 106, 107, 108, 109, 110, 111, 112, - 113, -1, 115, -1, 117, 118, 119, 120, 121, 122, - -1, 124, 125, 126, 127, 128, -1, -1, 131, 132, - 133, 134, 135, -1, 137, 138, 139, -1, 141, 142, - 143, -1, 145, 146, 147, 148, -1, 150, 151, 152, - 153, -1, -1, 156, -1, 158, 159, 160, 161, -1, - 163, -1, 165, 166, -1, 168, 169, 170, 171, 172, - 173, -1, 175, -1, -1, 178, -1, 180, 181, 182, - 183, 184, 185, 186, 187, 188, 189, 190, -1, 192, - 193, 194, 195, 196, 197, -1, 199, 200, -1, 202, - 203, 204, 205, 206, 207, -1, 209, -1, 211, 212, - 213, 214, 215, 216, 217, 218, 219, 220, -1, -1, - 223, 224, 225, 226, -1, 228, 229, 230, -1, -1, - 233, 
234, 235, 236, 237, 238, 239, 240, 241, 242, - 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, - 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, - -1, -1, 265, 266, 267, 268, -1, -1, 271, 272, - 273, 274, 275, -1, 277, 278, -1, -1, 281, 282, - 283, -1, -1, 286, -1, 288, 289, 290, -1, 292, - 293, 294, 295, 296, 297, 298, 299, 300, 301, -1, - 303, 304, -1, 306, 307, -1, 309, 310, 311, -1, - 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, - 323, 324, 325, 326, -1, 328, 329, 330, 331, 332, - 333, 334, 335, 336, 337, 338, 339, -1, 341, 342, - 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, - 353, 354, 355, 356, 357, 358, -1, 360, 361, 362, - 363, 364, -1, 366, 367, 368, 369, 370, 371, 372, - 373, 374, 375, -1, 377, 378, 379, 380, 381, 382, - 383, 384, 385, 386, 387, 388, 389, -1, -1, 392, - 393, -1, 395, 396, 397, 398, 399, 400, 401, -1, - 403, 404, -1, -1, 407, 408, -1, 410, -1, -1, - 413, 414, 415, 416, 417, 418, 419, 420, -1, -1, - 423, 424, 425, 426, 427, -1, -1, 430, 431, 432, - 433, 434, -1, 436, -1, 438, 439, 440, 441, 442, - 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, - 453, 454, 3, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 466, 467, 468, 469, -1, 19, 20, - 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, - -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, - 41, 42, 43, -1, 45, 46, 47, 48, 49, -1, - 51, 52, -1, 54, 55, 56, 57, 58, 59, -1, - -1, 62, 63, 64, 65, 66, -1, 68, 69, 70, - 71, 72, -1, -1, -1, 76, 77, 78, 79, 80, - -1, 82, 83, 84, -1, 86, 87, 88, 89, 90, - 91, -1, -1, 94, 95, 96, -1, -1, -1, -1, - -1, -1, -1, 104, 105, 106, 107, 108, 109, 110, - 111, 112, 113, -1, 115, -1, 117, 118, 119, 120, - 121, 122, -1, 124, 125, 126, 127, 128, -1, -1, - 131, 132, 133, 134, 135, -1, 137, 138, 139, -1, - 141, 142, 143, -1, 145, 146, 147, 148, 149, 150, - 151, 152, 153, 154, -1, 156, -1, 158, 159, 160, - 161, -1, 163, -1, 165, -1, -1, -1, 169, 170, - 171, -1, 173, -1, 175, -1, 177, 178, -1, 180, - 181, 182, 183, 184, 185, -1, 187, 188, 189, 190, - -1, 192, 193, 194, 195, 196, 
197, -1, 199, -1, - 201, 202, 203, 204, 205, 206, 207, -1, 209, -1, - 211, -1, -1, 214, -1, 216, 217, 218, 219, 220, - -1, -1, 223, -1, 225, -1, -1, 228, 229, 230, - -1, -1, 233, 234, 235, 236, 237, 238, 239, 240, - 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, - 251, 252, 253, 254, 255, 256, 257, -1, 259, 260, - 261, 262, 263, -1, 265, 266, -1, 268, -1, 270, - 271, 272, 273, 274, 275, -1, 277, 278, -1, -1, - 281, 282, 283, -1, -1, 286, 287, -1, 289, -1, - 291, 292, 293, 294, 295, 296, 297, -1, 299, 300, + 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, -1, 303, 304, 305, 306, 307, 308, 309, 310, - 311, -1, 313, 314, 315, 316, 317, 318, 319, -1, + 311, -1, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, -1, - 341, 342, -1, 344, 345, 346, 347, -1, 349, 350, + 341, 342, -1, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, -1, 360, 361, 362, 363, 364, -1, 366, 367, 368, 369, 370, -1, 372, 373, 374, 375, -1, 377, 378, 379, 380, @@ -197945,94 +211652,233 @@ static const yytype_int16 yycheck[] = -1, -1, -1, -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, 38, 39, -1, 41, 42, 43, - 44, 45, 46, 47, -1, 49, 50, 51, 52, -1, + -1, 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, 55, 56, 57, 58, 59, -1, -1, 62, 63, - 64, 65, 66, -1, 68, 69, 70, 71, -1, -1, - 74, -1, 76, 77, 78, 79, 80, 81, 82, 83, - 84, -1, 86, 87, 88, 89, 90, 91, -1, 93, - 94, 95, 96, -1, -1, -1, 100, -1, -1, -1, + 64, 65, 66, -1, 68, 69, 70, 71, 72, -1, + -1, -1, 76, 77, 78, 79, 80, -1, 82, 83, + 84, -1, 86, 87, 88, 89, 90, 91, -1, -1, + 94, 95, 96, -1, -1, -1, -1, -1, -1, -1, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, -1, 115, -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, -1, 137, 138, 139, -1, 141, 142, 143, - -1, 145, 146, 147, 148, -1, 150, 151, 152, 153, - -1, -1, 156, -1, 158, 159, 160, 161, -1, 163, 
- -1, 165, 166, -1, 168, 169, 170, 171, 172, 173, - -1, 175, -1, -1, 178, -1, 180, 181, 182, 183, - 184, 185, 186, 187, 188, 189, 190, -1, 192, 193, - 194, 195, 196, 197, -1, 199, 200, -1, 202, 203, - 204, 205, 206, 207, -1, 209, -1, 211, 212, 213, - 214, 215, 216, 217, 218, 219, 220, -1, -1, 223, - 224, 225, 226, -1, 228, 229, 230, -1, -1, 233, + -1, 145, 146, 147, 148, 149, 150, 151, 152, 153, + 154, -1, 156, -1, 158, 159, 160, 161, -1, 163, + -1, 165, -1, -1, -1, 169, 170, 171, -1, 173, + -1, 175, -1, 177, 178, -1, 180, 181, 182, 183, + 184, 185, -1, 187, 188, 189, 190, -1, 192, 193, + 194, 195, 196, 197, -1, 199, -1, 201, 202, 203, + 204, 205, 206, 207, -1, 209, -1, 211, -1, -1, + 214, -1, 216, 217, 218, 219, 220, -1, -1, 223, + -1, 225, -1, -1, 228, 229, 230, -1, -1, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, - 254, 255, 256, 257, 258, 259, 260, 261, 262, -1, - -1, 265, 266, 267, 268, -1, -1, 271, 272, 273, + 254, 255, 256, 257, -1, 259, 260, 261, 262, 263, + -1, 265, 266, -1, 268, -1, 270, 271, 272, 273, 274, 275, -1, 277, 278, -1, -1, 281, 282, 283, - -1, -1, 286, -1, 288, 289, 290, -1, 292, 293, + -1, -1, 286, 287, -1, 289, -1, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, -1, 303, - 304, -1, 306, 307, -1, 309, 310, 311, -1, 313, + 304, 305, 306, 307, 308, 309, 310, 311, -1, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, -1, 328, 329, 330, 331, 332, 333, - 334, 335, 336, 337, 338, 339, -1, 341, 342, 343, - 344, 345, 346, -1, 348, 349, 350, 351, 352, 353, + 334, 335, 336, 337, 338, 339, -1, 341, 342, -1, + 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, -1, 360, 361, 362, 363, - 364, -1, 366, -1, 368, 369, 370, 371, 372, 373, + 364, -1, 366, 367, 368, 369, 370, -1, 372, 373, 374, 375, -1, 377, 378, 379, 380, 381, 382, 383, - 384, 385, 386, 387, 388, 389, -1, -1, 392, 393, - -1, 395, 396, 397, 398, 399, 400, 401, -1, 403, - 404, -1, -1, 
407, 408, -1, 410, -1, -1, 413, + 384, 385, 386, 387, 388, 389, 390, -1, 392, 393, + -1, 395, -1, 397, 398, 399, 400, 401, -1, 403, + 404, -1, -1, 407, 408, 409, 410, 411, -1, 413, 414, 415, 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, 426, 427, -1, -1, 430, 431, 432, 433, - 434, -1, 436, -1, 438, 439, 440, 441, 442, 443, + 434, 435, 436, -1, 438, -1, 440, 441, 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, 452, 453, - 454, 3, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 466, 467, 468, 469, -1, 19, 20, 21, - 22, 23, 24, 25, 26, -1, 28, 29, 30, -1, - -1, -1, -1, -1, -1, -1, 38, 39, -1, 41, - 42, 43, 44, 45, 46, 47, -1, 49, 50, 51, - 52, -1, 54, 55, 56, 57, 58, 59, -1, -1, - 62, 63, 64, 65, 66, -1, 68, 69, 70, 71, - -1, -1, 74, -1, 76, 77, 78, 79, 80, 81, - 82, 83, 84, -1, 86, 87, 88, 89, 90, 91, - -1, 93, 94, 95, 96, -1, -1, -1, 100, -1, - -1, -1, 104, 105, 106, 107, 108, 109, 110, 111, - 112, 113, -1, 115, -1, 117, 118, 119, 120, 121, - 122, -1, 124, 125, 126, 127, 128, -1, -1, 131, - 132, 133, 134, 135, -1, 137, 138, 139, -1, 141, - 142, 143, -1, 145, 146, 147, 148, -1, 150, 151, - 152, 153, -1, -1, 156, -1, 158, 159, 160, 161, - -1, 163, -1, 165, 166, -1, 168, 169, 170, 171, - 172, 173, -1, 175, -1, -1, 178, -1, 180, 181, - 182, 183, 184, 185, 186, 187, 188, 189, 190, -1, - 192, 193, 194, 195, 196, 197, -1, 199, 200, -1, - 202, 203, 204, 205, 206, 207, -1, 209, -1, 211, - 212, 213, 214, 215, 216, 217, 218, 219, 220, -1, - -1, 223, 224, 225, 226, -1, 228, 229, 230, -1, - -1, 233, 234, 235, 236, 237, -1, 239, 240, 241, - 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, - 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, - 262, -1, -1, 265, 266, 267, 268, -1, -1, 271, - 272, 273, 274, 275, -1, 277, 278, -1, -1, 281, - 282, 283, -1, -1, 286, -1, 288, 289, 290, -1, - 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, - -1, 303, 304, -1, 306, 307, -1, 309, 310, 311, - -1, 313, 314, 315, 316, 317, 318, 319, 320, 321, - 322, 323, 324, 325, 326, -1, 328, 329, 330, 331, - 
332, 333, 334, 335, 336, 337, 338, 339, -1, 341, - 342, 343, 344, 345, 346, -1, 348, 349, 350, 351, - 352, 353, 354, 355, 356, 357, 358, -1, 360, 361, - 362, 363, 364, -1, 366, -1, 368, 369, 370, 371, - 372, 373, 374, 375, -1, 377, 378, 379, 380, 381, - 382, 383, 384, 385, 386, 387, -1, 389, -1, -1, - 392, 393, -1, 395, 396, 397, 398, 399, 400, 401, - -1, 403, 404, -1, -1, 407, 408, -1, 410, -1, - -1, 413, 414, 415, 416, 417, 418, 419, 420, -1, - -1, 423, 424, 425, 426, 427, -1, -1, 430, 431, - 432, 433, 434, -1, 436, -1, 438, 439, 440, 441, - 442, 443, -1, -1, 446, -1, -1, 449, 450, 451, - 452, 453, 454, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 466, 467, 468, 469 + 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, + 464, 465, 466, 467, 468, 469, 3, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 19, 20, 21, 22, 23, 24, 25, 26, + -1, 28, 29, 30, -1, -1, -1, -1, -1, -1, + -1, 38, 39, -1, 41, 42, 43, 44, 45, 46, + 47, -1, 49, 50, 51, 52, -1, 54, 55, 56, + 57, 58, 59, -1, -1, 62, 63, 64, 65, 66, + -1, 68, 69, 70, 71, -1, -1, 74, -1, 76, + 77, 78, 79, 80, 81, 82, 83, 84, -1, 86, + 87, 88, 89, 90, 91, -1, 93, 94, 95, 96, + -1, -1, -1, 100, -1, -1, -1, 104, 105, 106, + 107, 108, 109, 110, 111, 112, 113, -1, 115, -1, + 117, 118, 119, 120, 121, 122, -1, 124, 125, 126, + 127, 128, -1, -1, 131, 132, 133, 134, 135, -1, + 137, 138, 139, -1, 141, 142, 143, -1, 145, 146, + 147, 148, -1, 150, 151, 152, 153, -1, -1, 156, + -1, 158, 159, 160, 161, -1, 163, -1, 165, 166, + -1, 168, 169, 170, 171, 172, 173, -1, 175, -1, + -1, 178, -1, 180, 181, 182, 183, 184, 185, 186, + 187, 188, 189, 190, -1, 192, 193, 194, 195, 196, + 197, -1, 199, 200, -1, 202, 203, 204, 205, 206, + 207, -1, 209, -1, 211, 212, 213, 214, 215, 216, + 217, 218, 219, 220, -1, -1, 223, 224, 225, 226, + -1, 228, 229, 230, -1, -1, 233, 234, 235, 236, + 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, + 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, + 257, 258, 259, 260, 261, 262, -1, -1, 265, 
266, + 267, 268, -1, -1, 271, 272, 273, 274, 275, -1, + 277, 278, -1, -1, 281, 282, 283, -1, -1, 286, + -1, 288, 289, 290, -1, 292, 293, 294, 295, 296, + 297, 298, 299, 300, 301, -1, 303, 304, -1, 306, + 307, -1, 309, 310, 311, -1, 313, 314, 315, 316, + 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, + -1, 328, 329, 330, 331, 332, 333, 334, 335, 336, + 337, 338, 339, -1, 341, 342, 343, 344, 345, 346, + 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, + 357, 358, -1, 360, 361, 362, 363, 364, -1, 366, + 367, 368, 369, 370, 371, 372, 373, 374, 375, -1, + 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, + 387, 388, 389, -1, -1, 392, 393, -1, 395, 396, + 397, 398, 399, 400, 401, -1, 403, 404, -1, -1, + 407, 408, -1, 410, -1, -1, 413, 414, 415, 416, + 417, 418, 419, 420, -1, -1, 423, 424, 425, 426, + 427, -1, -1, 430, 431, 432, 433, 434, -1, 436, + -1, 438, 439, 440, 441, 442, 443, -1, -1, 446, + -1, -1, 449, 450, 451, 452, 453, 454, 3, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 466, + 467, 468, 469, -1, 19, 20, 21, 22, 23, 24, + 25, 26, -1, 28, 29, 30, -1, -1, -1, -1, + -1, -1, -1, 38, 39, -1, 41, 42, 43, -1, + 45, 46, 47, 48, 49, -1, 51, 52, -1, 54, + 55, 56, 57, 58, 59, -1, -1, 62, 63, 64, + 65, 66, -1, 68, 69, 70, 71, 72, -1, -1, + -1, 76, 77, 78, 79, 80, -1, 82, 83, 84, + -1, 86, 87, 88, 89, 90, 91, -1, -1, 94, + 95, 96, -1, -1, -1, -1, -1, -1, -1, 104, + 105, 106, 107, 108, 109, 110, 111, 112, 113, -1, + 115, -1, 117, 118, 119, 120, 121, 122, -1, 124, + 125, 126, 127, 128, -1, -1, 131, 132, 133, 134, + 135, -1, 137, 138, 139, -1, 141, 142, 143, -1, + 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, + -1, 156, -1, 158, 159, 160, 161, -1, 163, -1, + 165, -1, -1, -1, 169, 170, 171, -1, 173, -1, + 175, -1, 177, 178, -1, 180, 181, 182, 183, 184, + 185, -1, 187, 188, 189, 190, -1, 192, 193, 194, + 195, 196, 197, -1, 199, -1, 201, 202, 203, 204, + 205, 206, 207, -1, 209, -1, 211, -1, -1, 214, + -1, 216, 217, 218, 219, 220, -1, -1, 223, -1, + 225, -1, -1, 228, 229, 
230, -1, -1, 233, 234, + 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, + 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, + 255, 256, 257, -1, 259, 260, 261, 262, 263, -1, + 265, 266, -1, 268, -1, 270, 271, 272, 273, 274, + 275, -1, 277, 278, -1, -1, 281, 282, 283, -1, + -1, 286, 287, -1, 289, -1, 291, 292, 293, 294, + 295, 296, 297, -1, 299, 300, 301, -1, 303, 304, + 305, 306, 307, 308, 309, 310, 311, -1, 313, 314, + 315, 316, 317, 318, 319, -1, 321, 322, 323, 324, + 325, 326, -1, 328, 329, 330, 331, 332, 333, 334, + 335, 336, 337, 338, 339, -1, 341, 342, -1, 344, + 345, 346, 347, -1, 349, 350, 351, 352, 353, 354, + 355, 356, 357, 358, -1, 360, 361, 362, 363, 364, + -1, 366, 367, 368, 369, 370, -1, 372, 373, 374, + 375, -1, 377, 378, 379, 380, 381, 382, 383, 384, + 385, 386, 387, 388, 389, 390, -1, 392, 393, -1, + 395, -1, 397, 398, 399, 400, 401, -1, 403, 404, + -1, -1, 407, 408, 409, 410, 411, -1, 413, 414, + 415, 416, 417, 418, 419, 420, -1, -1, 423, 424, + 425, 426, 427, -1, -1, 430, 431, 432, 433, 434, + 435, 436, -1, 438, -1, 440, 441, 442, 443, -1, + -1, 446, -1, -1, 449, 450, 451, 452, 453, 454, + 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, + 465, 466, 467, 468, 469, 3, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 19, 20, 21, 22, 23, 24, 25, 26, -1, + 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, + 38, 39, -1, 41, 42, 43, 44, 45, 46, 47, + -1, 49, 50, 51, 52, -1, 54, 55, 56, 57, + 58, 59, -1, -1, 62, 63, 64, 65, 66, -1, + 68, 69, 70, 71, -1, -1, 74, -1, 76, 77, + 78, 79, 80, 81, 82, 83, 84, -1, 86, 87, + 88, 89, 90, 91, -1, 93, 94, 95, 96, -1, + -1, -1, 100, -1, -1, -1, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, -1, 115, -1, 117, + 118, 119, 120, 121, 122, -1, 124, 125, 126, 127, + 128, -1, -1, 131, 132, 133, 134, 135, -1, 137, + 138, 139, -1, 141, 142, 143, -1, 145, 146, 147, + 148, -1, 150, 151, 152, 153, -1, -1, 156, -1, + 158, 159, 160, 161, -1, 163, -1, 165, 166, -1, + 168, 169, 170, 171, 172, 173, -1, 175, -1, -1, + 
178, -1, 180, 181, 182, 183, 184, 185, 186, 187, + 188, 189, 190, -1, 192, 193, 194, 195, 196, 197, + -1, 199, 200, -1, 202, 203, 204, 205, 206, 207, + -1, 209, -1, 211, 212, 213, 214, 215, 216, 217, + 218, 219, 220, -1, -1, 223, 224, 225, 226, -1, + 228, 229, 230, -1, -1, 233, 234, 235, 236, 237, + 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, + 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, + 258, 259, 260, 261, 262, -1, -1, 265, 266, 267, + 268, -1, -1, 271, 272, 273, 274, 275, -1, 277, + 278, -1, -1, 281, 282, 283, -1, -1, 286, -1, + 288, 289, 290, -1, 292, 293, 294, 295, 296, 297, + 298, 299, 300, 301, -1, 303, 304, -1, 306, 307, + -1, 309, 310, 311, -1, 313, 314, 315, 316, 317, + 318, 319, 320, 321, 322, 323, 324, 325, 326, -1, + 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, + 338, 339, -1, 341, 342, 343, 344, 345, 346, -1, + 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, + 358, -1, 360, 361, 362, 363, 364, -1, 366, -1, + 368, 369, 370, 371, 372, 373, 374, 375, -1, 377, + 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, + 388, 389, -1, -1, 392, 393, -1, 395, 396, 397, + 398, 399, 400, 401, -1, 403, 404, -1, -1, 407, + 408, -1, 410, -1, -1, 413, 414, 415, 416, 417, + 418, 419, 420, -1, -1, 423, 424, 425, 426, 427, + -1, -1, 430, 431, 432, 433, 434, -1, 436, -1, + 438, 439, 440, 441, 442, 443, -1, -1, 446, -1, + -1, 449, 450, 451, 452, 453, 454, 3, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 466, 467, + 468, 469, -1, 19, 20, 21, 22, 23, 24, 25, + 26, -1, 28, 29, 30, -1, -1, -1, -1, -1, + -1, -1, 38, 39, -1, 41, 42, 43, 44, 45, + 46, 47, -1, 49, 50, 51, 52, -1, 54, 55, + 56, 57, 58, 59, -1, -1, 62, 63, 64, 65, + 66, -1, 68, 69, 70, 71, -1, -1, 74, -1, + 76, 77, 78, 79, 80, 81, 82, 83, 84, -1, + 86, 87, 88, 89, 90, 91, -1, 93, 94, 95, + 96, -1, -1, -1, 100, -1, -1, -1, 104, 105, + 106, 107, 108, 109, 110, 111, 112, 113, -1, 115, + -1, 117, 118, 119, 120, 121, 122, -1, 124, 125, + 126, 127, 128, -1, -1, 131, 132, 133, 134, 135, + -1, 137, 138, 139, 
-1, 141, 142, 143, -1, 145, + 146, 147, 148, -1, 150, 151, 152, 153, -1, -1, + 156, -1, 158, 159, 160, 161, -1, 163, -1, 165, + 166, -1, 168, 169, 170, 171, 172, 173, -1, 175, + -1, -1, 178, -1, 180, 181, 182, 183, 184, 185, + 186, 187, 188, 189, 190, -1, 192, 193, 194, 195, + 196, 197, -1, 199, 200, -1, 202, 203, 204, 205, + 206, 207, -1, 209, -1, 211, 212, 213, 214, 215, + 216, 217, 218, 219, 220, -1, -1, 223, 224, 225, + 226, -1, 228, 229, 230, -1, -1, 233, 234, 235, + 236, 237, -1, 239, 240, 241, 242, 243, 244, 245, + 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, + 256, 257, 258, 259, 260, 261, 262, -1, -1, 265, + 266, 267, 268, -1, -1, 271, 272, 273, 274, 275, + -1, 277, 278, -1, -1, 281, 282, 283, -1, -1, + 286, -1, 288, 289, 290, -1, 292, 293, 294, 295, + 296, 297, 298, 299, 300, 301, -1, 303, 304, -1, + 306, 307, -1, 309, 310, 311, -1, 313, 314, 315, + 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, + 326, -1, 328, 329, 330, 331, 332, 333, 334, 335, + 336, 337, 338, 339, -1, 341, 342, 343, 344, 345, + 346, -1, 348, 349, 350, 351, 352, 353, 354, 355, + 356, 357, 358, -1, 360, 361, 362, 363, 364, -1, + 366, -1, 368, 369, 370, 371, 372, 373, 374, 375, + -1, 377, 378, 379, 380, 381, 382, 383, 384, 385, + 386, 387, -1, 389, -1, -1, 392, 393, -1, 395, + 396, 397, 398, 399, 400, 401, -1, 403, 404, -1, + -1, 407, 408, -1, 410, -1, -1, 413, 414, 415, + 416, 417, 418, 419, 420, -1, -1, 423, 424, 425, + 426, 427, -1, -1, 430, 431, 432, 433, 434, -1, + 436, -1, 438, 439, 440, 441, 442, 443, -1, -1, + 446, -1, -1, 449, 450, 451, 452, 453, 454, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 466, 467, 468, 469 }; /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing @@ -198042,12 +211888,12 @@ static const yytype_uint16 yystos[] = 0, 19, 29, 31, 32, 47, 56, 68, 79, 90, 92, 110, 124, 134, 140, 148, 150, 151, 163, 190, 229, 306, 309, 337, 345, 359, 366, 370, 380, 394, - 430, 435, 448, 472, 486, 495, 496, 497, 498, 509, - 510, 512, 516, 530, 531, 533, 535, 
542, 544, 590, - 596, 599, 600, 617, 618, 619, 620, 621, 622, 666, - 796, 799, 802, 809, 810, 811, 812, 813, 820, 824, - 830, 832, 837, 841, 842, 845, 846, 848, 849, 851, - 407, 451, 543, 194, 352, 360, 394, 441, 543, 3, + 430, 435, 448, 472, 486, 497, 498, 499, 500, 511, + 512, 514, 518, 532, 533, 535, 537, 544, 546, 592, + 598, 601, 602, 619, 620, 621, 622, 623, 624, 668, + 800, 803, 806, 813, 814, 815, 816, 817, 824, 828, + 834, 836, 841, 845, 846, 849, 850, 852, 853, 855, + 407, 451, 545, 194, 352, 360, 394, 441, 545, 3, 19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30, 38, 39, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, @@ -198085,246 +211931,247 @@ static const yytype_uint16 yystos[] = 431, 432, 433, 434, 435, 436, 438, 439, 440, 441, 442, 443, 446, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, - 466, 467, 468, 469, 722, 781, 785, 788, 854, 855, - 856, 543, 50, 486, 612, 169, 173, 230, 237, 284, - 352, 398, 400, 422, 425, 588, 597, 808, 3, 27, - 238, 309, 388, 779, 785, 854, 21, 74, 89, 143, + 466, 467, 468, 469, 724, 785, 789, 792, 858, 859, + 860, 545, 50, 486, 614, 169, 173, 230, 237, 284, + 352, 398, 400, 422, 425, 590, 599, 812, 3, 27, + 238, 309, 388, 783, 789, 858, 21, 74, 89, 143, 152, 164, 169, 194, 237, 241, 304, 318, 349, 352, - 360, 363, 382, 394, 401, 410, 441, 591, 592, 595, - 543, 779, 92, 439, 486, 512, 599, 617, 816, 820, - 837, 851, 107, 68, 107, 5, 784, 831, 785, 779, - 27, 403, 407, 785, 843, 844, 847, 543, 27, 129, - 629, 630, 230, 352, 364, 403, 825, 826, 847, 543, - 5, 280, 677, 777, 785, 786, 168, 486, 834, 486, - 325, 623, 624, 779, 623, 618, 619, 622, 0, 489, - 119, 204, 427, 144, 208, 285, 421, 631, 632, 618, - 620, 621, 490, 439, 814, 27, 403, 407, 435, 617, - 847, 185, 777, 779, 185, 777, 185, 677, 185, 777, - 486, 484, 488, 768, 770, 512, 599, 617, 798, 837, - 777, 398, 400, 398, 400, 335, 185, 785, 325, 360, - 394, 441, 777, 194, 27, 779, 243, 410, 106, 394, - 441, 355, 
185, 593, 785, 790, 185, 778, 779, 185, - 779, 486, 534, 588, 816, 3, 48, 49, 51, 52, + 360, 363, 382, 394, 401, 410, 441, 593, 594, 597, + 545, 783, 92, 439, 486, 514, 601, 619, 820, 824, + 841, 855, 107, 68, 107, 5, 788, 835, 789, 783, + 27, 403, 407, 789, 847, 848, 851, 545, 27, 129, + 631, 632, 230, 352, 364, 403, 829, 830, 851, 545, + 5, 280, 679, 781, 789, 790, 168, 486, 838, 486, + 325, 625, 626, 783, 625, 620, 621, 624, 0, 489, + 119, 204, 427, 144, 208, 285, 421, 633, 634, 620, + 622, 623, 490, 439, 818, 27, 403, 407, 435, 619, + 851, 185, 781, 783, 185, 781, 185, 679, 185, 781, + 486, 484, 488, 772, 774, 514, 601, 619, 802, 841, + 781, 398, 400, 398, 400, 335, 185, 789, 325, 360, + 394, 441, 781, 194, 27, 783, 243, 410, 106, 394, + 441, 355, 185, 595, 789, 794, 185, 782, 783, 185, + 783, 486, 536, 590, 820, 3, 48, 49, 51, 52, 64, 65, 72, 111, 112, 149, 154, 160, 177, 201, 206, 207, 209, 238, 257, 259, 263, 270, 272, 287, 291, 305, 308, 322, 347, 367, 374, 388, 390, 403, 404, 409, 411, 415, 435, 436, 455, 456, 457, 458, - 459, 460, 461, 462, 463, 464, 465, 817, 819, 820, - 822, 823, 854, 858, 814, 784, 784, 475, 486, 486, - 797, 469, 214, 488, 279, 4, 6, 7, 8, 9, + 459, 460, 461, 462, 463, 464, 465, 821, 823, 824, + 826, 827, 858, 862, 818, 788, 788, 475, 486, 486, + 801, 469, 214, 488, 279, 4, 6, 7, 8, 9, 10, 35, 49, 51, 52, 60, 61, 64, 65, 72, 74, 97, 98, 99, 100, 101, 102, 103, 111, 112, 114, 133, 149, 154, 155, 160, 206, 207, 209, 231, 232, 257, 259, 264, 269, 270, 272, 281, 291, 305, 322, 347, 365, 374, 390, 403, 404, 409, 411, 412, - 415, 428, 436, 470, 477, 478, 479, 486, 491, 492, - 618, 692, 695, 698, 699, 700, 702, 703, 704, 705, - 707, 708, 719, 721, 722, 723, 725, 738, 739, 743, - 762, 767, 774, 775, 781, 782, 783, 784, 785, 773, - 774, 825, 784, 825, 469, 167, 405, 475, 486, 777, - 479, 770, 3, 166, 168, 439, 820, 833, 835, 166, - 836, 719, 747, 623, 490, 486, 792, 487, 487, 497, - 167, 210, 677, 838, 27, 129, 628, 628, 54, 628, - 157, 162, 
227, 276, 637, 639, 640, 659, 661, 662, - 663, 631, 632, 486, 777, 469, 214, 149, 23, 29, - 134, 283, 333, 337, 366, 432, 503, 506, 507, 333, - 149, 36, 55, 105, 193, 242, 250, 262, 293, 333, - 338, 360, 366, 380, 506, 536, 539, 149, 333, 366, - 506, 149, 333, 366, 506, 3, 27, 44, 50, 74, - 81, 93, 100, 129, 166, 168, 172, 186, 200, 212, - 213, 215, 224, 226, 238, 258, 267, 288, 290, 343, - 371, 388, 396, 415, 437, 439, 479, 487, 719, 749, - 750, 787, 793, 854, 859, 719, 769, 3, 27, 31, - 32, 33, 34, 35, 36, 37, 40, 53, 60, 61, - 67, 73, 75, 85, 92, 97, 98, 99, 101, 102, - 103, 114, 116, 123, 129, 130, 136, 140, 144, 155, - 157, 162, 164, 167, 174, 176, 179, 191, 198, 208, - 210, 221, 222, 227, 231, 232, 264, 269, 276, 279, - 280, 284, 285, 302, 312, 327, 340, 359, 365, 376, - 391, 394, 402, 405, 406, 412, 421, 422, 428, 429, - 435, 437, 444, 445, 447, 448, 479, 780, 794, 854, - 858, 860, 768, 487, 486, 578, 588, 264, 800, 441, - 185, 777, 185, 777, 853, 777, 486, 598, 81, 805, - 452, 82, 126, 296, 399, 149, 58, 339, 490, 594, - 488, 791, 149, 490, 594, 149, 279, 747, 394, 487, - 490, 4, 155, 279, 412, 477, 478, 538, 541, 783, - 784, 815, 817, 818, 821, 816, 486, 607, 611, 538, - 821, 827, 829, 749, 3, 44, 49, 50, 51, 52, - 64, 65, 74, 81, 93, 100, 111, 112, 160, 166, - 168, 172, 186, 200, 206, 207, 209, 212, 213, 215, - 224, 226, 238, 257, 258, 259, 267, 272, 288, 290, - 322, 343, 347, 367, 371, 374, 388, 396, 403, 404, - 415, 436, 439, 688, 689, 691, 693, 695, 697, 699, - 700, 701, 703, 704, 707, 708, 751, 789, 854, 857, - 36, 225, 785, 486, 771, 484, 438, 706, 719, 766, - 486, 706, 706, 486, 162, 486, 486, 486, 694, 694, - 308, 618, 486, 486, 696, 486, 486, 64, 65, 706, - 719, 486, 694, 486, 486, 486, 486, 486, 450, 472, - 486, 709, 486, 709, 486, 486, 486, 719, 719, 719, - 618, 719, 747, 9, 771, 770, 784, 486, 486, 783, - 784, 3, 8, 11, 16, 17, 18, 33, 36, 41, - 48, 73, 172, 186, 191, 212, 213, 226, 264, 267, - 281, 284, 371, 470, 473, 474, 475, 477, 478, 479, 
- 480, 481, 482, 741, 742, 743, 745, 449, 726, 771, - 15, 290, 719, 15, 210, 490, 625, 486, 784, 771, - 770, 625, 3, 114, 230, 538, 708, 784, 828, 96, - 114, 829, 114, 829, 777, 487, 490, 814, 487, 490, - 624, 778, 36, 838, 514, 777, 36, 785, 366, 620, - 620, 633, 634, 719, 620, 159, 261, 653, 216, 262, - 321, 369, 427, 27, 648, 719, 477, 478, 649, 650, - 719, 721, 659, 660, 640, 639, 637, 638, 162, 662, - 274, 664, 637, 659, 747, 792, 225, 777, 67, 75, - 85, 164, 185, 312, 422, 559, 569, 584, 785, 75, - 85, 511, 85, 511, 486, 405, 486, 557, 236, 425, - 557, 85, 490, 405, 777, 691, 538, 54, 540, 538, - 538, 105, 242, 250, 54, 405, 448, 472, 537, 255, - 352, 537, 539, 677, 85, 405, 511, 352, 777, 405, - 352, 749, 749, 750, 487, 490, 631, 632, 13, 14, - 485, 493, 405, 577, 582, 785, 448, 610, 325, 441, - 149, 92, 531, 544, 801, 802, 849, 777, 264, 532, - 536, 264, 486, 578, 36, 578, 487, 749, 36, 185, - 572, 785, 806, 593, 790, 780, 488, 778, 779, 779, - 790, 487, 185, 777, 853, 816, 822, 4, 783, 4, - 783, 609, 616, 794, 50, 94, 120, 138, 142, 163, - 166, 180, 269, 277, 319, 613, 490, 487, 490, 486, - 691, 486, 35, 690, 108, 109, 182, 183, 244, 245, - 246, 247, 248, 249, 252, 253, 356, 357, 466, 467, - 486, 710, 711, 712, 713, 714, 715, 716, 717, 718, - 487, 490, 694, 798, 747, 768, 747, 748, 486, 444, - 763, 764, 719, 747, 486, 783, 783, 747, 3, 710, - 711, 712, 713, 714, 715, 716, 717, 752, 753, 784, - 783, 783, 783, 706, 706, 719, 8, 16, 17, 18, - 473, 474, 475, 477, 478, 479, 480, 481, 482, 741, - 746, 785, 719, 754, 477, 478, 486, 720, 721, 743, - 756, 767, 487, 747, 719, 747, 757, 403, 403, 783, - 783, 719, 53, 167, 222, 406, 719, 747, 760, 719, - 487, 490, 783, 719, 718, 718, 689, 719, 719, 719, - 719, 5, 794, 795, 403, 40, 391, 772, 790, 719, - 719, 486, 618, 761, 129, 155, 264, 269, 274, 412, - 423, 719, 269, 486, 719, 405, 48, 172, 186, 191, - 226, 371, 719, 719, 719, 719, 719, 719, 719, 719, - 719, 719, 27, 34, 376, 740, 176, 158, 727, 719, - 347, 486, 
739, 719, 173, 230, 394, 398, 400, 425, - 626, 777, 775, 167, 667, 749, 667, 486, 784, 487, - 777, 833, 777, 841, 719, 487, 486, 429, 840, 114, - 292, 486, 513, 617, 36, 785, 486, 518, 527, 529, - 785, 490, 37, 123, 429, 635, 347, 348, 477, 478, - 650, 652, 721, 369, 216, 280, 490, 4, 651, 783, - 651, 347, 348, 652, 776, 777, 268, 373, 665, 660, - 638, 487, 333, 506, 486, 185, 569, 779, 216, 264, - 216, 429, 486, 562, 689, 779, 785, 185, 779, 185, - 785, 23, 134, 366, 502, 505, 552, 567, 794, 779, - 561, 581, 794, 779, 503, 779, 333, 366, 506, 536, - 538, 790, 779, 538, 790, 779, 538, 333, 366, 506, - 779, 779, 779, 779, 333, 366, 506, 779, 779, 631, - 631, 631, 437, 750, 487, 719, 719, 719, 769, 317, - 606, 487, 490, 277, 167, 405, 601, 441, 777, 785, - 588, 486, 149, 149, 226, 559, 569, 573, 576, 585, - 587, 785, 448, 450, 564, 148, 617, 448, 807, 487, - 719, 264, 279, 594, 780, 594, 279, 594, 264, 36, - 487, 490, 479, 486, 538, 608, 821, 36, 605, 784, - 605, 264, 269, 319, 605, 605, 827, 751, 35, 690, - 687, 785, 484, 484, 783, 405, 405, 405, 405, 689, - 487, 485, 747, 719, 136, 764, 765, 36, 487, 719, - 487, 487, 487, 487, 167, 487, 487, 487, 490, 487, - 488, 302, 755, 487, 720, 720, 719, 11, 16, 17, - 18, 191, 212, 281, 473, 474, 475, 477, 478, 479, - 480, 481, 482, 743, 720, 487, 487, 162, 167, 758, - 759, 487, 469, 469, 487, 487, 36, 760, 747, 760, - 760, 167, 487, 36, 771, 719, 487, 487, 469, 720, - 720, 142, 747, 167, 129, 155, 269, 274, 412, 423, - 486, 142, 746, 719, 391, 772, 719, 761, 719, 405, - 486, 618, 486, 486, 289, 731, 398, 400, 398, 400, - 777, 394, 627, 627, 627, 221, 348, 486, 618, 668, - 669, 670, 677, 678, 722, 724, 725, 785, 445, 683, - 631, 683, 783, 718, 792, 798, 668, 445, 839, 435, - 393, 428, 522, 517, 526, 785, 279, 519, 785, 523, - 529, 490, 667, 475, 771, 634, 281, 741, 744, 471, - 636, 4, 783, 652, 280, 427, 649, 490, 235, 405, - 719, 264, 584, 486, 149, 486, 562, 194, 582, 545, - 283, 555, 545, 23, 134, 337, 338, 366, 499, 500, - 
501, 507, 508, 149, 594, 149, 594, 552, 567, 552, - 487, 490, 548, 784, 487, 490, 475, 488, 405, 352, - 85, 405, 511, 352, 405, 405, 405, 352, 487, 487, - 487, 750, 485, 383, 384, 615, 784, 577, 606, 777, - 578, 800, 394, 582, 777, 777, 853, 777, 487, 490, - 277, 557, 277, 279, 556, 779, 448, 852, 557, 36, - 149, 777, 790, 149, 609, 603, 614, 821, 784, 784, - 269, 582, 479, 582, 784, 784, 487, 484, 487, 490, - 689, 783, 485, 783, 487, 711, 713, 714, 715, 714, - 715, 715, 487, 402, 719, 140, 689, 487, 719, 719, - 746, 719, 758, 689, 720, 720, 720, 720, 129, 264, - 274, 720, 720, 720, 720, 720, 720, 720, 720, 720, - 720, 719, 719, 759, 758, 709, 709, 689, 487, 487, - 487, 747, 689, 487, 718, 719, 33, 33, 719, 487, - 719, 167, 486, 751, 719, 487, 142, 720, 720, 142, - 142, 719, 719, 632, 445, 486, 732, 785, 627, 627, - 627, 627, 777, 777, 777, 618, 678, 167, 618, 669, - 670, 36, 671, 672, 785, 490, 93, 168, 200, 215, - 224, 258, 343, 674, 672, 36, 671, 673, 785, 472, - 682, 770, 719, 176, 654, 487, 654, 487, 487, 719, - 340, 521, 434, 487, 490, 771, 83, 521, 487, 490, - 518, 839, 719, 486, 636, 159, 220, 280, 777, 779, - 487, 149, 582, 569, 582, 545, 572, 487, 116, 198, - 262, 264, 568, 486, 579, 171, 114, 184, 264, 557, - 537, 106, 114, 171, 264, 382, 385, 539, 557, 366, - 501, 416, 779, 785, 505, 581, 3, 44, 50, 74, - 81, 93, 100, 166, 168, 172, 186, 200, 212, 213, - 215, 224, 226, 238, 258, 263, 267, 281, 288, 290, - 343, 367, 371, 388, 396, 415, 439, 477, 478, 538, - 546, 583, 689, 744, 784, 787, 854, 860, 794, 779, - 779, 779, 779, 779, 779, 779, 779, 779, 779, 631, - 537, 615, 486, 807, 185, 777, 487, 532, 486, 36, - 566, 564, 573, 79, 534, 106, 262, 617, 572, 429, - 804, 594, 853, 487, 490, 582, 783, 785, 485, 485, - 719, 487, 487, 759, 167, 129, 274, 486, 487, 487, - 719, 719, 719, 751, 487, 719, 33, 33, 719, 719, - 142, 487, 487, 719, 733, 785, 777, 777, 777, 777, - 672, 673, 486, 487, 785, 786, 396, 645, 646, 486, - 669, 215, 288, 675, 669, 675, 215, 674, 
675, 215, - 646, 486, 785, 646, 486, 286, 54, 179, 658, 784, - 658, 784, 774, 617, 292, 617, 517, 279, 486, 515, - 475, 529, 521, 746, 545, 569, 487, 487, 448, 575, - 117, 187, 196, 116, 431, 552, 570, 67, 73, 85, - 114, 116, 171, 198, 264, 269, 312, 327, 422, 550, - 551, 563, 30, 54, 589, 185, 269, 538, 719, 589, - 269, 477, 478, 541, 785, 689, 594, 594, 238, 388, - 787, 791, 475, 405, 405, 487, 607, 429, 602, 604, - 582, 36, 264, 486, 807, 576, 148, 617, 146, 192, - 556, 119, 134, 311, 852, 106, 448, 850, 279, 785, - 803, 486, 36, 614, 485, 689, 720, 167, 486, 751, - 487, 719, 719, 719, 487, 298, 734, 679, 680, 724, - 671, 486, 4, 9, 641, 643, 644, 785, 778, 669, - 279, 429, 676, 669, 215, 669, 684, 685, 785, 486, - 684, 785, 486, 655, 656, 657, 719, 719, 447, 728, - 728, 522, 85, 486, 520, 528, 724, 785, 130, 719, - 487, 327, 575, 486, 565, 545, 487, 490, 486, 790, - 779, 720, 589, 117, 187, 116, 269, 216, 777, 575, - 114, 36, 149, 73, 686, 791, 481, 546, 779, 779, - 537, 121, 487, 617, 149, 36, 487, 779, 852, 27, - 78, 86, 115, 184, 195, 382, 385, 560, 560, 348, - 348, 59, 67, 230, 777, 528, 720, 751, 487, 54, - 631, 487, 490, 36, 681, 778, 301, 481, 301, 348, - 481, 486, 486, 487, 719, 486, 669, 676, 487, 490, - 689, 684, 487, 487, 490, 729, 730, 785, 429, 642, - 642, 434, 779, 719, 487, 490, 73, 524, 524, 265, - 427, 777, 545, 571, 574, 794, 552, 719, 264, 551, - 36, 575, 578, 184, 790, 429, 504, 481, 416, 607, - 784, 807, 850, 777, 617, 564, 534, 67, 282, 67, - 804, 487, 487, 747, 320, 348, 735, 682, 679, 486, - 487, 785, 641, 778, 685, 686, 487, 656, 490, 36, - 350, 617, 487, 683, 520, 790, 525, 790, 525, 366, - 578, 487, 490, 475, 487, 184, 240, 586, 486, 547, - 719, 416, 36, 486, 850, 556, 852, 282, 282, 486, - 807, 48, 96, 418, 719, 736, 737, 736, 487, 684, - 487, 490, 487, 487, 730, 732, 644, 524, 635, 635, - 527, 586, 574, 546, 262, 558, 547, 168, 297, 372, - 279, 553, 554, 580, 536, 617, 528, 683, 737, 347, - 161, 307, 161, 307, 487, 9, 334, 647, 525, 
636, - 636, 683, 554, 196, 119, 427, 279, 580, 279, 553, - 487, 850, 487, 33, 487, 486, 635, 545, 58, 262, - 339, 366, 549, 549, 807, 737, 9, 636, 22, 114, - 269, 683, 487 + 415, 428, 436, 470, 477, 478, 479, 484, 486, 491, + 493, 494, 620, 694, 697, 700, 701, 702, 704, 705, + 706, 707, 709, 710, 721, 723, 724, 725, 727, 740, + 741, 747, 766, 771, 778, 779, 785, 786, 787, 788, + 789, 777, 778, 829, 788, 829, 469, 167, 405, 475, + 486, 781, 479, 774, 3, 166, 168, 439, 824, 837, + 839, 166, 840, 721, 751, 625, 490, 486, 796, 487, + 487, 499, 167, 210, 679, 842, 27, 129, 630, 630, + 54, 630, 157, 162, 227, 276, 639, 641, 642, 661, + 663, 664, 665, 633, 634, 486, 781, 469, 214, 149, + 23, 29, 134, 283, 333, 337, 366, 432, 505, 508, + 509, 333, 149, 36, 55, 105, 193, 242, 250, 262, + 293, 333, 338, 360, 366, 380, 508, 538, 541, 149, + 333, 366, 508, 149, 333, 366, 508, 3, 27, 44, + 50, 74, 81, 93, 100, 129, 166, 168, 172, 186, + 200, 212, 213, 215, 224, 226, 238, 258, 267, 288, + 290, 343, 371, 388, 396, 415, 437, 439, 479, 487, + 721, 753, 754, 791, 797, 858, 863, 721, 773, 3, + 27, 31, 32, 33, 34, 35, 36, 37, 40, 53, + 60, 61, 67, 73, 75, 85, 92, 97, 98, 99, + 101, 102, 103, 114, 116, 123, 129, 130, 136, 140, + 144, 155, 157, 162, 164, 167, 174, 176, 179, 191, + 198, 208, 210, 221, 222, 227, 231, 232, 264, 269, + 276, 279, 280, 284, 285, 302, 312, 327, 340, 359, + 365, 376, 391, 394, 402, 405, 406, 412, 421, 422, + 428, 429, 435, 437, 444, 445, 447, 448, 479, 784, + 798, 858, 862, 864, 772, 487, 486, 580, 590, 264, + 804, 441, 185, 781, 185, 781, 857, 781, 486, 600, + 81, 809, 452, 82, 126, 296, 399, 149, 58, 339, + 490, 596, 488, 795, 149, 490, 596, 149, 279, 751, + 394, 487, 490, 4, 155, 279, 412, 477, 478, 540, + 543, 787, 788, 819, 821, 822, 825, 820, 486, 609, + 613, 540, 825, 831, 833, 753, 3, 44, 49, 50, + 51, 52, 64, 65, 74, 81, 93, 100, 111, 112, + 160, 166, 168, 172, 186, 200, 206, 207, 209, 212, + 213, 215, 224, 226, 238, 257, 258, 259, 267, 272, + 288, 
290, 322, 343, 347, 367, 371, 374, 388, 396, + 403, 404, 415, 436, 439, 690, 691, 693, 695, 697, + 699, 701, 702, 703, 705, 706, 709, 710, 755, 793, + 858, 861, 36, 225, 789, 486, 775, 484, 438, 708, + 721, 770, 486, 708, 708, 486, 162, 486, 486, 486, + 696, 696, 308, 620, 486, 486, 698, 486, 486, 64, + 65, 708, 721, 486, 696, 486, 486, 486, 486, 486, + 450, 472, 486, 711, 486, 711, 486, 486, 486, 721, + 721, 721, 751, 752, 620, 721, 751, 742, 743, 789, + 790, 9, 775, 774, 788, 486, 486, 787, 788, 3, + 8, 11, 16, 17, 18, 33, 36, 41, 48, 73, + 172, 186, 191, 212, 213, 226, 264, 267, 281, 284, + 371, 470, 473, 474, 475, 477, 478, 479, 480, 481, + 482, 745, 746, 747, 749, 449, 728, 775, 15, 290, + 721, 15, 210, 490, 627, 486, 788, 775, 774, 627, + 3, 114, 230, 540, 710, 788, 832, 96, 114, 833, + 114, 833, 781, 487, 490, 818, 487, 490, 626, 782, + 36, 842, 516, 781, 36, 789, 366, 622, 622, 635, + 636, 721, 622, 159, 261, 655, 216, 262, 321, 369, + 427, 27, 650, 721, 477, 478, 651, 652, 721, 723, + 661, 662, 642, 641, 639, 640, 162, 664, 274, 666, + 639, 661, 751, 796, 225, 781, 67, 75, 85, 164, + 185, 312, 422, 561, 571, 586, 789, 75, 85, 513, + 85, 513, 486, 405, 486, 559, 236, 425, 559, 85, + 490, 405, 781, 693, 540, 54, 542, 540, 540, 105, + 242, 250, 54, 405, 448, 472, 539, 255, 352, 539, + 541, 679, 85, 405, 513, 352, 781, 405, 352, 753, + 753, 754, 487, 490, 633, 634, 13, 14, 485, 495, + 405, 579, 584, 789, 448, 612, 325, 441, 149, 92, + 533, 546, 805, 806, 853, 781, 264, 534, 538, 264, + 486, 580, 36, 580, 487, 753, 36, 185, 574, 789, + 810, 595, 794, 784, 488, 782, 783, 783, 794, 487, + 185, 781, 857, 820, 826, 4, 787, 4, 787, 611, + 618, 798, 50, 94, 120, 138, 142, 163, 166, 180, + 269, 277, 319, 615, 490, 487, 490, 486, 693, 486, + 35, 692, 108, 109, 182, 183, 244, 245, 246, 247, + 248, 249, 252, 253, 356, 357, 466, 467, 486, 712, + 713, 714, 715, 716, 717, 718, 719, 720, 487, 490, + 696, 802, 751, 772, 752, 486, 444, 767, 768, 721, + 751, 486, 787, 787, 751, 
3, 712, 713, 714, 715, + 716, 717, 718, 719, 756, 757, 788, 787, 787, 787, + 708, 708, 721, 8, 16, 17, 18, 473, 474, 475, + 477, 478, 479, 480, 481, 482, 745, 750, 789, 721, + 758, 477, 478, 486, 722, 723, 747, 760, 771, 487, + 751, 721, 751, 761, 403, 403, 787, 787, 721, 53, + 167, 222, 406, 721, 751, 764, 721, 485, 487, 490, + 490, 492, 495, 787, 721, 720, 720, 691, 721, 721, + 721, 721, 5, 798, 799, 403, 40, 391, 776, 794, + 721, 721, 486, 620, 765, 129, 155, 264, 269, 274, + 412, 423, 721, 269, 486, 721, 405, 48, 172, 186, + 191, 226, 371, 721, 721, 721, 721, 721, 721, 721, + 721, 721, 721, 27, 34, 376, 744, 176, 158, 729, + 721, 347, 486, 741, 721, 173, 230, 394, 398, 400, + 425, 628, 781, 779, 167, 669, 753, 669, 486, 788, + 487, 781, 837, 781, 845, 721, 487, 486, 429, 844, + 114, 292, 486, 515, 619, 36, 789, 486, 520, 529, + 531, 789, 490, 37, 123, 429, 637, 347, 348, 477, + 478, 652, 654, 723, 369, 216, 280, 490, 4, 653, + 787, 653, 347, 348, 654, 780, 781, 268, 373, 667, + 662, 640, 487, 333, 508, 486, 185, 571, 783, 216, + 264, 216, 429, 486, 564, 691, 783, 789, 185, 783, + 185, 789, 23, 134, 366, 504, 507, 554, 569, 798, + 783, 563, 583, 798, 783, 505, 783, 333, 366, 508, + 538, 540, 794, 783, 540, 794, 783, 540, 333, 366, + 508, 783, 783, 783, 783, 333, 366, 508, 783, 783, + 633, 633, 633, 437, 754, 487, 721, 721, 721, 773, + 317, 608, 487, 490, 277, 167, 405, 603, 441, 781, + 789, 590, 486, 149, 149, 226, 561, 571, 575, 578, + 587, 589, 789, 448, 450, 566, 148, 619, 448, 811, + 487, 721, 264, 279, 596, 784, 596, 279, 596, 264, + 36, 487, 490, 479, 486, 540, 610, 825, 36, 607, + 788, 607, 264, 269, 319, 607, 607, 831, 755, 35, + 692, 689, 789, 484, 484, 787, 405, 405, 405, 405, + 691, 487, 485, 751, 721, 136, 768, 769, 36, 487, + 721, 487, 487, 487, 487, 167, 487, 487, 487, 490, + 487, 488, 302, 759, 487, 722, 722, 721, 11, 16, + 17, 18, 191, 212, 281, 473, 474, 475, 477, 478, + 479, 480, 481, 482, 747, 722, 487, 487, 162, 167, + 762, 763, 487, 469, 
469, 487, 487, 36, 764, 751, + 764, 764, 167, 487, 36, 775, 721, 742, 721, 487, + 487, 469, 722, 722, 142, 751, 167, 129, 155, 269, + 274, 412, 423, 486, 142, 750, 721, 391, 776, 721, + 765, 721, 405, 486, 620, 486, 486, 289, 733, 398, + 400, 398, 400, 781, 394, 629, 629, 629, 221, 348, + 486, 620, 670, 671, 672, 679, 680, 724, 726, 727, + 789, 445, 685, 633, 685, 787, 720, 796, 802, 670, + 445, 843, 435, 393, 428, 524, 519, 528, 789, 279, + 521, 789, 525, 531, 490, 669, 475, 775, 636, 281, + 745, 748, 471, 638, 4, 787, 654, 280, 427, 651, + 490, 235, 405, 721, 264, 586, 486, 149, 486, 564, + 194, 584, 547, 283, 557, 547, 23, 134, 337, 338, + 366, 501, 502, 503, 509, 510, 149, 596, 149, 596, + 554, 569, 554, 487, 490, 550, 788, 487, 490, 475, + 488, 405, 352, 85, 405, 513, 352, 405, 405, 405, + 352, 487, 487, 487, 754, 485, 383, 384, 617, 788, + 579, 608, 781, 580, 804, 394, 584, 781, 781, 857, + 781, 487, 490, 277, 559, 277, 279, 558, 783, 448, + 856, 559, 36, 149, 781, 794, 149, 611, 605, 616, + 825, 788, 788, 269, 584, 479, 584, 788, 788, 487, + 484, 487, 490, 691, 787, 485, 787, 487, 713, 715, + 716, 717, 716, 717, 717, 487, 402, 721, 140, 691, + 487, 721, 721, 750, 721, 762, 691, 722, 722, 722, + 722, 129, 264, 274, 722, 722, 722, 722, 722, 722, + 722, 722, 722, 722, 721, 721, 763, 762, 711, 711, + 691, 487, 487, 487, 751, 691, 487, 720, 721, 33, + 33, 721, 487, 721, 167, 486, 755, 721, 487, 142, + 722, 722, 142, 142, 721, 721, 634, 445, 486, 734, + 789, 629, 629, 629, 629, 781, 781, 781, 620, 680, + 167, 620, 671, 672, 36, 673, 674, 789, 490, 93, + 168, 200, 215, 224, 258, 343, 676, 674, 36, 673, + 675, 789, 472, 684, 774, 721, 176, 656, 487, 656, + 487, 487, 721, 340, 523, 434, 487, 490, 775, 83, + 523, 487, 490, 520, 843, 721, 486, 638, 159, 220, + 280, 781, 783, 487, 149, 584, 571, 584, 547, 574, + 487, 116, 198, 262, 264, 570, 486, 581, 171, 114, + 184, 264, 559, 539, 106, 114, 171, 264, 382, 385, + 541, 559, 366, 503, 416, 783, 789, 507, 583, 3, + 44, 
50, 74, 81, 93, 100, 166, 168, 172, 186, + 200, 212, 213, 215, 224, 226, 238, 258, 263, 267, + 281, 288, 290, 343, 367, 371, 388, 396, 415, 439, + 477, 478, 540, 548, 585, 691, 748, 788, 791, 858, + 864, 798, 783, 783, 783, 783, 783, 783, 783, 783, + 783, 783, 633, 539, 617, 486, 811, 185, 781, 487, + 534, 486, 36, 568, 566, 575, 79, 536, 106, 262, + 619, 574, 429, 808, 596, 857, 487, 490, 584, 692, + 787, 692, 789, 485, 485, 721, 487, 487, 763, 167, + 129, 274, 486, 487, 487, 721, 721, 721, 755, 487, + 721, 33, 33, 721, 721, 142, 487, 487, 721, 735, + 789, 781, 781, 781, 781, 674, 675, 486, 487, 789, + 790, 396, 647, 648, 486, 671, 215, 288, 677, 671, + 677, 215, 676, 677, 215, 648, 486, 789, 648, 486, + 286, 54, 179, 660, 788, 660, 788, 778, 619, 292, + 619, 519, 279, 486, 517, 475, 531, 523, 750, 547, + 571, 487, 487, 448, 577, 117, 187, 196, 116, 431, + 554, 572, 67, 73, 85, 114, 116, 171, 198, 264, + 269, 312, 327, 422, 552, 553, 565, 30, 54, 591, + 185, 269, 540, 721, 591, 269, 477, 478, 543, 789, + 691, 596, 596, 238, 388, 791, 795, 475, 405, 405, + 487, 609, 429, 604, 606, 584, 36, 264, 486, 811, + 578, 148, 619, 146, 192, 558, 119, 134, 311, 856, + 106, 448, 854, 279, 789, 807, 486, 36, 616, 485, + 691, 722, 167, 486, 755, 487, 721, 721, 721, 487, + 298, 736, 681, 682, 726, 673, 486, 4, 9, 643, + 645, 646, 789, 782, 671, 279, 429, 678, 671, 215, + 671, 686, 687, 789, 486, 686, 789, 486, 657, 658, + 659, 721, 721, 447, 730, 730, 524, 85, 486, 522, + 530, 726, 789, 130, 721, 487, 327, 577, 486, 567, + 547, 487, 490, 486, 794, 783, 722, 591, 117, 187, + 116, 269, 216, 781, 577, 114, 36, 149, 73, 688, + 795, 481, 548, 783, 783, 539, 121, 487, 619, 149, + 36, 487, 783, 856, 27, 78, 86, 115, 184, 195, + 382, 385, 562, 562, 348, 348, 59, 67, 230, 781, + 530, 722, 755, 487, 54, 633, 487, 490, 36, 683, + 782, 301, 481, 301, 348, 481, 486, 486, 487, 721, + 486, 671, 678, 487, 490, 691, 686, 487, 487, 490, + 731, 732, 789, 429, 644, 644, 434, 783, 721, 487, + 490, 
73, 526, 526, 265, 427, 781, 547, 573, 576, + 798, 554, 721, 264, 553, 36, 577, 580, 184, 794, + 429, 506, 481, 416, 609, 788, 811, 854, 781, 619, + 566, 536, 67, 282, 67, 808, 487, 487, 751, 320, + 348, 737, 684, 681, 486, 487, 789, 643, 782, 687, + 688, 487, 658, 490, 36, 350, 619, 487, 685, 522, + 794, 527, 794, 527, 366, 580, 487, 490, 475, 487, + 184, 240, 588, 486, 549, 721, 416, 36, 486, 854, + 558, 856, 282, 282, 486, 811, 48, 96, 418, 721, + 738, 739, 738, 487, 686, 487, 490, 487, 487, 732, + 734, 646, 526, 637, 637, 529, 588, 576, 548, 262, + 560, 549, 168, 297, 372, 279, 555, 556, 582, 538, + 619, 530, 685, 739, 347, 161, 307, 161, 307, 487, + 9, 334, 649, 527, 638, 638, 685, 556, 196, 119, + 427, 279, 582, 279, 555, 487, 854, 487, 33, 487, + 486, 637, 547, 58, 262, 339, 366, 551, 551, 811, + 739, 9, 638, 22, 114, 269, 685, 487 }; #define yyerrok (yyerrstatus = 0) @@ -199167,14 +213014,14 @@ YYLTYPE yylloc; switch (yyn) { case 2: -#line 457 "third_party/libpg_query/grammar/grammar.y" +#line 460 "third_party/libpg_query/grammar/grammar.y" { pg_yyget_extra(yyscanner)->parsetree = (yyvsp[(1) - (1)].list); ;} break; case 3: -#line 473 "third_party/libpg_query/grammar/grammar.y" +#line 476 "third_party/libpg_query/grammar/grammar.y" { if ((yyvsp[(1) - (3)].list) != NIL) { @@ -199189,7 +213036,7 @@ YYLTYPE yylloc; break; case 4: -#line 485 "third_party/libpg_query/grammar/grammar.y" +#line 488 "third_party/libpg_query/grammar/grammar.y" { if ((yyvsp[(1) - (1)].node) != NULL) (yyval.list) = list_make1(makeRawStmt((yyvsp[(1) - (1)].node), 0)); @@ -199199,7 +213046,7 @@ YYLTYPE yylloc; break; case 38: -#line 527 "third_party/libpg_query/grammar/grammar.y" +#line 530 "third_party/libpg_query/grammar/grammar.y" { (yyval.node) = NULL; ;} break; @@ -199766,6 +213613,11 @@ YYLTYPE yylloc; break; case 97: +#line 458 "third_party/libpg_query/grammar/statements/alter_table.y" + { (yyval.ival) = 0; ;} + break; + + case 98: #line 8 
"third_party/libpg_query/grammar/statements/deallocate.y" { PGDeallocateStmt *n = makeNode(PGDeallocateStmt); @@ -199774,7 +213626,7 @@ YYLTYPE yylloc; ;} break; - case 98: + case 99: #line 14 "third_party/libpg_query/grammar/statements/deallocate.y" { PGDeallocateStmt *n = makeNode(PGDeallocateStmt); @@ -199783,7 +213635,7 @@ YYLTYPE yylloc; ;} break; - case 99: + case 100: #line 20 "third_party/libpg_query/grammar/statements/deallocate.y" { PGDeallocateStmt *n = makeNode(PGDeallocateStmt); @@ -199792,7 +213644,7 @@ YYLTYPE yylloc; ;} break; - case 100: + case 101: #line 26 "third_party/libpg_query/grammar/statements/deallocate.y" { PGDeallocateStmt *n = makeNode(PGDeallocateStmt); @@ -199801,7 +213653,7 @@ YYLTYPE yylloc; ;} break; - case 101: + case 102: #line 7 "third_party/libpg_query/grammar/statements/rename.y" { PGRenameStmt *n = makeNode(PGRenameStmt); @@ -199813,7 +213665,7 @@ YYLTYPE yylloc; ;} break; - case 102: + case 103: #line 16 "third_party/libpg_query/grammar/statements/rename.y" { PGRenameStmt *n = makeNode(PGRenameStmt); @@ -199826,7 +213678,7 @@ YYLTYPE yylloc; ;} break; - case 103: + case 104: #line 26 "third_party/libpg_query/grammar/statements/rename.y" { PGRenameStmt *n = makeNode(PGRenameStmt); @@ -199839,7 +213691,7 @@ YYLTYPE yylloc; ;} break; - case 104: + case 105: #line 36 "third_party/libpg_query/grammar/statements/rename.y" { PGRenameStmt *n = makeNode(PGRenameStmt); @@ -199852,7 +213704,7 @@ YYLTYPE yylloc; ;} break; - case 105: + case 106: #line 46 "third_party/libpg_query/grammar/statements/rename.y" { PGRenameStmt *n = makeNode(PGRenameStmt); @@ -199865,7 +213717,7 @@ YYLTYPE yylloc; ;} break; - case 106: + case 107: #line 56 "third_party/libpg_query/grammar/statements/rename.y" { PGRenameStmt *n = makeNode(PGRenameStmt); @@ -199878,7 +213730,7 @@ YYLTYPE yylloc; ;} break; - case 107: + case 108: #line 66 "third_party/libpg_query/grammar/statements/rename.y" { PGRenameStmt *n = makeNode(PGRenameStmt); @@ -199891,7 +213743,7 @@ 
YYLTYPE yylloc; ;} break; - case 108: + case 109: #line 76 "third_party/libpg_query/grammar/statements/rename.y" { PGRenameStmt *n = makeNode(PGRenameStmt); @@ -199904,7 +213756,7 @@ YYLTYPE yylloc; ;} break; - case 109: + case 110: #line 86 "third_party/libpg_query/grammar/statements/rename.y" { PGRenameStmt *n = makeNode(PGRenameStmt); @@ -199917,7 +213769,7 @@ YYLTYPE yylloc; ;} break; - case 110: + case 111: #line 96 "third_party/libpg_query/grammar/statements/rename.y" { PGRenameStmt *n = makeNode(PGRenameStmt); @@ -199931,7 +213783,7 @@ YYLTYPE yylloc; ;} break; - case 111: + case 112: #line 107 "third_party/libpg_query/grammar/statements/rename.y" { PGRenameStmt *n = makeNode(PGRenameStmt); @@ -199945,7 +213797,7 @@ YYLTYPE yylloc; ;} break; - case 112: + case 113: #line 118 "third_party/libpg_query/grammar/statements/rename.y" { PGRenameStmt *n = makeNode(PGRenameStmt); @@ -199958,7 +213810,7 @@ YYLTYPE yylloc; ;} break; - case 113: + case 114: #line 128 "third_party/libpg_query/grammar/statements/rename.y" { PGRenameStmt *n = makeNode(PGRenameStmt); @@ -199971,17 +213823,17 @@ YYLTYPE yylloc; ;} break; - case 114: + case 115: #line 140 "third_party/libpg_query/grammar/statements/rename.y" { (yyval.ival) = COLUMN; ;} break; - case 115: + case 116: #line 141 "third_party/libpg_query/grammar/statements/rename.y" { (yyval.ival) = 0; ;} break; - case 116: + case 117: #line 10 "third_party/libpg_query/grammar/statements/insert.y" { (yyvsp[(5) - (7)].istmt)->relation = (yyvsp[(4) - (7)].range); @@ -199992,7 +213844,7 @@ YYLTYPE yylloc; ;} break; - case 117: + case 118: #line 22 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.istmt) = makeNode(PGInsertStmt); @@ -200001,7 +213853,7 @@ YYLTYPE yylloc; ;} break; - case 118: + case 119: #line 28 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.istmt) = makeNode(PGInsertStmt); @@ -200011,7 +213863,7 @@ YYLTYPE yylloc; ;} break; - case 119: + case 120: #line 35 
"third_party/libpg_query/grammar/statements/insert.y" { (yyval.istmt) = makeNode(PGInsertStmt); @@ -200020,7 +213872,7 @@ YYLTYPE yylloc; ;} break; - case 120: + case 121: #line 41 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.istmt) = makeNode(PGInsertStmt); @@ -200030,7 +213882,7 @@ YYLTYPE yylloc; ;} break; - case 121: + case 122: #line 48 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.istmt) = makeNode(PGInsertStmt); @@ -200039,14 +213891,14 @@ YYLTYPE yylloc; ;} break; - case 122: + case 123: #line 58 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.range) = (yyvsp[(1) - (1)].range); ;} break; - case 123: + case 124: #line 62 "third_party/libpg_query/grammar/statements/insert.y" { (yyvsp[(1) - (3)].range)->alias = makeAlias((yyvsp[(3) - (3)].str), NIL); @@ -200054,7 +213906,7 @@ YYLTYPE yylloc; ;} break; - case 124: + case 125: #line 71 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.infer) = makeNode(PGInferClause); @@ -200065,7 +213917,7 @@ YYLTYPE yylloc; ;} break; - case 125: + case 126: #line 80 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.infer) = makeNode(PGInferClause); @@ -200076,24 +213928,24 @@ YYLTYPE yylloc; ;} break; - case 126: + case 127: #line 88 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.infer) = NULL; ;} break; - case 127: + case 128: #line 95 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.with) = (yyvsp[(1) - (1)].with); ;} break; - case 128: + case 129: #line 96 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.with) = NULL; ;} break; - case 129: + case 130: #line 102 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.target) = makeNode(PGResTarget); @@ -200104,7 +213956,7 @@ YYLTYPE yylloc; ;} break; - case 130: + case 131: #line 114 "third_party/libpg_query/grammar/statements/insert.y" { (yyvsp[(1) - (3)].target)->val = (PGNode *) (yyvsp[(3) - (3)].node); @@ -200112,7 +213964,7 @@ YYLTYPE yylloc; ;} 
break; - case 131: + case 132: #line 119 "third_party/libpg_query/grammar/statements/insert.y" { int ncolumns = list_length((yyvsp[(2) - (5)].list)); @@ -200136,7 +213988,7 @@ YYLTYPE yylloc; ;} break; - case 132: + case 133: #line 144 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.onconflict) = makeNode(PGOnConflictClause); @@ -200148,7 +214000,7 @@ YYLTYPE yylloc; ;} break; - case 133: + case 134: #line 154 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.onconflict) = makeNode(PGOnConflictClause); @@ -200160,14 +214012,14 @@ YYLTYPE yylloc; ;} break; - case 134: + case 135: #line 163 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.onconflict) = NULL; ;} break; - case 135: + case 136: #line 170 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.ielem) = makeNode(PGIndexElem); @@ -200181,7 +214033,7 @@ YYLTYPE yylloc; ;} break; - case 136: + case 137: #line 181 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.ielem) = makeNode(PGIndexElem); @@ -200195,7 +214047,7 @@ YYLTYPE yylloc; ;} break; - case 137: + case 138: #line 192 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.ielem) = makeNode(PGIndexElem); @@ -200209,87 +214061,87 @@ YYLTYPE yylloc; ;} break; - case 138: + case 139: #line 206 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.list) = (yyvsp[(2) - (2)].list); ;} break; - case 139: + case 140: #line 207 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.list) = NIL; ;} break; - case 140: + case 141: #line 213 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.override) = PG_OVERRIDING_USER_VALUE; ;} break; - case 141: + case 142: #line 214 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.override) = OVERRIDING_SYSTEM_VALUE; ;} break; - case 142: + case 143: #line 219 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].target)); ;} break; - case 143: + case 144: #line 220 
"third_party/libpg_query/grammar/statements/insert.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list),(yyvsp[(3) - (3)].target)); ;} break; - case 144: + case 145: #line 226 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.list) = (yyvsp[(2) - (2)].list); ;} break; - case 145: + case 146: #line 227 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.list) = NIL; ;} break; - case 146: + case 147: #line 231 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.list) = (yyvsp[(1) - (1)].list); ;} break; - case 147: + case 148: #line 232 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.list) = NIL; ;} break; - case 148: + case 149: #line 238 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].target)); ;} break; - case 149: + case 150: #line 240 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].target)); ;} break; - case 150: + case 151: #line 245 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.list) = (yyvsp[(1) - (1)].list); ;} break; - case 151: + case 152: #line 246 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.list) = list_concat((yyvsp[(1) - (3)].list),(yyvsp[(3) - (3)].list)); ;} break; - case 152: + case 153: #line 250 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].ielem)); ;} break; - case 153: + case 154: #line 251 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].ielem)); ;} break; - case 154: + case 155: #line 257 "third_party/libpg_query/grammar/statements/insert.y" { (yyval.target) = makeNode(PGResTarget); @@ -200300,7 +214152,7 @@ YYLTYPE yylloc; ;} break; - case 155: + case 156: #line 8 "third_party/libpg_query/grammar/statements/pragma.y" { PGPragmaStmt *n = makeNode(PGPragmaStmt); @@ -200310,7 +214162,7 @@ YYLTYPE yylloc; ;} break; 
- case 156: + case 157: #line 15 "third_party/libpg_query/grammar/statements/pragma.y" { PGPragmaStmt *n = makeNode(PGPragmaStmt); @@ -200321,7 +214173,7 @@ YYLTYPE yylloc; ;} break; - case 157: + case 158: #line 23 "third_party/libpg_query/grammar/statements/pragma.y" { PGPragmaStmt *n = makeNode(PGPragmaStmt); @@ -200332,7 +214184,7 @@ YYLTYPE yylloc; ;} break; - case 158: + case 159: #line 10 "third_party/libpg_query/grammar/statements/create_sequence.y" { PGCreateSeqStmt *n = makeNode(PGCreateSeqStmt); @@ -200345,7 +214197,7 @@ YYLTYPE yylloc; ;} break; - case 159: + case 160: #line 20 "third_party/libpg_query/grammar/statements/create_sequence.y" { PGCreateSeqStmt *n = makeNode(PGCreateSeqStmt); @@ -200358,17 +214210,17 @@ YYLTYPE yylloc; ;} break; - case 160: + case 161: #line 32 "third_party/libpg_query/grammar/statements/create_sequence.y" { (yyval.list) = (yyvsp[(1) - (1)].list); ;} break; - case 161: + case 162: #line 33 "third_party/libpg_query/grammar/statements/create_sequence.y" { (yyval.list) = NIL; ;} break; - case 162: + case 163: #line 8 "third_party/libpg_query/grammar/statements/execute.y" { PGExecuteStmt *n = makeNode(PGExecuteStmt); @@ -200378,7 +214230,7 @@ YYLTYPE yylloc; ;} break; - case 163: + case 164: #line 16 "third_party/libpg_query/grammar/statements/execute.y" { PGCreateTableAsStmt *ctas = makeNode(PGCreateTableAsStmt); @@ -200397,7 +214249,7 @@ YYLTYPE yylloc; ;} break; - case 164: + case 165: #line 33 "third_party/libpg_query/grammar/statements/execute.y" { PGCreateTableAsStmt *ctas = makeNode(PGCreateTableAsStmt); @@ -200416,17 +214268,17 @@ YYLTYPE yylloc; ;} break; - case 165: + case 166: #line 51 "third_party/libpg_query/grammar/statements/execute.y" { (yyval.list) = (yyvsp[(2) - (3)].list); ;} break; - case 166: + case 167: #line 52 "third_party/libpg_query/grammar/statements/execute.y" { (yyval.list) = NIL; ;} break; - case 167: + case 168: #line 10 "third_party/libpg_query/grammar/statements/alter_sequence.y" { 
PGAlterSeqStmt *n = makeNode(PGAlterSeqStmt); @@ -200437,7 +214289,7 @@ YYLTYPE yylloc; ;} break; - case 168: + case 169: #line 18 "third_party/libpg_query/grammar/statements/alter_sequence.y" { PGAlterSeqStmt *n = makeNode(PGAlterSeqStmt); @@ -200448,42 +214300,42 @@ YYLTYPE yylloc; ;} break; - case 169: + case 170: #line 29 "third_party/libpg_query/grammar/statements/alter_sequence.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].defelt)); ;} break; - case 170: + case 171: #line 30 "third_party/libpg_query/grammar/statements/alter_sequence.y" { (yyval.list) = lappend((yyvsp[(1) - (2)].list), (yyvsp[(2) - (2)].defelt)); ;} break; - case 171: + case 172: #line 34 "third_party/libpg_query/grammar/statements/alter_sequence.y" {;} break; - case 172: + case 173: #line 35 "third_party/libpg_query/grammar/statements/alter_sequence.y" {;} break; - case 173: + case 174: #line 36 "third_party/libpg_query/grammar/statements/alter_sequence.y" {;} break; - case 174: + case 175: #line 41 "third_party/libpg_query/grammar/statements/alter_sequence.y" { (yyval.value) = makeFloat((yyvsp[(1) - (1)].str)); ;} break; - case 175: + case 176: #line 42 "third_party/libpg_query/grammar/statements/alter_sequence.y" { (yyval.value) = makeFloat((yyvsp[(2) - (2)].str)); ;} break; - case 176: + case 177: #line 44 "third_party/libpg_query/grammar/statements/alter_sequence.y" { (yyval.value) = makeFloat((yyvsp[(2) - (2)].str)); @@ -200491,82 +214343,82 @@ YYLTYPE yylloc; ;} break; - case 177: + case 178: #line 48 "third_party/libpg_query/grammar/statements/alter_sequence.y" { (yyval.value) = makeInteger((yyvsp[(1) - (1)].ival)); ;} break; - case 178: + case 179: #line 53 "third_party/libpg_query/grammar/statements/alter_sequence.y" { (yyval.defelt) = makeDefElem("as", (PGNode *)(yyvsp[(2) - (2)].typnam), (yylsp[(1) - (2)])); ;} break; - case 179: + case 180: #line 57 "third_party/libpg_query/grammar/statements/alter_sequence.y" { (yyval.defelt) = makeDefElem("cache", (PGNode *)(yyvsp[(2) - 
(2)].value), (yylsp[(1) - (2)])); ;} break; - case 180: + case 181: #line 61 "third_party/libpg_query/grammar/statements/alter_sequence.y" { (yyval.defelt) = makeDefElem("cycle", (PGNode *)makeInteger(true), (yylsp[(1) - (1)])); ;} break; - case 181: + case 182: #line 65 "third_party/libpg_query/grammar/statements/alter_sequence.y" { (yyval.defelt) = makeDefElem("cycle", (PGNode *)makeInteger(false), (yylsp[(1) - (2)])); ;} break; - case 182: + case 183: #line 69 "third_party/libpg_query/grammar/statements/alter_sequence.y" { (yyval.defelt) = makeDefElem("increment", (PGNode *)(yyvsp[(3) - (3)].value), (yylsp[(1) - (3)])); ;} break; - case 183: + case 184: #line 73 "third_party/libpg_query/grammar/statements/alter_sequence.y" { (yyval.defelt) = makeDefElem("maxvalue", (PGNode *)(yyvsp[(2) - (2)].value), (yylsp[(1) - (2)])); ;} break; - case 184: + case 185: #line 77 "third_party/libpg_query/grammar/statements/alter_sequence.y" { (yyval.defelt) = makeDefElem("minvalue", (PGNode *)(yyvsp[(2) - (2)].value), (yylsp[(1) - (2)])); ;} break; - case 185: + case 186: #line 81 "third_party/libpg_query/grammar/statements/alter_sequence.y" { (yyval.defelt) = makeDefElem("maxvalue", NULL, (yylsp[(1) - (2)])); ;} break; - case 186: + case 187: #line 85 "third_party/libpg_query/grammar/statements/alter_sequence.y" { (yyval.defelt) = makeDefElem("minvalue", NULL, (yylsp[(1) - (2)])); ;} break; - case 187: + case 188: #line 89 "third_party/libpg_query/grammar/statements/alter_sequence.y" { (yyval.defelt) = makeDefElem("owned_by", (PGNode *)(yyvsp[(3) - (3)].list), (yylsp[(1) - (3)])); ;} break; - case 188: + case 189: #line 93 "third_party/libpg_query/grammar/statements/alter_sequence.y" { /* not documented, only used by pg_dump */ @@ -200574,53 +214426,53 @@ YYLTYPE yylloc; ;} break; - case 189: + case 190: #line 98 "third_party/libpg_query/grammar/statements/alter_sequence.y" { (yyval.defelt) = makeDefElem("start", (PGNode *)(yyvsp[(3) - (3)].value), (yylsp[(1) - (3)])); ;} 
break; - case 190: + case 191: #line 102 "third_party/libpg_query/grammar/statements/alter_sequence.y" { (yyval.defelt) = makeDefElem("restart", NULL, (yylsp[(1) - (1)])); ;} break; - case 191: + case 192: #line 106 "third_party/libpg_query/grammar/statements/alter_sequence.y" { (yyval.defelt) = makeDefElem("restart", (PGNode *)(yyvsp[(3) - (3)].value), (yylsp[(1) - (3)])); ;} break; - case 192: + case 193: #line 112 "third_party/libpg_query/grammar/statements/alter_sequence.y" {;} break; - case 193: + case 194: #line 113 "third_party/libpg_query/grammar/statements/alter_sequence.y" {;} break; - case 194: + case 195: #line 117 "third_party/libpg_query/grammar/statements/alter_sequence.y" { (yyval.ival) = (yyvsp[(1) - (1)].ival); ;} break; - case 195: + case 196: #line 118 "third_party/libpg_query/grammar/statements/alter_sequence.y" { (yyval.ival) = + (yyvsp[(2) - (2)].ival); ;} break; - case 196: + case 197: #line 119 "third_party/libpg_query/grammar/statements/alter_sequence.y" { (yyval.ival) = - (yyvsp[(2) - (2)].ival); ;} break; - case 197: + case 198: #line 3 "third_party/libpg_query/grammar/statements/transaction.y" { PGTransactionStmt *n = makeNode(PGTransactionStmt); @@ -200630,7 +214482,7 @@ YYLTYPE yylloc; ;} break; - case 198: + case 199: #line 10 "third_party/libpg_query/grammar/statements/transaction.y" { PGTransactionStmt *n = makeNode(PGTransactionStmt); @@ -200639,7 +214491,7 @@ YYLTYPE yylloc; ;} break; - case 199: + case 200: #line 16 "third_party/libpg_query/grammar/statements/transaction.y" { PGTransactionStmt *n = makeNode(PGTransactionStmt); @@ -200648,7 +214500,7 @@ YYLTYPE yylloc; ;} break; - case 200: + case 201: #line 22 "third_party/libpg_query/grammar/statements/transaction.y" { PGTransactionStmt *n = makeNode(PGTransactionStmt); @@ -200658,7 +214510,7 @@ YYLTYPE yylloc; ;} break; - case 201: + case 202: #line 29 "third_party/libpg_query/grammar/statements/transaction.y" { PGTransactionStmt *n = makeNode(PGTransactionStmt); @@ -200668,7 
+214520,7 @@ YYLTYPE yylloc; ;} break; - case 202: + case 203: #line 36 "third_party/libpg_query/grammar/statements/transaction.y" { PGTransactionStmt *n = makeNode(PGTransactionStmt); @@ -200678,22 +214530,22 @@ YYLTYPE yylloc; ;} break; - case 203: + case 204: #line 45 "third_party/libpg_query/grammar/statements/transaction.y" {;} break; - case 204: + case 205: #line 46 "third_party/libpg_query/grammar/statements/transaction.y" {;} break; - case 205: + case 206: #line 47 "third_party/libpg_query/grammar/statements/transaction.y" {;} break; - case 206: + case 207: #line 9 "third_party/libpg_query/grammar/statements/create.y" { PGCreateStmt *n = makeNode(PGCreateStmt); @@ -200709,7 +214561,7 @@ YYLTYPE yylloc; ;} break; - case 207: + case 208: #line 24 "third_party/libpg_query/grammar/statements/create.y" { PGCreateStmt *n = makeNode(PGCreateStmt); @@ -200725,12 +214577,12 @@ YYLTYPE yylloc; ;} break; - case 208: + case 209: #line 41 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = 0; ;} break; - case 209: + case 210: #line 43 "third_party/libpg_query/grammar/statements/create.y" { /* @@ -200757,77 +214609,77 @@ YYLTYPE yylloc; ;} break; - case 210: + case 211: #line 69 "third_party/libpg_query/grammar/statements/create.y" { (yyval.node) = (PGNode *)(yyvsp[(1) - (1)].typnam); ;} break; - case 211: + case 212: #line 70 "third_party/libpg_query/grammar/statements/create.y" { (yyval.node) = (PGNode *)makeString(pstrdup((yyvsp[(1) - (1)].keyword))); ;} break; - case 212: + case 213: #line 71 "third_party/libpg_query/grammar/statements/create.y" { (yyval.node) = (PGNode *)(yyvsp[(1) - (1)].list); ;} break; - case 213: + case 214: #line 72 "third_party/libpg_query/grammar/statements/create.y" { (yyval.node) = (PGNode *)(yyvsp[(1) - (1)].value); ;} break; - case 214: + case 215: #line 73 "third_party/libpg_query/grammar/statements/create.y" { (yyval.node) = (PGNode *)makeString((yyvsp[(1) - (1)].str)); ;} break; - case 215: + case 216: #line 74 
"third_party/libpg_query/grammar/statements/create.y" { (yyval.node) = (PGNode *)makeString(pstrdup((yyvsp[(1) - (1)].keyword))); ;} break; - case 216: + case 217: #line 78 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = (yyvsp[(2) - (3)].list); ;} break; - case 217: + case 218: #line 79 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = NIL; ;} break; - case 218: + case 219: #line 84 "third_party/libpg_query/grammar/statements/create.y" { (yyval.node) = (PGNode *) makeString((yyvsp[(1) - (1)].str)); ;} break; - case 219: + case 220: #line 89 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = PG_FKCONSTR_ACTION_NOACTION; ;} break; - case 220: + case 221: #line 90 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = PG_FKCONSTR_ACTION_RESTRICT; ;} break; - case 221: + case 222: #line 91 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = PG_FKCONSTR_ACTION_CASCADE; ;} break; - case 222: + case 223: #line 92 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = PG_FKCONSTR_ACTION_SETNULL; ;} break; - case 223: + case 224: #line 93 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = PG_FKCONSTR_ACTION_SETDEFAULT; ;} break; - case 224: + case 225: #line 99 "third_party/libpg_query/grammar/statements/create.y" { PGConstraint *n = castNode(PGConstraint, (yyvsp[(3) - (3)].node)); @@ -200837,17 +214689,17 @@ YYLTYPE yylloc; ;} break; - case 225: + case 226: #line 105 "third_party/libpg_query/grammar/statements/create.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 226: + case 227: #line 106 "third_party/libpg_query/grammar/statements/create.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 227: + case 228: #line 108 "third_party/libpg_query/grammar/statements/create.y" { /* @@ -200863,7 +214715,7 @@ YYLTYPE yylloc; ;} break; - case 228: + case 229: #line 125 "third_party/libpg_query/grammar/statements/create.y" { 
PGConstraint *n = makeNode(PGConstraint); @@ -200873,7 +214725,7 @@ YYLTYPE yylloc; ;} break; - case 229: + case 230: #line 132 "third_party/libpg_query/grammar/statements/create.y" { PGConstraint *n = makeNode(PGConstraint); @@ -200883,7 +214735,7 @@ YYLTYPE yylloc; ;} break; - case 230: + case 231: #line 139 "third_party/libpg_query/grammar/statements/create.y" { PGConstraint *n = makeNode(PGConstraint); @@ -200896,7 +214748,7 @@ YYLTYPE yylloc; ;} break; - case 231: + case 232: #line 149 "third_party/libpg_query/grammar/statements/create.y" { PGConstraint *n = makeNode(PGConstraint); @@ -200909,7 +214761,7 @@ YYLTYPE yylloc; ;} break; - case 232: + case 233: #line 159 "third_party/libpg_query/grammar/statements/create.y" { PGConstraint *n = makeNode(PGConstraint); @@ -200924,7 +214776,7 @@ YYLTYPE yylloc; ;} break; - case 233: + case 234: #line 171 "third_party/libpg_query/grammar/statements/create.y" { PGConstraint *n = makeNode(PGConstraint); @@ -200936,7 +214788,7 @@ YYLTYPE yylloc; ;} break; - case 234: + case 235: #line 180 "third_party/libpg_query/grammar/statements/create.y" { PGConstraint *n = makeNode(PGConstraint); @@ -200948,7 +214800,7 @@ YYLTYPE yylloc; ;} break; - case 235: + case 236: #line 189 "third_party/libpg_query/grammar/statements/create.y" { PGConstraint *n = makeNode(PGConstraint); @@ -200966,89 +214818,89 @@ YYLTYPE yylloc; ;} break; - case 236: + case 237: #line 208 "third_party/libpg_query/grammar/statements/create.y" { (yyval.defelt) = makeDefElem((yyvsp[(1) - (2)].str), (yyvsp[(2) - (2)].node), (yylsp[(1) - (2)])); ;} break; - case 237: + case 238: #line 214 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = (yyvsp[(3) - (3)].ival); ;} break; - case 238: + case 239: #line 220 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = ((yyvsp[(1) - (1)].ival) << 8) | (PG_FKCONSTR_ACTION_NOACTION & 0xFF); ;} break; - case 239: + case 240: #line 222 "third_party/libpg_query/grammar/statements/create.y" { 
(yyval.ival) = (PG_FKCONSTR_ACTION_NOACTION << 8) | ((yyvsp[(1) - (1)].ival) & 0xFF); ;} break; - case 240: + case 241: #line 224 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = ((yyvsp[(1) - (2)].ival) << 8) | ((yyvsp[(2) - (2)].ival) & 0xFF); ;} break; - case 241: + case 242: #line 226 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = ((yyvsp[(2) - (2)].ival) << 8) | ((yyvsp[(1) - (2)].ival) & 0xFF); ;} break; - case 242: + case 243: #line 228 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = (PG_FKCONSTR_ACTION_NOACTION << 8) | (PG_FKCONSTR_ACTION_NOACTION & 0xFF); ;} break; - case 243: + case 244: #line 233 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = (yyvsp[(3) - (4)].list); ;} break; - case 244: + case 245: #line 234 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = NIL; ;} break; - case 245: + case 246: #line 238 "third_party/libpg_query/grammar/statements/create.y" { (yyval.oncommit) = ONCOMMIT_DROP; ;} break; - case 246: + case 247: #line 239 "third_party/libpg_query/grammar/statements/create.y" { (yyval.oncommit) = PG_ONCOMMIT_DELETE_ROWS; ;} break; - case 247: + case 248: #line 240 "third_party/libpg_query/grammar/statements/create.y" { (yyval.oncommit) = PG_ONCOMMIT_PRESERVE_ROWS; ;} break; - case 248: + case 249: #line 241 "third_party/libpg_query/grammar/statements/create.y" { (yyval.oncommit) = PG_ONCOMMIT_NOOP; ;} break; - case 249: + case 250: #line 246 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = (yyvsp[(2) - (3)].list); ;} break; - case 250: + case 251: #line 250 "third_party/libpg_query/grammar/statements/create.y" { (yyval.boolean) = true; ;} break; - case 251: + case 252: #line 251 "third_party/libpg_query/grammar/statements/create.y" { (yyval.boolean) = false; ;} break; - case 252: + case 253: #line 257 "third_party/libpg_query/grammar/statements/create.y" { PGConstraint *n = castNode(PGConstraint, 
(yyvsp[(3) - (3)].node)); @@ -201058,67 +214910,67 @@ YYLTYPE yylloc; ;} break; - case 253: + case 254: #line 263 "third_party/libpg_query/grammar/statements/create.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 254: + case 255: #line 268 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = PG_CREATE_TABLE_LIKE_COMMENTS; ;} break; - case 255: + case 256: #line 269 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = PG_CREATE_TABLE_LIKE_CONSTRAINTS; ;} break; - case 256: + case 257: #line 270 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = PG_CREATE_TABLE_LIKE_DEFAULTS; ;} break; - case 257: + case 258: #line 271 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = PG_CREATE_TABLE_LIKE_IDENTITY; ;} break; - case 258: + case 259: #line 272 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = PG_CREATE_TABLE_LIKE_INDEXES; ;} break; - case 259: + case 260: #line 273 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = PG_CREATE_TABLE_LIKE_STATISTICS; ;} break; - case 260: + case 261: #line 274 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = PG_CREATE_TABLE_LIKE_STORAGE; ;} break; - case 261: + case 262: #line 275 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = PG_CREATE_TABLE_LIKE_ALL; ;} break; - case 262: + case 263: #line 281 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].defelt)); ;} break; - case 263: + case 264: #line 282 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].defelt)); ;} break; - case 264: + case 265: #line 286 "third_party/libpg_query/grammar/statements/create.y" { (yyval.str) = (yyvsp[(3) - (3)].str); ;} break; - case 265: + case 266: #line 292 "third_party/libpg_query/grammar/statements/create.y" { PGConstraint *n = makeNode(PGConstraint); @@ -201128,7 
+214980,7 @@ YYLTYPE yylloc; ;} break; - case 266: + case 267: #line 299 "third_party/libpg_query/grammar/statements/create.y" { PGConstraint *n = makeNode(PGConstraint); @@ -201138,7 +214990,7 @@ YYLTYPE yylloc; ;} break; - case 267: + case 268: #line 306 "third_party/libpg_query/grammar/statements/create.y" { PGConstraint *n = makeNode(PGConstraint); @@ -201148,7 +215000,7 @@ YYLTYPE yylloc; ;} break; - case 268: + case 269: #line 313 "third_party/libpg_query/grammar/statements/create.y" { PGConstraint *n = makeNode(PGConstraint); @@ -201158,82 +215010,82 @@ YYLTYPE yylloc; ;} break; - case 269: + case 270: #line 324 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = (yyvsp[(2) - (2)].list); ;} break; - case 270: + case 271: #line 325 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = list_make1(makeDefElem("oids", (PGNode *) makeInteger(true), (yylsp[(1) - (2)]))); ;} break; - case 271: + case 272: #line 326 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = list_make1(makeDefElem("oids", (PGNode *) makeInteger(false), (yylsp[(1) - (2)]))); ;} break; - case 272: + case 273: #line 327 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = NIL; ;} break; - case 273: + case 274: #line 331 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = (yyvsp[(2) - (3)].list); ;} break; - case 274: + case 275: #line 336 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = (yyvsp[(1) - (3)].ival) | (yyvsp[(3) - (3)].ival); ;} break; - case 275: + case 276: #line 337 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = (yyvsp[(1) - (3)].ival) & ~(yyvsp[(3) - (3)].ival); ;} break; - case 276: + case 277: #line 338 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = 0; ;} break; - case 277: + case 278: #line 343 "third_party/libpg_query/grammar/statements/create.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 278: + case 
279: #line 348 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = CAS_NOT_DEFERRABLE; ;} break; - case 279: + case 280: #line 349 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = CAS_DEFERRABLE; ;} break; - case 280: + case 281: #line 350 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = CAS_INITIALLY_IMMEDIATE; ;} break; - case 281: + case 282: #line 351 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = CAS_INITIALLY_DEFERRED; ;} break; - case 282: + case 283: #line 352 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = CAS_NOT_VALID; ;} break; - case 283: + case 284: #line 353 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = CAS_NO_INHERIT; ;} break; - case 284: + case 285: #line 359 "third_party/libpg_query/grammar/statements/create.y" { PGColumnDef *n = makeNode(PGColumnDef); @@ -201255,131 +215107,131 @@ YYLTYPE yylloc; ;} break; - case 285: + case 286: #line 382 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].defelt)); ;} break; - case 286: + case 287: #line 386 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].defelt)); ;} break; - case 287: + case 288: #line 392 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].defelt)); ;} break; - case 288: + case 289: #line 393 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].defelt)); ;} break; - case 289: + case 290: #line 397 "third_party/libpg_query/grammar/statements/create.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 290: + case 291: #line 401 "third_party/libpg_query/grammar/statements/create.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 291: + case 292: #line 402 "third_party/libpg_query/grammar/statements/create.y" 
{ (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 292: + case 293: #line 403 "third_party/libpg_query/grammar/statements/create.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 293: + case 294: #line 408 "third_party/libpg_query/grammar/statements/create.y" { (yyval.defelt) = makeDefElem((yyvsp[(1) - (3)].str), (PGNode *) (yyvsp[(3) - (3)].node), (yylsp[(1) - (3)])); ;} break; - case 294: + case 295: #line 412 "third_party/libpg_query/grammar/statements/create.y" { (yyval.defelt) = makeDefElem((yyvsp[(1) - (1)].str), NULL, (yylsp[(1) - (1)])); ;} break; - case 295: + case 296: #line 419 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = (yyvsp[(2) - (2)].list); ;} break; - case 296: + case 297: #line 420 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = NIL; ;} break; - case 297: + case 298: #line 425 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = (yyvsp[(1) - (1)].list); ;} break; - case 298: + case 299: #line 426 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = NIL; ;} break; - case 299: + case 300: #line 431 "third_party/libpg_query/grammar/statements/create.y" { (yyval.node) = (PGNode *) makeString((yyvsp[(1) - (1)].str)); ;} break; - case 300: + case 301: #line 438 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = (yyvsp[(2) - (3)].list); ;} break; - case 301: + case 302: #line 439 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = NIL; ;} break; - case 302: + case 303: #line 444 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = lappend((yyvsp[(1) - (2)].list), (yyvsp[(2) - (2)].node)); ;} break; - case 303: + case 304: #line 445 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = NIL; ;} break; - case 304: + case 305: #line 449 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = (yyvsp[(3) - (3)].ival); ;} break; - case 305: + case 306: #line 455 
"third_party/libpg_query/grammar/statements/create.y" { (yyval.defelt) = makeDefElem((yyvsp[(1) - (3)].str), (PGNode *) (yyvsp[(3) - (3)].node), (yylsp[(1) - (3)])); ;} break; - case 306: + case 307: #line 459 "third_party/libpg_query/grammar/statements/create.y" { (yyval.defelt) = makeDefElem((yyvsp[(1) - (1)].str), NULL, (yylsp[(1) - (1)])); ;} break; - case 307: + case 308: #line 463 "third_party/libpg_query/grammar/statements/create.y" { (yyval.defelt) = makeDefElemExtended((yyvsp[(1) - (5)].str), (yyvsp[(3) - (5)].str), (PGNode *) (yyvsp[(5) - (5)].node), @@ -201387,29 +215239,29 @@ YYLTYPE yylloc; ;} break; - case 308: + case 309: #line 468 "third_party/libpg_query/grammar/statements/create.y" { (yyval.defelt) = makeDefElemExtended((yyvsp[(1) - (3)].str), (yyvsp[(3) - (3)].str), NULL, PG_DEFELEM_UNSPEC, (yylsp[(1) - (3)])); ;} break; - case 309: + case 310: #line 475 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].node)); ;} break; - case 310: + case 311: #line 476 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].node)); ;} break; - case 311: + case 312: #line 480 "third_party/libpg_query/grammar/statements/create.y" { (yyval.typnam) = (yyvsp[(1) - (1)].typnam); ;} break; - case 312: + case 313: #line 482 "third_party/libpg_query/grammar/statements/create.y" { (yyval.typnam) = makeTypeNameFromNameList(lcons(makeString((yyvsp[(1) - (4)].str)), (yyvsp[(2) - (4)].list))); @@ -201418,7 +215270,7 @@ YYLTYPE yylloc; ;} break; - case 313: + case 314: #line 488 "third_party/libpg_query/grammar/statements/create.y" { (yyval.typnam) = makeTypeNameFromNameList(lcons(makeString((yyvsp[(2) - (5)].str)), (yyvsp[(3) - (5)].list))); @@ -201428,7 +215280,7 @@ YYLTYPE yylloc; ;} break; - case 314: + case 315: #line 499 "third_party/libpg_query/grammar/statements/create.y" { PGConstraint *n = makeNode(PGConstraint); @@ -201444,7 +215296,7 @@ YYLTYPE 
yylloc; ;} break; - case 315: + case 316: #line 513 "third_party/libpg_query/grammar/statements/create.y" { PGConstraint *n = makeNode(PGConstraint); @@ -201460,7 +215312,7 @@ YYLTYPE yylloc; ;} break; - case 316: + case 317: #line 526 "third_party/libpg_query/grammar/statements/create.y" { PGConstraint *n = makeNode(PGConstraint); @@ -201477,7 +215329,7 @@ YYLTYPE yylloc; ;} break; - case 317: + case 318: #line 541 "third_party/libpg_query/grammar/statements/create.y" { PGConstraint *n = makeNode(PGConstraint); @@ -201493,7 +215345,7 @@ YYLTYPE yylloc; ;} break; - case 318: + case 319: #line 554 "third_party/libpg_query/grammar/statements/create.y" { PGConstraint *n = makeNode(PGConstraint); @@ -201510,7 +215362,7 @@ YYLTYPE yylloc; ;} break; - case 319: + case 320: #line 569 "third_party/libpg_query/grammar/statements/create.y" { PGConstraint *n = makeNode(PGConstraint); @@ -201531,28 +215383,28 @@ YYLTYPE yylloc; ;} break; - case 320: + case 321: #line 591 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].node)); ;} break; - case 321: + case 322: #line 595 "third_party/libpg_query/grammar/statements/create.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].node)); ;} break; - case 322: + case 323: #line 602 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = PG_FKCONSTR_MATCH_FULL; ;} break; - case 323: + case 324: #line 606 "third_party/libpg_query/grammar/statements/create.y" { ereport(ERROR, @@ -201563,21 +215415,21 @@ YYLTYPE yylloc; ;} break; - case 324: + case 325: #line 614 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = PG_FKCONSTR_MATCH_SIMPLE; ;} break; - case 325: + case 326: #line 618 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = PG_FKCONSTR_MATCH_SIMPLE; ;} break; - case 326: + case 327: #line 626 "third_party/libpg_query/grammar/statements/create.y" { PGTableLikeClause *n = makeNode(PGTableLikeClause); @@ 
-201587,27 +215439,27 @@ YYLTYPE yylloc; ;} break; - case 327: + case 328: #line 635 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = PG_RELPERSISTENCE_TEMP; ;} break; - case 328: + case 329: #line 636 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = PG_RELPERSISTENCE_TEMP; ;} break; - case 329: + case 330: #line 637 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = PG_RELPERSISTENCE_TEMP; ;} break; - case 330: + case 331: #line 638 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = PG_RELPERSISTENCE_TEMP; ;} break; - case 331: + case 332: #line 640 "third_party/libpg_query/grammar/statements/create.y" { ereport(PGWARNING, @@ -201617,7 +215469,7 @@ YYLTYPE yylloc; ;} break; - case 332: + case 333: #line 647 "third_party/libpg_query/grammar/statements/create.y" { ereport(PGWARNING, @@ -201627,27 +215479,27 @@ YYLTYPE yylloc; ;} break; - case 333: + case 334: #line 653 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = PG_RELPERSISTENCE_UNLOGGED; ;} break; - case 334: + case 335: #line 654 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = RELPERSISTENCE_PERMANENT; ;} break; - case 335: + case 336: #line 659 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = PG_ATTRIBUTE_IDENTITY_ALWAYS; ;} break; - case 336: + case 337: #line 660 "third_party/libpg_query/grammar/statements/create.y" { (yyval.ival) = ATTRIBUTE_IDENTITY_BY_DEFAULT; ;} break; - case 337: + case 338: #line 10 "third_party/libpg_query/grammar/statements/drop.y" { PGDropStmt *n = makeNode(PGDropStmt); @@ -201660,7 +215512,7 @@ YYLTYPE yylloc; ;} break; - case 338: + case 339: #line 20 "third_party/libpg_query/grammar/statements/drop.y" { PGDropStmt *n = makeNode(PGDropStmt); @@ -201673,7 +215525,7 @@ YYLTYPE yylloc; ;} break; - case 339: + case 340: #line 30 "third_party/libpg_query/grammar/statements/drop.y" { PGDropStmt *n = makeNode(PGDropStmt); @@ -201686,7 
+215538,7 @@ YYLTYPE yylloc; ;} break; - case 340: + case 341: #line 40 "third_party/libpg_query/grammar/statements/drop.y" { PGDropStmt *n = makeNode(PGDropStmt); @@ -201699,7 +215551,7 @@ YYLTYPE yylloc; ;} break; - case 341: + case 342: #line 50 "third_party/libpg_query/grammar/statements/drop.y" { PGDropStmt *n = makeNode(PGDropStmt); @@ -201712,7 +215564,7 @@ YYLTYPE yylloc; ;} break; - case 342: + case 343: #line 60 "third_party/libpg_query/grammar/statements/drop.y" { PGDropStmt *n = makeNode(PGDropStmt); @@ -201725,157 +215577,157 @@ YYLTYPE yylloc; ;} break; - case 343: + case 344: #line 73 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_TABLE; ;} break; - case 344: + case 345: #line 74 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_SEQUENCE; ;} break; - case 345: + case 346: #line 75 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_FUNCTION; ;} break; - case 346: + case 347: #line 76 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_FUNCTION; ;} break; - case 347: + case 348: #line 77 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_VIEW; ;} break; - case 348: + case 349: #line 78 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_MATVIEW; ;} break; - case 349: + case 350: #line 79 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_INDEX; ;} break; - case 350: + case 351: #line 80 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_FOREIGN_TABLE; ;} break; - case 351: + case 352: #line 81 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_COLLATION; ;} break; - case 352: + case 353: #line 82 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_CONVERSION; ;} break; - case 353: + case 354: #line 83 
"third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_STATISTIC_EXT; ;} break; - case 354: + case 355: #line 84 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_TSPARSER; ;} break; - case 355: + case 356: #line 85 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_TSDICTIONARY; ;} break; - case 356: + case 357: #line 86 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_TSTEMPLATE; ;} break; - case 357: + case 358: #line 87 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_TSCONFIGURATION; ;} break; - case 358: + case 359: #line 92 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_ACCESS_METHOD; ;} break; - case 359: + case 360: #line 93 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_EVENT_TRIGGER; ;} break; - case 360: + case 361: #line 94 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_EXTENSION; ;} break; - case 361: + case 362: #line 95 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_FDW; ;} break; - case 362: + case 363: #line 96 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_PUBLICATION; ;} break; - case 363: + case 364: #line 97 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_SCHEMA; ;} break; - case 364: + case 365: #line 98 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_FOREIGN_SERVER; ;} break; - case 365: + case 366: #line 103 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].list)); ;} break; - case 366: + case 367: #line 104 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].list)); ;} break; - case 367: + case 368: #line 109 
"third_party/libpg_query/grammar/statements/drop.y" { (yyval.dbehavior) = PG_DROP_CASCADE; ;} break; - case 368: + case 369: #line 110 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.dbehavior) = PG_DROP_RESTRICT; ;} break; - case 369: + case 370: #line 111 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.dbehavior) = PG_DROP_RESTRICT; /* default */ ;} break; - case 370: + case 371: #line 116 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_POLICY; ;} break; - case 371: + case 372: #line 117 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_RULE; ;} break; - case 372: + case 373: #line 118 "third_party/libpg_query/grammar/statements/drop.y" { (yyval.objtype) = PG_OBJECT_TRIGGER; ;} break; - case 373: + case 374: #line 8 "third_party/libpg_query/grammar/statements/create_function.y" { PGCreateFunctionStmt *n = makeNode(PGCreateFunctionStmt); @@ -201886,21 +215738,21 @@ YYLTYPE yylloc; ;} break; - case 376: + case 377: #line 24 "third_party/libpg_query/grammar/statements/create_function.y" { (yyval.list) = NIL; ;} break; - case 377: + case 378: #line 28 "third_party/libpg_query/grammar/statements/create_function.y" { (yyval.list) = (yyvsp[(2) - (3)].list); ;} break; - case 378: + case 379: #line 12 "third_party/libpg_query/grammar/statements/update.y" { PGUpdateStmt *n = makeNode(PGUpdateStmt); @@ -201914,7 +215766,7 @@ YYLTYPE yylloc; ;} break; - case 379: + case 380: #line 3 "third_party/libpg_query/grammar/statements/copy.y" { PGCopyStmt *n = makeNode(PGCopyStmt); @@ -201945,7 +215797,7 @@ YYLTYPE yylloc; ;} break; - case 380: + case 381: #line 31 "third_party/libpg_query/grammar/statements/copy.y" { PGCopyStmt *n = makeNode(PGCopyStmt); @@ -201967,296 +215819,296 @@ YYLTYPE yylloc; ;} break; - case 381: + case 382: #line 53 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.boolean) = true; ;} break; - case 382: + case 383: #line 54 
"third_party/libpg_query/grammar/statements/copy.y" { (yyval.boolean) = false; ;} break; - case 383: + case 384: #line 60 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.defelt) = makeDefElem("delimiter", (PGNode *)makeString((yyvsp[(3) - (3)].str)), (yylsp[(2) - (3)])); ;} break; - case 384: + case 385: #line 63 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.defelt) = NULL; ;} break; - case 385: + case 386: #line 69 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].node)); ;} break; - case 386: + case 387: #line 73 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].node)); ;} break; - case 387: + case 388: #line 80 "third_party/libpg_query/grammar/statements/copy.y" {;} break; - case 388: + case 389: #line 81 "third_party/libpg_query/grammar/statements/copy.y" {;} break; - case 389: + case 390: #line 85 "third_party/libpg_query/grammar/statements/copy.y" {;} break; - case 390: + case 391: #line 86 "third_party/libpg_query/grammar/statements/copy.y" {;} break; - case 391: + case 392: #line 91 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.boolean) = true; ;} break; - case 392: + case 393: #line 92 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.boolean) = false; ;} break; - case 393: + case 394: #line 96 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.list) = (yyvsp[(1) - (1)].list); ;} break; - case 394: + case 395: #line 97 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.list) = (yyvsp[(2) - (3)].list); ;} break; - case 395: + case 396: #line 102 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.node) = (PGNode *) makeString((yyvsp[(1) - (1)].str)); ;} break; - case 396: + case 397: #line 103 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.node) = (PGNode *) (yyvsp[(1) - (1)].value); ;} break; - case 397: + case 398: #line 104 
"third_party/libpg_query/grammar/statements/copy.y" { (yyval.node) = (PGNode *) makeNode(PGAStar); ;} break; - case 398: + case 399: #line 105 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.node) = (PGNode *) (yyvsp[(2) - (3)].list); ;} break; - case 399: + case 400: #line 106 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.node) = NULL; ;} break; - case 400: + case 401: #line 112 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.defelt) = makeDefElem((yyvsp[(1) - (2)].str), (yyvsp[(2) - (2)].node), (yylsp[(1) - (2)])); ;} break; - case 401: + case 402: #line 120 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.defelt) = makeDefElem("oids", (PGNode *)makeInteger(true), (yylsp[(1) - (2)])); ;} break; - case 402: + case 403: #line 123 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.defelt) = NULL; ;} break; - case 403: + case 404: #line 128 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.list) = lappend((yyvsp[(1) - (2)].list), (yyvsp[(2) - (2)].defelt)); ;} break; - case 404: + case 405: #line 129 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.list) = NIL; ;} break; - case 405: + case 406: #line 135 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.defelt) = makeDefElem("format", (PGNode *)makeString("binary"), (yylsp[(1) - (1)])); ;} break; - case 406: + case 407: #line 138 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.defelt) = NULL; ;} break; - case 407: + case 408: #line 144 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.defelt) = makeDefElem("format", (PGNode *)makeString("binary"), (yylsp[(1) - (1)])); ;} break; - case 408: + case 409: #line 148 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.defelt) = makeDefElem("oids", (PGNode *)makeInteger(true), (yylsp[(1) - (1)])); ;} break; - case 409: + case 410: #line 152 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.defelt) = makeDefElem("freeze", (PGNode 
*)makeInteger(true), (yylsp[(1) - (1)])); ;} break; - case 410: + case 411: #line 156 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.defelt) = makeDefElem("delimiter", (PGNode *)makeString((yyvsp[(3) - (3)].str)), (yylsp[(1) - (3)])); ;} break; - case 411: + case 412: #line 160 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.defelt) = makeDefElem("null", (PGNode *)makeString((yyvsp[(3) - (3)].str)), (yylsp[(1) - (3)])); ;} break; - case 412: + case 413: #line 164 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.defelt) = makeDefElem("format", (PGNode *)makeString("csv"), (yylsp[(1) - (1)])); ;} break; - case 413: + case 414: #line 168 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.defelt) = makeDefElem("header", (PGNode *)makeInteger(true), (yylsp[(1) - (1)])); ;} break; - case 414: + case 415: #line 172 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.defelt) = makeDefElem("quote", (PGNode *)makeString((yyvsp[(3) - (3)].str)), (yylsp[(1) - (3)])); ;} break; - case 415: + case 416: #line 176 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.defelt) = makeDefElem("escape", (PGNode *)makeString((yyvsp[(3) - (3)].str)), (yylsp[(1) - (3)])); ;} break; - case 416: + case 417: #line 180 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.defelt) = makeDefElem("force_quote", (PGNode *)(yyvsp[(3) - (3)].list), (yylsp[(1) - (3)])); ;} break; - case 417: + case 418: #line 184 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.defelt) = makeDefElem("force_quote", (PGNode *)makeNode(PGAStar), (yylsp[(1) - (3)])); ;} break; - case 418: + case 419: #line 188 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.defelt) = makeDefElem("force_not_null", (PGNode *)(yyvsp[(4) - (4)].list), (yylsp[(1) - (4)])); ;} break; - case 419: + case 420: #line 192 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.defelt) = makeDefElem("force_null", (PGNode *)(yyvsp[(3) - (3)].list), 
(yylsp[(1) - (3)])); ;} break; - case 420: + case 421: #line 196 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.defelt) = makeDefElem("encoding", (PGNode *)makeString((yyvsp[(2) - (2)].str)), (yylsp[(1) - (2)])); ;} break; - case 421: + case 422: #line 203 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.node) = (PGNode *) makeString((yyvsp[(1) - (1)].str)); ;} break; - case 422: + case 423: #line 209 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 423: + case 424: #line 210 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.str) = NULL; ;} break; - case 424: + case 425: #line 211 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.str) = NULL; ;} break; - case 425: + case 426: #line 217 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].defelt)); ;} break; - case 426: + case 427: #line 221 "third_party/libpg_query/grammar/statements/copy.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].defelt)); ;} break; - case 429: + case 430: #line 52 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(2) - (3)].node); ;} break; - case 430: + case 431: #line 53 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(2) - (3)].node); ;} break; - case 431: + case 432: #line 68 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 432: + case 433: #line 70 "third_party/libpg_query/grammar/statements/select.y" { insertSelectOptions((PGSelectStmt *) (yyvsp[(1) - (2)].node), (yyvsp[(2) - (2)].list), NIL, @@ -202266,7 +216118,7 @@ YYLTYPE yylloc; ;} break; - case 433: + case 434: #line 77 "third_party/libpg_query/grammar/statements/select.y" { insertSelectOptions((PGSelectStmt *) (yyvsp[(1) - (4)].node), (yyvsp[(2) - (4)].list), (yyvsp[(3) - (4)].list), @@ -202277,7 +216129,7 @@ YYLTYPE yylloc; ;} break; - 
case 434: + case 435: #line 85 "third_party/libpg_query/grammar/statements/select.y" { insertSelectOptions((PGSelectStmt *) (yyvsp[(1) - (4)].node), (yyvsp[(2) - (4)].list), (yyvsp[(4) - (4)].list), @@ -202288,7 +216140,7 @@ YYLTYPE yylloc; ;} break; - case 435: + case 436: #line 93 "third_party/libpg_query/grammar/statements/select.y" { insertSelectOptions((PGSelectStmt *) (yyvsp[(2) - (2)].node), NULL, NIL, @@ -202299,7 +216151,7 @@ YYLTYPE yylloc; ;} break; - case 436: + case 437: #line 101 "third_party/libpg_query/grammar/statements/select.y" { insertSelectOptions((PGSelectStmt *) (yyvsp[(2) - (3)].node), (yyvsp[(3) - (3)].list), NIL, @@ -202310,7 +216162,7 @@ YYLTYPE yylloc; ;} break; - case 437: + case 438: #line 109 "third_party/libpg_query/grammar/statements/select.y" { insertSelectOptions((PGSelectStmt *) (yyvsp[(2) - (5)].node), (yyvsp[(3) - (5)].list), (yyvsp[(4) - (5)].list), @@ -202321,7 +216173,7 @@ YYLTYPE yylloc; ;} break; - case 438: + case 439: #line 117 "third_party/libpg_query/grammar/statements/select.y" { insertSelectOptions((PGSelectStmt *) (yyvsp[(2) - (5)].node), (yyvsp[(3) - (5)].list), (yyvsp[(5) - (5)].list), @@ -202332,17 +216184,17 @@ YYLTYPE yylloc; ;} break; - case 439: + case 440: #line 127 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 440: + case 441: #line 128 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 441: + case 442: #line 158 "third_party/libpg_query/grammar/statements/select.y" { PGSelectStmt *n = makeNode(PGSelectStmt); @@ -202358,7 +216210,7 @@ YYLTYPE yylloc; ;} break; - case 442: + case 443: #line 173 "third_party/libpg_query/grammar/statements/select.y" { PGSelectStmt *n = makeNode(PGSelectStmt); @@ -202375,12 +216227,12 @@ YYLTYPE yylloc; ;} break; - case 443: + case 444: #line 186 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; 
- case 444: + case 445: #line 188 "third_party/libpg_query/grammar/statements/select.y" { /* same as SELECT * FROM relation_expr */ @@ -202402,28 +216254,28 @@ YYLTYPE yylloc; ;} break; - case 445: + case 446: #line 207 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSetOp(PG_SETOP_UNION, (yyvsp[(3) - (4)].boolean), (yyvsp[(1) - (4)].node), (yyvsp[(4) - (4)].node)); ;} break; - case 446: + case 447: #line 211 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSetOp(PG_SETOP_INTERSECT, (yyvsp[(3) - (4)].boolean), (yyvsp[(1) - (4)].node), (yyvsp[(4) - (4)].node)); ;} break; - case 447: + case 448: #line 215 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSetOp(PG_SETOP_EXCEPT, (yyvsp[(3) - (4)].boolean), (yyvsp[(1) - (4)].node), (yyvsp[(4) - (4)].node)); ;} break; - case 448: + case 449: #line 232 "third_party/libpg_query/grammar/statements/select.y" { (yyval.with) = makeNode(PGWithClause); @@ -202433,7 +216285,7 @@ YYLTYPE yylloc; ;} break; - case 449: + case 450: #line 239 "third_party/libpg_query/grammar/statements/select.y" { (yyval.with) = makeNode(PGWithClause); @@ -202443,7 +216295,7 @@ YYLTYPE yylloc; ;} break; - case 450: + case 451: #line 246 "third_party/libpg_query/grammar/statements/select.y" { (yyval.with) = makeNode(PGWithClause); @@ -202453,17 +216305,17 @@ YYLTYPE yylloc; ;} break; - case 451: + case 452: #line 255 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].node)); ;} break; - case 452: + case 453: #line 256 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].node)); ;} break; - case 453: + case 454: #line 260 "third_party/libpg_query/grammar/statements/select.y" { PGCommonTableExpr *n = makeNode(PGCommonTableExpr); @@ -202475,7 +216327,7 @@ YYLTYPE yylloc; ;} break; - case 454: + case 455: #line 272 
"third_party/libpg_query/grammar/statements/select.y" { (yyval.into) = makeNode(PGIntoClause); @@ -202488,12 +216340,12 @@ YYLTYPE yylloc; ;} break; - case 455: + case 456: #line 282 "third_party/libpg_query/grammar/statements/select.y" { (yyval.into) = NULL; ;} break; - case 456: + case 457: #line 291 "third_party/libpg_query/grammar/statements/select.y" { (yyval.range) = (yyvsp[(3) - (3)].range); @@ -202501,7 +216353,7 @@ YYLTYPE yylloc; ;} break; - case 457: + case 458: #line 296 "third_party/libpg_query/grammar/statements/select.y" { (yyval.range) = (yyvsp[(3) - (3)].range); @@ -202509,7 +216361,7 @@ YYLTYPE yylloc; ;} break; - case 458: + case 459: #line 301 "third_party/libpg_query/grammar/statements/select.y" { (yyval.range) = (yyvsp[(4) - (4)].range); @@ -202517,7 +216369,7 @@ YYLTYPE yylloc; ;} break; - case 459: + case 460: #line 306 "third_party/libpg_query/grammar/statements/select.y" { (yyval.range) = (yyvsp[(4) - (4)].range); @@ -202525,7 +216377,7 @@ YYLTYPE yylloc; ;} break; - case 460: + case 461: #line 311 "third_party/libpg_query/grammar/statements/select.y" { ereport(PGWARNING, @@ -202536,7 +216388,7 @@ YYLTYPE yylloc; ;} break; - case 461: + case 462: #line 319 "third_party/libpg_query/grammar/statements/select.y" { ereport(PGWARNING, @@ -202547,7 +216399,7 @@ YYLTYPE yylloc; ;} break; - case 462: + case 463: #line 327 "third_party/libpg_query/grammar/statements/select.y" { (yyval.range) = (yyvsp[(3) - (3)].range); @@ -202555,7 +216407,7 @@ YYLTYPE yylloc; ;} break; - case 463: + case 464: #line 332 "third_party/libpg_query/grammar/statements/select.y" { (yyval.range) = (yyvsp[(2) - (2)].range); @@ -202563,7 +216415,7 @@ YYLTYPE yylloc; ;} break; - case 464: + case 465: #line 337 "third_party/libpg_query/grammar/statements/select.y" { (yyval.range) = (yyvsp[(1) - (1)].range); @@ -202571,77 +216423,77 @@ YYLTYPE yylloc; ;} break; - case 465: + case 466: #line 343 "third_party/libpg_query/grammar/statements/select.y" {;} break; - case 466: + case 
467: #line 344 "third_party/libpg_query/grammar/statements/select.y" {;} break; - case 467: + case 468: #line 348 "third_party/libpg_query/grammar/statements/select.y" { (yyval.boolean) = true; ;} break; - case 468: + case 469: #line 349 "third_party/libpg_query/grammar/statements/select.y" { (yyval.boolean) = false; ;} break; - case 469: + case 470: #line 350 "third_party/libpg_query/grammar/statements/select.y" { (yyval.boolean) = false; ;} break; - case 470: + case 471: #line 357 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1(NIL); ;} break; - case 471: + case 472: #line 358 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(4) - (5)].list); ;} break; - case 472: + case 473: #line 362 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = NIL;;} break; - case 473: + case 474: #line 363 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = NIL; ;} break; - case 474: + case 475: #line 367 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(1) - (1)].list);;} break; - case 475: + case 476: #line 368 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = NIL; ;} break; - case 476: + case 477: #line 372 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(3) - (3)].list); ;} break; - case 477: + case 478: #line 376 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].sortby)); ;} break; - case 478: + case 479: #line 377 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].sortby)); ;} break; - case 479: + case 480: #line 381 "third_party/libpg_query/grammar/statements/select.y" { (yyval.sortby) = makeNode(PGSortBy); @@ -202653,7 +216505,7 @@ YYLTYPE yylloc; ;} break; - case 480: + case 481: #line 390 "third_party/libpg_query/grammar/statements/select.y" { (yyval.sortby) = makeNode(PGSortBy); 
@@ -202665,72 +216517,72 @@ YYLTYPE yylloc; ;} break; - case 481: + case 482: #line 400 "third_party/libpg_query/grammar/statements/select.y" { (yyval.sortorder) = PG_SORTBY_ASC; ;} break; - case 482: + case 483: #line 401 "third_party/libpg_query/grammar/statements/select.y" { (yyval.sortorder) = PG_SORTBY_DESC; ;} break; - case 483: + case 484: #line 402 "third_party/libpg_query/grammar/statements/select.y" { (yyval.sortorder) = PG_SORTBY_DEFAULT; ;} break; - case 484: + case 485: #line 405 "third_party/libpg_query/grammar/statements/select.y" { (yyval.nullorder) = PG_SORTBY_NULLS_FIRST; ;} break; - case 485: + case 486: #line 406 "third_party/libpg_query/grammar/statements/select.y" { (yyval.nullorder) = PG_SORTBY_NULLS_LAST; ;} break; - case 486: + case 487: #line 407 "third_party/libpg_query/grammar/statements/select.y" { (yyval.nullorder) = PG_SORTBY_NULLS_DEFAULT; ;} break; - case 487: + case 488: #line 411 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make2((yyvsp[(2) - (2)].node), (yyvsp[(1) - (2)].node)); ;} break; - case 488: + case 489: #line 412 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make2((yyvsp[(1) - (2)].node), (yyvsp[(2) - (2)].node)); ;} break; - case 489: + case 490: #line 413 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make2(NULL, (yyvsp[(1) - (1)].node)); ;} break; - case 490: + case 491: #line 414 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make2((yyvsp[(1) - (1)].node), NULL); ;} break; - case 491: + case 492: #line 418 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(1) - (1)].list); ;} break; - case 492: + case 493: #line 419 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make2(NULL,NULL); ;} break; - case 493: + case 494: #line 424 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(2) - (2)].node); ;} break; - case 494: + 
case 495: #line 426 "third_party/libpg_query/grammar/statements/select.y" { /* Disabled because it was too confusing, bjm 2002-02-18 */ @@ -202742,151 +216594,151 @@ YYLTYPE yylloc; ;} break; - case 495: + case 496: #line 442 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(3) - (5)].node); ;} break; - case 496: + case 497: #line 444 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeIntConst(1, -1); ;} break; - case 497: + case 498: #line 449 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(2) - (2)].node); ;} break; - case 498: + case 499: #line 452 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(2) - (3)].node); ;} break; - case 499: + case 500: #line 460 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSampleSize(makeFloat((yyvsp[(1) - (2)].str)), true); ;} break; - case 500: + case 501: #line 464 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSampleSize(makeInteger((yyvsp[(1) - (2)].ival)), true); ;} break; - case 501: + case 502: #line 468 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSampleSize(makeFloat((yyvsp[(1) - (2)].str)), true); ;} break; - case 502: + case 503: #line 472 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSampleSize(makeInteger((yyvsp[(1) - (2)].ival)), true); ;} break; - case 503: + case 504: #line 476 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSampleSize(makeInteger((yyvsp[(1) - (1)].ival)), false); ;} break; - case 504: + case 505: #line 480 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSampleSize(makeInteger((yyvsp[(1) - (2)].ival)), false); ;} break; - case 505: + case 506: #line 487 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(3) - (3)].node); ;} break; - case 506: + case 507: #line 491 
"third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = NULL; ;} break; - case 507: + case 508: #line 498 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 508: + case 509: #line 499 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = NULL; ;} break; - case 509: + case 510: #line 504 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSampleOptions((yyvsp[(3) - (5)].node), (yyvsp[(1) - (5)].str), (yyvsp[(5) - (5)].ival), (yylsp[(1) - (5)])); ;} break; - case 510: + case 511: #line 508 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSampleOptions((yyvsp[(1) - (1)].node), NULL, -1, (yylsp[(1) - (1)])); ;} break; - case 511: + case 512: #line 512 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSampleOptions((yyvsp[(1) - (4)].node), (yyvsp[(3) - (4)].str), -1, (yylsp[(1) - (4)])); ;} break; - case 512: + case 513: #line 516 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSampleOptions((yyvsp[(1) - (6)].node), (yyvsp[(3) - (6)].str), (yyvsp[(5) - (6)].ival), (yylsp[(1) - (6)])); ;} break; - case 513: + case 514: #line 523 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(2) - (2)].node); ;} break; - case 514: + case 515: #line 529 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 515: + case 516: #line 530 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = NULL; ;} break; - case 516: + case 517: #line 535 "third_party/libpg_query/grammar/statements/select.y" { (yyval.ival) = (yyvsp[(3) - (4)].ival); ;} break; - case 517: + case 518: #line 536 "third_party/libpg_query/grammar/statements/select.y" { (yyval.ival) = -1; ;} break; - case 518: + case 519: #line 540 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - 
case 519: + case 520: #line 542 "third_party/libpg_query/grammar/statements/select.y" { /* LIMIT ALL is represented as a NULL constant */ @@ -202894,134 +216746,134 @@ YYLTYPE yylloc; ;} break; - case 520: + case 521: #line 549 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 521: + case 522: #line 569 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 522: + case 523: #line 571 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "+", NULL, (yyvsp[(2) - (2)].node), (yylsp[(1) - (2)])); ;} break; - case 523: + case 524: #line 573 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = doNegate((yyvsp[(2) - (2)].node), (yylsp[(1) - (2)])); ;} break; - case 524: + case 525: #line 577 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeIntConst((yyvsp[(1) - (1)].ival),(yylsp[(1) - (1)])); ;} break; - case 525: + case 526: #line 578 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeFloatConst((yyvsp[(1) - (1)].str),(yylsp[(1) - (1)])); ;} break; - case 526: + case 527: #line 582 "third_party/libpg_query/grammar/statements/select.y" { (yyval.ival) = 0; ;} break; - case 527: + case 528: #line 583 "third_party/libpg_query/grammar/statements/select.y" { (yyval.ival) = 0; ;} break; - case 528: + case 529: #line 586 "third_party/libpg_query/grammar/statements/select.y" { (yyval.ival) = 0; ;} break; - case 529: + case 530: #line 587 "third_party/libpg_query/grammar/statements/select.y" { (yyval.ival) = 0; ;} break; - case 530: + case 531: #line 612 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(3) - (3)].list); ;} break; - case 531: + case 532: #line 613 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = NIL; ;} break; - case 532: + case 533: #line 617 
"third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].node)); ;} break; - case 533: + case 534: #line 618 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list),(yyvsp[(3) - (3)].node)); ;} break; - case 534: + case 535: #line 622 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 535: + case 536: #line 623 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 536: + case 537: #line 628 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeGroupingSet(GROUPING_SET_EMPTY, NIL, (yylsp[(1) - (2)])); ;} break; - case 537: + case 538: #line 640 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(2) - (2)].node); ;} break; - case 538: + case 539: #line 641 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = NULL; ;} break; - case 539: + case 540: #line 645 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(1) - (1)].list); ;} break; - case 540: + case 541: #line 646 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = NIL; ;} break; - case 541: + case 542: #line 650 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(1) - (1)].list); ;} break; - case 542: + case 543: #line 651 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = NIL; ;} break; - case 543: + case 544: #line 655 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].node)); ;} break; - case 544: + case 545: #line 656 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = lappend((yyvsp[(1) - (2)].list), (yyvsp[(2) - (2)].node)); ;} break; - case 545: + case 546: #line 661 "third_party/libpg_query/grammar/statements/select.y" { PGLockingClause *n = 
makeNode(PGLockingClause); @@ -203032,52 +216884,52 @@ YYLTYPE yylloc; ;} break; - case 546: + case 547: #line 671 "third_party/libpg_query/grammar/statements/select.y" { (yyval.lockstrength) = LCS_FORUPDATE; ;} break; - case 547: + case 548: #line 672 "third_party/libpg_query/grammar/statements/select.y" { (yyval.lockstrength) = PG_LCS_FORNOKEYUPDATE; ;} break; - case 548: + case 549: #line 673 "third_party/libpg_query/grammar/statements/select.y" { (yyval.lockstrength) = PG_LCS_FORSHARE; ;} break; - case 549: + case 550: #line 674 "third_party/libpg_query/grammar/statements/select.y" { (yyval.lockstrength) = PG_LCS_FORKEYSHARE; ;} break; - case 550: + case 551: #line 678 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(2) - (2)].list); ;} break; - case 551: + case 552: #line 679 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = NIL; ;} break; - case 552: + case 553: #line 684 "third_party/libpg_query/grammar/statements/select.y" { (yyval.lockwaitpolicy) = LockWaitError; ;} break; - case 553: + case 554: #line 685 "third_party/libpg_query/grammar/statements/select.y" { (yyval.lockwaitpolicy) = PGLockWaitSkip; ;} break; - case 554: + case 555: #line 686 "third_party/libpg_query/grammar/statements/select.y" { (yyval.lockwaitpolicy) = PGLockWaitBlock; ;} break; - case 555: + case 556: #line 696 "third_party/libpg_query/grammar/statements/select.y" { PGSelectStmt *n = makeNode(PGSelectStmt); @@ -203086,7 +216938,7 @@ YYLTYPE yylloc; ;} break; - case 556: + case 557: #line 702 "third_party/libpg_query/grammar/statements/select.y" { PGSelectStmt *n = (PGSelectStmt *) (yyvsp[(1) - (5)].node); @@ -203095,27 +216947,27 @@ YYLTYPE yylloc; ;} break; - case 557: + case 558: #line 719 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(2) - (2)].list); ;} break; - case 558: + case 559: #line 720 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = NIL; ;} break; - case 559: + case 
560: #line 724 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].node)); ;} break; - case 560: + case 561: #line 725 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].node)); ;} break; - case 561: + case 562: #line 732 "third_party/libpg_query/grammar/statements/select.y" { (yyvsp[(1) - (3)].range)->alias = (yyvsp[(2) - (3)].alias); @@ -203124,7 +216976,7 @@ YYLTYPE yylloc; ;} break; - case 562: + case 563: #line 738 "third_party/libpg_query/grammar/statements/select.y" { PGRangeFunction *n = (PGRangeFunction *) (yyvsp[(1) - (3)].node); @@ -203135,7 +216987,7 @@ YYLTYPE yylloc; ;} break; - case 563: + case 564: #line 746 "third_party/libpg_query/grammar/statements/select.y" { PGRangeFunction *n = (PGRangeFunction *) (yyvsp[(2) - (3)].node); @@ -203146,7 +216998,7 @@ YYLTYPE yylloc; ;} break; - case 564: + case 565: #line 754 "third_party/libpg_query/grammar/statements/select.y" { PGRangeSubselect *n = makeNode(PGRangeSubselect); @@ -203185,7 +217037,7 @@ YYLTYPE yylloc; ;} break; - case 565: + case 566: #line 790 "third_party/libpg_query/grammar/statements/select.y" { PGRangeSubselect *n = makeNode(PGRangeSubselect); @@ -203214,14 +217066,14 @@ YYLTYPE yylloc; ;} break; - case 566: + case 567: #line 816 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) (yyvsp[(1) - (1)].jexpr); ;} break; - case 567: + case 568: #line 820 "third_party/libpg_query/grammar/statements/select.y" { (yyvsp[(2) - (4)].jexpr)->alias = (yyvsp[(4) - (4)].alias); @@ -203229,14 +217081,14 @@ YYLTYPE yylloc; ;} break; - case 568: + case 569: #line 846 "third_party/libpg_query/grammar/statements/select.y" { (yyval.jexpr) = (yyvsp[(2) - (3)].jexpr); ;} break; - case 569: + case 570: #line 850 "third_party/libpg_query/grammar/statements/select.y" { /* CROSS JOIN is same as unqualified inner join */ @@ -203252,7 +217104,7 @@ YYLTYPE yylloc; ;} 
break; - case 570: + case 571: #line 863 "third_party/libpg_query/grammar/statements/select.y" { PGJoinExpr *n = makeNode(PGJoinExpr); @@ -203269,7 +217121,7 @@ YYLTYPE yylloc; ;} break; - case 571: + case 572: #line 877 "third_party/libpg_query/grammar/statements/select.y" { /* letting join_type reduce to empty doesn't work */ @@ -203287,7 +217139,7 @@ YYLTYPE yylloc; ;} break; - case 572: + case 573: #line 892 "third_party/libpg_query/grammar/statements/select.y" { PGJoinExpr *n = makeNode(PGJoinExpr); @@ -203302,7 +217154,7 @@ YYLTYPE yylloc; ;} break; - case 573: + case 574: #line 904 "third_party/libpg_query/grammar/statements/select.y" { /* letting join_type reduce to empty doesn't work */ @@ -203318,7 +217170,7 @@ YYLTYPE yylloc; ;} break; - case 574: + case 575: #line 920 "third_party/libpg_query/grammar/statements/select.y" { (yyval.alias) = makeNode(PGAlias); @@ -203327,7 +217179,7 @@ YYLTYPE yylloc; ;} break; - case 575: + case 576: #line 926 "third_party/libpg_query/grammar/statements/select.y" { (yyval.alias) = makeNode(PGAlias); @@ -203335,7 +217187,7 @@ YYLTYPE yylloc; ;} break; - case 576: + case 577: #line 931 "third_party/libpg_query/grammar/statements/select.y" { (yyval.alias) = makeNode(PGAlias); @@ -203344,7 +217196,7 @@ YYLTYPE yylloc; ;} break; - case 577: + case 578: #line 937 "third_party/libpg_query/grammar/statements/select.y" { (yyval.alias) = makeNode(PGAlias); @@ -203352,31 +217204,31 @@ YYLTYPE yylloc; ;} break; - case 578: + case 579: #line 943 "third_party/libpg_query/grammar/statements/select.y" { (yyval.alias) = (yyvsp[(1) - (1)].alias); ;} break; - case 579: + case 580: #line 944 "third_party/libpg_query/grammar/statements/select.y" { (yyval.alias) = NULL; ;} break; - case 580: + case 581: #line 953 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make2((yyvsp[(1) - (1)].alias), NIL); ;} break; - case 581: + case 582: #line 957 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = 
list_make2(NULL, (yyvsp[(3) - (4)].list)); ;} break; - case 582: + case 583: #line 961 "third_party/libpg_query/grammar/statements/select.y" { PGAlias *a = makeNode(PGAlias); @@ -203385,7 +217237,7 @@ YYLTYPE yylloc; ;} break; - case 583: + case 584: #line 967 "third_party/libpg_query/grammar/statements/select.y" { PGAlias *a = makeNode(PGAlias); @@ -203394,54 +217246,54 @@ YYLTYPE yylloc; ;} break; - case 584: + case 585: #line 973 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make2(NULL, NIL); ;} break; - case 585: + case 586: #line 978 "third_party/libpg_query/grammar/statements/select.y" { (yyval.jtype) = PG_JOIN_FULL; ;} break; - case 586: + case 587: #line 979 "third_party/libpg_query/grammar/statements/select.y" { (yyval.jtype) = PG_JOIN_LEFT; ;} break; - case 587: + case 588: #line 980 "third_party/libpg_query/grammar/statements/select.y" { (yyval.jtype) = PG_JOIN_RIGHT; ;} break; - case 588: + case 589: #line 981 "third_party/libpg_query/grammar/statements/select.y" { (yyval.jtype) = PG_JOIN_INNER; ;} break; - case 589: + case 590: #line 985 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = NULL; ;} break; - case 590: + case 591: #line 986 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = NULL; ;} break; - case 591: + case 592: #line 998 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) (yyvsp[(3) - (4)].list); ;} break; - case 592: + case 593: #line 999 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(2) - (2)].node); ;} break; - case 593: + case 594: #line 1005 "third_party/libpg_query/grammar/statements/select.y" { /* inheritance query, implicitly */ @@ -203451,7 +217303,7 @@ YYLTYPE yylloc; ;} break; - case 594: + case 595: #line 1012 "third_party/libpg_query/grammar/statements/select.y" { /* inheritance query, explicitly */ @@ -203461,7 +217313,7 @@ YYLTYPE yylloc; ;} break; - case 595: + case 596: #line 1019 
"third_party/libpg_query/grammar/statements/select.y" { /* no inheritance */ @@ -203471,7 +217323,7 @@ YYLTYPE yylloc; ;} break; - case 596: + case 597: #line 1026 "third_party/libpg_query/grammar/statements/select.y" { /* no inheritance, SQL99-style syntax */ @@ -203481,7 +217333,7 @@ YYLTYPE yylloc; ;} break; - case 597: + case 598: #line 1058 "third_party/libpg_query/grammar/statements/select.y" { PGRangeFunction *n = makeNode(PGRangeFunction); @@ -203495,7 +217347,7 @@ YYLTYPE yylloc; ;} break; - case 598: + case 599: #line 1069 "third_party/libpg_query/grammar/statements/select.y" { PGRangeFunction *n = makeNode(PGRangeFunction); @@ -203509,66 +217361,66 @@ YYLTYPE yylloc; ;} break; - case 599: + case 600: #line 1082 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make2((yyvsp[(1) - (2)].node), (yyvsp[(2) - (2)].list)); ;} break; - case 600: + case 601: #line 1086 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].list)); ;} break; - case 601: + case 602: #line 1087 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].list)); ;} break; - case 602: + case 603: #line 1090 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(3) - (4)].list); ;} break; - case 603: + case 604: #line 1091 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = NIL; ;} break; - case 604: + case 605: #line 1094 "third_party/libpg_query/grammar/statements/select.y" { (yyval.boolean) = true; ;} break; - case 605: + case 606: #line 1095 "third_party/libpg_query/grammar/statements/select.y" { (yyval.boolean) = false; ;} break; - case 606: + case 607: #line 1100 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(2) - (2)].node); ;} break; - case 607: + case 608: #line 1101 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = NULL; ;} break; - case 608: + case 
609: #line 1107 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].node)); ;} break; - case 609: + case 610: #line 1111 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].node)); ;} break; - case 610: + case 611: #line 1117 "third_party/libpg_query/grammar/statements/select.y" { PGColumnDef *n = makeNode(PGColumnDef); @@ -203589,7 +217441,7 @@ YYLTYPE yylloc; ;} break; - case 611: + case 612: #line 1138 "third_party/libpg_query/grammar/statements/select.y" { PGCollateClause *n = makeNode(PGCollateClause); @@ -203600,26 +217452,26 @@ YYLTYPE yylloc; ;} break; - case 612: + case 613: #line 1145 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = NULL; ;} break; - case 613: + case 614: #line 1158 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1(list_make2(makeString((yyvsp[(1) - (2)].str)), (yyvsp[(2) - (2)].typnam))); ;} break; - case 614: + case 615: #line 1161 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = lappend((yyvsp[(1) - (4)].list), list_make2(makeString((yyvsp[(3) - (4)].str)), (yyvsp[(4) - (4)].typnam))); ;} break; - case 617: + case 618: #line 1168 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = (yyvsp[(1) - (2)].typnam); @@ -203627,7 +217479,7 @@ YYLTYPE yylloc; ;} break; - case 618: + case 619: #line 1173 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = (yyvsp[(2) - (3)].typnam); @@ -203636,7 +217488,7 @@ YYLTYPE yylloc; ;} break; - case 619: + case 620: #line 1180 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = (yyvsp[(1) - (5)].typnam); @@ -203644,7 +217496,7 @@ YYLTYPE yylloc; ;} break; - case 620: + case 621: #line 1185 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = (yyvsp[(2) - (6)].typnam); @@ -203653,7 +217505,7 @@ YYLTYPE yylloc; ;} break; - case 621: + 
case 622: #line 1191 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = (yyvsp[(1) - (2)].typnam); @@ -203661,7 +217513,7 @@ YYLTYPE yylloc; ;} break; - case 622: + case 623: #line 1196 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = (yyvsp[(2) - (3)].typnam); @@ -203670,74 +217522,76 @@ YYLTYPE yylloc; ;} break; - case 623: + case 624: #line 1201 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = SystemTypeName("struct"); - (yyval.typnam)->typmods = (yyvsp[(3) - (4)].list); - (yyval.typnam)->location = (yylsp[(1) - (4)]); + (yyval.typnam)->arrayBounds = (yyvsp[(5) - (5)].list); + (yyval.typnam)->typmods = (yyvsp[(3) - (5)].list); + (yyval.typnam)->location = (yylsp[(1) - (5)]); ;} break; - case 624: -#line 1206 "third_party/libpg_query/grammar/statements/select.y" + case 625: +#line 1207 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = SystemTypeName("map"); - (yyval.typnam)->typmods = (yyvsp[(3) - (4)].list); - (yyval.typnam)->location = (yylsp[(1) - (4)]); + (yyval.typnam)->arrayBounds = (yyvsp[(5) - (5)].list); + (yyval.typnam)->typmods = (yyvsp[(3) - (5)].list); + (yyval.typnam)->location = (yylsp[(1) - (5)]); ;} break; - case 625: -#line 1215 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.list) = lappend((yyvsp[(1) - (3)].list), makeInteger(-1)); ;} - break; - case 626: #line 1217 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.list) = lappend((yyvsp[(1) - (4)].list), makeInteger((yyvsp[(3) - (4)].ival))); ;} + { (yyval.list) = lappend((yyvsp[(1) - (3)].list), makeInteger(-1)); ;} break; case 627: #line 1219 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.list) = NIL; ;} + { (yyval.list) = lappend((yyvsp[(1) - (4)].list), makeInteger((yyvsp[(3) - (4)].ival))); ;} break; case 628: -#line 1223 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.typnam) = (yyvsp[(1) - (1)].typnam); ;} +#line 1221 
"third_party/libpg_query/grammar/statements/select.y" + { (yyval.list) = NIL; ;} break; case 629: -#line 1224 "third_party/libpg_query/grammar/statements/select.y" +#line 1225 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = (yyvsp[(1) - (1)].typnam); ;} break; case 630: -#line 1225 "third_party/libpg_query/grammar/statements/select.y" +#line 1226 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = (yyvsp[(1) - (1)].typnam); ;} break; case 631: -#line 1226 "third_party/libpg_query/grammar/statements/select.y" +#line 1227 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = (yyvsp[(1) - (1)].typnam); ;} break; case 632: -#line 1227 "third_party/libpg_query/grammar/statements/select.y" +#line 1228 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = (yyvsp[(1) - (1)].typnam); ;} break; case 633: #line 1229 "third_party/libpg_query/grammar/statements/select.y" + { (yyval.typnam) = (yyvsp[(1) - (1)].typnam); ;} + break; + + case 634: +#line 1231 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = (yyvsp[(1) - (2)].typnam); (yyval.typnam)->typmods = (yyvsp[(2) - (2)].list); ;} break; - case 634: -#line 1234 "third_party/libpg_query/grammar/statements/select.y" + case 635: +#line 1236 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = (yyvsp[(1) - (4)].typnam); (yyval.typnam)->typmods = list_make2(makeIntConst(INTERVAL_FULL_RANGE, -1), @@ -203745,28 +217599,28 @@ YYLTYPE yylloc; ;} break; - case 635: -#line 1253 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.typnam) = (yyvsp[(1) - (1)].typnam); ;} - break; - case 636: -#line 1254 "third_party/libpg_query/grammar/statements/select.y" +#line 1255 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = (yyvsp[(1) - (1)].typnam); ;} break; case 637: -#line 1255 "third_party/libpg_query/grammar/statements/select.y" +#line 1256 
"third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = (yyvsp[(1) - (1)].typnam); ;} break; case 638: -#line 1256 "third_party/libpg_query/grammar/statements/select.y" +#line 1257 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = (yyvsp[(1) - (1)].typnam); ;} break; case 639: -#line 1268 "third_party/libpg_query/grammar/statements/select.y" +#line 1258 "third_party/libpg_query/grammar/statements/select.y" + { (yyval.typnam) = (yyvsp[(1) - (1)].typnam); ;} + break; + + case 640: +#line 1270 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = makeTypeName((yyvsp[(1) - (2)].str)); (yyval.typnam)->typmods = (yyvsp[(2) - (2)].list); @@ -203774,74 +217628,74 @@ YYLTYPE yylloc; ;} break; - case 640: -#line 1281 "third_party/libpg_query/grammar/statements/select.y" + case 641: +#line 1283 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(2) - (3)].list); ;} break; - case 641: -#line 1282 "third_party/libpg_query/grammar/statements/select.y" + case 642: +#line 1284 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = NIL; ;} break; - case 642: -#line 1289 "third_party/libpg_query/grammar/statements/select.y" + case 643: +#line 1291 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = SystemTypeName("int4"); (yyval.typnam)->location = (yylsp[(1) - (1)]); ;} break; - case 643: -#line 1294 "third_party/libpg_query/grammar/statements/select.y" + case 644: +#line 1296 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = SystemTypeName("int4"); (yyval.typnam)->location = (yylsp[(1) - (1)]); ;} break; - case 644: -#line 1299 "third_party/libpg_query/grammar/statements/select.y" + case 645: +#line 1301 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = SystemTypeName("int2"); (yyval.typnam)->location = (yylsp[(1) - (1)]); ;} break; - case 645: -#line 1304 "third_party/libpg_query/grammar/statements/select.y" + 
case 646: +#line 1306 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = SystemTypeName("int8"); (yyval.typnam)->location = (yylsp[(1) - (1)]); ;} break; - case 646: -#line 1309 "third_party/libpg_query/grammar/statements/select.y" + case 647: +#line 1311 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = SystemTypeName("float4"); (yyval.typnam)->location = (yylsp[(1) - (1)]); ;} break; - case 647: -#line 1314 "third_party/libpg_query/grammar/statements/select.y" + case 648: +#line 1316 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = (yyvsp[(2) - (2)].typnam); (yyval.typnam)->location = (yylsp[(1) - (2)]); ;} break; - case 648: -#line 1319 "third_party/libpg_query/grammar/statements/select.y" + case 649: +#line 1321 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = SystemTypeName("float8"); (yyval.typnam)->location = (yylsp[(1) - (2)]); ;} break; - case 649: -#line 1324 "third_party/libpg_query/grammar/statements/select.y" + case 650: +#line 1326 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = SystemTypeName("numeric"); (yyval.typnam)->typmods = (yyvsp[(2) - (2)].list); @@ -203849,8 +217703,8 @@ YYLTYPE yylloc; ;} break; - case 650: -#line 1330 "third_party/libpg_query/grammar/statements/select.y" + case 651: +#line 1332 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = SystemTypeName("numeric"); (yyval.typnam)->typmods = (yyvsp[(2) - (2)].list); @@ -203858,8 +217712,8 @@ YYLTYPE yylloc; ;} break; - case 651: -#line 1336 "third_party/libpg_query/grammar/statements/select.y" + case 652: +#line 1338 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = SystemTypeName("numeric"); (yyval.typnam)->typmods = (yyvsp[(2) - (2)].list); @@ -203867,16 +217721,16 @@ YYLTYPE yylloc; ;} break; - case 652: -#line 1342 "third_party/libpg_query/grammar/statements/select.y" + case 653: +#line 1344 
"third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = SystemTypeName("bool"); (yyval.typnam)->location = (yylsp[(1) - (1)]); ;} break; - case 653: -#line 1349 "third_party/libpg_query/grammar/statements/select.y" + case 654: +#line 1351 "third_party/libpg_query/grammar/statements/select.y" { /* * Check FLOAT() precision limits assuming IEEE floating @@ -203899,44 +217753,44 @@ YYLTYPE yylloc; ;} break; - case 654: -#line 1370 "third_party/libpg_query/grammar/statements/select.y" + case 655: +#line 1372 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = SystemTypeName("float4"); ;} break; - case 655: -#line 1380 "third_party/libpg_query/grammar/statements/select.y" + case 656: +#line 1382 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = (yyvsp[(1) - (1)].typnam); ;} break; - case 656: -#line 1384 "third_party/libpg_query/grammar/statements/select.y" + case 657: +#line 1386 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = (yyvsp[(1) - (1)].typnam); ;} break; - case 657: -#line 1392 "third_party/libpg_query/grammar/statements/select.y" + case 658: +#line 1394 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = (yyvsp[(1) - (1)].typnam); ;} break; - case 658: -#line 1396 "third_party/libpg_query/grammar/statements/select.y" + case 659: +#line 1398 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = (yyvsp[(1) - (1)].typnam); (yyval.typnam)->typmods = NIL; ;} break; - case 659: -#line 1404 "third_party/libpg_query/grammar/statements/select.y" + case 660: +#line 1406 "third_party/libpg_query/grammar/statements/select.y" { const char *typname; @@ -203947,8 +217801,8 @@ YYLTYPE yylloc; ;} break; - case 660: -#line 1416 "third_party/libpg_query/grammar/statements/select.y" + case 661: +#line 1418 "third_party/libpg_query/grammar/statements/select.y" { /* bit defaults to bit(1), varbit to no limit */ if ((yyvsp[(2) - (2)].boolean)) @@ -203964,29 
+217818,29 @@ YYLTYPE yylloc; ;} break; - case 661: -#line 1437 "third_party/libpg_query/grammar/statements/select.y" + case 662: +#line 1439 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = (yyvsp[(1) - (1)].typnam); ;} break; - case 662: -#line 1441 "third_party/libpg_query/grammar/statements/select.y" + case 663: +#line 1443 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = (yyvsp[(1) - (1)].typnam); ;} break; - case 663: -#line 1447 "third_party/libpg_query/grammar/statements/select.y" + case 664: +#line 1449 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = (yyvsp[(1) - (1)].typnam); ;} break; - case 664: -#line 1451 "third_party/libpg_query/grammar/statements/select.y" + case 665: +#line 1453 "third_party/libpg_query/grammar/statements/select.y" { /* Length was not specified so allow to be unrestricted. * This handles problems with fixed-length (bpchar) strings @@ -203999,8 +217853,8 @@ YYLTYPE yylloc; ;} break; - case 665: -#line 1464 "third_party/libpg_query/grammar/statements/select.y" + case 666: +#line 1466 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = SystemTypeName((yyvsp[(1) - (4)].conststr)); (yyval.typnam)->typmods = list_make1(makeIntConst((yyvsp[(3) - (4)].ival), (yylsp[(3) - (4)]))); @@ -204008,8 +217862,8 @@ YYLTYPE yylloc; ;} break; - case 666: -#line 1472 "third_party/libpg_query/grammar/statements/select.y" + case 667: +#line 1474 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = SystemTypeName((yyvsp[(1) - (1)].conststr)); /* char defaults to char(1), varchar to no limit */ @@ -204019,11 +217873,6 @@ YYLTYPE yylloc; ;} break; - case 667: -#line 1482 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.conststr) = (yyvsp[(2) - (2)].boolean) ? "varchar": "bpchar"; ;} - break; - case 668: #line 1484 "third_party/libpg_query/grammar/statements/select.y" { (yyval.conststr) = (yyvsp[(2) - (2)].boolean) ? 
"varchar": "bpchar"; ;} @@ -204031,12 +217880,12 @@ YYLTYPE yylloc; case 669: #line 1486 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.conststr) = "varchar"; ;} + { (yyval.conststr) = (yyvsp[(2) - (2)].boolean) ? "varchar": "bpchar"; ;} break; case 670: #line 1488 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.conststr) = (yyvsp[(3) - (3)].boolean) ? "varchar": "bpchar"; ;} + { (yyval.conststr) = "varchar"; ;} break; case 671: @@ -204046,21 +217895,26 @@ YYLTYPE yylloc; case 672: #line 1492 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.conststr) = (yyvsp[(2) - (2)].boolean) ? "varchar": "bpchar"; ;} + { (yyval.conststr) = (yyvsp[(3) - (3)].boolean) ? "varchar": "bpchar"; ;} break; case 673: -#line 1496 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.boolean) = true; ;} +#line 1494 "third_party/libpg_query/grammar/statements/select.y" + { (yyval.conststr) = (yyvsp[(2) - (2)].boolean) ? "varchar": "bpchar"; ;} break; case 674: -#line 1497 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.boolean) = false; ;} +#line 1498 "third_party/libpg_query/grammar/statements/select.y" + { (yyval.boolean) = true; ;} break; case 675: -#line 1505 "third_party/libpg_query/grammar/statements/select.y" +#line 1499 "third_party/libpg_query/grammar/statements/select.y" + { (yyval.boolean) = false; ;} + break; + + case 676: +#line 1507 "third_party/libpg_query/grammar/statements/select.y" { if ((yyvsp[(5) - (5)].boolean)) (yyval.typnam) = SystemTypeName("timestamptz"); @@ -204071,8 +217925,8 @@ YYLTYPE yylloc; ;} break; - case 676: -#line 1514 "third_party/libpg_query/grammar/statements/select.y" + case 677: +#line 1516 "third_party/libpg_query/grammar/statements/select.y" { if ((yyvsp[(2) - (2)].boolean)) (yyval.typnam) = SystemTypeName("timestamptz"); @@ -204082,8 +217936,8 @@ YYLTYPE yylloc; ;} break; - case 677: -#line 1522 "third_party/libpg_query/grammar/statements/select.y" + case 678: +#line 
1524 "third_party/libpg_query/grammar/statements/select.y" { if ((yyvsp[(5) - (5)].boolean)) (yyval.typnam) = SystemTypeName("timetz"); @@ -204094,8 +217948,8 @@ YYLTYPE yylloc; ;} break; - case 678: -#line 1531 "third_party/libpg_query/grammar/statements/select.y" + case 679: +#line 1533 "third_party/libpg_query/grammar/statements/select.y" { if ((yyvsp[(2) - (2)].boolean)) (yyval.typnam) = SystemTypeName("timetz"); @@ -204105,87 +217959,87 @@ YYLTYPE yylloc; ;} break; - case 679: -#line 1542 "third_party/libpg_query/grammar/statements/select.y" + case 680: +#line 1544 "third_party/libpg_query/grammar/statements/select.y" { (yyval.typnam) = SystemTypeName("interval"); (yyval.typnam)->location = (yylsp[(1) - (1)]); ;} break; - case 680: -#line 1549 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.boolean) = true; ;} - break; - case 681: -#line 1550 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.boolean) = false; ;} +#line 1551 "third_party/libpg_query/grammar/statements/select.y" + { (yyval.boolean) = true; ;} break; case 682: -#line 1551 "third_party/libpg_query/grammar/statements/select.y" +#line 1552 "third_party/libpg_query/grammar/statements/select.y" { (yyval.boolean) = false; ;} break; - case 699: -#line 1580 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.list) = list_make1(makeIntConst(INTERVAL_MASK(YEAR), (yylsp[(1) - (1)]))); ;} + case 683: +#line 1553 "third_party/libpg_query/grammar/statements/select.y" + { (yyval.boolean) = false; ;} break; case 700: #line 1582 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.list) = list_make1(makeIntConst(INTERVAL_MASK(MONTH), (yylsp[(1) - (1)]))); ;} + { (yyval.list) = list_make1(makeIntConst(INTERVAL_MASK(YEAR), (yylsp[(1) - (1)]))); ;} break; case 701: #line 1584 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.list) = list_make1(makeIntConst(INTERVAL_MASK(DAY), (yylsp[(1) - (1)]))); ;} + { (yyval.list) = 
list_make1(makeIntConst(INTERVAL_MASK(MONTH), (yylsp[(1) - (1)]))); ;} break; case 702: #line 1586 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.list) = list_make1(makeIntConst(INTERVAL_MASK(HOUR), (yylsp[(1) - (1)]))); ;} + { (yyval.list) = list_make1(makeIntConst(INTERVAL_MASK(DAY), (yylsp[(1) - (1)]))); ;} break; case 703: #line 1588 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.list) = list_make1(makeIntConst(INTERVAL_MASK(MINUTE), (yylsp[(1) - (1)]))); ;} + { (yyval.list) = list_make1(makeIntConst(INTERVAL_MASK(HOUR), (yylsp[(1) - (1)]))); ;} break; case 704: #line 1590 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.list) = list_make1(makeIntConst(INTERVAL_MASK(SECOND), (yylsp[(1) - (1)]))); ;} + { (yyval.list) = list_make1(makeIntConst(INTERVAL_MASK(MINUTE), (yylsp[(1) - (1)]))); ;} break; case 705: #line 1592 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.list) = list_make1(makeIntConst(INTERVAL_MASK(MILLISECOND), (yylsp[(1) - (1)]))); ;} + { (yyval.list) = list_make1(makeIntConst(INTERVAL_MASK(SECOND), (yylsp[(1) - (1)]))); ;} break; case 706: #line 1594 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.list) = list_make1(makeIntConst(INTERVAL_MASK(MICROSECOND), (yylsp[(1) - (1)]))); ;} + { (yyval.list) = list_make1(makeIntConst(INTERVAL_MASK(MILLISECOND), (yylsp[(1) - (1)]))); ;} break; case 707: #line 1596 "third_party/libpg_query/grammar/statements/select.y" + { (yyval.list) = list_make1(makeIntConst(INTERVAL_MASK(MICROSECOND), (yylsp[(1) - (1)]))); ;} + break; + + case 708: +#line 1598 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1(makeIntConst(INTERVAL_MASK(YEAR) | INTERVAL_MASK(MONTH), (yylsp[(1) - (3)]))); ;} break; - case 708: -#line 1601 "third_party/libpg_query/grammar/statements/select.y" + case 709: +#line 1603 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1(makeIntConst(INTERVAL_MASK(DAY) 
| INTERVAL_MASK(HOUR), (yylsp[(1) - (3)]))); ;} break; - case 709: -#line 1606 "third_party/libpg_query/grammar/statements/select.y" + case 710: +#line 1608 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1(makeIntConst(INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR) | @@ -204193,8 +218047,8 @@ YYLTYPE yylloc; ;} break; - case 710: -#line 1612 "third_party/libpg_query/grammar/statements/select.y" + case 711: +#line 1614 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1(makeIntConst(INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR) | @@ -204203,16 +218057,16 @@ YYLTYPE yylloc; ;} break; - case 711: -#line 1619 "third_party/libpg_query/grammar/statements/select.y" + case 712: +#line 1621 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1(makeIntConst(INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE), (yylsp[(1) - (3)]))); ;} break; - case 712: -#line 1624 "third_party/libpg_query/grammar/statements/select.y" + case 713: +#line 1626 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1(makeIntConst(INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE) | @@ -204220,31 +218074,31 @@ YYLTYPE yylloc; ;} break; - case 713: -#line 1630 "third_party/libpg_query/grammar/statements/select.y" + case 714: +#line 1632 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1(makeIntConst(INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND), (yylsp[(1) - (3)]))); ;} break; - case 714: -#line 1635 "third_party/libpg_query/grammar/statements/select.y" + case 715: +#line 1637 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = NIL; ;} break; - case 715: -#line 1666 "third_party/libpg_query/grammar/statements/select.y" + case 716: +#line 1668 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 716: -#line 1669 "third_party/libpg_query/grammar/statements/select.y" + case 717: +#line 1671 
"third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeTypeCast((yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].typnam), 0, (yylsp[(2) - (3)])); ;} break; - case 717: -#line 1671 "third_party/libpg_query/grammar/statements/select.y" + case 718: +#line 1673 "third_party/libpg_query/grammar/statements/select.y" { PGCollateClause *n = makeNode(PGCollateClause); n->arg = (yyvsp[(1) - (3)].node); @@ -204254,8 +218108,8 @@ YYLTYPE yylloc; ;} break; - case 718: -#line 1679 "third_party/libpg_query/grammar/statements/select.y" + case 719: +#line 1681 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeFuncCall(SystemFuncName("timezone"), list_make2((yyvsp[(5) - (5)].node), (yyvsp[(1) - (5)].node)), @@ -204263,104 +218117,99 @@ YYLTYPE yylloc; ;} break; - case 719: -#line 1694 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "+", NULL, (yyvsp[(2) - (2)].node), (yylsp[(1) - (2)])); ;} - break; - case 720: #line 1696 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.node) = doNegate((yyvsp[(2) - (2)].node), (yylsp[(1) - (2)])); ;} + { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "+", NULL, (yyvsp[(2) - (2)].node), (yylsp[(1) - (2)])); ;} break; case 721: #line 1698 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "+", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} + { (yyval.node) = doNegate((yyvsp[(2) - (2)].node), (yylsp[(1) - (2)])); ;} break; case 722: #line 1700 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "-", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} + { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "+", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; case 723: #line 1702 
"third_party/libpg_query/grammar/statements/select.y" - { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "*", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} + { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "-", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; case 724: #line 1704 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "/", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} + { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "*", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; case 725: #line 1706 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "%", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} + { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "/", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; case 726: #line 1708 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "^", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} + { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "%", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; case 727: #line 1710 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "<", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} + { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "^", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; case 728: #line 1712 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, ">", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} + { (yyval.node) 
= (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "<", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; case 729: #line 1714 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "=", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} + { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, ">", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; case 730: #line 1716 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "<=", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} + { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "=", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; case 731: #line 1718 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, ">=", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} + { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "<=", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; case 732: #line 1720 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "<>", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} + { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, ">=", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; case 733: -#line 1723 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.node) = (PGNode *) makeAExpr(PG_AEXPR_OP, (yyvsp[(2) - (3)].list), (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} +#line 1722 "third_party/libpg_query/grammar/statements/select.y" + { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "<>", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} 
break; case 734: #line 1725 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.node) = (PGNode *) makeAExpr(PG_AEXPR_OP, (yyvsp[(1) - (2)].list), NULL, (yyvsp[(2) - (2)].node), (yylsp[(1) - (2)])); ;} + { (yyval.node) = (PGNode *) makeAExpr(PG_AEXPR_OP, (yyvsp[(2) - (3)].list), (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; case 735: #line 1727 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.node) = (PGNode *) makeAExpr(PG_AEXPR_OP, (yyvsp[(2) - (2)].list), (yyvsp[(1) - (2)].node), NULL, (yylsp[(2) - (2)])); ;} + { (yyval.node) = (PGNode *) makeAExpr(PG_AEXPR_OP, (yyvsp[(1) - (2)].list), NULL, (yyvsp[(2) - (2)].node), (yylsp[(1) - (2)])); ;} break; case 736: -#line 1730 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.node) = makeAndExpr((yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} +#line 1729 "third_party/libpg_query/grammar/statements/select.y" + { (yyval.node) = (PGNode *) makeAExpr(PG_AEXPR_OP, (yyvsp[(2) - (2)].list), (yyvsp[(1) - (2)].node), NULL, (yylsp[(2) - (2)])); ;} break; case 737: #line 1732 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.node) = makeOrExpr((yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} + { (yyval.node) = makeAndExpr((yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; case 738: #line 1734 "third_party/libpg_query/grammar/statements/select.y" - { (yyval.node) = makeNotExpr((yyvsp[(2) - (2)].node), (yylsp[(1) - (2)])); ;} + { (yyval.node) = makeOrExpr((yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; case 739: @@ -204369,23 +218218,28 @@ YYLTYPE yylloc; break; case 740: -#line 1739 "third_party/libpg_query/grammar/statements/select.y" +#line 1738 "third_party/libpg_query/grammar/statements/select.y" + { (yyval.node) = makeNotExpr((yyvsp[(2) - (2)].node), (yylsp[(1) - (2)])); ;} + break; + + case 741: +#line 1741 
"third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_GLOB, "~~~", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; - case 741: -#line 1744 "third_party/libpg_query/grammar/statements/select.y" + case 742: +#line 1746 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_LIKE, "~~", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; - case 742: -#line 1749 "third_party/libpg_query/grammar/statements/select.y" + case 743: +#line 1751 "third_party/libpg_query/grammar/statements/select.y" { PGFuncCall *n = makeFuncCall(SystemFuncName("like_escape"), list_make3((yyvsp[(1) - (5)].node), (yyvsp[(3) - (5)].node), (yyvsp[(5) - (5)].node)), @@ -204394,16 +218248,16 @@ YYLTYPE yylloc; ;} break; - case 743: -#line 1756 "third_party/libpg_query/grammar/statements/select.y" + case 744: +#line 1758 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_LIKE, "!~~", (yyvsp[(1) - (4)].node), (yyvsp[(4) - (4)].node), (yylsp[(2) - (4)])); ;} break; - case 744: -#line 1761 "third_party/libpg_query/grammar/statements/select.y" + case 745: +#line 1763 "third_party/libpg_query/grammar/statements/select.y" { PGFuncCall *n = makeFuncCall(SystemFuncName("not_like_escape"), list_make3((yyvsp[(1) - (6)].node), (yyvsp[(4) - (6)].node), (yyvsp[(6) - (6)].node)), @@ -204412,16 +218266,16 @@ YYLTYPE yylloc; ;} break; - case 745: -#line 1768 "third_party/libpg_query/grammar/statements/select.y" + case 746: +#line 1770 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_ILIKE, "~~*", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; - case 746: -#line 1773 "third_party/libpg_query/grammar/statements/select.y" + case 747: +#line 1775 "third_party/libpg_query/grammar/statements/select.y" { 
PGFuncCall *n = makeFuncCall(SystemFuncName("like_escape"), list_make2((yyvsp[(3) - (5)].node), (yyvsp[(5) - (5)].node)), @@ -204431,16 +218285,16 @@ YYLTYPE yylloc; ;} break; - case 747: -#line 1781 "third_party/libpg_query/grammar/statements/select.y" + case 748: +#line 1783 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_ILIKE, "!~~*", (yyvsp[(1) - (4)].node), (yyvsp[(4) - (4)].node), (yylsp[(2) - (4)])); ;} break; - case 748: -#line 1786 "third_party/libpg_query/grammar/statements/select.y" + case 749: +#line 1788 "third_party/libpg_query/grammar/statements/select.y" { PGFuncCall *n = makeFuncCall(SystemFuncName("not_like_escape"), list_make2((yyvsp[(4) - (6)].node), (yyvsp[(6) - (6)].node)), @@ -204450,8 +218304,8 @@ YYLTYPE yylloc; ;} break; - case 749: -#line 1795 "third_party/libpg_query/grammar/statements/select.y" + case 750: +#line 1797 "third_party/libpg_query/grammar/statements/select.y" { PGFuncCall *n = makeFuncCall(SystemFuncName("similar_escape"), list_make2((yyvsp[(4) - (4)].node), makeNullAConst(-1)), @@ -204461,8 +218315,8 @@ YYLTYPE yylloc; ;} break; - case 750: -#line 1803 "third_party/libpg_query/grammar/statements/select.y" + case 751: +#line 1805 "third_party/libpg_query/grammar/statements/select.y" { PGFuncCall *n = makeFuncCall(SystemFuncName("similar_escape"), list_make2((yyvsp[(4) - (6)].node), (yyvsp[(6) - (6)].node)), @@ -204472,8 +218326,8 @@ YYLTYPE yylloc; ;} break; - case 751: -#line 1811 "third_party/libpg_query/grammar/statements/select.y" + case 752: +#line 1813 "third_party/libpg_query/grammar/statements/select.y" { PGFuncCall *n = makeFuncCall(SystemFuncName("similar_escape"), list_make2((yyvsp[(5) - (5)].node), makeNullAConst(-1)), @@ -204483,8 +218337,8 @@ YYLTYPE yylloc; ;} break; - case 752: -#line 1819 "third_party/libpg_query/grammar/statements/select.y" + case 753: +#line 1821 "third_party/libpg_query/grammar/statements/select.y" { PGFuncCall *n = 
makeFuncCall(SystemFuncName("similar_escape"), list_make2((yyvsp[(5) - (7)].node), (yyvsp[(7) - (7)].node)), @@ -204494,8 +218348,8 @@ YYLTYPE yylloc; ;} break; - case 753: -#line 1837 "third_party/libpg_query/grammar/statements/select.y" + case 754: +#line 1839 "third_party/libpg_query/grammar/statements/select.y" { PGNullTest *n = makeNode(PGNullTest); n->arg = (PGExpr *) (yyvsp[(1) - (3)].node); @@ -204505,8 +218359,8 @@ YYLTYPE yylloc; ;} break; - case 754: -#line 1845 "third_party/libpg_query/grammar/statements/select.y" + case 755: +#line 1847 "third_party/libpg_query/grammar/statements/select.y" { PGNullTest *n = makeNode(PGNullTest); n->arg = (PGExpr *) (yyvsp[(1) - (2)].node); @@ -204516,8 +218370,8 @@ YYLTYPE yylloc; ;} break; - case 755: -#line 1853 "third_party/libpg_query/grammar/statements/select.y" + case 756: +#line 1855 "third_party/libpg_query/grammar/statements/select.y" { PGNullTest *n = makeNode(PGNullTest); n->arg = (PGExpr *) (yyvsp[(1) - (4)].node); @@ -204527,8 +218381,8 @@ YYLTYPE yylloc; ;} break; - case 756: -#line 1861 "third_party/libpg_query/grammar/statements/select.y" + case 757: +#line 1863 "third_party/libpg_query/grammar/statements/select.y" { PGNullTest *n = makeNode(PGNullTest); n->arg = (PGExpr *) (yyvsp[(1) - (3)].node); @@ -204538,8 +218392,8 @@ YYLTYPE yylloc; ;} break; - case 757: -#line 1869 "third_party/libpg_query/grammar/statements/select.y" + case 758: +#line 1871 "third_party/libpg_query/grammar/statements/select.y" { PGNullTest *n = makeNode(PGNullTest); n->arg = (PGExpr *) (yyvsp[(1) - (2)].node); @@ -204549,16 +218403,32 @@ YYLTYPE yylloc; ;} break; - case 758: -#line 1876 "third_party/libpg_query/grammar/statements/select.y" + case 759: +#line 1878 "third_party/libpg_query/grammar/statements/select.y" { PGFuncCall *n = makeFuncCall(SystemFuncName("row"), (yyvsp[(1) - (1)].list), (yylsp[(1) - (1)])); (yyval.node) = (PGNode *) n; ;} break; - case 759: -#line 1881 
"third_party/libpg_query/grammar/statements/select.y" + case 760: +#line 1882 "third_party/libpg_query/grammar/statements/select.y" + { + PGFuncCall *n = makeFuncCall(SystemFuncName("struct_pack"), (yyvsp[(2) - (3)].list), (yylsp[(2) - (3)])); + (yyval.node) = (PGNode *) n; + ;} + break; + + case 761: +#line 1886 "third_party/libpg_query/grammar/statements/select.y" + { + PGFuncCall *n = makeFuncCall(SystemFuncName("list_value"), (yyvsp[(2) - (3)].list), (yylsp[(2) - (3)])); + (yyval.node) = (PGNode *) n; + ;} + break; + + case 762: +#line 1891 "third_party/libpg_query/grammar/statements/select.y" { PGLambdaFunction *n = makeNode(PGLambdaFunction); n->parameters = (yyvsp[(1) - (3)].list); @@ -204568,8 +218438,8 @@ YYLTYPE yylloc; ;} break; - case 760: -#line 1889 "third_party/libpg_query/grammar/statements/select.y" + case 763: +#line 1899 "third_party/libpg_query/grammar/statements/select.y" { PGLambdaFunction *n = makeNode(PGLambdaFunction); n->parameters = list_make1((yyvsp[(1) - (3)].node)); @@ -204579,8 +218449,8 @@ YYLTYPE yylloc; ;} break; - case 761: -#line 1897 "third_party/libpg_query/grammar/statements/select.y" + case 764: +#line 1907 "third_party/libpg_query/grammar/statements/select.y" { if (list_length((yyvsp[(1) - (3)].list)) != 2) ereport(ERROR, @@ -204598,8 +218468,8 @@ YYLTYPE yylloc; ;} break; - case 762: -#line 1913 "third_party/libpg_query/grammar/statements/select.y" + case 765: +#line 1923 "third_party/libpg_query/grammar/statements/select.y" { PGBooleanTest *b = makeNode(PGBooleanTest); b->arg = (PGExpr *) (yyvsp[(1) - (3)].node); @@ -204609,8 +218479,8 @@ YYLTYPE yylloc; ;} break; - case 763: -#line 1921 "third_party/libpg_query/grammar/statements/select.y" + case 766: +#line 1931 "third_party/libpg_query/grammar/statements/select.y" { PGBooleanTest *b = makeNode(PGBooleanTest); b->arg = (PGExpr *) (yyvsp[(1) - (4)].node); @@ -204620,8 +218490,8 @@ YYLTYPE yylloc; ;} break; - case 764: -#line 1929 
"third_party/libpg_query/grammar/statements/select.y" + case 767: +#line 1939 "third_party/libpg_query/grammar/statements/select.y" { PGBooleanTest *b = makeNode(PGBooleanTest); b->arg = (PGExpr *) (yyvsp[(1) - (3)].node); @@ -204631,8 +218501,8 @@ YYLTYPE yylloc; ;} break; - case 765: -#line 1937 "third_party/libpg_query/grammar/statements/select.y" + case 768: +#line 1947 "third_party/libpg_query/grammar/statements/select.y" { PGBooleanTest *b = makeNode(PGBooleanTest); b->arg = (PGExpr *) (yyvsp[(1) - (4)].node); @@ -204642,8 +218512,8 @@ YYLTYPE yylloc; ;} break; - case 766: -#line 1945 "third_party/libpg_query/grammar/statements/select.y" + case 769: +#line 1955 "third_party/libpg_query/grammar/statements/select.y" { PGBooleanTest *b = makeNode(PGBooleanTest); b->arg = (PGExpr *) (yyvsp[(1) - (3)].node); @@ -204653,8 +218523,8 @@ YYLTYPE yylloc; ;} break; - case 767: -#line 1953 "third_party/libpg_query/grammar/statements/select.y" + case 770: +#line 1963 "third_party/libpg_query/grammar/statements/select.y" { PGBooleanTest *b = makeNode(PGBooleanTest); b->arg = (PGExpr *) (yyvsp[(1) - (4)].node); @@ -204664,36 +218534,36 @@ YYLTYPE yylloc; ;} break; - case 768: -#line 1961 "third_party/libpg_query/grammar/statements/select.y" + case 771: +#line 1971 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_DISTINCT, "=", (yyvsp[(1) - (5)].node), (yyvsp[(5) - (5)].node), (yylsp[(2) - (5)])); ;} break; - case 769: -#line 1965 "third_party/libpg_query/grammar/statements/select.y" + case 772: +#line 1975 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_NOT_DISTINCT, "=", (yyvsp[(1) - (6)].node), (yyvsp[(6) - (6)].node), (yylsp[(2) - (6)])); ;} break; - case 770: -#line 1969 "third_party/libpg_query/grammar/statements/select.y" + case 773: +#line 1979 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) 
makeSimpleAExpr(PG_AEXPR_OF, "=", (yyvsp[(1) - (6)].node), (PGNode *) (yyvsp[(5) - (6)].list), (yylsp[(2) - (6)])); ;} break; - case 771: -#line 1973 "third_party/libpg_query/grammar/statements/select.y" + case 774: +#line 1983 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OF, "<>", (yyvsp[(1) - (7)].node), (PGNode *) (yyvsp[(6) - (7)].list), (yylsp[(2) - (7)])); ;} break; - case 772: -#line 1977 "third_party/libpg_query/grammar/statements/select.y" + case 775: +#line 1987 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_BETWEEN, "BETWEEN", @@ -204703,8 +218573,8 @@ YYLTYPE yylloc; ;} break; - case 773: -#line 1985 "third_party/libpg_query/grammar/statements/select.y" + case 776: +#line 1995 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_NOT_BETWEEN, "NOT BETWEEN", @@ -204714,8 +218584,8 @@ YYLTYPE yylloc; ;} break; - case 774: -#line 1993 "third_party/libpg_query/grammar/statements/select.y" + case 777: +#line 2003 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_BETWEEN_SYM, "BETWEEN SYMMETRIC", @@ -204725,8 +218595,8 @@ YYLTYPE yylloc; ;} break; - case 775: -#line 2001 "third_party/libpg_query/grammar/statements/select.y" + case 778: +#line 2011 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_NOT_BETWEEN_SYM, "NOT BETWEEN SYMMETRIC", @@ -204736,8 +218606,8 @@ YYLTYPE yylloc; ;} break; - case 776: -#line 2009 "third_party/libpg_query/grammar/statements/select.y" + case 779: +#line 2019 "third_party/libpg_query/grammar/statements/select.y" { /* in_expr returns a PGSubLink or a list of a_exprs */ if (IsA((yyvsp[(3) - (3)].node), PGSubLink)) @@ -204759,8 +218629,8 @@ YYLTYPE yylloc; ;} break; - case 777: -#line 2029 "third_party/libpg_query/grammar/statements/select.y" + case 
780: +#line 2039 "third_party/libpg_query/grammar/statements/select.y" { /* in_expr returns a PGSubLink or a list of a_exprs */ if (IsA((yyvsp[(4) - (4)].node), PGSubLink)) @@ -204784,8 +218654,8 @@ YYLTYPE yylloc; ;} break; - case 778: -#line 2051 "third_party/libpg_query/grammar/statements/select.y" + case 781: +#line 2061 "third_party/libpg_query/grammar/statements/select.y" { PGSubLink *n = makeNode(PGSubLink); n->subLinkType = (yyvsp[(3) - (4)].subquerytype); @@ -204798,8 +218668,8 @@ YYLTYPE yylloc; ;} break; - case 779: -#line 2062 "third_party/libpg_query/grammar/statements/select.y" + case 782: +#line 2072 "third_party/libpg_query/grammar/statements/select.y" { if ((yyvsp[(3) - (6)].subquerytype) == PG_ANY_SUBLINK) (yyval.node) = (PGNode *) makeAExpr(PG_AEXPR_OP_ANY, (yyvsp[(2) - (6)].list), (yyvsp[(1) - (6)].node), (yyvsp[(5) - (6)].node), (yylsp[(2) - (6)])); @@ -204808,8 +218678,8 @@ YYLTYPE yylloc; ;} break; - case 780: -#line 2069 "third_party/libpg_query/grammar/statements/select.y" + case 783: +#line 2079 "third_party/libpg_query/grammar/statements/select.y" { /* * The SQL spec only allows DEFAULT in "contextually typed @@ -204825,8 +218695,8 @@ YYLTYPE yylloc; ;} break; - case 781: -#line 2082 "third_party/libpg_query/grammar/statements/select.y" + case 784: +#line 2092 "third_party/libpg_query/grammar/statements/select.y" { PGList *func_name = list_make1(makeString("construct_array")); PGFuncCall *n = makeFuncCall(func_name, (yyvsp[(3) - (4)].list), (yylsp[(1) - (4)])); @@ -204834,141 +218704,141 @@ YYLTYPE yylloc; ;} break; - case 782: -#line 2099 "third_party/libpg_query/grammar/statements/select.y" + case 785: +#line 2109 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 783: -#line 2101 "third_party/libpg_query/grammar/statements/select.y" + case 786: +#line 2111 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeTypeCast((yyvsp[(1) - (3)].node), (yyvsp[(3) 
- (3)].typnam), 0, (yylsp[(2) - (3)])); ;} break; - case 784: -#line 2103 "third_party/libpg_query/grammar/statements/select.y" + case 787: +#line 2113 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "+", NULL, (yyvsp[(2) - (2)].node), (yylsp[(1) - (2)])); ;} break; - case 785: -#line 2105 "third_party/libpg_query/grammar/statements/select.y" + case 788: +#line 2115 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = doNegate((yyvsp[(2) - (2)].node), (yylsp[(1) - (2)])); ;} break; - case 786: -#line 2107 "third_party/libpg_query/grammar/statements/select.y" + case 789: +#line 2117 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "+", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; - case 787: -#line 2109 "third_party/libpg_query/grammar/statements/select.y" + case 790: +#line 2119 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "-", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; - case 788: -#line 2111 "third_party/libpg_query/grammar/statements/select.y" + case 791: +#line 2121 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "*", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; - case 789: -#line 2113 "third_party/libpg_query/grammar/statements/select.y" + case 792: +#line 2123 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "/", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; - case 790: -#line 2115 "third_party/libpg_query/grammar/statements/select.y" + case 793: +#line 2125 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "%", (yyvsp[(1) - 
(3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; - case 791: -#line 2117 "third_party/libpg_query/grammar/statements/select.y" + case 794: +#line 2127 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "^", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; - case 792: -#line 2119 "third_party/libpg_query/grammar/statements/select.y" + case 795: +#line 2129 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "<", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; - case 793: -#line 2121 "third_party/libpg_query/grammar/statements/select.y" + case 796: +#line 2131 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, ">", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; - case 794: -#line 2123 "third_party/libpg_query/grammar/statements/select.y" + case 797: +#line 2133 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "=", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; - case 795: -#line 2125 "third_party/libpg_query/grammar/statements/select.y" + case 798: +#line 2135 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "<=", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; - case 796: -#line 2127 "third_party/libpg_query/grammar/statements/select.y" + case 799: +#line 2137 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, ">=", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; - case 797: -#line 2129 "third_party/libpg_query/grammar/statements/select.y" + case 800: +#line 2139 
"third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OP, "<>", (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; - case 798: -#line 2131 "third_party/libpg_query/grammar/statements/select.y" + case 801: +#line 2141 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeAExpr(PG_AEXPR_OP, (yyvsp[(2) - (3)].list), (yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yylsp[(2) - (3)])); ;} break; - case 799: -#line 2133 "third_party/libpg_query/grammar/statements/select.y" + case 802: +#line 2143 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeAExpr(PG_AEXPR_OP, (yyvsp[(1) - (2)].list), NULL, (yyvsp[(2) - (2)].node), (yylsp[(1) - (2)])); ;} break; - case 800: -#line 2135 "third_party/libpg_query/grammar/statements/select.y" + case 803: +#line 2145 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeAExpr(PG_AEXPR_OP, (yyvsp[(2) - (2)].list), (yyvsp[(1) - (2)].node), NULL, (yylsp[(2) - (2)])); ;} break; - case 801: -#line 2137 "third_party/libpg_query/grammar/statements/select.y" + case 804: +#line 2147 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_DISTINCT, "=", (yyvsp[(1) - (5)].node), (yyvsp[(5) - (5)].node), (yylsp[(2) - (5)])); ;} break; - case 802: -#line 2141 "third_party/libpg_query/grammar/statements/select.y" + case 805: +#line 2151 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_NOT_DISTINCT, "=", (yyvsp[(1) - (6)].node), (yyvsp[(6) - (6)].node), (yylsp[(2) - (6)])); ;} break; - case 803: -#line 2145 "third_party/libpg_query/grammar/statements/select.y" + case 806: +#line 2155 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OF, "=", (yyvsp[(1) - (6)].node), (PGNode *) (yyvsp[(5) - (6)].list), 
(yylsp[(2) - (6)])); ;} break; - case 804: -#line 2149 "third_party/libpg_query/grammar/statements/select.y" + case 807: +#line 2159 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_OF, "<>", (yyvsp[(1) - (7)].node), (PGNode *) (yyvsp[(6) - (7)].list), (yylsp[(2) - (7)])); ;} break; - case 805: -#line 2162 "third_party/libpg_query/grammar/statements/select.y" + case 808: +#line 2172 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 806: -#line 2163 "third_party/libpg_query/grammar/statements/select.y" + case 809: +#line 2173 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 807: -#line 2165 "third_party/libpg_query/grammar/statements/select.y" + case 810: +#line 2175 "third_party/libpg_query/grammar/statements/select.y" { PGPositionalReference *n = makeNode(PGPositionalReference); n->position = (yyvsp[(2) - (2)].ival); @@ -204977,8 +218847,8 @@ YYLTYPE yylloc; ;} break; - case 808: -#line 2172 "third_party/libpg_query/grammar/statements/select.y" + case 811: +#line 2182 "third_party/libpg_query/grammar/statements/select.y" { if ((yyvsp[(2) - (2)].list)) { @@ -204992,8 +218862,8 @@ YYLTYPE yylloc; ;} break; - case 809: -#line 2184 "third_party/libpg_query/grammar/statements/select.y" + case 812: +#line 2194 "third_party/libpg_query/grammar/statements/select.y" { PGParamRef *p = makeNode(PGParamRef); p->number = (yyvsp[(1) - (2)].ival); @@ -205010,8 +218880,8 @@ YYLTYPE yylloc; ;} break; - case 810: -#line 2199 "third_party/libpg_query/grammar/statements/select.y" + case 813: +#line 2209 "third_party/libpg_query/grammar/statements/select.y" { if ((yyvsp[(4) - (4)].list)) { @@ -205025,13 +218895,13 @@ YYLTYPE yylloc; ;} break; - case 811: -#line 2211 "third_party/libpg_query/grammar/statements/select.y" + case 814: +#line 2221 "third_party/libpg_query/grammar/statements/select.y" { 
(yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 812: -#line 2213 "third_party/libpg_query/grammar/statements/select.y" + case 815: +#line 2223 "third_party/libpg_query/grammar/statements/select.y" { if ((yyvsp[(2) - (2)].list)) { PGAIndirection *n = makeNode(PGAIndirection); @@ -205045,8 +218915,8 @@ YYLTYPE yylloc; ;} break; - case 813: -#line 2225 "third_party/libpg_query/grammar/statements/select.y" + case 816: +#line 2235 "third_party/libpg_query/grammar/statements/select.y" { PGSubLink *n = makeNode(PGSubLink); n->subLinkType = PG_EXPR_SUBLINK; @@ -205059,8 +218929,8 @@ YYLTYPE yylloc; ;} break; - case 814: -#line 2236 "third_party/libpg_query/grammar/statements/select.y" + case 817: +#line 2246 "third_party/libpg_query/grammar/statements/select.y" { /* * Because the select_with_parens nonterminal is designed @@ -205086,8 +218956,8 @@ YYLTYPE yylloc; ;} break; - case 815: -#line 2260 "third_party/libpg_query/grammar/statements/select.y" + case 818: +#line 2270 "third_party/libpg_query/grammar/statements/select.y" { PGSubLink *n = makeNode(PGSubLink); n->subLinkType = PG_EXISTS_SUBLINK; @@ -205100,15 +218970,15 @@ YYLTYPE yylloc; ;} break; - case 816: -#line 2273 "third_party/libpg_query/grammar/statements/select.y" + case 819: +#line 2283 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeFuncCall((yyvsp[(1) - (3)].list), NIL, (yylsp[(1) - (3)])); ;} break; - case 817: -#line 2277 "third_party/libpg_query/grammar/statements/select.y" + case 820: +#line 2287 "third_party/libpg_query/grammar/statements/select.y" { PGFuncCall *n = makeFuncCall((yyvsp[(1) - (5)].list), (yyvsp[(3) - (5)].list), (yylsp[(1) - (5)])); n->agg_order = (yyvsp[(4) - (5)].list); @@ -205116,8 +218986,8 @@ YYLTYPE yylloc; ;} break; - case 818: -#line 2283 "third_party/libpg_query/grammar/statements/select.y" + case 821: +#line 2293 "third_party/libpg_query/grammar/statements/select.y" { PGFuncCall *n = makeFuncCall((yyvsp[(1) - (6)].list), 
list_make1((yyvsp[(4) - (6)].node)), (yylsp[(1) - (6)])); n->func_variadic = true; @@ -205126,8 +218996,8 @@ YYLTYPE yylloc; ;} break; - case 819: -#line 2290 "third_party/libpg_query/grammar/statements/select.y" + case 822: +#line 2300 "third_party/libpg_query/grammar/statements/select.y" { PGFuncCall *n = makeFuncCall((yyvsp[(1) - (8)].list), lappend((yyvsp[(3) - (8)].list), (yyvsp[(6) - (8)].node)), (yylsp[(1) - (8)])); n->func_variadic = true; @@ -205136,8 +219006,8 @@ YYLTYPE yylloc; ;} break; - case 820: -#line 2297 "third_party/libpg_query/grammar/statements/select.y" + case 823: +#line 2307 "third_party/libpg_query/grammar/statements/select.y" { PGFuncCall *n = makeFuncCall((yyvsp[(1) - (6)].list), (yyvsp[(4) - (6)].list), (yylsp[(1) - (6)])); n->agg_order = (yyvsp[(5) - (6)].list); @@ -205149,8 +219019,8 @@ YYLTYPE yylloc; ;} break; - case 821: -#line 2307 "third_party/libpg_query/grammar/statements/select.y" + case 824: +#line 2317 "third_party/libpg_query/grammar/statements/select.y" { PGFuncCall *n = makeFuncCall((yyvsp[(1) - (6)].list), (yyvsp[(4) - (6)].list), (yylsp[(1) - (6)])); n->agg_order = (yyvsp[(5) - (6)].list); @@ -205159,8 +219029,8 @@ YYLTYPE yylloc; ;} break; - case 822: -#line 2314 "third_party/libpg_query/grammar/statements/select.y" + case 825: +#line 2324 "third_party/libpg_query/grammar/statements/select.y" { /* * We consider AGGREGATE(*) to invoke a parameterless @@ -205178,8 +219048,8 @@ YYLTYPE yylloc; ;} break; - case 823: -#line 2342 "third_party/libpg_query/grammar/statements/select.y" + case 826: +#line 2352 "third_party/libpg_query/grammar/statements/select.y" { PGFuncCall *n = (PGFuncCall *) (yyvsp[(1) - (4)].node); /* @@ -205216,23 +219086,23 @@ YYLTYPE yylloc; ;} break; - case 824: -#line 2377 "third_party/libpg_query/grammar/statements/select.y" + case 827: +#line 2387 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 825: -#line 2387 
"third_party/libpg_query/grammar/statements/select.y" + case 828: +#line 2397 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 826: -#line 2388 "third_party/libpg_query/grammar/statements/select.y" + case 829: +#line 2398 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 827: -#line 2396 "third_party/libpg_query/grammar/statements/select.y" + case 830: +#line 2406 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeFuncCall(SystemFuncName("pg_collation_for"), list_make1((yyvsp[(4) - (5)].node)), @@ -205240,130 +219110,130 @@ YYLTYPE yylloc; ;} break; - case 828: -#line 2402 "third_party/libpg_query/grammar/statements/select.y" + case 831: +#line 2412 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSQLValueFunction(PG_SVFOP_CURRENT_DATE, -1, (yylsp[(1) - (1)])); ;} break; - case 829: -#line 2406 "third_party/libpg_query/grammar/statements/select.y" + case 832: +#line 2416 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSQLValueFunction(PG_SVFOP_CURRENT_TIME, -1, (yylsp[(1) - (1)])); ;} break; - case 830: -#line 2410 "third_party/libpg_query/grammar/statements/select.y" + case 833: +#line 2420 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSQLValueFunction(PG_SVFOP_CURRENT_TIME_N, (yyvsp[(3) - (4)].ival), (yylsp[(1) - (4)])); ;} break; - case 831: -#line 2414 "third_party/libpg_query/grammar/statements/select.y" + case 834: +#line 2424 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSQLValueFunction(PG_SVFOP_CURRENT_TIMESTAMP, -1, (yylsp[(1) - (1)])); ;} break; - case 832: -#line 2418 "third_party/libpg_query/grammar/statements/select.y" + case 835: +#line 2428 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSQLValueFunction(PG_SVFOP_CURRENT_TIMESTAMP_N, (yyvsp[(3) - 
(4)].ival), (yylsp[(1) - (4)])); ;} break; - case 833: -#line 2422 "third_party/libpg_query/grammar/statements/select.y" + case 836: +#line 2432 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSQLValueFunction(PG_SVFOP_LOCALTIME, -1, (yylsp[(1) - (1)])); ;} break; - case 834: -#line 2426 "third_party/libpg_query/grammar/statements/select.y" + case 837: +#line 2436 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSQLValueFunction(PG_SVFOP_LOCALTIME_N, (yyvsp[(3) - (4)].ival), (yylsp[(1) - (4)])); ;} break; - case 835: -#line 2430 "third_party/libpg_query/grammar/statements/select.y" + case 838: +#line 2440 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSQLValueFunction(PG_SVFOP_LOCALTIMESTAMP, -1, (yylsp[(1) - (1)])); ;} break; - case 836: -#line 2434 "third_party/libpg_query/grammar/statements/select.y" + case 839: +#line 2444 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSQLValueFunction(PG_SVFOP_LOCALTIMESTAMP_N, (yyvsp[(3) - (4)].ival), (yylsp[(1) - (4)])); ;} break; - case 837: -#line 2438 "third_party/libpg_query/grammar/statements/select.y" + case 840: +#line 2448 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSQLValueFunction(PG_SVFOP_CURRENT_ROLE, -1, (yylsp[(1) - (1)])); ;} break; - case 838: -#line 2442 "third_party/libpg_query/grammar/statements/select.y" + case 841: +#line 2452 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSQLValueFunction(PG_SVFOP_CURRENT_USER, -1, (yylsp[(1) - (1)])); ;} break; - case 839: -#line 2446 "third_party/libpg_query/grammar/statements/select.y" + case 842: +#line 2456 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSQLValueFunction(PG_SVFOP_SESSION_USER, -1, (yylsp[(1) - (1)])); ;} break; - case 840: -#line 2450 "third_party/libpg_query/grammar/statements/select.y" + case 843: +#line 2460 
"third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSQLValueFunction(PG_SVFOP_USER, -1, (yylsp[(1) - (1)])); ;} break; - case 841: -#line 2454 "third_party/libpg_query/grammar/statements/select.y" + case 844: +#line 2464 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSQLValueFunction(PG_SVFOP_CURRENT_CATALOG, -1, (yylsp[(1) - (1)])); ;} break; - case 842: -#line 2458 "third_party/libpg_query/grammar/statements/select.y" + case 845: +#line 2468 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeSQLValueFunction(PG_SVFOP_CURRENT_SCHEMA, -1, (yylsp[(1) - (1)])); ;} break; - case 843: -#line 2462 "third_party/libpg_query/grammar/statements/select.y" + case 846: +#line 2472 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeTypeCast((yyvsp[(3) - (6)].node), (yyvsp[(5) - (6)].typnam), 0, (yylsp[(1) - (6)])); ;} break; - case 844: -#line 2464 "third_party/libpg_query/grammar/statements/select.y" + case 847: +#line 2474 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeTypeCast((yyvsp[(3) - (6)].node), (yyvsp[(5) - (6)].typnam), 1, (yylsp[(1) - (6)])); ;} break; - case 845: -#line 2466 "third_party/libpg_query/grammar/statements/select.y" + case 848: +#line 2476 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeFuncCall(SystemFuncName("date_part"), (yyvsp[(3) - (4)].list), (yylsp[(1) - (4)])); ;} break; - case 846: -#line 2470 "third_party/libpg_query/grammar/statements/select.y" + case 849: +#line 2480 "third_party/libpg_query/grammar/statements/select.y" { /* overlay(A PLACING B FROM C FOR D) is converted to * overlay(A, B, C, D) @@ -205374,16 +219244,16 @@ YYLTYPE yylloc; ;} break; - case 847: -#line 2479 "third_party/libpg_query/grammar/statements/select.y" + case 850: +#line 2489 "third_party/libpg_query/grammar/statements/select.y" { /* position(A in B) is converted to position(B, A) */ (yyval.node) = (PGNode 
*) makeFuncCall(SystemFuncName("position"), (yyvsp[(3) - (4)].list), (yylsp[(1) - (4)])); ;} break; - case 848: -#line 2484 "third_party/libpg_query/grammar/statements/select.y" + case 851: +#line 2494 "third_party/libpg_query/grammar/statements/select.y" { /* substring(A from B for C) is converted to * substring(A, B, C) - thomas 2000-11-28 @@ -205392,8 +219262,8 @@ YYLTYPE yylloc; ;} break; - case 849: -#line 2491 "third_party/libpg_query/grammar/statements/select.y" + case 852: +#line 2501 "third_party/libpg_query/grammar/statements/select.y" { /* TREAT(expr AS target) converts expr of a particular type to target, * which is defined to be a subtype of the original expression. @@ -205410,8 +219280,8 @@ YYLTYPE yylloc; ;} break; - case 850: -#line 2506 "third_party/libpg_query/grammar/statements/select.y" + case 853: +#line 2516 "third_party/libpg_query/grammar/statements/select.y" { /* various trim expressions are defined in SQL * - thomas 1997-07-19 @@ -205420,36 +219290,36 @@ YYLTYPE yylloc; ;} break; - case 851: -#line 2513 "third_party/libpg_query/grammar/statements/select.y" + case 854: +#line 2523 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeFuncCall(SystemFuncName("ltrim"), (yyvsp[(4) - (5)].list), (yylsp[(1) - (5)])); ;} break; - case 852: -#line 2517 "third_party/libpg_query/grammar/statements/select.y" + case 855: +#line 2527 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeFuncCall(SystemFuncName("rtrim"), (yyvsp[(4) - (5)].list), (yylsp[(1) - (5)])); ;} break; - case 853: -#line 2521 "third_party/libpg_query/grammar/statements/select.y" + case 856: +#line 2531 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeFuncCall(SystemFuncName("trim"), (yyvsp[(3) - (4)].list), (yylsp[(1) - (4)])); ;} break; - case 854: -#line 2525 "third_party/libpg_query/grammar/statements/select.y" + case 857: +#line 2535 
"third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeSimpleAExpr(PG_AEXPR_NULLIF, "=", (yyvsp[(3) - (6)].node), (yyvsp[(5) - (6)].node), (yylsp[(1) - (6)])); ;} break; - case 855: -#line 2529 "third_party/libpg_query/grammar/statements/select.y" + case 858: +#line 2539 "third_party/libpg_query/grammar/statements/select.y" { PGCoalesceExpr *c = makeNode(PGCoalesceExpr); c->args = (yyvsp[(3) - (4)].list); @@ -205458,48 +219328,48 @@ YYLTYPE yylloc; ;} break; - case 856: -#line 2542 "third_party/libpg_query/grammar/statements/select.y" + case 859: +#line 2552 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(4) - (5)].list); ;} break; - case 857: -#line 2543 "third_party/libpg_query/grammar/statements/select.y" + case 860: +#line 2553 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = NIL; ;} break; - case 858: -#line 2547 "third_party/libpg_query/grammar/statements/select.y" + case 861: +#line 2557 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(4) - (5)].node); ;} break; - case 859: -#line 2548 "third_party/libpg_query/grammar/statements/select.y" + case 862: +#line 2558 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = NULL; ;} break; - case 860: -#line 2556 "third_party/libpg_query/grammar/statements/select.y" + case 863: +#line 2566 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(2) - (2)].list); ;} break; - case 861: -#line 2557 "third_party/libpg_query/grammar/statements/select.y" + case 864: +#line 2567 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = NIL; ;} break; - case 862: -#line 2561 "third_party/libpg_query/grammar/statements/select.y" + case 865: +#line 2571 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].windef)); ;} break; - case 863: -#line 2563 "third_party/libpg_query/grammar/statements/select.y" + case 866: 
+#line 2573 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].windef)); ;} break; - case 864: -#line 2568 "third_party/libpg_query/grammar/statements/select.y" + case 867: +#line 2578 "third_party/libpg_query/grammar/statements/select.y" { PGWindowDef *n = (yyvsp[(3) - (3)].windef); n->name = (yyvsp[(1) - (3)].str); @@ -205507,13 +219377,13 @@ YYLTYPE yylloc; ;} break; - case 865: -#line 2576 "third_party/libpg_query/grammar/statements/select.y" + case 868: +#line 2586 "third_party/libpg_query/grammar/statements/select.y" { (yyval.windef) = (yyvsp[(2) - (2)].windef); ;} break; - case 866: -#line 2578 "third_party/libpg_query/grammar/statements/select.y" + case 869: +#line 2588 "third_party/libpg_query/grammar/statements/select.y" { PGWindowDef *n = makeNode(PGWindowDef); n->name = (yyvsp[(2) - (2)].str); @@ -205528,13 +219398,13 @@ YYLTYPE yylloc; ;} break; - case 867: -#line 2591 "third_party/libpg_query/grammar/statements/select.y" + case 870: +#line 2601 "third_party/libpg_query/grammar/statements/select.y" { (yyval.windef) = NULL; ;} break; - case 868: -#line 2596 "third_party/libpg_query/grammar/statements/select.y" + case 871: +#line 2606 "third_party/libpg_query/grammar/statements/select.y" { PGWindowDef *n = makeNode(PGWindowDef); n->name = NULL; @@ -205550,49 +219420,37 @@ YYLTYPE yylloc; ;} break; - case 869: -#line 2621 "third_party/libpg_query/grammar/statements/select.y" + case 872: +#line 2631 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 870: -#line 2622 "third_party/libpg_query/grammar/statements/select.y" + case 873: +#line 2632 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = NULL; ;} break; - case 871: -#line 2625 "third_party/libpg_query/grammar/statements/select.y" + case 874: +#line 2635 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(3) - (3)].list); ;} 
break; - case 872: -#line 2626 "third_party/libpg_query/grammar/statements/select.y" + case 875: +#line 2636 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = NIL; ;} break; - case 873: -#line 2638 "third_party/libpg_query/grammar/statements/select.y" + case 876: +#line 2648 "third_party/libpg_query/grammar/statements/select.y" { PGWindowDef *n = (yyvsp[(2) - (2)].windef); n->frameOptions |= FRAMEOPTION_NONDEFAULT | FRAMEOPTION_RANGE; - if (n->frameOptions & (FRAMEOPTION_START_VALUE_PRECEDING | - FRAMEOPTION_END_VALUE_PRECEDING)) - ereport(ERROR, - (errcode(PG_ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("RANGE PRECEDING is only supported with UNBOUNDED"), - parser_errposition((yylsp[(1) - (2)])))); - if (n->frameOptions & (FRAMEOPTION_START_VALUE_FOLLOWING | - FRAMEOPTION_END_VALUE_FOLLOWING)) - ereport(ERROR, - (errcode(PG_ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("RANGE FOLLOWING is only supported with UNBOUNDED"), - parser_errposition((yylsp[(1) - (2)])))); (yyval.windef) = n; ;} break; - case 874: -#line 2656 "third_party/libpg_query/grammar/statements/select.y" + case 877: +#line 2654 "third_party/libpg_query/grammar/statements/select.y" { PGWindowDef *n = (yyvsp[(2) - (2)].windef); n->frameOptions |= FRAMEOPTION_NONDEFAULT | FRAMEOPTION_ROWS; @@ -205600,8 +219458,8 @@ YYLTYPE yylloc; ;} break; - case 875: -#line 2662 "third_party/libpg_query/grammar/statements/select.y" + case 878: +#line 2660 "third_party/libpg_query/grammar/statements/select.y" { PGWindowDef *n = makeNode(PGWindowDef); n->frameOptions = FRAMEOPTION_DEFAULTS; @@ -205611,8 +219469,8 @@ YYLTYPE yylloc; ;} break; - case 876: -#line 2672 "third_party/libpg_query/grammar/statements/select.y" + case 879: +#line 2670 "third_party/libpg_query/grammar/statements/select.y" { PGWindowDef *n = (yyvsp[(1) - (1)].windef); /* reject invalid cases */ @@ -205631,8 +219489,8 @@ YYLTYPE yylloc; ;} break; - case 877: -#line 2689 "third_party/libpg_query/grammar/statements/select.y" + case 880: 
+#line 2687 "third_party/libpg_query/grammar/statements/select.y" { PGWindowDef *n1 = (yyvsp[(2) - (4)].windef); PGWindowDef *n2 = (yyvsp[(4) - (4)].windef); @@ -205671,8 +219529,8 @@ YYLTYPE yylloc; ;} break; - case 878: -#line 2734 "third_party/libpg_query/grammar/statements/select.y" + case 881: +#line 2732 "third_party/libpg_query/grammar/statements/select.y" { PGWindowDef *n = makeNode(PGWindowDef); n->frameOptions = FRAMEOPTION_START_UNBOUNDED_PRECEDING; @@ -205682,8 +219540,8 @@ YYLTYPE yylloc; ;} break; - case 879: -#line 2742 "third_party/libpg_query/grammar/statements/select.y" + case 882: +#line 2740 "third_party/libpg_query/grammar/statements/select.y" { PGWindowDef *n = makeNode(PGWindowDef); n->frameOptions = FRAMEOPTION_START_UNBOUNDED_FOLLOWING; @@ -205693,8 +219551,8 @@ YYLTYPE yylloc; ;} break; - case 880: -#line 2750 "third_party/libpg_query/grammar/statements/select.y" + case 883: +#line 2748 "third_party/libpg_query/grammar/statements/select.y" { PGWindowDef *n = makeNode(PGWindowDef); n->frameOptions = FRAMEOPTION_START_CURRENT_ROW; @@ -205704,8 +219562,8 @@ YYLTYPE yylloc; ;} break; - case 881: -#line 2758 "third_party/libpg_query/grammar/statements/select.y" + case 884: +#line 2756 "third_party/libpg_query/grammar/statements/select.y" { PGWindowDef *n = makeNode(PGWindowDef); n->frameOptions = FRAMEOPTION_START_VALUE_PRECEDING; @@ -205715,8 +219573,8 @@ YYLTYPE yylloc; ;} break; - case 882: -#line 2766 "third_party/libpg_query/grammar/statements/select.y" + case 885: +#line 2764 "third_party/libpg_query/grammar/statements/select.y" { PGWindowDef *n = makeNode(PGWindowDef); n->frameOptions = FRAMEOPTION_START_VALUE_FOLLOWING; @@ -205726,232 +219584,254 @@ YYLTYPE yylloc; ;} break; - case 883: -#line 2786 "third_party/libpg_query/grammar/statements/select.y" + case 886: +#line 2784 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(3) - (4)].list); ;} break; - case 884: -#line 2787 
"third_party/libpg_query/grammar/statements/select.y" + case 887: +#line 2785 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = NIL; ;} break; - case 885: -#line 2790 "third_party/libpg_query/grammar/statements/select.y" + case 888: +#line 2788 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(1) - (1)].list);;} break; - case 886: -#line 2791 "third_party/libpg_query/grammar/statements/select.y" + case 889: +#line 2789 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = lappend((yyvsp[(2) - (5)].list), (yyvsp[(4) - (5)].node)); ;} break; - case 887: -#line 2794 "third_party/libpg_query/grammar/statements/select.y" + case 890: +#line 2793 "third_party/libpg_query/grammar/statements/select.y" + { + PGNamedArgExpr *na = makeNode(PGNamedArgExpr); + na->name = (yyvsp[(1) - (3)].str); + na->arg = (PGExpr *) (yyvsp[(3) - (3)].node); + na->argnumber = -1; + na->location = (yylsp[(1) - (3)]); + (yyval.node) = (PGNode *) na; + ;} + break; + + case 891: +#line 2803 "third_party/libpg_query/grammar/statements/select.y" + { (yyval.list) = list_make1((yyvsp[(1) - (1)].node)); ;} + break; + + case 892: +#line 2804 "third_party/libpg_query/grammar/statements/select.y" + { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].node)); ;} + break; + + case 893: +#line 2806 "third_party/libpg_query/grammar/statements/select.y" { (yyval.subquerytype) = PG_ANY_SUBLINK; ;} break; - case 888: -#line 2795 "third_party/libpg_query/grammar/statements/select.y" + case 894: +#line 2807 "third_party/libpg_query/grammar/statements/select.y" { (yyval.subquerytype) = PG_ANY_SUBLINK; ;} break; - case 889: -#line 2796 "third_party/libpg_query/grammar/statements/select.y" + case 895: +#line 2808 "third_party/libpg_query/grammar/statements/select.y" { (yyval.subquerytype) = PG_ALL_SUBLINK; ;} break; - case 890: -#line 2799 "third_party/libpg_query/grammar/statements/select.y" + case 896: +#line 2811 
"third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 891: -#line 2800 "third_party/libpg_query/grammar/statements/select.y" + case 897: +#line 2812 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (char*) (yyvsp[(1) - (1)].conststr); ;} break; - case 892: -#line 2803 "third_party/libpg_query/grammar/statements/select.y" + case 898: +#line 2815 "third_party/libpg_query/grammar/statements/select.y" { (yyval.conststr) = "+"; ;} break; - case 893: -#line 2804 "third_party/libpg_query/grammar/statements/select.y" + case 899: +#line 2816 "third_party/libpg_query/grammar/statements/select.y" { (yyval.conststr) = "-"; ;} break; - case 894: -#line 2805 "third_party/libpg_query/grammar/statements/select.y" + case 900: +#line 2817 "third_party/libpg_query/grammar/statements/select.y" { (yyval.conststr) = "*"; ;} break; - case 895: -#line 2806 "third_party/libpg_query/grammar/statements/select.y" + case 901: +#line 2818 "third_party/libpg_query/grammar/statements/select.y" { (yyval.conststr) = "/"; ;} break; - case 896: -#line 2807 "third_party/libpg_query/grammar/statements/select.y" + case 902: +#line 2819 "third_party/libpg_query/grammar/statements/select.y" { (yyval.conststr) = "%"; ;} break; - case 897: -#line 2808 "third_party/libpg_query/grammar/statements/select.y" + case 903: +#line 2820 "third_party/libpg_query/grammar/statements/select.y" { (yyval.conststr) = "^"; ;} break; - case 898: -#line 2809 "third_party/libpg_query/grammar/statements/select.y" + case 904: +#line 2821 "third_party/libpg_query/grammar/statements/select.y" { (yyval.conststr) = "<"; ;} break; - case 899: -#line 2810 "third_party/libpg_query/grammar/statements/select.y" + case 905: +#line 2822 "third_party/libpg_query/grammar/statements/select.y" { (yyval.conststr) = ">"; ;} break; - case 900: -#line 2811 "third_party/libpg_query/grammar/statements/select.y" + case 906: +#line 2823 
"third_party/libpg_query/grammar/statements/select.y" { (yyval.conststr) = "="; ;} break; - case 901: -#line 2812 "third_party/libpg_query/grammar/statements/select.y" + case 907: +#line 2824 "third_party/libpg_query/grammar/statements/select.y" { (yyval.conststr) = "<="; ;} break; - case 902: -#line 2813 "third_party/libpg_query/grammar/statements/select.y" + case 908: +#line 2825 "third_party/libpg_query/grammar/statements/select.y" { (yyval.conststr) = ">="; ;} break; - case 903: -#line 2814 "third_party/libpg_query/grammar/statements/select.y" + case 909: +#line 2826 "third_party/libpg_query/grammar/statements/select.y" { (yyval.conststr) = "<>"; ;} break; - case 904: -#line 2818 "third_party/libpg_query/grammar/statements/select.y" + case 910: +#line 2830 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1(makeString((yyvsp[(1) - (1)].str))); ;} break; - case 905: -#line 2820 "third_party/libpg_query/grammar/statements/select.y" + case 911: +#line 2832 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(3) - (4)].list); ;} break; - case 906: -#line 2825 "third_party/libpg_query/grammar/statements/select.y" + case 912: +#line 2837 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1(makeString((yyvsp[(1) - (1)].str))); ;} break; - case 907: -#line 2827 "third_party/libpg_query/grammar/statements/select.y" + case 913: +#line 2839 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(3) - (4)].list); ;} break; - case 908: -#line 2832 "third_party/libpg_query/grammar/statements/select.y" + case 914: +#line 2844 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1(makeString((yyvsp[(1) - (1)].str))); ;} break; - case 909: -#line 2834 "third_party/libpg_query/grammar/statements/select.y" + case 915: +#line 2846 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(3) - (4)].list); ;} break; - case 910: 
-#line 2836 "third_party/libpg_query/grammar/statements/select.y" + case 916: +#line 2848 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1(makeString("~~")); ;} break; - case 911: -#line 2838 "third_party/libpg_query/grammar/statements/select.y" + case 917: +#line 2850 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1(makeString("!~~")); ;} break; - case 912: -#line 2840 "third_party/libpg_query/grammar/statements/select.y" + case 918: +#line 2852 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1(makeString("~~~")); ;} break; - case 913: -#line 2842 "third_party/libpg_query/grammar/statements/select.y" + case 919: +#line 2854 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1(makeString("!~~~")); ;} break; - case 914: -#line 2844 "third_party/libpg_query/grammar/statements/select.y" + case 920: +#line 2856 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1(makeString("~~*")); ;} break; - case 915: -#line 2846 "third_party/libpg_query/grammar/statements/select.y" + case 921: +#line 2858 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1(makeString("!~~*")); ;} break; - case 916: -#line 2860 "third_party/libpg_query/grammar/statements/select.y" + case 922: +#line 2872 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1(makeString((yyvsp[(1) - (1)].str))); ;} break; - case 917: -#line 2862 "third_party/libpg_query/grammar/statements/select.y" + case 923: +#line 2874 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = lcons(makeString((yyvsp[(1) - (3)].str)), (yyvsp[(3) - (3)].list)); ;} break; - case 918: -#line 2866 "third_party/libpg_query/grammar/statements/select.y" + case 924: +#line 2878 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].node)); ;} break; - case 919: -#line 2870 
"third_party/libpg_query/grammar/statements/select.y" + case 925: +#line 2882 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].node)); ;} break; - case 920: -#line 2877 "third_party/libpg_query/grammar/statements/select.y" + case 926: +#line 2889 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(1) - (1)].list); ;} break; - case 921: -#line 2881 "third_party/libpg_query/grammar/statements/select.y" + case 927: +#line 2893 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = NULL; ;} break; - case 922: -#line 2889 "third_party/libpg_query/grammar/statements/select.y" + case 928: +#line 2901 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].node)); ;} break; - case 923: -#line 2893 "third_party/libpg_query/grammar/statements/select.y" + case 929: +#line 2905 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].node)); ;} break; - case 924: -#line 2899 "third_party/libpg_query/grammar/statements/select.y" + case 930: +#line 2911 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 925: -#line 2903 "third_party/libpg_query/grammar/statements/select.y" + case 931: +#line 2915 "third_party/libpg_query/grammar/statements/select.y" { PGNamedArgExpr *na = makeNode(PGNamedArgExpr); na->name = (yyvsp[(1) - (3)].str); @@ -205962,8 +219842,8 @@ YYLTYPE yylloc; ;} break; - case 926: -#line 2912 "third_party/libpg_query/grammar/statements/select.y" + case 932: +#line 2924 "third_party/libpg_query/grammar/statements/select.y" { PGNamedArgExpr *na = makeNode(PGNamedArgExpr); na->name = (yyvsp[(1) - (3)].str); @@ -205974,131 +219854,131 @@ YYLTYPE yylloc; ;} break; - case 927: -#line 2922 "third_party/libpg_query/grammar/statements/select.y" + case 933: +#line 2934 
"third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].typnam)); ;} break; - case 928: -#line 2923 "third_party/libpg_query/grammar/statements/select.y" + case 934: +#line 2935 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].typnam)); ;} break; - case 929: -#line 2928 "third_party/libpg_query/grammar/statements/select.y" + case 935: +#line 2940 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make2(makeStringConst((yyvsp[(1) - (3)].str), (yylsp[(1) - (3)])), (yyvsp[(3) - (3)].node)); ;} break; - case 930: -#line 2931 "third_party/libpg_query/grammar/statements/select.y" + case 936: +#line 2943 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = NIL; ;} break; - case 931: -#line 2938 "third_party/libpg_query/grammar/statements/select.y" + case 937: +#line 2950 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 932: -#line 2939 "third_party/libpg_query/grammar/statements/select.y" + case 938: +#line 2951 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (char*) "year"; ;} break; - case 933: -#line 2940 "third_party/libpg_query/grammar/statements/select.y" + case 939: +#line 2952 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (char*) "month"; ;} break; - case 934: -#line 2941 "third_party/libpg_query/grammar/statements/select.y" + case 940: +#line 2953 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (char*) "day"; ;} break; - case 935: -#line 2942 "third_party/libpg_query/grammar/statements/select.y" + case 941: +#line 2954 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (char*) "hour"; ;} break; - case 936: -#line 2943 "third_party/libpg_query/grammar/statements/select.y" + case 942: +#line 2955 "third_party/libpg_query/grammar/statements/select.y" { 
(yyval.str) = (char*) "minute"; ;} break; - case 937: -#line 2944 "third_party/libpg_query/grammar/statements/select.y" + case 943: +#line 2956 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (char*) "second"; ;} break; - case 938: -#line 2945 "third_party/libpg_query/grammar/statements/select.y" + case 944: +#line 2957 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (char*) "millisecond"; ;} break; - case 939: -#line 2946 "third_party/libpg_query/grammar/statements/select.y" + case 945: +#line 2958 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (char*) "microsecond"; ;} break; - case 940: -#line 2947 "third_party/libpg_query/grammar/statements/select.y" + case 946: +#line 2959 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 941: -#line 2958 "third_party/libpg_query/grammar/statements/select.y" + case 947: +#line 2970 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make4((yyvsp[(1) - (4)].node), (yyvsp[(2) - (4)].node), (yyvsp[(3) - (4)].node), (yyvsp[(4) - (4)].node)); ;} break; - case 942: -#line 2962 "third_party/libpg_query/grammar/statements/select.y" + case 948: +#line 2974 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make3((yyvsp[(1) - (3)].node), (yyvsp[(2) - (3)].node), (yyvsp[(3) - (3)].node)); ;} break; - case 943: -#line 2969 "third_party/libpg_query/grammar/statements/select.y" + case 949: +#line 2981 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(2) - (2)].node); ;} break; - case 944: -#line 2975 "third_party/libpg_query/grammar/statements/select.y" + case 950: +#line 2987 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make2((yyvsp[(3) - (3)].node), (yyvsp[(1) - (3)].node)); ;} break; - case 945: -#line 2976 "third_party/libpg_query/grammar/statements/select.y" + case 951: +#line 2988 
"third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = NIL; ;} break; - case 946: -#line 2993 "third_party/libpg_query/grammar/statements/select.y" + case 952: +#line 3005 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make3((yyvsp[(1) - (3)].node), (yyvsp[(2) - (3)].node), (yyvsp[(3) - (3)].node)); ;} break; - case 947: -#line 2997 "third_party/libpg_query/grammar/statements/select.y" + case 953: +#line 3009 "third_party/libpg_query/grammar/statements/select.y" { /* not legal per SQL99, but might as well allow it */ (yyval.list) = list_make3((yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node), (yyvsp[(2) - (3)].node)); ;} break; - case 948: -#line 3002 "third_party/libpg_query/grammar/statements/select.y" + case 954: +#line 3014 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make2((yyvsp[(1) - (2)].node), (yyvsp[(2) - (2)].node)); ;} break; - case 949: -#line 3006 "third_party/libpg_query/grammar/statements/select.y" + case 955: +#line 3018 "third_party/libpg_query/grammar/statements/select.y" { /* * Since there are no cases where this syntax allows @@ -206115,45 +219995,45 @@ YYLTYPE yylloc; ;} break; - case 950: -#line 3021 "third_party/libpg_query/grammar/statements/select.y" + case 956: +#line 3033 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(1) - (1)].list); ;} break; - case 951: -#line 3025 "third_party/libpg_query/grammar/statements/select.y" + case 957: +#line 3037 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = NIL; ;} break; - case 952: -#line 3029 "third_party/libpg_query/grammar/statements/select.y" + case 958: +#line 3041 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(2) - (2)].node); ;} break; - case 953: -#line 3032 "third_party/libpg_query/grammar/statements/select.y" + case 959: +#line 3044 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(2) - (2)].node); ;} 
break; - case 954: -#line 3035 "third_party/libpg_query/grammar/statements/select.y" + case 960: +#line 3047 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = lappend((yyvsp[(3) - (3)].list), (yyvsp[(1) - (3)].node)); ;} break; - case 955: -#line 3036 "third_party/libpg_query/grammar/statements/select.y" + case 961: +#line 3048 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(2) - (2)].list); ;} break; - case 956: -#line 3037 "third_party/libpg_query/grammar/statements/select.y" + case 962: +#line 3049 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(1) - (1)].list); ;} break; - case 957: -#line 3041 "third_party/libpg_query/grammar/statements/select.y" + case 963: +#line 3053 "third_party/libpg_query/grammar/statements/select.y" { PGSubLink *n = makeNode(PGSubLink); n->subselect = (yyvsp[(1) - (1)].node); @@ -206162,13 +220042,13 @@ YYLTYPE yylloc; ;} break; - case 958: -#line 3047 "third_party/libpg_query/grammar/statements/select.y" + case 964: +#line 3059 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *)(yyvsp[(2) - (3)].list); ;} break; - case 959: -#line 3058 "third_party/libpg_query/grammar/statements/select.y" + case 965: +#line 3070 "third_party/libpg_query/grammar/statements/select.y" { PGCaseExpr *c = makeNode(PGCaseExpr); c->casetype = InvalidOid; /* not analyzed yet */ @@ -206180,18 +220060,18 @@ YYLTYPE yylloc; ;} break; - case 960: -#line 3071 "third_party/libpg_query/grammar/statements/select.y" + case 966: +#line 3083 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].node)); ;} break; - case 961: -#line 3072 "third_party/libpg_query/grammar/statements/select.y" + case 967: +#line 3084 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = lappend((yyvsp[(1) - (2)].list), (yyvsp[(2) - (2)].node)); ;} break; - case 962: -#line 3077 
"third_party/libpg_query/grammar/statements/select.y" + case 968: +#line 3089 "third_party/libpg_query/grammar/statements/select.y" { PGCaseWhen *w = makeNode(PGCaseWhen); w->expr = (PGExpr *) (yyvsp[(2) - (4)].node); @@ -206201,56 +220081,56 @@ YYLTYPE yylloc; ;} break; - case 963: -#line 3087 "third_party/libpg_query/grammar/statements/select.y" + case 969: +#line 3099 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(2) - (2)].node); ;} break; - case 964: -#line 3088 "third_party/libpg_query/grammar/statements/select.y" + case 970: +#line 3100 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = NULL; ;} break; - case 965: -#line 3091 "third_party/libpg_query/grammar/statements/select.y" + case 971: +#line 3103 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 966: -#line 3092 "third_party/libpg_query/grammar/statements/select.y" + case 972: +#line 3104 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = NULL; ;} break; - case 967: -#line 3096 "third_party/libpg_query/grammar/statements/select.y" + case 973: +#line 3108 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeColumnRef((yyvsp[(1) - (1)].str), NIL, (yylsp[(1) - (1)]), yyscanner); ;} break; - case 968: -#line 3100 "third_party/libpg_query/grammar/statements/select.y" + case 974: +#line 3112 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeColumnRef((yyvsp[(1) - (2)].str), (yyvsp[(2) - (2)].list), (yylsp[(1) - (2)]), yyscanner); ;} break; - case 969: -#line 3107 "third_party/libpg_query/grammar/statements/select.y" + case 975: +#line 3119 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (PGNode *) makeString((yyvsp[(2) - (2)].str)); ;} break; - case 970: -#line 3111 "third_party/libpg_query/grammar/statements/select.y" + case 976: +#line 3123 "third_party/libpg_query/grammar/statements/select.y" { 
(yyval.node) = (PGNode *) makeNode(PGAStar); ;} break; - case 971: -#line 3115 "third_party/libpg_query/grammar/statements/select.y" + case 977: +#line 3127 "third_party/libpg_query/grammar/statements/select.y" { PGAIndices *ai = makeNode(PGAIndices); ai->is_slice = false; @@ -206260,8 +220140,8 @@ YYLTYPE yylloc; ;} break; - case 972: -#line 3123 "third_party/libpg_query/grammar/statements/select.y" + case 978: +#line 3135 "third_party/libpg_query/grammar/statements/select.y" { PGAIndices *ai = makeNode(PGAIndices); ai->is_slice = true; @@ -206271,58 +220151,58 @@ YYLTYPE yylloc; ;} break; - case 973: -#line 3133 "third_party/libpg_query/grammar/statements/select.y" + case 979: +#line 3145 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = (yyvsp[(1) - (1)].node); ;} break; - case 974: -#line 3134 "third_party/libpg_query/grammar/statements/select.y" + case 980: +#line 3146 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = NULL; ;} break; - case 975: -#line 3138 "third_party/libpg_query/grammar/statements/select.y" + case 981: +#line 3150 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].node)); ;} break; - case 976: -#line 3139 "third_party/libpg_query/grammar/statements/select.y" + case 982: +#line 3151 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = lappend((yyvsp[(1) - (2)].list), (yyvsp[(2) - (2)].node)); ;} break; - case 977: -#line 3143 "third_party/libpg_query/grammar/statements/select.y" + case 983: +#line 3155 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = NIL; ;} break; - case 978: -#line 3144 "third_party/libpg_query/grammar/statements/select.y" + case 984: +#line 3156 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = lappend((yyvsp[(1) - (2)].list), (yyvsp[(2) - (2)].node)); ;} break; - case 981: -#line 3158 "third_party/libpg_query/grammar/statements/select.y" + case 987: +#line 3170 
"third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(1) - (1)].list); ;} break; - case 982: -#line 3159 "third_party/libpg_query/grammar/statements/select.y" + case 988: +#line 3171 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = NIL; ;} break; - case 983: -#line 3163 "third_party/libpg_query/grammar/statements/select.y" + case 989: +#line 3175 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].target)); ;} break; - case 984: -#line 3164 "third_party/libpg_query/grammar/statements/select.y" + case 990: +#line 3176 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].target)); ;} break; - case 985: -#line 3168 "third_party/libpg_query/grammar/statements/select.y" + case 991: +#line 3180 "third_party/libpg_query/grammar/statements/select.y" { (yyval.target) = makeNode(PGResTarget); (yyval.target)->name = (yyvsp[(3) - (3)].str); @@ -206332,8 +220212,8 @@ YYLTYPE yylloc; ;} break; - case 986: -#line 3184 "third_party/libpg_query/grammar/statements/select.y" + case 992: +#line 3196 "third_party/libpg_query/grammar/statements/select.y" { (yyval.target) = makeNode(PGResTarget); (yyval.target)->name = (yyvsp[(2) - (2)].str); @@ -206343,8 +220223,8 @@ YYLTYPE yylloc; ;} break; - case 987: -#line 3192 "third_party/libpg_query/grammar/statements/select.y" + case 993: +#line 3204 "third_party/libpg_query/grammar/statements/select.y" { (yyval.target) = makeNode(PGResTarget); (yyval.target)->name = NULL; @@ -206354,8 +220234,8 @@ YYLTYPE yylloc; ;} break; - case 988: -#line 3200 "third_party/libpg_query/grammar/statements/select.y" + case 994: +#line 3212 "third_party/libpg_query/grammar/statements/select.y" { PGColumnRef *n = makeNode(PGColumnRef); n->fields = list_make1(makeNode(PGAStar)); @@ -206369,25 +220249,25 @@ YYLTYPE yylloc; ;} break; - case 989: -#line 3221 
"third_party/libpg_query/grammar/statements/select.y" + case 995: +#line 3233 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].range)); ;} break; - case 990: -#line 3222 "third_party/libpg_query/grammar/statements/select.y" + case 996: +#line 3234 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].range)); ;} break; - case 991: -#line 3234 "third_party/libpg_query/grammar/statements/select.y" + case 997: +#line 3246 "third_party/libpg_query/grammar/statements/select.y" { (yyval.range) = makeRangeVar(NULL, (yyvsp[(1) - (1)].str), (yylsp[(1) - (1)])); ;} break; - case 992: -#line 3238 "third_party/libpg_query/grammar/statements/select.y" + case 998: +#line 3250 "third_party/libpg_query/grammar/statements/select.y" { check_qualified_name((yyvsp[(2) - (2)].list), yyscanner); (yyval.range) = makeRangeVar(NULL, NULL, (yylsp[(1) - (2)])); @@ -206414,55 +220294,55 @@ YYLTYPE yylloc; ;} break; - case 993: -#line 3265 "third_party/libpg_query/grammar/statements/select.y" + case 999: +#line 3277 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1(makeString((yyvsp[(1) - (1)].str))); ;} break; - case 994: -#line 3267 "third_party/libpg_query/grammar/statements/select.y" + case 1000: +#line 3279 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), makeString((yyvsp[(3) - (3)].str))); ;} break; - case 995: -#line 3271 "third_party/libpg_query/grammar/statements/select.y" + case 1001: +#line 3283 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 996: -#line 3273 "third_party/libpg_query/grammar/statements/select.y" + case 1002: +#line 3285 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 997: -#line 3284 "third_party/libpg_query/grammar/statements/select.y" + 
case 1003: +#line 3296 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1(makeString((yyvsp[(1) - (1)].str))); ;} break; - case 998: -#line 3287 "third_party/libpg_query/grammar/statements/select.y" + case 1004: +#line 3299 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = check_func_name(lcons(makeString((yyvsp[(1) - (2)].str)), (yyvsp[(2) - (2)].list)), yyscanner); ;} break; - case 999: -#line 3298 "third_party/libpg_query/grammar/statements/select.y" + case 1005: +#line 3310 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeIntConst((yyvsp[(1) - (1)].ival), (yylsp[(1) - (1)])); ;} break; - case 1000: -#line 3302 "third_party/libpg_query/grammar/statements/select.y" + case 1006: +#line 3314 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeFloatConst((yyvsp[(1) - (1)].str), (yylsp[(1) - (1)])); ;} break; - case 1001: -#line 3306 "third_party/libpg_query/grammar/statements/select.y" + case 1007: +#line 3318 "third_party/libpg_query/grammar/statements/select.y" { if ((yyvsp[(2) - (2)].list)) { @@ -206476,15 +220356,15 @@ YYLTYPE yylloc; ;} break; - case 1002: -#line 3318 "third_party/libpg_query/grammar/statements/select.y" + case 1008: +#line 3330 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeBitStringConst((yyvsp[(1) - (1)].str), (yylsp[(1) - (1)])); ;} break; - case 1003: -#line 3322 "third_party/libpg_query/grammar/statements/select.y" + case 1009: +#line 3334 "third_party/libpg_query/grammar/statements/select.y" { /* This is a bit constant per SQL99: * Without Feature F511, "BIT data type", @@ -206495,8 +220375,8 @@ YYLTYPE yylloc; ;} break; - case 1004: -#line 3331 "third_party/libpg_query/grammar/statements/select.y" + case 1010: +#line 3343 "third_party/libpg_query/grammar/statements/select.y" { /* generic type 'literal' syntax */ PGTypeName *t = makeTypeNameFromNameList((yyvsp[(1) - (2)].list)); @@ -206505,8 +220385,8 @@ 
YYLTYPE yylloc; ;} break; - case 1005: -#line 3338 "third_party/libpg_query/grammar/statements/select.y" + case 1011: +#line 3350 "third_party/libpg_query/grammar/statements/select.y" { /* generic syntax with a type modifier */ PGTypeName *t = makeTypeNameFromNameList((yyvsp[(1) - (6)].list)); @@ -206540,196 +220420,196 @@ YYLTYPE yylloc; ;} break; - case 1006: -#line 3370 "third_party/libpg_query/grammar/statements/select.y" + case 1012: +#line 3382 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeStringConstCast((yyvsp[(2) - (2)].str), (yylsp[(2) - (2)]), (yyvsp[(1) - (2)].typnam)); ;} break; - case 1007: -#line 3374 "third_party/libpg_query/grammar/statements/select.y" + case 1013: +#line 3386 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeIntervalNode((yyvsp[(3) - (5)].node), (yylsp[(3) - (5)]), (yyvsp[(5) - (5)].list)); ;} break; - case 1008: -#line 3378 "third_party/libpg_query/grammar/statements/select.y" + case 1014: +#line 3390 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeIntervalNode((yyvsp[(2) - (3)].ival), (yylsp[(2) - (3)]), (yyvsp[(3) - (3)].list)); ;} break; - case 1009: -#line 3382 "third_party/libpg_query/grammar/statements/select.y" + case 1015: +#line 3394 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeIntervalNode((yyvsp[(2) - (3)].str), (yylsp[(2) - (3)]), (yyvsp[(3) - (3)].list)); ;} break; - case 1010: -#line 3386 "third_party/libpg_query/grammar/statements/select.y" + case 1016: +#line 3398 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeBoolAConst(true, (yylsp[(1) - (1)])); ;} break; - case 1011: -#line 3390 "third_party/libpg_query/grammar/statements/select.y" + case 1017: +#line 3402 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeBoolAConst(false, (yylsp[(1) - (1)])); ;} break; - case 1012: -#line 3394 "third_party/libpg_query/grammar/statements/select.y" + case 1018: +#line 
3406 "third_party/libpg_query/grammar/statements/select.y" { (yyval.node) = makeNullAConst((yylsp[(1) - (1)])); ;} break; - case 1013: -#line 3399 "third_party/libpg_query/grammar/statements/select.y" + case 1019: +#line 3411 "third_party/libpg_query/grammar/statements/select.y" { (yyval.ival) = (yyvsp[(1) - (1)].ival); ;} break; - case 1014: -#line 3400 "third_party/libpg_query/grammar/statements/select.y" + case 1020: +#line 3412 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 1015: -#line 3416 "third_party/libpg_query/grammar/statements/select.y" + case 1021: +#line 3428 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 1016: -#line 3417 "third_party/libpg_query/grammar/statements/select.y" + case 1022: +#line 3429 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = pstrdup((yyvsp[(1) - (1)].keyword)); ;} break; - case 1017: -#line 3418 "third_party/libpg_query/grammar/statements/select.y" + case 1023: +#line 3430 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = pstrdup((yyvsp[(1) - (1)].keyword)); ;} break; - case 1018: -#line 3421 "third_party/libpg_query/grammar/statements/select.y" + case 1024: +#line 3433 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 1019: -#line 3422 "third_party/libpg_query/grammar/statements/select.y" + case 1025: +#line 3434 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 1020: -#line 3428 "third_party/libpg_query/grammar/statements/select.y" + case 1026: +#line 3440 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 1021: -#line 3429 "third_party/libpg_query/grammar/statements/select.y" + case 1027: +#line 3441 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = 
pstrdup((yyvsp[(1) - (1)].keyword)); ;} break; - case 1022: -#line 3430 "third_party/libpg_query/grammar/statements/select.y" + case 1028: +#line 3442 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = pstrdup((yyvsp[(1) - (1)].keyword)); ;} break; - case 1023: -#line 3433 "third_party/libpg_query/grammar/statements/select.y" + case 1029: +#line 3445 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 1024: -#line 3434 "third_party/libpg_query/grammar/statements/select.y" + case 1030: +#line 3446 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = pstrdup((yyvsp[(1) - (1)].keyword)); ;} break; - case 1025: -#line 3435 "third_party/libpg_query/grammar/statements/select.y" + case 1031: +#line 3447 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = pstrdup((yyvsp[(1) - (1)].keyword)); ;} break; - case 1026: -#line 3438 "third_party/libpg_query/grammar/statements/select.y" + case 1032: +#line 3450 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 1027: -#line 3439 "third_party/libpg_query/grammar/statements/select.y" + case 1033: +#line 3451 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = pstrdup((yyvsp[(1) - (1)].keyword)); ;} break; - case 1028: -#line 3440 "third_party/libpg_query/grammar/statements/select.y" + case 1034: +#line 3452 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = pstrdup((yyvsp[(1) - (1)].keyword)); ;} break; - case 1029: -#line 3443 "third_party/libpg_query/grammar/statements/select.y" + case 1035: +#line 3455 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1(makeString((yyvsp[(1) - (1)].str))); ;} break; - case 1030: -#line 3444 "third_party/libpg_query/grammar/statements/select.y" + case 1036: +#line 3456 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = 
lcons(makeString((yyvsp[(1) - (2)].str)), (yyvsp[(2) - (2)].list)); ;} break; - case 1031: -#line 3448 "third_party/libpg_query/grammar/statements/select.y" + case 1037: +#line 3460 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = list_make1(makeString((yyvsp[(2) - (2)].str))); ;} break; - case 1032: -#line 3450 "third_party/libpg_query/grammar/statements/select.y" + case 1038: +#line 3462 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), makeString((yyvsp[(3) - (3)].str))); ;} break; - case 1033: -#line 3454 "third_party/libpg_query/grammar/statements/select.y" + case 1039: +#line 3466 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = (yyvsp[(2) - (3)].list); ;} break; - case 1034: -#line 3455 "third_party/libpg_query/grammar/statements/select.y" + case 1040: +#line 3467 "third_party/libpg_query/grammar/statements/select.y" { (yyval.list) = NIL; ;} break; - case 1036: -#line 3466 "third_party/libpg_query/grammar/statements/select.y" + case 1042: +#line 3478 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 1037: -#line 3467 "third_party/libpg_query/grammar/statements/select.y" + case 1043: +#line 3479 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = pstrdup((yyvsp[(1) - (1)].keyword)); ;} break; - case 1038: -#line 3468 "third_party/libpg_query/grammar/statements/select.y" + case 1044: +#line 3480 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = pstrdup((yyvsp[(1) - (1)].keyword)); ;} break; - case 1039: -#line 3469 "third_party/libpg_query/grammar/statements/select.y" + case 1045: +#line 3481 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = pstrdup((yyvsp[(1) - (1)].keyword)); ;} break; - case 1040: -#line 3472 "third_party/libpg_query/grammar/statements/select.y" + case 1046: +#line 3484 "third_party/libpg_query/grammar/statements/select.y" { 
(yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 1041: -#line 3473 "third_party/libpg_query/grammar/statements/select.y" + case 1047: +#line 3485 "third_party/libpg_query/grammar/statements/select.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 1042: + case 1048: #line 8 "third_party/libpg_query/grammar/statements/prepare.y" { PGPrepareStmt *n = makeNode(PGPrepareStmt); @@ -206740,17 +220620,17 @@ YYLTYPE yylloc; ;} break; - case 1043: + case 1049: #line 18 "third_party/libpg_query/grammar/statements/prepare.y" { (yyval.list) = (yyvsp[(2) - (3)].list); ;} break; - case 1044: + case 1050: #line 19 "third_party/libpg_query/grammar/statements/prepare.y" { (yyval.list) = NIL; ;} break; - case 1049: + case 1055: #line 8 "third_party/libpg_query/grammar/statements/create_schema.y" { PGCreateSchemaStmt *n = makeNode(PGCreateSchemaStmt); @@ -206762,7 +220642,7 @@ YYLTYPE yylloc; ;} break; - case 1050: + case 1056: #line 17 "third_party/libpg_query/grammar/statements/create_schema.y" { PGCreateSchemaStmt *n = makeNode(PGCreateSchemaStmt); @@ -206779,7 +220659,7 @@ YYLTYPE yylloc; ;} break; - case 1051: + case 1057: #line 35 "third_party/libpg_query/grammar/statements/create_schema.y" { if ((yyloc) < 0) /* see comments for YYLLOC_DEFAULT */ @@ -206788,12 +220668,12 @@ YYLTYPE yylloc; ;} break; - case 1052: + case 1058: #line 41 "third_party/libpg_query/grammar/statements/create_schema.y" { (yyval.list) = NIL; ;} break; - case 1057: + case 1063: #line 11 "third_party/libpg_query/grammar/statements/index.y" { PGIndexStmt *n = makeNode(PGIndexStmt); @@ -206819,7 +220699,7 @@ YYLTYPE yylloc; ;} break; - case 1058: + case 1064: #line 36 "third_party/libpg_query/grammar/statements/index.y" { PGIndexStmt *n = makeNode(PGIndexStmt); @@ -206845,62 +220725,62 @@ YYLTYPE yylloc; ;} break; - case 1059: + case 1065: #line 62 "third_party/libpg_query/grammar/statements/index.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 1060: + case 1066: #line 66 
"third_party/libpg_query/grammar/statements/index.y" { (yyval.str) = (yyvsp[(2) - (2)].str); ;} break; - case 1061: + case 1067: #line 67 "third_party/libpg_query/grammar/statements/index.y" { (yyval.str) = (char*) DEFAULT_INDEX_TYPE; ;} break; - case 1062: + case 1068: #line 72 "third_party/libpg_query/grammar/statements/index.y" { (yyval.boolean) = true; ;} break; - case 1063: + case 1069: #line 73 "third_party/libpg_query/grammar/statements/index.y" { (yyval.boolean) = false; ;} break; - case 1064: + case 1070: #line 78 "third_party/libpg_query/grammar/statements/index.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 1065: + case 1071: #line 79 "third_party/libpg_query/grammar/statements/index.y" { (yyval.str) = NULL; ;} break; - case 1066: + case 1072: #line 83 "third_party/libpg_query/grammar/statements/index.y" { (yyval.list) = (yyvsp[(2) - (2)].list); ;} break; - case 1067: + case 1073: #line 84 "third_party/libpg_query/grammar/statements/index.y" { (yyval.list) = NIL; ;} break; - case 1068: + case 1074: #line 89 "third_party/libpg_query/grammar/statements/index.y" { (yyval.boolean) = true; ;} break; - case 1069: + case 1075: #line 90 "third_party/libpg_query/grammar/statements/index.y" { (yyval.boolean) = false; ;} break; - case 1070: + case 1076: #line 8 "third_party/libpg_query/grammar/statements/alter_schema.y" { PGAlterObjectSchemaStmt *n = makeNode(PGAlterObjectSchemaStmt); @@ -206912,7 +220792,7 @@ YYLTYPE yylloc; ;} break; - case 1071: + case 1077: #line 17 "third_party/libpg_query/grammar/statements/alter_schema.y" { PGAlterObjectSchemaStmt *n = makeNode(PGAlterObjectSchemaStmt); @@ -206924,7 +220804,7 @@ YYLTYPE yylloc; ;} break; - case 1072: + case 1078: #line 26 "third_party/libpg_query/grammar/statements/alter_schema.y" { PGAlterObjectSchemaStmt *n = makeNode(PGAlterObjectSchemaStmt); @@ -206936,7 +220816,7 @@ YYLTYPE yylloc; ;} break; - case 1073: + case 1079: #line 35 "third_party/libpg_query/grammar/statements/alter_schema.y" { 
PGAlterObjectSchemaStmt *n = makeNode(PGAlterObjectSchemaStmt); @@ -206948,7 +220828,7 @@ YYLTYPE yylloc; ;} break; - case 1074: + case 1080: #line 44 "third_party/libpg_query/grammar/statements/alter_schema.y" { PGAlterObjectSchemaStmt *n = makeNode(PGAlterObjectSchemaStmt); @@ -206960,7 +220840,7 @@ YYLTYPE yylloc; ;} break; - case 1075: + case 1081: #line 53 "third_party/libpg_query/grammar/statements/alter_schema.y" { PGAlterObjectSchemaStmt *n = makeNode(PGAlterObjectSchemaStmt); @@ -206972,7 +220852,7 @@ YYLTYPE yylloc; ;} break; - case 1076: + case 1082: #line 6 "third_party/libpg_query/grammar/statements/checkpoint.y" { PGCheckPointStmt *n = makeNode(PGCheckPointStmt); @@ -206981,7 +220861,7 @@ YYLTYPE yylloc; ;} break; - case 1077: + case 1083: #line 12 "third_party/libpg_query/grammar/statements/checkpoint.y" { PGCheckPointStmt *n = makeNode(PGCheckPointStmt); @@ -206990,7 +220870,7 @@ YYLTYPE yylloc; ;} break; - case 1078: + case 1084: #line 8 "third_party/libpg_query/grammar/statements/export.y" { PGExportStmt *n = makeNode(PGExportStmt); @@ -207003,7 +220883,7 @@ YYLTYPE yylloc; ;} break; - case 1079: + case 1085: #line 21 "third_party/libpg_query/grammar/statements/export.y" { PGImportStmt *n = makeNode(PGImportStmt); @@ -207012,7 +220892,7 @@ YYLTYPE yylloc; ;} break; - case 1080: + case 1086: #line 10 "third_party/libpg_query/grammar/statements/explain.y" { PGExplainStmt *n = makeNode(PGExplainStmt); @@ -207022,7 +220902,7 @@ YYLTYPE yylloc; ;} break; - case 1081: + case 1087: #line 17 "third_party/libpg_query/grammar/statements/explain.y" { PGExplainStmt *n = makeNode(PGExplainStmt); @@ -207035,7 +220915,7 @@ YYLTYPE yylloc; ;} break; - case 1082: + case 1088: #line 27 "third_party/libpg_query/grammar/statements/explain.y" { PGExplainStmt *n = makeNode(PGExplainStmt); @@ -207045,7 +220925,7 @@ YYLTYPE yylloc; ;} break; - case 1083: + case 1089: #line 34 "third_party/libpg_query/grammar/statements/explain.y" { PGExplainStmt *n = 
makeNode(PGExplainStmt); @@ -207055,118 +220935,118 @@ YYLTYPE yylloc; ;} break; - case 1084: + case 1090: #line 44 "third_party/libpg_query/grammar/statements/explain.y" { (yyval.boolean) = true; ;} break; - case 1085: + case 1091: #line 45 "third_party/libpg_query/grammar/statements/explain.y" { (yyval.boolean) = false; ;} break; - case 1086: + case 1092: #line 50 "third_party/libpg_query/grammar/statements/explain.y" { (yyval.node) = (PGNode *) makeString((yyvsp[(1) - (1)].str)); ;} break; - case 1087: + case 1093: #line 51 "third_party/libpg_query/grammar/statements/explain.y" { (yyval.node) = (PGNode *) (yyvsp[(1) - (1)].value); ;} break; - case 1088: + case 1094: #line 52 "third_party/libpg_query/grammar/statements/explain.y" { (yyval.node) = NULL; ;} break; - case 1094: + case 1100: #line 65 "third_party/libpg_query/grammar/statements/explain.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 1095: + case 1101: #line 66 "third_party/libpg_query/grammar/statements/explain.y" { (yyval.str) = pstrdup((yyvsp[(1) - (1)].keyword)); ;} break; - case 1096: + case 1102: #line 67 "third_party/libpg_query/grammar/statements/explain.y" { (yyval.str) = pstrdup((yyvsp[(1) - (1)].keyword)); ;} break; - case 1097: + case 1103: #line 72 "third_party/libpg_query/grammar/statements/explain.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 1098: + case 1104: #line 73 "third_party/libpg_query/grammar/statements/explain.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 1099: + case 1105: #line 79 "third_party/libpg_query/grammar/statements/explain.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].defelt)); ;} break; - case 1100: + case 1106: #line 83 "third_party/libpg_query/grammar/statements/explain.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].defelt)); ;} break; - case 1101: + case 1107: #line 90 "third_party/libpg_query/grammar/statements/explain.y" {;} break; - case 1102: + case 1108: #line 91 
"third_party/libpg_query/grammar/statements/explain.y" {;} break; - case 1103: + case 1109: #line 96 "third_party/libpg_query/grammar/statements/explain.y" { (yyval.str) = (char*) "true"; ;} break; - case 1104: + case 1110: #line 97 "third_party/libpg_query/grammar/statements/explain.y" { (yyval.str) = (char*) "false"; ;} break; - case 1105: + case 1111: #line 98 "third_party/libpg_query/grammar/statements/explain.y" { (yyval.str) = (char*) "on"; ;} break; - case 1106: + case 1112: #line 104 "third_party/libpg_query/grammar/statements/explain.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 1107: + case 1113: #line 110 "third_party/libpg_query/grammar/statements/explain.y" { (yyval.defelt) = makeDefElem((yyvsp[(1) - (2)].str), (yyvsp[(2) - (2)].node), (yylsp[(1) - (2)])); ;} break; - case 1108: + case 1114: #line 117 "third_party/libpg_query/grammar/statements/explain.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 1109: + case 1115: #line 118 "third_party/libpg_query/grammar/statements/explain.y" { (yyval.str) = (char*) "analyze"; ;} break; - case 1110: + case 1116: #line 11 "third_party/libpg_query/grammar/statements/variable_set.y" { PGVariableSetStmt *n = (yyvsp[(2) - (2)].vsetstmt); @@ -207175,7 +221055,7 @@ YYLTYPE yylloc; ;} break; - case 1111: + case 1117: #line 17 "third_party/libpg_query/grammar/statements/variable_set.y" { PGVariableSetStmt *n = (yyvsp[(3) - (3)].vsetstmt); @@ -207184,7 +221064,7 @@ YYLTYPE yylloc; ;} break; - case 1112: + case 1118: #line 23 "third_party/libpg_query/grammar/statements/variable_set.y" { PGVariableSetStmt *n = (yyvsp[(3) - (3)].vsetstmt); @@ -207193,12 +221073,12 @@ YYLTYPE yylloc; ;} break; - case 1113: + case 1119: #line 32 "third_party/libpg_query/grammar/statements/variable_set.y" {(yyval.vsetstmt) = (yyvsp[(1) - (1)].vsetstmt);;} break; - case 1114: + case 1120: #line 34 "third_party/libpg_query/grammar/statements/variable_set.y" { PGVariableSetStmt *n = makeNode(PGVariableSetStmt); @@ 
-207208,7 +221088,7 @@ YYLTYPE yylloc; ;} break; - case 1115: + case 1121: #line 42 "third_party/libpg_query/grammar/statements/variable_set.y" { PGVariableSetStmt *n = makeNode(PGVariableSetStmt); @@ -207222,7 +221102,7 @@ YYLTYPE yylloc; ;} break; - case 1116: + case 1122: #line 53 "third_party/libpg_query/grammar/statements/variable_set.y" { PGVariableSetStmt *n = makeNode(PGVariableSetStmt); @@ -207233,7 +221113,7 @@ YYLTYPE yylloc; ;} break; - case 1117: + case 1123: #line 65 "third_party/libpg_query/grammar/statements/variable_set.y" { PGVariableSetStmt *n = makeNode(PGVariableSetStmt); @@ -207244,7 +221124,7 @@ YYLTYPE yylloc; ;} break; - case 1118: + case 1124: #line 73 "third_party/libpg_query/grammar/statements/variable_set.y" { PGVariableSetStmt *n = makeNode(PGVariableSetStmt); @@ -207255,7 +221135,7 @@ YYLTYPE yylloc; ;} break; - case 1119: + case 1125: #line 81 "third_party/libpg_query/grammar/statements/variable_set.y" { PGVariableSetStmt *n = makeNode(PGVariableSetStmt); @@ -207265,7 +221145,7 @@ YYLTYPE yylloc; ;} break; - case 1120: + case 1126: #line 88 "third_party/libpg_query/grammar/statements/variable_set.y" { PGVariableSetStmt *n = makeNode(PGVariableSetStmt); @@ -207275,31 +221155,31 @@ YYLTYPE yylloc; ;} break; - case 1121: + case 1127: #line 98 "third_party/libpg_query/grammar/statements/variable_set.y" { (yyval.node) = makeStringConst((yyvsp[(1) - (1)].str), (yylsp[(1) - (1)])); ;} break; - case 1122: + case 1128: #line 100 "third_party/libpg_query/grammar/statements/variable_set.y" { (yyval.node) = makeAConst((yyvsp[(1) - (1)].value), (yylsp[(1) - (1)])); ;} break; - case 1123: + case 1129: #line 106 "third_party/libpg_query/grammar/statements/variable_set.y" { (yyval.node) = makeStringConst((yyvsp[(1) - (1)].str), (yylsp[(1) - (1)])); ;} break; - case 1124: + case 1130: #line 110 "third_party/libpg_query/grammar/statements/variable_set.y" { (yyval.node) = makeStringConst((yyvsp[(1) - (1)].str), (yylsp[(1) - (1)])); ;} break; - case 
1125: + case 1131: #line 114 "third_party/libpg_query/grammar/statements/variable_set.y" { PGTypeName *t = (yyvsp[(1) - (3)].typnam); @@ -207317,7 +221197,7 @@ YYLTYPE yylloc; ;} break; - case 1126: + case 1132: #line 129 "third_party/libpg_query/grammar/statements/variable_set.y" { PGTypeName *t = (yyvsp[(1) - (5)].typnam); @@ -207327,32 +221207,32 @@ YYLTYPE yylloc; ;} break; - case 1127: + case 1133: #line 135 "third_party/libpg_query/grammar/statements/variable_set.y" { (yyval.node) = makeAConst((yyvsp[(1) - (1)].value), (yylsp[(1) - (1)])); ;} break; - case 1128: + case 1134: #line 136 "third_party/libpg_query/grammar/statements/variable_set.y" { (yyval.node) = NULL; ;} break; - case 1129: + case 1135: #line 137 "third_party/libpg_query/grammar/statements/variable_set.y" { (yyval.node) = NULL; ;} break; - case 1130: + case 1136: #line 141 "third_party/libpg_query/grammar/statements/variable_set.y" { (yyval.list) = list_make1((yyvsp[(1) - (1)].node)); ;} break; - case 1131: + case 1137: #line 142 "third_party/libpg_query/grammar/statements/variable_set.y" { (yyval.list) = lappend((yyvsp[(1) - (3)].list), (yyvsp[(3) - (3)].node)); ;} break; - case 1132: + case 1138: #line 8 "third_party/libpg_query/grammar/statements/load.y" { PGLoadStmt *n = makeNode(PGLoadStmt); @@ -207361,12 +221241,12 @@ YYLTYPE yylloc; ;} break; - case 1133: + case 1139: #line 16 "third_party/libpg_query/grammar/statements/load.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 1134: + case 1140: #line 9 "third_party/libpg_query/grammar/statements/vacuum.y" { PGVacuumStmt *n = makeNode(PGVacuumStmt); @@ -207383,7 +221263,7 @@ YYLTYPE yylloc; ;} break; - case 1135: + case 1141: #line 23 "third_party/libpg_query/grammar/statements/vacuum.y" { PGVacuumStmt *n = makeNode(PGVacuumStmt); @@ -207400,7 +221280,7 @@ YYLTYPE yylloc; ;} break; - case 1136: + case 1142: #line 37 "third_party/libpg_query/grammar/statements/vacuum.y" { PGVacuumStmt *n = (PGVacuumStmt *) (yyvsp[(5) - (5)].node); 
@@ -207415,7 +221295,7 @@ YYLTYPE yylloc; ;} break; - case 1137: + case 1143: #line 49 "third_party/libpg_query/grammar/statements/vacuum.y" { PGVacuumStmt *n = makeNode(PGVacuumStmt); @@ -207426,7 +221306,7 @@ YYLTYPE yylloc; ;} break; - case 1138: + case 1144: #line 57 "third_party/libpg_query/grammar/statements/vacuum.y" { PGVacuumStmt *n = makeNode(PGVacuumStmt); @@ -207439,27 +221319,27 @@ YYLTYPE yylloc; ;} break; - case 1139: + case 1145: #line 70 "third_party/libpg_query/grammar/statements/vacuum.y" { (yyval.ival) = PG_VACOPT_ANALYZE; ;} break; - case 1140: + case 1146: #line 71 "third_party/libpg_query/grammar/statements/vacuum.y" { (yyval.ival) = PG_VACOPT_VERBOSE; ;} break; - case 1141: + case 1147: #line 72 "third_party/libpg_query/grammar/statements/vacuum.y" { (yyval.ival) = PG_VACOPT_FREEZE; ;} break; - case 1142: + case 1148: #line 73 "third_party/libpg_query/grammar/statements/vacuum.y" { (yyval.ival) = PG_VACOPT_FULL; ;} break; - case 1143: + case 1149: #line 75 "third_party/libpg_query/grammar/statements/vacuum.y" { if (strcmp((yyvsp[(1) - (1)].str), "disable_page_skipping") == 0) @@ -207472,37 +221352,37 @@ YYLTYPE yylloc; ;} break; - case 1144: + case 1150: #line 87 "third_party/libpg_query/grammar/statements/vacuum.y" { (yyval.boolean) = true; ;} break; - case 1145: + case 1151: #line 88 "third_party/libpg_query/grammar/statements/vacuum.y" { (yyval.boolean) = false; ;} break; - case 1146: + case 1152: #line 93 "third_party/libpg_query/grammar/statements/vacuum.y" { (yyval.ival) = (yyvsp[(1) - (1)].ival); ;} break; - case 1147: + case 1153: #line 94 "third_party/libpg_query/grammar/statements/vacuum.y" { (yyval.ival) = (yyvsp[(1) - (3)].ival) | (yyvsp[(3) - (3)].ival); ;} break; - case 1148: + case 1154: #line 98 "third_party/libpg_query/grammar/statements/vacuum.y" { (yyval.boolean) = true; ;} break; - case 1149: + case 1155: #line 99 "third_party/libpg_query/grammar/statements/vacuum.y" { (yyval.boolean) = false; ;} break; - case 1150: + 
case 1156: #line 9 "third_party/libpg_query/grammar/statements/delete.y" { PGDeleteStmt *n = makeNode(PGDeleteStmt); @@ -207515,14 +221395,14 @@ YYLTYPE yylloc; ;} break; - case 1151: + case 1157: #line 22 "third_party/libpg_query/grammar/statements/delete.y" { (yyval.range) = (yyvsp[(1) - (1)].range); ;} break; - case 1152: + case 1158: #line 26 "third_party/libpg_query/grammar/statements/delete.y" { PGAlias *alias = makeNode(PGAlias); @@ -207532,7 +221412,7 @@ YYLTYPE yylloc; ;} break; - case 1153: + case 1159: #line 33 "third_party/libpg_query/grammar/statements/delete.y" { PGAlias *alias = makeNode(PGAlias); @@ -207542,27 +221422,27 @@ YYLTYPE yylloc; ;} break; - case 1154: + case 1160: #line 43 "third_party/libpg_query/grammar/statements/delete.y" { (yyval.node) = (yyvsp[(2) - (2)].node); ;} break; - case 1155: + case 1161: #line 44 "third_party/libpg_query/grammar/statements/delete.y" { (yyval.node) = NULL; ;} break; - case 1156: + case 1162: #line 50 "third_party/libpg_query/grammar/statements/delete.y" { (yyval.list) = (yyvsp[(2) - (2)].list); ;} break; - case 1157: + case 1163: #line 51 "third_party/libpg_query/grammar/statements/delete.y" { (yyval.list) = NIL; ;} break; - case 1158: + case 1164: #line 10 "third_party/libpg_query/grammar/statements/analyze.y" { PGVacuumStmt *n = makeNode(PGVacuumStmt); @@ -207575,7 +221455,7 @@ YYLTYPE yylloc; ;} break; - case 1159: + case 1165: #line 20 "third_party/libpg_query/grammar/statements/analyze.y" { PGVacuumStmt *n = makeNode(PGVacuumStmt); @@ -207588,12 +221468,12 @@ YYLTYPE yylloc; ;} break; - case 1160: + case 1166: #line 2 "third_party/libpg_query/grammar/statements/variable_reset.y" { (yyval.node) = (PGNode *) (yyvsp[(2) - (2)].vsetstmt); ;} break; - case 1161: + case 1167: #line 8 "third_party/libpg_query/grammar/statements/variable_reset.y" { PGVariableSetStmt *n = makeNode(PGVariableSetStmt); @@ -207603,7 +221483,7 @@ YYLTYPE yylloc; ;} break; - case 1162: + case 1168: #line 15 
"third_party/libpg_query/grammar/statements/variable_reset.y" { PGVariableSetStmt *n = makeNode(PGVariableSetStmt); @@ -207612,12 +221492,12 @@ YYLTYPE yylloc; ;} break; - case 1163: + case 1169: #line 24 "third_party/libpg_query/grammar/statements/variable_reset.y" { (yyval.vsetstmt) = (yyvsp[(1) - (1)].vsetstmt); ;} break; - case 1164: + case 1170: #line 26 "third_party/libpg_query/grammar/statements/variable_reset.y" { PGVariableSetStmt *n = makeNode(PGVariableSetStmt); @@ -207627,7 +221507,7 @@ YYLTYPE yylloc; ;} break; - case 1165: + case 1171: #line 33 "third_party/libpg_query/grammar/statements/variable_reset.y" { PGVariableSetStmt *n = makeNode(PGVariableSetStmt); @@ -207637,7 +221517,7 @@ YYLTYPE yylloc; ;} break; - case 1166: + case 1172: #line 3 "third_party/libpg_query/grammar/statements/variable_show.y" { PGVariableShowSelectStmt *n = makeNode(PGVariableShowSelectStmt); @@ -207647,7 +221527,7 @@ YYLTYPE yylloc; ;} break; - case 1167: + case 1173: #line 11 "third_party/libpg_query/grammar/statements/variable_show.y" { PGVariableShowStmt *n = makeNode(PGVariableShowStmt); @@ -207656,7 +221536,7 @@ YYLTYPE yylloc; ;} break; - case 1168: + case 1174: #line 17 "third_party/libpg_query/grammar/statements/variable_show.y" { PGVariableShowStmt *n = makeNode(PGVariableShowStmt); @@ -207665,7 +221545,7 @@ YYLTYPE yylloc; ;} break; - case 1169: + case 1175: #line 23 "third_party/libpg_query/grammar/statements/variable_show.y" { PGVariableShowStmt *n = makeNode(PGVariableShowStmt); @@ -207674,7 +221554,7 @@ YYLTYPE yylloc; ;} break; - case 1170: + case 1176: #line 29 "third_party/libpg_query/grammar/statements/variable_show.y" { PGVariableShowStmt *n = makeNode(PGVariableShowStmt); @@ -207683,17 +221563,17 @@ YYLTYPE yylloc; ;} break; - case 1173: + case 1179: #line 39 "third_party/libpg_query/grammar/statements/variable_show.y" { (yyval.str) = (yyvsp[(1) - (1)].str); ;} break; - case 1174: + case 1180: #line 41 
"third_party/libpg_query/grammar/statements/variable_show.y" { (yyval.str) = psprintf("%s.%s", (yyvsp[(1) - (3)].str), (yyvsp[(3) - (3)].str)); ;} break; - case 1175: + case 1181: #line 7 "third_party/libpg_query/grammar/statements/call.y" { PGCallStmt *n = makeNode(PGCallStmt); @@ -207702,7 +221582,7 @@ YYLTYPE yylloc; ;} break; - case 1176: + case 1182: #line 10 "third_party/libpg_query/grammar/statements/view.y" { PGViewStmt *n = makeNode(PGViewStmt); @@ -207717,7 +221597,7 @@ YYLTYPE yylloc; ;} break; - case 1177: + case 1183: #line 23 "third_party/libpg_query/grammar/statements/view.y" { PGViewStmt *n = makeNode(PGViewStmt); @@ -207732,7 +221612,7 @@ YYLTYPE yylloc; ;} break; - case 1178: + case 1184: #line 36 "third_party/libpg_query/grammar/statements/view.y" { PGViewStmt *n = makeNode(PGViewStmt); @@ -207752,7 +221632,7 @@ YYLTYPE yylloc; ;} break; - case 1179: + case 1185: #line 54 "third_party/libpg_query/grammar/statements/view.y" { PGViewStmt *n = makeNode(PGViewStmt); @@ -207772,27 +221652,27 @@ YYLTYPE yylloc; ;} break; - case 1180: + case 1186: #line 74 "third_party/libpg_query/grammar/statements/view.y" { (yyval.viewcheckoption) = CASCADED_CHECK_OPTION; ;} break; - case 1181: + case 1187: #line 75 "third_party/libpg_query/grammar/statements/view.y" { (yyval.viewcheckoption) = CASCADED_CHECK_OPTION; ;} break; - case 1182: + case 1188: #line 76 "third_party/libpg_query/grammar/statements/view.y" { (yyval.viewcheckoption) = PG_LOCAL_CHECK_OPTION; ;} break; - case 1183: + case 1189: #line 77 "third_party/libpg_query/grammar/statements/view.y" { (yyval.viewcheckoption) = PG_NO_CHECK_OPTION; ;} break; - case 1184: + case 1190: #line 12 "third_party/libpg_query/grammar/statements/create_as.y" { PGCreateTableAsStmt *ctas = makeNode(PGCreateTableAsStmt); @@ -207808,7 +221688,7 @@ YYLTYPE yylloc; ;} break; - case 1185: + case 1191: #line 25 "third_party/libpg_query/grammar/statements/create_as.y" { PGCreateTableAsStmt *ctas = makeNode(PGCreateTableAsStmt); @@ 
-207824,22 +221704,22 @@ YYLTYPE yylloc; ;} break; - case 1186: + case 1192: #line 41 "third_party/libpg_query/grammar/statements/create_as.y" { (yyval.boolean) = true; ;} break; - case 1187: + case 1193: #line 42 "third_party/libpg_query/grammar/statements/create_as.y" { (yyval.boolean) = false; ;} break; - case 1188: + case 1194: #line 43 "third_party/libpg_query/grammar/statements/create_as.y" { (yyval.boolean) = true; ;} break; - case 1189: + case 1195: #line 49 "third_party/libpg_query/grammar/statements/create_as.y" { (yyval.into) = makeNode(PGIntoClause); @@ -207854,7 +221734,7 @@ YYLTYPE yylloc; /* Line 1267 of yacc.c. */ -#line 23394 "third_party/libpg_query/grammar/grammar_out.cpp" +#line 23485 "third_party/libpg_query/grammar/grammar_out.cpp" default: break; } YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc); @@ -208411,12 +222291,6 @@ static PGNode* makeParamRef(int number, int location) return (PGNode *) p; } -static PGNode * -makeParamRefCast(int number, int location, PGTypeName *tpname) -{ - PGNode *p = makeParamRef(number, location); - return makeTypeCast(p, tpname, 0, -1); -} /* insertSelectOptions() * Insert ORDER BY, etc into an already-constructed SelectStmt. 
@@ -208824,7 +222698,7 @@ parser_init(base_yy_extra_type *yyext) // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*-------------------------------------------------------------------- @@ -208863,7 +222737,7 @@ parser_init(base_yy_extra_type *yyext) // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list @@ -209374,7 +223248,6 @@ bool is_keyword(const char *text) { std::vector tokenize(const char *str) { core_yyscan_t yyscanner; base_yy_extra_type yyextra; - int yyresult; std::vector result; yyscanner = scanner_init(str, &yyextra.core_yy_extra, ScanKeywords, NumScanKeywords); @@ -209415,9 +223288,6 @@ std::vector tokenize(const char *str) { case NOT_EQUALS: current_token.type = PGSimplifiedTokenType::PG_SIMPLIFIED_TOKEN_OPERATOR; break; - case COMMENT: - current_token.type = PGSimplifiedTokenType::PG_SIMPLIFIED_TOKEN_COMMENT; - break; default: if (token >= 255) { // non-ascii value, probably a keyword @@ -209567,7 +223437,7 @@ int base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list #line 2 "third_party/libpg_query/src_backend_parser_scan.cpp" @@ -209611,7 +223481,7 @@ int base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a 
list /*------------------------------------------------------------------------- @@ -209647,7 +223517,7 @@ bool scanner_isspace(char ch); // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*------------------------------------------------------------------------- @@ -209722,7 +223592,7 @@ namespace duckdb_libpgquery { #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, - * if you want the limit (max/min) macros for int types. + * if you want the limit (max/min) macros for int types. */ #ifndef __STDC_LIMIT_MACROS #define __STDC_LIMIT_MACROS 1 @@ -209740,7 +223610,7 @@ typedef uint64_t flex_uint64_t; typedef signed char flex_int8_t; typedef short int flex_int16_t; typedef int flex_int32_t; -typedef unsigned char flex_uint8_t; +typedef unsigned char flex_uint8_t; typedef unsigned short int flex_uint16_t; typedef unsigned int flex_uint32_t; #endif /* ! C99 */ @@ -209869,7 +223739,7 @@ typedef size_t yy_size_t; #define EOB_ACT_LAST_MATCH 2 #define YY_LESS_LINENO(n) - + /* Return all but the first "n" matched characters back to the input stream. */ #define yyless(n) \ do \ @@ -209926,7 +223796,7 @@ struct yy_buffer_state int yy_bs_lineno; /**< The line count. */ int yy_bs_column; /**< The column count. */ - + /* Whether to try to fill the input buffer when we reach the * end of it. 
*/ @@ -210797,9 +224667,9 @@ static int yy_init_globals (yyscan_t yyscanner ); /* This must go here because YYSTYPE and YYLTYPE are included * from bison output in section 1.*/ # define yylval yyg->yylval_r - + # define yylloc yyg->yylloc_r - + int core_yylex_init (yyscan_t* scanner); int core_yylex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner); @@ -210838,9 +224708,9 @@ YYSTYPE * core_yyget_lval (yyscan_t yyscanner ); void core_yyset_lval (YYSTYPE * yylval_param ,yyscan_t yyscanner ); YYLTYPE *core_yyget_lloc (yyscan_t yyscanner ); - + void core_yyset_lloc (YYLTYPE * yylloc_param ,yyscan_t yyscanner ); - + /* Macros after this point can all be overridden by user definitions in * section 1. */ @@ -210996,13 +224866,6 @@ YY_DECL if ( ! yyg->yy_start ) yyg->yy_start = 1; /* first start state */ - - if ( ! yyin ) - yyin = stdin; - - if ( ! yyout ) - yyout = stdout; - if ( ! YY_CURRENT_BUFFER ) { core_yyensure_buffer_stack (yyscanner); YY_CURRENT_BUFFER_LVALUE = @@ -212540,7 +226403,7 @@ static void core_yy_load_buffer_state (yyscan_t yyscanner) YY_BUFFER_STATE core_yy_create_buffer (FILE * file, int size , yyscan_t yyscanner) { YY_BUFFER_STATE b; - + b = (YY_BUFFER_STATE) core_yyalloc(sizeof( struct yy_buffer_state ) ,yyscanner ); if ( ! b ) YY_FATAL_ERROR( "out of dynamic memory in core_yy_create_buffer()" ); @@ -212606,7 +226469,7 @@ static void core_yy_load_buffer_state (yyscan_t yyscanner) } b->yy_is_interactive = 0; - + errno = oerrno; } @@ -212712,9 +226575,9 @@ static void core_yyensure_buffer_stack (yyscan_t yyscanner) , yyscanner); if ( ! 
yyg->yy_buffer_stack ) YY_FATAL_ERROR( "out of dynamic memory in core_yyensure_buffer_stack()" ); - + memset(yyg->yy_buffer_stack, 0, num_to_alloc * sizeof(struct yy_buffer_state*)); - + yyg->yy_buffer_stack_max = num_to_alloc; yyg->yy_buffer_stack_top = 0; return; @@ -212743,12 +226606,12 @@ static void core_yyensure_buffer_stack (yyscan_t yyscanner) * @param base the character buffer * @param size the size in bytes of the character buffer * @param yyscanner The scanner object. - * @return the newly allocated buffer state object. + * @return the newly allocated buffer state object. */ YY_BUFFER_STATE core_yy_scan_buffer (char * base, yy_size_t size , yyscan_t yyscanner) { YY_BUFFER_STATE b; - + if ( size < 2 || base[size-2] != YY_END_OF_BUFFER_CHAR || base[size-1] != YY_END_OF_BUFFER_CHAR ) @@ -212784,7 +226647,7 @@ YY_BUFFER_STATE core_yy_scan_buffer (char * base, yy_size_t size , yyscan_t yy */ YY_BUFFER_STATE core_yy_scan_string (yyconst char * yystr , yyscan_t yyscanner) { - + return core_yy_scan_bytes(yystr,strlen(yystr) ,yyscanner); } @@ -212800,7 +226663,7 @@ YY_BUFFER_STATE core_yy_scan_bytes (yyconst char * yybytes, yy_size_t _yybytes YY_BUFFER_STATE b; char *buf; yy_size_t n, i; - + /* Get memory for full buffer, including space for trailing EOB's. */ n = _yybytes_len + 2; buf = (char *) core_yyalloc(n ,yyscanner ); @@ -212868,10 +226731,10 @@ YY_EXTRA_TYPE core_yyget_extra (yyscan_t yyscanner) int core_yyget_lineno (yyscan_t yyscanner) { struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - + if (! YY_CURRENT_BUFFER) return 0; - + return yylineno; } @@ -212881,10 +226744,10 @@ int core_yyget_lineno (yyscan_t yyscanner) int core_yyget_column (yyscan_t yyscanner) { struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - + if (! YY_CURRENT_BUFFER) return 0; - + return yycolumn; } @@ -212945,8 +226808,8 @@ void core_yyset_lineno (int line_number , yyscan_t yyscanner) /* lineno is only valid if an input buffer exists. */ if (! 
YY_CURRENT_BUFFER ) - yy_fatal_error( "core_yyset_lineno called with no buffer" , yyscanner); - + yy_fatal_error( "core_yyset_lineno called with no buffer" , yyscanner); + yylineno = line_number; } @@ -212960,8 +226823,8 @@ void core_yyset_column (int column_no , yyscan_t yyscanner) /* column is only valid if an input buffer exists. */ if (! YY_CURRENT_BUFFER ) - yy_fatal_error( "core_yyset_column called with no buffer" , yyscanner); - + yy_fatal_error( "core_yyset_column called with no buffer" , yyscanner); + yycolumn = column_no; } @@ -213014,13 +226877,13 @@ YYLTYPE *core_yyget_lloc (yyscan_t yyscanner) struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; return yylloc; } - + void core_yyset_lloc (YYLTYPE * yylloc_param , yyscan_t yyscanner) { struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; yylloc = yylloc_param; } - + /* User-visible API */ /* core_yylex_init is special because it creates the scanner itself, so it is @@ -213068,20 +226931,20 @@ int core_yylex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals errno = EINVAL; return 1; } - + *ptr_yy_globals = (yyscan_t) core_yyalloc ( sizeof( struct yyguts_t ), &dummy_yyguts ); - + if (*ptr_yy_globals == NULL){ errno = ENOMEM; return 1; } - + /* By setting to 0xAA, we expose bugs in yy_init_globals. Leave at 0x00 for releases. 
*/ memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t)); - + core_yyset_extra (yy_user_defined, *ptr_yy_globals); - + return yy_init_globals ( *ptr_yy_globals ); } @@ -213104,13 +226967,8 @@ static int yy_init_globals (yyscan_t yyscanner) yyg->yy_start_stack = NULL; /* Defined in main.c */ -#ifdef YY_STDINIT - yyin = stdin; - yyout = stdout; -#else yyin = (FILE *) 0; yyout = (FILE *) 0; -#endif /* For future reference: Set errno on error, since we are called by * core_yylex_init() @@ -213751,7 +227609,7 @@ core_yyfree(void *ptr, core_yyscan_t yyscanner) // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*-------------------------------------------------------------------- @@ -213871,7 +227729,7 @@ bool scanner_isspace(char ch) { // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #9 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #8 // See the end of this file for a list /*-------------------------------------------------------------------- @@ -213972,862 +227830,6 @@ const PGScanKeyword *ScanKeywordLookup(const char *text, const PGScanKeyword *ke // LICENSE_CHANGE_END - - - - - - - - -#include -#include - -#ifndef _WIN32 -#include -#include -#include -#include -#include -#include -#else -#include -#ifndef NOMINMAX -#define NOMINMAX -#endif -#include - -#ifdef __MINGW32__ -// need to manually define this for mingw -extern "C" WINBASEAPI BOOL WINAPI GetPhysicallyInstalledSystemMemory(PULONGLONG); -#endif - -#undef CreateDirectory -#undef MoveFile -#undef RemoveDirectory -#undef FILE_CREATE // woo mingw -#endif - -namespace duckdb { - -FileSystem &FileSystem::GetFileSystem(ClientContext &context) { - return *context.db->config.file_system; -} - -static void AssertValidFileFlags(uint8_t flags) { - // 
cannot combine Read and Write flags - D_ASSERT(!(flags & FileFlags::FILE_FLAGS_READ && flags & FileFlags::FILE_FLAGS_WRITE)); - // cannot combine Read and CREATE/Append flags - D_ASSERT(!(flags & FileFlags::FILE_FLAGS_READ && flags & FileFlags::FILE_FLAGS_APPEND)); - D_ASSERT(!(flags & FileFlags::FILE_FLAGS_READ && flags & FileFlags::FILE_FLAGS_FILE_CREATE)); - D_ASSERT(!(flags & FileFlags::FILE_FLAGS_READ && flags & FileFlags::FILE_FLAGS_FILE_CREATE_NEW)); - // cannot combine CREATE and CREATE_NEW flags - D_ASSERT(!(flags & FileFlags::FILE_FLAGS_FILE_CREATE && flags & FileFlags::FILE_FLAGS_FILE_CREATE_NEW)); -} - -#ifndef _WIN32 -// somehow sometimes this is missing -#ifndef O_CLOEXEC -#define O_CLOEXEC 0 -#endif - -// Solaris -#ifndef O_DIRECT -#define O_DIRECT 0 -#endif - -struct UnixFileHandle : public FileHandle { -public: - UnixFileHandle(FileSystem &file_system, string path, int fd) : FileHandle(file_system, move(path)), fd(fd) { - } - ~UnixFileHandle() override { - Close(); - } - -protected: - void Close() override { - if (fd != -1) { - close(fd); - } - }; - -public: - int fd; -}; - -unique_ptr FileSystem::OpenFile(const char *path, uint8_t flags, FileLockType lock_type) { - AssertValidFileFlags(flags); - - int open_flags = 0; - int rc; - if (flags & FileFlags::FILE_FLAGS_READ) { - open_flags = O_RDONLY; - } else { - // need Read or Write - D_ASSERT(flags & FileFlags::FILE_FLAGS_WRITE); - open_flags = O_RDWR | O_CLOEXEC; - if (flags & FileFlags::FILE_FLAGS_FILE_CREATE) { - open_flags |= O_CREAT; - } else if (flags & FileFlags::FILE_FLAGS_FILE_CREATE_NEW) { - open_flags |= O_CREAT | O_TRUNC; - } - if (flags & FileFlags::FILE_FLAGS_APPEND) { - open_flags |= O_APPEND; - } - } - if (flags & FileFlags::FILE_FLAGS_DIRECT_IO) { -#if defined(__sun) && defined(__SVR4) - throw Exception("DIRECT_IO not supported on Solaris"); -#endif -#if defined(__DARWIN__) || defined(__APPLE__) || defined(__OpenBSD__) - // OSX does not have O_DIRECT, instead we need to use fcntl 
afterwards to support direct IO - open_flags |= O_SYNC; -#else - open_flags |= O_DIRECT | O_SYNC; -#endif - } - int fd = open(path, open_flags, 0666); - if (fd == -1) { - throw IOException("Cannot open file \"%s\": %s", path, strerror(errno)); - } - // #if defined(__DARWIN__) || defined(__APPLE__) - // if (flags & FileFlags::FILE_FLAGS_DIRECT_IO) { - // // OSX requires fcntl for Direct IO - // rc = fcntl(fd, F_NOCACHE, 1); - // if (fd == -1) { - // throw IOException("Could not enable direct IO for file \"%s\": %s", path, strerror(errno)); - // } - // } - // #endif - if (lock_type != FileLockType::NO_LOCK) { - // set lock on file - struct flock fl; - memset(&fl, 0, sizeof fl); - fl.l_type = lock_type == FileLockType::READ_LOCK ? F_RDLCK : F_WRLCK; - fl.l_whence = SEEK_SET; - fl.l_start = 0; - fl.l_len = 0; - rc = fcntl(fd, F_SETLK, &fl); - if (rc == -1) { - throw IOException("Could not set lock on file \"%s\": %s", path, strerror(errno)); - } - } - return make_unique(*this, path, fd); -} - -void FileSystem::SetFilePointer(FileHandle &handle, idx_t location) { - int fd = ((UnixFileHandle &)handle).fd; - off_t offset = lseek(fd, location, SEEK_SET); - if (offset == (off_t)-1) { - throw IOException("Could not seek to location %lld for file \"%s\": %s", location, handle.path, - strerror(errno)); - } -} - -int64_t FileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes) { - int fd = ((UnixFileHandle &)handle).fd; - int64_t bytes_read = read(fd, buffer, nr_bytes); - if (bytes_read == -1) { - throw IOException("Could not read from file \"%s\": %s", handle.path, strerror(errno)); - } - return bytes_read; -} - -int64_t FileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes) { - int fd = ((UnixFileHandle &)handle).fd; - int64_t bytes_written = write(fd, buffer, nr_bytes); - if (bytes_written == -1) { - throw IOException("Could not write file \"%s\": %s", handle.path, strerror(errno)); - } - return bytes_written; -} - -int64_t 
FileSystem::GetFileSize(FileHandle &handle) { - int fd = ((UnixFileHandle &)handle).fd; - struct stat s; - if (fstat(fd, &s) == -1) { - return -1; - } - return s.st_size; -} - -time_t FileSystem::GetLastModifiedTime(FileHandle &handle) { - int fd = ((UnixFileHandle &)handle).fd; - struct stat s; - if (fstat(fd, &s) == -1) { - return -1; - } - return s.st_mtime; -} - -void FileSystem::Truncate(FileHandle &handle, int64_t new_size) { - int fd = ((UnixFileHandle &)handle).fd; - if (ftruncate(fd, new_size) != 0) { - throw IOException("Could not truncate file \"%s\": %s", handle.path, strerror(errno)); - } -} - -bool FileSystem::DirectoryExists(const string &directory) { - if (!directory.empty()) { - if (access(directory.c_str(), 0) == 0) { - struct stat status; - stat(directory.c_str(), &status); - if (status.st_mode & S_IFDIR) { - return true; - } - } - } - // if any condition fails - return false; -} - -bool FileSystem::FileExists(const string &filename) { - if (!filename.empty()) { - if (access(filename.c_str(), 0) == 0) { - struct stat status; - stat(filename.c_str(), &status); - if (!(status.st_mode & S_IFDIR)) { - return true; - } - } - } - // if any condition fails - return false; -} - -void FileSystem::CreateDirectory(const string &directory) { - struct stat st; - - if (stat(directory.c_str(), &st) != 0) { - /* Directory does not exist. EEXIST for race condition */ - if (mkdir(directory.c_str(), 0755) != 0 && errno != EEXIST) { - throw IOException("Failed to create directory \"%s\"!", directory); - } - } else if (!S_ISDIR(st.st_mode)) { - throw IOException("Failed to create directory \"%s\": path exists but is not a directory!", directory); - } -} - -int RemoveDirectoryRecursive(const char *path) { - DIR *d = opendir(path); - idx_t path_len = (idx_t)strlen(path); - int r = -1; - - if (d) { - struct dirent *p; - r = 0; - while (!r && (p = readdir(d))) { - int r2 = -1; - char *buf; - idx_t len; - /* Skip the names "." and ".." as we don't want to recurse on them. 
*/ - if (!strcmp(p->d_name, ".") || !strcmp(p->d_name, "..")) { - continue; - } - len = path_len + (idx_t)strlen(p->d_name) + 2; - buf = new char[len]; - if (buf) { - struct stat statbuf; - snprintf(buf, len, "%s/%s", path, p->d_name); - if (!stat(buf, &statbuf)) { - if (S_ISDIR(statbuf.st_mode)) { - r2 = RemoveDirectoryRecursive(buf); - } else { - r2 = unlink(buf); - } - } - delete[] buf; - } - r = r2; - } - closedir(d); - } - if (!r) { - r = rmdir(path); - } - return r; -} - -void FileSystem::RemoveDirectory(const string &directory) { - RemoveDirectoryRecursive(directory.c_str()); -} - -void FileSystem::RemoveFile(const string &filename) { - if (std::remove(filename.c_str()) != 0) { - throw IOException("Could not remove file \"%s\": %s", filename, strerror(errno)); - } -} - -bool FileSystem::ListFiles(const string &directory, const std::function &callback) { - if (!DirectoryExists(directory)) { - return false; - } - DIR *dir = opendir(directory.c_str()); - if (!dir) { - return false; - } - struct dirent *ent; - // loop over all files in the directory - while ((ent = readdir(dir)) != nullptr) { - string name = string(ent->d_name); - // skip . .. and empty files - if (name.empty() || name == "." 
|| name == "..") { - continue; - } - // now stat the file to figure out if it is a regular file or directory - string full_path = JoinPath(directory, name); - if (access(full_path.c_str(), 0) != 0) { - continue; - } - struct stat status; - stat(full_path.c_str(), &status); - if (!(status.st_mode & S_IFREG) && !(status.st_mode & S_IFDIR)) { - // not a file or directory: skip - continue; - } - // invoke callback - callback(name, status.st_mode & S_IFDIR); - } - closedir(dir); - return true; -} - -string FileSystem::PathSeparator() { - return "/"; -} - -void FileSystem::FileSync(FileHandle &handle) { - int fd = ((UnixFileHandle &)handle).fd; - if (fsync(fd) != 0) { - throw FatalException("fsync failed!"); - } -} - -void FileSystem::MoveFile(const string &source, const string &target) { - //! FIXME: rename does not guarantee atomicity or overwriting target file if it exists - if (rename(source.c_str(), target.c_str()) != 0) { - throw IOException("Could not rename file!"); - } -} - -void FileSystem::SetWorkingDirectory(const string &path) { - if (chdir(path.c_str()) != 0) { - throw IOException("Could not change working directory!"); - } -} - -idx_t FileSystem::GetAvailableMemory() { - errno = 0; - idx_t max_memory = MinValue((idx_t)sysconf(_SC_PHYS_PAGES) * (idx_t)sysconf(_SC_PAGESIZE), UINTPTR_MAX); - if (errno != 0) { - throw IOException("Could not fetch available system memory!"); - } - return max_memory; -} - -string FileSystem::GetWorkingDirectory() { - auto buffer = unique_ptr(new char[PATH_MAX]); - char *ret = getcwd(buffer.get(), PATH_MAX); - if (!ret) { - throw IOException("Could not get working directory!"); - } - return string(buffer.get()); -} -#else - -// Returns the last Win32 error, in string format. Returns an empty string if there is no error. -std::string GetLastErrorAsString() { - // Get the error message, if any. 
- DWORD errorMessageID = GetLastError(); - if (errorMessageID == 0) - return std::string(); // No error message has been recorded - - LPSTR messageBuffer = nullptr; - idx_t size = - FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, errorMessageID, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL); - - std::string message(messageBuffer, size); - - // Free the buffer. - LocalFree(messageBuffer); - - return message; -} - -struct WindowsFileHandle : public FileHandle { -public: - WindowsFileHandle(FileSystem &file_system, string path, HANDLE fd) : FileHandle(file_system, path), fd(fd) { - } - virtual ~WindowsFileHandle() { - Close(); - } - -protected: - void Close() override { - CloseHandle(fd); - }; - -public: - HANDLE fd; -}; - -unique_ptr FileSystem::OpenFile(const char *path, uint8_t flags, FileLockType lock_type) { - AssertValidFileFlags(flags); - - DWORD desired_access; - DWORD share_mode; - DWORD creation_disposition = OPEN_EXISTING; - DWORD flags_and_attributes = FILE_ATTRIBUTE_NORMAL; - if (flags & FileFlags::FILE_FLAGS_READ) { - desired_access = GENERIC_READ; - share_mode = FILE_SHARE_READ; - } else { - // need Read or Write - D_ASSERT(flags & FileFlags::FILE_FLAGS_WRITE); - desired_access = GENERIC_READ | GENERIC_WRITE; - share_mode = 0; - if (flags & FileFlags::FILE_FLAGS_FILE_CREATE) { - creation_disposition = OPEN_ALWAYS; - } else if (flags & FileFlags::FILE_FLAGS_FILE_CREATE_NEW) { - creation_disposition = CREATE_ALWAYS; - } - if (flags & FileFlags::FILE_FLAGS_DIRECT_IO) { - flags_and_attributes |= FILE_FLAG_WRITE_THROUGH; - } - } - if (flags & FileFlags::FILE_FLAGS_DIRECT_IO) { - flags_and_attributes |= FILE_FLAG_NO_BUFFERING; - } - HANDLE hFile = - CreateFileA(path, desired_access, share_mode, NULL, creation_disposition, flags_and_attributes, NULL); - if (hFile == INVALID_HANDLE_VALUE) { - auto error = GetLastErrorAsString(); - throw IOException("Cannot open file 
\"%s\": %s", path, error); - } - auto handle = make_unique(*this, path, hFile); - if (flags & FileFlags::FILE_FLAGS_APPEND) { - SetFilePointer(*handle, GetFileSize(*handle)); - } - return move(handle); -} - -void FileSystem::SetFilePointer(FileHandle &handle, idx_t location) { - HANDLE hFile = ((WindowsFileHandle &)handle).fd; - LARGE_INTEGER loc; - loc.QuadPart = location; - auto rc = SetFilePointerEx(hFile, loc, NULL, FILE_BEGIN); - if (rc == 0) { - auto error = GetLastErrorAsString(); - throw IOException("Could not seek to location %lld for file \"%s\": %s", location, handle.path, error); - } -} - -int64_t FileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes) { - HANDLE hFile = ((WindowsFileHandle &)handle).fd; - DWORD bytes_read; - auto rc = ReadFile(hFile, buffer, (DWORD)nr_bytes, &bytes_read, NULL); - if (rc == 0) { - auto error = GetLastErrorAsString(); - throw IOException("Could not write file \"%s\": %s", handle.path, error); - } - return bytes_read; -} - -int64_t FileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes) { - HANDLE hFile = ((WindowsFileHandle &)handle).fd; - DWORD bytes_read; - auto rc = WriteFile(hFile, buffer, (DWORD)nr_bytes, &bytes_read, NULL); - if (rc == 0) { - auto error = GetLastErrorAsString(); - throw IOException("Could not write file \"%s\": %s", handle.path, error); - } - return bytes_read; -} - -int64_t FileSystem::GetFileSize(FileHandle &handle) { - HANDLE hFile = ((WindowsFileHandle &)handle).fd; - LARGE_INTEGER result; - if (!GetFileSizeEx(hFile, &result)) { - return -1; - } - return result.QuadPart; -} - -time_t FileSystem::GetLastModifiedTime(FileHandle &handle) { - HANDLE hFile = ((WindowsFileHandle &)handle).fd; - - // https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfiletime - FILETIME last_write; - if (GetFileTime(hFile, nullptr, nullptr, &last_write) == 0) { - return -1; - } - - // https://stackoverflow.com/questions/29266743/what-is-dwlowdatetime-and-dwhighdatetime 
- ULARGE_INTEGER ul; - ul.LowPart = last_write.dwLowDateTime; - ul.HighPart = last_write.dwHighDateTime; - int64_t fileTime64 = ul.QuadPart; - - // fileTime64 contains a 64-bit value representing the number of - // 100-nanosecond intervals since January 1, 1601 (UTC). - // https://docs.microsoft.com/en-us/windows/win32/api/minwinbase/ns-minwinbase-filetime - - // Adapted from: https://stackoverflow.com/questions/6161776/convert-windows-filetime-to-second-in-unix-linux - const auto WINDOWS_TICK = 10000000; - const auto SEC_TO_UNIX_EPOCH = 11644473600LL; - time_t result = (fileTime64 / WINDOWS_TICK - SEC_TO_UNIX_EPOCH); - return result; -} - -void FileSystem::Truncate(FileHandle &handle, int64_t new_size) { - HANDLE hFile = ((WindowsFileHandle &)handle).fd; - // seek to the location - SetFilePointer(handle, new_size); - // now set the end of file position - if (!SetEndOfFile(hFile)) { - auto error = GetLastErrorAsString(); - throw IOException("Failure in SetEndOfFile call on file \"%s\": %s", handle.path, error); - } -} - -bool FileSystem::DirectoryExists(const string &directory) { - DWORD attrs = GetFileAttributesA(directory.c_str()); - return (attrs != INVALID_FILE_ATTRIBUTES && (attrs & FILE_ATTRIBUTE_DIRECTORY)); -} - -bool FileSystem::FileExists(const string &filename) { - DWORD attrs = GetFileAttributesA(filename.c_str()); - return (attrs != INVALID_FILE_ATTRIBUTES && !(attrs & FILE_ATTRIBUTE_DIRECTORY)); -} - -void FileSystem::CreateDirectory(const string &directory) { - if (DirectoryExists(directory)) { - return; - } - if (directory.empty() || !CreateDirectoryA(directory.c_str(), NULL) || !DirectoryExists(directory)) { - throw IOException("Could not create directory!"); - } -} - -static void delete_dir_special_snowflake_windows(string directory) { - if (directory.size() + 3 > MAX_PATH) { - throw IOException("Pathname too long"); - } - // create search pattern - TCHAR szDir[MAX_PATH]; - snprintf(szDir, MAX_PATH, "%s\\*", directory.c_str()); - - WIN32_FIND_DATA 
ffd; - HANDLE hFind = FindFirstFile(szDir, &ffd); - if (hFind == INVALID_HANDLE_VALUE) { - return; - } - - do { - if (string(ffd.cFileName) == "." || string(ffd.cFileName) == "..") { - continue; - } - if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { - // recurse to zap directory contents - FileSystem fs; - delete_dir_special_snowflake_windows(fs.JoinPath(directory, ffd.cFileName)); - } else { - if (strlen(ffd.cFileName) + directory.size() + 1 > MAX_PATH) { - throw IOException("Pathname too long"); - } - // create search pattern - TCHAR del_path[MAX_PATH]; - snprintf(del_path, MAX_PATH, "%s\\%s", directory.c_str(), ffd.cFileName); - if (!DeleteFileA(del_path)) { - throw IOException("Failed to delete directory entry"); - } - } - } while (FindNextFile(hFind, &ffd) != 0); - - DWORD dwError = GetLastError(); - if (dwError != ERROR_NO_MORE_FILES) { - throw IOException("Something went wrong"); - } - FindClose(hFind); - - if (!RemoveDirectoryA(directory.c_str())) { - throw IOException("Failed to delete directory"); - } -} - -void FileSystem::RemoveDirectory(const string &directory) { - delete_dir_special_snowflake_windows(directory.c_str()); -} - -void FileSystem::RemoveFile(const string &filename) { - DeleteFileA(filename.c_str()); -} - -bool FileSystem::ListFiles(const string &directory, const std::function &callback) { - string search_dir = JoinPath(directory, "*"); - - WIN32_FIND_DATA ffd; - HANDLE hFind = FindFirstFile(search_dir.c_str(), &ffd); - if (hFind == INVALID_HANDLE_VALUE) { - return false; - } - do { - string cFileName = string(ffd.cFileName); - if (cFileName == "." 
|| cFileName == "..") { - continue; - } - callback(cFileName, ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY); - } while (FindNextFile(hFind, &ffd) != 0); - - DWORD dwError = GetLastError(); - if (dwError != ERROR_NO_MORE_FILES) { - FindClose(hFind); - return false; - } - - FindClose(hFind); - return true; -} - -string FileSystem::PathSeparator() { - return "\\"; -} - -void FileSystem::FileSync(FileHandle &handle) { - HANDLE hFile = ((WindowsFileHandle &)handle).fd; - if (FlushFileBuffers(hFile) == 0) { - throw IOException("Could not flush file handle to disk!"); - } -} - -void FileSystem::MoveFile(const string &source, const string &target) { - if (!MoveFileA(source.c_str(), target.c_str())) { - throw IOException("Could not move file"); - } -} - -void FileSystem::SetWorkingDirectory(const string &path) { - if (!SetCurrentDirectory(path.c_str())) { - throw IOException("Could not change working directory!"); - } -} - -idx_t FileSystem::GetAvailableMemory() { - ULONGLONG available_memory_kb; - if (!GetPhysicallyInstalledSystemMemory(&available_memory_kb)) { - throw IOException("Could not fetch available system memory!"); - } - return MinValue(available_memory_kb * 1024, UINTPTR_MAX); -} - -string FileSystem::GetWorkingDirectory() { - idx_t count = GetCurrentDirectory(0, nullptr); - if (count == 0) { - throw IOException("Could not get working directory!"); - } - auto buffer = unique_ptr(new char[count]); - idx_t ret = GetCurrentDirectory(count, buffer.get()); - if (count != ret + 1) { - throw IOException("Could not get working directory!"); - } - return string(buffer.get(), ret); -} -#endif - -string FileSystem::GetHomeDirectory() { - const char *homedir = getenv("HOME"); - if (!homedir) { - return string(); - } - return homedir; -} - -void FileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) { - // seek to the location - SetFilePointer(handle, location); - // now read from the location - int64_t bytes_read = Read(handle, buffer, 
nr_bytes); - if (bytes_read != nr_bytes) { - throw IOException("Could not read sufficient bytes from file \"%s\"", handle.path); - } -} - -void FileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) { - // seek to the location - SetFilePointer(handle, location); - // now write to the location - int64_t bytes_written = Write(handle, buffer, nr_bytes); - if (bytes_written != nr_bytes) { - throw IOException("Could not write sufficient bytes from file \"%s\"", handle.path); - } -} - -string FileSystem::JoinPath(const string &a, const string &b) { - // FIXME: sanitize paths - return a + PathSeparator() + b; -} - -string FileSystem::ConvertSeparators(const string &path) { - auto separator_str = PathSeparator(); - char separator = separator_str[0]; - if (separator == '/') { - // on unix-based systems we only accept / as a separator - return path; - } - // on windows-based systems we accept both - string result = path; - for (idx_t i = 0; i < result.size(); i++) { - if (result[i] == '/') { - result[i] = separator; - } - } - return result; -} - -string FileSystem::ExtractBaseName(const string &path) { - auto sep = PathSeparator(); - auto vec = StringUtil::Split(StringUtil::Split(path, sep).back(), "."); - return vec[0]; -} - -void FileHandle::Read(void *buffer, idx_t nr_bytes, idx_t location) { - file_system.Read(*this, buffer, nr_bytes, location); -} - -void FileHandle::Write(void *buffer, idx_t nr_bytes, idx_t location) { - file_system.Write(*this, buffer, nr_bytes, location); -} - -void FileHandle::Sync() { - file_system.FileSync(*this); -} - -void FileHandle::Truncate(int64_t new_size) { - file_system.Truncate(*this, new_size); -} - -static bool HasGlob(const string &str) { - for (idx_t i = 0; i < str.size(); i++) { - switch (str[i]) { - case '*': - case '?': - case '[': - return true; - default: - break; - } - } - return false; -} - -static void GlobFiles(FileSystem &fs, const string &path, const string &glob, bool match_directory, - 
vector &result, bool join_path) { - fs.ListFiles(path, [&](const string &fname, bool is_directory) { - if (is_directory != match_directory) { - return; - } - if (LikeFun::Glob(fname.c_str(), fname.size(), glob.c_str(), glob.size())) { - if (join_path) { - result.push_back(fs.JoinPath(path, fname)); - } else { - result.push_back(fname); - } - } - }); -} - -vector FileSystem::Glob(const string &path) { - if (path.empty()) { - return vector(); - } - // first check if the path has a glob at all - if (!HasGlob(path)) { - // no glob: return only the file (if it exists) - vector result; - if (FileExists(path)) { - result.push_back(path); - } - return result; - } - // split up the path into separate chunks - vector splits; - idx_t last_pos = 0; - for (idx_t i = 0; i < path.size(); i++) { - if (path[i] == '\\' || path[i] == '/') { - if (i == last_pos) { - // empty: skip this position - last_pos = i + 1; - continue; - } - if (splits.empty()) { - splits.push_back(path.substr(0, i)); - } else { - splits.push_back(path.substr(last_pos, i - last_pos)); - } - last_pos = i + 1; - } - } - splits.push_back(path.substr(last_pos, path.size() - last_pos)); - // handle absolute paths - bool absolute_path = false; - if (path[0] == '/') { - // first character is a slash - unix absolute path - absolute_path = true; - } else if (StringUtil::Contains(splits[0], ":")) { - // first split has a colon - windows absolute path - absolute_path = true; - } else if (splits[0] == "~") { - // starts with home directory - auto home_directory = GetHomeDirectory(); - if (!home_directory.empty()) { - absolute_path = true; - splits[0] = home_directory; - } - } - vector previous_directories; - if (absolute_path) { - // for absolute paths, we don't start by scanning the current directory - previous_directories.push_back(splits[0]); - } - for (idx_t i = absolute_path ? 
1 : 0; i < splits.size(); i++) { - bool is_last_chunk = i + 1 == splits.size(); - bool has_glob = HasGlob(splits[i]); - // if it's the last chunk we need to find files, otherwise we find directories - // not the last chunk: gather a list of all directories that match the glob pattern - vector result; - if (!has_glob) { - // no glob, just append as-is - if (previous_directories.empty()) { - result.push_back(splits[i]); - } else { - for (auto &prev_directory : previous_directories) { - result.push_back(JoinPath(prev_directory, splits[i])); - } - } - } else { - if (previous_directories.empty()) { - // no previous directories: list in the current path - GlobFiles(*this, ".", splits[i], !is_last_chunk, result, false); - } else { - // previous directories - // we iterate over each of the previous directories, and apply the glob of the current directory - for (auto &prev_directory : previous_directories) { - GlobFiles(*this, prev_directory, splits[i], !is_last_chunk, result, true); - } - } - } - if (is_last_chunk || result.empty()) { - return result; - } - previous_directories = move(result); - } - return vector(); -} - -} // namespace duckdb - - /* @@ -214866,33 +227868,6 @@ without including the above copyright and permission notices. ### THIRD PARTY LICENSE #2 ### -Copyright 2013-2014 RAD Game Tools and Valve Software -Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC - -All Rights Reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - - - -### THIRD PARTY LICENSE #3 ### - ## utf8proc license ## **utf8proc** is a software package originally developed @@ -214989,39 +227964,43 @@ trademarks mentioned herein are the property of their respective owners. -### THIRD PARTY LICENSE #4 ### +### THIRD PARTY LICENSE #3 ### -Copyright (c) 2012 Art.sy, Inc. +Copyright 2013-2014 RAD Game Tools and Valve Software +Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: +All Rights Reserved. -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. -### THIRD PARTY LICENSE #5 ### -Copyright (c) 2014-2017 Melissa O'Neill and PCG Project contributors +### THIRD PARTY LICENSE #4 ### -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: +Copyright (c) 2012 Art.sy, Inc. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. @@ -215035,7 +228014,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -### THIRD PARTY LICENSE #6 ### +### THIRD PARTY LICENSE #5 ### Apache License Version 2.0, January 2004 @@ -215240,7 +228219,7 @@ SOFTWARE. limitations under the License. -### THIRD PARTY LICENSE #7 ### +### THIRD PARTY LICENSE #6 ### // Copyright (c) 2009 The RE2 Authors. All rights reserved. // @@ -215272,7 +228251,7 @@ SOFTWARE. -### THIRD PARTY LICENSE #8 ### +### THIRD PARTY LICENSE #7 ### This license file applies to everything in this repository except that which is explicitly annotated as being written by other authors, i.e. the Boost @@ -215338,7 +228317,7 @@ DEALINGS IN THE SOFTWARE. -### THIRD PARTY LICENSE #9 ### +### THIRD PARTY LICENSE #8 ### Copyright (c) 2015, Lukas Fittl All rights reserved. 
diff --git a/velox/external/duckdb/duckdb.hpp b/velox/external/duckdb/duckdb.hpp index 944adf2d21ae..f2d583d9fcf3 100644 --- a/velox/external/duckdb/duckdb.hpp +++ b/velox/external/duckdb/duckdb.hpp @@ -1,5 +1,5 @@ /* -Copyright 2018 DuckDB Contributors (see https://github.com/cwida/duckdb/graphs/contributors) +Copyright 2018 DuckDB Contributors (see https://github.com/duckdb/duckdb/graphs/contributors) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: @@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI #pragma once #define DUCKDB_AMALGAMATION 1 #define DUCKDB_AMALGAMATION_EXTENDED 1 -#define DUCKDB_SOURCE_ID "05c2c90b9" -#define DUCKDB_VERSION "0.2.6-dev663" +#define DUCKDB_SOURCE_ID "a776543ee" +#define DUCKDB_VERSION "0.2.9-dev217" //===----------------------------------------------------------------------===// // DuckDB // @@ -105,12 +105,6 @@ typedef uint8_t data_t; typedef data_t *data_ptr_t; typedef const data_t *const_data_ptr_t; -//! Type used to represent dates (days since 1970-01-01) -typedef int32_t date_t; -//! Type used to represent time (microseconds) -typedef int64_t dtime_t; -//! Type used to represent timestamps (microseconds since 1970-01-01) -typedef int64_t timestamp_t; //! Type used for the selection vector typedef uint32_t sel_t; //! 
Type used for transaction timestamps @@ -125,6 +119,7 @@ extern const column_t COLUMN_IDENTIFIER_ROW_ID; extern const row_t MAX_ROW_ID; extern const transaction_t TRANSACTION_ID_START; +extern const transaction_t MAX_TRANSACTION_ID; extern const transaction_t MAXIMUM_QUERY_ID; extern const transaction_t NOT_DELETED_ID; @@ -214,7 +209,7 @@ unique_ptr make_unique(Args &&... args) { using std::make_unique; #endif template -unique_ptr make_unique_base(Args &&...args) { +unique_ptr make_unique_base(Args &&... args) { return unique_ptr(new T(std::forward(args)...)); } @@ -223,6 +218,20 @@ unique_ptr unique_ptr_cast(unique_ptr src) { return unique_ptr(static_cast(src.release())); } +struct SharedConstructor { + template + static shared_ptr Create(ARGS &&...args) { + return make_shared(std::forward(args)...); + } +}; + +struct UniqueConstructor { + template + static unique_ptr Create(ARGS &&...args) { + return make_unique(std::forward(args)...); + } +}; + template T MaxValue(T a, T b) { return a > b ? a : b; @@ -233,6 +242,11 @@ T MinValue(T a, T b) { return a < b ? a : b; } +template +T AbsValue(T a) { + return a < 0 ? -a : a; +} + template const T Load(const_data_ptr_t ptr) { T ret; @@ -245,6 +259,19 @@ void Store(const T val, data_ptr_t ptr) { memcpy(ptr, (void *)&val, sizeof(val)); } +//! This assigns a shared pointer, but ONLY assigns if "target" is not equal to "source" +//! If this is often the case, this manner of assignment is significantly faster (~20X faster) +//! Since it avoids the need of an atomic incref/decref at the cost of a single pointer comparison +//! Benchmark: https://gist.github.com/Mytherin/4db3faa8e233c4a9b874b21f62bb4b96 +//! If the shared pointers are not the same, the penalty is very low (on the order of 1%~ slower) +//! 
This method should always be preferred if there is a (reasonable) chance that the pointers are the same +template +void AssignSharedPointer(shared_ptr &target, const shared_ptr &source) { + if (target.get() != source.get()) { + target = source; + } +} + } // namespace duckdb @@ -281,22 +308,6 @@ void DuckDBAssertInternal(bool condition, const char *condition_name, const char #endif -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/vector.hpp -// -// -//===----------------------------------------------------------------------===// - - - -#include - -namespace duckdb { -using std::vector; -} - //===----------------------------------------------------------------------===// // DuckDB // @@ -467,7 +478,7 @@ struct _object_and_block : public RefCounter { T object; template - explicit _object_and_block(Args &&...args) : object(std::forward(args)...) { + explicit _object_and_block(Args &&... args) : object(std::forward(args)...) { } }; @@ -498,11 +509,26 @@ inline bool operator!=(std::nullptr_t, const single_thread_ptr &sp) noexcept } template -single_thread_ptr single_thread_make_shared(Args &&...args) { +single_thread_ptr single_thread_make_shared(Args &&... args) { auto tmp_object = new _object_and_block(std::forward(args)...); return single_thread_ptr(tmp_object, &(tmp_object->object)); } } // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/vector.hpp +// +// +//===----------------------------------------------------------------------===// + + + +#include + +namespace duckdb { +using std::vector; +} @@ -532,10 +558,103 @@ namespace duckdb { class Serializer; class Deserializer; +//! 
Type used to represent dates (days since 1970-01-01) +struct date_t { + int32_t days; + + date_t() = default; + explicit inline date_t(int32_t days_p) : days(days_p) {} + + // explicit conversion + explicit inline operator int32_t() const {return days;} + + // comparison operators + inline bool operator==(const date_t &rhs) const {return days == rhs.days;}; + inline bool operator!=(const date_t &rhs) const {return days != rhs.days;}; + inline bool operator<=(const date_t &rhs) const {return days <= rhs.days;}; + inline bool operator<(const date_t &rhs) const {return days < rhs.days;}; + inline bool operator>(const date_t &rhs) const {return days > rhs.days;}; + inline bool operator>=(const date_t &rhs) const {return days >= rhs.days;}; + + // arithmetic operators + inline date_t operator+(const int32_t &days) const {return date_t(this->days + days);}; + inline date_t operator-(const int32_t &days) const {return date_t(this->days - days);}; + + // in-place operators + inline date_t &operator+=(const int32_t &days) {this->days += days; return *this;}; + inline date_t &operator-=(const int32_t &days) {this->days -= days; return *this;}; +}; + +//! 
Type used to represent time (microseconds) +struct dtime_t { + int64_t micros; + + dtime_t() = default; + explicit inline dtime_t(int64_t micros_p) : micros(micros_p) {} + inline dtime_t& operator=(int64_t micros_p) {micros = micros_p; return *this;} + + // explicit conversion + explicit inline operator int64_t() const {return micros;} + explicit inline operator double() const {return micros;} + + // comparison operators + inline bool operator==(const dtime_t &rhs) const {return micros == rhs.micros;}; + inline bool operator!=(const dtime_t &rhs) const {return micros != rhs.micros;}; + inline bool operator<=(const dtime_t &rhs) const {return micros <= rhs.micros;}; + inline bool operator<(const dtime_t &rhs) const {return micros < rhs.micros;}; + inline bool operator>(const dtime_t &rhs) const {return micros > rhs.micros;}; + inline bool operator>=(const dtime_t &rhs) const {return micros >= rhs.micros;}; + + // arithmetic operators + inline dtime_t operator+(const int64_t &micros) const {return dtime_t(this->micros + micros);}; + inline dtime_t operator+(const double &micros) const {return dtime_t(this->micros + int64_t(micros));}; + inline dtime_t operator-(const int64_t &micros) const {return dtime_t(this->micros - micros);}; + inline dtime_t operator*(const idx_t &copies) const {return dtime_t(this->micros * copies);}; + inline dtime_t operator/(const idx_t &copies) const {return dtime_t(this->micros / copies);}; + inline int64_t operator-(const dtime_t &other) const {return this->micros - other.micros;}; + + // in-place operators + inline dtime_t &operator+=(const int64_t &micros) {this->micros += micros; return *this;}; + inline dtime_t &operator-=(const int64_t &micros) {this->micros -= micros; return *this;}; + inline dtime_t &operator+=(const dtime_t &other) {this->micros += other.micros; return *this;}; +}; + +//! 
Type used to represent timestamps (seconds,microseconds,milliseconds or nanoseconds since 1970-01-01) +struct timestamp_t { + int64_t value; + + timestamp_t() = default; + explicit inline timestamp_t(int64_t value_p) : value(value_p) {} + inline timestamp_t& operator=(int64_t value_p) {value = value_p; return *this;} + + // explicit conversion + explicit inline operator int64_t() const {return value;} + + // comparison operators + inline bool operator==(const timestamp_t &rhs) const {return value == rhs.value;}; + inline bool operator!=(const timestamp_t &rhs) const {return value != rhs.value;}; + inline bool operator<=(const timestamp_t &rhs) const {return value <= rhs.value;}; + inline bool operator<(const timestamp_t &rhs) const {return value < rhs.value;}; + inline bool operator>(const timestamp_t &rhs) const {return value > rhs.value;}; + inline bool operator>=(const timestamp_t &rhs) const {return value >= rhs.value;}; + + // arithmetic operators + inline timestamp_t operator+(const double &value) const {return timestamp_t(this->value + int64_t(value));}; + inline int64_t operator-(const timestamp_t &other) const {return this->value - other.value;}; + + // in-place operators + inline timestamp_t &operator+=(const int64_t &value) {this->value += value; return *this;}; + inline timestamp_t &operator-=(const int64_t &value) {this->value -= value; return *this;}; +}; + struct interval_t { int32_t months; int32_t days; int64_t micros; + + inline bool operator==(const interval_t &rhs) const { + return this->days == rhs.days && this->months == rhs.months && this->micros == rhs.micros; + } }; struct hugeint_t { @@ -594,12 +713,13 @@ struct string_t; template using child_list_t = std::vector>; +// we should be using single_thread_ptr here but cross-thread access to ChunkCollections currently prohibits this. 
template -using buffer_ptr = single_thread_ptr; +using buffer_ptr = shared_ptr; template buffer_ptr make_buffer(Args &&...args) { - return single_thread_make_shared(std::forward(args)...); + return make_shared(std::forward(args)...); } struct list_entry_t { @@ -729,8 +849,6 @@ enum class PhysicalType : uint8_t { // DuckDB Extensions VARCHAR = 200, // our own string representation, different from STRING and LARGE_STRING above - POINTER = 202, - HASH = 203, INT128 = 204, // 128-bit integers /// Boolean as 1 bit, LSB bit-packed ordering @@ -755,18 +873,23 @@ enum class LogicalTypeId : uint8_t { BIGINT = 14, DATE = 15, TIME = 16, - TIMESTAMP = 17, - DECIMAL = 18, - FLOAT = 19, - DOUBLE = 20, - CHAR = 21, - VARCHAR = 22, - BLOB = 24, - INTERVAL = 25, - UTINYINT = 26, - USMALLINT = 27, - UINTEGER = 28, - UBIGINT = 29, + TIMESTAMP_SEC = 17, + TIMESTAMP_MS = 18, + TIMESTAMP = 19, //! us + TIMESTAMP_NS = 20, + DECIMAL = 21, + FLOAT = 22, + DOUBLE = 23, + CHAR = 24, + VARCHAR = 25, + BLOB = 26, + INTERVAL = 27, + UTINYINT = 28, + USMALLINT = 29, + UINTEGER = 30, + UBIGINT = 31, + + HUGEINT = 50, POINTER = 51, HASH = 52, @@ -778,37 +901,46 @@ enum class LogicalTypeId : uint8_t { TABLE = 103 }; +struct ExtraTypeInfo; + struct LogicalType { DUCKDB_API LogicalType(); DUCKDB_API LogicalType(LogicalTypeId id); // NOLINT: Allow implicit conversion from `LogicalTypeId` - DUCKDB_API LogicalType(LogicalTypeId id, string collation); - DUCKDB_API LogicalType(LogicalTypeId id, uint8_t width, uint8_t scale); - LogicalType(LogicalTypeId id, child_list_t child_types); - LogicalType(LogicalTypeId id, uint8_t width, uint8_t scale, string collation, - child_list_t child_types); + DUCKDB_API LogicalType(LogicalTypeId id, shared_ptr type_info); + DUCKDB_API LogicalType(const LogicalType &other) : + id_(other.id_), physical_type_(other.physical_type_), type_info_(other.type_info_) {} + + DUCKDB_API LogicalType(LogicalType &&other) : + id_(other.id_), physical_type_(other.physical_type_), 
type_info_(move(other.type_info_)) {} + + DUCKDB_API ~LogicalType(); LogicalTypeId id() const { return id_; } - uint8_t width() const { - return width_; - } - uint8_t scale() const { - return scale_; - } - const string &collation() const { - return collation_; - } - const child_list_t &child_types() const { - return child_types_; - } PhysicalType InternalType() const { return physical_type_; } + const ExtraTypeInfo *AuxInfo() const { + return type_info_.get(); + } - bool operator==(const LogicalType &rhs) const { - return id_ == rhs.id_ && width_ == rhs.width_ && scale_ == rhs.scale_ && child_types_ == rhs.child_types_; + // copy assignment + LogicalType& operator=(const LogicalType &other) { + id_ = other.id_; + physical_type_ = other.physical_type_; + type_info_ = other.type_info_; + return *this; + } + // move assignment + LogicalType& operator=(LogicalType&& other) { + id_ = other.id_; + physical_type_ = other.physical_type_; + type_info_ = move(other.type_info_); + return *this; } + + bool operator==(const LogicalType &rhs) const; bool operator!=(const LogicalType &rhs) const { return !(*this == rhs); } @@ -821,7 +953,6 @@ struct LogicalType { DUCKDB_API string ToString() const; DUCKDB_API bool IsIntegral() const; DUCKDB_API bool IsNumeric() const; - DUCKDB_API bool IsMoreGenericThan(LogicalType &other) const; DUCKDB_API hash_t Hash() const; DUCKDB_API static LogicalType MaxLogicalType(const LogicalType &left, const LogicalType &right); @@ -833,12 +964,8 @@ struct LogicalType { private: LogicalTypeId id_; - uint8_t width_; - uint8_t scale_; - string collation_; - - child_list_t child_types_; PhysicalType physical_type_; + shared_ptr type_info_; private: PhysicalType GetInternalType(); @@ -856,14 +983,13 @@ struct LogicalType { DUCKDB_API static const LogicalType UBIGINT; DUCKDB_API static const LogicalType FLOAT; DUCKDB_API static const LogicalType DOUBLE; - DUCKDB_API static const LogicalType DECIMAL; DUCKDB_API static const LogicalType DATE; DUCKDB_API 
static const LogicalType TIMESTAMP; + DUCKDB_API static const LogicalType TIMESTAMP_S; + DUCKDB_API static const LogicalType TIMESTAMP_MS; + DUCKDB_API static const LogicalType TIMESTAMP_NS; DUCKDB_API static const LogicalType TIME; DUCKDB_API static const LogicalType VARCHAR; - DUCKDB_API static const LogicalType STRUCT; - DUCKDB_API static const LogicalType MAP; - DUCKDB_API static const LogicalType LIST; DUCKDB_API static const LogicalType ANY; DUCKDB_API static const LogicalType BLOB; DUCKDB_API static const LogicalType INTERVAL; @@ -873,6 +999,13 @@ struct LogicalType { DUCKDB_API static const LogicalType TABLE; DUCKDB_API static const LogicalType INVALID; + // explicitly allowing these functions to be capitalized to be in-line with the remaining functions + DUCKDB_API static LogicalType DECIMAL(int width, int scale); // NOLINT + DUCKDB_API static LogicalType VARCHAR_COLLATION(string collation); // NOLINT + DUCKDB_API static LogicalType LIST(LogicalType child); // NOLINT + DUCKDB_API static LogicalType STRUCT(child_list_t children); // NOLINT + DUCKDB_API static LogicalType MAP(child_list_t children); // NOLINT + //! A list of all NUMERIC types (integral and floating point types) DUCKDB_API static const vector NUMERIC; //! 
A list of all INTEGRAL types @@ -881,9 +1014,30 @@ struct LogicalType { DUCKDB_API static const vector ALL_TYPES; }; +struct DecimalType { + DUCKDB_API static uint8_t GetWidth(const LogicalType &type); + DUCKDB_API static uint8_t GetScale(const LogicalType &type); +}; + +struct StringType { + DUCKDB_API static string GetCollation(const LogicalType &type); +}; + +struct ListType { + DUCKDB_API static const LogicalType &GetChildType(const LogicalType &type); +}; + +struct StructType { + DUCKDB_API static const child_list_t &GetChildTypes(const LogicalType &type); + DUCKDB_API static const LogicalType &GetChildType(const LogicalType &type, idx_t index); + DUCKDB_API static const string &GetChildName(const LogicalType &type, idx_t index); + DUCKDB_API static idx_t GetChildCount(const LogicalType &type); +}; + + string LogicalTypeIdToString(LogicalTypeId type); -LogicalType TransformStringToLogicalType(const string &str); +LogicalTypeId TransformStringToLogicalType(const string &str); //! Returns the PhysicalType for the given type template @@ -908,15 +1062,17 @@ PhysicalType GetTypeId() { return PhysicalType::UINT64; } else if (std::is_same()) { return PhysicalType::INT128; - } else if (std::is_same()) { - return PhysicalType::HASH; - } else if (std::is_same()) { - return PhysicalType::POINTER; + } else if (std::is_same()) { + return PhysicalType::DATE32; + } else if (std::is_same()) { + return PhysicalType::TIME32; + } else if (std::is_same()) { + return PhysicalType::TIMESTAMP; } else if (std::is_same()) { return PhysicalType::FLOAT; } else if (std::is_same()) { return PhysicalType::DOUBLE; - } else if (std::is_same() || std::is_same()) { + } else if (std::is_same() || std::is_same() || std::is_same()) { return PhysicalType::VARCHAR; } else if (std::is_same()) { return PhysicalType::INTERVAL; @@ -925,6 +1081,11 @@ PhysicalType GetTypeId() { } } +template +bool TypeIsNumber() { + return std::is_integral() || std::is_floating_point() || std::is_same(); +} + template 
bool IsValidType() { return GetTypeId() != PhysicalType::INVALID; @@ -1003,6 +1164,7 @@ ExceptionFormatValue ExceptionFormatValue::CreateFormatValue(char *value); } // namespace duckdb + #include namespace duckdb { @@ -1062,7 +1224,8 @@ enum class ExceptionType { FATAL = 30, // Fatal exception: fatal exceptions are non-recoverable, and render the entire DB in an unusable state INTERNAL = 31, // Internal exception: exception that indicates something went wrong internally (i.e. bug in the code base) - INVALID_INPUT = 32 // Input or arguments error + INVALID_INPUT = 32, // Input or arguments error + OUT_OF_MEMORY = 33 // out of memory }; class Exception : public std::exception { @@ -1174,6 +1337,16 @@ class OutOfRangeException : public Exception { } }; +class OutOfMemoryException : public Exception { +public: + explicit OutOfMemoryException(const string &msg); + + template + explicit OutOfMemoryException(const string &msg, Args... params) + : OutOfMemoryException(ConstructMessage(msg, params...)) { + } +}; + class SyntaxException : public Exception { public: explicit SyntaxException(const string &msg); @@ -1409,6 +1582,7 @@ string Deserializer::Read(); namespace duckdb { +class Allocator; struct FileHandle; enum class FileBufferType : uint8_t { BLOCK = 1, MANAGED_BUFFER = 2 }; @@ -1420,9 +1594,12 @@ class FileBuffer { //! FileSystemConstants::FILE_BUFFER_BLOCK_SIZE. The content in this buffer can be written to FileHandles that have //! been opened with DIRECT_IO on all operating systems, however, the entire buffer must be written to the file. //! Note that the returned size is 8 bytes less than the allocation size to account for the checksum. - FileBuffer(FileBufferType type, uint64_t bufsiz); + FileBuffer(Allocator &allocator, FileBufferType type, uint64_t bufsiz); + FileBuffer(FileBuffer &source, FileBufferType type); + virtual ~FileBuffer(); + Allocator &allocator; //! The type of the buffer FileBufferType type; //! 
The buffer that users can write to @@ -1431,27 +1608,44 @@ class FileBuffer { uint64_t size; public: + //! Read into the FileBuffer from the specified location. + void Read(FileHandle &handle, uint64_t location); //! Read into the FileBuffer from the specified location. Automatically verifies the checksum, and throws an //! exception if the checksum does not match correctly. - void Read(FileHandle &handle, uint64_t location); + void ReadAndChecksum(FileHandle &handle, uint64_t location); + //! Write the contents of the FileBuffer to the specified location. + void Write(FileHandle &handle, uint64_t location); //! Write the contents of the FileBuffer to the specified location. Automatically adds a checksum of the contents of //! the filebuffer in front of the written data. - void Write(FileHandle &handle, uint64_t location); + void ChecksumAndWrite(FileHandle &handle, uint64_t location); void Clear(); + void Resize(uint64_t bufsiz); + uint64_t AllocSize() { return internal_size; } -private: +protected: //! The pointer to the internal buffer that will be read or written, including the buffer header data_ptr_t internal_buffer; //! The aligned size as passed to the constructor. This is the size that is read or written to disk. uint64_t internal_size; +private: //! The buffer that was actually malloc'd, i.e. the pointer that must be freed when the FileBuffer is destroyed data_ptr_t malloced_buffer; + uint64_t malloced_size; + +protected: + uint64_t GetMallocedSize() { + return malloced_size; + } + //! Sets malloced_size given the requested buffer size + void SetMallocedSize(uint64_t &bufsiz); + //! 
Constructs the Filebuffer object + void Construct(uint64_t bufsiz); }; } // namespace duckdb @@ -1474,6 +1668,24 @@ using std::unordered_map; } +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/enums/file_compression_type.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { + +enum class FileCompressionType : uint8_t { AUTO_DETECT = 0, UNCOMPRESSED = 1, GZIP = 2 }; + +} // namespace duckdb + #include @@ -1486,6 +1698,25 @@ class ClientContext; class DatabaseInstance; class FileSystem; +enum class FileType { + //! Regular file + FILE_TYPE_REGULAR, + //! Directory + FILE_TYPE_DIR, + //! FIFO named pipe + FILE_TYPE_FIFO, + //! Socket + FILE_TYPE_SOCKET, + //! Symbolic link + FILE_TYPE_LINK, + //! Block device + FILE_TYPE_BLOCKDEV, + //! Character device + FILE_TYPE_CHARDEV, + //! Unknown or invalid file handle + FILE_TYPE_INVALID, +}; + struct FileHandle { public: FileHandle(FileSystem &file_system, string path) : file_system(file_system), path(path) { @@ -1494,10 +1725,21 @@ struct FileHandle { virtual ~FileHandle() { } + int64_t Read(void *buffer, idx_t nr_bytes); + int64_t Write(void *buffer, idx_t nr_bytes); void Read(void *buffer, idx_t nr_bytes, idx_t location); void Write(void *buffer, idx_t nr_bytes, idx_t location); + void Seek(idx_t location); + void Reset(); + idx_t SeekPosition(); void Sync(); void Truncate(int64_t new_size); + string ReadLine(); + + bool CanSeek(); + bool OnDiskFile(); + idx_t GetFileSize(); + FileType GetType(); protected: virtual void Close() = 0; @@ -1534,10 +1776,10 @@ class FileSystem { static FileSystem &GetFileSystem(ClientContext &context); static FileSystem &GetFileSystem(DatabaseInstance &db); - virtual unique_ptr OpenFile(const char *path, uint8_t flags, FileLockType lock = FileLockType::NO_LOCK); - unique_ptr OpenFile(string &path, uint8_t flags, FileLockType lock = FileLockType::NO_LOCK) { - 
return OpenFile(path.c_str(), flags, lock); - } + virtual unique_ptr OpenFile(const string &path, uint8_t flags, + FileLockType lock = FileLockType::NO_LOCK, + FileCompressionType compression = FileCompressionType::UNCOMPRESSED); + //! Read exactly nr_bytes from the specified location in the file. Fails if nr_bytes could not be read. This is //! equivalent to calling SetFilePointer(location) followed by calling Read(). virtual void Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location); @@ -1554,6 +1796,8 @@ class FileSystem { virtual int64_t GetFileSize(FileHandle &handle); //! Returns the file last modified time of a file handle, returns timespec with zero on all attributes on error virtual time_t GetLastModifiedTime(FileHandle &handle); + //! Returns the type of the file behind a file handle (regular file, directory, FIFO, socket, ...; see FileType) + virtual FileType GetFileType(FileHandle &handle); //! Truncate a file to a maximum size of new_size, new_size should be smaller than or equal to the current size of //! the file virtual void Truncate(FileHandle &handle, int64_t new_size); @@ -1598,22 +1842,38 @@ class FileSystem { virtual idx_t GetAvailableMemory(); //! registers a sub-file system to handle certain file name prefixes, e.g. http:// etc. - virtual void RegisterProtocolHandler(string protocol, unique_ptr protocol_fs) { - throw NotImplementedException("Can't register a protocol handler on a non-virtual file system"); + virtual void RegisterSubSystem(unique_ptr sub_fs) { + throw NotImplementedException("Can't register a sub system on a non-virtual file system"); + } + + virtual bool CanHandleFile(const string &fpath) { + //! Whether or not a sub-system can handle a specific file path + return false; } + //! Set the file pointer of a file handle to a specified location. Reads and writes will happen from this location + virtual void Seek(FileHandle &handle, idx_t location); + //! 
Reset a file to the beginning (equivalent to Seek(handle, 0) for simple files) + virtual void Reset(FileHandle &handle); + virtual idx_t SeekPosition(FileHandle &handle); + + //! Whether or not we can seek into the file + virtual bool CanSeek(); + //! Whether or not the FS handles plain files on disk. This is relevant for certain optimizations, as random reads + //! in a file on-disk are much cheaper than e.g. random reads in a file over the network + virtual bool OnDiskFile(FileHandle &handle); + private: //! Set the file pointer of a file handle to a specified location. Reads and writes will happen from this location void SetFilePointer(FileHandle &handle, idx_t location); + virtual idx_t GetFilePointer(FileHandle &handle); }; // bunch of wrappers to allow registering protocol handlers class VirtualFileSystem : public FileSystem { public: - unique_ptr OpenFile(const char *path, uint8_t flags, - FileLockType lock = FileLockType::NO_LOCK) override { - return FindFileSystem(path)->OpenFile(path, flags, lock); - } + unique_ptr OpenFile(const string &path, uint8_t flags, FileLockType lock = FileLockType::NO_LOCK, + FileCompressionType compression = FileCompressionType::UNCOMPRESSED) override; virtual void Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) override { handle.file_system.Read(handle, buffer, nr_bytes, location); @@ -1637,6 +1897,9 @@ class VirtualFileSystem : public FileSystem { time_t GetLastModifiedTime(FileHandle &handle) override { return handle.file_system.GetLastModifiedTime(handle); } + FileType GetFileType(FileHandle &handle) override { + return handle.file_system.GetFileType(handle); + } void Truncate(FileHandle &handle, int64_t new_size) override { handle.file_system.Truncate(handle, new_size); @@ -1695,22 +1958,22 @@ class VirtualFileSystem : public FileSystem { return default_fs.GetAvailableMemory(); } - void RegisterProtocolHandler(string protocol, unique_ptr protocol_fs) override { - protocol_handler_fss[protocol] = 
move(protocol_fs); + void RegisterSubSystem(unique_ptr fs) override { + sub_systems.push_back(move(fs)); } private: FileSystem *FindFileSystem(const string &path) { - for (auto &handler : protocol_handler_fss) { - if (path.rfind(handler.first, 0) == 0) { - return handler.second.get(); + for (auto &sub_system : sub_systems) { + if (sub_system->CanHandleFile(path)) { + return sub_system.get(); } } return &default_fs; } private: - unordered_map> protocol_handler_fss; + vector> sub_systems; FileSystem default_fs; }; @@ -1725,7 +1988,7 @@ class BufferedFileWriter : public Serializer { public: //! Serializes to a buffer allocated by the serializer, will expand when //! writing past the initial threshold - BufferedFileWriter(FileSystem &fs, string path, + BufferedFileWriter(FileSystem &fs, const string &path, uint8_t open_flags = FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE); FileSystem &fs; @@ -1809,6 +2072,31 @@ using std::bitset; } +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/enums/vector_type.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { + +enum class VectorType : uint8_t { + FLAT_VECTOR, // Flat vectors represent a standard uncompressed vector + CONSTANT_VECTOR, // Constant vector represents a single constant + DICTIONARY_VECTOR, // Dictionary vector represents a selection vector on top of another vector + SEQUENCE_VECTOR // Sequence vector represents a sequence with a start point and an increment +}; + +string VectorTypeToString(VectorType type); + +} // namespace duckdb + //===----------------------------------------------------------------------===// // DuckDB // @@ -1901,9 +2189,6 @@ struct SelectionVector { sel_vector = other.sel_vector; } - bool empty() const { - return !sel_vector; - } void set_index(idx_t idx, idx_t loc) { sel_vector[idx] = loc; } @@ -1913,7 +2198,7 @@ struct SelectionVector { 
sel_vector[j] = tmp; } idx_t get_index(idx_t idx) const { - return sel_vector[idx]; + return sel_vector ? sel_vector[idx] : idx; } sel_t *data() { return sel_vector; @@ -1926,17 +2211,53 @@ struct SelectionVector { string ToString(idx_t count = 0) const; void Print(idx_t count = 0) const; + sel_t &operator[](idx_t index) { + return sel_vector[index]; + } + private: sel_t *sel_vector; buffer_ptr selection_data; }; +class OptionalSelection { +public: + explicit inline OptionalSelection(SelectionVector *sel_p) : sel(sel_p) { + + if (sel) { + vec.Initialize(sel->data()); + sel = &vec; + } + } + + inline operator SelectionVector *() { + return sel; + } + + inline void Append(idx_t &count, const idx_t idx) { + if (sel) { + sel->set_index(count, idx); + } + ++count; + } + + inline void Advance(idx_t completed) { + if (sel) { + sel->Initialize(sel->data() + completed); + } + } + +private: + SelectionVector *sel; + SelectionVector vec; +}; + } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/types/value.hpp +// duckdb/common/types/validity_mask.hpp // // //===----------------------------------------------------------------------===// @@ -1946,110 +2267,416 @@ struct SelectionVector { +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/to_string.hpp +// +// +//===----------------------------------------------------------------------===// + namespace duckdb { +using std::to_string; +} -class Deserializer; -class Serializer; -//! The Value object holds a single arbitrary value of any type that can be -//! stored in the database. -class Value { - friend class Vector; +namespace duckdb { +struct ValidityMask; -public: - //! Create an empty NULL value of the specified type - explicit Value(LogicalType type = LogicalType::SQLNULL); - //! 
Create an INTEGER value - Value(int32_t val); // NOLINT: Allow implicit conversion from `int32_t` - //! Create a BIGINT value - Value(int64_t val); // NOLINT: Allow implicit conversion from `int64_t` - //! Create a FLOAT value - Value(float val); // NOLINT: Allow implicit conversion from `float` - //! Create a DOUBLE value - Value(double val); // NOLINT: Allow implicit conversion from `double` - //! Create a VARCHAR value - Value(const char *val); // NOLINT: Allow implicit conversion from `const char *` - //! Create a NULL value - Value(std::nullptr_t val); // NOLINT: Allow implicit conversion from `nullptr_t` - //! Create a VARCHAR value - Value(string_t val); // NOLINT: Allow implicit conversion from `string_t` - //! Create a VARCHAR value - Value(string val); // NOLINT: Allow implicit conversion from `string` +template +struct TemplatedValidityData { + static constexpr const int BITS_PER_VALUE = sizeof(V) * 8; + static constexpr const V MAX_ENTRY = ~V(0); - LogicalType type() const { - return type_; +public: + explicit TemplatedValidityData(idx_t count) { + auto entry_count = EntryCount(count); + owned_data = unique_ptr(new V[entry_count]); + for (idx_t entry_idx = 0; entry_idx < entry_count; entry_idx++) { + owned_data[entry_idx] = MAX_ENTRY; + } + } + TemplatedValidityData(const V *validity_mask, idx_t count) { + D_ASSERT(validity_mask); + auto entry_count = EntryCount(count); + owned_data = unique_ptr(new V[entry_count]); + for (idx_t entry_idx = 0; entry_idx < entry_count; entry_idx++) { + owned_data[entry_idx] = validity_mask[entry_idx]; + } } - //! Create the lowest possible value of a given type (numeric only) - static Value MinimumValue(const LogicalType &type); - //! Create the highest possible value of a given type (numeric only) - static Value MaximumValue(const LogicalType &type); - //! 
Create a Numeric value of the specified type with the specified value - static Value Numeric(const LogicalType &type, int64_t value); - static Value Numeric(const LogicalType &type, hugeint_t value); + unique_ptr owned_data; - //! Create a tinyint Value from a specified value - static Value BOOLEAN(int8_t value); - //! Create a tinyint Value from a specified value - static Value TINYINT(int8_t value); - //! Create a smallint Value from a specified value - static Value SMALLINT(int16_t value); - //! Create an integer Value from a specified value - static Value INTEGER(int32_t value); +public: + static inline idx_t EntryCount(idx_t count) { + return (count + (BITS_PER_VALUE - 1)) / BITS_PER_VALUE; + } +}; + +using validity_t = uint64_t; + +struct ValidityData : TemplatedValidityData { +public: + DUCKDB_API explicit ValidityData(idx_t count); + DUCKDB_API ValidityData(const ValidityMask &original, idx_t count); +}; + +//! Type used for validity masks +template +struct TemplatedValidityMask { + using ValidityBuffer = TemplatedValidityData; + +public: + static constexpr const int BITS_PER_VALUE = ValidityBuffer::BITS_PER_VALUE; + static constexpr const int STANDARD_ENTRY_COUNT = (STANDARD_VECTOR_SIZE + (BITS_PER_VALUE - 1)) / BITS_PER_VALUE; + static constexpr const int STANDARD_MASK_SIZE = STANDARD_ENTRY_COUNT * sizeof(validity_t); + +public: + TemplatedValidityMask() : validity_mask(nullptr) { + } + explicit TemplatedValidityMask(idx_t max_count) { + Initialize(max_count); + } + explicit TemplatedValidityMask(V *ptr) : validity_mask(ptr) { + } + TemplatedValidityMask(const TemplatedValidityMask &original, idx_t count) { + Copy(original, count); + } + + static inline idx_t ValidityMaskSize(idx_t count = STANDARD_VECTOR_SIZE) { + return ValidityBuffer::EntryCount(count) * sizeof(V); + } + inline bool AllValid() const { + return !validity_mask; + } + bool CheckAllValid(idx_t count) const { + if (AllValid()) { + return true; + } + idx_t entry_count = 
ValidityBuffer::EntryCount(count); + idx_t valid_count = 0; + for (idx_t i = 0; i < entry_count; i++) { + valid_count += validity_mask[i] == ValidityBuffer::MAX_ENTRY; + } + return valid_count == entry_count; + } + + bool CheckAllValid(idx_t to, idx_t from) const { + if (AllValid()) { + return true; + } + for (idx_t i = from; i < to; i++) { + if (!RowIsValid(i)) { + return false; + } + } + return true; + } + + inline V *GetData() const { + return validity_mask; + } + void Reset() { + validity_mask = nullptr; + validity_data.reset(); + } + + static inline idx_t EntryCount(idx_t count) { + return ValidityBuffer::EntryCount(count); + } + inline V GetValidityEntry(idx_t entry_idx) const { + if (!validity_mask) { + return ValidityBuffer::MAX_ENTRY; + } + return validity_mask[entry_idx]; + } + static inline bool AllValid(V entry) { + return entry == ValidityBuffer::MAX_ENTRY; + } + static inline bool NoneValid(V entry) { + return entry == 0; + } + static inline bool RowIsValid(V entry, idx_t idx_in_entry) { + return entry & (V(1) << V(idx_in_entry)); + } + static inline void GetEntryIndex(idx_t row_idx, idx_t &entry_idx, idx_t &idx_in_entry) { + entry_idx = row_idx / BITS_PER_VALUE; + idx_in_entry = row_idx % BITS_PER_VALUE; + } + + //! RowIsValidUnsafe should only be used if AllValid() is false: it achieves the same as RowIsValid but skips a + //! not-null check + inline bool RowIsValidUnsafe(idx_t row_idx) const { + D_ASSERT(validity_mask); + idx_t entry_idx, idx_in_entry; + GetEntryIndex(row_idx, entry_idx, idx_in_entry); + auto entry = GetValidityEntry(entry_idx); + return RowIsValid(entry, idx_in_entry); + } + + //! Returns true if a row is valid (i.e. not null), false otherwise + inline bool RowIsValid(idx_t row_idx) const { + if (!validity_mask) { + return true; + } + return RowIsValidUnsafe(row_idx); + } + + //! 
Same as SetValid, but skips a null check on validity_mask + inline void SetValidUnsafe(idx_t row_idx) { + D_ASSERT(validity_mask); + idx_t entry_idx, idx_in_entry; + GetEntryIndex(row_idx, entry_idx, idx_in_entry); + validity_mask[entry_idx] |= (V(1) << V(idx_in_entry)); + } + + //! Marks the entry at the specified row index as valid (i.e. not-null) + inline void SetValid(idx_t row_idx) { + if (!validity_mask) { + // if AllValid() we don't need to do anything + // the row is already valid + return; + } + SetValidUnsafe(row_idx); + } + + //! Marks the bit at the specified entry as invalid (i.e. null) + inline void SetInvalidUnsafe(idx_t entry_idx, idx_t idx_in_entry) { + D_ASSERT(validity_mask); + validity_mask[entry_idx] &= ~(V(1) << V(idx_in_entry)); + } + + //! Marks the bit at the specified row index as invalid (i.e. null) + inline void SetInvalidUnsafe(idx_t row_idx) { + idx_t entry_idx, idx_in_entry; + GetEntryIndex(row_idx, entry_idx, idx_in_entry); + SetInvalidUnsafe(entry_idx, idx_in_entry); + } + + //! Marks the entry at the specified row index as invalid (i.e. null) + inline void SetInvalid(idx_t row_idx) { + if (!validity_mask) { + D_ASSERT(row_idx <= STANDARD_VECTOR_SIZE); + Initialize(STANDARD_VECTOR_SIZE); + } + SetInvalidUnsafe(row_idx); + } + + //! Mark the entry at the specified index as either valid or invalid (non-null or null) + inline void Set(idx_t row_idx, bool valid) { + if (valid) { + SetValid(row_idx); + } else { + SetInvalid(row_idx); + } + } + + //! Ensure the validity mask is writable, allocating space if it is not initialized + inline void EnsureWritable() { + if (!validity_mask) { + Initialize(); + } + } + + //! Marks "count" entries in the validity mask as invalid (null) + inline void SetAllInvalid(idx_t count) { + EnsureWritable(); + for (idx_t i = 0; i < ValidityBuffer::EntryCount(count); i++) { + validity_mask[i] = 0; + } + } + + //! 
Marks "count" entries in the validity mask as valid (not null) + inline void SetAllValid(idx_t count) { + EnsureWritable(); + for (idx_t i = 0; i < ValidityBuffer::EntryCount(count); i++) { + validity_mask[i] = ValidityBuffer::MAX_ENTRY; + } + } + + inline bool IsMaskSet() const { + if (validity_mask) { + return true; + } + return false; + } + +public: + void Initialize(validity_t *validity) { + validity_data.reset(); + validity_mask = validity; + } + void Initialize(const TemplatedValidityMask &other) { + validity_mask = other.validity_mask; + validity_data = other.validity_data; + } + void Initialize(idx_t count = STANDARD_VECTOR_SIZE) { + validity_data = make_buffer(count); + validity_mask = validity_data->owned_data.get(); + } + void Copy(const TemplatedValidityMask &other, idx_t count) { + if (other.AllValid()) { + validity_data = nullptr; + validity_mask = nullptr; + } else { + validity_data = make_buffer(other.validity_mask, count); + validity_mask = validity_data->owned_data.get(); + } + } + +protected: + V *validity_mask; + buffer_ptr validity_data; +}; + +struct ValidityMask : public TemplatedValidityMask { +public: + ValidityMask() : TemplatedValidityMask(nullptr) { + } + explicit ValidityMask(idx_t max_count) : TemplatedValidityMask(max_count) { + } + explicit ValidityMask(validity_t *ptr) : TemplatedValidityMask(ptr) { + } + ValidityMask(const ValidityMask &original, idx_t count) : TemplatedValidityMask(original, count) { + } + +public: + void Resize(idx_t old_size, idx_t new_size); + + void Slice(const ValidityMask &other, idx_t offset); + void Combine(const ValidityMask &other, idx_t count); + string ToString(idx_t count) const; +}; + +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/types/value.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + + +namespace duckdb { + +class Deserializer; +class 
Serializer; + +//! The Value object holds a single arbitrary value of any type that can be +//! stored in the database. +class Value { + friend class Vector; + +public: + //! Create an empty NULL value of the specified type + explicit Value(LogicalType type = LogicalType::SQLNULL); + //! Create an INTEGER value + Value(int32_t val); // NOLINT: Allow implicit conversion from `int32_t` + //! Create a BIGINT value + Value(int64_t val); // NOLINT: Allow implicit conversion from `int64_t` + //! Create a FLOAT value + Value(float val); // NOLINT: Allow implicit conversion from `float` + //! Create a DOUBLE value + Value(double val); // NOLINT: Allow implicit conversion from `double` + //! Create a VARCHAR value + Value(const char *val); // NOLINT: Allow implicit conversion from `const char *` + //! Create a NULL value + Value(std::nullptr_t val); // NOLINT: Allow implicit conversion from `nullptr_t` + //! Create a VARCHAR value + Value(string_t val); // NOLINT: Allow implicit conversion from `string_t` + //! Create a VARCHAR value + Value(string val); // NOLINT: Allow implicit conversion from `string` + + const LogicalType &type() const { + return type_; + } + + //! Create the lowest possible value of a given type (numeric only) + DUCKDB_API static Value MinimumValue(const LogicalType &type); + //! Create the highest possible value of a given type (numeric only) + DUCKDB_API static Value MaximumValue(const LogicalType &type); + //! Create a Numeric value of the specified type with the specified value + DUCKDB_API static Value Numeric(const LogicalType &type, int64_t value); + DUCKDB_API static Value Numeric(const LogicalType &type, hugeint_t value); + + //! Create a tinyint Value from a specified value + DUCKDB_API static Value BOOLEAN(int8_t value); + //! Create a tinyint Value from a specified value + DUCKDB_API static Value TINYINT(int8_t value); + //! Create a smallint Value from a specified value + DUCKDB_API static Value SMALLINT(int16_t value); + //! 
Create an integer Value from a specified value + DUCKDB_API static Value INTEGER(int32_t value); //! Create a bigint Value from a specified value - static Value BIGINT(int64_t value); + DUCKDB_API static Value BIGINT(int64_t value); //! Create an unsigned tinyint Value from a specified value - static Value UTINYINT(uint8_t value); + DUCKDB_API static Value UTINYINT(uint8_t value); //! Create an unsigned smallint Value from a specified value - static Value USMALLINT(uint16_t value); + DUCKDB_API static Value USMALLINT(uint16_t value); //! Create an unsigned integer Value from a specified value - static Value UINTEGER(uint32_t value); + DUCKDB_API static Value UINTEGER(uint32_t value); //! Create an unsigned bigint Value from a specified value - static Value UBIGINT(uint64_t value); + DUCKDB_API static Value UBIGINT(uint64_t value); //! Create a hugeint Value from a specified value - static Value HUGEINT(hugeint_t value); + DUCKDB_API static Value HUGEINT(hugeint_t value); //! Create a hash Value from a specified value - static Value HASH(hash_t value); + DUCKDB_API static Value HASH(hash_t value); //! Create a pointer Value from a specified value - static Value POINTER(uintptr_t value); + DUCKDB_API static Value POINTER(uintptr_t value); //! Create a date Value from a specified date - static Value DATE(date_t date); + DUCKDB_API static Value DATE(date_t date); //! Create a date Value from a specified date - static Value DATE(int32_t year, int32_t month, int32_t day); + DUCKDB_API static Value DATE(int32_t year, int32_t month, int32_t day); //! Create a time Value from a specified time - static Value TIME(dtime_t time); + DUCKDB_API static Value TIME(dtime_t time); //! Create a time Value from a specified time - static Value TIME(int32_t hour, int32_t min, int32_t sec, int32_t micros); + DUCKDB_API static Value TIME(int32_t hour, int32_t min, int32_t sec, int32_t micros); //! 
Create a timestamp Value from a specified date/time combination - static Value TIMESTAMP(date_t date, dtime_t time); + DUCKDB_API static Value TIMESTAMP(date_t date, dtime_t time); //! Create a timestamp Value from a specified timestamp - static Value TIMESTAMP(timestamp_t timestamp); + DUCKDB_API static Value TIMESTAMP(timestamp_t timestamp); + DUCKDB_API static Value TimestampNs(timestamp_t timestamp); + DUCKDB_API static Value TimestampMs(timestamp_t timestamp); + DUCKDB_API static Value TimestampSec(timestamp_t timestamp); //! Create a timestamp Value from a specified timestamp in separate values - static Value TIMESTAMP(int32_t year, int32_t month, int32_t day, int32_t hour, int32_t min, int32_t sec, - int32_t micros); - static Value INTERVAL(int32_t months, int32_t days, int64_t micros); - static Value INTERVAL(interval_t interval); + DUCKDB_API static Value TIMESTAMP(int32_t year, int32_t month, int32_t day, int32_t hour, int32_t min, int32_t sec, + int32_t micros); + DUCKDB_API static Value INTERVAL(int32_t months, int32_t days, int64_t micros); + DUCKDB_API static Value INTERVAL(interval_t interval); // Decimal values - static Value DECIMAL(int16_t value, uint8_t width, uint8_t scale); - static Value DECIMAL(int32_t value, uint8_t width, uint8_t scale); - static Value DECIMAL(int64_t value, uint8_t width, uint8_t scale); - static Value DECIMAL(hugeint_t value, uint8_t width, uint8_t scale); + DUCKDB_API static Value DECIMAL(int16_t value, uint8_t width, uint8_t scale); + DUCKDB_API static Value DECIMAL(int32_t value, uint8_t width, uint8_t scale); + DUCKDB_API static Value DECIMAL(int64_t value, uint8_t width, uint8_t scale); + DUCKDB_API static Value DECIMAL(hugeint_t value, uint8_t width, uint8_t scale); //! Create a float Value from a specified value - static Value FLOAT(float value); + DUCKDB_API static Value FLOAT(float value); //! 
Create a double Value from a specified value - static Value DOUBLE(double value); + DUCKDB_API static Value DOUBLE(double value); //! Create a struct value with given list of entries - static Value STRUCT(child_list_t values); + DUCKDB_API static Value STRUCT(child_list_t values); //! Create a list value with the given entries - static Value LIST(std::vector values); + DUCKDB_API static Value LIST(vector values); + //! Create a map value from a (key, value) pair + DUCKDB_API static Value MAP(Value key, Value value); //! Create a blob Value from a data pointer and a length: no bytes are interpreted - static Value BLOB(const_data_ptr_t data, idx_t len); + DUCKDB_API static Value BLOB(const_data_ptr_t data, idx_t len); + DUCKDB_API static Value BLOB_RAW(const string &data) { + return Value::BLOB((const_data_ptr_t)data.c_str(), data.size()); + } //! Creates a blob by casting a specified string to a blob (i.e. interpreting \x characters) - static Value BLOB(const string &data); + DUCKDB_API static Value BLOB(const string &data); template T GetValue() const { @@ -2074,9 +2701,14 @@ class Value { //! Convert this value to a string DUCKDB_API string ToString() const; - //! Cast this value to another type + DUCKDB_API uintptr_t GetPointer() const; + + //! Cast this value to another type, throws exception if it's not possible DUCKDB_API Value CastAs(const LogicalType &target_type, bool strict = false) const; - //! Tries to cast value to another type, throws exception if its not possible + //! Tries to cast this value to another type, and stores the result in "new_value" + DUCKDB_API bool TryCastAs(const LogicalType &target_type, Value &new_value, string *error_message, + bool strict = false) const; + //! Tries to cast this value to another type, and stores the result in THIS value again DUCKDB_API bool TryCastAs(const LogicalType &target_type, bool strict = false); //! 
Serializes a Value to a stand-alone binary blob @@ -2112,6 +2744,10 @@ class Value { static bool FloatIsValid(float value); static bool DoubleIsValid(double value); + static bool StringIsValid(const char *str, idx_t length); + static bool StringIsValid(const string &str) { + return StringIsValid(str.c_str(), str.size()); + } template static bool IsValid(T value) { @@ -2126,7 +2762,7 @@ class Value { out << val.ToString(); return out; } - void Print(); + void Print() const; private: //! The logical of the value @@ -2152,14 +2788,17 @@ class Value { double double_; uintptr_t pointer; uint64_t hash; + date_t date; + dtime_t time; + timestamp_t timestamp; interval_t interval; } value_; //! The value of the object, if it is of a variable size type string str_value; - child_list_t struct_value; - std::vector list_value; + vector struct_value; + vector list_value; private: template @@ -2198,7 +2837,13 @@ Value DUCKDB_API Value::CreateValue(int64_t value); template <> Value DUCKDB_API Value::CreateValue(hugeint_t value); template <> -Value DUCKDB_API Value::CreateValue(const char *value); +Value DUCKDB_API Value::CreateValue(date_t value); +template <> +Value DUCKDB_API Value::CreateValue(dtime_t value); +template <> +Value DUCKDB_API Value::CreateValue(timestamp_t value); +template <> +Value DUCKDB_API Value::CreateValue(const char *value); template <> Value DUCKDB_API Value::CreateValue(string value); template <> @@ -2208,6 +2853,8 @@ Value DUCKDB_API Value::CreateValue(float value); template <> Value DUCKDB_API Value::CreateValue(double value); template <> +Value DUCKDB_API Value::CreateValue(interval_t value); +template <> Value DUCKDB_API Value::CreateValue(Value value); template <> @@ -2225,6 +2872,10 @@ DUCKDB_API uint8_t Value::GetValue() const; template <> DUCKDB_API uint16_t Value::GetValue() const; template <> +DUCKDB_API uint32_t Value::GetValue() const; +template <> +DUCKDB_API uint64_t Value::GetValue() const; +template <> DUCKDB_API hugeint_t 
Value::GetValue() const; template <> DUCKDB_API string Value::GetValue() const; @@ -2233,7 +2884,13 @@ DUCKDB_API float Value::GetValue() const; template <> DUCKDB_API double Value::GetValue() const; template <> -DUCKDB_API uintptr_t Value::GetValue() const; +DUCKDB_API date_t Value::GetValue() const; +template <> +DUCKDB_API dtime_t Value::GetValue() const; +template <> +DUCKDB_API timestamp_t Value::GetValue() const; +template <> +DUCKDB_API interval_t Value::GetValue() const; template <> DUCKDB_API int8_t &Value::GetValueUnsafe(); @@ -2259,6 +2916,14 @@ template <> DUCKDB_API float &Value::GetValueUnsafe(); template <> DUCKDB_API double &Value::GetValueUnsafe(); +template <> +DUCKDB_API date_t &Value::GetValueUnsafe(); +template <> +DUCKDB_API dtime_t &Value::GetValueUnsafe(); +template <> +DUCKDB_API timestamp_t &Value::GetValueUnsafe(); +template <> +DUCKDB_API interval_t &Value::GetValueUnsafe(); template <> DUCKDB_API bool Value::IsValid(float value); @@ -2267,31 +2932,6 @@ DUCKDB_API bool Value::IsValid(double value); } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/enums/vector_type.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - -namespace duckdb { - -enum class VectorType : uint8_t { - FLAT_VECTOR, // Flat vectors represent a standard uncompressed vector - CONSTANT_VECTOR, // Constant vector represents a single constant - DICTIONARY_VECTOR, // Dictionary vector represents a selection vector on top of another vector - SEQUENCE_VECTOR // Sequence vector represents a sequence with a start point and an increment -}; - -string VectorTypeToString(VectorType type); - -} // namespace duckdb - //===----------------------------------------------------------------------===// // DuckDB // @@ -2348,8 +2988,6 @@ class StringHeap { string_t AddBlob(const char *data, idx_t len); //! 
Allocates space for an empty string of size "len" on the heap string_t EmptyString(idx_t len); - //! Add all strings from a different string heap to this string heap - void MergeHeap(StringHeap &heap); private: struct StringChunk { @@ -2411,7 +3049,7 @@ struct string_t { if (IsInlined()) { // zero initialize the prefix first // this makes sure that strings with length smaller than 4 still have an equal prefix - memset(value.inlined.inlined, 0, PREFIX_LENGTH); + memset(value.inlined.inlined, 0, INLINE_LENGTH); if (GetSize() == 0) { return; } @@ -2458,12 +3096,16 @@ struct string_t { return string(GetDataUnsafe(), GetSize()); } + explicit operator string() const { + return GetString(); + } + void Finalize() { // set trailing NULL byte auto dataptr = (char *)GetDataUnsafe(); if (GetSize() <= INLINE_LENGTH) { // fill prefix with zeros if the length is smaller than the prefix length - for (idx_t i = GetSize(); i < PREFIX_LENGTH; i++) { + for (idx_t i = GetSize(); i < INLINE_LENGTH; i++) { value.inlined.inlined[i] = '\0'; } } else { @@ -2526,33 +3168,14 @@ class VectorBuffer { data = unique_ptr(new data_t[data_size]); } } - explicit VectorBuffer(VectorBufferType vectorBufferType, const LogicalType &type, VectorType vector_type) - : vector_type(vector_type), type(type), buffer_type(vectorBufferType) { + explicit VectorBuffer(unique_ptr data_p) + : buffer_type(VectorBufferType::STANDARD_BUFFER), data(move(data_p)) { } virtual ~VectorBuffer() { } VectorBuffer() { } - VectorBuffer(VectorType vectorType, const LogicalType &type, idx_t data_size) - : vector_type(vectorType), type(type), buffer_type(VectorBufferType::STANDARD_BUFFER) { - if (data_size > 0) { - data = unique_ptr(new data_t[data_size]); - } - } - VectorBuffer(VectorType vectorType, const LogicalType &type) : vector_type(vectorType), type(type) { - } - - VectorBuffer(VectorType vectorType, idx_t data_size) - : vector_type(vectorType), buffer_type(VectorBufferType::STANDARD_BUFFER) { - if (data_size > 0) { - data = 
unique_ptr(new data_t[data_size]); - } - } - - VectorBuffer(VectorType vectorType) : vector_type(vectorType) { - } - public: data_ptr_t GetData() { return data.get(); @@ -2561,42 +3184,19 @@ class VectorBuffer { data = move(new_data); } - static buffer_ptr CreateStandardVector(PhysicalType type); + static buffer_ptr CreateStandardVector(PhysicalType type, idx_t capacity = STANDARD_VECTOR_SIZE); static buffer_ptr CreateConstantVector(PhysicalType type); - static buffer_ptr CreateConstantVector(VectorType vectorType, const LogicalType &logicalType); - static buffer_ptr CreateStandardVector(VectorType vectorType, const LogicalType &logicalType); - static buffer_ptr CreateStandardVector(VectorType vectorType, PhysicalType type); + static buffer_ptr CreateConstantVector(const LogicalType &logical_type); + static buffer_ptr CreateStandardVector(const LogicalType &logical_type, + idx_t capacity = STANDARD_VECTOR_SIZE); - // Getters - inline VectorType GetVectorType() const { - return vector_type; - } - inline const LogicalType &GetType() const { - return type; - } inline VectorBufferType GetBufferType() const { return buffer_type; } - // Setters - inline void SetVectorType(VectorType vector_type) { - this->vector_type = vector_type; - } - inline void SetType(const LogicalType &type) { - this->type = type; - } - inline void SetBufferType(VectorBufferType buffer_type) { - this->buffer_type = buffer_type; - } - protected: - unique_ptr data; - //! The vector type specifies how the data of the vector is physically stored (i.e. if it is a single repeated - //! constant, if it is compressed) - VectorType vector_type; - //! The type of the elements stored in the vector (e.g. integer, float) - LogicalType type; VectorBufferType buffer_type; + unique_ptr data; }; //! 
The DictionaryBuffer holds a selection vector @@ -2605,9 +3205,6 @@ class DictionaryBuffer : public VectorBuffer { explicit DictionaryBuffer(const SelectionVector &sel) : VectorBuffer(VectorBufferType::DICTIONARY_BUFFER), sel_vector(sel) { } - DictionaryBuffer(const SelectionVector &sel, const LogicalType &type, VectorType vector_type) - : VectorBuffer(VectorBufferType::DICTIONARY_BUFFER, type, vector_type), sel_vector(sel) { - } explicit DictionaryBuffer(buffer_ptr data) : VectorBuffer(VectorBufferType::DICTIONARY_BUFFER), sel_vector(move(data)) { } @@ -2616,9 +3213,6 @@ class DictionaryBuffer : public VectorBuffer { } public: - DictionaryBuffer(buffer_ptr data, LogicalType type, VectorType vector_type) - : VectorBuffer(VectorBufferType::DICTIONARY_BUFFER, type, vector_type), sel_vector(move(data)) { - } const SelectionVector &GetSelVector() const { return sel_vector; } @@ -2665,34 +3259,33 @@ class VectorStringBuffer : public VectorBuffer { class VectorStructBuffer : public VectorBuffer { public: VectorStructBuffer(); + VectorStructBuffer(const LogicalType &struct_type, idx_t capacity = STANDARD_VECTOR_SIZE); ~VectorStructBuffer() override; public: - const child_list_t> &GetChildren() const { + const vector> &GetChildren() const { return children; } - child_list_t> &GetChildren() { + vector> &GetChildren() { return children; } - void AddChild(string name, unique_ptr vector) { - children.push_back(std::make_pair(name, move(vector))); - } private: //! 
child vectors used for nested data - child_list_t> children; + vector> children; }; class VectorListBuffer : public VectorBuffer { public: - VectorListBuffer(); + VectorListBuffer(unique_ptr vector, idx_t initial_capacity = STANDARD_VECTOR_SIZE); + VectorListBuffer(const LogicalType &list_type, idx_t initial_capacity = STANDARD_VECTOR_SIZE); ~VectorListBuffer() override; public: Vector &GetChild() { return *child; } - void SetChild(unique_ptr new_child); + void Reserve(idx_t to_reserve); void Append(const Vector &to_append, idx_t to_append_size, idx_t source_offset = 0); void Append(const Vector &to_append, const SelectionVector &sel, idx_t to_append_size, idx_t source_offset = 0); @@ -2703,8 +3296,6 @@ class VectorListBuffer : public VectorBuffer { idx_t size = 0; private: - void Reserve(const Vector &to_append, idx_t to_reserve); - //! child vectors used for nested data unique_ptr child; }; @@ -2722,391 +3313,160 @@ class ManagedVectorBuffer : public VectorBuffer { } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/types/validity_mask.hpp -// -// -//===----------------------------------------------------------------------===// - +namespace duckdb { +struct VectorData { + const SelectionVector *sel; + data_ptr_t data; + ValidityMask validity; + SelectionVector owned_sel; +}; +class VectorCache; +class VectorStructBuffer; +class VectorListBuffer; +class ChunkCollection; +struct SelCache; -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/to_string.hpp -// -// -//===----------------------------------------------------------------------===// + +//! Vector of values of a specified PhysicalType. 
+class Vector { + friend struct ConstantVector; + friend struct DictionaryVector; + friend struct FlatVector; + friend struct ListVector; + friend struct StringVector; + friend struct StructVector; + friend struct SequenceVector; + friend class DataChunk; + friend class VectorCacheBuffer; +public: + //! Create a vector that references the other vector + explicit Vector(Vector &other); + //! Create a vector that slices another vector + explicit Vector(Vector &other, const SelectionVector &sel, idx_t count); + //! Create a vector that slices another vector starting from a specific offset + explicit Vector(Vector &other, idx_t offset); + //! Create a vector of size one holding the passed on value + explicit Vector(const Value &value); + //! Create an empty standard vector with a type, equivalent to calling Vector(type, true, false) + explicit Vector(LogicalType type, idx_t capacity = STANDARD_VECTOR_SIZE); + //! Create an empty standard vector with a type, equivalent to calling Vector(type, true, false) + explicit Vector(const VectorCache &cache); + //! Create a non-owning vector that references the specified data + Vector(LogicalType type, data_ptr_t dataptr); + //! Create an owning vector that holds at most STANDARD_VECTOR_SIZE entries. + /*! + Create a new vector + If create_data is true, the vector will be an owning empty vector. + If zero_data is true, the allocated data will be zero-initialized. + */ + Vector(LogicalType type, bool create_data, bool zero_data, idx_t capacity = STANDARD_VECTOR_SIZE); + // implicit copying of Vectors is not allowed + Vector(const Vector &) = delete; + // but moving of vectors is allowed + Vector(Vector &&other) noexcept; -namespace duckdb { -using std::to_string; -} +public: + //! Create a vector that references the specified value. + void Reference(const Value &value); + //! Causes this vector to reference the data held by the other vector. + //! 
The type of the "other" vector should match the type of this vector + void Reference(Vector &other); + //! Reinterpret the data of the other vector as the type of this vector + //! Note that this takes the data of the other vector as-is and places it in this vector + //! Without changing the type of this vector + void Reinterpret(Vector &other); + //! Resets a vector from a vector cache. + //! This turns the vector back into an empty FlatVector with STANDARD_VECTOR_SIZE entries. + //! The VectorCache is used so this can be done without requiring any allocations. + void ResetFromCache(const VectorCache &cache); -namespace duckdb { -struct ValidityMask; + //! Creates a reference to a slice of the other vector + void Slice(Vector &other, idx_t offset); + //! Creates a reference to a slice of the other vector + void Slice(Vector &other, const SelectionVector &sel, idx_t count); + //! Turns the vector into a dictionary vector with the specified dictionary + void Slice(const SelectionVector &sel, idx_t count); + //! Slice the vector, keeping the result around in a cache or potentially using the cache instead of slicing + void Slice(const SelectionVector &sel, idx_t count, SelCache &cache); -using validity_t = uint64_t; + //! Creates the data of this vector with the specified type. Any data that + //! is currently in the vector is destroyed. + void Initialize(bool zero_data = false, idx_t capacity = STANDARD_VECTOR_SIZE); -struct ValidityData { - static constexpr const int BITS_PER_VALUE = sizeof(validity_t) * 8; - static constexpr const validity_t MAX_ENTRY = ~validity_t(0); + //! Converts this Vector to a printable string representation + string ToString(idx_t count) const; + void Print(idx_t count); -public: - DUCKDB_API explicit ValidityData(idx_t count); - DUCKDB_API ValidityData(const ValidityMask &original, idx_t count); + string ToString() const; + void Print(); - unique_ptr owned_data; + //! 
Flatten the vector, removing any compression and turning it into a FLAT_VECTOR + DUCKDB_API void Normalify(idx_t count); + DUCKDB_API void Normalify(const SelectionVector &sel, idx_t count); + //! Obtains a selection vector and data pointer through which the data of this vector can be accessed + DUCKDB_API void Orrify(idx_t count, VectorData &data); -public: - static inline idx_t EntryCount(idx_t count) { - return (count + (BITS_PER_VALUE - 1)) / BITS_PER_VALUE; - } -}; + //! Turn the vector into a sequence vector + void Sequence(int64_t start, int64_t increment); -//! Type used for validity masks -struct ValidityMask { - friend struct ValidityData; + //! Verify that the Vector is in a consistent, not corrupt state. DEBUG + //! FUNCTION ONLY! + void Verify(idx_t count); + void Verify(const SelectionVector &sel, idx_t count); + void UTFVerify(idx_t count); + void UTFVerify(const SelectionVector &sel, idx_t count); -public: - static constexpr const int BITS_PER_VALUE = ValidityData::BITS_PER_VALUE; - static constexpr const int STANDARD_ENTRY_COUNT = (STANDARD_VECTOR_SIZE + (BITS_PER_VALUE - 1)) / BITS_PER_VALUE; - static constexpr const int STANDARD_MASK_SIZE = STANDARD_ENTRY_COUNT * sizeof(validity_t); + //! Returns the [index] element of the Vector as a Value. + Value GetValue(idx_t index) const; + //! Sets the [index] element of the Vector to the specified Value. 
+ void SetValue(idx_t index, const Value &val); -public: - ValidityMask() : validity_mask(nullptr) { - } - explicit ValidityMask(idx_t max_count) { - Initialize(max_count); - } - explicit ValidityMask(validity_t *ptr) : validity_mask(ptr) { - } - explicit ValidityMask(data_ptr_t ptr) : ValidityMask((validity_t *)ptr) { - } - ValidityMask(const ValidityMask &original, idx_t count) { - Copy(original, count); - } + void SetAuxiliary(buffer_ptr new_buffer) { + auxiliary = std::move(new_buffer); + }; - static inline idx_t ValidityMaskSize(idx_t count = STANDARD_VECTOR_SIZE) { - return ValidityData::EntryCount(count) * sizeof(validity_t); - } - bool AllValid() const { - return !validity_mask; - } - bool CheckAllValid(idx_t count) const { - if (AllValid()) { - return true; - } - idx_t entry_count = ValidityData::EntryCount(count); - idx_t valid_count = 0; - for (idx_t i = 0; i < entry_count; i++) { - valid_count += validity_mask[i] == ValidityData::MAX_ENTRY; - } - return valid_count == entry_count; - } - validity_t *GetData() const { - return validity_mask; - } - void Reset() { - validity_mask = nullptr; - validity_data.reset(); - } + //! This functions resizes the vector + void Resize(idx_t cur_size, idx_t new_size); - void Resize(idx_t old_size, idx_t new_size); + //! Serializes a Vector to a stand-alone binary blob + void Serialize(idx_t count, Serializer &serializer); + //! 
Deserializes a blob back into a Vector + void Deserialize(idx_t count, Deserializer &source); - static inline idx_t EntryCount(idx_t count) { - return ValidityData::EntryCount(count); - } - validity_t GetValidityEntry(idx_t entry_idx) const { - if (!validity_mask) { - return ValidityData::MAX_ENTRY; - } - return validity_mask[entry_idx]; - } - static inline bool AllValid(validity_t entry) { - return entry == ValidityData::MAX_ENTRY; - } - static inline bool NoneValid(validity_t entry) { - return entry == 0; + // Getters + inline VectorType GetVectorType() const { + return vector_type; } - static inline bool RowIsValid(validity_t entry, idx_t idx_in_entry) { - return entry & (validity_t(1) << validity_t(idx_in_entry)); + inline const LogicalType &GetType() const { + return type; } - inline void GetEntryIndex(idx_t row_idx, idx_t &entry_idx, idx_t &idx_in_entry) const { - entry_idx = row_idx / BITS_PER_VALUE; - idx_in_entry = row_idx % BITS_PER_VALUE; + inline data_ptr_t GetData() { + return data; } - //! RowIsValidUnsafe should only be used if AllValid() is false: it achieves the same as RowIsValid but skips a - //! not-null check - inline bool RowIsValidUnsafe(idx_t row_idx) const { - D_ASSERT(validity_mask); - idx_t entry_idx, idx_in_entry; - GetEntryIndex(row_idx, entry_idx, idx_in_entry); - auto entry = GetValidityEntry(entry_idx); - return RowIsValid(entry, idx_in_entry); + buffer_ptr GetAuxiliary() { + return auxiliary; } - //! Returns true if a row is valid (i.e. not null), false otherwise - inline bool RowIsValid(idx_t row_idx) const { - if (!validity_mask) { - return true; - } - return RowIsValidUnsafe(row_idx); - } - - //! Same as SetValid, but skips a null check on validity_mask - inline void SetValidUnsafe(idx_t row_idx) { - D_ASSERT(validity_mask); - idx_t entry_idx, idx_in_entry; - GetEntryIndex(row_idx, entry_idx, idx_in_entry); - validity_mask[entry_idx] |= (validity_t(1) << validity_t(idx_in_entry)); - } - - //! 
Marks the entry at the specified row index as valid (i.e. not-null) - inline void SetValid(idx_t row_idx) { - if (!validity_mask) { - // if AllValid() we don't need to do anything - // the row is already valid - return; - } - SetValidUnsafe(row_idx); - } - - //! Marks the entry at the specified row index as invalid (i.e. null) - inline void SetInvalidUnsafe(idx_t row_idx) { - D_ASSERT(validity_mask); - idx_t entry_idx, idx_in_entry; - GetEntryIndex(row_idx, entry_idx, idx_in_entry); - validity_mask[entry_idx] &= ~(validity_t(1) << validity_t(idx_in_entry)); - } - - //! Marks the entry at the specified row index as invalid (i.e. null) - inline void SetInvalid(idx_t row_idx) { - if (!validity_mask) { - D_ASSERT(row_idx <= STANDARD_VECTOR_SIZE); - Initialize(STANDARD_VECTOR_SIZE); - } - SetInvalidUnsafe(row_idx); - } - - //! Mark the entrry at the specified index as either valid or invalid (non-null or null) - inline void Set(idx_t row_idx, bool valid) { - if (valid) { - SetValid(row_idx); - } else { - SetInvalid(row_idx); - } - } - - //! Ensure the validity mask is writable, allocating space if it is not initialized - inline void EnsureWritable() { - if (!validity_mask) { - Initialize(); - } - } - - //! Marks "count" entries in the validity mask as invalid (null) - inline void SetAllInvalid(idx_t count) { - D_ASSERT(count <= STANDARD_VECTOR_SIZE); - EnsureWritable(); - for (idx_t i = 0; i < ValidityData::EntryCount(count); i++) { - validity_mask[i] = 0; - } - } - - //! 
Marks "count" entries in the validity mask as valid (not null) - inline void SetAllValid(idx_t count) { - D_ASSERT(count <= STANDARD_VECTOR_SIZE); - EnsureWritable(); - for (idx_t i = 0; i < ValidityData::EntryCount(count); i++) { - validity_mask[i] = ValidityData::MAX_ENTRY; - } - } - - void Slice(const ValidityMask &other, idx_t offset); - void Combine(const ValidityMask &other, idx_t count); - string ToString(idx_t count) const; - - bool IsMaskSet() const; - -public: - void Initialize(validity_t *validity) { - validity_data.reset(); - validity_mask = validity; - } - void Initialize(const ValidityMask &other) { - validity_mask = other.validity_mask; - validity_data = other.validity_data; - } - void Initialize(idx_t count = STANDARD_VECTOR_SIZE) { - validity_data = make_buffer(count); - validity_mask = validity_data->owned_data.get(); - } - void Copy(const ValidityMask &other, idx_t count) { - if (other.AllValid()) { - validity_data = nullptr; - validity_mask = nullptr; - } else { - validity_data = make_buffer(other, count); - validity_mask = validity_data->owned_data.get(); - } - } - -private: - validity_t *validity_mask; - buffer_ptr validity_data; -}; - -} // namespace duckdb - - -namespace duckdb { - -struct VectorData { - const SelectionVector *sel; - data_ptr_t data; - ValidityMask validity; -}; - -class VectorStructBuffer; -class VectorListBuffer; -class ChunkCollection; - -struct SelCache; - -//! Vector of values of a specified PhysicalType. -class Vector { - friend struct ConstantVector; - friend struct DictionaryVector; - friend struct FlatVector; - friend struct ListVector; - friend struct StringVector; - friend struct StructVector; - friend struct SequenceVector; - - friend class DataChunk; - -public: - Vector(); - //! Create a vector of size one holding the passed on value - explicit Vector(const Value &value); - //! 
Create an empty standard vector with a type, equivalent to calling Vector(type, true, false) - explicit Vector(const LogicalType &type); - //! Create a non-owning vector that references the specified data - Vector(const LogicalType &type, data_ptr_t dataptr); - //! Create an owning vector that holds at most STANDARD_VECTOR_SIZE entries. - /*! - Create a new vector - If create_data is true, the vector will be an owning empty vector. - If zero_data is true, the allocated data will be zero-initialized. - */ - Vector(const LogicalType &type, bool create_data, bool zero_data); - // implicit copying of Vectors is not allowed - Vector(const Vector &) = delete; - // but moving of vectors is allowed - Vector(Vector &&other) noexcept; - -public: - //! Create a vector that references the specified value. - void Reference(const Value &value); - //! Causes this vector to reference the data held by the other vector. - void Reference(Vector &other); - - //! Creates a reference to a slice of the other vector - void Slice(Vector &other, idx_t offset); - //! Creates a reference to a slice of the other vector - void Slice(Vector &other, const SelectionVector &sel, idx_t count); - //! Turns the vector into a dictionary vector with the specified dictionary - void Slice(const SelectionVector &sel, idx_t count); - //! Slice the vector, keeping the result around in a cache or potentially using the cache instead of slicing - void Slice(const SelectionVector &sel, idx_t count, SelCache &cache); - - //! Creates the data of this vector with the specified type. Any data that - //! is currently in the vector is destroyed. - void Initialize(const LogicalType &new_type = LogicalType(LogicalTypeId::INVALID), bool zero_data = false); - - //! Converts this Vector to a printable string representation - string ToString(idx_t count) const; - void Print(idx_t count); - - string ToString() const; - void Print(); - - //! 
Flatten the vector, removing any compression and turning it into a FLAT_VECTOR - DUCKDB_API void Normalify(idx_t count); - DUCKDB_API void Normalify(const SelectionVector &sel, idx_t count); - //! Obtains a selection vector and data pointer through which the data of this vector can be accessed - DUCKDB_API void Orrify(idx_t count, VectorData &data); - - //! Turn the vector into a sequence vector - void Sequence(int64_t start, int64_t increment); - - //! Verify that the Vector is in a consistent, not corrupt state. DEBUG - //! FUNCTION ONLY! - void Verify(idx_t count); - void Verify(const SelectionVector &sel, idx_t count); - void UTFVerify(idx_t count); - void UTFVerify(const SelectionVector &sel, idx_t count); - - //! Returns the [index] element of the Vector as a Value. - Value GetValue(idx_t index) const; - //! Sets the [index] element of the Vector to the specified Value. - void SetValue(idx_t index, const Value &val); - - void SetAuxiliary(buffer_ptr new_buffer) { - auxiliary = std::move(new_buffer); - }; - - //! This functions resizes the vector - void Resize(idx_t cur_size, idx_t new_size); - - //! Serializes a Vector to a stand-alone binary blob - void Serialize(idx_t count, Serializer &serializer); - //! 
Deserializes a blob back into a Vector - void Deserialize(idx_t count, Deserializer &source); - - // Getters - inline VectorType GetVectorType() const { - return buffer->GetVectorType(); - } - inline const LogicalType &GetType() const { - return buffer->GetType(); - } - inline VectorBufferType GetBufferType() const { - return buffer->GetBufferType(); - } - inline data_ptr_t GetData() { - return data; - } - - buffer_ptr GetAuxiliary() { - return auxiliary; - } - - buffer_ptr GetBuffer() { - return buffer; + buffer_ptr GetBuffer() { + return buffer; } // Setters - inline void SetVectorType(VectorType vector_type) { - buffer->SetVectorType(vector_type); - } - inline void SetType(const LogicalType &type) { - buffer->SetType(type); - } - inline void SetBufferType(VectorBufferType buffer_type) { - buffer->SetBufferType(buffer_type); - } + DUCKDB_API void SetVectorType(VectorType vector_type); protected: + //! The vector type specifies how the data of the vector is physically stored (i.e. if it is a single repeated + //! constant, if it is compressed) + VectorType vector_type; + //! The type of the elements stored in the vector (e.g. integer, float) + LogicalType type; //! A pointer to the data. data_ptr_t data; //! The validity mask of the vector @@ -3121,7 +3481,7 @@ class Vector { //! 
The DictionaryBuffer holds a selection vector class VectorChildBuffer : public VectorBuffer { public: - VectorChildBuffer() : VectorBuffer(VectorBufferType::VECTOR_CHILD_BUFFER), data() { + VectorChildBuffer(Vector vector) : VectorBuffer(VectorBufferType::VECTOR_CHILD_BUFFER), data(move(vector)) { } public: @@ -3151,14 +3511,14 @@ struct ConstantVector { D_ASSERT(vector.GetVectorType() == VectorType::CONSTANT_VECTOR); return !vector.validity.RowIsValid(0); } - static inline void SetNull(Vector &vector, bool is_null) { - D_ASSERT(vector.GetVectorType() == VectorType::CONSTANT_VECTOR); - vector.validity.Set(0, !is_null); - } + DUCKDB_API static void SetNull(Vector &vector, bool is_null); static inline ValidityMask &Validity(Vector &vector) { D_ASSERT(vector.GetVectorType() == VectorType::CONSTANT_VECTOR); return vector.validity; } + DUCKDB_API static const SelectionVector *ZeroSelectionVector(idx_t count, SelectionVector &owned_sel); + //! Turns "vector" into a constant vector by referencing a value within the source vector + DUCKDB_API static void Reference(Vector &vector, Vector &source, idx_t position, idx_t count); static const sel_t ZERO_VECTOR[STANDARD_VECTOR_SIZE]; static const SelectionVector ZERO_SELECTION_VECTOR; @@ -3216,10 +3576,7 @@ struct FlatVector { D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR); vector.validity.Initialize(new_validity); } - static inline void SetNull(Vector &vector, idx_t idx, bool is_null) { - D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR); - vector.validity.Set(idx, !is_null); - } + static void SetNull(Vector &vector, idx_t idx, bool is_null); static inline bool IsNull(const Vector &vector, idx_t idx) { D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR); return !vector.validity.RowIsValid(idx); @@ -3230,48 +3587,61 @@ struct FlatVector { }; struct ListVector { - static const Vector &GetEntry(const Vector &vector); - static Vector &GetEntry(Vector &vector); - static idx_t GetListSize(const Vector 
&vector); - static void SetListSize(Vector &vec, idx_t size); - static bool HasEntry(const Vector &vector); - static void SetEntry(Vector &vector, unique_ptr entry); - static void Append(Vector &target, const Vector &source, idx_t source_size, idx_t source_offset = 0); - static void Append(Vector &target, const Vector &source, const SelectionVector &sel, idx_t source_size, - idx_t source_offset = 0); - static void PushBack(Vector &target, Value &insert); - static void Initialize(Vector &vec); + static inline list_entry_t *GetData(Vector &v) { + if (v.GetVectorType() == VectorType::DICTIONARY_VECTOR) { + auto &child = DictionaryVector::Child(v); + return GetData(child); + } + return FlatVector::GetData(v); + } + //! Gets a reference to the underlying child-vector of a list + DUCKDB_API static const Vector &GetEntry(const Vector &vector); + //! Gets a reference to the underlying child-vector of a list + DUCKDB_API static Vector &GetEntry(Vector &vector); + //! Gets the total size of the underlying child-vector of a list + DUCKDB_API static idx_t GetListSize(const Vector &vector); + //! Sets the total size of the underlying child-vector of a list + DUCKDB_API static void SetListSize(Vector &vec, idx_t size); + DUCKDB_API static void Reserve(Vector &vec, idx_t required_capacity); + DUCKDB_API static void Append(Vector &target, const Vector &source, idx_t source_size, idx_t source_offset = 0); + DUCKDB_API static void Append(Vector &target, const Vector &source, const SelectionVector &sel, idx_t source_size, + idx_t source_offset = 0); + DUCKDB_API static void PushBack(Vector &target, Value &insert); + DUCKDB_API static vector Search(Vector &list, Value &key, idx_t row); + DUCKDB_API static Value GetValuesFromOffsets(Vector &list, vector &offsets); //! Share the entry of the other list vector - static void ReferenceEntry(Vector &vector, Vector &other); + DUCKDB_API static void ReferenceEntry(Vector &vector, Vector &other); }; struct StringVector { //! 
Add a string to the string heap of the vector (auxiliary data) - static string_t AddString(Vector &vector, const char *data, idx_t len); + DUCKDB_API static string_t AddString(Vector &vector, const char *data, idx_t len); + //! Add a string or a blob to the string heap of the vector (auxiliary data) + //! This function is the same as ::AddString, except the added data does not need to be valid UTF8 + DUCKDB_API static string_t AddStringOrBlob(Vector &vector, const char *data, idx_t len); //! Add a string to the string heap of the vector (auxiliary data) - static string_t AddString(Vector &vector, const char *data); + DUCKDB_API static string_t AddString(Vector &vector, const char *data); //! Add a string to the string heap of the vector (auxiliary data) - static string_t AddString(Vector &vector, string_t data); + DUCKDB_API static string_t AddString(Vector &vector, string_t data); //! Add a string to the string heap of the vector (auxiliary data) - static string_t AddString(Vector &vector, const string &data); + DUCKDB_API static string_t AddString(Vector &vector, const string &data); //! Add a string or a blob to the string heap of the vector (auxiliary data) //! This function is the same as ::AddString, except the added data does not need to be valid UTF8 - static string_t AddStringOrBlob(Vector &vector, string_t data); + DUCKDB_API static string_t AddStringOrBlob(Vector &vector, string_t data); //! Allocates an empty string of the specified size, and returns a writable pointer that can be used to store the //! result of an operation - static string_t EmptyString(Vector &vector, idx_t len); + DUCKDB_API static string_t EmptyString(Vector &vector, idx_t len); //! Adds a reference to a handle that stores strings of this vector - static void AddHandle(Vector &vector, unique_ptr handle); + DUCKDB_API static void AddHandle(Vector &vector, unique_ptr handle); //! 
Adds a reference to an unspecified vector buffer that stores strings of this vector - static void AddBuffer(Vector &vector, buffer_ptr buffer); + DUCKDB_API static void AddBuffer(Vector &vector, buffer_ptr buffer); //! Add a reference from this vector to the string heap of the provided vector - static void AddHeapReference(Vector &vector, Vector &other); + DUCKDB_API static void AddHeapReference(Vector &vector, Vector &other); }; struct StructVector { - static bool HasEntries(const Vector &vector); - static const child_list_t> &GetEntries(const Vector &vector); - static void AddEntry(Vector &vector, const string &name, unique_ptr entry); + DUCKDB_API static const vector> &GetEntries(const Vector &vector); + DUCKDB_API static vector> &GetEntries(Vector &vector); }; struct SequenceVector { @@ -3312,6 +3682,7 @@ struct SequenceVector { struct ArrowArray; namespace duckdb { +class VectorCache; //! A Data Chunk represents a set of vectors. /*! @@ -3335,6 +3706,7 @@ class DataChunk { public: //! Creates an empty DataChunk DataChunk(); + ~DataChunk(); //! The vectors owned by the DataChunk. vector data; @@ -3346,9 +3718,9 @@ class DataChunk { DUCKDB_API idx_t ColumnCount() const { return data.size(); } - void SetCardinality(idx_t count) { + void SetCardinality(idx_t count_p) { D_ASSERT(count <= STANDARD_VECTOR_SIZE); - this->count = count; + this->count = count_p; } void SetCardinality(const DataChunk &other) { this->count = other.size(); @@ -3359,6 +3731,8 @@ class DataChunk { //! Set the DataChunk to reference another data chunk DUCKDB_API void Reference(DataChunk &chunk); + //! Set the DataChunk to own the data of data chunk, destroying the other chunk in the process + DUCKDB_API void Move(DataChunk &chunk); //! Initializes the DataChunk with the specified types to an empty DataChunk //! 
This will create one vector of the specified type for each LogicalType in the @@ -3417,7 +3791,10 @@ class DataChunk { DUCKDB_API void ToArrowArray(ArrowArray *out_array); private: + //! The amount of tuples stored in the data chunk idx_t count; + //! Vector caches, used to store data when ::Initialize is called + vector vector_caches; }; } // namespace duckdb @@ -3477,10 +3854,19 @@ struct VectorOperations { static void LessThan(Vector &A, Vector &B, Vector &result, idx_t count); // result = A <= B static void LessThanEquals(Vector &A, Vector &B, Vector &result, idx_t count); + // result = A != B with nulls being equal static void DistinctFrom(Vector &left, Vector &right, Vector &result, idx_t count); - // result = A == B with nulls being equal + // result := A == B with nulls being equal static void NotDistinctFrom(Vector &left, Vector &right, Vector &result, idx_t count); + // result := A > B with nulls being maximal + static void DistinctGreaterThan(Vector &left, Vector &right, Vector &result, idx_t count); + // result := A >= B with nulls being maximal + static void DistinctGreaterThanEquals(Vector &left, Vector &right, Vector &result, idx_t count); + // result := A < B with nulls being maximal + static void DistinctLessThan(Vector &left, Vector &right, Vector &result, idx_t count); + // result := A <= B with nulls being maximal + static void DistinctLessThanEquals(Vector &left, Vector &right, Vector &result, idx_t count); //===--------------------------------------------------------------------===// // Select Comparisons @@ -3497,22 +3883,48 @@ struct VectorOperations { SelectionVector *true_sel, SelectionVector *false_sel); static idx_t LessThanEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, SelectionVector *true_sel, SelectionVector *false_sel); - // result = A != B with nulls being equal - static idx_t SelectDistinctFrom(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, 
SelectionVector *false_sel); - // result = A == B with nulls being equal - static idx_t SelectNotDistinctFrom(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel); + + // true := A != B with nulls being equal + static idx_t DistinctFrom(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel); + // true := A == B with nulls being equal + static idx_t NotDistinctFrom(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel); + // true := A > B with nulls being maximal + static idx_t DistinctGreaterThan(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel); + // true := A >= B with nulls being maximal + static idx_t DistinctGreaterThanEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel); + // true := A < B with nulls being maximal + static idx_t DistinctLessThan(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel); + // true := A <= B with nulls being maximal + static idx_t DistinctLessThanEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel); //===--------------------------------------------------------------------===// - // Scatter methods + // Nested Comparisons //===--------------------------------------------------------------------===// - // make sure dest.count is set for gather methods! - struct Gather { - //! dest.data[i] = ptr[i]. NullValue is checked for and converted to the nullmask in dest. The source - //! addresses are incremented by the size of the type. 
- static void Set(Vector &source, Vector &dest, idx_t count); - }; + // true := A != B with nulls being equal, inputs selected + static idx_t NestedNotEquals(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel); + // true := A == B with nulls being equal, inputs selected + static idx_t NestedEquals(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel); + + // true := A > B with nulls being maximal, inputs selected + static idx_t NestedGreaterThan(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel); + // true := A >= B with nulls being maximal, inputs selected + static idx_t NestedGreaterThanEquals(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, + idx_t count, SelectionVector *true_sel, SelectionVector *false_sel); + // true := A < B with nulls being maximal, inputs selected + static idx_t NestedLessThan(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel); + // true := A <= B with nulls being maximal, inputs selected + static idx_t NestedLessThanEquals(Vector &left, Vector &right, idx_t vcount, const SelectionVector &sel, + idx_t count, SelectionVector *true_sel, SelectionVector *false_sel); //===--------------------------------------------------------------------===// // Hash functions @@ -3533,7 +3945,11 @@ struct VectorOperations { //===--------------------------------------------------------------------===// // Helpers //===--------------------------------------------------------------------===// - // Cast the data from the source type to the target type + //! Cast the data from the source type to the target type. Any elements that could not be converted are turned into + //! NULLs. 
If any elements cannot be converted, returns false and fills in the error_message. If no error message is + //! provided, an exception is thrown instead. + static bool TryCast(Vector &source, Vector &result, idx_t count, string *error_message, bool strict = false); + //! Cast the data from the source type to the target type. Throws an exception if the cast fails. static void Cast(Vector &source, Vector &result, idx_t count, bool strict = false); // Copy the data of to the target vector @@ -4210,33 +4626,42 @@ struct TernaryExecutor { namespace duckdb { struct UnaryOperatorWrapper { - template - static inline RESULT_TYPE Operation(FUNC fun, INPUT_TYPE input, ValidityMask &mask, idx_t idx) { + template + static inline RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) { return OP::template Operation(input); } +}; - static bool AddsNulls() { - return false; +struct UnaryLambdaWrapper { + template + static inline RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) { + auto fun = (FUNC *)dataptr; + return (*fun)(input); } }; -struct UnaryLambdaWrapper { - template - static inline RESULT_TYPE Operation(FUNC fun, INPUT_TYPE input, ValidityMask &mask, idx_t idx) { - return fun(input); +struct GenericUnaryWrapper { + template + static inline RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) { + return OP::template Operation(input, mask, idx, dataptr); } +}; - static bool AddsNulls() { - return false; +template +struct UnaryStringOperator { + template + static RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) { + auto vector = (Vector *)dataptr; + return OP::template Operation(input, *vector); } }; struct UnaryExecutor { private: - template + template static inline void ExecuteLoop(INPUT_TYPE *__restrict ldata, RESULT_TYPE *__restrict result_data, idx_t count, const SelectionVector *__restrict sel_vector, ValidityMask &mask, - 
ValidityMask &result_mask, FUNC fun) { + ValidityMask &result_mask, void *dataptr, bool adds_nulls) { ASSERT_RESTRICT(ldata, ldata + count, result_data, result_data + count); if (!mask.AllValid()) { @@ -4244,28 +4669,31 @@ struct UnaryExecutor { for (idx_t i = 0; i < count; i++) { auto idx = sel_vector->get_index(i); if (mask.RowIsValidUnsafe(idx)) { - result_data[i] = OPWRAPPER::template Operation(fun, ldata[idx], - result_mask, i); + result_data[i] = + OPWRAPPER::template Operation(ldata[idx], result_mask, i, dataptr); } else { result_mask.SetInvalid(i); } } } else { + if (adds_nulls) { + result_mask.EnsureWritable(); + } for (idx_t i = 0; i < count; i++) { auto idx = sel_vector->get_index(i); result_data[i] = - OPWRAPPER::template Operation(fun, ldata[idx], result_mask, i); + OPWRAPPER::template Operation(ldata[idx], result_mask, i, dataptr); } } } - template + template static inline void ExecuteFlat(INPUT_TYPE *__restrict ldata, RESULT_TYPE *__restrict result_data, idx_t count, - ValidityMask &mask, ValidityMask &result_mask, FUNC fun) { + ValidityMask &mask, ValidityMask &result_mask, void *dataptr, bool adds_nulls) { ASSERT_RESTRICT(ldata, ldata + count, result_data, result_data + count); if (!mask.AllValid()) { - if (!OPWRAPPER::AddsNulls()) { + if (!adds_nulls) { result_mask.Initialize(mask); } else { result_mask.Copy(mask, count); @@ -4278,8 +4706,8 @@ struct UnaryExecutor { if (ValidityMask::AllValid(validity_entry)) { // all valid: perform operation for (; base_idx < next; base_idx++) { - result_data[base_idx] = OPWRAPPER::template Operation( - fun, ldata[base_idx], result_mask, base_idx); + result_data[base_idx] = OPWRAPPER::template Operation( + ldata[base_idx], result_mask, base_idx, dataptr); } } else if (ValidityMask::NoneValid(validity_entry)) { // nothing valid: skip all @@ -4291,22 +4719,25 @@ struct UnaryExecutor { for (; base_idx < next; base_idx++) { if (ValidityMask::RowIsValid(validity_entry, base_idx - start)) { 
D_ASSERT(mask.RowIsValid(base_idx)); - result_data[base_idx] = OPWRAPPER::template Operation( - fun, ldata[base_idx], result_mask, base_idx); + result_data[base_idx] = OPWRAPPER::template Operation( + ldata[base_idx], result_mask, base_idx, dataptr); } } } } } else { + if (adds_nulls) { + result_mask.EnsureWritable(); + } for (idx_t i = 0; i < count; i++) { result_data[i] = - OPWRAPPER::template Operation(fun, ldata[i], result_mask, i); + OPWRAPPER::template Operation(ldata[i], result_mask, i, dataptr); } } } - template - static inline void ExecuteStandard(Vector &input, Vector &result, idx_t count, FUNC fun) { + template + static inline void ExecuteStandard(Vector &input, Vector &result, idx_t count, void *dataptr, bool adds_nulls) { switch (input.GetVectorType()) { case VectorType::CONSTANT_VECTOR: { result.SetVectorType(VectorType::CONSTANT_VECTOR); @@ -4317,8 +4748,8 @@ struct UnaryExecutor { ConstantVector::SetNull(result, true); } else { ConstantVector::SetNull(result, false); - *result_data = OPWRAPPER::template Operation( - fun, *ldata, ConstantVector::Validity(result), 0); + *result_data = OPWRAPPER::template Operation( + *ldata, ConstantVector::Validity(result), 0, dataptr); } break; } @@ -4327,8 +4758,8 @@ struct UnaryExecutor { auto result_data = FlatVector::GetData(result); auto ldata = FlatVector::GetData(input); - ExecuteFlat( - ldata, result_data, count, FlatVector::Validity(input), FlatVector::Validity(result), fun); + ExecuteFlat(ldata, result_data, count, FlatVector::Validity(input), + FlatVector::Validity(result), dataptr, adds_nulls); break; } default: { @@ -4339,22 +4770,33 @@ struct UnaryExecutor { auto result_data = FlatVector::GetData(result); auto ldata = (INPUT_TYPE *)vdata.data; - ExecuteLoop( - ldata, result_data, count, vdata.sel, vdata.validity, FlatVector::Validity(result), fun); + ExecuteLoop(ldata, result_data, count, vdata.sel, vdata.validity, + FlatVector::Validity(result), dataptr, adds_nulls); break; } } } public: - template + 
template static void Execute(Vector &input, Vector &result, idx_t count) { - ExecuteStandard(input, result, count, false); + ExecuteStandard(input, result, count, nullptr, false); } template > static void Execute(Vector &input, Vector &result, idx_t count, FUNC fun) { - ExecuteStandard(input, result, count, fun); + ExecuteStandard(input, result, count, (void *)&fun, false); + } + + template + static void GenericExecute(Vector &input, Vector &result, idx_t count, void *dataptr, bool adds_nulls = false) { + ExecuteStandard(input, result, count, dataptr, adds_nulls); + } + + template + static void ExecuteString(Vector &input, Vector &result, idx_t count) { + UnaryExecutor::GenericExecute>(input, result, count, + (void *)&result); } }; @@ -4404,54 +4846,273 @@ using std::chrono::system_clock; using std::chrono::time_point; } // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/random_engine.hpp +// +// +//===----------------------------------------------------------------------===// -namespace duckdb { -//! 
The cycle counter can be used to measure elapsed cycles + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/limits.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { + +template +struct NumericLimits { + static T Minimum(); + static T Maximum(); + static bool IsSigned(); + static idx_t Digits(); +}; + +template <> +struct NumericLimits { + static int8_t Minimum(); + static int8_t Maximum(); + static bool IsSigned() { + return true; + } + static idx_t Digits() { + return 3; + } +}; +template <> +struct NumericLimits { + static int16_t Minimum(); + static int16_t Maximum(); + static bool IsSigned() { + return true; + } + static idx_t Digits() { + return 5; + } +}; +template <> +struct NumericLimits { + static int32_t Minimum(); + static int32_t Maximum(); + static bool IsSigned() { + return true; + } + static idx_t Digits() { + return 10; + } +}; +template <> +struct NumericLimits { + static int64_t Minimum(); + static int64_t Maximum(); + static bool IsSigned() { + return true; + } + static idx_t Digits() { + return 19; + } +}; +template <> +struct NumericLimits { + static hugeint_t Minimum(); + static hugeint_t Maximum(); + static bool IsSigned() { + return true; + } + static idx_t Digits() { + return 39; + } +}; +template <> +struct NumericLimits { + static uint8_t Minimum(); + static uint8_t Maximum(); + static bool IsSigned() { + return false; + } + static idx_t Digits() { + return 3; + } +}; +template <> +struct NumericLimits { + static uint16_t Minimum(); + static uint16_t Maximum(); + static bool IsSigned() { + return false; + } + static idx_t Digits() { + return 5; + } +}; +template <> +struct NumericLimits { + static uint32_t Minimum(); + static uint32_t Maximum(); + static bool IsSigned() { + return false; + } + static idx_t Digits() { + return 10; + } +}; +template <> +struct NumericLimits { + static uint64_t 
Minimum(); + static uint64_t Maximum(); + static bool IsSigned() { + return false; + } + static idx_t Digits() { + return 20; + } +}; +template <> +struct NumericLimits { + static float Minimum(); + static float Maximum(); + static bool IsSigned() { + return true; + } + static idx_t Digits() { + return 127; + } +}; +template <> +struct NumericLimits { + static double Minimum(); + static double Maximum(); + static bool IsSigned() { + return true; + } + static idx_t Digits() { + return 250; + } +}; + +} // namespace duckdb + +#include + +namespace duckdb { + +struct RandomEngine { + std::mt19937 random_engine; + RandomEngine(int64_t seed = -1) { + if (seed < 0) { + std::random_device rd; + random_engine.seed(rd()); + } else { + random_engine.seed(seed); + } + } + + //! Generate a random number between min and max + double NextRandom(double min, double max) { + std::uniform_real_distribution dist(min, max); + return dist(random_engine); + } + //! Generate a random number between 0 and 1 + double NextRandom() { + return NextRandom(0, 1); + } + uint32_t NextRandomInteger() { + std::uniform_int_distribution dist(0, NumericLimits::Maximum()); + return dist(random_engine); + } +}; + +} // namespace duckdb + + +namespace duckdb { + +//! The cycle counter can be used to measure elapsed cycles for a function, expression and ... +//! Optimized by sampling mechanism. Once per 100 times. +//! //Todo Can be optimized further by calling RDTSC once per sample class CycleCounter { + friend struct ExpressionInfo; + friend struct ExpressionRootInfo; + static constexpr int SAMPLING_RATE = 50; + static constexpr int SAMPLING_VARIANCE = 100; + public: - //! Starts the timer - void Start() { - finished = false; - start = Tick(); + CycleCounter() : random(-1) { } - //! 
Finishes timing - void End() { - end = Tick(); - finished = true; + // Next_sample determines if a sample needs to be taken, if so start the profiler + void BeginSample() { + if (current_count >= next_sample) { + tmp = Tick(); + } } - uint64_t Elapsed() const { - return end - start; + // End the sample + void EndSample(int chunk_size) { + if (current_count >= next_sample) { + time += Tick() - tmp; + } + if (current_count >= next_sample) { + next_sample = SAMPLING_RATE + random.NextRandomInteger() % SAMPLING_VARIANCE; + ++sample_count; + sample_tuples_count += chunk_size; + current_count = 0; + } else { + ++current_count; + } + tuples_count += chunk_size; } private: uint64_t Tick() const; - uint64_t start; - uint64_t end; - bool finished = false; + // current number on RDT register + uint64_t tmp; + // Elapsed cycles + uint64_t time = 0; + //! Count the number of time the executor called since last sampling + uint64_t current_count = 0; + //! Show the next sample + uint64_t next_sample = 0; + //! Count the number of samples + uint64_t sample_count = 0; + //! Count the number of tuples sampled + uint64_t sample_tuples_count = 0; + //! Count the number of ALL tuples + uint64_t tuples_count = 0; + //! 
the random number generator used for sampling + RandomEngine random; }; } // namespace duckdb + namespace duckdb { class Expression; class ExpressionExecutor; struct ExpressionExecutorState; struct ExpressionState { - ExpressionState(Expression &expr, ExpressionExecutorState &root); + ExpressionState(const Expression &expr, ExpressionExecutorState &root); virtual ~ExpressionState() { } - Expression &expr; + const Expression &expr; ExpressionExecutorState &root; vector> child_states; vector types; DataChunk intermediate_chunk; string name; - double time; CycleCounter profiler; public: @@ -4460,11 +5121,15 @@ struct ExpressionState { }; struct ExpressionExecutorState { + explicit ExpressionExecutorState(const string &name); unique_ptr root_state; ExpressionExecutor *executor; + CycleCounter profiler; + string name; }; } // namespace duckdb + //===----------------------------------------------------------------------===// // DuckDB // @@ -4578,13 +5243,13 @@ enum class ExpressionType : uint8_t { COMPARE_NOT_IN = 36, // IS DISTINCT FROM operator COMPARE_DISTINCT_FROM = 37, - // compare final boundary COMPARE_BETWEEN = 38, COMPARE_NOT_BETWEEN = 39, - COMPARE_BOUNDARY_END = COMPARE_NOT_BETWEEN, // IS NOT DISTINCT FROM operator COMPARE_NOT_DISTINCT_FROM = 40, + // compare final boundary + COMPARE_BOUNDARY_END = COMPARE_NOT_DISTINCT_FROM, // ----------------------------- // Conjunction Operators @@ -4626,6 +5291,7 @@ enum class ExpressionType : uint8_t { WINDOW_LAST_VALUE = 131, WINDOW_LEAD = 132, WINDOW_LAG = 133, + WINDOW_NTH_VALUE = 134, // ----------------------------- // Functions @@ -4697,6 +5363,7 @@ enum class ExpressionClass : uint8_t { COLLATE = 16, LAMBDA = 17, POSITIONAL_REFERENCE = 18, + BETWEEN = 19, //===--------------------------------------------------------------------===// // Bound Expressions //===--------------------------------------------------------------------===// @@ -4919,7 +5586,7 @@ struct FunctionData { } virtual unique_ptr Copy() { - return 
make_unique(); + throw InternalException("Unimplemented copy for FunctionData"); }; virtual bool Equals(FunctionData &other) { return true; @@ -4936,9 +5603,6 @@ struct FunctionData { }; struct TableFunctionData : public FunctionData { - unique_ptr Copy() override { - throw NotImplementedException("Copy not required for table-producing function"); - } // used to pass on projections to table functions that support them. NB, can contain COLUMN_IDENTIFIER_ROW_ID vector column_ids; }; @@ -5106,7 +5770,6 @@ class BuiltinFunctions { void RegisterReadFunctions(); void RegisterTableFunctions(); void RegisterArrowFunctions(); - void RegisterInformationSchemaFunctions(); // aggregates void RegisterAlgebraicAggregates(); @@ -5166,87 +5829,7 @@ class BuiltinFunctions { -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/limits.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - -namespace duckdb { - -template -struct NumericLimits { - static T Minimum(); - static T Maximum(); -}; - -template <> -struct NumericLimits { - static int8_t Minimum(); - static int8_t Maximum(); -}; -template <> -struct NumericLimits { - static int16_t Minimum(); - static int16_t Maximum(); -}; -template <> -struct NumericLimits { - static int32_t Minimum(); - static int32_t Maximum(); -}; -template <> -struct NumericLimits { - static int64_t Minimum(); - static int64_t Maximum(); -}; -template <> -struct NumericLimits { - static hugeint_t Minimum(); - static hugeint_t Maximum(); -}; -template <> -struct NumericLimits { - static uint8_t Minimum(); - static uint8_t Maximum(); -}; -template <> -struct NumericLimits { - static uint16_t Minimum(); - static uint16_t Maximum(); -}; -template <> -struct NumericLimits { - static uint32_t Minimum(); - static uint32_t Maximum(); -}; -template <> -struct NumericLimits { - static uint64_t Minimum(); - static uint64_t Maximum(); -}; -template <> 
-struct NumericLimits { - static float Minimum(); - static float Maximum(); -}; -template <> -struct NumericLimits { - static double Minimum(); - static double Maximum(); -}; - -//! Returns the minimal type that guarantees an integer value from not -//! overflowing -PhysicalType MinimalType(int64_t value); -} // namespace duckdb namespace duckdb { @@ -5278,7 +5861,16 @@ class Hugeint { } template - static hugeint_t Convert(T value); + static bool TryConvert(T value, hugeint_t &result); + + template + static hugeint_t Convert(T value) { + hugeint_t result; + if (!TryConvert(value, result)) { // LCOV_EXCL_START + throw ValueOutOfRangeException(double(value), GetTypeId(), GetTypeId()); + } // LCOV_EXCL_STOP + return result; + } static void NegateInPlace(hugeint_t &input) { input.lower = NumericLimits::Maximum() - input.lower + 1; @@ -5369,25 +5961,26 @@ template <> bool Hugeint::TryCast(hugeint_t input, double &result); template <> -hugeint_t Hugeint::Convert(int8_t value); +bool Hugeint::TryConvert(int8_t value, hugeint_t &result); template <> -hugeint_t Hugeint::Convert(int16_t value); +bool Hugeint::TryConvert(int16_t value, hugeint_t &result); template <> -hugeint_t Hugeint::Convert(int32_t value); +bool Hugeint::TryConvert(int32_t value, hugeint_t &result); template <> -hugeint_t Hugeint::Convert(int64_t value); +bool Hugeint::TryConvert(int64_t value, hugeint_t &result); template <> -hugeint_t Hugeint::Convert(uint8_t value); +bool Hugeint::TryConvert(uint8_t value, hugeint_t &result); template <> -hugeint_t Hugeint::Convert(uint16_t value); +bool Hugeint::TryConvert(uint16_t value, hugeint_t &result); template <> -hugeint_t Hugeint::Convert(uint32_t value); +bool Hugeint::TryConvert(uint32_t value, hugeint_t &result); template <> -hugeint_t Hugeint::Convert(uint64_t value); +bool Hugeint::TryConvert(uint64_t value, hugeint_t &result); template <> -hugeint_t Hugeint::Convert(float value); +bool Hugeint::TryConvert(float value, hugeint_t &result); template <> 
-hugeint_t Hugeint::Convert(double value); +bool Hugeint::TryConvert(double value, hugeint_t &result); + } // namespace duckdb //===----------------------------------------------------------------------===// @@ -5420,7 +6013,9 @@ class Interval { static constexpr const int32_t SECS_PER_MINUTE = 60; static constexpr const int32_t MINS_PER_HOUR = 60; static constexpr const int32_t HOURS_PER_DAY = 24; - static constexpr const int32_t SECS_PER_DAY = SECS_PER_MINUTE * MINS_PER_HOUR * HOURS_PER_DAY; + static constexpr const int32_t SECS_PER_HOUR = SECS_PER_MINUTE * MINS_PER_HOUR; + static constexpr const int32_t SECS_PER_DAY = SECS_PER_HOUR * HOURS_PER_DAY; + static constexpr const int32_t SECS_PER_WEEK = SECS_PER_DAY * DAYS_PER_WEEK; static constexpr const int64_t MICROS_PER_MSEC = 1000; static constexpr const int64_t MICROS_PER_SEC = MICROS_PER_MSEC * MSECS_PER_SEC; @@ -5429,14 +6024,23 @@ class Interval { static constexpr const int64_t MICROS_PER_DAY = MICROS_PER_HOUR * HOURS_PER_DAY; static constexpr const int64_t MICROS_PER_MONTH = MICROS_PER_DAY * DAYS_PER_MONTH; + static constexpr const int64_t NANOS_PER_MICRO = 1000; + static constexpr const int64_t NANOS_PER_MSEC = NANOS_PER_MICRO * MICROS_PER_MSEC; + public: //! Convert a string to an interval object static bool FromString(const string &str, interval_t &result); //! Convert a string to an interval object - static bool FromCString(const char *str, idx_t len, interval_t &result); + static bool FromCString(const char *str, idx_t len, interval_t &result, string *error_message, bool strict); //! Convert an interval object to a string static string ToString(interval_t date); + //! Get Interval in milliseconds + static int64_t GetMilli(interval_t val); + + //! Get Interval in Nanoseconds + static int64_t GetNanoseconds(interval_t val); + //! 
Returns the difference between two timestamps static interval_t GetDifference(timestamp_t timestamp_1, timestamp_t timestamp_2); @@ -5481,32 +6085,66 @@ struct GreaterThanEquals { } }; -struct DistinctFrom { +struct LessThan { + template + static inline bool Operation(T left, T right) { + return left < right; + } +}; +struct LessThanEquals { + template + static inline bool Operation(T left, T right) { + return left <= right; + } +}; + +// Distinct semantics are from Postgres record sorting. NULL = NULL and not-NULL < NULL +// Deferring to the non-distinct operations removes the need for further specialisation. +// TODO: To reverse the semantics, swap left_null and right_null for comparisons +struct DistinctFrom { template static inline bool Operation(T left, T right, bool left_null, bool right_null) { - return ((left != right) && !left_null && !right_null) || (left_null != right_null); + return (left_null != right_null) || (!left_null && !right_null && (left != right)); } }; struct NotDistinctFrom { template static inline bool Operation(T left, T right, bool left_null, bool right_null) { - return ((left == right) && !left_null && !right_null) || (left_null && right_null); + return (left_null && right_null) || (!left_null && !right_null && (left == right)); } }; -struct LessThan { +struct DistinctGreaterThan { template - static inline bool Operation(T left, T right) { - return left < right; + static inline bool Operation(T left, T right, bool left_null, bool right_null) { + return GreaterThan::Operation(left_null, right_null) || + (!left_null && !right_null && GreaterThan::Operation(left, right)); } }; -struct LessThanEquals { + +struct DistinctGreaterThanEquals { template - static inline bool Operation(T left, T right) { - return left <= right; + static inline bool Operation(T left, T right, bool left_null, bool right_null) { + return left_null || (!left_null && !right_null && GreaterThanEquals::Operation(left, right)); + } +}; + +struct DistinctLessThan { + 
template + static inline bool Operation(T left, T right, bool left_null, bool right_null) { + return LessThan::Operation(left_null, right_null) || + (!left_null && !right_null && LessThan::Operation(left, right)); + } +}; + +struct DistinctLessThanEquals { + template + static inline bool Operation(T left, T right, bool left_null, bool right_null) { + return right_null || (!left_null && !right_null && LessThanEquals::Operation(left, right)); } }; + //===--------------------------------------------------------------------===// // Specialized Boolean Comparison Operators //===--------------------------------------------------------------------===// @@ -5524,16 +6162,16 @@ inline bool LessThan::Operation(bool left, bool right) { struct StringComparisonOperators { template static inline bool EqualsOrNot(const string_t a, const string_t b) { - if (memcmp(&a, &b, sizeof(uint32_t) + string_t::PREFIX_LENGTH) == 0) { - // prefix and length are equal - if (a.IsInlined()) { - // small string: compare entire inlined string - if (memcmp(a.value.inlined.inlined, b.value.inlined.inlined, a.GetSize()) == 0) { - // entire string is equal - return INVERSE ? false : true; - } - } else { - // large string: check main data source + if (a.IsInlined()) { + // small string: compare entire string + if (memcmp(&a, &b, sizeof(string_t)) == 0) { + // entire string is equal + return INVERSE ? false : true; + } + } else { + // large string: first check prefix and length + if (memcmp(&a, &b, sizeof(uint32_t) + string_t::PREFIX_LENGTH) == 0) { + // prefix and length are equal: check main string if (memcmp(a.value.pointer.ptr, b.value.pointer.ptr, a.GetSize()) == 0) { // entire string is equal return INVERSE ? false : true; @@ -5544,6 +6182,7 @@ struct StringComparisonOperators { return INVERSE ? 
true : false; } }; + template <> inline bool Equals::Operation(string_t left, string_t right) { return StringComparisonOperators::EqualsOrNot(left, right); @@ -5555,13 +6194,13 @@ inline bool NotEquals::Operation(string_t left, string_t right) { template <> inline bool NotDistinctFrom::Operation(string_t left, string_t right, bool left_null, bool right_null) { - return (StringComparisonOperators::EqualsOrNot(left, right) && !left_null && !right_null) || - (left_null && right_null); + return (left_null && right_null) || + (!left_null && !right_null && StringComparisonOperators::EqualsOrNot(left, right)); } template <> inline bool DistinctFrom::Operation(string_t left, string_t right, bool left_null, bool right_null) { - return (StringComparisonOperators::EqualsOrNot(left, right) && !left_null && !right_null) || - (left_null != right_null); + return (left_null != right_null) || + (!left_null && !right_null && StringComparisonOperators::EqualsOrNot(left, right)); } // compare up to shared length. 
if still the same, compare lengths @@ -5622,12 +6261,16 @@ inline bool LessThanEquals::Operation(interval_t left, interval_t right) { template <> inline bool NotDistinctFrom::Operation(interval_t left, interval_t right, bool left_null, bool right_null) { - return (Interval::Equals(left, right) && !left_null && !right_null) || (left_null && right_null); + return (left_null && right_null) || (!left_null && !right_null && Interval::Equals(left, right)); } template <> inline bool DistinctFrom::Operation(interval_t left, interval_t right, bool left_null, bool right_null) { - return (!Equals::Operation(left, right) && !left_null && !right_null) || (left_null != right_null); + return (left_null != right_null) || (!left_null && !right_null && !Equals::Operation(left, right)); +} +inline bool operator<(const interval_t &lhs, const interval_t &rhs) { + return LessThan::Operation(lhs, rhs); } + //===--------------------------------------------------------------------===// // Specialized Hugeint Comparison Operators //===--------------------------------------------------------------------===// @@ -5659,7 +6302,10 @@ inline bool LessThanEquals::Operation(hugeint_t left, hugeint_t right) { + namespace duckdb { +struct SelectionVector; + class Serializer; class Deserializer; class Vector; @@ -5677,6 +6323,11 @@ class BaseStatistics { public: bool CanHaveNull(); + bool CanHaveNoNull(); + + virtual bool IsConstant() { + return false; + } static unique_ptr CreateEmpty(LogicalType type); @@ -5685,7 +6336,8 @@ class BaseStatistics { virtual void Serialize(Serializer &serializer); static unique_ptr Deserialize(Deserializer &source, LogicalType type); //! 
Verify that a vector does not violate the statistics - virtual void Verify(Vector &vector, idx_t count); + virtual void Verify(Vector &vector, const SelectionVector &sel, idx_t count); + void Verify(Vector &vector, idx_t count); virtual string ToString(); }; @@ -5756,14 +6408,40 @@ class ScalarFunction : public BaseScalarFunction { return !(*this == rhs); } + bool Equal(const ScalarFunction &rhs) const { + // number of types + if (this->arguments.size() != rhs.arguments.size()) { + return false; + } + // argument types + for (idx_t i = 0; i < this->arguments.size(); ++i) { + if (this->arguments[i] != rhs.arguments[i]) { + return false; + } + } + // return type + if (this->return_type != rhs.return_type) { + return false; + } + // varargs + if (this->varargs != rhs.varargs) { + return false; + } + + return true; // they are equal + } + private: bool CompareScalarFunctionT(const scalar_function_t other) const { - typedef void(funcTypeT)(DataChunk &, ExpressionState &, Vector &); + typedef void(scalar_function_ptr_t)(DataChunk &, ExpressionState &, Vector &); - funcTypeT **func_ptr = (funcTypeT **)function.template target(); - funcTypeT **other_ptr = (funcTypeT **)other.template target(); + auto func_ptr = (scalar_function_ptr_t **)function.template target(); + auto other_ptr = (scalar_function_ptr_t **)other.template target(); // Case the functions were created from lambdas the target will return a nullptr + if (!func_ptr && !other_ptr) { + return true; + } if (func_ptr == nullptr || other_ptr == nullptr) { // scalar_function_t (std::functions) from lambdas cannot be compared return false; @@ -5828,7 +6506,7 @@ class ScalarFunction : public BaseScalarFunction { function = &ScalarFunction::UnaryFunction; break; default: - throw NotImplementedException("Unimplemented type for GetScalarUnaryFunction"); + throw InternalException("Unimplemented type for GetScalarUnaryFunction"); } return function; } @@ -5871,7 +6549,7 @@ class ScalarFunction : public BaseScalarFunction { 
function = &ScalarFunction::UnaryFunction; break; default: - throw NotImplementedException("Unimplemented type for GetScalarUnaryFunctionFixedReturn"); + throw InternalException("Unimplemented type for GetScalarUnaryFunctionFixedReturn"); } return function; } @@ -5903,8 +6581,10 @@ class ScalarFunction : public BaseScalarFunction { + namespace duckdb { struct FunctionData; +typedef std::pair FrameBounds; class AggregateExecutor { private: @@ -6192,7 +6872,7 @@ class AggregateExecutor { template static void Combine(Vector &source, Vector &target, idx_t count) { D_ASSERT(source.GetType().id() == LogicalTypeId::POINTER && target.GetType().id() == LogicalTypeId::POINTER); - auto sdata = FlatVector::GetData(source); + auto sdata = FlatVector::GetData(source); auto tdata = FlatVector::GetData(target); for (idx_t i = 0; i < count; i++) { @@ -6201,7 +6881,7 @@ class AggregateExecutor { } template - static void Finalize(Vector &states, FunctionData *bind_data, Vector &result, idx_t count) { + static void Finalize(Vector &states, FunctionData *bind_data, Vector &result, idx_t count, idx_t offset) { if (states.GetVectorType() == VectorType::CONSTANT_VECTOR) { result.SetVectorType(VectorType::CONSTANT_VECTOR); @@ -6217,11 +6897,21 @@ class AggregateExecutor { auto rdata = FlatVector::GetData(result); for (idx_t i = 0; i < count; i++) { OP::template Finalize(result, bind_data, sdata[i], rdata, - FlatVector::Validity(result), i); + FlatVector::Validity(result), i + offset); } } } + template + static void UnaryWindow(Vector &input, FunctionData *bind_data, data_ptr_t state, const FrameBounds &frame, + const FrameBounds &prev, Vector &result, idx_t rid) { + + auto idata = FlatVector::GetData(input) - MinValue(frame.first, prev.first); + const auto &ivalid = FlatVector::Validity(input); + OP::template Window(idata, ivalid, bind_data, (STATE *)state, frame, prev, + result, rid); + } + template static void Destroy(Vector &states, idx_t count) { auto sdata = 
FlatVector::GetData(states); @@ -6352,7 +7042,7 @@ typedef void (*aggregate_update_t)(Vector inputs[], FunctionData *bind_data, idx //! The type used for combining hashed aggregate states (optional) typedef void (*aggregate_combine_t)(Vector &state, Vector &combined, idx_t count); //! The type used for finalizing hashed aggregate function payloads -typedef void (*aggregate_finalize_t)(Vector &state, FunctionData *bind_data, Vector &result, idx_t count); +typedef void (*aggregate_finalize_t)(Vector &state, FunctionData *bind_data, Vector &result, idx_t count, idx_t offset); //! The type used for propagating statistics in aggregate functions (optional) typedef unique_ptr (*aggregate_statistics_t)(ClientContext &context, BoundAggregateExpression &expr, FunctionData *bind_data, @@ -6368,25 +7058,30 @@ typedef void (*aggregate_destructor_t)(Vector &state, idx_t count); typedef void (*aggregate_simple_update_t)(Vector inputs[], FunctionData *bind_data, idx_t input_count, data_ptr_t state, idx_t count); +//! 
The type used for updating complex windowed aggregate functions (optional) +typedef std::pair FrameBounds; +typedef void (*aggregate_window_t)(Vector inputs[], FunctionData *bind_data, idx_t input_count, data_ptr_t state, + const FrameBounds &frame, const FrameBounds &prev, Vector &result, idx_t offset); + class AggregateFunction : public BaseScalarFunction { public: AggregateFunction(string name, vector arguments, LogicalType return_type, aggregate_size_t state_size, aggregate_initialize_t initialize, aggregate_update_t update, aggregate_combine_t combine, aggregate_finalize_t finalize, aggregate_simple_update_t simple_update = nullptr, bind_aggregate_function_t bind = nullptr, aggregate_destructor_t destructor = nullptr, - aggregate_statistics_t statistics = nullptr) + aggregate_statistics_t statistics = nullptr, aggregate_window_t window = nullptr) : BaseScalarFunction(name, arguments, return_type, false), state_size(state_size), initialize(initialize), - update(update), combine(combine), finalize(finalize), simple_update(simple_update), bind(bind), - destructor(destructor), statistics(statistics) { + update(update), combine(combine), finalize(finalize), simple_update(simple_update), window(window), + bind(bind), destructor(destructor), statistics(statistics) { } AggregateFunction(vector arguments, LogicalType return_type, aggregate_size_t state_size, aggregate_initialize_t initialize, aggregate_update_t update, aggregate_combine_t combine, aggregate_finalize_t finalize, aggregate_simple_update_t simple_update = nullptr, bind_aggregate_function_t bind = nullptr, aggregate_destructor_t destructor = nullptr, - aggregate_statistics_t statistics = nullptr) + aggregate_statistics_t statistics = nullptr, aggregate_window_t window = nullptr) : AggregateFunction(string(), arguments, return_type, state_size, initialize, update, combine, finalize, - simple_update, bind, destructor, statistics) { + simple_update, bind, destructor, statistics, window) { } //! 
The hashed aggregate state sizing function @@ -6401,6 +7096,8 @@ class AggregateFunction : public BaseScalarFunction { aggregate_finalize_t finalize; //! The simple aggregate update function (may be null) aggregate_simple_update_t simple_update; + //! The windowed aggregate frame update function (may be null) + aggregate_window_t window; //! The bind function (may be null) bind_aggregate_function_t bind; @@ -6412,7 +7109,7 @@ class AggregateFunction : public BaseScalarFunction { bool operator==(const AggregateFunction &rhs) const { return state_size == rhs.state_size && initialize == rhs.initialize && update == rhs.update && - combine == rhs.combine && finalize == rhs.finalize; + combine == rhs.combine && finalize == rhs.finalize && window == rhs.window; } bool operator!=(const AggregateFunction &rhs) const { return !(*this == rhs); @@ -6498,6 +7195,14 @@ class AggregateFunction : public BaseScalarFunction { AggregateExecutor::UnaryUpdate(inputs[0], bind_data, state, count); } + template + static void UnaryWindow(Vector inputs[], FunctionData *bind_data, idx_t input_count, data_ptr_t state, + const FrameBounds &frame, const FrameBounds &prev, Vector &result, idx_t rid) { + D_ASSERT(input_count == 1); + AggregateExecutor::UnaryWindow(inputs[0], bind_data, state, frame, prev, + result, rid); + } + template static void BinaryScatterUpdate(Vector inputs[], FunctionData *bind_data, idx_t input_count, Vector &states, idx_t count) { @@ -6518,8 +7223,8 @@ class AggregateFunction : public BaseScalarFunction { } template - static void StateFinalize(Vector &states, FunctionData *bind_data, Vector &result, idx_t count) { - AggregateExecutor::Finalize(states, bind_data, result, count); + static void StateFinalize(Vector &states, FunctionData *bind_data, Vector &result, idx_t count, idx_t offset) { + AggregateExecutor::Finalize(states, bind_data, result, count, offset); } template @@ -6651,26 +7356,24 @@ struct UDFWrapper { private: //-------------------------------- Templated 
functions --------------------------------// - template - static scalar_function_t CreateUnaryFunction(const string &name, TR (*udf_func)(Args...)) { - D_ASSERT(sizeof...(Args) == 1); - return CreateUnaryFunction(name, udf_func); - } + struct UnaryUDFExecutor { + template + static RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) { + typedef RESULT_TYPE (*unary_function_t)(INPUT_TYPE); + auto udf = (unary_function_t)dataptr; + return udf(input); + } + }; template static scalar_function_t CreateUnaryFunction(const string &name, TR (*udf_func)(TA)) { scalar_function_t udf_function = [=](DataChunk &input, ExpressionState &state, Vector &result) -> void { - UnaryExecutor::Execute(input.data[0], result, input.size(), udf_func); + UnaryExecutor::GenericExecute(input.data[0], result, input.size(), + (void *)udf_func); }; return udf_function; } - template - static scalar_function_t CreateBinaryFunction(const string &name, TR (*udf_func)(Args...)) { - D_ASSERT(sizeof...(Args) == 2); - return CreateBinaryFunction(name, udf_func); - } - template static scalar_function_t CreateBinaryFunction(const string &name, TR (*udf_func)(TA, TB)) { scalar_function_t udf_function = [=](DataChunk &input, ExpressionState &state, Vector &result) -> void { @@ -6679,12 +7382,6 @@ struct UDFWrapper { return udf_function; } - template - static scalar_function_t CreateTernaryFunction(const string &name, TR (*udf_func)(Args...)) { - D_ASSERT(sizeof...(Args) == 3); - return CreateTernaryFunction(name, udf_func); - } - template static scalar_function_t CreateTernaryFunction(const string &name, TR (*udf_func)(TA, TB, TC)) { scalar_function_t udf_function = [=](DataChunk &input, ExpressionState &state, Vector &result) -> void { @@ -6694,6 +7391,21 @@ struct UDFWrapper { return udf_function; } + template + static scalar_function_t CreateUnaryFunction(const string &name, TR (*udf_func)(Args...)) { + throw std::runtime_error("Incorrect number of arguments for unary 
function"); + } + + template + static scalar_function_t CreateBinaryFunction(const string &name, TR (*udf_func)(Args...)) { + throw std::runtime_error("Incorrect number of arguments for binary function"); + } + + template + static scalar_function_t CreateTernaryFunction(const string &name, TR (*udf_func)(Args...)) { + throw std::runtime_error("Incorrect number of arguments for ternary function"); + } + template static LogicalType GetArgumentType() { if (std::is_same()) { @@ -6735,8 +7447,7 @@ struct UDFWrapper { template static scalar_function_t CreateUnaryFunction(const string &name, vector args, LogicalType ret_type, TR (*udf_func)(Args...)) { - D_ASSERT(sizeof...(Args) == 1); - return CreateUnaryFunction(name, args, ret_type, udf_func); + throw std::runtime_error("Incorrect number of arguments for unary function"); } template @@ -6750,7 +7461,8 @@ struct UDFWrapper { } scalar_function_t udf_function = [=](DataChunk &input, ExpressionState &state, Vector &result) -> void { - UnaryExecutor::Execute(input.data[0], result, input.size(), udf_func); + UnaryExecutor::GenericExecute(input.data[0], result, input.size(), + (void *)udf_func); }; return udf_function; } @@ -6758,8 +7470,7 @@ struct UDFWrapper { template static scalar_function_t CreateBinaryFunction(const string &name, vector args, LogicalType ret_type, TR (*udf_func)(Args...)) { - D_ASSERT(sizeof...(Args) == 2); - return CreateBinaryFunction(name, args, ret_type, udf_func); + throw std::runtime_error("Incorrect number of arguments for binary function"); } template @@ -6784,8 +7495,7 @@ struct UDFWrapper { template static scalar_function_t CreateTernaryFunction(const string &name, vector args, LogicalType ret_type, TR (*udf_func)(Args...)) { - D_ASSERT(sizeof...(Args) == 3); - return CreateTernaryFunction(name, args, ret_type, udf_func); + throw std::runtime_error("Incorrect number of arguments for ternary function"); } template @@ -6820,13 +7530,19 @@ struct UDFWrapper { return std::is_same(); case 
LogicalTypeId::SMALLINT: return std::is_same(); - case LogicalTypeId::DATE: case LogicalTypeId::INTEGER: return std::is_same(); case LogicalTypeId::BIGINT: + return std::is_same(); + case LogicalTypeId::DATE: + return std::is_same(); case LogicalTypeId::TIME: + return std::is_same(); case LogicalTypeId::TIMESTAMP: - return std::is_same(); + case LogicalTypeId::TIMESTAMP_MS: + case LogicalTypeId::TIMESTAMP_NS: + case LogicalTypeId::TIMESTAMP_SEC: + return std::is_same(); case LogicalTypeId::FLOAT: return std::is_same(); case LogicalTypeId::DOUBLE: @@ -6956,12 +7672,18 @@ class ChunkCollection { //! Append a new DataChunk directly to this ChunkCollection DUCKDB_API void Append(DataChunk &new_chunk); + //! Append a new DataChunk directly to this ChunkCollection + DUCKDB_API void Append(unique_ptr new_chunk); + //! Append another ChunkCollection directly to this ChunkCollection DUCKDB_API void Append(ChunkCollection &other); //! Merge is like Append but messes up the order and destroys the other collection DUCKDB_API void Merge(ChunkCollection &other); + //! Fuse adds new columns to the right of the collection + DUCKDB_API void Fuse(ChunkCollection &other); + DUCKDB_API void Verify(); //! Gets the value of the column at the specified index @@ -6969,7 +7691,8 @@ class ChunkCollection { //! Sets the value of the column at the specified index DUCKDB_API void SetValue(idx_t column, idx_t index, const Value &value); - DUCKDB_API vector GetRow(idx_t index); + //! Copy a single cell to a target vector + DUCKDB_API void CopyCell(idx_t column, idx_t index, Vector &target, idx_t target_offset); DUCKDB_API string ToString() const { return chunks.size() == 0 ? "ChunkCollection [ 0 ]" @@ -7020,8 +7743,6 @@ class ChunkCollection { //! Reorders the rows in the collection according to the given indices. DUCKDB_API void Reorder(idx_t order[]); - DUCKDB_API void MaterializeSortedChunk(DataChunk &target, idx_t order[], idx_t start_offset); - //! 
Returns true if the ChunkCollections are equivalent DUCKDB_API bool Equals(ChunkCollection &other); @@ -7101,6 +7822,7 @@ enum class StatementType : uint8_t { }; string StatementTypeToString(StatementType type); +bool StatementTypeReturnChanges(StatementType type); } // namespace duckdb @@ -7161,6 +7883,19 @@ class QueryResult { return types.size(); } + DUCKDB_API bool TryFetch(unique_ptr &result, string &error) { + try { + result = Fetch(); + return success; + } catch (std::exception &ex) { + error = ex.what(); + return false; + } catch (...) { + error = "Unknown error in Fetch"; + return false; + } + } + DUCKDB_API void ToArrowSchema(ArrowSchema *out_array); private: @@ -7390,9 +8125,6 @@ enum class JoinType : uint8_t { //! Convert join type to string string JoinTypeToString(JoinType type); -//! True if join is left, full or right outer join -bool IsOuterJoin(JoinType type); - //! True if join is left or full outer join bool IsLeftOuterJoin(JoinType type); @@ -7476,7 +8208,7 @@ class Relation : public std::enable_shared_from_this { public: DUCKDB_API virtual const vector &Columns() = 0; - DUCKDB_API virtual unique_ptr GetQueryNode() = 0; + DUCKDB_API virtual unique_ptr GetQueryNode(); DUCKDB_API virtual BoundStatement Bind(Binder &binder); DUCKDB_API virtual string GetAlias(); @@ -7556,7 +8288,7 @@ class Relation : public std::enable_shared_from_this { //! Delete from a table, can only be used on a TableRelation DUCKDB_API virtual void Delete(const string &condition = string()); //! Create a relation from calling a table in/out function on the input relation - DUCKDB_API shared_ptr TableFunction(const std::string &fname, vector &values); + DUCKDB_API shared_ptr TableFunction(const std::string &fname, vector values); public: //! 
Whether or not the relation inherits column bindings from its child or not, only relevant for binding @@ -7715,9 +8447,11 @@ class SQLStatement { namespace duckdb { +class ChunkCollection; class ClientContext; class DatabaseInstance; class DuckDB; +class LogicalOperator; typedef void (*warning_callback)(std::string); @@ -7781,9 +8515,13 @@ class Connection { //! Extract a set of SQL statements from a specific query DUCKDB_API vector> ExtractStatements(const string &query); + //! Extract the logical plan that corresponds to a query + DUCKDB_API unique_ptr ExtractPlan(const string &query); //! Appends a DataChunk to the specified table DUCKDB_API void Append(TableDescription &description, DataChunk &chunk); + //! Appends a ChunkCollection to the specified table + DUCKDB_API void Append(TableDescription &description, ChunkCollection &collection); //! Returns a relation that produces a table from this connection DUCKDB_API shared_ptr Table(const string &tname); @@ -7915,6 +8653,7 @@ class Connection { namespace duckdb { using std::lock_guard; using std::mutex; +using std::unique_lock; } // namespace duckdb @@ -7928,6 +8667,91 @@ using std::mutex; +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/allocator.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { +class Allocator; +class ClientContext; +class DatabaseInstance; + +struct PrivateAllocatorData { + virtual ~PrivateAllocatorData() { + } +}; + +typedef data_ptr_t (*allocate_function_ptr_t)(PrivateAllocatorData *private_data, idx_t size); +typedef void (*free_function_ptr_t)(PrivateAllocatorData *private_data, data_ptr_t pointer, idx_t size); +typedef data_ptr_t (*reallocate_function_ptr_t)(PrivateAllocatorData *private_data, data_ptr_t pointer, idx_t size); + +class AllocatedData { +public: + AllocatedData(Allocator &allocator, data_ptr_t pointer, idx_t allocated_size); + 
~AllocatedData(); + + data_ptr_t get() { + return pointer; + } + const_data_ptr_t get() const { + return pointer; + } + void Reset(); + +private: + Allocator &allocator; + data_ptr_t pointer; + idx_t allocated_size; +}; + +class Allocator { +public: + Allocator(); + Allocator(allocate_function_ptr_t allocate_function_p, free_function_ptr_t free_function_p, + reallocate_function_ptr_t reallocate_function_p, unique_ptr private_data); + + data_ptr_t AllocateData(idx_t size); + void FreeData(data_ptr_t pointer, idx_t size); + data_ptr_t ReallocateData(data_ptr_t pointer, idx_t size); + + unique_ptr Allocate(idx_t size) { + return make_unique(*this, AllocateData(size), size); + } + + static data_ptr_t DefaultAllocate(PrivateAllocatorData *private_data, idx_t size) { + return (data_ptr_t)malloc(size); + } + static void DefaultFree(PrivateAllocatorData *private_data, data_ptr_t pointer, idx_t size) { + free(pointer); + } + static data_ptr_t DefaultReallocate(PrivateAllocatorData *private_data, data_ptr_t pointer, idx_t size) { + return (data_ptr_t)realloc(pointer, size); + } + static Allocator &Get(ClientContext &context); + static Allocator &Get(DatabaseInstance &db); + + PrivateAllocatorData *GetPrivateData() { + return private_data.get(); + } + +private: + allocate_function_ptr_t allocate_function; + free_function_ptr_t free_function; + reallocate_function_ptr_t reallocate_function; + + unique_ptr private_data; +}; + +} // namespace duckdb + @@ -7964,24 +8788,130 @@ struct ReplacementScan { } // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/set.hpp +// +// +//===----------------------------------------------------------------------===// + + + +#include + +namespace duckdb { +using std::set; +} + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/enums/compression_type.hpp +// +// 
+//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { + +enum class CompressionType : uint8_t { + COMPRESSION_INVALID = 0, + COMPRESSION_UNCOMPRESSED = 1, + COMPRESSION_CONSTANT = 2, + COMPRESSION_RLE = 3, + COMPRESSION_DICTIONARY = 4, + COMPRESSION_PFOR_DELTA = 5, + COMPRESSION_BITPACKING = 6, + COMPRESSION_FSST = 7 +}; + +CompressionType CompressionTypeFromString(const string &str); +string CompressionTypeToString(CompressionType type); + +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/enums/optimizer_type.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { + +enum class OptimizerType : uint32_t { + INVALID = 0, + EXPRESSION_REWRITER, + FILTER_PULLUP, + FILTER_PUSHDOWN, + REGEX_RANGE, + IN_CLAUSE, + JOIN_ORDER, + DELIMINATOR, + UNUSED_COLUMNS, + STATISTICS_PROPAGATION, + COMMON_SUBEXPRESSIONS, + COMMON_AGGREGATE, + COLUMN_LIFETIME, + TOP_N, + REORDER_FILTER +}; + +string OptimizerTypeToString(OptimizerType type); + +} // namespace duckdb + namespace duckdb { class ClientContext; class TableFunctionRef; +class CompressionFunction; + +struct CompressionFunctionSet; enum class AccessMode : uint8_t { UNDEFINED = 0, AUTOMATIC = 1, READ_ONLY = 2, READ_WRITE = 3 }; enum class CheckpointAbort : uint8_t { NO_ABORT = 0, DEBUG_ABORT_BEFORE_TRUNCATE = 1, DEBUG_ABORT_BEFORE_HEADER = 2 }; +enum class ConfigurationOptionType : uint32_t { + INVALID = 0, + ACCESS_MODE, + DEFAULT_ORDER_TYPE, + DEFAULT_NULL_ORDER, + ENABLE_EXTERNAL_ACCESS, + ENABLE_OBJECT_CACHE, + MAXIMUM_MEMORY, + THREADS +}; + +struct ConfigurationOption { + ConfigurationOptionType type; + const char *name; + const char *description; + LogicalTypeId parameter_type; +}; + // this is optional and only used in tests at the moment struct DBConfig { friend class DatabaseInstance; friend class 
StorageManager; public: + DUCKDB_API DBConfig(); DUCKDB_API ~DBConfig(); //! Access mode of the database (AUTOMATIC, READ_ONLY or READ_WRITE) AccessMode access_mode = AccessMode::AUTOMATIC; + //! The allocator used by the system + Allocator allocator; // Checkpoint when WAL reaches this size (default: 16MB) idx_t checkpoint_wal_size = 1 << 24; //! Whether or not to use Direct IO, bypassing operating system buffers @@ -7991,6 +8921,8 @@ struct DBConfig { unique_ptr file_system; //! The maximum memory used by the database system (in bytes). Default: 80% of System available memory idx_t maximum_memory = (idx_t)-1; + //! The maximum amount of CPU threads used by the database system. Default: all available. + idx_t maximum_threads = (idx_t)-1; //! Whether or not to create and use a temporary directory to store intermediates that do not fit in memory bool use_temporary_directory = true; //! Directory to store temporary structures that do not fit in memory @@ -8002,7 +8934,7 @@ struct DBConfig { //! Null ordering used when none is specified (default: NULLS FIRST) OrderByNullType default_null_order = OrderByNullType::NULLS_FIRST; //! enable COPY and related commands - bool enable_copy = true; + bool enable_external_access = true; //! Whether or not object cache is used bool object_cache_enable = false; //! Database configuration variables as controlled by SET @@ -8015,10 +8947,36 @@ struct DBConfig { CheckpointAbort checkpoint_abort = CheckpointAbort::NO_ABORT; //! Replacement table scans are automatically attempted when a table name cannot be found in the schema vector replacement_scans; + //! Initialize the database with the standard set of DuckDB functions + //! You should probably not touch this unless you know what you are doing + bool initialize_default_database = true; + //! The set of disabled optimizers (default empty) + set disabled_optimizers; + //! 
Force a specific compression method to be used when checkpointing (if available) + CompressionType force_compression = CompressionType::COMPRESSION_INVALID; public: DUCKDB_API static DBConfig &GetConfig(ClientContext &context); DUCKDB_API static DBConfig &GetConfig(DatabaseInstance &db); + DUCKDB_API static vector GetOptions(); + DUCKDB_API static idx_t GetOptionCount(); + + //! Fetch an option by index. Returns a pointer to the option, or nullptr if out of range + DUCKDB_API static ConfigurationOption *GetOptionByIndex(idx_t index); + //! Fetch an option by name. Returns a pointer to the option, or nullptr if none exists. + DUCKDB_API static ConfigurationOption *GetOptionByName(const string &name); + + DUCKDB_API void SetOption(const ConfigurationOption &option, const Value &value); + + DUCKDB_API static idx_t ParseMemoryLimit(const string &arg); + + //! Return the list of possible compression functions for the specific physical type + DUCKDB_API vector GetCompressionFunctions(PhysicalType data_type); + //! 
Return the compression function for the specified compression type/physical type combo + DUCKDB_API CompressionFunction *GetCompressionFunction(CompressionType type, PhysicalType data_type); + +private: + unique_ptr compression_functions; }; } // namespace duckdb @@ -8166,7 +9124,6 @@ ExternC const PfnDliHook __pfnDliFailureHook2 = duckdb_dllimport_delay_hook; // // duckdb.h // -// Author: Mark Raasveldt // //===----------------------------------------------------------------------===// @@ -8190,6 +9147,9 @@ ExternC const PfnDliHook __pfnDliFailureHook2 = duckdb_dllimport_delay_hook; extern "C" { #endif +//===--------------------------------------------------------------------===// +// Type Information +//===--------------------------------------------------------------------===// typedef uint64_t idx_t; typedef enum DUCKDB_TYPE { @@ -8204,6 +9164,14 @@ typedef enum DUCKDB_TYPE { DUCKDB_TYPE_INTEGER, // int64_t DUCKDB_TYPE_BIGINT, + // uint8_t + DUCKDB_TYPE_UTINYINT, + // uint16_t + DUCKDB_TYPE_USMALLINT, + // uint32_t + DUCKDB_TYPE_UINTEGER, + // uint64_t + DUCKDB_TYPE_UBIGINT, // float DUCKDB_TYPE_FLOAT, // double @@ -8224,30 +9192,51 @@ typedef enum DUCKDB_TYPE { DUCKDB_TYPE_BLOB } duckdb_type; +//! Days are stored as days since 1970-01-01 +//! Use the duckdb_from_date/duckdb_to_date function to extract individual information +typedef struct { + int32_t days; +} duckdb_date; + typedef struct { int32_t year; int8_t month; int8_t day; -} duckdb_date; +} duckdb_date_struct; + +//! Time is stored as microseconds since 00:00:00 +//! Use the duckdb_from_time/duckdb_to_time function to extract individual information +typedef struct { + int64_t micros; +} duckdb_time; typedef struct { int8_t hour; int8_t min; int8_t sec; - int16_t micros; -} duckdb_time; + int32_t micros; +} duckdb_time_struct; +//! Timestamps are stored as microseconds since 1970-01-01 +//! 
Use the duckdb_from_timestamp/duckdb_to_timestamp function to extract individual information typedef struct { - duckdb_date date; - duckdb_time time; + int64_t micros; } duckdb_timestamp; +typedef struct { + duckdb_date_struct date; + duckdb_time_struct time; +} duckdb_timestamp_struct; + typedef struct { int32_t months; int32_t days; int64_t micros; } duckdb_interval; +//! Hugeints are composed in a (lower, upper) component +//! The value of the hugeint is upper * 2^64 + lower +//! For easy usage, the functions duckdb_hugeint_to_double/duckdb_double_to_hugeint are recommended typedef struct { uint64_t lower; int64_t upper; @@ -8263,946 +9252,926 @@ typedef struct { bool *nullmask; duckdb_type type; char *name; + void *internal_data; } duckdb_column; typedef struct { idx_t column_count; idx_t row_count; + idx_t rows_changed; duckdb_column *columns; char *error_message; + void *internal_data; } duckdb_result; -// typedef struct { -// void *data; -// bool *nullmask; -// } duckdb_column_data; - -// typedef struct { -// int column_count; -// int count; -// duckdb_column_data *columns; -// } duckdb_chunk; - typedef void *duckdb_database; typedef void *duckdb_connection; typedef void *duckdb_prepared_statement; typedef void *duckdb_appender; +typedef void *duckdb_arrow; +typedef void *duckdb_config; +typedef void *duckdb_arrow_schema; +typedef void *duckdb_arrow_array; typedef enum { DuckDBSuccess = 0, DuckDBError = 1 } duckdb_state; -//! Opens a database file at the given path (nullptr for in-memory). Returns DuckDBSuccess on success, or DuckDBError on -//! failure. [OUT: database] +//===--------------------------------------------------------------------===// +// Open/Connect +//===--------------------------------------------------------------------===// + +/*! +Creates a new database or opens an existing database file stored at the given path. +If no path is given a new in-memory database is created instead.
+ +* path: Path to the database file on disk, or `nullptr` or `:memory:` to open an in-memory database. +* out_database: The result database object. +* returns: `DuckDBSuccess` on success or `DuckDBError` on failure. +*/ DUCKDB_API duckdb_state duckdb_open(const char *path, duckdb_database *out_database); -//! Closes the database. -DUCKDB_API void duckdb_close(duckdb_database *database); -//! Creates a connection to the specified database. [OUT: connection] -DUCKDB_API duckdb_state duckdb_connect(duckdb_database database, duckdb_connection *out_connection); -//! Closes the specified connection handle -DUCKDB_API void duckdb_disconnect(duckdb_connection *connection); +/*! +Extended version of duckdb_open. Creates a new database or opens an existing database file stored at the given path. + +* path: Path to the database file on disk, or `nullptr` or `:memory:` to open an in-memory database. +* out_database: The result database object. +* config: (Optional) configuration used to start up the database system. +* out_error: If set and the function returns DuckDBError, this will contain the reason why the start-up failed. +Note that the error must be freed using `duckdb_free`. +* returns: `DuckDBSuccess` on success or `DuckDBError` on failure. +*/ +DUCKDB_API duckdb_state duckdb_open_ext(const char *path, duckdb_database *out_database, duckdb_config config, + char **out_error); -//! Executes the specified SQL query in the specified connection handle. [OUT: result descriptor] -DUCKDB_API duckdb_state duckdb_query(duckdb_connection connection, const char *query, duckdb_result *out_result); -//! Destroys the specified result -DUCKDB_API void duckdb_destroy_result(duckdb_result *result); +/*! +Closes the specified database and de-allocates all memory allocated for that database. +This should be called after you are done with any database allocated through `duckdb_open`. +Note that failing to call `duckdb_close` (in case of e.g.
a program crash) will not cause data corruption. +Still it is recommended to always correctly close a database object after you are done with it. -//! Returns the column name of the specified column. The result does not need to be freed; -//! the column names will automatically be destroyed when the result is destroyed. -DUCKDB_API const char *duckdb_column_name(duckdb_result *result, idx_t col); +* database: The database object to shut down. +*/ +DUCKDB_API void duckdb_close(duckdb_database *database); -// SAFE fetch functions -// These functions will perform conversions if necessary. On failure (e.g. if conversion cannot be performed) a special -// value is returned. +/*! +Opens a connection to a database. Connections are required to query the database, and store transactional state +associated with the connection. -//! Converts the specified value to a bool. Returns false on failure or NULL. -DUCKDB_API bool duckdb_value_boolean(duckdb_result *result, idx_t col, idx_t row); -//! Converts the specified value to an int8_t. Returns 0 on failure or NULL. -DUCKDB_API int8_t duckdb_value_int8(duckdb_result *result, idx_t col, idx_t row); -//! Converts the specified value to an int16_t. Returns 0 on failure or NULL. -DUCKDB_API int16_t duckdb_value_int16(duckdb_result *result, idx_t col, idx_t row); -//! Converts the specified value to an int64_t. Returns 0 on failure or NULL. -DUCKDB_API int32_t duckdb_value_int32(duckdb_result *result, idx_t col, idx_t row); -//! Converts the specified value to an int64_t. Returns 0 on failure or NULL. -DUCKDB_API int64_t duckdb_value_int64(duckdb_result *result, idx_t col, idx_t row); -//! Converts the specified value to an uint8_t. Returns 0 on failure or NULL. -DUCKDB_API uint8_t duckdb_value_uint8(duckdb_result *result, idx_t col, idx_t row); -//! Converts the specified value to an uint16_t. Returns 0 on failure or NULL. -DUCKDB_API uint16_t duckdb_value_uint16(duckdb_result *result, idx_t col, idx_t row); -//! 
Converts the specified value to an uint64_t. Returns 0 on failure or NULL. -DUCKDB_API uint32_t duckdb_value_uint32(duckdb_result *result, idx_t col, idx_t row); -//! Converts the specified value to an uint64_t. Returns 0 on failure or NULL. -DUCKDB_API uint64_t duckdb_value_uint64(duckdb_result *result, idx_t col, idx_t row); -//! Converts the specified value to a float. Returns 0.0 on failure or NULL. -DUCKDB_API float duckdb_value_float(duckdb_result *result, idx_t col, idx_t row); -//! Converts the specified value to a double. Returns 0.0 on failure or NULL. -DUCKDB_API double duckdb_value_double(duckdb_result *result, idx_t col, idx_t row); -//! Converts the specified value to a string. Returns nullptr on failure or NULL. The result must be freed with free. -DUCKDB_API char *duckdb_value_varchar(duckdb_result *result, idx_t col, idx_t row); -//! Fetches a blob from a result set column. Returns a blob with blob.data set to nullptr on failure or NULL. The -//! resulting "blob.data" must be freed with free. -DUCKDB_API duckdb_blob duckdb_value_blob(duckdb_result *result, idx_t col, idx_t row); +* database: The database file to connect to. +* out_connection: The result connection object. +* returns: `DuckDBSuccess` on success or `DuckDBError` on failure. +*/ +DUCKDB_API duckdb_state duckdb_connect(duckdb_database database, duckdb_connection *out_connection); -// Prepared Statements +/*! +Closes the specified connection and de-allocates all memory allocated for that connection. -//! prepares the specified SQL query in the specified connection handle. [OUT: prepared statement descriptor] -DUCKDB_API duckdb_state duckdb_prepare(duckdb_connection connection, const char *query, - duckdb_prepared_statement *out_prepared_statement); +* connection: The connection to close. 
+*/ +DUCKDB_API void duckdb_disconnect(duckdb_connection *connection); -DUCKDB_API duckdb_state duckdb_nparams(duckdb_prepared_statement prepared_statement, idx_t *nparams_out); +//===--------------------------------------------------------------------===// +// Configuration +//===--------------------------------------------------------------------===// +/*! +Initializes an empty configuration object that can be used to provide start-up options for the DuckDB instance +through `duckdb_open_ext`. -//! binds parameters to prepared statement -DUCKDB_API duckdb_state duckdb_bind_boolean(duckdb_prepared_statement prepared_statement, idx_t param_idx, bool val); -DUCKDB_API duckdb_state duckdb_bind_int8(duckdb_prepared_statement prepared_statement, idx_t param_idx, int8_t val); -DUCKDB_API duckdb_state duckdb_bind_int16(duckdb_prepared_statement prepared_statement, idx_t param_idx, int16_t val); -DUCKDB_API duckdb_state duckdb_bind_int32(duckdb_prepared_statement prepared_statement, idx_t param_idx, int32_t val); -DUCKDB_API duckdb_state duckdb_bind_int64(duckdb_prepared_statement prepared_statement, idx_t param_idx, int64_t val); -DUCKDB_API duckdb_state duckdb_bind_uint8(duckdb_prepared_statement prepared_statement, idx_t param_idx, int8_t val); -DUCKDB_API duckdb_state duckdb_bind_uint16(duckdb_prepared_statement prepared_statement, idx_t param_idx, int16_t val); -DUCKDB_API duckdb_state duckdb_bind_uint32(duckdb_prepared_statement prepared_statement, idx_t param_idx, uint32_t val); -DUCKDB_API duckdb_state duckdb_bind_uint64(duckdb_prepared_statement prepared_statement, idx_t param_idx, uint64_t val); -DUCKDB_API duckdb_state duckdb_bind_float(duckdb_prepared_statement prepared_statement, idx_t param_idx, float val); -DUCKDB_API duckdb_state duckdb_bind_double(duckdb_prepared_statement prepared_statement, idx_t param_idx, double val); -DUCKDB_API duckdb_state duckdb_bind_varchar(duckdb_prepared_statement prepared_statement, idx_t param_idx, - const char *val); 
-DUCKDB_API duckdb_state duckdb_bind_varchar_length(duckdb_prepared_statement prepared_statement, idx_t param_idx, - const char *val, idx_t length); -DUCKDB_API duckdb_state duckdb_bind_blob(duckdb_prepared_statement prepared_statement, idx_t param_idx, - const void *data, idx_t length); -DUCKDB_API duckdb_state duckdb_bind_null(duckdb_prepared_statement prepared_statement, idx_t param_idx); +This will always succeed unless there is a malloc failure. -//! Executes the prepared statements with currently bound parameters -DUCKDB_API duckdb_state duckdb_execute_prepared(duckdb_prepared_statement prepared_statement, - duckdb_result *out_result); +* out_config: The result configuration object. +* returns: `DuckDBSuccess` on success or `DuckDBError` on failure. +*/ +DUCKDB_API duckdb_state duckdb_create_config(duckdb_config *out_config); -//! Destroys the specified prepared statement descriptor -DUCKDB_API void duckdb_destroy_prepare(duckdb_prepared_statement *prepared_statement); +/*! +This returns the total amount of configuration options available for usage with `duckdb_get_config_flag`. -DUCKDB_API duckdb_state duckdb_appender_create(duckdb_connection connection, const char *schema, const char *table, - duckdb_appender *out_appender); +This should not be called in a loop as it internally loops over all the options. -DUCKDB_API duckdb_state duckdb_appender_begin_row(duckdb_appender appender); -DUCKDB_API duckdb_state duckdb_appender_end_row(duckdb_appender appender); +* returns: The amount of config options available. +*/ +DUCKDB_API size_t duckdb_config_count(); -DUCKDB_API duckdb_state duckdb_append_bool(duckdb_appender appender, bool value); +/*! +Obtains a human-readable name and description of a specific configuration option. This can be used to e.g. +display configuration options. This will succeed unless `index` is out of range (i.e. `>= duckdb_config_count`). 
-DUCKDB_API duckdb_state duckdb_append_int8(duckdb_appender appender, int8_t value); -DUCKDB_API duckdb_state duckdb_append_int16(duckdb_appender appender, int16_t value); -DUCKDB_API duckdb_state duckdb_append_int32(duckdb_appender appender, int32_t value); -DUCKDB_API duckdb_state duckdb_append_int64(duckdb_appender appender, int64_t value); +The result name or description MUST NOT be freed. -DUCKDB_API duckdb_state duckdb_append_uint8(duckdb_appender appender, uint8_t value); -DUCKDB_API duckdb_state duckdb_append_uint16(duckdb_appender appender, uint16_t value); -DUCKDB_API duckdb_state duckdb_append_uint32(duckdb_appender appender, uint32_t value); -DUCKDB_API duckdb_state duckdb_append_uint64(duckdb_appender appender, uint64_t value); +* index: The index of the configuration option (between 0 and `duckdb_config_count`) +* out_name: A name of the configuration flag. +* out_description: A description of the configuration flag. +* returns: `DuckDBSuccess` on success or `DuckDBError` on failure. +*/ +DUCKDB_API duckdb_state duckdb_get_config_flag(size_t index, const char **out_name, const char **out_description); -DUCKDB_API duckdb_state duckdb_append_float(duckdb_appender appender, float value); -DUCKDB_API duckdb_state duckdb_append_double(duckdb_appender appender, double value); +/*! +Sets the specified option for the specified configuration. The configuration option is indicated by name. +To obtain a list of config options, see `duckdb_get_config_flag`. -DUCKDB_API duckdb_state duckdb_append_varchar(duckdb_appender appender, const char *val); -DUCKDB_API duckdb_state duckdb_append_varchar_length(duckdb_appender appender, const char *val, idx_t length); -DUCKDB_API duckdb_state duckdb_append_blob(duckdb_appender appender, const void *data, idx_t length); -DUCKDB_API duckdb_state duckdb_append_null(duckdb_appender appender); +In the source code, configuration options are defined in `config.cpp`. 
-DUCKDB_API duckdb_state duckdb_appender_flush(duckdb_appender appender); -DUCKDB_API duckdb_state duckdb_appender_close(duckdb_appender appender); +This can fail if either the name is invalid, or if the value provided for the option is invalid. -DUCKDB_API duckdb_state duckdb_appender_destroy(duckdb_appender *appender); +* config: The configuration object to set the option on. +* name: The name of the configuration flag to set. +* option: The value to set the configuration flag to. +* returns: `DuckDBSuccess` on success or `DuckDBError` on failure. +*/ +DUCKDB_API duckdb_state duckdb_set_config(duckdb_config config, const char *name, const char *option); -#ifdef __cplusplus -} -#endif -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/types/date.hpp -// -// -//===----------------------------------------------------------------------===// +/*! +Destroys the specified configuration object and de-allocates all memory allocated for the object. +* config: The configuration object to destroy. +*/ +DUCKDB_API void duckdb_destroy_config(duckdb_config *config); +//===--------------------------------------------------------------------===// +// Query Execution +//===--------------------------------------------------------------------===// +/*! +Executes a SQL query within a connection and stores the full (materialized) result in the out_result pointer. +If the query fails to execute, DuckDBError is returned and the error message can be retrieved by calling +`duckdb_result_error`. +Note that after running `duckdb_query`, `duckdb_destroy_result` must be called on the result object even if the +query fails, otherwise the error stored within the result will not be freed correctly. +* connection: The connection to perform the query in. +* query: The SQL query to run. +* out_result: The query result. +* returns: `DuckDBSuccess` on success or `DuckDBError` on failure.
+*/ +DUCKDB_API duckdb_state duckdb_query(duckdb_connection connection, const char *query, duckdb_result *out_result); +/*! +Closes the result and de-allocates all memory allocated for that result. -namespace duckdb { +* result: The result to destroy. +*/ +DUCKDB_API void duckdb_destroy_result(duckdb_result *result); -//! The Date class is a static class that holds helper functions for the Date -//! type. -class Date { -public: - static const string_t MONTH_NAMES[12]; - static const string_t MONTH_NAMES_ABBREVIATED[12]; - static const string_t DAY_NAMES[7]; - static const string_t DAY_NAMES_ABBREVIATED[7]; - static const int32_t NORMAL_DAYS[13]; - static const int32_t CUMULATIVE_DAYS[13]; - static const int32_t LEAP_DAYS[13]; - static const int32_t CUMULATIVE_LEAP_DAYS[13]; - static const int32_t CUMULATIVE_YEAR_DAYS[401]; - static const int8_t MONTH_PER_DAY_OF_YEAR[365]; - static const int8_t LEAP_MONTH_PER_DAY_OF_YEAR[366]; +/*! +Returns the column name of the specified column. The result does not need to be freed; the column names will +automatically be destroyed when the result is destroyed. - constexpr static const int32_t MIN_YEAR = -290307; - constexpr static const int32_t MAX_YEAR = 294247; - constexpr static const int32_t EPOCH_YEAR = 1970; +Returns `NULL` if the column is out of range. - constexpr static const int32_t YEAR_INTERVAL = 400; - constexpr static const int32_t DAYS_PER_YEAR_INTERVAL = 146097; +* result: The result object to fetch the column name from. +* col: The column index. +* returns: The column name of the specified column. +*/ +DUCKDB_API const char *duckdb_column_name(duckdb_result *result, idx_t col); -public: - //! Convert a string in the format "YYYY-MM-DD" to a date object - static date_t FromString(const string &str, bool strict = false); - //! Convert a string in the format "YYYY-MM-DD" to a date object - static date_t FromCString(const char *str, idx_t len, bool strict = false); - //!
Convert a date object to a string in the format "YYYY-MM-DD" - static string ToString(date_t date); - //! Try to convert text in a buffer to a date; returns true if parsing was successful - static bool TryConvertDate(const char *buf, idx_t len, idx_t &pos, date_t &result, bool strict = false); +/*! +Returns the column type of the specified column. - //! Create a string "YYYY-MM-DD" from a specified (year, month, day) - //! combination - static string Format(int32_t year, int32_t month, int32_t day); +Returns `DUCKDB_TYPE_INVALID` if the column is out of range. - //! Extract the year, month and day from a given date object - static void Convert(date_t date, int32_t &out_year, int32_t &out_month, int32_t &out_day); - //! Create a Date object from a specified (year, month, day) combination - static date_t FromDate(int32_t year, int32_t month, int32_t day); +* result: The result object to fetch the column type from. +* col: The column index. +* returns: The column type of the specified column. +*/ +DUCKDB_API duckdb_type duckdb_column_type(duckdb_result *result, idx_t col); - //! Returns true if (year) is a leap year, and false otherwise - static bool IsLeapYear(int32_t year); +/*! +Returns the number of columns present in the result object. - //! Returns true if the specified (year, month, day) combination is a valid - //! date - static bool IsValid(int32_t year, int32_t month, int32_t day); +* result: The result object. +* returns: The number of columns present in the result object. +*/ +DUCKDB_API idx_t duckdb_column_count(duckdb_result *result); - //! Extract the epoch from the date (seconds since 1970-01-01) - static int64_t Epoch(date_t date); - //! Extract the epoch from the date (nanoseconds since 1970-01-01) - static int64_t EpochNanoseconds(date_t date); - //! Convert the epoch (seconds since 1970-01-01) to a date_t - static date_t EpochToDate(int64_t epoch); +/*! +Returns the number of rows present in the result object. - //!
Extract the number of days since epoch (days since 1970-01-01) - static int32_t EpochDays(date_t date); - //! Convert the epoch number of days to a date_t - static date_t EpochDaysToDate(int32_t epoch); +* result: The result object. +* returns: The number of rows present in the result object. +*/ +DUCKDB_API idx_t duckdb_row_count(duckdb_result *result); - //! Extract year of a date entry - static int32_t ExtractYear(date_t date); - //! Extract year of a date entry, but optimized to first try the last year found - static int32_t ExtractYear(date_t date, int32_t *last_year); - static int32_t ExtractYear(timestamp_t ts, int32_t *last_year); - //! Extract month of a date entry - static int32_t ExtractMonth(date_t date); - //! Extract day of a date entry - static int32_t ExtractDay(date_t date); - //! Extract the day of the week (1-7) - static int32_t ExtractISODayOfTheWeek(date_t date); - //! Extract the day of the year - static int32_t ExtractDayOfTheYear(date_t date); - //! Extract the ISO week number - //! ISO weeks start on Monday and the first week of a year - //! contains January 4 of that year. - //! In the ISO week-numbering system, it is possible for early-January dates - //! to be part of the 52nd or 53rd week of the previous year. - static int32_t ExtractISOWeekNumber(date_t date); - //! Extract the week number as Python handles it. - //! Either Monday or Sunday is the first day of the week, - //! and any date before the first Monday/Sunday returns week 0 - //! This is a bit more consistent because week numbers in a year are always incrementing - static int32_t ExtractWeekNumberRegular(date_t date, bool monday_first = true); - //! Returns the date of the monday of the current week. - static date_t GetMondayOfCurrentWeek(date_t date); +/*! +Returns the number of rows changed by the query stored in the result. This is relevant only for INSERT/UPDATE/DELETE +queries. For other queries the rows_changed will be 0. - //! 
Helper function to parse two digits from a string (e.g. "30" -> 30, "03" -> 3, "3" -> 3) - static bool ParseDoubleDigit(const char *buf, idx_t len, idx_t &pos, int32_t &result); +* result: The result object. +* returns: The number of rows changed. +*/ +DUCKDB_API idx_t duckdb_rows_changed(duckdb_result *result); -private: - static void ExtractYearOffset(int32_t &n, int32_t &year, int32_t &year_offset); -}; -} // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/arrow.hpp -// -// -//===----------------------------------------------------------------------===// +/*! +Returns the data of a specific column of a result in columnar format. This is the fastest way of accessing data in a +query result, as no conversion or type checking must be performed (outside of the original switch). If performance +is a concern, it is recommended to use this API over the `duckdb_value` functions. + +The function returns a dense array which contains the result data. The exact type stored in the array depends on the +corresponding duckdb_type (as provided by `duckdb_column_type`). For the exact type by which the data should be +accessed, see the comments in [the types section](types) or the `DUCKDB_TYPE` enum. + +For example, for a column of type `DUCKDB_TYPE_INTEGER`, rows can be accessed in the following manner: +```c +int32_t *data = (int32_t *) duckdb_column_data(&result, 0); +printf("Data for row %d: %d\n", row, data[row]); +``` + +* result: The result object to fetch the column data from. +* col: The column index. +* returns: The column data of the specified column. +*/ +DUCKDB_API void *duckdb_column_data(duckdb_result *result, idx_t col); -#ifndef ARROW_FLAG_DICTIONARY_ORDERED +/*! +Returns the nullmask of a specific column of a result in columnar format. The nullmask indicates for every row +whether or not the corresponding row is `NULL`. 
If a row is `NULL`, the values present in the array provided +by `duckdb_column_data` are undefined. + +```c +int32_t *data = (int32_t *) duckdb_column_data(&result, 0); +bool *nullmask = duckdb_nullmask_data(&result, 0); +if (nullmask[row]) { + printf("Data for row %d: NULL\n", row); +} else { + printf("Data for row %d: %d\n", row, data[row]); +} +``` -#include +* result: The result object to fetch the nullmask from. +* col: The column index. +* returns: The nullmask of the specified column. +*/ +DUCKDB_API bool *duckdb_nullmask_data(duckdb_result *result, idx_t col); -#ifdef __cplusplus -extern "C" { -#endif +/*! +Returns the error message contained within the result. The error is only set if `duckdb_query` returns `DuckDBError`. -#define ARROW_FLAG_DICTIONARY_ORDERED 1 -#define ARROW_FLAG_NULLABLE 2 -#define ARROW_FLAG_MAP_KEYS_SORTED 4 +The result of this function must not be freed. It will be cleaned up when `duckdb_destroy_result` is called. -struct ArrowSchema { - // Array type description - const char *format; - const char *name; - const char *metadata; - int64_t flags; - int64_t n_children; - struct ArrowSchema **children; - struct ArrowSchema *dictionary; +* result: The result object to fetch the error from. +* returns: The error of the result. +*/ +DUCKDB_API char *duckdb_result_error(duckdb_result *result); - // Release callback - void (*release)(struct ArrowSchema *); - // Opaque producer-specific data - void *private_data; -}; +//===--------------------------------------------------------------------===// +// Result Functions +//===--------------------------------------------------------------------===// -struct ArrowArray { - // Array data description - int64_t length; - int64_t null_count; - int64_t offset; - int64_t n_buffers; - int64_t n_children; - const void **buffers; - struct ArrowArray **children; - struct ArrowArray *dictionary; +// Safe fetch functions +// These functions will perform conversions if necessary. +// On failure (e.g. 
if conversion cannot be performed or if the value is NULL) a default value is returned. +// Note that these functions are slow since they perform bounds checking and conversion +// For fast access of values prefer using duckdb_column_data and duckdb_nullmask_data - // Release callback - void (*release)(struct ArrowArray *); - // Opaque producer-specific data - void *private_data; -}; +/*! + * returns: The boolean value at the specified location, or false if the value cannot be converted. + */ +DUCKDB_API bool duckdb_value_boolean(duckdb_result *result, idx_t col, idx_t row); -// EXPERIMENTAL -struct ArrowArrayStream { - // Callback to get the stream type - // (will be the same for all arrays in the stream). - // Return value: 0 if successful, an `errno`-compatible error code otherwise. - int (*get_schema)(struct ArrowArrayStream *, struct ArrowSchema *out); - // Callback to get the next array - // (if no error and the array is released, the stream has ended) - // Return value: 0 if successful, an `errno`-compatible error code otherwise. - int (*get_next)(struct ArrowArrayStream *, struct ArrowArray *out); +/*! + * returns: The int8_t value at the specified location, or 0 if the value cannot be converted. + */ +DUCKDB_API int8_t duckdb_value_int8(duckdb_result *result, idx_t col, idx_t row); - // Callback to get optional detailed error information. - // This must only be called if the last stream operation failed - // with a non-0 return code. The returned pointer is only valid until - // the next operation on this stream (including release). - // If unavailable, NULL is returned. - const char *(*get_last_error)(struct ArrowArrayStream *); +/*! + * returns: The int16_t value at the specified location, or 0 if the value cannot be converted. + */ +DUCKDB_API int16_t duckdb_value_int16(duckdb_result *result, idx_t col, idx_t row); - // Release callback: release the stream's own resources. - // Note that arrays returned by `get_next` must be individually released. 
- void (*release)(struct ArrowArrayStream *); - // Opaque producer-specific data - void *private_data; -}; +/*! + * returns: The int32_t value at the specified location, or 0 if the value cannot be converted. + */ +DUCKDB_API int32_t duckdb_value_int32(duckdb_result *result, idx_t col, idx_t row); -#ifdef __cplusplus -} -#endif +/*! + * returns: The int64_t value at the specified location, or 0 if the value cannot be converted. + */ +DUCKDB_API int64_t duckdb_value_int64(duckdb_result *result, idx_t col, idx_t row); -#endif -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/types/decimal.hpp -// -// -//===----------------------------------------------------------------------===// +/*! + * returns: The duckdb_hugeint value at the specified location, or 0 if the value cannot be converted. + */ +DUCKDB_API duckdb_hugeint duckdb_value_hugeint(duckdb_result *result, idx_t col, idx_t row); +/*! + * returns: The uint8_t value at the specified location, or 0 if the value cannot be converted. + */ +DUCKDB_API uint8_t duckdb_value_uint8(duckdb_result *result, idx_t col, idx_t row); +/*! + * returns: The uint16_t value at the specified location, or 0 if the value cannot be converted. + */ +DUCKDB_API uint16_t duckdb_value_uint16(duckdb_result *result, idx_t col, idx_t row); +/*! + * returns: The uint32_t value at the specified location, or 0 if the value cannot be converted. + */ +DUCKDB_API uint32_t duckdb_value_uint32(duckdb_result *result, idx_t col, idx_t row); +/*! + * returns: The uint64_t value at the specified location, or 0 if the value cannot be converted. + */ +DUCKDB_API uint64_t duckdb_value_uint64(duckdb_result *result, idx_t col, idx_t row); -namespace duckdb { +/*! + * returns: The float value at the specified location, or 0 if the value cannot be converted. + */ +DUCKDB_API float duckdb_value_float(duckdb_result *result, idx_t col, idx_t row); -//! 
The Decimal class is a static class that holds helper functions for the Decimal type -class Decimal { -public: - static constexpr uint8_t MAX_WIDTH_INT16 = 4; - static constexpr uint8_t MAX_WIDTH_INT32 = 9; - static constexpr uint8_t MAX_WIDTH_INT64 = 18; - static constexpr uint8_t MAX_WIDTH_INT128 = 38; - static constexpr uint8_t MAX_WIDTH_DECIMAL = MAX_WIDTH_INT128; +/*! + * returns: The double value at the specified location, or 0 if the value cannot be converted. + */ +DUCKDB_API double duckdb_value_double(duckdb_result *result, idx_t col, idx_t row); -public: - static string ToString(int16_t value, uint8_t scale); - static string ToString(int32_t value, uint8_t scale); - static string ToString(int64_t value, uint8_t scale); - static string ToString(hugeint_t value, uint8_t scale); -}; -} // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/types/timestamp.hpp -// -// -//===----------------------------------------------------------------------===// +/*! + * returns: The duckdb_date value at the specified location, or 0 if the value cannot be converted. + */ +DUCKDB_API duckdb_date duckdb_value_date(duckdb_result *result, idx_t col, idx_t row); +/*! + * returns: The duckdb_time value at the specified location, or 0 if the value cannot be converted. + */ +DUCKDB_API duckdb_time duckdb_value_time(duckdb_result *result, idx_t col, idx_t row); +/*! + * returns: The duckdb_timestamp value at the specified location, or 0 if the value cannot be converted. + */ +DUCKDB_API duckdb_timestamp duckdb_value_timestamp(duckdb_result *result, idx_t col, idx_t row); +/*! + * returns: The duckdb_interval value at the specified location, or 0 if the value cannot be converted. + */ +DUCKDB_API duckdb_interval duckdb_value_interval(duckdb_result *result, idx_t col, idx_t row); +/*! +* returns: The char* value at the specified location, or nullptr if the value cannot be converted. 
+The result must be freed with `duckdb_free`. +*/ +DUCKDB_API char *duckdb_value_varchar(duckdb_result *result, idx_t col, idx_t row); -namespace duckdb { +/*! +* returns: The char* value at the specified location. ONLY works on VARCHAR columns and does not auto-cast. +If the column is NOT a VARCHAR column this function will return NULL. -struct timestamp_struct { - int32_t year; - int8_t month; - int8_t day; - int8_t hour; - int8_t min; - int8_t sec; - int16_t msec; -}; -//! The Timestamp class is a static class that holds helper functions for the Timestamp -//! type. -class Timestamp { -public: - //! Convert a string in the format "YYYY-MM-DD hh:mm:ss" to a timestamp object - static timestamp_t FromString(const string &str); - static timestamp_t FromCString(const char *str, idx_t len); - //! Convert a date object to a string in the format "YYYY-MM-DD hh:mm:ss" - static string ToString(timestamp_t timestamp); +The result must NOT be freed. +*/ +DUCKDB_API char *duckdb_value_varchar_internal(duckdb_result *result, idx_t col, idx_t row); - static date_t GetDate(timestamp_t timestamp); +/*! +* returns: The duckdb_blob value at the specified location. Returns a blob with blob.data set to nullptr if the +value cannot be converted. The resulting "blob.data" must be freed with `duckdb_free.` +*/ +DUCKDB_API duckdb_blob duckdb_value_blob(duckdb_result *result, idx_t col, idx_t row); - static dtime_t GetTime(timestamp_t timestamp); - //! Create a Timestamp object from a specified (date, time) combination - static timestamp_t FromDatetime(date_t date, dtime_t time); - //! Extract the date and time from a given timestamp object - static void Convert(timestamp_t date, date_t &out_date, dtime_t &out_time); - //! Returns current timestamp - static timestamp_t GetCurrentTimestamp(); +/*! + * returns: Returns true if the value at the specified index is NULL, and false otherwise. + */ +DUCKDB_API bool duckdb_value_is_null(duckdb_result *result, idx_t col, idx_t row); - //! 
Convert the epoch (in sec) to a timestamp - static timestamp_t FromEpochSeconds(int64_t ms); - //! Convert the epoch (in ms) to a timestamp - static timestamp_t FromEpochMs(int64_t ms); - //! Convert the epoch (in microseconds) to a timestamp - static timestamp_t FromEpochMicroSeconds(int64_t micros); - //! Convert the epoch (in nanoseconds) to a timestamp - static timestamp_t FromEpochNanoSeconds(int64_t micros); +//===--------------------------------------------------------------------===// +// Helpers +//===--------------------------------------------------------------------===// +/*! +Allocate `size` bytes of memory using the duckdb internal malloc function. Any memory allocated in this manner +should be freed using `duckdb_free`. - //! Convert the epoch (in seconds) to a timestamp - static int64_t GetEpochSeconds(timestamp_t timestamp); - //! Convert the epoch (in ms) to a timestamp - static int64_t GetEpochMs(timestamp_t timestamp); - //! Convert a timestamp to epoch (in microseconds) - static int64_t GetEpochMicroSeconds(timestamp_t timestamp); - //! Convert a timestamp to epoch (in nanoseconds) - static int64_t GetEpochNanoSeconds(timestamp_t timestamp); +* size: The number of bytes to allocate. +* returns: A pointer to the allocated memory region. +*/ +DUCKDB_API void *duckdb_malloc(size_t size); - static bool TryParseUTCOffset(const char *str, idx_t &pos, idx_t len, int &hour_offset, int &minute_offset); -}; -} // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/types/time.hpp -// -// -//===----------------------------------------------------------------------===// +/*! +Free a value returned from `duckdb_malloc`, `duckdb_value_varchar` or `duckdb_value_blob`. +* ptr: The memory region to de-allocate. 
+*/ +DUCKDB_API void duckdb_free(void *ptr); +//===--------------------------------------------------------------------===// +// Date/Time/Timestamp Helpers +//===--------------------------------------------------------------------===// +/*! +Decompose a `duckdb_date` object into year, month and date (stored as `duckdb_date_struct`). +* date: The date object, as obtained from a `DUCKDB_TYPE_DATE` column. +* returns: The `duckdb_date_struct` with the decomposed elements. +*/ +DUCKDB_API duckdb_date_struct duckdb_from_date(duckdb_date date); +/*! +Re-compose a `duckdb_date` from year, month and date (`duckdb_date_struct`). -namespace duckdb { +* date: The year, month and date stored in a `duckdb_date_struct`. +* returns: The `duckdb_date` element. +*/ +DUCKDB_API duckdb_date duckdb_to_date(duckdb_date_struct date); -//! The Time class is a static class that holds helper functions for the Time -//! type. -class Time { -public: - //! Convert a string in the format "hh:mm:ss" to a time object - static dtime_t FromString(const string &str, bool strict = false); - static dtime_t FromCString(const char *buf, idx_t len, bool strict = false); - static bool TryConvertTime(const char *buf, idx_t len, idx_t &pos, dtime_t &result, bool strict = false); +/*! +Decompose a `duckdb_time` object into hour, minute, second and microsecond (stored as `duckdb_time_struct`). - //! Convert a time object to a string in the format "hh:mm:ss" - static string ToString(dtime_t time); +* time: The time object, as obtained from a `DUCKDB_TYPE_TIME` column. +* returns: The `duckdb_time_struct` with the decomposed elements. +*/ +DUCKDB_API duckdb_time_struct duckdb_from_time(duckdb_time time); - static string Format(int32_t hour, int32_t minute, int32_t second, int32_t microseconds = 0); +/*! +Re-compose a `duckdb_time` from hour, minute, second and microsecond (`duckdb_time_struct`). 
- static dtime_t FromTime(int32_t hour, int32_t minute, int32_t second, int32_t microseconds = 0); +* time: The hour, minute, second and microsecond in a `duckdb_time_struct`. +* returns: The `duckdb_time` element. +*/ +DUCKDB_API duckdb_time duckdb_to_time(duckdb_time_struct time); - static bool IsValidTime(int32_t hour, int32_t minute, int32_t second, int32_t microseconds = 0); - //! Extract the time from a given timestamp object - static void Convert(dtime_t time, int32_t &out_hour, int32_t &out_min, int32_t &out_sec, int32_t &out_micros); -}; +/*! +Decompose a `duckdb_timestamp` object into a `duckdb_timestamp_struct`. -} // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/serializer/buffered_serializer.hpp -// -// -//===----------------------------------------------------------------------===// +* ts: The ts object, as obtained from a `DUCKDB_TYPE_TIMESTAMP` column. +* returns: The `duckdb_timestamp_struct` with the decomposed elements. +*/ +DUCKDB_API duckdb_timestamp_struct duckdb_from_timestamp(duckdb_timestamp ts); +/*! +Re-compose a `duckdb_timestamp` from a duckdb_timestamp_struct. +* ts: The de-composed elements in a `duckdb_timestamp_struct`. +* returns: The `duckdb_timestamp` element. +*/ +DUCKDB_API duckdb_timestamp duckdb_to_timestamp(duckdb_timestamp_struct ts); +//===--------------------------------------------------------------------===// +// Hugeint Helpers +//===--------------------------------------------------------------------===// +/*! +Converts a duckdb_hugeint object (as obtained from a `DUCKDB_TYPE_HUGEINT` column) into a double. +* val: The hugeint value. +* returns: The converted `double` element. +*/ +DUCKDB_API double duckdb_hugeint_to_double(duckdb_hugeint val); -namespace duckdb { +/*! +Converts a double value to a duckdb_hugeint object. 
-#define SERIALIZER_DEFAULT_SIZE 1024 +If the conversion fails because the double value is too big the result will be 0. -struct BinaryData { - unique_ptr data; - idx_t size; -}; +* val: The double value. +* returns: The converted `duckdb_hugeint` element. +*/ +DUCKDB_API duckdb_hugeint duckdb_double_to_hugeint(double val); -class BufferedSerializer : public Serializer { -public: - //! Serializes to a buffer allocated by the serializer, will expand when - //! writing past the initial threshold - explicit BufferedSerializer(idx_t maximum_size = SERIALIZER_DEFAULT_SIZE); - //! Serializes to a provided (owned) data pointer - BufferedSerializer(unique_ptr data, idx_t size); - BufferedSerializer(data_ptr_t data, idx_t size); +//===--------------------------------------------------------------------===// +// Prepared Statements +//===--------------------------------------------------------------------===// +// A prepared statement is a parameterized query that allows you to bind parameters to it. +// * This is useful to easily supply parameters to functions and avoid SQL injection attacks. +// * This is useful to speed up queries that you will execute several times with different parameters. +// Because the query will only be parsed, bound, optimized and planned once during the prepare stage, +// rather than once per execution. +// For example: +// SELECT * FROM tbl WHERE id=? +// Or a query with multiple parameters: +// SELECT * FROM tbl WHERE id=$1 OR name=$2 - idx_t maximum_size; - data_ptr_t data; +/*! +Create a prepared statement object from a query. - BinaryData blob; +Note that after calling `duckdb_prepare`, the prepared statement should always be destroyed using +`duckdb_destroy_prepare`, even if the prepare fails. -public: - void WriteData(const_data_ptr_t buffer, uint64_t write_size) override; +If the prepare fails, `duckdb_prepare_error` can be called to obtain the reason why the prepare failed. - //! 
Retrieves the data after the writing has been completed - BinaryData GetData() { - return std::move(blob); - } +* connection: The connection object +* query: The SQL query to prepare +* out_prepared_statement: The resulting prepared statement object +* returns: `DuckDBSuccess` on success or `DuckDBError` on failure. +*/ +DUCKDB_API duckdb_state duckdb_prepare(duckdb_connection connection, const char *query, + duckdb_prepared_statement *out_prepared_statement); - void Reset() { - blob.size = 0; - } -}; +/*! +Closes the prepared statement and de-allocates all memory allocated for that statement. -} // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/main/appender.hpp -// -// -//===----------------------------------------------------------------------===// +* prepared_statement: The prepared statement to destroy. +*/ +DUCKDB_API void duckdb_destroy_prepare(duckdb_prepared_statement *prepared_statement); +/*! +Returns the error message associated with the given prepared statement. +If the prepared statement has no error message, this returns `nullptr` instead. +The error message should not be freed. It will be de-allocated when `duckdb_destroy_prepare` is called. +* prepared_statement: The prepared statement to obtain the error from. +* returns: The error message, or `nullptr` if there is none. +*/ +DUCKDB_API const char *duckdb_prepare_error(duckdb_prepared_statement prepared_statement); +/*! +Returns the number of parameters that can be provided to the given prepared statement. +Returns 0 if the query was not successfully prepared. +* prepared_statement: The prepared statement to obtain the number of parameters for. +*/ +DUCKDB_API idx_t duckdb_nparams(duckdb_prepared_statement prepared_statement); -namespace duckdb { +/*! +Returns the parameter type for the parameter at the given index. 
-class ClientContext; -class DuckDB; -class TableCatalogEntry; -class Connection; +Returns `DUCKDB_TYPE_INVALID` if the parameter index is out of range or the statement was not successfully prepared. -//! The Appender class can be used to append elements to a table. -class Appender { - //! A reference to a database connection that created this appender - shared_ptr context; - //! The table description (including column names) - unique_ptr description; - //! Internal chunk used for appends - DataChunk chunk; - //! The current column to append to - idx_t column = 0; - -public: - DUCKDB_API Appender(Connection &con, const string &schema_name, const string &table_name); - DUCKDB_API Appender(Connection &con, const string &table_name); - DUCKDB_API ~Appender(); +* prepared_statement: The prepared statement. +* param_idx: The parameter index. +* returns: The parameter type +*/ +DUCKDB_API duckdb_type duckdb_param_type(duckdb_prepared_statement prepared_statement, idx_t param_idx); - //! Begins a new row append, after calling this the other AppendX() functions - //! should be called the correct amount of times. After that, - //! EndRow() should be called. - DUCKDB_API void BeginRow(); - //! Finishes appending the current row. - DUCKDB_API void EndRow(); +/*! +Binds a bool value to the prepared statement at the specified index. +*/ +DUCKDB_API duckdb_state duckdb_bind_boolean(duckdb_prepared_statement prepared_statement, idx_t param_idx, bool val); - // Append functions - template - void Append(T value) { - throw Exception("Undefined type for Appender::Append!"); - } +/*! +Binds an int8_t value to the prepared statement at the specified index. +*/ +DUCKDB_API duckdb_state duckdb_bind_int8(duckdb_prepared_statement prepared_statement, idx_t param_idx, int8_t val); - DUCKDB_API void Append(const char *value, uint32_t length); +/*! +Binds an int16_t value to the prepared statement at the specified index. 
+*/ +DUCKDB_API duckdb_state duckdb_bind_int16(duckdb_prepared_statement prepared_statement, idx_t param_idx, int16_t val); - // prepared statements - template - void AppendRow(Args... args) { - BeginRow(); - AppendRowRecursive(args...); - } +/*! +Binds an int32_t value to the prepared statement at the specified index. +*/ +DUCKDB_API duckdb_state duckdb_bind_int32(duckdb_prepared_statement prepared_statement, idx_t param_idx, int32_t val); - //! Commit the changes made by the appender. - DUCKDB_API void Flush(); - //! Flush the changes made by the appender and close it. The appender cannot be used after this point - DUCKDB_API void Close(); +/*! +Binds an int64_t value to the prepared statement at the specified index. +*/ +DUCKDB_API duckdb_state duckdb_bind_int64(duckdb_prepared_statement prepared_statement, idx_t param_idx, int64_t val); - //! Obtain a reference to the internal vector that is used to append to the table - DUCKDB_API DataChunk &GetAppendChunk() { - return chunk; - } +/*! +Binds a duckdb_hugeint value to the prepared statement at the specified index. +*/ +DUCKDB_API duckdb_state duckdb_bind_hugeint(duckdb_prepared_statement prepared_statement, idx_t param_idx, + duckdb_hugeint val); - DUCKDB_API idx_t CurrentColumn() { - return column; - } +/*! +Binds an uint8_t value to the prepared statement at the specified index. +*/ +DUCKDB_API duckdb_state duckdb_bind_uint8(duckdb_prepared_statement prepared_statement, idx_t param_idx, uint8_t val); -private: - template - void AppendValueInternal(T value); - template - void AppendValueInternal(Vector &vector, SRC input); +/*! +Binds an uint16_t value to the prepared statement at the specified index. +*/ +DUCKDB_API duckdb_state duckdb_bind_uint16(duckdb_prepared_statement prepared_statement, idx_t param_idx, uint16_t val); - void AppendRowRecursive() { - EndRow(); - } +/*! +Binds an uint32_t value to the prepared statement at the specified index. 
+*/ +DUCKDB_API duckdb_state duckdb_bind_uint32(duckdb_prepared_statement prepared_statement, idx_t param_idx, uint32_t val); - template - void AppendRowRecursive(T value, Args... args) { - Append(value); - AppendRowRecursive(args...); - } +/*! +Binds an uint64_t value to the prepared statement at the specified index. +*/ +DUCKDB_API duckdb_state duckdb_bind_uint64(duckdb_prepared_statement prepared_statement, idx_t param_idx, uint64_t val); - void AppendValue(const Value &value); -}; +/*! +Binds a float value to the prepared statement at the specified index. +*/ +DUCKDB_API duckdb_state duckdb_bind_float(duckdb_prepared_statement prepared_statement, idx_t param_idx, float val); -template <> -void DUCKDB_API Appender::Append(bool value); -template <> -void DUCKDB_API Appender::Append(int8_t value); -template <> -void DUCKDB_API Appender::Append(int16_t value); -template <> -void DUCKDB_API Appender::Append(int32_t value); -template <> -void DUCKDB_API Appender::Append(int64_t value); -template <> -void DUCKDB_API Appender::Append(uint8_t value); -template <> -void DUCKDB_API Appender::Append(uint16_t value); -template <> -void DUCKDB_API Appender::Append(uint32_t value); -template <> -void DUCKDB_API Appender::Append(uint64_t value); -template <> -void DUCKDB_API Appender::Append(float value); -template <> -void DUCKDB_API Appender::Append(double value); -template <> -void DUCKDB_API Appender::Append(const char *value); -template <> -void DUCKDB_API Appender::Append(Value value); -template <> -void DUCKDB_API Appender::Append(std::nullptr_t value); +/*! +Binds a double value to the prepared statement at the specified index. 
+*/ +DUCKDB_API duckdb_state duckdb_bind_double(duckdb_prepared_statement prepared_statement, idx_t param_idx, double val); -} // namespace duckdb//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/main/client_context.hpp -// -// -//===----------------------------------------------------------------------===// +/*! +Binds a duckdb_date value to the prepared statement at the specified index. +*/ +DUCKDB_API duckdb_state duckdb_bind_date(duckdb_prepared_statement prepared_statement, idx_t param_idx, + duckdb_date val); +/*! +Binds a duckdb_time value to the prepared statement at the specified index. +*/ +DUCKDB_API duckdb_state duckdb_bind_time(duckdb_prepared_statement prepared_statement, idx_t param_idx, + duckdb_time val); +/*! +Binds a duckdb_timestamp value to the prepared statement at the specified index. +*/ +DUCKDB_API duckdb_state duckdb_bind_timestamp(duckdb_prepared_statement prepared_statement, idx_t param_idx, + duckdb_timestamp val); -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/catalog/catalog_entry/schema_catalog_entry.hpp -// -// -//===----------------------------------------------------------------------===// +/*! +Binds a duckdb_interval value to the prepared statement at the specified index. +*/ +DUCKDB_API duckdb_state duckdb_bind_interval(duckdb_prepared_statement prepared_statement, idx_t param_idx, + duckdb_interval val); +/*! +Binds a null-terminated varchar value to the prepared statement at the specified index. +*/ +DUCKDB_API duckdb_state duckdb_bind_varchar(duckdb_prepared_statement prepared_statement, idx_t param_idx, + const char *val); +/*! +Binds a varchar value to the prepared statement at the specified index. 
+*/ +DUCKDB_API duckdb_state duckdb_bind_varchar_length(duckdb_prepared_statement prepared_statement, idx_t param_idx, + const char *val, idx_t length); -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/catalog/catalog_entry.hpp -// -// -//===----------------------------------------------------------------------===// +/*! +Binds a blob value to the prepared statement at the specified index. +*/ +DUCKDB_API duckdb_state duckdb_bind_blob(duckdb_prepared_statement prepared_statement, idx_t param_idx, + const void *data, idx_t length); +/*! +Binds a NULL value to the prepared statement at the specified index. +*/ +DUCKDB_API duckdb_state duckdb_bind_null(duckdb_prepared_statement prepared_statement, idx_t param_idx); +/*! +Executes the prepared statement with the given bound parameters, and returns a materialized query result. +This method can be called multiple times for each prepared statement, and the parameters can be modified +between calls to this function. -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/enums/catalog_type.hpp -// -// -//===----------------------------------------------------------------------===// +* prepared_statement: The prepared statement to execute. +* out_result: The query result. +* returns: `DuckDBSuccess` on success or `DuckDBError` on failure. +*/ +DUCKDB_API duckdb_state duckdb_execute_prepared(duckdb_prepared_statement prepared_statement, + duckdb_result *out_result); +/*! +Executes the prepared statement with the given bound parameters, and returns an arrow query result. +* prepared_statement: The prepared statement to execute. +* out_result: The query result. +* returns: `DuckDBSuccess` on success or `DuckDBError` on failure. 
+*/ +DUCKDB_API duckdb_state duckdb_execute_prepared_arrow(duckdb_prepared_statement prepared_statement, + duckdb_arrow *out_result); +//===--------------------------------------------------------------------===// +// Appender +//===--------------------------------------------------------------------===// +// Appenders are the most efficient way of loading data into DuckDB from within the C interface, and are recommended for +// fast data loading. The appender is much faster than using prepared statements or individual `INSERT INTO` statements. -namespace duckdb { +// Appends are made in row-wise format. For every column, a `duckdb_append_[type]` call should be made, after which +// the row should be finished by calling `duckdb_appender_end_row`. After all rows have been appended, +// `duckdb_appender_destroy` should be used to finalize the appender and clean up the resulting memory. -//===--------------------------------------------------------------------===// -// Catalog Types -//===--------------------------------------------------------------------===// -enum class CatalogType : uint8_t { - INVALID = 0, - TABLE_ENTRY = 1, - SCHEMA_ENTRY = 2, - VIEW_ENTRY = 3, - INDEX_ENTRY = 4, - PREPARED_STATEMENT = 5, - SEQUENCE_ENTRY = 6, - COLLATION_ENTRY = 7, +// Note that `duckdb_appender_destroy` should always be called on the resulting appender, even if the function returns +// `DuckDBError`. - // functions - TABLE_FUNCTION_ENTRY = 25, - SCALAR_FUNCTION_ENTRY = 26, - AGGREGATE_FUNCTION_ENTRY = 27, - PRAGMA_FUNCTION_ENTRY = 28, - COPY_FUNCTION_ENTRY = 29, - MACRO_ENTRY = 30, +/*! +Creates an appender object. - // version info - UPDATED_ENTRY = 50, - DELETED_ENTRY = 51, -}; +* connection: The connection context to create the appender in. +* schema: The schema of the table to append to, or `nullptr` for the default schema. +* table: The table name to append to. +* out_appender: The resulting appender object. 
+* returns: `DuckDBSuccess` on success or `DuckDBError` on failure. +*/ +DUCKDB_API duckdb_state duckdb_appender_create(duckdb_connection connection, const char *schema, const char *table, + duckdb_appender *out_appender); -string CatalogTypeToString(CatalogType type); +/*! +Returns the error message associated with the given appender. +If the appender has no error message, this returns `nullptr` instead. -} // namespace duckdb +The error message should be freed using `duckdb_free`. +* appender: The appender to get the error from. +* returns: The error message, or `nullptr` if there is none. +*/ +DUCKDB_API const char *duckdb_appender_error(duckdb_appender appender); -#include +/*! +Flush the appender to the table, forcing the cache of the appender to be cleared and the data to be appended to the +base table. -namespace duckdb { -struct AlterInfo; -class Catalog; -class CatalogSet; -class ClientContext; +This should generally not be used unless you know what you are doing. Instead, call `duckdb_appender_destroy` when you +are done with the appender. -//! Abstract base class of an entry in the catalog -class CatalogEntry { -public: - CatalogEntry(CatalogType type, Catalog *catalog, string name) - : type(type), catalog(catalog), set(nullptr), name(name), deleted(false), temporary(false), internal(false), - parent(nullptr) { - } - virtual ~CatalogEntry(); +* appender: The appender to flush. +* returns: `DuckDBSuccess` on success or `DuckDBError` on failure. +*/ +DUCKDB_API duckdb_state duckdb_appender_flush(duckdb_appender appender); - virtual unique_ptr AlterEntry(ClientContext &context, AlterInfo *info) { - throw CatalogException("Unsupported alter type for catalog entry!"); - } +/*! +Close the appender, flushing all intermediate state in the appender to the table and closing it for further appends. - virtual unique_ptr Copy(ClientContext &context) { - throw CatalogException("Unsupported copy type for catalog entry!"); - } - //! 
Sets the CatalogEntry as the new root entry (i.e. the newest entry) - this is called on a rollback to an - //! AlterEntry - virtual void SetAsRoot() { - } - //! Convert the catalog entry to a SQL string that can be used to re-construct the catalog entry - virtual string ToSQL() { - throw CatalogException("Unsupported catalog type for ToSQL()"); - } +This is generally not necessary. Call `duckdb_appender_destroy` instead. - //! The type of this catalog entry - CatalogType type; - //! Reference to the catalog this entry belongs to - Catalog *catalog; - //! Reference to the catalog set this entry is stored in - CatalogSet *set; - //! The name of the entry - string name; - //! Whether or not the object is deleted - bool deleted; - //! Whether or not the object is temporary and should not be added to the WAL - bool temporary; - //! Whether or not the entry is an internal entry (cannot be deleted, not dumped, etc) - bool internal; - //! Timestamp at which the catalog entry was created - transaction_t timestamp; - //! Child entry - unique_ptr child; - //! Parent entry (the node that owns this node) - CatalogEntry *parent; -}; -} // namespace duckdb +* appender: The appender to flush and close. +* returns: `DuckDBSuccess` on success or `DuckDBError` on failure. +*/ +DUCKDB_API duckdb_state duckdb_appender_close(duckdb_appender appender); -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/catalog/catalog_set.hpp -// -// -//===----------------------------------------------------------------------===// +/*! +Close the appender and destroy it. Flushing all intermediate state in the appender to the table, and de-allocating +all memory associated with the appender. +* appender: The appender to flush, close and destroy. +* returns: `DuckDBSuccess` on success or `DuckDBError` on failure. +*/ +DUCKDB_API duckdb_state duckdb_appender_destroy(duckdb_appender *appender); +/*! 
+A nop function, provided for backwards compatibility reasons. Does nothing. Only `duckdb_appender_end_row` is required. +*/ +DUCKDB_API duckdb_state duckdb_appender_begin_row(duckdb_appender appender); +/*! +Finish the current row of appends. After end_row is called, the next row can be appended. -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/catalog/default/default_generator.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - -namespace duckdb { -class ClientContext; - -class DefaultGenerator { -public: - explicit DefaultGenerator(Catalog &catalog) : catalog(catalog) { - } - virtual ~DefaultGenerator() { - } - - Catalog &catalog; - -public: - //! Creates a default entry with the specified name, or returns nullptr if no such entry can be generated - virtual unique_ptr CreateDefaultEntry(ClientContext &context, const string &entry_name) = 0; -}; - -} // namespace duckdb - - +* appender: The appender. +* returns: `DuckDBSuccess` on success or `DuckDBError` on failure. +*/ +DUCKDB_API duckdb_state duckdb_appender_end_row(duckdb_appender appender); +/*! +Append a bool value to the appender. +*/ +DUCKDB_API duckdb_state duckdb_append_bool(duckdb_appender appender, bool value); +/*! +Append an int8_t value to the appender. +*/ +DUCKDB_API duckdb_state duckdb_append_int8(duckdb_appender appender, int8_t value); +/*! +Append an int16_t value to the appender. +*/ +DUCKDB_API duckdb_state duckdb_append_int16(duckdb_appender appender, int16_t value); +/*! +Append an int32_t value to the appender. +*/ +DUCKDB_API duckdb_state duckdb_append_int32(duckdb_appender appender, int32_t value); +/*! +Append an int64_t value to the appender. +*/ +DUCKDB_API duckdb_state duckdb_append_int64(duckdb_appender appender, int64_t value); +/*! +Append a duckdb_hugeint value to the appender. 
+*/ +DUCKDB_API duckdb_state duckdb_append_hugeint(duckdb_appender appender, duckdb_hugeint value); +/*! +Append a uint8_t value to the appender. +*/ +DUCKDB_API duckdb_state duckdb_append_uint8(duckdb_appender appender, uint8_t value); +/*! +Append a uint16_t value to the appender. +*/ +DUCKDB_API duckdb_state duckdb_append_uint16(duckdb_appender appender, uint16_t value); +/*! +Append a uint32_t value to the appender. +*/ +DUCKDB_API duckdb_state duckdb_append_uint32(duckdb_appender appender, uint32_t value); +/*! +Append a uint64_t value to the appender. +*/ +DUCKDB_API duckdb_state duckdb_append_uint64(duckdb_appender appender, uint64_t value); -#include -#include +/*! +Append a float value to the appender. +*/ +DUCKDB_API duckdb_state duckdb_append_float(duckdb_appender appender, float value); +/*! +Append a double value to the appender. +*/ +DUCKDB_API duckdb_state duckdb_append_double(duckdb_appender appender, double value); -namespace duckdb { -struct AlterInfo; +/*! +Append a duckdb_date value to the appender. +*/ +DUCKDB_API duckdb_state duckdb_append_date(duckdb_appender appender, duckdb_date value); +/*! +Append a duckdb_time value to the appender. +*/ +DUCKDB_API duckdb_state duckdb_append_time(duckdb_appender appender, duckdb_time value); +/*! +Append a duckdb_timestamp value to the appender. +*/ +DUCKDB_API duckdb_state duckdb_append_timestamp(duckdb_appender appender, duckdb_timestamp value); +/*! +Append a duckdb_interval value to the appender. +*/ +DUCKDB_API duckdb_state duckdb_append_interval(duckdb_appender appender, duckdb_interval value); -class ClientContext; +/*! +Append a varchar value to the appender. +*/ +DUCKDB_API duckdb_state duckdb_append_varchar(duckdb_appender appender, const char *val); +/*! +Append a varchar value to the appender. +*/ +DUCKDB_API duckdb_state duckdb_append_varchar_length(duckdb_appender appender, const char *val, idx_t length); +/*! +Append a blob value to the appender. 
+*/ +DUCKDB_API duckdb_state duckdb_append_blob(duckdb_appender appender, const void *data, idx_t length); +/*! +Append a NULL value to the appender (of any type). +*/ +DUCKDB_API duckdb_state duckdb_append_null(duckdb_appender appender); -typedef unordered_map> set_lock_map_t; +//===--------------------------------------------------------------------===// +// Arrow Interface +//===--------------------------------------------------------------------===// +/*! +Executes a SQL query within a connection and stores the full (materialized) result in an arrow structure. +If the query fails to execute, DuckDBError is returned and the error message can be retrieved by calling +`duckdb_query_arrow_error`. -struct MappingValue { - explicit MappingValue(idx_t index_) : index(index_), timestamp(0), deleted(false), parent(nullptr) { - } +Note that after running `duckdb_query_arrow`, `duckdb_destroy_arrow` must be called on the result object even if the +query fails, otherwise the error stored within the result will not be freed correctly. - idx_t index; - transaction_t timestamp; - bool deleted; - unique_ptr child; - MappingValue *parent; -}; +* connection: The connection to perform the query in. +* query: The SQL query to run. +* out_result: The query result. +* returns: `DuckDBSuccess` on success or `DuckDBError` on failure. +*/ +DUCKDB_API duckdb_state duckdb_query_arrow(duckdb_connection connection, const char *query, duckdb_arrow *out_result); -//! The Catalog Set stores (key, value) map of a set of CatalogEntries -class CatalogSet { - friend class DependencyManager; +/*! +Fetch the internal arrow schema from the arrow result. -public: - explicit CatalogSet(Catalog &catalog, unique_ptr defaults = nullptr); +* result: The result to fetch the schema from. +* out_schema: The output schema. +* returns: `DuckDBSuccess` on success or `DuckDBError` on failure. +*/ +DUCKDB_API duckdb_state duckdb_query_arrow_schema(duckdb_arrow result, duckdb_arrow_schema *out_schema); - //! 
Create an entry in the catalog set. Returns whether or not it was - //! successful. - bool CreateEntry(ClientContext &context, const string &name, unique_ptr value, - unordered_set &dependencies); +/*! +Fetch an internal arrow array from the arrow result. - bool AlterEntry(ClientContext &context, const string &name, AlterInfo *alter_info); +This function can be called multiple times to get next chunks, which will free the previous out_array. +So consume the out_array before calling this function again. - bool DropEntry(ClientContext &context, const string &name, bool cascade); +* result: The result to fetch the array from. +* out_array: The output array. +* returns: `DuckDBSuccess` on success or `DuckDBError` on failure. +*/ +DUCKDB_API duckdb_state duckdb_query_arrow_array(duckdb_arrow result, duckdb_arrow_array *out_array); - //! Returns the entry with the specified name - CatalogEntry *GetEntry(ClientContext &context, const string &name); +/*! +Returns the number of columns present in the arrow result object. - //! Gets the entry that is most similar to the given name (i.e. smallest levenshtein distance), or empty string if - //! none is found - string SimilarEntry(ClientContext &context, const string &name); +* result: The result object. +* returns: The number of columns present in the result object. +*/ +DUCKDB_API idx_t duckdb_arrow_column_count(duckdb_arrow result); - //! Rollback to be the currently valid entry for a certain catalog - //! entry - void Undo(CatalogEntry *entry); +/*! +Returns the number of rows present in the arrow result object. - //! Scan the catalog set, invoking the callback method for every entry - template - void Scan(ClientContext &context, T &&callback) { - // lock the catalog set - std::lock_guard lock(catalog_lock); - for (auto &kv : entries) { - auto entry = kv.second.get(); - entry = GetEntryForTransaction(context, entry); - if (!entry->deleted) { - callback(entry); - } - } - } +* result: The result object.
+* returns: The number of rows present in the result object. +*/ +DUCKDB_API idx_t duckdb_arrow_row_count(duckdb_arrow result); - //! Scan the catalog set, invoking the callback method for every committed entry - template - void Scan(T &&callback) { - // lock the catalog set - std::lock_guard lock(catalog_lock); - for (auto &kv : entries) { - auto entry = kv.second.get(); - entry = GetCommittedEntry(entry); - if (!entry->deleted) { - callback(entry); - } - } - } +/*! +Returns the number of rows changed by the query stored in the arrow result. This is relevant only for +INSERT/UPDATE/DELETE queries. For other queries the rows_changed will be 0. - static bool HasConflict(ClientContext &context, transaction_t timestamp); - static bool UseTimestamp(ClientContext &context, transaction_t timestamp); +* result: The result object. +* returns: The number of rows changed. +*/ +DUCKDB_API idx_t duckdb_arrow_rows_changed(duckdb_arrow result); - idx_t GetEntryIndex(CatalogEntry *entry); - CatalogEntry *GetEntryFromIndex(idx_t index); - void UpdateTimestamp(CatalogEntry *entry, transaction_t timestamp); +/*! +Returns the error message contained within the result. The error is only set if `duckdb_query_arrow` returns +`DuckDBError`. - //! Returns the root entry with the specified name regardless of transaction (or nullptr if there are none) - CatalogEntry *GetRootEntry(const string &name); +The result should be freed using `duckdb_free`. -private: - //! Given a root entry, gets the entry valid for this transaction - CatalogEntry *GetEntryForTransaction(ClientContext &context, CatalogEntry *current); - CatalogEntry *GetCommittedEntry(CatalogEntry *current); - bool GetEntryInternal(ClientContext &context, const string &name, idx_t &entry_index, CatalogEntry *&entry); - bool GetEntryInternal(ClientContext &context, idx_t entry_index, CatalogEntry *&entry); - //! 
Drops an entry from the catalog set; must hold the catalog_lock to safely call this - void DropEntryInternal(ClientContext &context, idx_t entry_index, CatalogEntry &entry, bool cascade, - set_lock_map_t &lock_set); - MappingValue *GetMapping(ClientContext &context, const string &name, bool allow_lowercase_alias, - bool get_latest = false); - void PutMapping(ClientContext &context, const string &name, idx_t entry_index); - void DeleteMapping(ClientContext &context, const string &name); +* result: The result object to fetch the error from. +* returns: The error of the result. +*/ +DUCKDB_API const char *duckdb_query_arrow_error(duckdb_arrow result); -private: - Catalog &catalog; - //! The catalog lock is used to make changes to the data - mutex catalog_lock; - //! Mapping of string to catalog entry - unordered_map> mapping; - //! The set of catalog entries - unordered_map> entries; - //! The current catalog entry index - idx_t current_entry = 0; - //! The generator used to generate default internal entries - unique_ptr defaults; -}; +/*! +Closes the result and de-allocates all memory allocated for the arrow result. -} // namespace duckdb +* result: The result to destroy. +*/ +DUCKDB_API void duckdb_destroy_arrow(duckdb_arrow *result); +#ifdef __cplusplus +} +#endif //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/query_error_context.hpp +// duckdb/common/types/date.hpp // // //===----------------------------------------------------------------------===// @@ -9214,216 +10183,288 @@ class CatalogSet { namespace duckdb { -class SQLStatement; -class QueryErrorContext { +//! The Date class is a static class that holds helper functions for the Date type.
+class Date { public: - explicit QueryErrorContext(SQLStatement *statement_ = nullptr, idx_t query_location_ = INVALID_INDEX) - : statement(statement_), query_location(query_location_) { - } + static const string_t MONTH_NAMES[12]; + static const string_t MONTH_NAMES_ABBREVIATED[12]; + static const string_t DAY_NAMES[7]; + static const string_t DAY_NAMES_ABBREVIATED[7]; + static const int32_t NORMAL_DAYS[13]; + static const int32_t CUMULATIVE_DAYS[13]; + static const int32_t LEAP_DAYS[13]; + static const int32_t CUMULATIVE_LEAP_DAYS[13]; + static const int32_t CUMULATIVE_YEAR_DAYS[401]; + static const int8_t MONTH_PER_DAY_OF_YEAR[365]; + static const int8_t LEAP_MONTH_PER_DAY_OF_YEAR[366]; - //! The query statement - SQLStatement *statement; - //! The location in which the error should be thrown - idx_t query_location; + // min date is 5877642-06-23 (BC) (-2^31) + constexpr static const int32_t DATE_MIN_YEAR = -5877641; + constexpr static const int32_t DATE_MIN_MONTH = 6; + constexpr static const int32_t DATE_MIN_DAY = 23; + // max date is 5881580-07-11 (2^31) + constexpr static const int32_t DATE_MAX_YEAR = 5881580; + constexpr static const int32_t DATE_MAX_MONTH = 7; + constexpr static const int32_t DATE_MAX_DAY = 11; + constexpr static const int32_t EPOCH_YEAR = 1970; -public: - static string Format(const string &query, const string &error_message, int error_location); + constexpr static const int32_t YEAR_INTERVAL = 400; + constexpr static const int32_t DAYS_PER_YEAR_INTERVAL = 146097; - string FormatErrorRecursive(const string &msg, vector &values); - template - string FormatErrorRecursive(const string &msg, vector &values, T param, Args... params) { - values.push_back(ExceptionFormatValue::CreateFormatValue(param)); - return FormatErrorRecursive(msg, values, params...); - } +public: + //! Convert a string in the format "YYYY-MM-DD" to a date object + static date_t FromString(const string &str, bool strict = false); + //! 
Convert a string in the format "YYYY-MM-DD" to a date object + static date_t FromCString(const char *str, idx_t len, bool strict = false); + //! Convert a date object to a string in the format "YYYY-MM-DD" + static string ToString(date_t date); + //! Try to convert text in a buffer to a date; returns true if parsing was successful + static bool TryConvertDate(const char *buf, idx_t len, idx_t &pos, date_t &result, bool strict = false); - template - string FormatError(const string &msg, Args... params) { - vector values; - return FormatErrorRecursive(msg, values, params...); - } -}; + //! Create a string "YYYY-MM-DD" from a specified (year, month, day) + //! combination + static string Format(int32_t year, int32_t month, int32_t day); -} // namespace duckdb + //! Extract the year, month and day from a given date object + static void Convert(date_t date, int32_t &out_year, int32_t &out_month, int32_t &out_day); + //! Create a Date object from a specified (year, month, day) combination + static date_t FromDate(int32_t year, int32_t month, int32_t day); + static bool TryFromDate(int32_t year, int32_t month, int32_t day, date_t &result); + //! Returns true if (year) is a leap year, and false otherwise + static bool IsLeapYear(int32_t year); -namespace duckdb { -class ClientContext; + //! Returns true if the specified (year, month, day) combination is a valid + //! date + static bool IsValid(int32_t year, int32_t month, int32_t day); -class StandardEntry; -class TableCatalogEntry; -class TableFunctionCatalogEntry; -class SequenceCatalogEntry; -class Serializer; -class Deserializer; + //! 
The max number of days in a month of a given year + static int32_t MonthDays(int32_t year, int32_t month); -enum class OnCreateConflict : uint8_t; - -struct AlterTableInfo; -struct CreateIndexInfo; -struct CreateFunctionInfo; -struct CreateCollationInfo; -struct CreateViewInfo; -struct BoundCreateTableInfo; -struct CreatePragmaFunctionInfo; -struct CreateSequenceInfo; -struct CreateSchemaInfo; -struct CreateTableFunctionInfo; -struct CreateCopyFunctionInfo; - -struct DropInfo; - -//! A schema in the catalog -class SchemaCatalogEntry : public CatalogEntry { - friend class Catalog; - -public: - SchemaCatalogEntry(Catalog *catalog, string name, bool is_internal); - -private: - //! The catalog set holding the tables - CatalogSet tables; - //! The catalog set holding the indexes - CatalogSet indexes; - //! The catalog set holding the table functions - CatalogSet table_functions; - //! The catalog set holding the copy functions - CatalogSet copy_functions; - //! The catalog set holding the pragma functions - CatalogSet pragma_functions; - //! The catalog set holding the scalar and aggregate functions - CatalogSet functions; - //! The catalog set holding the sequences - CatalogSet sequences; - //! The catalog set holding the collations - CatalogSet collations; - -public: - //! Gets a catalog entry from the given catalog set matching the given name - CatalogEntry *GetEntry(ClientContext &context, CatalogType type, const string &name, bool if_exists, - QueryErrorContext error_context = QueryErrorContext()); + //! Extract the epoch from the date (seconds since 1970-01-01) + static int64_t Epoch(date_t date); + //! Extract the epoch from the date (nanoseconds since 1970-01-01) + static int64_t EpochNanoseconds(date_t date); + //! Convert the epoch (seconds since 1970-01-01) to a date_t + static date_t EpochToDate(int64_t epoch); - //! 
Scan the specified catalog set, invoking the callback method for every entry - void Scan(ClientContext &context, CatalogType type, const std::function &callback); - //! Scan the specified catalog set, invoking the callback method for every committed entry - void Scan(CatalogType type, const std::function &callback); + //! Extract the number of days since epoch (days since 1970-01-01) + static int32_t EpochDays(date_t date); + //! Convert the epoch number of days to a date_t + static date_t EpochDaysToDate(int32_t epoch); - //! Serialize the meta information of the SchemaCatalogEntry a serializer - virtual void Serialize(Serializer &serializer); - //! Deserializes to a CreateSchemaInfo - static unique_ptr Deserialize(Deserializer &source); + //! Extract year of a date entry + static int32_t ExtractYear(date_t date); + //! Extract year of a date entry, but optimized to first try the last year found + static int32_t ExtractYear(date_t date, int32_t *last_year); + static int32_t ExtractYear(timestamp_t ts, int32_t *last_year); + //! Extract month of a date entry + static int32_t ExtractMonth(date_t date); + //! Extract day of a date entry + static int32_t ExtractDay(date_t date); + //! Extract the day of the week (1-7) + static int32_t ExtractISODayOfTheWeek(date_t date); + //! Extract the day of the year + static int32_t ExtractDayOfTheYear(date_t date); + //! Extract the ISO week number + //! ISO weeks start on Monday and the first week of a year + //! contains January 4 of that year. + //! In the ISO week-numbering system, it is possible for early-January dates + //! to be part of the 52nd or 53rd week of the previous year. + static int32_t ExtractISOWeekNumber(date_t date); + //! Extract the week number as Python handles it. + //! Either Monday or Sunday is the first day of the week, + //! and any date before the first Monday/Sunday returns week 0 + //! 
This is a bit more consistent because week numbers in a year are always incrementing + static int32_t ExtractWeekNumberRegular(date_t date, bool monday_first = true); + //! Returns the date of the monday of the current week. + static date_t GetMondayOfCurrentWeek(date_t date); - string ToSQL() override; + //! Helper function to parse two digits from a string (e.g. "30" -> 30, "03" -> 3, "3" -> 3) + static bool ParseDoubleDigit(const char *buf, idx_t len, idx_t &pos, int32_t &result); - //! Creates an index with the given name in the schema - CatalogEntry *CreateIndex(ClientContext &context, CreateIndexInfo *info, TableCatalogEntry *table); + static string ConversionError(const string &str); + static string ConversionError(string_t str); private: - //! Create a scalar or aggregate function within the given schema - CatalogEntry *CreateFunction(ClientContext &context, CreateFunctionInfo *info); - //! Creates a table with the given name in the schema - CatalogEntry *CreateTable(ClientContext &context, BoundCreateTableInfo *info); - //! Creates a view with the given name in the schema - CatalogEntry *CreateView(ClientContext &context, CreateViewInfo *info); - //! Creates a sequence with the given name in the schema - CatalogEntry *CreateSequence(ClientContext &context, CreateSequenceInfo *info); - //! Create a table function within the given schema - CatalogEntry *CreateTableFunction(ClientContext &context, CreateTableFunctionInfo *info); - //! Create a copy function within the given schema - CatalogEntry *CreateCopyFunction(ClientContext &context, CreateCopyFunctionInfo *info); - //! Create a pragma function within the given schema - CatalogEntry *CreatePragmaFunction(ClientContext &context, CreatePragmaFunctionInfo *info); - //! Create a collation within the given schema - CatalogEntry *CreateCollation(ClientContext &context, CreateCollationInfo *info); - - //! Drops an entry from the schema - void DropEntry(ClientContext &context, DropInfo *info); - - //! 
Alters a catalog entry - void Alter(ClientContext &context, AlterInfo *info); - - //! Add a catalog entry to this schema - CatalogEntry *AddEntry(ClientContext &context, unique_ptr entry, OnCreateConflict on_conflict); - //! Add a catalog entry to this schema - CatalogEntry *AddEntry(ClientContext &context, unique_ptr entry, OnCreateConflict on_conflict, - unordered_set dependencies); - - //! Get the catalog set for the specified type - CatalogSet &GetCatalogSet(CatalogType type); + static void ExtractYearOffset(int32_t &n, int32_t &year, int32_t &year_offset); }; } // namespace duckdb - - //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/deque.hpp +// duckdb/common/arrow.hpp // // //===----------------------------------------------------------------------===// +#ifndef ARROW_FLAG_DICTIONARY_ORDERED +#include -#include +#ifdef __cplusplus +extern "C" { +#endif -namespace duckdb { -using std::deque; -} -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/enums/output_type.hpp -// -// -//===----------------------------------------------------------------------===// +#define ARROW_FLAG_DICTIONARY_ORDERED 1 +#define ARROW_FLAG_NULLABLE 2 +#define ARROW_FLAG_MAP_KEYS_SORTED 4 +struct ArrowSchema { + // Array type description + const char *format; + const char *name; + const char *metadata; + int64_t flags; + int64_t n_children; + struct ArrowSchema **children; + struct ArrowSchema *dictionary; + // Release callback + void (*release)(struct ArrowSchema *); + // Opaque producer-specific data + void *private_data; +}; +struct ArrowArray { + // Array data description + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void **buffers; + struct ArrowArray **children; + struct ArrowArray *dictionary; + // Release callback + void (*release)(struct ArrowArray *); + // Opaque producer-specific data + 
void *private_data; +}; -namespace duckdb { +// EXPERIMENTAL +struct ArrowArrayStream { + // Callback to get the stream type + // (will be the same for all arrays in the stream). + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + int (*get_schema)(struct ArrowArrayStream *, struct ArrowSchema *out); + // Callback to get the next array + // (if no error and the array is released, the stream has ended) + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + int (*get_next)(struct ArrowArrayStream *, struct ArrowArray *out); -enum class ExplainOutputType : uint8_t { ALL = 0, OPTIMIZED_ONLY = 1, PHYSICAL_ONLY = 2 }; + // Callback to get optional detailed error information. + // This must only be called if the last stream operation failed + // with a non-0 return code. The returned pointer is only valid until + // the next operation on this stream (including release). + // If unavailable, NULL is returned. + const char *(*get_last_error)(struct ArrowArrayStream *); -} // namespace duckdb + // Release callback: release the stream's own resources. + // Note that arrays returned by `get_next` must be individually released. + void (*release)(struct ArrowArrayStream *); + // Opaque producer-specific data + void *private_data; +}; + +#ifdef __cplusplus +} +#endif +#endif //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/pair.hpp +// duckdb/common/types/blob.hpp // // //===----------------------------------------------------------------------===// -#include + + namespace duckdb { -using std::make_pair; -using std::pair; -} // namespace duckdb +//! The Blob class is a static class that holds helper functions for the Blob type. +class Blob { +public: + // map of integer -> hex value + static constexpr const char *HEX_TABLE = "0123456789ABCDEF"; + // reverse map of byte -> integer value, or -1 for invalid hex values + static const int HEX_MAP[256]; + //! 
map of index -> base64 character + static constexpr const char *BASE64_MAP = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + //! padding character used in base64 encoding + static constexpr const char BASE64_PADDING = '='; + +public: + //! Returns the string size of a blob -> string conversion + static idx_t GetStringSize(string_t blob); + //! Converts a blob to a string, writing the output to the designated output string. + //! The string needs to have space for at least GetStringSize(blob) bytes. + static void ToString(string_t blob, char *output); + //! Convert a blob object to a string + static string ToString(string_t blob); + + //! Returns the blob size of a string -> blob conversion + static bool TryGetBlobSize(string_t str, idx_t &result_size, string *error_message); + static idx_t GetBlobSize(string_t str); + //! Convert a string to a blob. This function should ONLY be called after calling GetBlobSize, since it does NOT + //! perform data validation. + static void ToBlob(string_t str, data_ptr_t output); + //! Convert a string object to a blob + static string ToBlob(string_t str); + + // base 64 conversion functions + //! Returns the string size of a blob -> base64 conversion + static idx_t ToBase64Size(string_t blob); + //! Converts a blob to a base64 string, output should have space for at least ToBase64Size(blob) bytes + static void ToBase64(string_t blob, char *output); + + //! Returns the string size of a base64 string -> blob conversion + static idx_t FromBase64Size(string_t str); + //! 
Converts a base64 string to a blob, output should have space for at least FromBase64Size(blob) bytes + static void FromBase64(string_t str, data_ptr_t output, idx_t output_size); +}; +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/progress_bar.hpp +// duckdb/common/types/decimal.hpp // // //===----------------------------------------------------------------------===// -#ifndef DUCKDB_NO_THREADS -#include -#include -#endif +namespace duckdb { + +//! The Decimal class is a static class that holds helper functions for the Decimal type +class Decimal { +public: + static constexpr uint8_t MAX_WIDTH_INT16 = 4; + static constexpr uint8_t MAX_WIDTH_INT32 = 9; + static constexpr uint8_t MAX_WIDTH_INT64 = 18; + static constexpr uint8_t MAX_WIDTH_INT128 = 38; + static constexpr uint8_t MAX_WIDTH_DECIMAL = MAX_WIDTH_INT128; + +public: + static string ToString(int16_t value, uint8_t scale); + static string ToString(int32_t value, uint8_t scale); + static string ToString(int64_t value, uint8_t scale); + static string ToString(hugeint_t value, uint8_t scale); +}; +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/execution/executor.hpp +// duckdb/common/types/timestamp.hpp // // //===----------------------------------------------------------------------===// @@ -9432,40 +10473,69 @@ using std::pair; -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/parallel/pipeline.hpp -// -// -//===----------------------------------------------------------------------===// - +namespace duckdb { -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/execution/physical_sink.hpp -// -// -//===----------------------------------------------------------------------===// +struct timestamp_struct { + int32_t year; + int8_t month; + 
int8_t day; + int8_t hour; + int8_t min; + int8_t sec; + int16_t msec; +}; +//! The Timestamp class is a static class that holds helper functions for the Timestamp +//! type. +class Timestamp { +public: + //! Convert a string in the format "YYYY-MM-DD hh:mm:ss" to a timestamp object + static timestamp_t FromString(const string &str); + static bool TryConvertTimestamp(const char *str, idx_t len, timestamp_t &result); + static timestamp_t FromCString(const char *str, idx_t len); + //! Convert a date object to a string in the format "YYYY-MM-DD hh:mm:ss" + static string ToString(timestamp_t timestamp); + static date_t GetDate(timestamp_t timestamp); + static dtime_t GetTime(timestamp_t timestamp); + //! Create a Timestamp object from a specified (date, time) combination + static timestamp_t FromDatetime(date_t date, dtime_t time); + static bool TryFromDatetime(date_t date, dtime_t time, timestamp_t &result); -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/execution/physical_operator.hpp -// -// -//===----------------------------------------------------------------------===// + //! Extract the date and time from a given timestamp object + static void Convert(timestamp_t date, date_t &out_date, dtime_t &out_time); + //! Returns current timestamp + static timestamp_t GetCurrentTimestamp(); + + //! Convert the epoch (in sec) to a timestamp + static timestamp_t FromEpochSeconds(int64_t ms); + //! Convert the epoch (in ms) to a timestamp + static timestamp_t FromEpochMs(int64_t ms); + //! Convert the epoch (in microseconds) to a timestamp + static timestamp_t FromEpochMicroSeconds(int64_t micros); + //! Convert the epoch (in nanoseconds) to a timestamp + static timestamp_t FromEpochNanoSeconds(int64_t micros); + //! Convert the epoch (in seconds) to a timestamp + static int64_t GetEpochSeconds(timestamp_t timestamp); + //! 
Convert the epoch (in ms) to a timestamp + static int64_t GetEpochMs(timestamp_t timestamp); + //! Convert a timestamp to epoch (in microseconds) + static int64_t GetEpochMicroSeconds(timestamp_t timestamp); + //! Convert a timestamp to epoch (in nanoseconds) + static int64_t GetEpochNanoSeconds(timestamp_t timestamp); + static bool TryParseUTCOffset(const char *str, idx_t &pos, idx_t len, int &hour_offset, int &minute_offset); + static string ConversionError(const string &str); + static string ConversionError(string_t str); +}; +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/catalog/catalog.hpp +// duckdb/common/types/time.hpp // // //===----------------------------------------------------------------------===// @@ -9475,162 +10545,86 @@ using std::pair; - -#include -#include - namespace duckdb { -struct CreateSchemaInfo; -struct DropInfo; -struct BoundCreateTableInfo; -struct AlterTableInfo; -struct CreateTableFunctionInfo; -struct CreateCopyFunctionInfo; -struct CreatePragmaFunctionInfo; -struct CreateFunctionInfo; -struct CreateViewInfo; -struct CreateSequenceInfo; -struct CreateCollationInfo; -class ClientContext; -class Transaction; +//! The Time class is a static class that holds helper functions for the Time +//! type. +class Time { +public: + //! 
Convert a string in the format "hh:mm:ss" to a time object + static dtime_t FromString(const string &str, bool strict = false); + static dtime_t FromCString(const char *buf, idx_t len, bool strict = false); + static bool TryConvertTime(const char *buf, idx_t len, idx_t &pos, dtime_t &result, bool strict = false); -class AggregateFunctionCatalogEntry; -class CollateCatalogEntry; -class SchemaCatalogEntry; -class TableCatalogEntry; -class ViewCatalogEntry; -class SequenceCatalogEntry; -class TableFunctionCatalogEntry; -class CopyFunctionCatalogEntry; -class PragmaFunctionCatalogEntry; -class CatalogSet; -class DatabaseInstance; -class DependencyManager; + //! Convert a time object to a string in the format "hh:mm:ss" + static string ToString(dtime_t time); -//! The Catalog object represents the catalog of the database. -class Catalog { -public: - explicit Catalog(DatabaseInstance &db); - ~Catalog(); + static dtime_t FromTime(int32_t hour, int32_t minute, int32_t second, int32_t microseconds = 0); - //! Reference to the database - DatabaseInstance &db; - //! The catalog set holding the schemas - unique_ptr schemas; - //! The DependencyManager manages dependencies between different catalog objects - unique_ptr dependency_manager; - //! Write lock for the catalog - mutex write_lock; + //! Extract the time from a given timestamp object + static void Convert(dtime_t time, int32_t &out_hour, int32_t &out_min, int32_t &out_sec, int32_t &out_micros); -public: - //! Get the ClientContext from the Catalog - static Catalog &GetCatalog(ClientContext &context); - static Catalog &GetCatalog(DatabaseInstance &db); + static string ConversionError(const string &str); + static string ConversionError(string_t str); - //! Returns the current version of the catalog (incremented whenever anything changes, not stored between restarts) - idx_t GetCatalogVersion(); - //! 
Trigger a modification in the catalog, increasing the catalog version - void ModifyCatalog(); +private: + static bool TryConvertInternal(const char *buf, idx_t len, idx_t &pos, dtime_t &result, bool strict); +}; - //! Creates a schema in the catalog. - CatalogEntry *CreateSchema(ClientContext &context, CreateSchemaInfo *info); - //! Creates a table in the catalog. - CatalogEntry *CreateTable(ClientContext &context, BoundCreateTableInfo *info); - //! Create a table function in the catalog - CatalogEntry *CreateTableFunction(ClientContext &context, CreateTableFunctionInfo *info); - //! Create a copy function in the catalog - CatalogEntry *CreateCopyFunction(ClientContext &context, CreateCopyFunctionInfo *info); - //! Create a pragma function in the catalog - CatalogEntry *CreatePragmaFunction(ClientContext &context, CreatePragmaFunctionInfo *info); - //! Create a scalar or aggregate function in the catalog - CatalogEntry *CreateFunction(ClientContext &context, CreateFunctionInfo *info); - //! Creates a table in the catalog. - CatalogEntry *CreateView(ClientContext &context, CreateViewInfo *info); - //! Creates a table in the catalog. - CatalogEntry *CreateSequence(ClientContext &context, CreateSequenceInfo *info); - //! Creates a collation in the catalog - CatalogEntry *CreateCollation(ClientContext &context, CreateCollationInfo *info); +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/serializer/buffered_serializer.hpp +// +// +//===----------------------------------------------------------------------===// - //! Creates a table in the catalog. - CatalogEntry *CreateTable(ClientContext &context, SchemaCatalogEntry *schema, BoundCreateTableInfo *info); - //! Create a table function in the catalog - CatalogEntry *CreateTableFunction(ClientContext &context, SchemaCatalogEntry *schema, - CreateTableFunctionInfo *info); - //! 
Create a copy function in the catalog - CatalogEntry *CreateCopyFunction(ClientContext &context, SchemaCatalogEntry *schema, CreateCopyFunctionInfo *info); - //! Create a pragma function in the catalog - CatalogEntry *CreatePragmaFunction(ClientContext &context, SchemaCatalogEntry *schema, - CreatePragmaFunctionInfo *info); - //! Create a scalar or aggregate function in the catalog - CatalogEntry *CreateFunction(ClientContext &context, SchemaCatalogEntry *schema, CreateFunctionInfo *info); - //! Creates a table in the catalog. - CatalogEntry *CreateView(ClientContext &context, SchemaCatalogEntry *schema, CreateViewInfo *info); - //! Creates a table in the catalog. - CatalogEntry *CreateSequence(ClientContext &context, SchemaCatalogEntry *schema, CreateSequenceInfo *info); - //! Creates a collation in the catalog - CatalogEntry *CreateCollation(ClientContext &context, SchemaCatalogEntry *schema, CreateCollationInfo *info); - //! Drops an entry from the catalog - void DropEntry(ClientContext &context, DropInfo *info); - //! Returns the schema object with the specified name, or throws an exception if it does not exist - SchemaCatalogEntry *GetSchema(ClientContext &context, const string &name = DEFAULT_SCHEMA, - QueryErrorContext error_context = QueryErrorContext()); - //! Scans all the schemas in the system one-by-one, invoking the callback for each entry - void ScanSchemas(ClientContext &context, std::function callback); - //! Gets the "schema.name" entry of the specified type, if if_exists=true returns nullptr if entry does not exist, - //! otherwise an exception is thrown - CatalogEntry *GetEntry(ClientContext &context, CatalogType type, string schema, const string &name, - bool if_exists = false, QueryErrorContext error_context = QueryErrorContext()); - template - T *GetEntry(ClientContext &context, string schema_name, const string &name, bool if_exists = false, - QueryErrorContext error_context = QueryErrorContext()); - //! Alter an existing entry in the catalog. 
- void Alter(ClientContext &context, AlterInfo *info); +namespace duckdb { -private: - //! The catalog version, incremented whenever anything changes in the catalog - std::atomic catalog_version; +#define SERIALIZER_DEFAULT_SIZE 1024 -private: - void DropSchema(ClientContext &context, DropInfo *info); +struct BinaryData { + unique_ptr data; + idx_t size; }; -template <> -TableCatalogEntry *Catalog::GetEntry(ClientContext &context, string schema_name, const string &name, bool if_exists, - QueryErrorContext error_context); -template <> -ViewCatalogEntry *Catalog::GetEntry(ClientContext &context, string schema_name, const string &name, bool if_exists, - QueryErrorContext error_context); -template <> -SequenceCatalogEntry *Catalog::GetEntry(ClientContext &context, string schema_name, const string &name, bool if_exists, - QueryErrorContext error_context); -template <> -TableFunctionCatalogEntry *Catalog::GetEntry(ClientContext &context, string schema_name, const string &name, - bool if_exists, QueryErrorContext error_context); -template <> -CopyFunctionCatalogEntry *Catalog::GetEntry(ClientContext &context, string schema_name, const string &name, - bool if_exists, QueryErrorContext error_context); -template <> -PragmaFunctionCatalogEntry *Catalog::GetEntry(ClientContext &context, string schema_name, const string &name, - bool if_exists, QueryErrorContext error_context); -template <> -AggregateFunctionCatalogEntry *Catalog::GetEntry(ClientContext &context, string schema_name, const string &name, - bool if_exists, QueryErrorContext error_context); -template <> -CollateCatalogEntry *Catalog::GetEntry(ClientContext &context, string schema_name, const string &name, bool if_exists, - QueryErrorContext error_context); +class BufferedSerializer : public Serializer { +public: + //! Serializes to a buffer allocated by the serializer, will expand when + //! writing past the initial threshold + explicit BufferedSerializer(idx_t maximum_size = SERIALIZER_DEFAULT_SIZE); + //! 
Serializes to a provided (owned) data pointer + BufferedSerializer(unique_ptr data, idx_t size); + BufferedSerializer(data_ptr_t data, idx_t size); -} // namespace duckdb + idx_t maximum_size; + data_ptr_t data; + + BinaryData blob; + +public: + void WriteData(const_data_ptr_t buffer, uint64_t write_size) override; + + //! Retrieves the data after the writing has been completed + BinaryData GetData() { + return std::move(blob); + } + void Reset() { + blob.size = 0; + } +}; +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/enums/physical_operator_type.hpp +// duckdb/main/appender.hpp // // //===----------------------------------------------------------------------===// @@ -9639,24 +10633,903 @@ CollateCatalogEntry *Catalog::GetEntry(ClientContext &context, string schema_nam + + + namespace duckdb { -//===--------------------------------------------------------------------===// -// Physical Operator Types -//===--------------------------------------------------------------------===// -enum class PhysicalOperatorType : uint8_t { - INVALID, - LEAF, - ORDER_BY, - LIMIT, - TOP_N, - AGGREGATE, - WINDOW, - UNNEST, - SIMPLE_AGGREGATE, - HASH_GROUP_BY, - PERFECT_HASH_GROUP_BY, - SORT_GROUP_BY, +class ClientContext; +class DuckDB; +class TableCatalogEntry; +class Connection; + +//! The Appender class can be used to append elements to a table. +class Appender { + //! The amount of chunks that will be gathered in the chunk collection before flushing + static constexpr const idx_t FLUSH_COUNT = 100; + + //! A reference to a database connection that created this appender + shared_ptr context; + //! The table description (including column names) + unique_ptr description; + //! The append types + vector types; + //! The buffered data for the append + ChunkCollection collection; + //! Internal chunk used for appends + unique_ptr chunk; + //! 
The current column to append to + idx_t column = 0; + +public: + DUCKDB_API Appender(Connection &con, const string &schema_name, const string &table_name); + DUCKDB_API Appender(Connection &con, const string &table_name); + DUCKDB_API ~Appender(); + + //! Begins a new row append, after calling this the other AppendX() functions + //! should be called the correct amount of times. After that, + //! EndRow() should be called. + DUCKDB_API void BeginRow(); + //! Finishes appending the current row. + DUCKDB_API void EndRow(); + + // Append functions + template + void Append(T value) { + throw Exception("Undefined type for Appender::Append!"); + } + + DUCKDB_API void Append(const char *value, uint32_t length); + + // prepared statements + template + void AppendRow(Args... args) { + BeginRow(); + AppendRowRecursive(args...); + } + + //! Commit the changes made by the appender. + DUCKDB_API void Flush(); + //! Flush the changes made by the appender and close it. The appender cannot be used after this point + DUCKDB_API void Close(); + + DUCKDB_API vector &GetTypes() { + return types; + } + DUCKDB_API idx_t CurrentColumn() { + return column; + } + +private: + void InitializeChunk(); + void FlushChunk(); + + template + void AppendValueInternal(T value); + template + void AppendValueInternal(Vector &vector, SRC input); + + void AppendRowRecursive() { + EndRow(); + } + + template + void AppendRowRecursive(T value, Args... 
args) { + Append(value); + AppendRowRecursive(args...); + } + + void AppendValue(const Value &value); +}; + +template <> +void DUCKDB_API Appender::Append(bool value); +template <> +void DUCKDB_API Appender::Append(int8_t value); +template <> +void DUCKDB_API Appender::Append(int16_t value); +template <> +void DUCKDB_API Appender::Append(int32_t value); +template <> +void DUCKDB_API Appender::Append(int64_t value); +template <> +void DUCKDB_API Appender::Append(hugeint_t value); +template <> +void DUCKDB_API Appender::Append(uint8_t value); +template <> +void DUCKDB_API Appender::Append(uint16_t value); +template <> +void DUCKDB_API Appender::Append(uint32_t value); +template <> +void DUCKDB_API Appender::Append(uint64_t value); +template <> +void DUCKDB_API Appender::Append(float value); +template <> +void DUCKDB_API Appender::Append(double value); +template <> +void DUCKDB_API Appender::Append(date_t value); +template <> +void DUCKDB_API Appender::Append(dtime_t value); +template <> +void DUCKDB_API Appender::Append(timestamp_t value); +template <> +void DUCKDB_API Appender::Append(interval_t value); +template <> +void DUCKDB_API Appender::Append(const char *value); +template <> +void DUCKDB_API Appender::Append(string_t value); +template <> +void DUCKDB_API Appender::Append(Value value); +template <> +void DUCKDB_API Appender::Append(std::nullptr_t value); + +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/main/client_context.hpp +// +// +//===----------------------------------------------------------------------===// + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/catalog/catalog_entry/schema_catalog_entry.hpp +// +// +//===----------------------------------------------------------------------===// + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// 
duckdb/catalog/catalog_entry.hpp +// +// +//===----------------------------------------------------------------------===// + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/enums/catalog_type.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { + +//===--------------------------------------------------------------------===// +// Catalog Types +//===--------------------------------------------------------------------===// +enum class CatalogType : uint8_t { + INVALID = 0, + TABLE_ENTRY = 1, + SCHEMA_ENTRY = 2, + VIEW_ENTRY = 3, + INDEX_ENTRY = 4, + PREPARED_STATEMENT = 5, + SEQUENCE_ENTRY = 6, + COLLATION_ENTRY = 7, + + // functions + TABLE_FUNCTION_ENTRY = 25, + SCALAR_FUNCTION_ENTRY = 26, + AGGREGATE_FUNCTION_ENTRY = 27, + PRAGMA_FUNCTION_ENTRY = 28, + COPY_FUNCTION_ENTRY = 29, + MACRO_ENTRY = 30, + + // version info + UPDATED_ENTRY = 50, + DELETED_ENTRY = 51, +}; + +string CatalogTypeToString(CatalogType type); + +} // namespace duckdb + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/atomic.hpp +// +// +//===----------------------------------------------------------------------===// + + + +#include + +namespace duckdb { +using std::atomic; +} + + +#include + +namespace duckdb { +struct AlterInfo; +class Catalog; +class CatalogSet; +class ClientContext; + +//! Abstract base class of an entry in the catalog +class CatalogEntry { +public: + CatalogEntry(CatalogType type, Catalog *catalog, string name); + virtual ~CatalogEntry(); + + //! The oid of the entry + idx_t oid; + //! The type of this catalog entry + CatalogType type; + //! Reference to the catalog this entry belongs to + Catalog *catalog; + //! Reference to the catalog set this entry is stored in + CatalogSet *set; + //! The name of the entry + string name; + //! 
Whether or not the object is deleted + bool deleted; + //! Whether or not the object is temporary and should not be added to the WAL + bool temporary; + //! Whether or not the entry is an internal entry (cannot be deleted, not dumped, etc) + bool internal; + //! Timestamp at which the catalog entry was created + atomic timestamp; + //! Child entry + unique_ptr child; + //! Parent entry (the node that owns this node) + CatalogEntry *parent; + +public: + virtual unique_ptr AlterEntry(ClientContext &context, AlterInfo *info); + + virtual unique_ptr Copy(ClientContext &context); + + //! Sets the CatalogEntry as the new root entry (i.e. the newest entry) + // this is called on a rollback to an AlterEntry + virtual void SetAsRoot(); + + //! Convert the catalog entry to a SQL string that can be used to re-construct the catalog entry + virtual string ToSQL(); +}; +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/catalog/catalog_set.hpp +// +// +//===----------------------------------------------------------------------===// + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/catalog/default/default_generator.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + +namespace duckdb { +class ClientContext; + +class DefaultGenerator { +public: + explicit DefaultGenerator(Catalog &catalog) : catalog(catalog), created_all_entries(false) { + } + virtual ~DefaultGenerator() { + } + + Catalog &catalog; + atomic created_all_entries; + +public: + //! Creates a default entry with the specified name, or returns nullptr if no such entry can be generated + virtual unique_ptr CreateDefaultEntry(ClientContext &context, const string &entry_name) = 0; + //! 
Get a list of all default entries in the generator + virtual vector GetDefaultEntries() = 0; +}; + +} // namespace duckdb + + + + + + +#include +#include + +namespace duckdb { +struct AlterInfo; + +class ClientContext; + +typedef unordered_map> set_lock_map_t; + +struct MappingValue { + explicit MappingValue(idx_t index_) : index(index_), timestamp(0), deleted(false), parent(nullptr) { + } + + idx_t index; + transaction_t timestamp; + bool deleted; + unique_ptr child; + MappingValue *parent; +}; + +//! The Catalog Set stores (key, value) map of a set of CatalogEntries +class CatalogSet { + friend class DependencyManager; + +public: + explicit CatalogSet(Catalog &catalog, unique_ptr defaults = nullptr); + + //! Create an entry in the catalog set. Returns whether or not it was + //! successful. + bool CreateEntry(ClientContext &context, const string &name, unique_ptr value, + unordered_set &dependencies); + + bool AlterEntry(ClientContext &context, const string &name, AlterInfo *alter_info); + + bool DropEntry(ClientContext &context, const string &name, bool cascade); + + void CleanupEntry(CatalogEntry *catalog_entry); + + //! Returns the entry with the specified name + CatalogEntry *GetEntry(ClientContext &context, const string &name); + + //! Gets the entry that is most similar to the given name (i.e. smallest levenshtein distance), or empty string if + //! none is found + string SimilarEntry(ClientContext &context, const string &name); + + //! Rollback to be the currently valid entry for a certain catalog + //! entry + void Undo(CatalogEntry *entry); + + //! Scan the catalog set, invoking the callback method for every committed entry + void Scan(const std::function &callback); + //! 
Scan the catalog set, invoking the callback method for every entry + void Scan(ClientContext &context, const std::function &callback); + + template + vector GetEntries(ClientContext &context) { + vector result; + Scan(context, [&](CatalogEntry *entry) { result.push_back((T *)entry); }); + return result; + } + + static bool HasConflict(ClientContext &context, transaction_t timestamp); + static bool UseTimestamp(ClientContext &context, transaction_t timestamp); + + CatalogEntry *GetEntryFromIndex(idx_t index); + void UpdateTimestamp(CatalogEntry *entry, transaction_t timestamp); + +private: + //! Given a root entry, gets the entry valid for this transaction + CatalogEntry *GetEntryForTransaction(ClientContext &context, CatalogEntry *current); + CatalogEntry *GetCommittedEntry(CatalogEntry *current); + bool GetEntryInternal(ClientContext &context, const string &name, idx_t &entry_index, CatalogEntry *&entry); + bool GetEntryInternal(ClientContext &context, idx_t entry_index, CatalogEntry *&entry); + //! Drops an entry from the catalog set; must hold the catalog_lock to safely call this + void DropEntryInternal(ClientContext &context, idx_t entry_index, CatalogEntry &entry, bool cascade, + set_lock_map_t &lock_set); + CatalogEntry *CreateEntryInternal(ClientContext &context, unique_ptr entry); + MappingValue *GetMapping(ClientContext &context, const string &name, bool allow_lowercase_alias, + bool get_latest = false); + void PutMapping(ClientContext &context, const string &name, idx_t entry_index); + void DeleteMapping(ClientContext &context, const string &name); + +private: + Catalog &catalog; + //! The catalog lock is used to make changes to the data + mutex catalog_lock; + //! Mapping of string to catalog entry + unordered_map> mapping; + //! The set of catalog entries + unordered_map> entries; + //! The current catalog entry index + idx_t current_entry = 0; + //! 
The generator used to generate default internal entries + unique_ptr defaults; +}; + +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/parser/query_error_context.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + +namespace duckdb { +class SQLStatement; + +class QueryErrorContext { +public: + explicit QueryErrorContext(SQLStatement *statement_ = nullptr, idx_t query_location_ = INVALID_INDEX) + : statement(statement_), query_location(query_location_) { + } + + //! The query statement + SQLStatement *statement; + //! The location in which the error should be thrown + idx_t query_location; + +public: + static string Format(const string &query, const string &error_message, int error_location); + + string FormatErrorRecursive(const string &msg, vector &values); + template + string FormatErrorRecursive(const string &msg, vector &values, T param, Args... params) { + values.push_back(ExceptionFormatValue::CreateFormatValue(param)); + return FormatErrorRecursive(msg, values, params...); + } + + template + string FormatError(const string &msg, Args... params) { + vector values; + return FormatErrorRecursive(msg, values, params...); + } +}; + +} // namespace duckdb + + +namespace duckdb { +class ClientContext; + +class StandardEntry; +class TableCatalogEntry; +class TableFunctionCatalogEntry; +class SequenceCatalogEntry; +class Serializer; +class Deserializer; + +enum class OnCreateConflict : uint8_t; + +struct AlterTableInfo; +struct CreateIndexInfo; +struct CreateFunctionInfo; +struct CreateCollationInfo; +struct CreateViewInfo; +struct BoundCreateTableInfo; +struct CreatePragmaFunctionInfo; +struct CreateSequenceInfo; +struct CreateSchemaInfo; +struct CreateTableFunctionInfo; +struct CreateCopyFunctionInfo; + +struct DropInfo; + +//! 
A schema in the catalog +class SchemaCatalogEntry : public CatalogEntry { + friend class Catalog; + +public: + SchemaCatalogEntry(Catalog *catalog, string name, bool is_internal); + +private: + //! The catalog set holding the tables + CatalogSet tables; + //! The catalog set holding the indexes + CatalogSet indexes; + //! The catalog set holding the table functions + CatalogSet table_functions; + //! The catalog set holding the copy functions + CatalogSet copy_functions; + //! The catalog set holding the pragma functions + CatalogSet pragma_functions; + //! The catalog set holding the scalar and aggregate functions + CatalogSet functions; + //! The catalog set holding the sequences + CatalogSet sequences; + //! The catalog set holding the collations + CatalogSet collations; + +public: + //! Gets a catalog entry from the given catalog set matching the given name + CatalogEntry *GetEntry(ClientContext &context, CatalogType type, const string &name, bool if_exists, + QueryErrorContext error_context = QueryErrorContext()); + + //! Scan the specified catalog set, invoking the callback method for every entry + void Scan(ClientContext &context, CatalogType type, const std::function &callback); + //! Scan the specified catalog set, invoking the callback method for every committed entry + void Scan(CatalogType type, const std::function &callback); + + //! Serialize the meta information of the SchemaCatalogEntry a serializer + virtual void Serialize(Serializer &serializer); + //! Deserializes to a CreateSchemaInfo + static unique_ptr Deserialize(Deserializer &source); + + string ToSQL() override; + + //! Creates an index with the given name in the schema + CatalogEntry *CreateIndex(ClientContext &context, CreateIndexInfo *info, TableCatalogEntry *table); + +private: + //! Create a scalar or aggregate function within the given schema + CatalogEntry *CreateFunction(ClientContext &context, CreateFunctionInfo *info); + //! 
Creates a table with the given name in the schema + CatalogEntry *CreateTable(ClientContext &context, BoundCreateTableInfo *info); + //! Creates a view with the given name in the schema + CatalogEntry *CreateView(ClientContext &context, CreateViewInfo *info); + //! Creates a sequence with the given name in the schema + CatalogEntry *CreateSequence(ClientContext &context, CreateSequenceInfo *info); + //! Create a table function within the given schema + CatalogEntry *CreateTableFunction(ClientContext &context, CreateTableFunctionInfo *info); + //! Create a copy function within the given schema + CatalogEntry *CreateCopyFunction(ClientContext &context, CreateCopyFunctionInfo *info); + //! Create a pragma function within the given schema + CatalogEntry *CreatePragmaFunction(ClientContext &context, CreatePragmaFunctionInfo *info); + //! Create a collation within the given schema + CatalogEntry *CreateCollation(ClientContext &context, CreateCollationInfo *info); + + //! Drops an entry from the schema + void DropEntry(ClientContext &context, DropInfo *info); + + //! Alters a catalog entry + void Alter(ClientContext &context, AlterInfo *info); + + //! Add a catalog entry to this schema + CatalogEntry *AddEntry(ClientContext &context, unique_ptr entry, OnCreateConflict on_conflict); + //! Add a catalog entry to this schema + CatalogEntry *AddEntry(ClientContext &context, unique_ptr entry, OnCreateConflict on_conflict, + unordered_set dependencies); + + //! 
Get the catalog set for the specified type + CatalogSet &GetCatalogSet(CatalogType type); +}; +} // namespace duckdb + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/deque.hpp +// +// +//===----------------------------------------------------------------------===// + + + +#include + +namespace duckdb { +using std::deque; +} +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/enums/output_type.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { + +enum class ExplainOutputType : uint8_t { ALL = 0, OPTIMIZED_ONLY = 1, PHYSICAL_ONLY = 2 }; + +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/pair.hpp +// +// +//===----------------------------------------------------------------------===// + + + +#include + +namespace duckdb { +using std::make_pair; +using std::pair; +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/progress_bar.hpp +// +// +//===----------------------------------------------------------------------===// + + + +#ifndef DUCKDB_NO_THREADS +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/thread.hpp +// +// +//===----------------------------------------------------------------------===// + + + +#include + +namespace duckdb { +using std::thread; +} + +#include +#endif + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/executor.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// 
duckdb/parallel/pipeline.hpp +// +// +//===----------------------------------------------------------------------===// + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/physical_sink.hpp +// +// +//===----------------------------------------------------------------------===// + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/physical_operator.hpp +// +// +//===----------------------------------------------------------------------===// + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/catalog/catalog.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + +#include + + +namespace duckdb { +struct CreateSchemaInfo; +struct DropInfo; +struct BoundCreateTableInfo; +struct AlterTableInfo; +struct CreateTableFunctionInfo; +struct CreateCopyFunctionInfo; +struct CreatePragmaFunctionInfo; +struct CreateFunctionInfo; +struct CreateViewInfo; +struct CreateSequenceInfo; +struct CreateCollationInfo; + +class ClientContext; +class Transaction; + +class AggregateFunctionCatalogEntry; +class CollateCatalogEntry; +class SchemaCatalogEntry; +class TableCatalogEntry; +class ViewCatalogEntry; +class SequenceCatalogEntry; +class TableFunctionCatalogEntry; +class CopyFunctionCatalogEntry; +class PragmaFunctionCatalogEntry; +class CatalogSet; +class DatabaseInstance; +class DependencyManager; + +//! The Catalog object represents the catalog of the database. +class Catalog { +public: + explicit Catalog(DatabaseInstance &db); + ~Catalog(); + + //! Reference to the database + DatabaseInstance &db; + //! The catalog set holding the schemas + unique_ptr schemas; + //! The DependencyManager manages dependencies between different catalog objects + unique_ptr dependency_manager; + //! 
Write lock for the catalog + mutex write_lock; + +public: + //! Get the ClientContext from the Catalog + static Catalog &GetCatalog(ClientContext &context); + static Catalog &GetCatalog(DatabaseInstance &db); + + DependencyManager &GetDependencyManager() { + return *dependency_manager; + } + + //! Returns the current version of the catalog (incremented whenever anything changes, not stored between restarts) + idx_t GetCatalogVersion(); + //! Trigger a modification in the catalog, increasing the catalog version and returning the previous version + idx_t ModifyCatalog(); + + //! Creates a schema in the catalog. + CatalogEntry *CreateSchema(ClientContext &context, CreateSchemaInfo *info); + //! Creates a table in the catalog. + CatalogEntry *CreateTable(ClientContext &context, BoundCreateTableInfo *info); + //! Create a table function in the catalog + CatalogEntry *CreateTableFunction(ClientContext &context, CreateTableFunctionInfo *info); + //! Create a copy function in the catalog + CatalogEntry *CreateCopyFunction(ClientContext &context, CreateCopyFunctionInfo *info); + //! Create a pragma function in the catalog + CatalogEntry *CreatePragmaFunction(ClientContext &context, CreatePragmaFunctionInfo *info); + //! Create a scalar or aggregate function in the catalog + CatalogEntry *CreateFunction(ClientContext &context, CreateFunctionInfo *info); + //! Creates a table in the catalog. + CatalogEntry *CreateView(ClientContext &context, CreateViewInfo *info); + //! Creates a table in the catalog. + CatalogEntry *CreateSequence(ClientContext &context, CreateSequenceInfo *info); + //! Creates a collation in the catalog + CatalogEntry *CreateCollation(ClientContext &context, CreateCollationInfo *info); + + //! Creates a table in the catalog. + CatalogEntry *CreateTable(ClientContext &context, SchemaCatalogEntry *schema, BoundCreateTableInfo *info); + //! 
Create a table function in the catalog + CatalogEntry *CreateTableFunction(ClientContext &context, SchemaCatalogEntry *schema, + CreateTableFunctionInfo *info); + //! Create a copy function in the catalog + CatalogEntry *CreateCopyFunction(ClientContext &context, SchemaCatalogEntry *schema, CreateCopyFunctionInfo *info); + //! Create a pragma function in the catalog + CatalogEntry *CreatePragmaFunction(ClientContext &context, SchemaCatalogEntry *schema, + CreatePragmaFunctionInfo *info); + //! Create a scalar or aggregate function in the catalog + CatalogEntry *CreateFunction(ClientContext &context, SchemaCatalogEntry *schema, CreateFunctionInfo *info); + //! Creates a table in the catalog. + CatalogEntry *CreateView(ClientContext &context, SchemaCatalogEntry *schema, CreateViewInfo *info); + //! Creates a table in the catalog. + CatalogEntry *CreateSequence(ClientContext &context, SchemaCatalogEntry *schema, CreateSequenceInfo *info); + //! Creates a collation in the catalog + CatalogEntry *CreateCollation(ClientContext &context, SchemaCatalogEntry *schema, CreateCollationInfo *info); + + //! Drops an entry from the catalog + void DropEntry(ClientContext &context, DropInfo *info); + + //! Returns the schema object with the specified name, or throws an exception if it does not exist + SchemaCatalogEntry *GetSchema(ClientContext &context, const string &name = DEFAULT_SCHEMA, + QueryErrorContext error_context = QueryErrorContext()); + //! Scans all the schemas in the system one-by-one, invoking the callback for each entry + void ScanSchemas(ClientContext &context, std::function callback); + //! Gets the "schema.name" entry of the specified type, if if_exists=true returns nullptr if entry does not exist, + //! 
otherwise an exception is thrown + CatalogEntry *GetEntry(ClientContext &context, CatalogType type, string schema, const string &name, + bool if_exists = false, QueryErrorContext error_context = QueryErrorContext()); + + template + T *GetEntry(ClientContext &context, string schema_name, const string &name, bool if_exists = false, + QueryErrorContext error_context = QueryErrorContext()); + + //! Alter an existing entry in the catalog. + void Alter(ClientContext &context, AlterInfo *info); + +private: + //! The catalog version, incremented whenever anything changes in the catalog + atomic catalog_version; + +private: + void DropSchema(ClientContext &context, DropInfo *info); +}; + +template <> +TableCatalogEntry *Catalog::GetEntry(ClientContext &context, string schema_name, const string &name, bool if_exists, + QueryErrorContext error_context); +template <> +SequenceCatalogEntry *Catalog::GetEntry(ClientContext &context, string schema_name, const string &name, bool if_exists, + QueryErrorContext error_context); +template <> +TableFunctionCatalogEntry *Catalog::GetEntry(ClientContext &context, string schema_name, const string &name, + bool if_exists, QueryErrorContext error_context); +template <> +CopyFunctionCatalogEntry *Catalog::GetEntry(ClientContext &context, string schema_name, const string &name, + bool if_exists, QueryErrorContext error_context); +template <> +PragmaFunctionCatalogEntry *Catalog::GetEntry(ClientContext &context, string schema_name, const string &name, + bool if_exists, QueryErrorContext error_context); +template <> +AggregateFunctionCatalogEntry *Catalog::GetEntry(ClientContext &context, string schema_name, const string &name, + bool if_exists, QueryErrorContext error_context); +template <> +CollateCatalogEntry *Catalog::GetEntry(ClientContext &context, string schema_name, const string &name, bool if_exists, + QueryErrorContext error_context); + +} // namespace duckdb + + 
+//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/enums/physical_operator_type.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { + +//===--------------------------------------------------------------------===// +// Physical Operator Types +//===--------------------------------------------------------------------===// +enum class PhysicalOperatorType : uint8_t { + INVALID, + ORDER_BY, + LIMIT, + TOP_N, + WINDOW, + UNNEST, + SIMPLE_AGGREGATE, + HASH_GROUP_BY, + PERFECT_HASH_GROUP_BY, FILTER, PROJECTION, COPY_TO_FILE, @@ -9670,8 +11543,6 @@ enum class PhysicalOperatorType : uint8_t { CHUNK_SCAN, RECURSIVE_CTE_SCAN, DELIM_SCAN, - EXTERNAL_FILE_SCAN, - QUERY_DERIVED_SCAN, EXPRESSION_SCAN, // ----------------------------- // Joins @@ -9689,54 +11560,420 @@ enum class PhysicalOperatorType : uint8_t { UNION, RECURSIVE_CTE, - // ----------------------------- - // Updates - // ----------------------------- - INSERT, - INSERT_SELECT, - DELETE_OPERATOR, - UPDATE, - EXPORT_EXTERNAL_FILE, + // ----------------------------- + // Updates + // ----------------------------- + INSERT, + DELETE_OPERATOR, + UPDATE, + + // ----------------------------- + // Schema + // ----------------------------- + CREATE_TABLE, + CREATE_TABLE_AS, + CREATE_INDEX, + ALTER, + CREATE_SEQUENCE, + CREATE_VIEW, + CREATE_SCHEMA, + CREATE_MACRO, + DROP, + PRAGMA, + TRANSACTION, + + // ----------------------------- + // Helpers + // ----------------------------- + EXPLAIN, + EMPTY_RESULT, + EXECUTE, + PREPARE, + VACUUM, + EXPORT, + SET, + LOAD, + INOUT_FUNCTION +}; + +string PhysicalOperatorToString(PhysicalOperatorType type); + +} // namespace duckdb + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/execution_context.hpp +// +// 
+//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { +class ClientContext; +class ThreadContext; +class TaskContext; + +class ExecutionContext { +public: + ExecutionContext(ClientContext &client_p, ThreadContext &thread_p, TaskContext &task_p) + : client(client_p), thread(thread_p), task(task_p) { + } + + //! The client-global context; caution needs to be taken when used in parallel situations + ClientContext &client; + //! The thread-local context for this execution + ThreadContext &thread; + //! The task context for this execution + TaskContext &task; +}; + +} // namespace duckdb + + +#include +#include + +namespace duckdb { +class ExpressionExecutor; +class PhysicalOperator; + +//! The current state/context of the operator. The PhysicalOperatorState is +//! updated using the GetChunk function, and allows the caller to repeatedly +//! call the GetChunk function and get new batches of data everytime until the +//! data source is exhausted. +class PhysicalOperatorState { +public: + PhysicalOperatorState(PhysicalOperator &op, PhysicalOperator *child); + virtual ~PhysicalOperatorState() = default; + + //! Flag indicating whether or not the operator is finished [note: not all + //! operators use this flag] + bool finished; + //! DataChunk that stores data from the child of this operator + DataChunk child_chunk; + //! State of the child of this operator + unique_ptr child_state; +}; + +//! PhysicalOperator is the base class of the physical operators present in the +//! execution plan +/*! + The execution model is a pull-based execution model. GetChunk is called on + the root node, which causes the root node to be executed, and presumably call + GetChunk again on its child nodes. Every node in the operator chain has a + state that is updated as GetChunk is called: PhysicalOperatorState (different + operators subclass this state and add different properties). 
+*/ +class PhysicalOperator { +public: + PhysicalOperator(PhysicalOperatorType type, vector types, idx_t estimated_cardinality) + : type(type), types(std::move(types)), estimated_cardinality(estimated_cardinality) { + } + virtual ~PhysicalOperator() { + } + + //! The physical operator type + PhysicalOperatorType type; + //! The set of children of the operator + vector> children; + //! The types returned by this physical operator + vector types; + //! The extimated cardinality of this physical operator + idx_t estimated_cardinality; + +public: + virtual string GetName() const; + virtual string ParamsToString() const { + return ""; + } + virtual string ToString() const; + void Print(); + + //! Return a vector of the types that will be returned by this operator + vector &GetTypes() { + return types; + } + //! Initialize a given chunk to the types that will be returned by this + //! operator, this will prepare chunk for a call to GetChunk. This method + //! only has to be called once for any amount of calls to GetChunk. + virtual void InitializeChunk(DataChunk &chunk) { + auto &types = GetTypes(); + chunk.Initialize(types); + } + //! Retrieves a chunk from this operator and stores it in the chunk + //! variable. + virtual void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const = 0; + + void GetChunk(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) const; + + //! Create a new empty instance of the operator state + virtual unique_ptr GetOperatorState() { + return make_unique(*this, children.size() == 0 ? 
nullptr : children[0].get()); + } + + virtual void FinalizeOperatorState(PhysicalOperatorState &state, ExecutionContext &context) { + if (!children.empty() && state.child_state) { + children[0]->FinalizeOperatorState(*state.child_state, context); + } + } + + virtual bool IsSink() const { + return false; + } +}; + +} // namespace duckdb + + +namespace duckdb { + +class Pipeline; + +class GlobalOperatorState { +public: + virtual ~GlobalOperatorState() { + } +}; + +class LocalSinkState { +public: + virtual ~LocalSinkState() { + } +}; + +class PhysicalSink : public PhysicalOperator { +public: + PhysicalSink(PhysicalOperatorType type, vector types, idx_t estimated_cardinality) + : PhysicalOperator(type, move(types), estimated_cardinality) { + } + + unique_ptr sink_state; + +public: + //! The sink method is called constantly with new input, as long as new input is available. Note that this method + //! CAN be called in parallel, proper locking is needed when accessing data inside the GlobalOperatorState. + virtual void Sink(ExecutionContext &context, GlobalOperatorState &gstate, LocalSinkState &lstate, + DataChunk &input) const = 0; + // The combine is called when a single thread has completed execution of its part of the pipeline, it is the final + // time that a specific LocalSinkState is accessible. This method can be called in parallel while other Sink() or + // Combine() calls are active on the same GlobalOperatorState. + virtual void Combine(ExecutionContext &context, GlobalOperatorState &gstate, LocalSinkState &lstate) { + } + //! The finalize is called when ALL threads are finished execution. It is called only once per pipeline, and is + //! entirely single threaded. 
+ virtual bool Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr gstate) { + this->sink_state = move(gstate); + return true; + } + + virtual unique_ptr GetLocalSinkState(ExecutionContext &context) { + return make_unique(); + } + virtual unique_ptr GetGlobalState(ClientContext &context) { + return make_unique(); + } + + bool IsSink() const override { + return true; + } + + void Schedule(ClientContext &context); +}; + +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/function/table_function.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + +#include + +namespace duckdb { +class BaseStatistics; +class LogicalGet; +struct ParallelState; +class TableFilterSet; + +struct FunctionOperatorData { + virtual ~FunctionOperatorData() { + } +}; + +struct TableFilterCollection { + TableFilterSet *table_filters; + explicit TableFilterCollection(TableFilterSet *table_filters) : table_filters(table_filters) { + } +}; + +typedef unique_ptr (*table_function_bind_t)(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, + vector &return_types, vector &names); +typedef unique_ptr (*table_function_init_t)(ClientContext &context, const FunctionData *bind_data, + const vector &column_ids, + TableFilterCollection *filters); +typedef unique_ptr (*table_statistics_t)(ClientContext &context, const FunctionData *bind_data, + column_t column_index); +typedef void (*table_function_t)(ClientContext &context, const FunctionData *bind_data, + FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output); + +typedef void (*table_function_parallel_t)(ClientContext &context, const FunctionData *bind_data, + FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output, + ParallelState *parallel_state); + +typedef void 
(*table_function_cleanup_t)(ClientContext &context, const FunctionData *bind_data, + FunctionOperatorData *operator_state); +typedef idx_t (*table_function_max_threads_t)(ClientContext &context, const FunctionData *bind_data); +typedef unique_ptr (*table_function_init_parallel_state_t)(ClientContext &context, + const FunctionData *bind_data); +typedef unique_ptr (*table_function_init_parallel_t)(ClientContext &context, + const FunctionData *bind_data, + ParallelState *state, + const vector &column_ids, + TableFilterCollection *filters); +typedef bool (*table_function_parallel_state_next_t)(ClientContext &context, const FunctionData *bind_data, + FunctionOperatorData *state, ParallelState *parallel_state); +typedef int (*table_function_progress_t)(ClientContext &context, const FunctionData *bind_data); +typedef void (*table_function_dependency_t)(unordered_set &dependencies, const FunctionData *bind_data); +typedef unique_ptr (*table_function_cardinality_t)(ClientContext &context, + const FunctionData *bind_data); +typedef void (*table_function_pushdown_complex_filter_t)(ClientContext &context, LogicalGet &get, + FunctionData *bind_data, + vector> &filters); +typedef string (*table_function_to_string_t)(const FunctionData *bind_data); + +class TableFunction : public SimpleNamedParameterFunction { +public: + TableFunction(string name, vector arguments, table_function_t function, + table_function_bind_t bind = nullptr, table_function_init_t init = nullptr, + table_statistics_t statistics = nullptr, table_function_cleanup_t cleanup = nullptr, + table_function_dependency_t dependency = nullptr, table_function_cardinality_t cardinality = nullptr, + table_function_pushdown_complex_filter_t pushdown_complex_filter = nullptr, + table_function_to_string_t to_string = nullptr, table_function_max_threads_t max_threads = nullptr, + table_function_init_parallel_state_t init_parallel_state = nullptr, + table_function_parallel_t parallel_function = nullptr, + 
table_function_init_parallel_t parallel_init = nullptr, + table_function_parallel_state_next_t parallel_state_next = nullptr, bool projection_pushdown = false, + bool filter_pushdown = false, table_function_progress_t query_progress = nullptr) + : SimpleNamedParameterFunction(std::move(name), move(arguments)), bind(bind), init(init), function(function), + statistics(statistics), cleanup(cleanup), dependency(dependency), cardinality(cardinality), + pushdown_complex_filter(pushdown_complex_filter), to_string(to_string), max_threads(max_threads), + init_parallel_state(init_parallel_state), parallel_function(parallel_function), parallel_init(parallel_init), + parallel_state_next(parallel_state_next), table_scan_progress(query_progress), + projection_pushdown(projection_pushdown), filter_pushdown(filter_pushdown) { + } + TableFunction(const vector &arguments, table_function_t function, table_function_bind_t bind = nullptr, + table_function_init_t init = nullptr, table_statistics_t statistics = nullptr, + table_function_cleanup_t cleanup = nullptr, table_function_dependency_t dependency = nullptr, + table_function_cardinality_t cardinality = nullptr, + table_function_pushdown_complex_filter_t pushdown_complex_filter = nullptr, + table_function_to_string_t to_string = nullptr, table_function_max_threads_t max_threads = nullptr, + table_function_init_parallel_state_t init_parallel_state = nullptr, + table_function_parallel_t parallel_function = nullptr, + table_function_init_parallel_t parallel_init = nullptr, + table_function_parallel_state_next_t parallel_state_next = nullptr, bool projection_pushdown = false, + bool filter_pushdown = false, table_function_progress_t query_progress = nullptr) + : TableFunction(string(), arguments, function, bind, init, statistics, cleanup, dependency, cardinality, + pushdown_complex_filter, to_string, max_threads, init_parallel_state, parallel_function, + parallel_init, parallel_state_next, projection_pushdown, filter_pushdown, 
query_progress) { + } + TableFunction() : SimpleNamedParameterFunction("", {}) { + } + + //! Bind function + //! This function is used for determining the return type of a table producing function and returning bind data + //! The returned FunctionData object should be constant and should not be changed during execution. + table_function_bind_t bind; + //! (Optional) init function + //! Initialize the operator state of the function. The operator state is used to keep track of the progress in the + //! table function. + table_function_init_t init; + //! The main function + table_function_t function; + //! (Optional) statistics function + //! Returns the statistics of a specified column + table_statistics_t statistics; + //! (Optional) cleanup function + //! The final cleanup function, called after all data is exhausted from the main function + table_function_cleanup_t cleanup; + //! (Optional) dependency function + //! Sets up which catalog entries this table function depend on + table_function_dependency_t dependency; + //! (Optional) cardinality function + //! Returns the expected cardinality of this scan + table_function_cardinality_t cardinality; + //! (Optional) pushdown a set of arbitrary filter expressions, rather than only simple comparisons with a constant + //! Any functions remaining in the expression list will be pushed as a regular filter after the scan + table_function_pushdown_complex_filter_t pushdown_complex_filter; + //! (Optional) function for rendering the operator to a string in profiling output + table_function_to_string_t to_string; + //! (Optional) function that returns the maximum amount of threads that can work on this task + table_function_max_threads_t max_threads; + //! (Optional) initialize the parallel scan state, called once in total. + table_function_init_parallel_state_t init_parallel_state; + //! (Optional) Parallel version of the main function + table_function_parallel_t parallel_function; + //! 
(Optional) initialize the parallel scan given the parallel state. Called once per task. Return nullptr if there + //! is nothing left to scan. + table_function_init_parallel_t parallel_init; + //! (Optional) return the next chunk to process in the parallel scan, or return nullptr if there is none + table_function_parallel_state_next_t parallel_state_next; + //! (Optional) return how much of the table we have scanned up to this point (% of the data) + table_function_progress_t table_scan_progress; + //! Whether or not the table function supports projection pushdown. If not supported a projection will be added + //! that filters out unused columns. + bool projection_pushdown; + //! Whether or not the table function supports filter pushdown. If not supported a filter will be added + //! that applies the table filter directly. + bool filter_pushdown; + + string ToString() override { + return SimpleNamedParameterFunction::ToString(); + } +}; - // ----------------------------- - // Schema - // ----------------------------- - CREATE_TABLE, - CREATE_TABLE_AS, - CREATE_INDEX, - ALTER, - CREATE_SEQUENCE, - CREATE_VIEW, - CREATE_SCHEMA, - CREATE_MACRO, - DROP, - PRAGMA, - TRANSACTION, +} // namespace duckdb - // ----------------------------- - // Helpers - // ----------------------------- - EXPLAIN, - EMPTY_RESULT, - EXECUTE, - PREPARE, - VACUUM, - EXPORT, - SET, - LOAD, - INOUT_FUNCTION -}; +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/parallel/parallel_state.hpp +// +// +//===----------------------------------------------------------------------===// -string PhysicalOperatorToString(PhysicalOperatorType type); -} // namespace duckdb +namespace duckdb { + +struct ParallelState { + virtual ~ParallelState() { + } +}; +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/execution/execution_context.hpp +// duckdb/parallel/task_scheduler.hpp // 
// //===----------------------------------------------------------------------===// @@ -9745,367 +11982,299 @@ string PhysicalOperatorToString(PhysicalOperatorType type); + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/parallel/task.hpp +// +// +//===----------------------------------------------------------------------===// + + + namespace duckdb { -class ClientContext; -class ThreadContext; -class TaskContext; -class ExecutionContext { +class Task { public: - ExecutionContext(ClientContext &client_p, ThreadContext &thread_p, TaskContext &task_p) - : client(client_p), thread(thread_p), task(task_p) { + virtual ~Task() { } - //! The client-global context; caution needs to be taken when used in parallel situations - ClientContext &client; - //! The thread-local context for this execution - ThreadContext &thread; - //! The task context for this execution - TaskContext &task; + //! Execute the task + virtual void Execute() = 0; }; } // namespace duckdb -#include -#include namespace duckdb { -class ExpressionExecutor; -class PhysicalOperator; -//! The current state/context of the operator. The PhysicalOperatorState is -//! updated using the GetChunk function, and allows the caller to repeatedly -//! call the GetChunk function and get new batches of data everytime until the -//! data source is exhausted. -class PhysicalOperatorState { -public: - PhysicalOperatorState(PhysicalOperator &op, PhysicalOperator *child); - virtual ~PhysicalOperatorState() = default; +struct ConcurrentQueue; +struct QueueProducerToken; +class ClientContext; +class TaskScheduler; - //! Flag indicating whether or not the operator is finished [note: not all - //! operators use this flag] - bool finished; - //! DataChunk that stores data from the child of this operator - DataChunk child_chunk; - //! State of the child of this operator - unique_ptr child_state; - //! 
The initial chunk - DataChunk initial_chunk; -}; +struct SchedulerThread; -//! PhysicalOperator is the base class of the physical operators present in the -//! execution plan -/*! - The execution model is a pull-based execution model. GetChunk is called on - the root node, which causes the root node to be executed, and presumably call - GetChunk again on its child nodes. Every node in the operator chain has a - state that is updated as GetChunk is called: PhysicalOperatorState (different - operators subclass this state and add different properties). -*/ -class PhysicalOperator { -public: - PhysicalOperator(PhysicalOperatorType type, vector types, idx_t estimated_cardinality) - : type(type), types(std::move(types)), estimated_cardinality(estimated_cardinality) { - } - virtual ~PhysicalOperator() { - } +struct ProducerToken { + ProducerToken(TaskScheduler &scheduler, unique_ptr token); + ~ProducerToken(); - //! The physical operator type - PhysicalOperatorType type; - //! The set of children of the operator - vector> children; - //! The types returned by this physical operator - vector types; - //! The extimated cardinality of this physical operator - idx_t estimated_cardinality; + TaskScheduler &scheduler; + unique_ptr token; + mutex producer_lock; +}; + +//! The TaskScheduler is responsible for managing tasks and threads +class TaskScheduler { + // timeout for semaphore wait, default 50ms + constexpr static int64_t TASK_TIMEOUT_USECS = 50000; public: - virtual string GetName() const; - virtual string ParamsToString() const { - return ""; - } - virtual string ToString() const; - void Print(); + TaskScheduler(); + ~TaskScheduler(); - //! Return a vector of the types that will be returned by this operator - vector &GetTypes() { - return types; - } - //! Initialize a given chunk to the types that will be returned by this - //! operator, this will prepare chunk for a call to GetChunk. This method - //! only has to be called once for any amount of calls to GetChunk. 
- virtual void InitializeChunk(DataChunk &chunk) { - auto &types = GetTypes(); - chunk.Initialize(types); - } - virtual void InitializeChunkEmpty(DataChunk &chunk) { - auto &types = GetTypes(); - chunk.InitializeEmpty(types); - } - //! Retrieves a chunk from this operator and stores it in the chunk - //! variable. - virtual void GetChunkInternal(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state) = 0; + static TaskScheduler &GetScheduler(ClientContext &context); - void GetChunk(ExecutionContext &context, DataChunk &chunk, PhysicalOperatorState *state); + unique_ptr CreateProducer(); + //! Schedule a task to be executed by the task scheduler + void ScheduleTask(ProducerToken &producer, unique_ptr task); + //! Fetches a task from a specific producer, returns true if successful or false if no tasks were available + bool GetTaskFromProducer(ProducerToken &token, unique_ptr &task); + //! Run tasks forever until "marker" is set to false, "marker" must remain valid until the thread is joined + void ExecuteForever(atomic *marker); - //! Create a new empty instance of the operator state - virtual unique_ptr GetOperatorState() { - return make_unique(*this, children.size() == 0 ? nullptr : children[0].get()); - } + //! Sets the amount of active threads executing tasks for the system; n-1 background threads will be launched. + //! The main thread will also be used for execution + void SetThreads(int32_t n); + //! Returns the number of threads + int32_t NumberOfThreads(); - virtual void FinalizeOperatorState(PhysicalOperatorState &state, ExecutionContext &context) { - if (!children.empty() && state.child_state) { - children[0]->FinalizeOperatorState(*state.child_state, context); - } - } +private: + void SetThreadsInternal(int32_t n); - virtual bool IsSink() const { - return false; - } + //! The task queue + unique_ptr queue; + //! The active background threads of the task scheduler + vector> threads; + //! 
Markers used by the various threads, if the markers are set to "false" the thread execution is stopped + vector>> markers; }; } // namespace duckdb + namespace duckdb { +class Executor; +class TaskContext; -class Pipeline; +//! The Pipeline class represents an execution pipeline +class Pipeline : public std::enable_shared_from_this { + friend class Executor; -class GlobalOperatorState { public: - virtual ~GlobalOperatorState() { - } -}; + Pipeline(Executor &execution_context, ProducerToken &token); -class LocalSinkState { -public: - virtual ~LocalSinkState() { - } -}; + Executor &executor; + ProducerToken &token; -class PhysicalSink : public PhysicalOperator { public: - PhysicalSink(PhysicalOperatorType type, vector types, idx_t estimated_cardinality) - : PhysicalOperator(type, move(types), estimated_cardinality) { + //! Execute a task within the pipeline on a single thread + void Execute(TaskContext &task); + + void AddDependency(shared_ptr &pipeline); + void CompleteDependency(); + bool HasDependencies() { + return !dependencies.empty(); } - unique_ptr sink_state; + void Reset(ClientContext &context); + void Schedule(); -public: - //! The sink method is called constantly with new input, as long as new input is available. Note that this method - //! CAN be called in parallel, proper locking is needed when accessing data inside the GlobalOperatorState. - virtual void Sink(ExecutionContext &context, GlobalOperatorState &gstate, LocalSinkState &lstate, - DataChunk &input) = 0; - // The combine is called when a single thread has completed execution of its part of the pipeline, it is the final - // time that a specific LocalSinkState is accessible. This method can be called in parallel while other Sink() or - // Combine() calls are active on the same GlobalOperatorState. - virtual void Combine(ExecutionContext &context, GlobalOperatorState &gstate, LocalSinkState &lstate) { + //! Finish a single task of this pipeline + void FinishTask(); + //! 
Finish executing this pipeline + void Finish(); + + string ToString() const; + void Print() const; + + void SetRecursiveCTE(PhysicalOperator *op) { + this->recursive_cte = op; } - //! The finalize is called when ALL threads are finished execution. It is called only once per pipeline, and is - //! entirely single threaded. - virtual void Finalize(Pipeline &pipeline, ClientContext &context, unique_ptr gstate) { - this->sink_state = move(gstate); + PhysicalOperator *GetRecursiveCTE() { + return recursive_cte; } + void ClearParents(); - virtual unique_ptr GetLocalSinkState(ExecutionContext &context) { - return make_unique(); - } - virtual unique_ptr GetGlobalState(ClientContext &context) { - return make_unique(); + void IncrementTasks(idx_t amount) { + this->total_tasks += amount; } - bool IsSink() const override { - return true; + bool IsFinished() { + return finished; } + //! Returns query progress + bool GetProgress(int ¤t_percentage); - void Schedule(ClientContext &context); +public: + //! The current threads working on the pipeline + atomic finished_tasks; + //! The maximum amount of threads that can work on the pipeline + atomic total_tasks; + +private: + //! The child from which to pull chunks + PhysicalOperator *child; + //! The global sink state + unique_ptr sink_state; + //! The sink (i.e. destination) for data; this is e.g. a hash table to-be-built + PhysicalSink *sink; + //! The parent pipelines (i.e. pipelines that are dependent on this pipeline to finish) + unordered_map> parents; + //! The dependencies of this pipeline + unordered_map> dependencies; + //! The amount of completed dependencies (the pipeline can only be started after the dependencies have finished + //! executing) + atomic finished_dependencies; + + //! The parallel operator (if any) + PhysicalOperator *parallel_node; + //! The parallel state (if any) + unique_ptr parallel_state; + + //! Whether or not the pipeline is finished executing + bool finished; + //! 
The recursive CTE node that this pipeline belongs to, and may be executed multiple times + PhysicalOperator *recursive_cte; + +private: + bool GetProgress(ClientContext &context, PhysicalOperator *op, int ¤t_percentage); + void ScheduleSequentialTask(); + bool LaunchScanTasks(PhysicalOperator *op, idx_t max_threads, unique_ptr parallel_state); + bool ScheduleOperator(PhysicalOperator *op); }; } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/parallel/parallel_state.hpp -// -// -//===----------------------------------------------------------------------===// +#include namespace duckdb { +class ClientContext; +class DataChunk; +class PhysicalOperator; +class PhysicalOperatorState; +class ThreadContext; +class Task; -struct ParallelState { - virtual ~ParallelState() { - } -}; +struct ProducerToken; -} // namespace duckdb +class Executor { + friend class Pipeline; + friend class PipelineTask; +public: + explicit Executor(ClientContext &context); + ~Executor(); -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/function/table_function.hpp -// -// -//===----------------------------------------------------------------------===// + ClientContext &context; + +public: + void Initialize(PhysicalOperator *physical_plan); + void BuildPipelines(PhysicalOperator *op, Pipeline *parent); + void Reset(); + vector GetTypes(); + unique_ptr FetchChunk(); + //! Push a new error + void PushError(const string &exception); + bool GetError(string &exception); + //! Flush a thread context into the client context + void Flush(ThreadContext &context); + //! Returns the progress of the pipelines + bool GetPipelinesProgress(int ¤t_progress); -#include +private: + PhysicalOperator *physical_plan; + unique_ptr physical_state; -namespace duckdb { -class BaseStatistics; -class LogicalGet; -struct ParallelState; -struct TableFilterSet; + mutex executor_lock; + //! 
The pipelines of the current query + vector> pipelines; + //! The producer of this query + unique_ptr producer; + //! Exceptions that occurred during the execution of the current query + vector exceptions; -struct FunctionOperatorData { - virtual ~FunctionOperatorData() { - } -}; + //! The amount of completed pipelines of the query + atomic completed_pipelines; + //! The total amount of pipelines in the query + idx_t total_pipelines; -struct TableFilterCollection { - TableFilterSet *table_filters; - explicit TableFilterCollection(TableFilterSet *table_filters) : table_filters(table_filters) { - } + unordered_map delim_join_dependencies; + PhysicalOperator *recursive_cte; }; +} // namespace duckdb -typedef unique_ptr (*table_function_bind_t)(ClientContext &context, vector &inputs, - unordered_map &named_parameters, - vector &input_table_types, - vector &input_table_names, - vector &return_types, vector &names); -typedef unique_ptr (*table_function_init_t)(ClientContext &context, const FunctionData *bind_data, - vector &column_ids, - TableFilterCollection *filters); -typedef unique_ptr (*table_statistics_t)(ClientContext &context, const FunctionData *bind_data, - column_t column_index); -typedef void (*table_function_t)(ClientContext &context, const FunctionData *bind_data, - FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output); -typedef void (*table_function_cleanup_t)(ClientContext &context, const FunctionData *bind_data, - FunctionOperatorData *operator_state); -typedef idx_t (*table_function_max_threads_t)(ClientContext &context, const FunctionData *bind_data); -typedef unique_ptr (*table_function_init_parallel_state_t)(ClientContext &context, - const FunctionData *bind_data); -typedef unique_ptr (*table_function_init_parallel_t)(ClientContext &context, - const FunctionData *bind_data, - ParallelState *state, - vector &column_ids, - TableFilterCollection *filters); -typedef bool (*table_function_parallel_state_next_t)(ClientContext &context, 
const FunctionData *bind_data, - FunctionOperatorData *state, ParallelState *parallel_state); -typedef int (*table_function_progress_t)(ClientContext &context, const FunctionData *bind_data); -typedef void (*table_function_dependency_t)(unordered_set &dependencies, const FunctionData *bind_data); -typedef unique_ptr (*table_function_cardinality_t)(ClientContext &context, - const FunctionData *bind_data); -typedef void (*table_function_pushdown_complex_filter_t)(ClientContext &context, LogicalGet &get, - FunctionData *bind_data, - vector> &filters); -typedef string (*table_function_to_string_t)(const FunctionData *bind_data); -class TableFunction : public SimpleNamedParameterFunction { + +namespace duckdb { +class ProgressBar { public: - TableFunction(string name, vector arguments, table_function_t function, - table_function_bind_t bind = nullptr, table_function_init_t init = nullptr, - table_statistics_t statistics = nullptr, table_function_cleanup_t cleanup = nullptr, - table_function_dependency_t dependency = nullptr, table_function_cardinality_t cardinality = nullptr, - table_function_pushdown_complex_filter_t pushdown_complex_filter = nullptr, - table_function_to_string_t to_string = nullptr, table_function_max_threads_t max_threads = nullptr, - table_function_init_parallel_state_t init_parallel_state = nullptr, - table_function_init_parallel_t parallel_init = nullptr, - table_function_parallel_state_next_t parallel_state_next = nullptr, bool projection_pushdown = false, - bool filter_pushdown = false, table_function_progress_t query_progress = nullptr) - : SimpleNamedParameterFunction(std::move(name), move(arguments)), bind(bind), init(init), function(function), - statistics(statistics), cleanup(cleanup), dependency(dependency), cardinality(cardinality), - pushdown_complex_filter(pushdown_complex_filter), to_string(to_string), max_threads(max_threads), - init_parallel_state(init_parallel_state), parallel_init(parallel_init), - 
parallel_state_next(parallel_state_next), table_scan_progress(query_progress), - projection_pushdown(projection_pushdown), filter_pushdown(filter_pushdown) { - } - TableFunction(const vector &arguments, table_function_t function, table_function_bind_t bind = nullptr, - table_function_init_t init = nullptr, table_statistics_t statistics = nullptr, - table_function_cleanup_t cleanup = nullptr, table_function_dependency_t dependency = nullptr, - table_function_cardinality_t cardinality = nullptr, - table_function_pushdown_complex_filter_t pushdown_complex_filter = nullptr, - table_function_to_string_t to_string = nullptr, table_function_max_threads_t max_threads = nullptr, - table_function_init_parallel_state_t init_parallel_state = nullptr, - table_function_init_parallel_t parallel_init = nullptr, - table_function_parallel_state_next_t parallel_state_next = nullptr, bool projection_pushdown = false, - bool filter_pushdown = false, table_function_progress_t query_progress = nullptr) - : TableFunction(string(), arguments, function, bind, init, statistics, cleanup, dependency, cardinality, - pushdown_complex_filter, to_string, max_threads, init_parallel_state, parallel_init, - parallel_state_next, projection_pushdown, filter_pushdown, query_progress) { + explicit ProgressBar(Executor *executor, idx_t show_progress_after, idx_t time_update_bar = 100) + : executor(executor), show_progress_after(show_progress_after), time_update_bar(time_update_bar), + current_percentage(-1), stop(false) { } - TableFunction() : SimpleNamedParameterFunction("", {}) { + ~ProgressBar(); + + //! Starts the thread + void Start(); + //! Stops the thread + void Stop(); + //! Gets current percentage + int GetCurrentPercentage(); + + void Initialize(idx_t show_progress_after) { + this->show_progress_after = show_progress_after; } - //! Bind function - //! This function is used for determining the return type of a table producing function and returning bind data - //! 
The returned FunctionData object should be constant and should not be changed during execution. - table_function_bind_t bind; - //! (Optional) init function - //! Initialize the operator state of the function. The operator state is used to keep track of the progress in the - //! table function. - table_function_init_t init; - //! The main function - table_function_t function; - //! (Optional) statistics function - //! Returns the statistics of a specified column - table_statistics_t statistics; - //! (Optional) cleanup function - //! The final cleanup function, called after all data is exhausted from the main function - table_function_cleanup_t cleanup; - //! (Optional) dependency function - //! Sets up which catalog entries this table function depend on - table_function_dependency_t dependency; - //! (Optional) cardinality function - //! Returns the expected cardinality of this scan - table_function_cardinality_t cardinality; - //! (Optional) pushdown a set of arbitrary filter expressions, rather than only simple comparisons with a constant - //! Any functions remaining in the expression list will be pushed as a regular filter after the scan - table_function_pushdown_complex_filter_t pushdown_complex_filter; - //! (Optional) function for rendering the operator to a string in profiling output - table_function_to_string_t to_string; - //! (Optional) function that returns the maximum amount of threads that can work on this task - table_function_max_threads_t max_threads; - //! (Optional) initialize the parallel scan state, called once in total. - table_function_init_parallel_state_t init_parallel_state; - //! (Optional) initialize the parallel scan given the parallel state. Called once per task. Return nullptr if there - //! is nothing left to scan. - table_function_init_parallel_t parallel_init; - //! (Optional) return the next chunk to process in the parallel scan, or return nullptr if there is none - table_function_parallel_state_next_t parallel_state_next; - //! 
(Optional) return how much of the table we have scanned up to this point (% of the data) - table_function_progress_t table_scan_progress; - //! Whether or not the table function supports projection pushdown. If not supported a projection will be added - //! that filters out unused columns. - bool projection_pushdown; - //! Whether or not the table function supports filter pushdown. If not supported a filter will be added - //! that applies the table filter directly. - bool filter_pushdown; +private: + const string PROGRESS_BAR_STRING = "||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||"; + static constexpr const idx_t PROGRESS_BAR_WIDTH = 60; + Executor *executor = nullptr; +#ifndef DUCKDB_NO_THREADS + thread progress_bar_thread; + std::condition_variable c; + mutex m; +#endif + idx_t show_progress_after; + idx_t time_update_bar; + atomic current_percentage; + atomic stop; + //! In case our progress bar tries to use a scan operator that is not implemented we don't print anything + bool supported = true; + //! 
Starts the Progress Bar Thread that prints the progress bar + void ProgressBarThread(); - string ToString() override { - return SimpleNamedParameterFunction::ToString(); +#ifndef DUCKDB_NO_THREADS + template + bool WaitFor(DURATION duration) { + unique_lock l(m); + return !c.wait_for(l, duration, [this]() { return stop.load(); }); } +#endif }; - } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/parallel/task_scheduler.hpp -// -// -//===----------------------------------------------------------------------===// - @@ -10114,297 +12283,456 @@ class TableFunction : public SimpleNamedParameterFunction { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parallel/task.hpp +// duckdb/transaction/transaction_context.hpp // // //===----------------------------------------------------------------------===// + + + namespace duckdb { -class Task { +class ClientContext; +class Transaction; +class TransactionManager; + +//! The transaction context keeps track of all the information relating to the +//! current transaction +class TransactionContext { public: - virtual ~Task() { + TransactionContext(TransactionManager &transaction_manager, ClientContext &context) + : transaction_manager(transaction_manager), context(context), auto_commit(true), current_transaction(nullptr) { + } + ~TransactionContext(); + + Transaction &ActiveTransaction() { + D_ASSERT(current_transaction); + return *current_transaction; } - //! 
Execute the task - virtual void Execute() = 0; + bool HasActiveTransaction() { + return !!current_transaction; + } + + void RecordQuery(string query); + void BeginTransaction(); + void Commit(); + void Rollback(); + void ClearTransaction(); + + void SetAutoCommit(bool value); + bool IsAutoCommit() { + return auto_commit; + } + +private: + TransactionManager &transaction_manager; + ClientContext &context; + bool auto_commit; + + Transaction *current_transaction; + + TransactionContext(const TransactionContext &) = delete; }; } // namespace duckdb +#include + namespace duckdb { +class Appender; +class Catalog; +class ChunkCollection; +class DatabaseInstance; +class LogicalOperator; +class PreparedStatementData; +class Relation; +class BufferedFileWriter; +class QueryProfiler; +class QueryProfilerHistory; +class ClientContextLock; +struct CreateScalarFunctionInfo; +class ScalarFunctionCatalogEntry; -struct ConcurrentQueue; -struct QueueProducerToken; -class ClientContext; -class TaskScheduler; +//! The ClientContext holds information relevant to the current client session +//! during execution +class ClientContext : public std::enable_shared_from_this { + friend class TransactionManager; -struct SchedulerThread; +public: + DUCKDB_API explicit ClientContext(shared_ptr db); + DUCKDB_API ~ClientContext(); + //! Query profiler + unique_ptr profiler; + //! QueryProfiler History + unique_ptr query_profiler_history; + //! The database that this client is connected to + shared_ptr db; + //! Data for the currently running transaction + TransactionContext transaction; + //! Whether or not the query is interrupted + atomic interrupted; + //! The current query being executed by the client context + string query; -struct ProducerToken { - ProducerToken(TaskScheduler &scheduler, unique_ptr token); - ~ProducerToken(); + //! The query executor + Executor executor; - TaskScheduler &scheduler; - unique_ptr token; - std::mutex producer_lock; -}; + //! 
The Progress Bar + unique_ptr progress_bar; + //! If the progress bar is enabled or not. + bool enable_progress_bar = false; + //! If the print of the progress bar is enabled + bool print_progress_bar = true; + //! The wait time before showing the progress bar + int wait_time = 2000; -//! The TaskScheduler is responsible for managing tasks and threads -class TaskScheduler { - // timeout for semaphore wait, default 50ms - constexpr static int64_t TASK_TIMEOUT_USECS = 50000; + unique_ptr temporary_objects; + unordered_map> prepared_statements; + + // Whether or not aggressive query verification is enabled + bool query_verification_enabled = false; + //! Enable the running of optimizers + bool enable_optimizer = true; + //! Force parallelism of small tables, used for testing + bool force_parallelism = false; + //! Force index join independent of table cardinality, used for testing + bool force_index_join = false; + //! Force out-of-core computation for operators that support it, used for testing + bool force_external = false; + //! Maximum bits allowed for using a perfect hash table (i.e. the perfect HT can hold up to 2^perfect_ht_threshold + //! elements) + idx_t perfect_ht_threshold = 12; + //! The writer used to log queries (if logging is enabled) + unique_ptr log_query_writer; + //! The explain output type used when none is specified (default: PHYSICAL_ONLY) + ExplainOutputType explain_output_type = ExplainOutputType::PHYSICAL_ONLY; + //! The random generator used by random(). Its seed value can be set by setseed(). + std::mt19937 random_engine; + + //! The schema search path, in order by which entries are searched if no schema entry is provided + vector catalog_search_path = {TEMP_SCHEMA, DEFAULT_SCHEMA, "pg_catalog"}; public: - TaskScheduler(); - ~TaskScheduler(); + DUCKDB_API Transaction &ActiveTransaction() { + return transaction.ActiveTransaction(); + } - static TaskScheduler &GetScheduler(ClientContext &context); + //! 
Interrupt execution of a query + DUCKDB_API void Interrupt(); + //! Enable query profiling + DUCKDB_API void EnableProfiling(); + //! Disable query profiling + DUCKDB_API void DisableProfiling(); - unique_ptr CreateProducer(); - //! Schedule a task to be executed by the task scheduler - void ScheduleTask(ProducerToken &producer, unique_ptr task); - //! Fetches a task from a specific producer, returns true if successful or false if no tasks were available - bool GetTaskFromProducer(ProducerToken &token, unique_ptr &task); - //! Run tasks forever until "marker" is set to false, "marker" must remain valid until the thread is joined - void ExecuteForever(bool *marker); + //! Issue a query, returning a QueryResult. The QueryResult can be either a StreamQueryResult or a + //! MaterializedQueryResult. The StreamQueryResult will only be returned in the case of a successful SELECT + //! statement. + DUCKDB_API unique_ptr Query(const string &query, bool allow_stream_result); + DUCKDB_API unique_ptr Query(unique_ptr statement, bool allow_stream_result); + //! Fetch a query from the current result set (if any) + DUCKDB_API unique_ptr Fetch(); + //! Cleanup the result set (if any). + DUCKDB_API void Cleanup(); + //! Destroy the client context + DUCKDB_API void Destroy(); - //! Sets the amount of active threads executing tasks for the system; n-1 background threads will be launched. - //! The main thread will also be used for execution - void SetThreads(int32_t n); - //! Returns the number of threads - int32_t NumberOfThreads(); + //! Get the table info of a specific table, or nullptr if it cannot be found + DUCKDB_API unique_ptr TableInfo(const string &schema_name, const string &table_name); + //! Appends a DataChunk to the specified table. Returns whether or not the append was successful. + DUCKDB_API void Append(TableDescription &description, ChunkCollection &collection); + //! 
Try to bind a relation in the current client context; either throws an exception or fills the result_columns + //! list with the set of returned columns + DUCKDB_API void TryBindRelation(Relation &relation, vector &result_columns); + + //! Execute a relation + DUCKDB_API unique_ptr Execute(const shared_ptr &relation); + + //! Prepare a query + DUCKDB_API unique_ptr Prepare(const string &query); + //! Directly prepare a SQL statement + DUCKDB_API unique_ptr Prepare(unique_ptr statement); + + //! Execute a prepared statement with the given name and set of parameters + //! It is possible that the prepared statement will be re-bound. This will generally happen if the catalog is + //! modified in between the prepared statement being bound and the prepared statement being run. + DUCKDB_API unique_ptr Execute(const string &query, shared_ptr &prepared, + vector &values, bool allow_stream_result = true); + + //! Gets current percentage of the query's progress, returns 0 in case the progress bar is disabled. + int GetProgress(); + + //! Register function in the temporary schema + DUCKDB_API void RegisterFunction(CreateFunctionInfo *info); + + //! Parse statements from a query + DUCKDB_API vector> ParseStatements(const string &query); + //! Extract the logical plan of a query + DUCKDB_API unique_ptr ExtractPlan(const string &query); + void HandlePragmaStatements(vector> &statements); + + //! Runs a function with a valid transaction context, potentially starting a transaction if the context is in auto + //! commit mode. + DUCKDB_API void RunFunctionInTransaction(const std::function &fun, + bool requires_valid_transaction = true); + //! Same as RunFunctionInTransaction, but does not obtain a lock on the client context or check for validation + DUCKDB_API void RunFunctionInTransactionInternal(ClientContextLock &lock, const std::function &fun, + bool requires_valid_transaction = true); private: - void SetThreadsInternal(int32_t n); + //! 
Parse statements from a query + vector> ParseStatementsInternal(ClientContextLock &lock, const string &query); + //! Perform aggressive query verification of a SELECT statement. Only called when query_verification_enabled is + //! true. + string VerifyQuery(ClientContextLock &lock, const string &query, unique_ptr statement); - //! The task queue - unique_ptr queue; - //! The active background threads of the task scheduler - vector> threads; - //! Markers used by the various threads, if the markers are set to "false" the thread execution is stopped - vector> markers; -}; + void InitialCleanup(ClientContextLock &lock); + //! Internal clean up, does not lock. Caller must hold the context_lock. + void CleanupInternal(ClientContextLock &lock); + string FinalizeQuery(ClientContextLock &lock, bool success); + //! Internal fetch, does not lock. Caller must hold the context_lock. + unique_ptr FetchInternal(ClientContextLock &lock); + //! Internally execute a set of SQL statement. Caller must hold the context_lock. + unique_ptr RunStatements(ClientContextLock &lock, const string &query, + vector> &statements, bool allow_stream_result); + //! Internally prepare and execute a prepared SQL statement. Caller must hold the context_lock. + unique_ptr RunStatement(ClientContextLock &lock, const string &query, + unique_ptr statement, bool allow_stream_result); + unique_ptr RunStatementOrPreparedStatement(ClientContextLock &lock, const string &query, + unique_ptr statement, + shared_ptr &prepared, + vector *values, bool allow_stream_result); -} // namespace duckdb + //! Internally prepare a SQL statement. Caller must hold the context_lock. + shared_ptr CreatePreparedStatement(ClientContextLock &lock, const string &query, + unique_ptr statement); + //! Internally execute a prepared SQL statement. Caller must hold the context_lock. 
+ unique_ptr ExecutePreparedStatement(ClientContextLock &lock, const string &query, + shared_ptr statement, + vector bound_values, bool allow_stream_result); + //! Call CreatePreparedStatement() and ExecutePreparedStatement() without any bound values + unique_ptr RunStatementInternal(ClientContextLock &lock, const string &query, + unique_ptr statement, bool allow_stream_result); + unique_ptr PrepareInternal(ClientContextLock &lock, unique_ptr statement); + void LogQueryInternal(ClientContextLock &lock, const string &query); -#include + unique_ptr LockContext(); -namespace duckdb { -class Executor; -class TaskContext; + bool UpdateFunctionInfoFromEntry(ScalarFunctionCatalogEntry *existing_function, CreateScalarFunctionInfo *new_info); + +private: + //! The currently opened StreamQueryResult (if any) + StreamQueryResult *open_result = nullptr; + //! Lock on using the ClientContext in parallel + mutex context_lock; +}; -//! The Pipeline class represents an execution pipeline -class Pipeline { - friend class Executor; +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/parser/parsed_data/create_table_function_info.hpp +// +// +//===----------------------------------------------------------------------===// -public: - Pipeline(Executor &execution_context, ProducerToken &token); - Executor &executor; - ProducerToken &token; -public: - //! Execute a task within the pipeline on a single thread - void Execute(TaskContext &task); +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/parser/parsed_data/create_function_info.hpp +// +// +//===----------------------------------------------------------------------===// - void AddDependency(Pipeline *pipeline); - void CompleteDependency(); - bool HasDependencies() { - return !dependencies.empty(); - } - void Reset(ClientContext &context); - void Schedule(); - //! 
Finish a single task of this pipeline - void FinishTask(); - //! Finish executing this pipeline - void Finish(); +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/parser/parsed_data/create_info.hpp +// +// +//===----------------------------------------------------------------------===// - string ToString() const; - void Print() const; - void SetRecursiveCTE(PhysicalOperator *op) { - this->recursive_cte = op; - } - PhysicalOperator *GetRecursiveCTE() { - return recursive_cte; - } - unordered_set &GetDependencies() { - return dependencies; - } - void ClearParents(); - void IncrementTasks(idx_t amount) { - this->total_tasks += amount; - } +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/parser/parsed_data/parse_info.hpp +// +// +//===----------------------------------------------------------------------===// - bool IsFinished() { - return finished; - } - //! Returns query progress - bool GetProgress(int ¤t_percentage); -public: - //! The current threads working on the pipeline - std::atomic finished_tasks; - //! The maximum amount of threads that can work on the pipeline - idx_t total_tasks; -private: - //! The child from which to pull chunks - PhysicalOperator *child; - //! The global sink state - unique_ptr sink_state; - //! The sink (i.e. destination) for data; this is e.g. a hash table to-be-built - PhysicalSink *sink; - //! The parent pipelines (i.e. pipelines that are dependent on this pipeline to finish) - unordered_set parents; - //! The dependencies of this pipeline - unordered_set dependencies; - //! The amount of completed dependencies (the pipeline can only be started after the dependencies have finished - //! executing) - std::atomic finished_dependencies; - //! The parallel operator (if any) - PhysicalOperator *parallel_node; - //! The parallel state (if any) - unique_ptr parallel_state; - //! 
Whether or not the pipeline is finished executing - bool finished; - //! The recursive CTE node that this pipeline belongs to, and may be executed multiple times - PhysicalOperator *recursive_cte; +namespace duckdb { -private: - bool GetProgress(ClientContext &context, PhysicalOperator *op, int ¤t_percentage); - void ScheduleSequentialTask(); - bool LaunchScanTasks(PhysicalOperator *op, idx_t max_threads, unique_ptr parallel_state); - bool ScheduleOperator(PhysicalOperator *op); +struct ParseInfo { + virtual ~ParseInfo() { + } }; } // namespace duckdb -#include - namespace duckdb { -class ClientContext; -class DataChunk; -class PhysicalOperator; -class PhysicalOperatorState; -class ThreadContext; -class Task; -struct ProducerToken; +enum class OnCreateConflict : uint8_t { + // Standard: throw error + ERROR_ON_CONFLICT, + // CREATE IF NOT EXISTS, silently do nothing on conflict + IGNORE_ON_CONFLICT, + // CREATE OR REPLACE + REPLACE_ON_CONFLICT +}; -class Executor { - friend class Pipeline; - friend class PipelineTask; +struct CreateInfo : public ParseInfo { + explicit CreateInfo(CatalogType type, string schema = DEFAULT_SCHEMA) + : type(type), schema(schema), on_conflict(OnCreateConflict::ERROR_ON_CONFLICT), temporary(false), + internal(false) { + } + ~CreateInfo() override { + } + + //! The to-be-created catalog type + CatalogType type; + //! The schema name of the entry + string schema; + //! What to do on create conflict + OnCreateConflict on_conflict; + //! Whether or not the entry is temporary + bool temporary; + //! Whether or not the entry is an internal entry + bool internal; + //! 
The SQL string of the CREATE statement + string sql; public: - explicit Executor(ClientContext &context); - ~Executor(); + virtual unique_ptr Copy() const = 0; + void CopyProperties(CreateInfo &other) const { + other.type = type; + other.schema = schema; + other.on_conflict = on_conflict; + other.temporary = temporary; + other.internal = internal; + other.sql = sql; + } +}; - ClientContext &context; +} // namespace duckdb -public: - void Initialize(PhysicalOperator *physical_plan); - void BuildPipelines(PhysicalOperator *op, Pipeline *parent); - void Reset(); - vector GetTypes(); +namespace duckdb { - unique_ptr FetchChunk(); +struct CreateFunctionInfo : public CreateInfo { + explicit CreateFunctionInfo(CatalogType type) : CreateInfo(type) { + D_ASSERT(type == CatalogType::SCALAR_FUNCTION_ENTRY || type == CatalogType::AGGREGATE_FUNCTION_ENTRY || + type == CatalogType::TABLE_FUNCTION_ENTRY || type == CatalogType::PRAGMA_FUNCTION_ENTRY || + type == CatalogType::MACRO_ENTRY); + } - //! Push a new error - void PushError(const string &exception); + //! Function name + string name; +}; - //! Flush a thread context into the client context - void Flush(ThreadContext &context); +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/function/function_set.hpp +// +// +//===----------------------------------------------------------------------===// - //! Returns the progress of the pipelines - bool GetPipelinesProgress(int ¤t_progress); -private: - PhysicalOperator *physical_plan; - unique_ptr physical_state; - mutex executor_lock; - //! The pipelines of the current query - vector> pipelines; - //! The producer of this query - unique_ptr producer; - //! Exceptions that occurred during the execution of the current query - vector exceptions; - //! The amount of completed pipelines of the query - std::atomic completed_pipelines; - //! 
The total amount of pipelines in the query - idx_t total_pipelines; - unordered_map delim_join_dependencies; - PhysicalOperator *recursive_cte; -}; -} // namespace duckdb namespace duckdb { -class ProgressBar { + +template +class FunctionSet { public: - explicit ProgressBar(Executor *executor, idx_t show_progress_after, idx_t time_update_bar = 100) - : executor(executor), show_progress_after(show_progress_after), time_update_bar(time_update_bar) { + explicit FunctionSet(string name) : name(name) { + } - }; + //! The name of the function set + string name; + //! The set of functions + vector functions; - //! Starts the thread - void Start(); - //! Stops the thread - void Stop(); - //! Gets current percentage - int GetCurrentPercentage(); +public: + void AddFunction(T function) { + function.name = name; + functions.push_back(function); + } +}; -private: - const string PROGRESS_BAR_STRING = "||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||"; - static constexpr const idx_t PROGRESS_BAR_WIDTH = 60; - Executor *executor = nullptr; -#ifndef DUCKDB_NO_THREADS - std::thread progress_bar_thread; - std::condition_variable c; - std::mutex m; -#endif - idx_t show_progress_after; - idx_t time_update_bar; - int current_percentage = -1; - bool stop = false; - //! In case our progress bar tries to use a scan operator that is not implemented we don't print anything - bool supported = true; - //! 
Starts the Progress Bar Thread that prints the progress bar - void ProgressBarThread(); +class ScalarFunctionSet : public FunctionSet { +public: + explicit ScalarFunctionSet(string name) : FunctionSet(move(name)) { + } +}; -#ifndef DUCKDB_NO_THREADS - template - bool WaitFor(DURATION duration) { - std::unique_lock l(m); - return !c.wait_for(l, duration, [this]() { return stop; }); +class AggregateFunctionSet : public FunctionSet { +public: + explicit AggregateFunctionSet(string name) : FunctionSet(move(name)) { + } +}; + +class TableFunctionSet : public FunctionSet { +public: + explicit TableFunctionSet(string name) : FunctionSet(move(name)) { } -#endif }; + } // namespace duckdb +namespace duckdb { + +struct CreateTableFunctionInfo : public CreateFunctionInfo { + explicit CreateTableFunctionInfo(TableFunctionSet set) + : CreateFunctionInfo(CatalogType::TABLE_FUNCTION_ENTRY), functions(move(set.functions)) { + this->name = set.name; + } + explicit CreateTableFunctionInfo(TableFunction function) : CreateFunctionInfo(CatalogType::TABLE_FUNCTION_ENTRY) { + this->name = function.name; + functions.push_back(move(function)); + } + + //! 
The table functions + vector functions; +public: + unique_ptr Copy() const override { + TableFunctionSet set(name); + set.functions = functions; + auto result = make_unique(move(set)); + CopyProperties(*result); + return move(result); + } +}; +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/main/query_profiler.hpp +// duckdb/parser/parsed_data/create_copy_function_info.hpp // // //===----------------------------------------------------------------------===// @@ -10412,11 +12740,10 @@ class ProgressBar { - //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/profiler.hpp +// duckdb/function/copy_function.hpp // // //===----------------------------------------------------------------------===// @@ -10425,47 +12752,10 @@ class ProgressBar { - -namespace duckdb { - -//! The profiler can be used to measure elapsed time -template -class Profiler { -public: - //! Starts the timer - void Start() { - finished = false; - start = Tick(); - } - //! Finishes timing - void End() { - end = Tick(); - finished = true; - } - - //! Returns the elapsed time in seconds. If End() has been called, returns - //! the total elapsed time. Otherwise returns how far along the timer is - //! right now. - double Elapsed() const { - auto _end = finished ? 
end : Tick(); - return std::chrono::duration_cast>(_end - start).count(); - } - -private: - time_point Tick() const { - return T::now(); - } - time_point start; - time_point end; - bool finished = false; -}; - -} // namespace duckdb - //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/string_util.hpp +// duckdb/parser/parsed_data/copy_info.hpp // // //===----------------------------------------------------------------------===// @@ -10476,335 +12766,204 @@ class Profiler { -namespace duckdb { -/** - * String Utility Functions - * Note that these are not the most efficient implementations (i.e., they copy - * memory) and therefore they should only be used for debug messages and other - * such things. - */ -class StringUtil { -public: - static bool CharacterIsSpace(char c) { - return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'; - } - static bool CharacterIsNewline(char c) { - return c == '\n' || c == '\r'; - } - static bool CharacterIsDigit(char c) { - return c >= '0' && c <= '9'; - } - - //! Returns true if the needle string exists in the haystack - static bool Contains(const string &haystack, const string &needle); - - //! Returns true if the target string starts with the given prefix - static bool StartsWith(string str, string prefix); - - //! Returns true if the target string ends with the given suffix. - static bool EndsWith(const string &str, const string &suffix); - - //! Repeat a string multiple times - static string Repeat(const string &str, const idx_t n); - - //! Split the input string based on newline char - static vector Split(const string &str, char delimiter); - - //! Join multiple strings into one string. Components are concatenated by the given separator - static string Join(const vector &input, const string &separator); - - //! Join multiple items of container with given size, transformed to string - //! 
using function, into one string using the given separator - template - static string Join(const C &input, S count, const string &separator, Func f) { - // The result - std::string result; - - // If the input isn't empty, append the first element. We do this so we - // don't need to introduce an if into the loop. - if (count > 0) { - result += f(input[0]); - } - // Append the remaining input components, after the first - for (size_t i = 1; i < count; i++) { - result += separator + f(input[i]); - } +namespace duckdb { - return result; +struct CopyInfo : public ParseInfo { + CopyInfo() : schema(DEFAULT_SCHEMA) { } - //! Append the prefix to the beginning of each line in str - static string Prefix(const string &str, const string &prefix); - - //! Return a string that formats the give number of bytes - static string FormatSize(idx_t bytes); - - //! Convert a string to uppercase - static string Upper(const string &str); - - //! Convert a string to lowercase - static string Lower(const string &str); + //! The schema name to copy to/from + string schema; + //! The table name to copy to/from + string table; + //! List of columns to copy to/from + vector select_list; + //! The file path to copy to/from + string file_path; + //! Whether or not this is a copy to file (false) or copy from a file (true) + bool is_from; + //! The file format of the external file + string format; + //! Set of (key, value) options + unordered_map> options; - //! Format a string using printf semantics - template - static string Format(const string fmt_str, Args... params) { - return Exception::ConstructMessage(fmt_str, params...); +public: + unique_ptr Copy() const { + auto result = make_unique(); + result->schema = schema; + result->table = table; + result->select_list = select_list; + result->file_path = file_path; + result->is_from = is_from; + result->format = format; + result->options = options; + return result; } - - //! 
Split the input string into a vector of strings based on the split string - static vector Split(const string &input, const string &split); - - //! Remove the whitespace char in the left end of the string - static void LTrim(string &str); - //! Remove the whitespace char in the right end of the string - static void RTrim(string &str); - //! Remove the whitespace char in the left and right end of the string - static void Trim(string &str); - - static string Replace(string source, const string &from, const string &to); - - //! Get the levenshtein distance from two strings - static idx_t LevenshteinDistance(const string &s1, const string &s2); - - //! Get the top-n strings (sorted by the given score distance) from a set of scores. - //! At least one entry is returned (if there is one). - //! Strings are only returned if they have a score less than the threshold. - static vector TopNStrings(vector> scores, idx_t n = 5, idx_t threshold = 5); - //! Computes the levenshtein distance of each string in strings, and compares it to target, then returns TopNStrings - //! with the given params. - static vector TopNLevenshtein(const vector &strings, const string &target, idx_t n = 5, - idx_t threshold = 5); - static string CandidatesMessage(const vector &candidates, const string &candidate = "Candidate bindings"); }; -} // namespace duckdb - - - - - - -#include -#include +} // namespace duckdb namespace duckdb { -class ExpressionExecutor; -class PhysicalOperator; -class SQLStatement; +class ExecutionContext; -struct ExpressionInformation { - ExpressionInformation(string &name, double time) : name(name), time(time) { +struct LocalFunctionData { + virtual ~LocalFunctionData() { } - void ExtractExpressionsRecursive(unique_ptr &state); - vector> children; - bool hasfunction = false; - string name; - string function_name; - uint64_t time = 0; -}; - -struct ExpressionExecutorInformation { - explicit ExpressionExecutorInformation(ExpressionExecutor &executor); - - //! 
Count the number of time the executor called - uint64_t total_count = 0; - //! Count the number of time the executor called since last sampling - uint64_t current_count = 0; - //! Count the number of samples - uint64_t sample_count = 0; - //! Count the number of tuples in all samples - uint64_t sample_tuples_count = 0; - //! Count the number of tuples processed by this executor - uint64_t tuples_count = 0; - - vector> roots; }; -struct OperatorTimingInformation { - double time = 0; - idx_t elements = 0; - bool has_executor = false; - explicit OperatorTimingInformation(double time_ = 0, idx_t elements_ = 0) : time(time_), elements(elements_) { +struct GlobalFunctionData { + virtual ~GlobalFunctionData() { } - - //! A mapping of physical operators to recorded timings - unique_ptr executors_info; }; -//! The OperatorProfiler measures timings of individual operators -class OperatorProfiler { - friend class QueryProfiler; - -public: - DUCKDB_API explicit OperatorProfiler(bool enabled); +typedef unique_ptr (*copy_to_bind_t)(ClientContext &context, CopyInfo &info, vector &names, + vector &sql_types); +typedef unique_ptr (*copy_to_initialize_local_t)(ClientContext &context, FunctionData &bind_data); +typedef unique_ptr (*copy_to_initialize_global_t)(ClientContext &context, FunctionData &bind_data); +typedef void (*copy_to_sink_t)(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate, + LocalFunctionData &lstate, DataChunk &input); +typedef void (*copy_to_combine_t)(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate, + LocalFunctionData &lstate); +typedef void (*copy_to_finalize_t)(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate); - DUCKDB_API void StartOperator(PhysicalOperator *phys_op); - DUCKDB_API void EndOperator(DataChunk *chunk); - DUCKDB_API void Flush(PhysicalOperator *phys_op, ExpressionExecutor *expression_executor); +typedef unique_ptr (*copy_from_bind_t)(ClientContext &context, CopyInfo 
&info, + vector &expected_names, + vector &expected_types); - ~OperatorProfiler() { +class CopyFunction : public Function { +public: + explicit CopyFunction(string name) + : Function(name), copy_to_bind(nullptr), copy_to_initialize_local(nullptr), copy_to_initialize_global(nullptr), + copy_to_sink(nullptr), copy_to_combine(nullptr), copy_to_finalize(nullptr), copy_from_bind(nullptr) { } -private: - void AddTiming(PhysicalOperator *op, double time, idx_t elements); + copy_to_bind_t copy_to_bind; + copy_to_initialize_local_t copy_to_initialize_local; + copy_to_initialize_global_t copy_to_initialize_global; + copy_to_sink_t copy_to_sink; + copy_to_combine_t copy_to_combine; + copy_to_finalize_t copy_to_finalize; + + copy_from_bind_t copy_from_bind; + TableFunction copy_from_function; - //! Whether or not the profiler is enabled - bool enabled; - //! The timer used to time the execution time of the individual Physical Operators - Profiler op; - //! The stack of Physical Operators that are currently active - std::stack execution_stack; - //! A mapping of physical operators to recorded timings - unordered_map timings; + string extension; }; -//! 
The QueryProfiler can be used to measure timings of queries -class QueryProfiler { -public: - struct TreeNode { - string name; - string extra_info; - OperatorTimingInformation info; - vector> children; - idx_t depth = 0; - }; +} // namespace duckdb -private: - unique_ptr CreateTree(PhysicalOperator *root, idx_t depth = 0); - void Render(const TreeNode &node, std::ostream &str) const; +namespace duckdb { -public: - DUCKDB_API QueryProfiler() - : automatic_print_format(ProfilerPrintFormat::NONE), enabled(false), detailed_enabled(false), running(false) { +struct CreateCopyFunctionInfo : public CreateInfo { + explicit CreateCopyFunctionInfo(CopyFunction function) + : CreateInfo(CatalogType::COPY_FUNCTION_ENTRY), function(function) { + this->name = function.name; } - DUCKDB_API void Enable() { - enabled = true; - detailed_enabled = false; - } + //! Function name + string name; + //! The table function + CopyFunction function; - DUCKDB_API void DetailedEnable() { - detailed_enabled = true; +public: + unique_ptr Copy() const override { + auto result = make_unique(function); + CopyProperties(*result); + return move(result); } +}; - DUCKDB_API void Disable() { - enabled = false; - } +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/planner/expression/bound_constant_expression.hpp +// +// +//===----------------------------------------------------------------------===// - DUCKDB_API bool IsEnabled() { - return enabled; - } - bool IsDetailedEnabled() const { - return detailed_enabled; - } - DUCKDB_API void StartQuery(string query); - DUCKDB_API void EndQuery(); - //! 
Adds the timings gathered by an OperatorProfiler to this query profiler - DUCKDB_API void Flush(OperatorProfiler &profiler); - DUCKDB_API void StartPhase(string phase); - DUCKDB_API void EndPhase(); - DUCKDB_API void Initialize(PhysicalOperator *root); +namespace duckdb { - DUCKDB_API string ToString(bool print_optimizer_output = false) const; - DUCKDB_API void ToStream(std::ostream &str, bool print_optimizer_output = false) const; - DUCKDB_API void Print(); +class BoundConstantExpression : public Expression { +public: + explicit BoundConstantExpression(Value value); - DUCKDB_API string ToJSON() const; - DUCKDB_API void WriteToFile(const char *path, string &info) const; + Value value; - //! The format to automatically print query profiling information in (default: disabled) - ProfilerPrintFormat automatic_print_format; - //! The file to save query profiling information to, instead of printing it to the console (empty = print to - //! console) - string save_location; +public: + string ToString() const override; - idx_t OperatorSize() { - return tree_map.size(); - } + bool Equals(const BaseExpression *other) const override; + hash_t Hash() const override; -private: - //! Whether or not query profiling is enabled - bool enabled; - //! Whether or not detailed query profiling is enabled - bool detailed_enabled; - //! Whether or not the query profiler is running - bool running; - - bool query_requires_profiling; - - //! The root of the query tree - unique_ptr root; - //! The query string - string query; + unique_ptr Copy() override; +}; +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/planner/expression/bound_function_expression.hpp +// +// +//===----------------------------------------------------------------------===// - //! The timer used to time the execution time of the entire query - Profiler main_query; - //! 
A map of a Physical Operator pointer to a tree node - unordered_map tree_map; -public: - const unordered_map &GetTreeMap() const { - return tree_map; - } -private: - //! The timer used to time the individual phases of the planning process - Profiler phase_profiler; - //! A mapping of the phase names to the timings - using PhaseTimingStorage = unordered_map; - PhaseTimingStorage phase_timings; - using PhaseTimingItem = PhaseTimingStorage::value_type; - //! The stack of currently active phases - vector phase_stack; -private: - vector GetOrderedPhaseTimings() const; - //! Check whether or not an operator type requires query profiling. If none of the ops in a query require profiling - //! no profiling information is output. - bool OperatorRequiresProfiling(PhysicalOperatorType op_type); -}; -//! The QueryProfilerHistory can be used to access the profiler of previous queries -class QueryProfilerHistory { -private: - //! Previous Query profilers - deque> prev_profilers; - //! Previous Query profilers size - uint64_t prev_profilers_size = 20; +namespace duckdb { +class ScalarFunctionCatalogEntry; +//! Represents a function call that has been bound to a base function +class BoundFunctionExpression : public Expression { public: - deque> &GetPrevProfilers() { - return prev_profilers; - } + BoundFunctionExpression(LogicalType return_type, ScalarFunction bound_function, + vector> arguments, unique_ptr bind_info, + bool is_operator = false); - void SetPrevProfilersSize(uint64_t prevProfilersSize) { - prev_profilers_size = prevProfilersSize; - } - uint64_t GetPrevProfilersSize() const { - return prev_profilers_size; - } + // The bound function expression + ScalarFunction function; + //! List of child-expressions of the function + vector> children; + //! The bound function data (if any) + unique_ptr bind_info; + //! 
Whether or not the function is an operator, only used for rendering + bool is_operator; public: - void SetProfilerHistorySize(uint64_t size) { - this->prev_profilers_size = size; - } + bool HasSideEffects() const override; + bool IsFoldable() const override; + string ToString() const override; + + hash_t Hash() const override; + bool Equals(const BaseExpression *other) const override; + + unique_ptr Copy() override; }; } // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp +// +// +//===----------------------------------------------------------------------===// + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/transaction/transaction_context.hpp +// duckdb/catalog/standard_entry.hpp // // //===----------------------------------------------------------------------===// @@ -10813,266 +12972,162 @@ class QueryProfilerHistory { - namespace duckdb { +class SchemaCatalogEntry; -class ClientContext; -class Transaction; -class TransactionManager; - -//! The transaction context keeps track of all the information relating to the -//! current transaction -class TransactionContext { +//! 
A StandardEntry is a catalog entry that is a member of a schema +class StandardEntry : public CatalogEntry { public: - TransactionContext(TransactionManager &transaction_manager, ClientContext &context) - : transaction_manager(transaction_manager), context(context), auto_commit(true), current_transaction(nullptr) { - } - ~TransactionContext(); - - Transaction &ActiveTransaction() { - D_ASSERT(current_transaction); - return *current_transaction; - } - - bool HasActiveTransaction() { - return !!current_transaction; + StandardEntry(CatalogType type, SchemaCatalogEntry *schema, Catalog *catalog, string name) + : CatalogEntry(type, catalog, name), schema(schema) { } - - void RecordQuery(string query); - void BeginTransaction(); - void Commit(); - void Rollback(); - void ClearTransaction(); - - void SetAutoCommit(bool value); - bool IsAutoCommit() { - return auto_commit; + ~StandardEntry() override { } -private: - TransactionManager &transaction_manager; - ClientContext &context; - bool auto_commit; - - Transaction *current_transaction; - - TransactionContext(const TransactionContext &) = delete; + //! The schema the entry belongs to + SchemaCatalogEntry *schema; }; - } // namespace duckdb -#include -namespace duckdb { -class Appender; -class Catalog; -class DatabaseInstance; -class PreparedStatementData; -class Relation; -class BufferedFileWriter; +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/parser/parsed_data/create_scalar_function_info.hpp +// +// +//===----------------------------------------------------------------------===// -class ClientContextLock; -//! The ClientContext holds information relevant to the current client session -//! during execution -class ClientContext : public std::enable_shared_from_this { - friend class TransactionManager; -public: - DUCKDB_API explicit ClientContext(shared_ptr db); - DUCKDB_API ~ClientContext(); - //! Query profiler - QueryProfiler profiler; - //! 
QueryProfiler History - QueryProfilerHistory query_profiler_history; - //! The database that this client is connected to - shared_ptr db; - //! Data for the currently running transaction - TransactionContext transaction; - //! Whether or not the query is interrupted - bool interrupted; - //! The current query being executed by the client context - string query; - //! The query executor - Executor executor; - //! The Progress Bar - unique_ptr progress_bar; - //! If the progress bar is enabled or not. - bool enable_progress_bar = false; - //! If the print of the progress bar is enabled - bool print_progress_bar = true; - //! The wait time before showing the progress bar - int wait_time = 2000; - unique_ptr temporary_objects; - unordered_map> prepared_statements; - // Whether or not aggressive query verification is enabled - bool query_verification_enabled = false; - //! Enable the running of optimizers - bool enable_optimizer = true; - //! Force parallelism of small tables, used for testing - bool force_parallelism = false; - //! Force index join independent of table cardinality, used for testing - bool force_index_join = false; - //! Maximum bits allowed for using a perfect hash table (i.e. the perfect HT can hold up to 2^perfect_ht_threshold - //! elements) - idx_t perfect_ht_threshold = 12; - //! The writer used to log queries (if logging is enabled) - unique_ptr log_query_writer; - //! The explain output type used when none is specified (default: PHYSICAL_ONLY) - ExplainOutputType explain_output_type = ExplainOutputType::PHYSICAL_ONLY; - //! The random generator used by random(). Its seed value can be set by setseed(). 
- std::mt19937 random_engine; +namespace duckdb { -public: - DUCKDB_API Transaction &ActiveTransaction() { - return transaction.ActiveTransaction(); +struct CreateScalarFunctionInfo : public CreateFunctionInfo { + explicit CreateScalarFunctionInfo(ScalarFunction function) + : CreateFunctionInfo(CatalogType::SCALAR_FUNCTION_ENTRY) { + this->name = function.name; + functions.push_back(function); + } + explicit CreateScalarFunctionInfo(ScalarFunctionSet set) + : CreateFunctionInfo(CatalogType::SCALAR_FUNCTION_ENTRY), functions(move(set.functions)) { + this->name = set.name; + for (auto &func : functions) { + func.name = set.name; + } } - //! Interrupt execution of a query - DUCKDB_API void Interrupt(); - //! Enable query profiling - DUCKDB_API void EnableProfiling(); - //! Disable query profiling - DUCKDB_API void DisableProfiling(); - - //! Issue a query, returning a QueryResult. The QueryResult can be either a StreamQueryResult or a - //! MaterializedQueryResult. The StreamQueryResult will only be returned in the case of a successful SELECT - //! statement. - DUCKDB_API unique_ptr Query(const string &query, bool allow_stream_result); - DUCKDB_API unique_ptr Query(unique_ptr statement, bool allow_stream_result); - //! Fetch a query from the current result set (if any) - DUCKDB_API unique_ptr Fetch(); - //! Cleanup the result set (if any). - DUCKDB_API void Cleanup(); - //! Destroy the client context - DUCKDB_API void Destroy(); - - //! Get the table info of a specific table, or nullptr if it cannot be found - DUCKDB_API unique_ptr TableInfo(const string &schema_name, const string &table_name); - //! Appends a DataChunk to the specified table. Returns whether or not the append was successful. - DUCKDB_API void Append(TableDescription &description, DataChunk &chunk); - //! Try to bind a relation in the current client context; either throws an exception or fills the result_columns - //! 
list with the set of returned columns - DUCKDB_API void TryBindRelation(Relation &relation, vector &result_columns); - - //! Execute a relation - DUCKDB_API unique_ptr Execute(const shared_ptr &relation); - - //! Prepare a query - DUCKDB_API unique_ptr Prepare(const string &query); - //! Directly prepare a SQL statement - DUCKDB_API unique_ptr Prepare(unique_ptr statement); - - //! Execute a prepared statement with the given name and set of parameters - //! It is possible that the prepared statement will be re-bound. This will generally happen if the catalog is - //! modified in between the prepared statement being bound and the prepared statement being run. - DUCKDB_API unique_ptr Execute(const string &query, shared_ptr &prepared, - vector &values, bool allow_stream_result = true); - - //! Gets current percentage of the query's progress, returns 0 in case the progress bar is disabled. - int GetProgress(); - - //! Register function in the temporary schema - DUCKDB_API void RegisterFunction(CreateFunctionInfo *info); - - //! Parse statements from a query - DUCKDB_API vector> ParseStatements(const string &query); - void HandlePragmaStatements(vector> &statements); + vector functions; - //! Runs a function with a valid transaction context, potentially starting a transaction if the context is in auto - //! commit mode. - DUCKDB_API void RunFunctionInTransaction(const std::function &fun, - bool requires_valid_transaction = true); - //! Same as RunFunctionInTransaction, but does not obtain a lock on the client context or check for validation - DUCKDB_API void RunFunctionInTransactionInternal(ClientContextLock &lock, const std::function &fun, - bool requires_valid_transaction = true); +public: + unique_ptr Copy() const override { + ScalarFunctionSet set(name); + set.functions = functions; + auto result = make_unique(move(set)); + CopyProperties(*result); + return move(result); + } +}; -private: - //! 
Parse statements from a query - vector> ParseStatementsInternal(ClientContextLock &lock, const string &query); - //! Perform aggressive query verification of a SELECT statement. Only called when query_verification_enabled is - //! true. - string VerifyQuery(ClientContextLock &lock, const string &query, unique_ptr statement); +} // namespace duckdb - void InitialCleanup(ClientContextLock &lock); - //! Internal clean up, does not lock. Caller must hold the context_lock. - void CleanupInternal(ClientContextLock &lock); - string FinalizeQuery(ClientContextLock &lock, bool success); - //! Internal fetch, does not lock. Caller must hold the context_lock. - unique_ptr FetchInternal(ClientContextLock &lock); - //! Internally execute a set of SQL statement. Caller must hold the context_lock. - unique_ptr RunStatements(ClientContextLock &lock, const string &query, - vector> &statements, bool allow_stream_result); - //! Internally prepare and execute a prepared SQL statement. Caller must hold the context_lock. - unique_ptr RunStatement(ClientContextLock &lock, const string &query, - unique_ptr statement, bool allow_stream_result); - unique_ptr RunStatementOrPreparedStatement(ClientContextLock &lock, const string &query, - unique_ptr statement, - shared_ptr &prepared, - vector *values, bool allow_stream_result); - //! Internally prepare a SQL statement. Caller must hold the context_lock. - shared_ptr CreatePreparedStatement(ClientContextLock &lock, const string &query, - unique_ptr statement); - //! Internally execute a prepared SQL statement. Caller must hold the context_lock. - unique_ptr ExecutePreparedStatement(ClientContextLock &lock, const string &query, - shared_ptr statement, - vector bound_values, bool allow_stream_result); - //! 
Call CreatePreparedStatement() and ExecutePreparedStatement() without any bound values - unique_ptr RunStatementInternal(ClientContextLock &lock, const string &query, - unique_ptr statement, bool allow_stream_result); - unique_ptr PrepareInternal(ClientContextLock &lock, unique_ptr statement); - void LogQueryInternal(ClientContextLock &lock, const string &query); +namespace duckdb { - unique_ptr LockContext(); +//! A table function in the catalog +class ScalarFunctionCatalogEntry : public StandardEntry { +public: + ScalarFunctionCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, CreateScalarFunctionInfo *info) + : StandardEntry(CatalogType::SCALAR_FUNCTION_ENTRY, schema, catalog, info->name), functions(info->functions) { + } -private: - //! The currently opened StreamQueryResult (if any) - StreamQueryResult *open_result = nullptr; - //! Lock on using the ClientContext in parallel - std::mutex context_lock; + //! The scalar functions + vector functions; }; - } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/parsed_data/create_table_function_info.hpp +// duckdb/parser/parsed_data/create_table_info.hpp // // //===----------------------------------------------------------------------===// + + + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/parsed_data/create_function_info.hpp +// duckdb/parser/constraint.hpp // // //===----------------------------------------------------------------------===// + + +namespace duckdb { + +class Serializer; +class Deserializer; + +//===--------------------------------------------------------------------===// +// Constraint Types +//===--------------------------------------------------------------------===// +enum class ConstraintType : uint8_t { + INVALID = 0, // invalid constraint type + NOT_NULL = 1, // NOT NULL constraint + CHECK = 2, // CHECK constraint + UNIQUE = 3, // UNIQUE 
constraint + FOREIGN_KEY = 4 // FOREIGN KEY constraint +}; + +//! Constraint is the base class of any type of table constraint. +class Constraint { +public: + explicit Constraint(ConstraintType type) : type(type) {}; + virtual ~Constraint() { + } + + ConstraintType type; + +public: + virtual string ToString() const = 0; + void Print(); + + virtual unique_ptr Copy() = 0; + //! Serializes a Constraint to a stand-alone binary blob + virtual void Serialize(Serializer &serializer); + //! Deserializes a blob back into a Constraint, returns NULL if + //! deserialization is not possible + static unique_ptr Deserialize(Deserializer &source); +}; +} // namespace duckdb + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/parsed_data/create_info.hpp +// duckdb/parser/statement/select_statement.hpp // // //===----------------------------------------------------------------------===// + + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/parsed_data/parse_info.hpp +// duckdb/parser/query_node.hpp // // //===----------------------------------------------------------------------===// @@ -11081,76 +13136,112 @@ class ClientContext : public std::enable_shared_from_this { -namespace duckdb { -struct ParseInfo { - virtual ~ParseInfo() { - } -}; +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/parser/result_modifier.hpp +// +// +//===----------------------------------------------------------------------===// + + + + -} // namespace duckdb namespace duckdb { -enum class OnCreateConflict : uint8_t { - // Standard: throw error - ERROR_ON_CONFLICT, - // CREATE IF NOT EXISTS, silently do nothing on conflict - IGNORE_ON_CONFLICT, - // CREATE OR REPLACE - REPLACE_ON_CONFLICT -}; +enum ResultModifierType : uint8_t { LIMIT_MODIFIER = 1, ORDER_MODIFIER = 2, DISTINCT_MODIFIER = 3 }; -struct CreateInfo : public 
ParseInfo { - explicit CreateInfo(CatalogType type, string schema = DEFAULT_SCHEMA) - : type(type), schema(schema), on_conflict(OnCreateConflict::ERROR_ON_CONFLICT), temporary(false), - internal(false) { +//! A ResultModifier +class ResultModifier { +public: + explicit ResultModifier(ResultModifierType type) : type(type) { } - ~CreateInfo() override { + virtual ~ResultModifier() { } - //! The to-be-created catalog type - CatalogType type; - //! The schema name of the entry - string schema; - //! What to do on create conflict - OnCreateConflict on_conflict; - //! Whether or not the entry is temporary - bool temporary; - //! Whether or not the entry is an internal entry - bool internal; - //! The SQL string of the CREATE statement - string sql; + ResultModifierType type; public: - virtual unique_ptr Copy() const = 0; - void CopyProperties(CreateInfo &other) const { - other.type = type; - other.schema = schema; - other.on_conflict = on_conflict; - other.temporary = temporary; - other.internal = internal; - other.sql = sql; + //! Returns true if the two result modifiers are equivalent + virtual bool Equals(const ResultModifier *other) const; + + //! Create a copy of this ResultModifier + virtual unique_ptr Copy() = 0; + //! Serializes a ResultModifier to a stand-alone binary blob + virtual void Serialize(Serializer &serializer); + //! Deserializes a blob back into a ResultModifier + static unique_ptr Deserialize(Deserializer &source); +}; + +//! Single node in ORDER BY statement +struct OrderByNode { + OrderByNode(OrderType type, OrderByNullType null_order, unique_ptr expression) + : type(type), null_order(null_order), expression(move(expression)) { } + + //! Sort order, ASC or DESC + OrderType type; + //! The NULL sort order, NULLS_FIRST or NULLS_LAST + OrderByNullType null_order; + //! 
Expression to order by + unique_ptr expression; + +public: + void Serialize(Serializer &serializer); + string ToString() const; + static OrderByNode Deserialize(Deserializer &source); }; -} // namespace duckdb +class LimitModifier : public ResultModifier { +public: + LimitModifier() : ResultModifier(ResultModifierType::LIMIT_MODIFIER) { + } + //! LIMIT count + unique_ptr limit; + //! OFFSET + unique_ptr offset; +public: + bool Equals(const ResultModifier *other) const override; + unique_ptr Copy() override; + void Serialize(Serializer &serializer) override; + static unique_ptr Deserialize(Deserializer &source); +}; -namespace duckdb { +class OrderModifier : public ResultModifier { +public: + OrderModifier() : ResultModifier(ResultModifierType::ORDER_MODIFIER) { + } -struct CreateFunctionInfo : public CreateInfo { - explicit CreateFunctionInfo(CatalogType type) : CreateInfo(type) { - D_ASSERT(type == CatalogType::SCALAR_FUNCTION_ENTRY || type == CatalogType::AGGREGATE_FUNCTION_ENTRY || - type == CatalogType::TABLE_FUNCTION_ENTRY || type == CatalogType::PRAGMA_FUNCTION_ENTRY || - type == CatalogType::MACRO_ENTRY); + //! List of order nodes + vector orders; + +public: + bool Equals(const ResultModifier *other) const override; + unique_ptr Copy() override; + void Serialize(Serializer &serializer) override; + static unique_ptr Deserialize(Deserializer &source); +}; + +class DistinctModifier : public ResultModifier { +public: + DistinctModifier() : ResultModifier(ResultModifierType::DISTINCT_MODIFIER) { } - //! Function name - string name; + //! 
list of distinct on targets (if any) + vector> distinct_on_targets; + +public: + bool Equals(const ResultModifier *other) const override; + unique_ptr Copy() override; + void Serialize(Serializer &serializer) override; + static unique_ptr Deserialize(Deserializer &source); }; } // namespace duckdb @@ -11158,53 +13249,22 @@ struct CreateFunctionInfo : public CreateInfo { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/function/function_set.hpp +// duckdb/parser/common_table_expression_info.hpp // // -//===----------------------------------------------------------------------===// - - - - - - - -namespace duckdb { - -template -class FunctionSet { -public: - explicit FunctionSet(string name) : name(name) { - } +//===----------------------------------------------------------------------===// - //! The name of the function set - string name; - //! The set of functions - vector functions; -public: - void AddFunction(T function) { - function.name = name; - functions.push_back(function); - } -}; -class ScalarFunctionSet : public FunctionSet { -public: - explicit ScalarFunctionSet(string name) : FunctionSet(move(name)) { - } -}; -class AggregateFunctionSet : public FunctionSet { -public: - explicit AggregateFunctionSet(string name) : FunctionSet(move(name)) { - } -}; -class TableFunctionSet : public FunctionSet { -public: - explicit TableFunctionSet(string name) : FunctionSet(move(name)) { - } +namespace duckdb { + +class SelectStatement; + +struct CommonTableExpressionInfo { + vector aliases; + unique_ptr query; }; } // namespace duckdb @@ -11212,34 +13272,53 @@ class TableFunctionSet : public FunctionSet { namespace duckdb { -struct CreateTableFunctionInfo : public CreateFunctionInfo { - explicit CreateTableFunctionInfo(TableFunctionSet set) - : CreateFunctionInfo(CatalogType::TABLE_FUNCTION_ENTRY), functions(move(set.functions)) { - this->name = set.name; +enum QueryNodeType : uint8_t { + SELECT_NODE = 1, + 
SET_OPERATION_NODE = 2, + BOUND_SUBQUERY_NODE = 3, + RECURSIVE_CTE_NODE = 4 +}; + +class QueryNode { +public: + explicit QueryNode(QueryNodeType type) : type(type) { } - explicit CreateTableFunctionInfo(TableFunction function) : CreateFunctionInfo(CatalogType::TABLE_FUNCTION_ENTRY) { - this->name = function.name; - functions.push_back(move(function)); + virtual ~QueryNode() { } - //! The table functions - vector functions; + //! The type of the query node, either SetOperation or Select + QueryNodeType type; + //! The set of result modifiers associated with this query node + vector> modifiers; + //! CTEs (used by SelectNode and SetOperationNode) + unordered_map> cte_map; + + virtual const vector> &GetSelectList() const = 0; public: - unique_ptr Copy() const override { - TableFunctionSet set(name); - set.functions = functions; - auto result = make_unique(move(set)); - CopyProperties(*result); - return move(result); - } + virtual bool Equals(const QueryNode *other) const; + + //! Create a copy of this QueryNode + virtual unique_ptr Copy() = 0; + //! Serializes a QueryNode to a stand-alone binary blob + virtual void Serialize(Serializer &serializer); + //! Deserializes a blob back into a QueryNode, returns nullptr if + //! deserialization is not possible + static unique_ptr Deserialize(Deserializer &source); + +protected: + //! Copy base QueryNode properties from another expression to this one, + //! 
used in Copy method + void CopyProperties(QueryNode &other) const; }; } // namespace duckdb + + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/parsed_data/create_copy_function_info.hpp +// duckdb/parser/tableref.hpp // // //===----------------------------------------------------------------------===// @@ -11250,7 +13329,7 @@ struct CreateTableFunctionInfo : public CreateFunctionInfo { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/function/copy_function.hpp +// duckdb/common/enums/tableref_type.hpp // // //===----------------------------------------------------------------------===// @@ -11259,10 +13338,29 @@ struct CreateTableFunctionInfo : public CreateFunctionInfo { +namespace duckdb { + +//===--------------------------------------------------------------------===// +// Table Reference Types +//===--------------------------------------------------------------------===// +enum class TableReferenceType : uint8_t { + INVALID = 0, // invalid table reference type + BASE_TABLE = 1, // base table reference + SUBQUERY = 2, // output of a subquery + JOIN = 3, // output of join + CROSS_PRODUCT = 4, // out of cartesian product + TABLE_FUNCTION = 5, // table producing function + EXPRESSION_LIST = 6, // expression list + CTE = 7, // Recursive CTE + EMPTY = 8 // placeholder for empty FROM +}; + +} // namespace duckdb + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/parsed_data/copy_info.hpp +// duckdb/parser/parsed_data/sample_options.hpp // // //===----------------------------------------------------------------------===// @@ -11276,109 +13374,121 @@ struct CreateTableFunctionInfo : public CreateFunctionInfo { namespace duckdb { -struct CopyInfo : public ParseInfo { - CopyInfo() : schema(DEFAULT_SCHEMA) { - } +enum class SampleMethod : uint8_t { SYSTEM_SAMPLE = 0, BERNOULLI_SAMPLE = 1, 
RESERVOIR_SAMPLE = 2 }; - //! The schema name to copy to/from - string schema; - //! The table name to copy to/from - string table; - //! List of columns to copy to/from - vector select_list; - //! The file path to copy to/from - string file_path; - //! Whether or not this is a copy to file (false) or copy from a file (true) - bool is_from; - //! The file format of the external file - string format; - //! Set of (key, value) options - unordered_map> options; +string SampleMethodToString(SampleMethod method); -public: - unique_ptr Copy() const { - auto result = make_unique(); - result->schema = schema; - result->table = table; - result->select_list = select_list; - result->file_path = file_path; - result->is_from = is_from; - result->format = format; - result->options = options; - return result; - } +struct SampleOptions { + Value sample_size; + bool is_percentage; + SampleMethod method; + int64_t seed = -1; + + unique_ptr Copy(); + void Serialize(Serializer &serializer); + static unique_ptr Deserialize(Deserializer &source); + static bool Equals(SampleOptions *a, SampleOptions *b); }; } // namespace duckdb namespace duckdb { -class ExecutionContext; +class Deserializer; +class Serializer; -struct LocalFunctionData { - virtual ~LocalFunctionData() { +//! Represents a generic expression that returns a table. +class TableRef { +public: + explicit TableRef(TableReferenceType type) : type(type) { } -}; - -struct GlobalFunctionData { - virtual ~GlobalFunctionData() { + virtual ~TableRef() { } + + TableReferenceType type; + string alias; + //! Sample options (if any) + unique_ptr sample; + //! The location in the query (if any) + idx_t query_location = INVALID_INDEX; + +public: + //! Convert the object to a string + virtual string ToString() const; + void Print(); + + virtual bool Equals(const TableRef *other) const; + + virtual unique_ptr Copy() = 0; + + //! Serializes a TableRef to a stand-alone binary blob + virtual void Serialize(Serializer &serializer); + //! 
Deserializes a blob back into a TableRef + static unique_ptr Deserialize(Deserializer &source); + + //! Copy the properties of this table ref to the target + void CopyProperties(TableRef &target) const; }; +} // namespace duckdb -typedef unique_ptr (*copy_to_bind_t)(ClientContext &context, CopyInfo &info, vector &names, - vector &sql_types); -typedef unique_ptr (*copy_to_initialize_local_t)(ClientContext &context, FunctionData &bind_data); -typedef unique_ptr (*copy_to_initialize_global_t)(ClientContext &context, FunctionData &bind_data); -typedef void (*copy_to_sink_t)(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate, - LocalFunctionData &lstate, DataChunk &input); -typedef void (*copy_to_combine_t)(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate, - LocalFunctionData &lstate); -typedef void (*copy_to_finalize_t)(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate); -typedef unique_ptr (*copy_from_bind_t)(ClientContext &context, CopyInfo &info, - vector &expected_names, - vector &expected_types); +namespace duckdb { -class CopyFunction : public Function { +class QueryNode; + +//! SelectStatement is a typical SELECT clause +class SelectStatement : public SQLStatement { public: - explicit CopyFunction(string name) - : Function(name), copy_to_bind(nullptr), copy_to_initialize_local(nullptr), copy_to_initialize_global(nullptr), - copy_to_sink(nullptr), copy_to_combine(nullptr), copy_to_finalize(nullptr), copy_from_bind(nullptr) { + SelectStatement() : SQLStatement(StatementType::SELECT_STATEMENT) { } - copy_to_bind_t copy_to_bind; - copy_to_initialize_local_t copy_to_initialize_local; - copy_to_initialize_global_t copy_to_initialize_global; - copy_to_sink_t copy_to_sink; - copy_to_combine_t copy_to_combine; - copy_to_finalize_t copy_to_finalize; - - copy_from_bind_t copy_from_bind; - TableFunction copy_from_function; + //! 
The main query node + unique_ptr node; - string extension; +public: + //! Create a copy of this SelectStatement + unique_ptr Copy() const override; + //! Serializes a SelectStatement to a stand-alone binary blob + void Serialize(Serializer &serializer); + //! Deserializes a blob back into a SelectStatement, returns nullptr if + //! deserialization is not possible + static unique_ptr Deserialize(Deserializer &source); + //! Whether or not the statements are equivalent + bool Equals(const SQLStatement *other) const; }; - } // namespace duckdb namespace duckdb { -struct CreateCopyFunctionInfo : public CreateInfo { - explicit CreateCopyFunctionInfo(CopyFunction function) - : CreateInfo(CatalogType::COPY_FUNCTION_ENTRY), function(function) { - this->name = function.name; +struct CreateTableInfo : public CreateInfo { + CreateTableInfo() : CreateInfo(CatalogType::TABLE_ENTRY, INVALID_SCHEMA) { + } + CreateTableInfo(string schema, string name) : CreateInfo(CatalogType::TABLE_ENTRY, schema), table(name) { } - //! Function name - string name; - //! The table function - CopyFunction function; + //! Table name to insert to + string table; + //! List of columns of the table + vector columns; + //! List of constraints on the table + vector> constraints; + //! 
CREATE TABLE from QUERY + unique_ptr query; public: unique_ptr Copy() const override { - auto result = make_unique(function); + auto result = make_unique(schema, table); CopyProperties(*result); + for (auto &column : columns) { + result->columns.push_back(column.Copy()); + } + for (auto &constraint : constraints) { + result->constraints.push_back(constraint->Copy()); + } + if (query) { + result->query = unique_ptr_cast(query->Copy()); + } return move(result); } }; @@ -11387,7 +13497,7 @@ struct CreateCopyFunctionInfo : public CreateInfo { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/planner/expression/bound_constant_expression.hpp +// duckdb/planner/parsed_data/bound_create_table_info.hpp // // //===----------------------------------------------------------------------===// @@ -11395,29 +13505,36 @@ struct CreateCopyFunctionInfo : public CreateInfo { +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/planner/bound_constraint.hpp +// +// +//===----------------------------------------------------------------------===// -namespace duckdb { -class BoundConstantExpression : public Expression { -public: - explicit BoundConstantExpression(Value value); - Value value; -public: - string ToString() const override; - bool Equals(const BaseExpression *other) const override; - hash_t Hash() const override; +namespace duckdb { +//! 
Bound equivalent of Constraint +class BoundConstraint { +public: + explicit BoundConstraint(ConstraintType type) : type(type) {}; + virtual ~BoundConstraint() { + } - unique_ptr Copy() override; + ConstraintType type; }; } // namespace duckdb + + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/planner/expression/bound_function_expression.hpp +// duckdb/planner/logical_operator.hpp // // //===----------------------------------------------------------------------===// @@ -11426,140 +13543,287 @@ class BoundConstantExpression : public Expression { +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/enums/logical_operator_type.hpp +// +// +//===----------------------------------------------------------------------===// + + + + namespace duckdb { -class ScalarFunctionCatalogEntry; -//! Represents a function call that has been bound to a base function -class BoundFunctionExpression : public Expression { -public: - BoundFunctionExpression(LogicalType return_type, ScalarFunction bound_function, - vector> arguments, unique_ptr bind_info, - bool is_operator = false); +//===--------------------------------------------------------------------===// +// Logical Operator Types +//===--------------------------------------------------------------------===// +enum class LogicalOperatorType : uint8_t { + LOGICAL_INVALID = 0, + LOGICAL_PROJECTION = 1, + LOGICAL_FILTER = 2, + LOGICAL_AGGREGATE_AND_GROUP_BY = 3, + LOGICAL_WINDOW = 4, + LOGICAL_UNNEST = 5, + LOGICAL_LIMIT = 6, + LOGICAL_ORDER_BY = 7, + LOGICAL_TOP_N = 8, + LOGICAL_COPY_TO_FILE = 10, + LOGICAL_DISTINCT = 11, + LOGICAL_SAMPLE = 12, - // The bound function expression - ScalarFunction function; - //! List of child-expressions of the function - vector> children; - //! The bound function data (if any) - unique_ptr bind_info; - //! 
Whether or not the function is an operator, only used for rendering - bool is_operator; + // ----------------------------- + // Data sources + // ----------------------------- + LOGICAL_GET = 25, + LOGICAL_CHUNK_GET = 26, + LOGICAL_DELIM_GET = 27, + LOGICAL_EXPRESSION_GET = 28, + LOGICAL_DUMMY_SCAN = 29, + LOGICAL_EMPTY_RESULT = 30, + LOGICAL_CTE_REF = 31, + // ----------------------------- + // Joins + // ----------------------------- + LOGICAL_JOIN = 50, + LOGICAL_DELIM_JOIN = 51, + LOGICAL_COMPARISON_JOIN = 52, + LOGICAL_ANY_JOIN = 53, + LOGICAL_CROSS_PRODUCT = 54, + // ----------------------------- + // SetOps + // ----------------------------- + LOGICAL_UNION = 75, + LOGICAL_EXCEPT = 76, + LOGICAL_INTERSECT = 77, + LOGICAL_RECURSIVE_CTE = 78, -public: - bool HasSideEffects() const override; - bool IsFoldable() const override; - string ToString() const override; + // ----------------------------- + // Updates + // ----------------------------- + LOGICAL_INSERT = 100, + LOGICAL_DELETE = 101, + LOGICAL_UPDATE = 102, - hash_t Hash() const override; - bool Equals(const BaseExpression *other) const override; + // ----------------------------- + // Schema + // ----------------------------- + LOGICAL_ALTER = 125, + LOGICAL_CREATE_TABLE = 126, + LOGICAL_CREATE_INDEX = 127, + LOGICAL_CREATE_SEQUENCE = 128, + LOGICAL_CREATE_VIEW = 129, + LOGICAL_CREATE_SCHEMA = 130, + LOGICAL_CREATE_MACRO = 131, + LOGICAL_DROP = 132, + LOGICAL_PRAGMA = 133, + LOGICAL_TRANSACTION = 134, - unique_ptr Copy() override; + // ----------------------------- + // Explain + // ----------------------------- + LOGICAL_EXPLAIN = 150, + + // ----------------------------- + // Show + // ----------------------------- + LOGICAL_SHOW = 160, + + // ----------------------------- + // Helpers + // ----------------------------- + LOGICAL_PREPARE = 175, + LOGICAL_EXECUTE = 176, + LOGICAL_EXPORT = 177, + LOGICAL_VACUUM = 178, + LOGICAL_SET = 179, + LOGICAL_LOAD = 180 }; + +string 
LogicalOperatorToString(LogicalOperatorType type); + } // namespace duckdb + + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp +// duckdb/planner/logical_operator_visitor.hpp // // //===----------------------------------------------------------------------===// + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/catalog/standard_entry.hpp +// duckdb/planner/bound_tokens.hpp // // //===----------------------------------------------------------------------===// +namespace duckdb { +//===--------------------------------------------------------------------===// +// Query Node +//===--------------------------------------------------------------------===// +class BoundQueryNode; +class BoundSelectNode; +class BoundSetOperationNode; +class BoundRecursiveCTENode; -namespace duckdb { -class SchemaCatalogEntry; +//===--------------------------------------------------------------------===// +// Expressions +//===--------------------------------------------------------------------===// +class Expression; -//! A StandardEntry is a catalog entry that is a member of a schema -class StandardEntry : public CatalogEntry { -public: - StandardEntry(CatalogType type, SchemaCatalogEntry *schema, Catalog *catalog, string name) - : CatalogEntry(type, catalog, name), schema(schema) { - } - ~StandardEntry() override { - } +class BoundAggregateExpression; +class BoundBetweenExpression; +class BoundCaseExpression; +class BoundCastExpression; +class BoundColumnRefExpression; +class BoundComparisonExpression; +class BoundConjunctionExpression; +class BoundConstantExpression; +class BoundDefaultExpression; +class BoundFunctionExpression; +class BoundOperatorExpression; +class BoundParameterExpression; +class BoundReferenceExpression; +class BoundSubqueryExpression; +class BoundUnnestExpression; +class BoundWindowExpression; - //! 
The schema the entry belongs to - SchemaCatalogEntry *schema; -}; -} // namespace duckdb +//===--------------------------------------------------------------------===// +// TableRefs +//===--------------------------------------------------------------------===// +class BoundTableRef; +class BoundBaseTableRef; +class BoundCrossProductRef; +class BoundJoinRef; +class BoundSubqueryRef; +class BoundTableFunction; +class BoundEmptyTableRef; +class BoundExpressionListRef; +class BoundCTERef; +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/parsed_data/create_scalar_function_info.hpp +// duckdb/planner/logical_tokens.hpp // // //===----------------------------------------------------------------------===// +namespace duckdb { + +class LogicalOperator; + +class LogicalAggregate; +class LogicalAnyJoin; +class LogicalChunkGet; +class LogicalComparisonJoin; +class LogicalCopyToFile; +class LogicalCreate; +class LogicalCreateTable; +class LogicalCreateIndex; +class LogicalCreateTable; +class LogicalCrossProduct; +class LogicalCTERef; +class LogicalDelete; +class LogicalDelimGet; +class LogicalDelimJoin; +class LogicalDistinct; +class LogicalDummyScan; +class LogicalEmptyResult; +class LogicalExecute; +class LogicalExplain; +class LogicalExport; +class LogicalExpressionGet; +class LogicalFilter; +class LogicalGet; +class LogicalInsert; +class LogicalJoin; +class LogicalLimit; +class LogicalOrder; +class LogicalPragma; +class LogicalPrepare; +class LogicalProjection; +class LogicalRecursiveCTE; +class LogicalSetOperation; +class LogicalSample; +class LogicalShow; +class LogicalSimple; +class LogicalSet; +class LogicalTopN; +class LogicalUnnest; +class LogicalUpdate; +class LogicalWindow; +} // namespace duckdb +#include namespace duckdb { - -struct CreateScalarFunctionInfo : public CreateFunctionInfo { - explicit CreateScalarFunctionInfo(ScalarFunction function) - : 
CreateFunctionInfo(CatalogType::SCALAR_FUNCTION_ENTRY) { - this->name = function.name; - functions.push_back(function); - } - explicit CreateScalarFunctionInfo(ScalarFunctionSet set) - : CreateFunctionInfo(CatalogType::SCALAR_FUNCTION_ENTRY), functions(move(set.functions)) { - this->name = set.name; - for (auto &func : functions) { - func.name = set.name; - } - } - - vector functions; - +//! The LogicalOperatorVisitor is an abstract base class that implements the +//! Visitor pattern on LogicalOperator. +class LogicalOperatorVisitor { public: - unique_ptr Copy() const override { - ScalarFunctionSet set(name); - set.functions = functions; - auto result = make_unique(move(set)); - CopyProperties(*result); - return move(result); - } -}; + virtual ~LogicalOperatorVisitor() {}; -} // namespace duckdb + virtual void VisitOperator(LogicalOperator &op); + virtual void VisitExpression(unique_ptr *expression); + static void EnumerateExpressions(LogicalOperator &op, + const std::function *child)> &callback); -namespace duckdb { +protected: + //! Automatically calls the Visit method for LogicalOperator children of the current operator. Can be overloaded to + //! change this behavior. + void VisitOperatorChildren(LogicalOperator &op); + //! Automatically calls the Visit method for Expression children of the current operator. Can be overloaded to + //! change this behavior. + void VisitOperatorExpressions(LogicalOperator &op); -//! A table function in the catalog -class ScalarFunctionCatalogEntry : public StandardEntry { -public: - ScalarFunctionCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, CreateScalarFunctionInfo *info) - : StandardEntry(CatalogType::SCALAR_FUNCTION_ENTRY, schema, catalog, info->name), functions(info->functions) { - } + // The VisitExpressionChildren method is called at the end of every call to VisitExpression to recursively visit all + // expressions in an expression tree. It can be overloaded to prevent automatically visiting the entire tree. 
+ virtual void VisitExpressionChildren(Expression &expression); - //! The scalar functions - vector functions; + virtual unique_ptr VisitReplace(BoundAggregateExpression &expr, unique_ptr *expr_ptr); + virtual unique_ptr VisitReplace(BoundBetweenExpression &expr, unique_ptr *expr_ptr); + virtual unique_ptr VisitReplace(BoundCaseExpression &expr, unique_ptr *expr_ptr); + virtual unique_ptr VisitReplace(BoundCastExpression &expr, unique_ptr *expr_ptr); + virtual unique_ptr VisitReplace(BoundColumnRefExpression &expr, unique_ptr *expr_ptr); + virtual unique_ptr VisitReplace(BoundComparisonExpression &expr, unique_ptr *expr_ptr); + virtual unique_ptr VisitReplace(BoundConjunctionExpression &expr, unique_ptr *expr_ptr); + virtual unique_ptr VisitReplace(BoundConstantExpression &expr, unique_ptr *expr_ptr); + virtual unique_ptr VisitReplace(BoundDefaultExpression &expr, unique_ptr *expr_ptr); + virtual unique_ptr VisitReplace(BoundFunctionExpression &expr, unique_ptr *expr_ptr); + virtual unique_ptr VisitReplace(BoundOperatorExpression &expr, unique_ptr *expr_ptr); + virtual unique_ptr VisitReplace(BoundReferenceExpression &expr, unique_ptr *expr_ptr); + virtual unique_ptr VisitReplace(BoundSubqueryExpression &expr, unique_ptr *expr_ptr); + virtual unique_ptr VisitReplace(BoundParameterExpression &expr, unique_ptr *expr_ptr); + virtual unique_ptr VisitReplace(BoundWindowExpression &expr, unique_ptr *expr_ptr); + virtual unique_ptr VisitReplace(BoundUnnestExpression &expr, unique_ptr *expr_ptr); }; } // namespace duckdb + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/parsed_data/create_table_info.hpp +// duckdb/planner/column_binding.hpp // // //===----------------------------------------------------------------------===// @@ -11567,62 +13831,96 @@ class ScalarFunctionCatalogEntry : public StandardEntry { +#include +namespace duckdb { 
-//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/parser/constraint.hpp -// -// -//===----------------------------------------------------------------------===// +struct ColumnBinding { + idx_t table_index; + idx_t column_index; + ColumnBinding() : table_index(INVALID_INDEX), column_index(INVALID_INDEX) { + } + ColumnBinding(idx_t table, idx_t column) : table_index(table), column_index(column) { + } + + bool operator==(const ColumnBinding &rhs) const { + return table_index == rhs.table_index && column_index == rhs.column_index; + } +}; +} // namespace duckdb +#include +#include namespace duckdb { -class Serializer; -class Deserializer; +//! LogicalOperator is the base class of the logical operators present in the +//! logical query tree +class LogicalOperator { +public: + explicit LogicalOperator(LogicalOperatorType type) : type(type) { + } + LogicalOperator(LogicalOperatorType type, vector> expressions) + : type(type), expressions(move(expressions)) { + } + virtual ~LogicalOperator() { + } -//===--------------------------------------------------------------------===// -// Constraint Types -//===--------------------------------------------------------------------===// -enum class ConstraintType : uint8_t { - INVALID = 0, // invalid constraint type - NOT_NULL = 1, // NOT NULL constraint - CHECK = 2, // CHECK constraint - UNIQUE = 3, // UNIQUE constraint - FOREIGN_KEY = 4 // FOREIGN KEY constraint -}; + //! The type of the logical operator + LogicalOperatorType type; + //! The set of children of the operator + vector> children; + //! The set of expressions contained within the operator, if any + vector> expressions; + //! The types returned by this logical operator. Set by calling LogicalOperator::ResolveTypes. + vector types; + //! Estimated Cardinality + idx_t estimated_cardinality = 0; -//! Constraint is the base class of any type of table constraint. 
-class Constraint { public: - explicit Constraint(ConstraintType type) : type(type) {}; - virtual ~Constraint() { + virtual vector GetColumnBindings() { + return {ColumnBinding(0, 0)}; } + static vector GenerateColumnBindings(idx_t table_idx, idx_t column_count); + static vector MapTypes(const vector &types, const vector &projection_map); + static vector MapBindings(const vector &types, const vector &projection_map); - ConstraintType type; + //! Resolve the types of the logical operator and its children + void ResolveOperatorTypes(); -public: - virtual string ToString() const = 0; + virtual string GetName() const; + virtual string ParamsToString() const; + virtual string ToString(idx_t depth = 0) const; void Print(); + //! Debug method: verify that the integrity of expressions & child nodes are maintained + virtual void Verify(); - virtual unique_ptr Copy() = 0; - //! Serializes a Constraint to a stand-alone binary blob - virtual void Serialize(Serializer &serializer); - //! Deserializes a blob back into a Constraint, returns NULL if - //! deserialization is not possible - static unique_ptr Deserialize(Deserializer &source); + void AddChild(unique_ptr child) { + children.push_back(move(child)); + } + + virtual idx_t EstimateCardinality(ClientContext &context) { + // simple estimator, just take the max of the children + idx_t max_cardinality = 0; + for (auto &child : children) { + max_cardinality = MaxValue(child->EstimateCardinality(context), max_cardinality); + } + return max_cardinality; + } + +protected: + //! 
Resolve types for this specific operator + virtual void ResolveTypes() = 0; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/statement/select_statement.hpp +// duckdb/storage/table/persistent_table_data.hpp // // //===----------------------------------------------------------------------===// @@ -11634,7 +13932,7 @@ class Constraint { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/query_node.hpp +// duckdb/storage/table/segment_tree.hpp // // //===----------------------------------------------------------------------===// @@ -11642,12 +13940,10 @@ class Constraint { - - //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/result_modifier.hpp +// duckdb/storage/storage_lock.hpp // // //===----------------------------------------------------------------------===// @@ -11658,96 +13954,41 @@ class Constraint { - namespace duckdb { +class StorageLock; -enum ResultModifierType : uint8_t { LIMIT_MODIFIER = 1, ORDER_MODIFIER = 2, DISTINCT_MODIFIER = 3 }; - -//! A ResultModifier -class ResultModifier { -public: - explicit ResultModifier(ResultModifierType type) : type(type) { - } - virtual ~ResultModifier() { - } - - ResultModifierType type; - -public: - //! Returns true if the two result modifiers are equivalent - virtual bool Equals(const ResultModifier *other) const; - - //! Create a copy of this ResultModifier - virtual unique_ptr Copy() = 0; - //! Serializes a ResultModifier to a stand-alone binary blob - virtual void Serialize(Serializer &serializer); - //! Deserializes a blob back into a ResultModifier - static unique_ptr Deserialize(Deserializer &source); -}; - -//! 
Single node in ORDER BY statement -struct OrderByNode { - OrderByNode(OrderType type, OrderByNullType null_order, unique_ptr expression) - : type(type), null_order(null_order), expression(move(expression)) { - } - - //! Sort order, ASC or DESC - OrderType type; - //! The NULL sort order, NULLS_FIRST or NULLS_LAST - OrderByNullType null_order; - //! Expression to order by - unique_ptr expression; - -public: - void Serialize(Serializer &serializer); - static OrderByNode Deserialize(Deserializer &source); -}; - -class LimitModifier : public ResultModifier { -public: - LimitModifier() : ResultModifier(ResultModifierType::LIMIT_MODIFIER) { - } - - //! LIMIT count - unique_ptr limit; - //! OFFSET - unique_ptr offset; - -public: - bool Equals(const ResultModifier *other) const override; - unique_ptr Copy() override; - void Serialize(Serializer &serializer) override; - static unique_ptr Deserialize(Deserializer &source); -}; - -class OrderModifier : public ResultModifier { -public: - OrderModifier() : ResultModifier(ResultModifierType::ORDER_MODIFIER) { - } - - //! List of order nodes - vector orders; +enum class StorageLockType { SHARED = 0, EXCLUSIVE = 1 }; +class StorageLockKey { public: - bool Equals(const ResultModifier *other) const override; - unique_ptr Copy() override; - void Serialize(Serializer &serializer) override; - static unique_ptr Deserialize(Deserializer &source); + StorageLockKey(StorageLock &lock, StorageLockType type); + ~StorageLockKey(); + +private: + StorageLock &lock; + StorageLockType type; }; -class DistinctModifier : public ResultModifier { +class StorageLock { + friend class StorageLockKey; + public: - DistinctModifier() : ResultModifier(ResultModifierType::DISTINCT_MODIFIER) { - } + StorageLock(); - //! list of distinct on targets (if any) - vector> distinct_on_targets; + //! Get an exclusive lock + unique_ptr GetExclusiveLock(); + //! 
Get a shared lock + unique_ptr GetSharedLock(); -public: - bool Equals(const ResultModifier *other) const override; - unique_ptr Copy() override; - void Serialize(Serializer &serializer) override; - static unique_ptr Deserialize(Deserializer &source); +private: + mutex exclusive_lock; + atomic read_count; + +private: + //! Release an exclusive lock + void ReleaseExclusiveLock(); + //! Release a shared lock + void ReleaseSharedLock(); }; } // namespace duckdb @@ -11755,7 +13996,7 @@ class DistinctModifier : public ResultModifier { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/common_table_expression_info.hpp +// duckdb/storage/table/segment_base.hpp // // //===----------------------------------------------------------------------===// @@ -11764,76 +14005,74 @@ class DistinctModifier : public ResultModifier { + namespace duckdb { -class SelectStatement; +class SegmentBase { +public: + SegmentBase(idx_t start, idx_t count) : start(start), count(count) { + } + virtual ~SegmentBase() { + // destroy the chain of segments iteratively (rather than recursively) + while (next && next->next) { + next = move(next->next); + } + } -struct CommonTableExpressionInfo { - vector aliases; - unique_ptr query; + //! The start row id of this chunk + const idx_t start; + //! The amount of entries in this storage chunk + atomic count; + //! The next segment after this one + unique_ptr next; }; } // namespace duckdb + + namespace duckdb { -enum QueryNodeType : uint8_t { - SELECT_NODE = 1, - SET_OPERATION_NODE = 2, - BOUND_SUBQUERY_NODE = 3, - RECURSIVE_CTE_NODE = 4 +struct SegmentNode { + idx_t row_start; + SegmentBase *node; }; -class QueryNode { +//! The SegmentTree maintains a list of all segments of a specific column in a table, and allows searching for a segment +//! by row number +class SegmentTree { public: - explicit QueryNode(QueryNodeType type) : type(type) { - } - virtual ~QueryNode() { - } - - //! 
The type of the query node, either SetOperation or Select - QueryNodeType type; - //! The set of result modifiers associated with this query node - vector> modifiers; - //! CTEs (used by SelectNode and SetOperationNode) - unordered_map> cte_map; - - virtual const vector> &GetSelectList() const = 0; + //! The initial segment of the tree + unique_ptr root_node; + //! The nodes in the tree, can be binary searched + vector nodes; + //! Lock to access or modify the nodes + mutex node_lock; public: - virtual bool Equals(const QueryNode *other) const; + //! Gets a pointer to the first segment. Useful for scans. + SegmentBase *GetRootSegment(); + //! Gets a pointer to the last segment. Useful for appends. + SegmentBase *GetLastSegment(); + //! Gets a pointer to a specific column segment for the given row + SegmentBase *GetSegment(idx_t row_number); + //! Append a column segment to the tree + void AppendSegment(unique_ptr segment); - //! Create a copy of this QueryNode - virtual unique_ptr Copy() = 0; - //! Serializes a QueryNode to a stand-alone binary blob - virtual void Serialize(Serializer &serializer); - //! Deserializes a blob back into a QueryNode, returns nullptr if - //! deserialization is not possible - static unique_ptr Deserialize(Deserializer &source); + //! Replace this tree with another tree, taking over its nodes in-place + void Replace(SegmentTree &other); -protected: - void CopyProperties(QueryNode &other); + //! Get the segment index of the column segment for the given row (does not lock the segment tree!) 
+ idx_t GetSegmentIndex(idx_t row_number); }; } // namespace duckdb - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/parser/tableref.hpp -// -// -//===----------------------------------------------------------------------===// - - - - //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/enums/tableref_type.hpp +// duckdb/storage/data_pointer.hpp // // //===----------------------------------------------------------------------===// @@ -11842,29 +14081,10 @@ class QueryNode { -namespace duckdb { - -//===--------------------------------------------------------------------===// -// Table Reference Types -//===--------------------------------------------------------------------===// -enum class TableReferenceType : uint8_t { - INVALID = 0, // invalid table reference type - BASE_TABLE = 1, // base table reference - SUBQUERY = 2, // output of a subquery - JOIN = 3, // output of join - CROSS_PRODUCT = 4, // out of cartesian product - TABLE_FUNCTION = 5, // table producing function - EXPRESSION_LIST = 6, // expression list - CTE = 7, // Recursive CTE - EMPTY = 8 // placeholder for empty FROM -}; - -} // namespace duckdb - //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/parsed_data/sample_options.hpp +// duckdb/storage/storage_info.hpp // // //===----------------------------------------------------------------------===// @@ -11873,137 +14093,93 @@ enum class TableReferenceType : uint8_t { - - - -namespace duckdb { - -enum class SampleMethod : uint8_t { SYSTEM_SAMPLE = 0, BERNOULLI_SAMPLE = 1, RESERVOIR_SAMPLE = 2 }; - -string SampleMethodToString(SampleMethod method); - -struct SampleOptions { - Value sample_size; - bool is_percentage; - SampleMethod method; - int64_t seed; - - unique_ptr Copy(); - void Serialize(Serializer &serializer); - static unique_ptr Deserialize(Deserializer &source); - 
static bool Equals(SampleOptions *a, SampleOptions *b); -}; - -} // namespace duckdb - - namespace duckdb { -class Deserializer; class Serializer; +class Deserializer; -//! Represents a generic expression that returns a table. -class TableRef { -public: - explicit TableRef(TableReferenceType type) : type(type) { - } - virtual ~TableRef() { - } +//! The version number of the database storage format +extern const uint64_t VERSION_NUMBER; - TableReferenceType type; - string alias; - //! Sample options (if any) - unique_ptr sample; - //! The location in the query (if any) - idx_t query_location = INVALID_INDEX; +using block_id_t = int64_t; -public: - //! Convert the object to a string - virtual string ToString() const { - return string(); - } - void Print(); +#define INVALID_BLOCK (-1) - virtual bool Equals(const TableRef *other) const; +// maximum block id, 2^62 +#define MAXIMUM_BLOCK 4611686018427388000LL - virtual unique_ptr Copy() = 0; +//! The MainHeader is the first header in the storage file. The MainHeader is typically written only once for a database +//! file. +struct MainHeader { + static constexpr idx_t MAGIC_BYTE_SIZE = 4; + static constexpr idx_t FLAG_COUNT = 4; + // the magic bytes in front of the file + // should be "DUCK" + static const char MAGIC_BYTES[]; + //! The version of the database + uint64_t version_number; + //! The set of flags used by the database + uint64_t flags[FLAG_COUNT]; - //! Serializes a TableRef to a stand-alone binary blob - virtual void Serialize(Serializer &serializer); - //! Deserializes a blob back into a TableRef - static unique_ptr Deserialize(Deserializer &source); + void Serialize(Serializer &ser); + static MainHeader Deserialize(Deserializer &source); +}; - //! Copy the properties of this table ref to the target - void CopyProperties(TableRef &target); +//! The DatabaseHeader contains information about the current state of the database. Every storage file has two +//! DatabaseHeaders. 
On startup, the DatabaseHeader with the highest iteration count is used as the active header. When +//! a checkpoint is performed, the active DatabaseHeader is switched by increasing the iteration count of the +//! DatabaseHeader. +struct DatabaseHeader { + //! The iteration count, increases by 1 every time the storage is checkpointed. + uint64_t iteration; + //! A pointer to the initial meta block + block_id_t meta_block; + //! A pointer to the block containing the free list + block_id_t free_list; + //! The number of blocks that is in the file as of this database header. If the file is larger than BLOCK_SIZE * + //! block_count any blocks appearing AFTER block_count are implicitly part of the free_list. + uint64_t block_count; + + void Serialize(Serializer &ser); + static DatabaseHeader Deserialize(Deserializer &source); }; + } // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/block.hpp +// +// +//===----------------------------------------------------------------------===// -namespace duckdb { -class QueryNode; -//! SelectStatement is a typical SELECT clause -class SelectStatement : public SQLStatement { -public: - SelectStatement() : SQLStatement(StatementType::SELECT_STATEMENT) { - } - //! The main query node - unique_ptr node; -public: - //! Create a copy of this SelectStatement - unique_ptr Copy() const override; - //! Serializes a SelectStatement to a stand-alone binary blob - void Serialize(Serializer &serializer); - //! Deserializes a blob back into a SelectStatement, returns nullptr if - //! deserialization is not possible - static unique_ptr Deserialize(Deserializer &source); - //! 
Whether or not the statements are equivalent - bool Equals(const SQLStatement *other) const; -}; -} // namespace duckdb namespace duckdb { -struct CreateTableInfo : public CreateInfo { - CreateTableInfo() : CreateInfo(CatalogType::TABLE_ENTRY, INVALID_SCHEMA) { - } - CreateTableInfo(string schema, string name) : CreateInfo(CatalogType::TABLE_ENTRY, schema), table(name) { - } +class Block : public FileBuffer { +public: + Block(Allocator &allocator, block_id_t id); + Block(FileBuffer &source, block_id_t id); - //! Table name to insert to - string table; - //! List of columns of the table - vector columns; - //! List of constraints on the table - vector> constraints; - //! CREATE TABLE from QUERY - unique_ptr query; + block_id_t id; +}; -public: - unique_ptr Copy() const override { - auto result = make_unique(schema, table); - CopyProperties(*result); - for (auto &column : columns) { - result->columns.push_back(column.Copy()); - } - for (auto &constraint : constraints) { - result->constraints.push_back(constraint->Copy()); - } - if (query) { - result->query = unique_ptr_cast(query->Copy()); - } - return move(result); - } +struct BlockPointer { + block_id_t block_id; + uint32_t offset; }; } // namespace duckdb + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/planner/parsed_data/bound_create_table_info.hpp +// duckdb/storage/table/row_group.hpp // // //===----------------------------------------------------------------------===// @@ -12011,10 +14187,11 @@ struct CreateTableInfo : public CreateInfo { + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/planner/bound_constraint.hpp +// duckdb/storage/table/chunk_info.hpp // // //===----------------------------------------------------------------------===// @@ -12024,23 +14201,104 @@ struct CreateTableInfo : public CreateInfo { + namespace duckdb { -//! 
Bound equivalent of Constraint -class BoundConstraint { +class RowGroup; +struct SelectionVector; +class Transaction; + +enum class ChunkInfoType : uint8_t { CONSTANT_INFO, VECTOR_INFO, EMPTY_INFO }; + +class ChunkInfo { public: - explicit BoundConstraint(ConstraintType type) : type(type) {}; - virtual ~BoundConstraint() { + ChunkInfo(idx_t start, ChunkInfoType type) : start(start), type(type) { + } + virtual ~ChunkInfo() { } - ConstraintType type; + //! The row index of the first row + idx_t start; + //! The ChunkInfo type + ChunkInfoType type; + +public: + //! Gets up to max_count entries from the chunk info. If the ret is 0>ret>max_count, the selection vector is filled + //! with the tuples + virtual idx_t GetSelVector(Transaction &transaction, SelectionVector &sel_vector, idx_t max_count) = 0; + virtual idx_t GetCommittedSelVector(transaction_t min_start_id, transaction_t min_transaction_id, + SelectionVector &sel_vector, idx_t max_count) = 0; + //! Returns whether or not a single row in the ChunkInfo should be used or not for the given transaction + virtual bool Fetch(Transaction &transaction, row_t row) = 0; + virtual void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) = 0; + + virtual void Serialize(Serializer &serialize) = 0; + static unique_ptr Deserialize(Deserializer &source); +}; + +class ChunkConstantInfo : public ChunkInfo { +public: + ChunkConstantInfo(idx_t start); + + atomic insert_id; + atomic delete_id; + +public: + idx_t GetSelVector(Transaction &transaction, SelectionVector &sel_vector, idx_t max_count) override; + idx_t GetCommittedSelVector(transaction_t min_start_id, transaction_t min_transaction_id, + SelectionVector &sel_vector, idx_t max_count) override; + bool Fetch(Transaction &transaction, row_t row) override; + void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) override; + + void Serialize(Serializer &serialize) override; + static unique_ptr Deserialize(Deserializer &source); + +private: + template + 
idx_t TemplatedGetSelVector(transaction_t start_time, transaction_t transaction_id, SelectionVector &sel_vector, + idx_t max_count); +}; + +class ChunkVectorInfo : public ChunkInfo { +public: + ChunkVectorInfo(idx_t start); + + //! The transaction ids of the transactions that inserted the tuples (if any) + atomic inserted[STANDARD_VECTOR_SIZE]; + atomic insert_id; + atomic same_inserted_id; + + //! The transaction ids of the transactions that deleted the tuples (if any) + atomic deleted[STANDARD_VECTOR_SIZE]; + atomic any_deleted; + +public: + idx_t GetSelVector(transaction_t start_time, transaction_t transaction_id, SelectionVector &sel_vector, + idx_t max_count); + idx_t GetSelVector(Transaction &transaction, SelectionVector &sel_vector, idx_t max_count) override; + idx_t GetCommittedSelVector(transaction_t min_start_id, transaction_t min_transaction_id, + SelectionVector &sel_vector, idx_t max_count) override; + bool Fetch(Transaction &transaction, row_t row) override; + void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) override; + + void Append(idx_t start, idx_t end, transaction_t commit_id); + idx_t Delete(Transaction &transaction, row_t rows[], idx_t count); + void CommitDelete(transaction_t commit_id, row_t rows[], idx_t count); + + void Serialize(Serializer &serialize) override; + static unique_ptr Deserialize(Deserializer &source); + +private: + template + idx_t TemplatedGetSelVector(transaction_t start_time, transaction_t transaction_id, SelectionVector &sel_vector, + idx_t max_count); }; -} // namespace duckdb +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/planner/logical_operator.hpp +// duckdb/storage/table/append_state.hpp // // //===----------------------------------------------------------------------===// @@ -12052,7 +14310,7 @@ class BoundConstraint { //===----------------------------------------------------------------------===// // DuckDB // -// 
duckdb/common/enums/logical_operator_type.hpp +// duckdb/storage/buffer/buffer_handle.hpp // // //===----------------------------------------------------------------------===// @@ -12062,101 +14320,102 @@ class BoundConstraint { namespace duckdb { +class BlockHandle; +class FileBuffer; -//===--------------------------------------------------------------------===// -// Logical Operator Types -//===--------------------------------------------------------------------===// -enum class LogicalOperatorType : uint8_t { - LOGICAL_INVALID = 0, - LOGICAL_PROJECTION = 1, - LOGICAL_FILTER = 2, - LOGICAL_AGGREGATE_AND_GROUP_BY = 3, - LOGICAL_WINDOW = 4, - LOGICAL_UNNEST = 5, - LOGICAL_LIMIT = 6, - LOGICAL_ORDER_BY = 7, - LOGICAL_TOP_N = 8, - LOGICAL_COPY_TO_FILE = 10, - LOGICAL_DISTINCT = 11, - LOGICAL_SAMPLE = 12, +class BufferHandle { +public: + BufferHandle(shared_ptr handle, FileBuffer *node); + ~BufferHandle(); - // ----------------------------- - // Data sources - // ----------------------------- - LOGICAL_GET = 25, - LOGICAL_CHUNK_GET = 26, - LOGICAL_DELIM_GET = 27, - LOGICAL_EXPRESSION_GET = 28, - LOGICAL_DUMMY_SCAN = 29, - LOGICAL_EMPTY_RESULT = 30, - LOGICAL_CTE_REF = 31, - // ----------------------------- - // Joins - // ----------------------------- - LOGICAL_JOIN = 50, - LOGICAL_DELIM_JOIN = 51, - LOGICAL_COMPARISON_JOIN = 52, - LOGICAL_ANY_JOIN = 53, - LOGICAL_CROSS_PRODUCT = 54, - // ----------------------------- - // SetOps - // ----------------------------- - LOGICAL_UNION = 75, - LOGICAL_EXCEPT = 76, - LOGICAL_INTERSECT = 77, - LOGICAL_RECURSIVE_CTE = 78, + //! The block handle + shared_ptr handle; + //! 
The managed buffer node + FileBuffer *node; + data_ptr_t Ptr(); +}; - // ----------------------------- - // Updates - // ----------------------------- - LOGICAL_INSERT = 100, - LOGICAL_DELETE = 101, - LOGICAL_UPDATE = 102, +} // namespace duckdb - // ----------------------------- - // Schema - // ----------------------------- - LOGICAL_ALTER = 125, - LOGICAL_CREATE_TABLE = 126, - LOGICAL_CREATE_INDEX = 127, - LOGICAL_CREATE_SEQUENCE = 128, - LOGICAL_CREATE_VIEW = 129, - LOGICAL_CREATE_SCHEMA = 130, - LOGICAL_CREATE_MACRO = 131, - LOGICAL_DROP = 132, - LOGICAL_PRAGMA = 133, - LOGICAL_TRANSACTION = 134, - // ----------------------------- - // Explain - // ----------------------------- - LOGICAL_EXPLAIN = 150, - // ----------------------------- - // Show - // ----------------------------- - LOGICAL_SHOW = 160, +namespace duckdb { +class ColumnSegment; +class DataTable; +class RowGroup; +class UpdateSegment; +class ValiditySegment; - // ----------------------------- - // Helpers - // ----------------------------- - LOGICAL_PREPARE = 175, - LOGICAL_EXECUTE = 176, - LOGICAL_EXPORT = 177, - LOGICAL_VACUUM = 178, - LOGICAL_SET = 179, - LOGICAL_LOAD = 180 +struct TableAppendState; + +struct ColumnAppendState { + //! The current segment of the append + ColumnSegment *current; + //! Child append states + vector child_appends; + //! The write lock that is held by the append + unique_ptr lock; }; -string LogicalOperatorToString(LogicalOperatorType type); +struct RowGroupAppendState { + RowGroupAppendState(TableAppendState &parent_p) : parent(parent_p) { + } + + //! The parent append state + TableAppendState &parent; + //! The current row_group we are appending to + RowGroup *row_group; + //! The column append states + unique_ptr states; + //! 
Offset within the row_group + idx_t offset_in_row_group; +}; + +struct IndexLock { + unique_lock index_lock; +}; + +struct TableAppendState { + TableAppendState() : row_group_append_state(*this) { + } + + RowGroupAppendState row_group_append_state; + unique_lock append_lock; + row_t row_start; + row_t current_row; + idx_t remaining_append_count; +}; } // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/table/scan_state.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/planner/logical_operator_visitor.hpp +// duckdb/execution/adaptive_filter.hpp +// +// +//===----------------------------------------------------------------------===// + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/planner/expression/bound_aggregate_expression.hpp // // //===----------------------------------------------------------------------===// @@ -12164,172 +14423,122 @@ string LogicalOperatorToString(LogicalOperatorType type); + +#include + +namespace duckdb { +class BoundAggregateExpression : public Expression { +public: + BoundAggregateExpression(AggregateFunction function, vector> children, + unique_ptr filter, unique_ptr bind_info, bool distinct); + + //! The bound function expression + AggregateFunction function; + //! List of arguments to the function + vector> children; + //! The bound function data (if any) + unique_ptr bind_info; + //! True to aggregate on distinct values + bool distinct; + + //! 
Filter for this aggregate + unique_ptr filter; + +public: + bool IsAggregate() const override { + return true; + } + bool IsFoldable() const override { + return false; + } + + string ToString() const override; + + hash_t Hash() const override; + bool Equals(const BaseExpression *other) const override; + unique_ptr Copy() override; +}; +} // namespace duckdb + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/planner/bound_tokens.hpp +// duckdb/planner/expression/bound_between_expression.hpp // // //===----------------------------------------------------------------------===// + + namespace duckdb { -//===--------------------------------------------------------------------===// -// Query Node -//===--------------------------------------------------------------------===// -class BoundQueryNode; -class BoundSelectNode; -class BoundSetOperationNode; -class BoundRecursiveCTENode; +class BoundBetweenExpression : public Expression { +public: + BoundBetweenExpression(unique_ptr input, unique_ptr lower, unique_ptr upper, + bool lower_inclusive, bool upper_inclusive); -//===--------------------------------------------------------------------===// -// Expressions -//===--------------------------------------------------------------------===// -class Expression; + unique_ptr input; + unique_ptr lower; + unique_ptr upper; + bool lower_inclusive; + bool upper_inclusive; -class BoundAggregateExpression; -class BoundBetweenExpression; -class BoundCaseExpression; -class BoundCastExpression; -class BoundColumnRefExpression; -class BoundComparisonExpression; -class BoundConjunctionExpression; -class BoundConstantExpression; -class BoundDefaultExpression; -class BoundFunctionExpression; -class BoundOperatorExpression; -class BoundParameterExpression; -class BoundReferenceExpression; -class BoundSubqueryExpression; -class BoundUnnestExpression; -class BoundWindowExpression; +public: + string ToString() const override; 
-//===--------------------------------------------------------------------===// -// TableRefs -//===--------------------------------------------------------------------===// -class BoundTableRef; + bool Equals(const BaseExpression *other) const override; -class BoundBaseTableRef; -class BoundCrossProductRef; -class BoundJoinRef; -class BoundSubqueryRef; -class BoundTableFunction; -class BoundEmptyTableRef; -class BoundExpressionListRef; -class BoundCTERef; + unique_ptr Copy() override; +public: + ExpressionType LowerComparisonType() { + return lower_inclusive ? ExpressionType::COMPARE_GREATERTHANOREQUALTO : ExpressionType::COMPARE_GREATERTHAN; + } + ExpressionType UpperComparisonType() { + return upper_inclusive ? ExpressionType::COMPARE_LESSTHANOREQUALTO : ExpressionType::COMPARE_LESSTHAN; + } +}; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/planner/logical_tokens.hpp +// duckdb/planner/expression/bound_case_expression.hpp // // //===----------------------------------------------------------------------===// -namespace duckdb { - -class LogicalOperator; - -class LogicalAggregate; -class LogicalAnyJoin; -class LogicalChunkGet; -class LogicalComparisonJoin; -class LogicalCopyToFile; -class LogicalCreate; -class LogicalCreateTable; -class LogicalCreateIndex; -class LogicalCreateTable; -class LogicalCrossProduct; -class LogicalCTERef; -class LogicalDelete; -class LogicalDelimGet; -class LogicalDelimJoin; -class LogicalDistinct; -class LogicalDummyScan; -class LogicalEmptyResult; -class LogicalExecute; -class LogicalExplain; -class LogicalExport; -class LogicalExpressionGet; -class LogicalFilter; -class LogicalGet; -class LogicalInsert; -class LogicalJoin; -class LogicalLimit; -class LogicalOrder; -class LogicalPragma; -class LogicalPrepare; -class LogicalProjection; -class LogicalRecursiveCTE; -class LogicalSetOperation; -class LogicalSample; -class LogicalShow; -class LogicalSimple; 
-class LogicalSet; -class LogicalTopN; -class LogicalUnnest; -class LogicalUpdate; -class LogicalWindow; - -} // namespace duckdb - -#include namespace duckdb { -//! The LogicalOperatorVisitor is an abstract base class that implements the -//! Visitor pattern on LogicalOperator. -class LogicalOperatorVisitor { -public: - virtual ~LogicalOperatorVisitor() {}; - virtual void VisitOperator(LogicalOperator &op); - virtual void VisitExpression(unique_ptr *expression); +class BoundCaseExpression : public Expression { +public: + BoundCaseExpression(LogicalType type); + BoundCaseExpression(unique_ptr check, unique_ptr res_if_true, + unique_ptr res_if_false); - static void EnumerateExpressions(LogicalOperator &op, - const std::function *child)> &callback); + unique_ptr check; + unique_ptr result_if_true; + unique_ptr result_if_false; -protected: - //! Automatically calls the Visit method for LogicalOperator children of the current operator. Can be overloaded to - //! change this behavior. - void VisitOperatorChildren(LogicalOperator &op); - //! Automatically calls the Visit method for Expression children of the current operator. Can be overloaded to - //! change this behavior. - void VisitOperatorExpressions(LogicalOperator &op); +public: + string ToString() const override; - // The VisitExpressionChildren method is called at the end of every call to VisitExpression to recursively visit all - // expressions in an expression tree. It can be overloaded to prevent automatically visiting the entire tree. 
- virtual void VisitExpressionChildren(Expression &expression); + bool Equals(const BaseExpression *other) const override; - virtual unique_ptr VisitReplace(BoundAggregateExpression &expr, unique_ptr *expr_ptr); - virtual unique_ptr VisitReplace(BoundBetweenExpression &expr, unique_ptr *expr_ptr); - virtual unique_ptr VisitReplace(BoundCaseExpression &expr, unique_ptr *expr_ptr); - virtual unique_ptr VisitReplace(BoundCastExpression &expr, unique_ptr *expr_ptr); - virtual unique_ptr VisitReplace(BoundColumnRefExpression &expr, unique_ptr *expr_ptr); - virtual unique_ptr VisitReplace(BoundComparisonExpression &expr, unique_ptr *expr_ptr); - virtual unique_ptr VisitReplace(BoundConjunctionExpression &expr, unique_ptr *expr_ptr); - virtual unique_ptr VisitReplace(BoundConstantExpression &expr, unique_ptr *expr_ptr); - virtual unique_ptr VisitReplace(BoundDefaultExpression &expr, unique_ptr *expr_ptr); - virtual unique_ptr VisitReplace(BoundFunctionExpression &expr, unique_ptr *expr_ptr); - virtual unique_ptr VisitReplace(BoundOperatorExpression &expr, unique_ptr *expr_ptr); - virtual unique_ptr VisitReplace(BoundReferenceExpression &expr, unique_ptr *expr_ptr); - virtual unique_ptr VisitReplace(BoundSubqueryExpression &expr, unique_ptr *expr_ptr); - virtual unique_ptr VisitReplace(BoundParameterExpression &expr, unique_ptr *expr_ptr); - virtual unique_ptr VisitReplace(BoundWindowExpression &expr, unique_ptr *expr_ptr); - virtual unique_ptr VisitReplace(BoundUnnestExpression &expr, unique_ptr *expr_ptr); + unique_ptr Copy() override; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/planner/column_binding.hpp +// duckdb/planner/expression/bound_cast_expression.hpp // // //===----------------------------------------------------------------------===// @@ -12340,90 +14549,84 @@ class LogicalOperatorVisitor { namespace duckdb { -struct ColumnBinding { - idx_t table_index; - idx_t column_index; 
+class BoundCastExpression : public Expression { +public: + BoundCastExpression(unique_ptr child, LogicalType target_type, bool try_cast = false); - ColumnBinding() : table_index(INVALID_INDEX), column_index(INVALID_INDEX) { - } - ColumnBinding(idx_t table, idx_t column) : table_index(table), column_index(column) { - } + //! The child type + unique_ptr child; + //! Whether to use try_cast or not. try_cast converts cast failures into NULLs instead of throwing an error. + bool try_cast; - bool operator==(const ColumnBinding &rhs) const { - return table_index == rhs.table_index && column_index == rhs.column_index; +public: + LogicalType source_type() { + return child->return_type; } -}; + //! Cast an expression to the specified SQL type if required + static unique_ptr AddCastToType(unique_ptr expr, const LogicalType &target_type); + //! Returns true if a cast is invertible (i.e. CAST(s -> t -> s) = s for all values of s). This is not true for e.g. + //! boolean casts, because that can be e.g. -1 -> TRUE -> 1. This is necessary to prevent some optimizer bugs. + static bool CastIsInvertible(const LogicalType &source_type, const LogicalType &target_type); + + string ToString() const override; + + bool Equals(const BaseExpression *other) const override; + + unique_ptr Copy() override; +}; } // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/planner/expression/bound_columnref_expression.hpp +// +// +//===----------------------------------------------------------------------===// -#include -#include -namespace duckdb { -//! LogicalOperator is the base class of the logical operators present in the -//! logical query tree -class LogicalOperator { -public: - explicit LogicalOperator(LogicalOperatorType type) : type(type) { - } - LogicalOperator(LogicalOperatorType type, vector> expressions) - : type(type), expressions(move(expressions)) { - } - virtual ~LogicalOperator() { - } - //! 
The type of the logical operator - LogicalOperatorType type; - //! The set of children of the operator - vector> children; - //! The set of expressions contained within the operator, if any - vector> expressions; - //! The types returned by this logical operator. Set by calling LogicalOperator::ResolveTypes. - vector types; - //! Estimated Cardinality - idx_t estimated_cardinality = 0; -public: - virtual vector GetColumnBindings() { - return {ColumnBinding(0, 0)}; - } - static vector GenerateColumnBindings(idx_t table_idx, idx_t column_count); - static vector MapTypes(const vector &types, const vector &projection_map); - static vector MapBindings(const vector &types, const vector &projection_map); - //! Resolve the types of the logical operator and its children - void ResolveOperatorTypes(); +namespace duckdb { - virtual string GetName() const; - virtual string ParamsToString() const; - virtual string ToString(idx_t depth = 0) const; - void Print(); +//! A BoundColumnRef expression represents a ColumnRef expression that was bound to an actual table and column index. It +//! is not yet executable, however. The ColumnBindingResolver transforms the BoundColumnRefExpressions into +//! BoundExpressions, which refer to indexes into the physical chunks that pass through the executor. +class BoundColumnRefExpression : public Expression { +public: + BoundColumnRefExpression(LogicalType type, ColumnBinding binding, idx_t depth = 0); + BoundColumnRefExpression(string alias, LogicalType type, ColumnBinding binding, idx_t depth = 0); + + //! Column index set by the binder, used to generate the final BoundExpression + ColumnBinding binding; + //! The subquery depth (i.e. depth 0 = current query, depth 1 = parent query, depth 2 = parent of parent, etc...). + //! This is only non-zero for correlated expressions inside subqueries. 
+ idx_t depth; - void AddChild(unique_ptr child) { - children.push_back(move(child)); +public: + bool IsScalar() const override { + return false; } - - virtual idx_t EstimateCardinality(ClientContext &context) { - // simple estimator, just take the max of the children - idx_t max_cardinality = 0; - for (auto &child : children) { - max_cardinality = MaxValue(child->EstimateCardinality(context), max_cardinality); - } - return max_cardinality; + bool IsFoldable() const override { + return false; } -protected: - //! Resolve types for this specific operator - virtual void ResolveTypes() = 0; + string ToString() const override; + + bool Equals(const BaseExpression *other) const override; + hash_t Hash() const override; + + unique_ptr Copy() override; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/table/persistent_table_data.hpp +// duckdb/planner/expression/bound_comparison_expression.hpp // // //===----------------------------------------------------------------------===// @@ -12432,21 +14635,31 @@ class LogicalOperator { -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/storage/table/segment_tree.hpp -// -// -//===----------------------------------------------------------------------===// +namespace duckdb { + +class BoundComparisonExpression : public Expression { +public: + BoundComparisonExpression(ExpressionType type, unique_ptr left, unique_ptr right); + + unique_ptr left; + unique_ptr right; + +public: + string ToString() const override; + bool Equals(const BaseExpression *other) const override; + unique_ptr Copy() override; +public: + static LogicalType BindComparison(LogicalType left_type, LogicalType right_type); +}; +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/storage_lock.hpp +// 
duckdb/planner/expression/bound_conjunction_expression.hpp // // //===----------------------------------------------------------------------===// @@ -12454,52 +14667,30 @@ class LogicalOperator { -#include -#include namespace duckdb { -class StorageLock; - -enum class StorageLockType { SHARED = 0, EXCLUSIVE = 1 }; -class StorageLockKey { +class BoundConjunctionExpression : public Expression { public: - StorageLockKey(StorageLock &lock, StorageLockType type); - ~StorageLockKey(); - -private: - StorageLock &lock; - StorageLockType type; -}; + explicit BoundConjunctionExpression(ExpressionType type); + BoundConjunctionExpression(ExpressionType type, unique_ptr left, unique_ptr right); -class StorageLock { - friend class StorageLockKey; + vector> children; public: - StorageLock(); - - //! Get an exclusive lock - unique_ptr GetExclusiveLock(); - //! Get a shared lock - unique_ptr GetSharedLock(); + string ToString() const override; -private: - std::mutex exclusive_lock; - std::atomic read_count; + bool Equals(const BaseExpression *other) const override; -private: - //! Release an exclusive lock - void ReleaseExclusiveLock(); - //! 
Release a shared lock - void ReleaseSharedLock(); + unique_ptr Copy() override; }; - } // namespace duckdb + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/table/segment_base.hpp +// duckdb/planner/expression/bound_default_expression.hpp // // //===----------------------------------------------------------------------===// @@ -12510,134 +14701,100 @@ class StorageLock { namespace duckdb { -class SegmentBase { +class BoundDefaultExpression : public Expression { public: - SegmentBase(idx_t start, idx_t count) : start(start), count(count) { + explicit BoundDefaultExpression(LogicalType type = LogicalType()) + : Expression(ExpressionType::VALUE_DEFAULT, ExpressionClass::BOUND_DEFAULT, type) { } - virtual ~SegmentBase() { - // destroy the chain of segments iteratively (rather than recursively) - while (next && next->next) { - next = move(next->next); - } + +public: + bool IsScalar() const override { + return false; + } + bool IsFoldable() const override { + return false; } - //! The start row id of this chunk - idx_t start; - //! The amount of entries in this storage chunk - idx_t count; - //! The next segment after this one - unique_ptr next; -}; + string ToString() const override { + return "DEFAULT"; + } + unique_ptr Copy() override { + return make_unique(return_type); + } +}; } // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/planner/expression/bound_operator_expression.hpp +// +// +//===----------------------------------------------------------------------===// + + -namespace duckdb { -struct SegmentNode { - idx_t row_start; - SegmentBase *node; -}; +namespace duckdb { -//! The SegmentTree maintains a list of all segments of a specific column in a table, and allows searching for a segment -//! by row number -class SegmentTree { +class BoundOperatorExpression : public Expression { public: - //! 
The initial segment of the tree - unique_ptr root_node; - //! The nodes in the tree, can be binary searched - vector nodes; - //! Lock to access or modify the nodes - mutex node_lock; + BoundOperatorExpression(ExpressionType type, LogicalType return_type); + + vector> children; public: - //! Gets a pointer to the first segment. Useful for scans. - SegmentBase *GetRootSegment(); - //! Gets a pointer to the last segment. Useful for appends. - SegmentBase *GetLastSegment(); - //! Gets a pointer to a specific column segment for the given row - SegmentBase *GetSegment(idx_t row_number); - //! Append a column segment to the tree - void AppendSegment(unique_ptr segment); + string ToString() const override; - //! Replace this tree with another tree, taking over its nodes in-place - void Replace(SegmentTree &other); + bool Equals(const BaseExpression *other) const override; - //! Get the segment index of the column segment for the given row (does not lock the segment tree!) - idx_t GetSegmentIndex(idx_t row_number); + unique_ptr Copy() override; }; - } // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/planner/expression/bound_parameter_expression.hpp +// +// +//===----------------------------------------------------------------------===// -namespace duckdb { -class BaseStatistics; -class PersistentSegment; -class PersistentColumnData { -public: - virtual ~PersistentColumnData(); - vector> segments; - unique_ptr stats; - idx_t total_rows = 0; -}; -class StandardPersistentColumnData : public PersistentColumnData { -public: - unique_ptr validity; -}; -class PersistentTableData { -public: - explicit PersistentTableData(idx_t column_count); - ~PersistentTableData(); - vector> column_data; - shared_ptr versions; -}; +namespace duckdb { -} // namespace duckdb +class BoundParameterExpression : public Expression { +public: + explicit BoundParameterExpression(idx_t parameter_nr); + idx_t parameter_nr; + 
Value *value; -namespace duckdb { -class CatalogEntry; +public: + bool IsScalar() const override; + bool HasParameter() const override; + bool IsFoldable() const override; -struct BoundCreateTableInfo { - explicit BoundCreateTableInfo(unique_ptr base) : base(move(base)) { - } + string ToString() const override; - //! The schema to create the table in - SchemaCatalogEntry *schema; - //! The base CreateInfo object - unique_ptr base; - //! The map of column names -> column index, used during binding - unordered_map name_map; - //! List of constraints on the table - vector> constraints; - //! List of bound constraints on the table - vector> bound_constraints; - //! Bound default values - vector> bound_defaults; - //! Dependents of the table (in e.g. default values) - unordered_set dependencies; - //! The existing table data on disk (if any) - unique_ptr data; - //! CREATE TABLE from QUERY - unique_ptr query; + bool Equals(const BaseExpression *other) const override; + hash_t Hash() const override; - CreateTableInfo &Base() { - return (CreateTableInfo &)*base; - } + unique_ptr Copy() override; }; - } // namespace duckdb + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/constraints/not_null_constraint.hpp +// duckdb/planner/expression/bound_reference_expression.hpp // // //===----------------------------------------------------------------------===// @@ -12648,31 +14805,36 @@ struct BoundCreateTableInfo { namespace duckdb { -class NotNullConstraint : public Constraint { +//! A BoundReferenceExpression represents a physical index into a DataChunk +class BoundReferenceExpression : public Expression { public: - explicit NotNullConstraint(column_t index) : Constraint(ConstraintType::NOT_NULL), index(index) {}; - ~NotNullConstraint() override { - } + BoundReferenceExpression(string alias, LogicalType type, idx_t index); + BoundReferenceExpression(LogicalType type, idx_t index); - //! 
Column index this constraint pertains to - column_t index; + //! Index used to access data in the chunks + idx_t index; public: - string ToString() const override; - - unique_ptr Copy() override; + bool IsScalar() const override { + return false; + } + bool IsFoldable() const override { + return false; + } - //! Serialize to a stand-alone binary blob - void Serialize(Serializer &serializer) override; - //! Deserializes a NotNullConstraint - static unique_ptr Deserialize(Deserializer &source); -}; + string ToString() const override; + hash_t Hash() const override; + bool Equals(const BaseExpression *other) const override; + + unique_ptr Copy() override; +}; } // namespace duckdb + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/data_table.hpp +// duckdb/planner/expression/bound_subquery_expression.hpp // // //===----------------------------------------------------------------------===// @@ -12682,7 +14844,7 @@ class NotNullConstraint : public Constraint { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/enums/index_type.hpp +// duckdb/common/enums/subquery_type.hpp // // //===----------------------------------------------------------------------===// @@ -12694,35 +14856,132 @@ class NotNullConstraint : public Constraint { namespace duckdb { //===--------------------------------------------------------------------===// -// Index Types +// Subquery Types //===--------------------------------------------------------------------===// -enum class IndexType { - INVALID = 0, // invalid index type - ART = 1 // Adaptive Radix Tree +enum class SubqueryType : uint8_t { + INVALID = 0, + SCALAR = 1, // Regular scalar subquery + EXISTS = 2, // EXISTS (SELECT...) + NOT_EXISTS = 3, // NOT EXISTS(SELECT...) + ANY = 4, // x = ANY(SELECT...) OR x IN (SELECT...) 
}; } // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/planner/binder.hpp +// +// +//===----------------------------------------------------------------------===// + + + + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/index.hpp +// duckdb/parser/tokens.hpp // // //===----------------------------------------------------------------------===// +namespace duckdb { + +//===--------------------------------------------------------------------===// +// Statements +//===--------------------------------------------------------------------===// +class SQLStatement; + +class AlterStatement; +class CallStatement; +class CopyStatement; +class CreateStatement; +class DeleteStatement; +class DropStatement; +class InsertStatement; +class SelectStatement; +class TransactionStatement; +class UpdateStatement; +class PrepareStatement; +class ExecuteStatement; +class PragmaStatement; +class ShowStatement; +class ExplainStatement; +class ExportStatement; +class VacuumStatement; +class RelationStatement; +class SetStatement; +class LoadStatement; + +//===--------------------------------------------------------------------===// +// Query Node +//===--------------------------------------------------------------------===// +class QueryNode; +class SelectNode; +class SetOperationNode; +class RecursiveCTENode; + +//===--------------------------------------------------------------------===// +// Expressions +//===--------------------------------------------------------------------===// +class ParsedExpression; + +class BetweenExpression; +class CaseExpression; +class CastExpression; +class CollateExpression; +class ColumnRefExpression; +class ComparisonExpression; +class ConjunctionExpression; +class ConstantExpression; +class DefaultExpression; +class FunctionExpression; +class LambdaExpression; +class OperatorExpression; +class ParameterExpression; 
+class PositionalReferenceExpression; +class StarExpression; +class SubqueryExpression; +class WindowExpression; + +//===--------------------------------------------------------------------===// +// Constraints +//===--------------------------------------------------------------------===// +class Constraint; + +class NotNullConstraint; +class CheckConstraint; +class UniqueConstraint; +//===--------------------------------------------------------------------===// +// TableRefs +//===--------------------------------------------------------------------===// +class TableRef; +class BaseTableRef; +class CrossProductRef; +class JoinRef; +class SubqueryRef; +class TableFunctionRef; +class EmptyTableRef; +class ExpressionListRef; +//===--------------------------------------------------------------------===// +// Other +//===--------------------------------------------------------------------===// +struct SampleOptions; +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/table/scan_state.hpp +// duckdb/planner/bind_context.hpp // // //===----------------------------------------------------------------------===// @@ -12733,17 +14992,96 @@ enum class IndexType { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/buffer/buffer_handle.hpp +// duckdb/catalog/catalog_entry/table_catalog_entry.hpp // // //===----------------------------------------------------------------------===// + + + + + + + +namespace duckdb { + +class ColumnStatistics; +class DataTable; +struct CreateTableInfo; +struct BoundCreateTableInfo; + +struct RenameColumnInfo; +struct AddColumnInfo; +struct RemoveColumnInfo; +struct SetDefaultInfo; +struct ChangeColumnTypeInfo; + +//! A table catalog entry +class TableCatalogEntry : public StandardEntry { +public: + //! 
Create a real TableCatalogEntry and initialize storage for it + TableCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, BoundCreateTableInfo *info, + std::shared_ptr inherited_storage = nullptr); + + //! A reference to the underlying storage unit used for this table + std::shared_ptr storage; + //! A list of columns that are part of this table + vector columns; + //! A list of constraints that are part of this table + vector> constraints; + //! A list of constraints that are part of this table + vector> bound_constraints; + //! A map of column name to column index + unordered_map name_map; + +public: + unique_ptr AlterEntry(ClientContext &context, AlterInfo *info) override; + //! Returns whether or not a column with the given name exists + bool ColumnExists(const string &name); + //! Returns a reference to the column of the specified name. Throws an + //! exception if the column does not exist. + ColumnDefinition &GetColumn(const string &name); + //! Returns a list of types of the table + vector GetTypes(); + string ToSQL() override; + + //! Add lower case aliases to a name map (e.g. "Hello" -> "hello" is also acceptable) + static void AddLowerCaseAliases(unordered_map &name_map); + + //! Serialize the meta information of the TableCatalogEntry a serializer + virtual void Serialize(Serializer &serializer); + //! Deserializes to a CreateTableInfo + static unique_ptr Deserialize(Deserializer &source); + + unique_ptr Copy(ClientContext &context) override; + + void SetAsRoot() override; + + void CommitAlter(AlterInfo &info); + void CommitDrop(); + + //! Returns the column index of the specified column name. + //! If the column does not exist: + //! If if_exists is true, returns INVALID_INDEX + //! 
If if_exists is false, throws an exception + idx_t GetColumnIndex(string &name, bool if_exists = false); + +private: + unique_ptr RenameColumn(ClientContext &context, RenameColumnInfo &info); + unique_ptr AddColumn(ClientContext &context, AddColumnInfo &info); + unique_ptr RemoveColumn(ClientContext &context, RemoveColumnInfo &info); + unique_ptr SetDefault(ClientContext &context, SetDefaultInfo &info); + unique_ptr ChangeColumnType(ClientContext &context, ChangeColumnTypeInfo &info); +}; +} // namespace duckdb + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/storage_info.hpp +// duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp // // //===----------------------------------------------------------------------===// @@ -12752,82 +15090,78 @@ enum class IndexType { -namespace duckdb { -class Serializer; -class Deserializer; -//! The version number of the database storage format -extern const uint64_t VERSION_NUMBER; -using block_id_t = int64_t; +namespace duckdb { -#define INVALID_BLOCK (-1) +class Catalog; +class Constraint; -// maximum block id, 2^62 -#define MAXIMUM_BLOCK 4611686018427388000LL +struct CreateTableFunctionInfo; -//! The MainHeader is the first header in the storage file. The MainHeader is typically written only once for a database -//! file. -struct MainHeader { - static constexpr idx_t MAGIC_BYTE_SIZE = 4; - static constexpr idx_t FLAG_COUNT = 4; - // the magic bytes in front of the file - // should be "DUCK" - static const char MAGIC_BYTES[]; - //! The version of the database - uint64_t version_number; - //! The set of flags used by the database - uint64_t flags[FLAG_COUNT]; +//! 
A table function in the catalog +class TableFunctionCatalogEntry : public StandardEntry { +public: + TableFunctionCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, CreateTableFunctionInfo *info); - void Serialize(Serializer &ser); - static MainHeader Deserialize(Deserializer &source); + //! The table function + vector functions; }; +} // namespace duckdb + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/parser/expression/columnref_expression.hpp +// +// +//===----------------------------------------------------------------------===// -//! The DatabaseHeader contains information about the current state of the database. Every storage file has two -//! DatabaseHeaders. On startup, the DatabaseHeader with the highest iteration count is used as the active header. When -//! a checkpoint is performed, the active DatabaseHeader is switched by increasing the iteration count of the -//! DatabaseHeader. -struct DatabaseHeader { - //! The iteration count, increases by 1 every time the storage is checkpointed. - uint64_t iteration; - //! A pointer to the initial meta block - block_id_t meta_block; - //! A pointer to the block containing the free list - block_id_t free_list; - //! The number of blocks that is in the file as of this database header. If the file is larger than BLOCK_SIZE * - //! block_count any blocks appearing AFTER block_count are implicitly part of the free_list. - uint64_t block_count; - void Serialize(Serializer &ser); - static DatabaseHeader Deserialize(Deserializer &source); -}; -} // namespace duckdb namespace duckdb { -class BlockHandle; -class FileBuffer; -class BufferHandle { +//! Represents a reference to a column from either the FROM clause or from an +//! alias +class ColumnRefExpression : public ParsedExpression { public: - BufferHandle(shared_ptr handle, FileBuffer *node); - ~BufferHandle(); + //! 
Specify both the column and table name + ColumnRefExpression(string column_name, string table_name); + //! Only specify the column name, the table name will be derived later + explicit ColumnRefExpression(string column_name); - //! The block handle - shared_ptr handle; - //! The managed buffer node - FileBuffer *node; - data_ptr_t Ptr(); -}; + //! Column name that is referenced + string column_name; + //! Table name of the column name that is referenced (optional) + string table_name; + +public: + bool IsScalar() const override { + return false; + } + + string GetName() const override; + string ToString() const override; + static bool Equals(const ColumnRefExpression *a, const ColumnRefExpression *b); + hash_t Hash() const override; + + unique_ptr Copy() const override; + + void Serialize(Serializer &serializer) override; + static unique_ptr Deserialize(ExpressionType type, Deserializer &source); +}; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/table/column_segment.hpp +// duckdb/parser/qualified_name_set.hpp // // //===----------------------------------------------------------------------===// @@ -12837,7 +15171,7 @@ class BufferHandle { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/block.hpp +// duckdb/parser/qualified_name.hpp // // //===----------------------------------------------------------------------===// @@ -12847,25 +15181,163 @@ class BufferHandle { - namespace duckdb { -class Block : public FileBuffer { -public: - explicit Block(block_id_t id); +struct QualifiedName { + string schema; + string name; - block_id_t id; + //! Parse the (optional) schema and a name from a string in the format of e.g. "schema"."table"; if there is no dot + //! 
the schema will be set to INVALID_SCHEMA + static QualifiedName Parse(string input) { + string schema; + string name; + idx_t idx = 0; + vector entries; + string entry; + normal: + //! quote + for (; idx < input.size(); idx++) { + if (input[idx] == '"') { + idx++; + goto quoted; + } else if (input[idx] == '.') { + goto separator; + } + entry += input[idx]; + } + goto end; + separator: + entries.push_back(entry); + entry = ""; + idx++; + goto normal; + quoted: + //! look for another quote + for (; idx < input.size(); idx++) { + if (input[idx] == '"') { + //! unquote + idx++; + goto normal; + } + entry += input[idx]; + } + throw ParserException("Unterminated quote in qualified name!"); + end: + if (entries.empty()) { + schema = INVALID_SCHEMA; + name = entry; + } else if (entries.size() == 1) { + schema = entries[0]; + name = entry; + } else { + throw ParserException("Expected schema.entry or entry: too many entries found"); + } + return QualifiedName {schema, name}; + } +}; + +struct QualifiedColumnName { + QualifiedColumnName() { + } + QualifiedColumnName(string table_p, string column_p) : table(move(table_p)), column(move(column_p)) { + } + + string schema; + string table; + string column; }; } // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/types/hash.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + +namespace duckdb { + +struct string_t; + +// efficient hash function that maximizes the avalanche effect and minimizes +// bias +// see: https://nullprogram.com/blog/2018/07/31/ + +inline hash_t murmurhash64(uint64_t x) { + return x * UINT64_C(0xbf58476d1ce4e5b9); +} + +inline hash_t murmurhash32(uint32_t x) { + return murmurhash64(x); +} + +template +hash_t Hash(T value) { + return murmurhash32(value); +} + +//! 
Combine two hashes by XORing them +inline hash_t CombineHash(hash_t left, hash_t right) { + return left ^ right; +} + +template <> +hash_t Hash(uint64_t val); +template <> +hash_t Hash(int64_t val); +template <> +hash_t Hash(hugeint_t val); +template <> +hash_t Hash(float val); +template <> +hash_t Hash(double val); +template <> +hash_t Hash(const char *val); +template <> +hash_t Hash(char *val); +template <> +hash_t Hash(string_t val); +template <> +hash_t Hash(interval_t val); +hash_t Hash(const char *val, size_t size); +hash_t Hash(uint8_t *val, size_t size); + +} // namespace duckdb + + + +namespace duckdb { + +struct QualifiedColumnHashFunction { + uint64_t operator()(const QualifiedColumnName &a) const { + std::hash str_hasher; + return str_hasher(a.schema) ^ str_hasher(a.table) ^ str_hasher(a.column); + } +}; + +struct QualifiedColumnEquality { + bool operator()(const QualifiedColumnName &a, const QualifiedColumnName &b) const { + return a.schema == b.schema && a.table == b.table && a.column == b.column; + } +}; + +using qualified_column_set_t = unordered_set; +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/buffer_manager.hpp +// duckdb/planner/expression_binder.hpp // // //===----------------------------------------------------------------------===// @@ -12876,7 +15348,7 @@ class Block : public FileBuffer { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/buffer/buffer_list.hpp +// duckdb/parser/expression/bound_expression.hpp // // //===----------------------------------------------------------------------===// @@ -12885,156 +15357,155 @@ class Block : public FileBuffer { + + namespace duckdb { -struct BufferEntry { - explicit BufferEntry(unique_ptr buffer) : buffer(move(buffer)), ref_count(1), prev(nullptr) { - } - ~BufferEntry() { - while (next) { - next = move(next->next); - } +//! 
BoundExpression is an intermediate dummy class used by the binder. It is a ParsedExpression but holds an Expression. +//! It represents a successfully bound expression. It is used in the Binder to prevent re-binding of already bound parts +//! when dealing with subqueries. +class BoundExpression : public ParsedExpression { +public: + BoundExpression(unique_ptr expr) + : ParsedExpression(ExpressionType::INVALID, ExpressionClass::BOUND_EXPRESSION), expr(move(expr)) { } - //! The actual buffer - unique_ptr buffer; - //! The amount of references to this entry - idx_t ref_count; - //! Next node - unique_ptr next; - //! Prev entry - BufferEntry *prev; -}; + unique_ptr expr; -class BufferList { public: - BufferList() : last(nullptr), count(0) { + string ToString() const override { + return expr->ToString(); } -public: - //! Removes the first element (root) from the buffer list and returns it, O(1) - unique_ptr Pop(); - //! Erase the specified element from the list and returns it, O(1) - unique_ptr Erase(BufferEntry *entry); - //! Insert an entry to the back of the list - void Append(unique_ptr entry); + bool Equals(const BaseExpression *other) const override { + return false; + } + hash_t Hash() const override { + return 0; + } -private: - //! Root pointer - unique_ptr root; - //! Pointer to last element in list - BufferEntry *last; - //! 
The amount of entries in the list - idx_t count; + unique_ptr Copy() const override { + throw SerializationException("Cannot copy or serialize bound expression"); + } }; } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/storage/buffer/managed_buffer.hpp -// -// -//===----------------------------------------------------------------------===// +namespace duckdb { +class Binder; +class ClientContext; +class QueryNode; -namespace duckdb { -class DatabaseInstance; +class ScalarFunctionCatalogEntry; +class AggregateFunctionCatalogEntry; +class MacroCatalogEntry; +class CatalogEntry; +class SimpleFunction; -//! Managed buffer is an arbitrarily-sized buffer that is at least of size >= BLOCK_SIZE -class ManagedBuffer : public FileBuffer { -public: - ManagedBuffer(DatabaseInstance &db, idx_t size, bool can_destroy, block_id_t id); +struct MacroBinding; - DatabaseInstance &db; - //! Whether or not the managed buffer can be freely destroyed when unpinned. - //! - If can_destroy is true, the buffer can be destroyed when unpinned and hence be unrecoverable. After being - //! destroyed, Pin() will return false. - //! - If can_destroy is false, the buffer will instead be written to a temporary file on disk when unloaded from - //! memory, and read back into memory when Pin() is called. - bool can_destroy; - //! 
The internal id of the buffer - block_id_t id; +struct BindResult { + explicit BindResult(string error) : error(error) { + } + explicit BindResult(unique_ptr expr) : expression(move(expr)) { + } + + bool HasError() { + return !error.empty(); + } + + unique_ptr expression; + string error; }; -} // namespace duckdb +class ExpressionBinder { +public: + ExpressionBinder(Binder &binder, ClientContext &context, bool replace_binder = false); + virtual ~ExpressionBinder(); -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/storage/block_manager.hpp -// -// -//===----------------------------------------------------------------------===// + //! The target type that should result from the binder. If the result is not of this type, a cast to this type will + //! be added. Defaults to INVALID. + LogicalType target_type; +public: + unique_ptr Bind(unique_ptr &expr, LogicalType *result_type = nullptr, + bool root_expression = true); + //! Returns whether or not any columns have been bound by the expression binder + bool BoundColumns() { + return bound_columns; + } + string Bind(unique_ptr *expr, idx_t depth, bool root_expression = false); + // Bind table names to ColumnRefExpressions + static void BindTableNames(Binder &binder, ParsedExpression &expr, + unordered_map *alias_map = nullptr); + static unique_ptr PushCollation(ClientContext &context, unique_ptr source, + const string &collation, bool equality_only = false); + static void TestCollation(ClientContext &context, const string &collation); + bool BindCorrelatedColumns(unique_ptr &expr); + void BindChild(unique_ptr &expr, idx_t depth, string &error); + static void ExtractCorrelatedExpressions(Binder &binder, Expression &expr); -namespace duckdb { -class ClientContext; -class DatabaseInstance; + static bool ContainsNullType(const LogicalType &type); + static LogicalType ExchangeNullType(const LogicalType &type); -//! 
BlockManager is an abstract representation to manage blocks on DuckDB. When writing or reading blocks, the -//! BlockManager creates and accesses blocks. The concrete types implements how blocks are stored. -class BlockManager { -public: - virtual ~BlockManager() = default; +protected: + virtual BindResult BindExpression(unique_ptr *expr_ptr, idx_t depth, + bool root_expression = false); - virtual void StartCheckpoint() = 0; - //! Creates a new block inside the block manager - virtual unique_ptr CreateBlock() = 0; - //! Return the next free block id - virtual block_id_t GetFreeBlockId() = 0; - //! Returns whether or not a specified block is the root block - virtual bool IsRootBlock(block_id_t root) { - return false; - }; - //! Mark a block as "modified"; modified blocks are added to the free list after a checkpoint (i.e. their data is - //! assumed to be rewritten) - virtual void MarkBlockAsModified(block_id_t block_id) { - } - //! Get the first meta block id - virtual block_id_t GetMetaBlock() = 0; - //! Read the content of the block from disk - virtual void Read(Block &block) = 0; - //! Writes the block to disk - virtual void Write(FileBuffer &block, block_id_t block_id) = 0; - //! Writes the block to disk - void Write(Block &block) { - Write(block, block.id); - } - //! 
Write the header; should be the final step of a checkpoint - virtual void WriteHeader(DatabaseHeader header) = 0; + BindResult BindExpression(BetweenExpression &expr, idx_t depth); + BindResult BindExpression(CaseExpression &expr, idx_t depth); + BindResult BindExpression(CollateExpression &expr, idx_t depth); + BindResult BindExpression(CastExpression &expr, idx_t depth); + BindResult BindExpression(ColumnRefExpression &expr, idx_t depth); + BindResult BindExpression(ComparisonExpression &expr, idx_t depth); + BindResult BindExpression(ConjunctionExpression &expr, idx_t depth); + BindResult BindExpression(ConstantExpression &expr, idx_t depth); + BindResult BindExpression(FunctionExpression &expr, idx_t depth, unique_ptr *expr_ptr); + BindResult BindExpression(LambdaExpression &expr, idx_t depth); + BindResult BindExpression(OperatorExpression &expr, idx_t depth); + BindResult BindExpression(ParameterExpression &expr, idx_t depth); + BindResult BindExpression(PositionalReferenceExpression &ref, idx_t depth); + BindResult BindExpression(StarExpression &expr, idx_t depth); + BindResult BindExpression(SubqueryExpression &expr, idx_t depth); - //! Returns the number of total blocks - virtual idx_t TotalBlocks() { - return 0; - } - //! 
Returns the number of free blocks - virtual idx_t FreeBlocks() { - return 0; - } +protected: + virtual BindResult BindFunction(FunctionExpression &expr, ScalarFunctionCatalogEntry *function, idx_t depth); + virtual BindResult BindAggregate(FunctionExpression &expr, AggregateFunctionCatalogEntry *function, idx_t depth); + virtual BindResult BindUnnest(FunctionExpression &expr, idx_t depth); + virtual BindResult BindMacro(FunctionExpression &expr, MacroCatalogEntry *macro, idx_t depth, + unique_ptr *expr_ptr); - static BlockManager &GetBlockManager(ClientContext &context); - static BlockManager &GetBlockManager(DatabaseInstance &db); -}; -} // namespace duckdb + virtual void ReplaceMacroParametersRecursive(unique_ptr &expr); + virtual void ReplaceMacroParametersRecursive(ParsedExpression &expr, QueryNode &node); + virtual void ReplaceMacroParametersRecursive(ParsedExpression &expr, TableRef &ref); + virtual string UnsupportedAggregateMessage(); + virtual string UnsupportedUnnestMessage(); + + Binder &binder; + ClientContext &context; + ExpressionBinder *stored_binder; + MacroBinding *macro_binding; + bool bound_columns = false; +}; +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/buffer/block_handle.hpp +// duckdb/planner/table_binding.hpp // // //===----------------------------------------------------------------------===// @@ -13045,145 +15516,184 @@ class BlockManager { + + namespace duckdb { -class BufferHandle; -class BufferManager; -class DatabaseInstance; -class FileBuffer; +class BindContext; +class BoundQueryNode; +class ColumnRefExpression; +class SubqueryRef; +class LogicalGet; +class TableCatalogEntry; +class TableFunctionCatalogEntry; +class BoundTableFunction; -enum class BlockState : uint8_t { BLOCK_UNLOADED = 0, BLOCK_LOADED = 1 }; +//! A Binding represents a binding to a table, table-producing function or subquery with a specified table index. 
+struct Binding { + Binding(const string &alias, vector types, vector names, idx_t index); + virtual ~Binding() = default; -class BlockHandle { - friend struct BufferEvictionNode; - friend class BufferHandle; - friend class BufferManager; + //! The alias of the binding + string alias; + //! The table index of the binding + idx_t index; + vector types; + //! Column names of the subquery + vector names; + //! Name -> index for the names + unordered_map name_map; public: - BlockHandle(DatabaseInstance &db, block_id_t block_id); - BlockHandle(DatabaseInstance &db, block_id_t block_id, unique_ptr buffer, bool can_destroy, - idx_t alloc_size); - ~BlockHandle(); + bool TryGetBindingIndex(const string &column_name, column_t &column_index); + bool HasMatchingBinding(const string &column_name); + virtual BindResult Bind(ColumnRefExpression &colref, idx_t depth); +}; - DatabaseInstance &db; +//! TableBinding is exactly like the Binding, except it keeps track of which columns were bound in the linked LogicalGet +//! node for projection pushdown purposes. +struct TableBinding : public Binding { + TableBinding(const string &alias, vector types, vector names, LogicalGet &get, idx_t index, + bool add_row_id = false); + + //! the underlying LogicalGet + LogicalGet &get; public: - block_id_t BlockId() { - return block_id; - } + BindResult Bind(ColumnRefExpression &colref, idx_t depth) override; +}; -private: - static unique_ptr Load(shared_ptr &handle); - void Unload(); - bool CanUnload(); +//! MacroBinding is like the Binding, except the alias and index are set by default. Used for binding Macro +//! Params/Arguments. +struct MacroBinding : public Binding { + MacroBinding(vector types_p, vector names_p, string macro_name); - //! The block-level lock - mutex lock; - //! Whether or not the block is loaded/unloaded - BlockState state; - // amount of concurrent readers - int32_t readers; - //! The block id of the block - block_id_t block_id; - //! 
Pointer to loaded data (if any) - unique_ptr buffer; - //! Internal eviction timestamp - idx_t eviction_timestamp; - //! Whether or not the buffer can be destroyed (only used for temporary buffers) - bool can_destroy; - //! The memory usage of the block - idx_t memory_usage; + //! Arguments + vector> arguments; + //! The name of the macro + string macro_name; + +public: + BindResult Bind(ColumnRefExpression &colref, idx_t depth) override; + + //! Given the parameter colref, returns a copy of the argument that was supplied for this parameter + unique_ptr ParamToArg(ColumnRefExpression &colref); }; } // namespace duckdb -#include -#include - namespace duckdb { -class DatabaseInstance; -struct EvictionQueue; +class Binder; +class LogicalGet; +class BoundQueryNode; -//! The buffer manager is in charge of handling memory management for the database. It hands out memory buffers that can -//! be used by the database internally. -class BufferManager { - friend class BufferHandle; - friend class BlockHandle; - friend class BlockPointer; +struct UsingColumnSet { + string primary_binding; + unordered_set bindings; +}; +//! The BindContext object keeps track of all the tables and columns that are +//! encountered during the binding process. +class BindContext { public: - BufferManager(DatabaseInstance &db, string temp_directory, idx_t maximum_memory); - ~BufferManager(); + //! Keep track of recursive CTE references + unordered_map> cte_references; - //! Register a block with the given block id in the base file - shared_ptr RegisterBlock(block_id_t block_id); +public: + //! Given a column name, find the matching table it belongs to. Throws an + //! exception if no table has a column of the given name. + string GetMatchingBinding(const string &column_name); + //! Like GetMatchingBinding, but instead of throwing an error if multiple tables have the same binding it will + //! 
return a list of all the matching ones + unordered_set GetMatchingBindings(const string &column_name); + //! Like GetMatchingBindings, but returns the top 3 most similar bindings (in levenshtein distance) instead of the + //! matching ones + vector GetSimilarBindings(const string &column_name); - //! Register an in-memory buffer of arbitrary size, as long as it is >= BLOCK_SIZE. can_destroy signifies whether or - //! not the buffer can be destroyed when unpinned, or whether or not it needs to be written to a temporary file so - //! it can be reloaded. The resulting buffer will already be allocated, but needs to be pinned in order to be used. - shared_ptr RegisterMemory(idx_t alloc_size, bool can_destroy); + Binding *GetCTEBinding(const string &ctename); + //! Binds a column expression to the base table. Returns the bound expression + //! or throws an exception if the column could not be bound. + BindResult BindColumn(ColumnRefExpression &colref, idx_t depth); + string BindColumn(PositionalReferenceExpression &ref, string &table_name, string &column_name); + BindResult BindColumn(PositionalReferenceExpression &ref, idx_t depth); - //! Allocate an in-memory buffer with a single pin. - //! The allocated memory is released when the buffer handle is destroyed. - unique_ptr Allocate(idx_t alloc_size); + //! Generate column expressions for all columns that are present in the + //! referenced tables. This is used to resolve the * expression in a + //! selection list. + void GenerateAllColumnExpressions(vector> &new_select_list, + const string &relation_name = ""); + const vector> &GetBindingsList() { + return bindings_list; + } - unique_ptr Pin(shared_ptr &handle); - void Unpin(shared_ptr &handle); + //! Adds a base table with the given alias to the BindContext. + void AddBaseTable(idx_t index, const string &alias, const vector &names, const vector &types, + LogicalGet &get); + //! Adds a call to a table function with the given alias to the BindContext. 
+ void AddTableFunction(idx_t index, const string &alias, const vector &names, + const vector &types, LogicalGet &get); + //! Adds a subquery with a given alias to the BindContext. + void AddSubquery(idx_t index, const string &alias, SubqueryRef &ref, BoundQueryNode &subquery); + //! Adds a base table with the given alias to the BindContext. + void AddGenericBinding(idx_t index, const string &alias, const vector &names, + const vector &types); - void UnregisterBlock(block_id_t block_id, bool can_destroy); + //! Adds a base table with the given alias to the CTE BindContext. + //! We need this to correctly bind recursive CTEs with multiple references. + void AddCTEBinding(idx_t index, const string &alias, const vector &names, const vector &types); - //! Set a new memory limit to the buffer manager, throws an exception if the new limit is too low and not enough - //! blocks can be evicted - void SetLimit(idx_t limit = (idx_t)-1); + //! Add an implicit join condition (e.g. USING (x)) + void AddUsingBinding(const string &column_name, UsingColumnSet set); - static BufferManager &GetBufferManager(ClientContext &context); - static BufferManager &GetBufferManager(DatabaseInstance &db); + //! Returns any using column set for the given column name, or nullptr if there is none. On conflict (multiple using + //! column sets with the same name) throw an exception. + UsingColumnSet *GetUsingBinding(const string &column_name); + //! Returns any using column set for the given column name, or nullptr if there is none + UsingColumnSet *GetUsingBinding(const string &column_name, const string &binding_name); + //! 
Erase a using binding from the set of using bindings + void RemoveUsingBinding(const string &column_name, UsingColumnSet *set); - idx_t GetUsedMemory() { - return current_memory; + unordered_map> GetCTEBindings() { + return cte_bindings; } - idx_t GetMaxMemory() { - return maximum_memory; + void SetCTEBindings(unordered_map> bindings) { + cte_bindings = bindings; } -private: - //! Evict blocks until the currently used memory + extra_memory fit, returns false if this was not possible - //! (i.e. not enough blocks could be evicted) - bool EvictBlocks(idx_t extra_memory, idx_t memory_limit); + //! Alias a set of column names for the specified table, using the original names if there are not enough aliases + //! specified. + static vector AliasColumnNames(const string &table_name, const vector &names, + const vector &column_aliases); - //! Write a temporary buffer to disk - void WriteTemporaryBuffer(ManagedBuffer &buffer); - //! Read a temporary buffer from disk - unique_ptr ReadTemporaryBuffer(block_id_t id); - //! Get the path of the temporary buffer - string GetTemporaryPath(block_id_t id); + //! Add all the bindings from a BindContext to this BindContext. The other BindContext is destroyed in the process. + void AddContext(BindContext other); - void DeleteTemporaryFile(block_id_t id); +private: + void AddBinding(const string &alias, unique_ptr binding); + //! Gets a binding of the specified name. Returns a nullptr and sets the out_error if the binding could not be + //! found. + Binding *GetBinding(const string &name, string &out_error); private: - //! The database instance - DatabaseInstance &db; - //! The current amount of memory that is occupied by the buffer manager (in bytes) - std::atomic current_memory; - //! The maximum amount of memory that the buffer manager can keep (in bytes) - std::atomic maximum_memory; - //! The directory name where temporary files are stored - string temp_directory; - //! 
The lock for the set of blocks - std::mutex manager_lock; - //! A mapping of block id -> BlockPointer - unordered_map> blocks; - //! Eviction queue - unique_ptr queue; - //! The temporary id used for managed buffers - block_id_t temporary_id; + //! The set of bindings + unordered_map> bindings; + //! The list of bindings in insertion order + vector> bindings_list; + //! The set of columns used in USING join conditions + unordered_map> using_columns; + + //! The set of CTE bindings + unordered_map> cte_bindings; }; } // namespace duckdb + + + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/statistics/segment_statistics.hpp +// duckdb/planner/bound_statement.hpp // // //===----------------------------------------------------------------------===// @@ -13192,211 +15702,254 @@ class BufferManager { - - namespace duckdb { -struct TableFilter; - -class SegmentStatistics { -public: - SegmentStatistics(LogicalType type, idx_t type_size); - SegmentStatistics(LogicalType type, idx_t type_size, unique_ptr statistics); - - LogicalType type; - idx_t type_size; - //! Type-specific statistics of the segment - unique_ptr statistics; - -public: - bool CheckZonemap(TableFilter &filter); - void Reset(); +struct BoundStatement { + unique_ptr plan; + vector types; + vector names; }; } // namespace duckdb namespace duckdb { -class BlockManager; -class ColumnSegment; -class ColumnData; -class Transaction; -class BaseStatistics; -struct TableFilter; -struct ColumnFetchState; -struct ColumnScanState; -enum class ColumnSegmentType : uint8_t { TRANSIENT, PERSISTENT }; -//! TableFilter represents a filter pushed down into the table scan. - -class ColumnSegment : public SegmentBase { -public: - //! 
Initialize an empty column segment of the specified type - ColumnSegment(LogicalType type, ColumnSegmentType segment_type, idx_t start, idx_t count = 0); - - ColumnSegment(LogicalType type, ColumnSegmentType segment_type, idx_t start, idx_t count, - unique_ptr statistics); +class BoundResultModifier; +class ClientContext; +class ExpressionBinder; +class LimitModifier; +class OrderBinder; +class TableCatalogEntry; +class ViewCatalogEntry; - ~ColumnSegment() override = default; +struct CreateInfo; +struct BoundCreateTableInfo; +struct BoundCreateFunctionInfo; +struct CommonTableExpressionInfo; - //! The type stored in the column +struct CorrelatedColumnInfo { + ColumnBinding binding; LogicalType type; - //! The size of the type - idx_t type_size; - //! The column segment type (transient or persistent) - ColumnSegmentType segment_type; - //! The statistics for the segment - SegmentStatistics stats; - -public: - virtual void InitializeScan(ColumnScanState &state) = 0; - //! Scan one vector from this segment - virtual void Scan(ColumnScanState &state, idx_t vector_index, Vector &result) = 0; - //! Fetch the base table vector index that belongs to this row - virtual void Fetch(ColumnScanState &state, idx_t vector_index, Vector &result) = 0; - //! 
Fetch a value of the specific row id and append it to the result - virtual void FetchRow(ColumnFetchState &state, row_t row_id, Vector &result, idx_t result_idx) = 0; -}; - -} // namespace duckdb - - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/execution/adaptive_filter.hpp -// -// -//===----------------------------------------------------------------------===// - + string name; + idx_t depth; + explicit CorrelatedColumnInfo(BoundColumnRefExpression &expr) + : binding(expr.binding), type(expr.return_type), name(expr.GetName()), depth(expr.depth) { + } -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/planner/expression/bound_aggregate_expression.hpp -// -// -//===----------------------------------------------------------------------===// + bool operator==(const CorrelatedColumnInfo &rhs) const { + return binding == rhs.binding; + } +}; +//! Bind the parsed query tree to the actual columns present in the catalog. +/*! + The binder is responsible for binding tables and columns to actual physical + tables and columns in the catalog. In the process, it also resolves types of + all expressions. +*/ +class Binder : public std::enable_shared_from_this { + friend class ExpressionBinder; + friend class RecursiveSubqueryPlanner; +public: + static shared_ptr CreateBinder(ClientContext &context, Binder *parent = nullptr, bool inherit_ctes = true); + //! The client context + ClientContext &context; + //! A mapping of names to common table expressions + unordered_map CTE_bindings; + //! The CTEs that have already been bound + unordered_set bound_ctes; + //! The bind context + BindContext bind_context; + //! The set of correlated columns bound by this binder (FIXME: this should probably be an unordered_set and not a + //! vector) + vector correlated_columns; + //! The set of parameter expressions bound by this binder + vector *parameters; + //! 
Whether or not the bound statement is read-only + bool read_only; + //! Whether or not the statement requires a valid transaction to run + bool requires_valid_transaction; + //! Whether or not the statement can be streamed to the client + bool allow_stream_result; + //! The alias for the currently processing subquery, if it exists + string alias; + //! Macro parameter bindings (if any) + MacroBinding *macro_binding = nullptr; +public: + BoundStatement Bind(SQLStatement &statement); + BoundStatement Bind(QueryNode &node); -#include + unique_ptr BindCreateTableInfo(unique_ptr info); + void BindCreateViewInfo(CreateViewInfo &base); + SchemaCatalogEntry *BindSchema(CreateInfo &info); + SchemaCatalogEntry *BindCreateFunctionInfo(CreateInfo &info); -namespace duckdb { -class BoundAggregateExpression : public Expression { -public: - BoundAggregateExpression(AggregateFunction function, vector> children, - unique_ptr filter, unique_ptr bind_info, bool distinct); + //! Check usage, and cast named parameters to their types + static void BindNamedParameters(unordered_map &types, unordered_map &values, + QueryErrorContext &error_context, string &func_name); - //! The bound function expression - AggregateFunction function; - //! List of arguments to the function - vector> children; - //! The bound function data (if any) - unique_ptr bind_info; - //! True to aggregate on distinct values - bool distinct; + unique_ptr Bind(TableRef &ref); + unique_ptr CreatePlan(BoundTableRef &ref); - //! Filter for this aggregate - unique_ptr filter; + //! Generates an unused index for a table + idx_t GenerateTableIndex(); -public: - bool IsAggregate() const override { - return true; - } - bool IsFoldable() const override { - return false; - } + //! Add a common table expression to the binder + void AddCTE(const string &name, CommonTableExpressionInfo *cte); + //! 
Find a common table expression by name; returns nullptr if none exists + CommonTableExpressionInfo *FindCTE(const string &name, bool skip = false); - string ToString() const override; + bool CTEIsAlreadyBound(CommonTableExpressionInfo *cte); - hash_t Hash() const override; - bool Equals(const BaseExpression *other) const override; - unique_ptr Copy() override; -}; -} // namespace duckdb + void PushExpressionBinder(ExpressionBinder *binder); + void PopExpressionBinder(); + void SetActiveBinder(ExpressionBinder *binder); + ExpressionBinder *GetActiveBinder(); + bool HasActiveBinder(); -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/planner/expression/bound_between_expression.hpp -// -// -//===----------------------------------------------------------------------===// + vector &GetActiveBinders(); + void MergeCorrelatedColumns(vector &other); + //! Add a correlated column to this binder (if it does not exist) + void AddCorrelatedColumn(const CorrelatedColumnInfo &info); + string FormatError(ParsedExpression &expr_context, const string &message); + string FormatError(TableRef &ref_context, const string &message); + string FormatErrorRecursive(idx_t query_location, const string &message, vector &values); + template + string FormatErrorRecursive(idx_t query_location, const string &msg, vector &values, T param, + Args... params) { + values.push_back(ExceptionFormatValue::CreateFormatValue(param)); + return FormatErrorRecursive(query_location, msg, values, params...); + } + template + string FormatError(idx_t query_location, const string &msg, Args... params) { + vector values; + return FormatErrorRecursive(query_location, msg, values, params...); + } -namespace duckdb { +private: + //! The parent binder (if any) + shared_ptr parent; + //! The vector of active binders + vector active_binders; + //! The count of bound_tables + idx_t bound_tables; + //! 
Whether or not the binder has any unplanned subqueries that still need to be planned + bool has_unplanned_subqueries = false; + //! Whether or not subqueries should be planned already + bool plan_subquery = true; + //! Whether CTEs should reference the parent binder (if it exists) + bool inherit_ctes = true; + //! Whether or not the binder can contain NULLs as the root of expressions + bool can_contain_nulls = false; + //! The root statement of the query that is currently being parsed + SQLStatement *root_statement = nullptr; -class BoundBetweenExpression : public Expression { -public: - BoundBetweenExpression(unique_ptr input, unique_ptr lower, unique_ptr upper, - bool lower_inclusive, bool upper_inclusive); +private: + //! Bind the default values of the columns of a table + void BindDefaultValues(vector &columns, vector> &bound_defaults); + //! Bind a delimiter value (LIMIT or OFFSET) + unique_ptr BindDelimiter(ClientContext &context, unique_ptr delimiter, + int64_t &delimiter_value); - unique_ptr input; - unique_ptr lower; - unique_ptr upper; - bool lower_inclusive; - bool upper_inclusive; + //! 
Move correlated expressions from the child binder to this binder + void MoveCorrelatedExpressions(Binder &other); -public: - string ToString() const override; + BoundStatement Bind(SelectStatement &stmt); + BoundStatement Bind(InsertStatement &stmt); + BoundStatement Bind(CopyStatement &stmt); + BoundStatement Bind(DeleteStatement &stmt); + BoundStatement Bind(UpdateStatement &stmt); + BoundStatement Bind(CreateStatement &stmt); + BoundStatement Bind(DropStatement &stmt); + BoundStatement Bind(AlterStatement &stmt); + BoundStatement Bind(TransactionStatement &stmt); + BoundStatement Bind(PragmaStatement &stmt); + BoundStatement Bind(ExplainStatement &stmt); + BoundStatement Bind(VacuumStatement &stmt); + BoundStatement Bind(RelationStatement &stmt); + BoundStatement Bind(ShowStatement &stmt); + BoundStatement Bind(CallStatement &stmt); + BoundStatement Bind(ExportStatement &stmt); + BoundStatement Bind(SetStatement &stmt); + BoundStatement Bind(LoadStatement &stmt); - bool Equals(const BaseExpression *other) const override; + unique_ptr BindNode(SelectNode &node); + unique_ptr BindNode(SetOperationNode &node); + unique_ptr BindNode(RecursiveCTENode &node); + unique_ptr BindNode(QueryNode &node); - unique_ptr Copy() override; + unique_ptr VisitQueryNode(BoundQueryNode &node, unique_ptr root); + unique_ptr CreatePlan(BoundRecursiveCTENode &node); + unique_ptr CreatePlan(BoundSelectNode &statement); + unique_ptr CreatePlan(BoundSetOperationNode &node); + unique_ptr CreatePlan(BoundQueryNode &node); -public: - ExpressionType LowerComparisonType() { - return lower_inclusive ? ExpressionType::COMPARE_GREATERTHANOREQUALTO : ExpressionType::COMPARE_GREATERTHAN; - } - ExpressionType UpperComparisonType() { - return upper_inclusive ? 
ExpressionType::COMPARE_LESSTHANOREQUALTO : ExpressionType::COMPARE_LESSTHAN; - } -}; -} // namespace duckdb + unique_ptr Bind(BaseTableRef &ref); + unique_ptr Bind(CrossProductRef &ref); + unique_ptr Bind(JoinRef &ref); + unique_ptr Bind(SubqueryRef &ref, CommonTableExpressionInfo *cte = nullptr); + unique_ptr Bind(TableFunctionRef &ref); + unique_ptr Bind(EmptyTableRef &ref); + unique_ptr Bind(ExpressionListRef &ref); -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/planner/expression/bound_case_expression.hpp -// -// -//===----------------------------------------------------------------------===// + bool BindFunctionParameters(vector> &expressions, vector &arguments, + vector ¶meters, unordered_map &named_parameters, + unique_ptr &subquery, string &error); + unique_ptr CreatePlan(BoundBaseTableRef &ref); + unique_ptr CreatePlan(BoundCrossProductRef &ref); + unique_ptr CreatePlan(BoundJoinRef &ref); + unique_ptr CreatePlan(BoundSubqueryRef &ref); + unique_ptr CreatePlan(BoundTableFunction &ref); + unique_ptr CreatePlan(BoundEmptyTableRef &ref); + unique_ptr CreatePlan(BoundExpressionListRef &ref); + unique_ptr CreatePlan(BoundCTERef &ref); + unique_ptr BindTable(TableCatalogEntry &table, BaseTableRef &ref); + unique_ptr BindView(ViewCatalogEntry &view, BaseTableRef &ref); + unique_ptr BindTableOrView(BaseTableRef &ref); + BoundStatement BindCopyTo(CopyStatement &stmt); + BoundStatement BindCopyFrom(CopyStatement &stmt); + void BindModifiers(OrderBinder &order_binder, QueryNode &statement, BoundQueryNode &result); + void BindModifierTypes(BoundQueryNode &result, const vector &sql_types, idx_t projection_index); -namespace duckdb { + unique_ptr BindLimit(LimitModifier &limit_mod); + unique_ptr BindFilter(unique_ptr condition); + unique_ptr BindOrderExpression(OrderBinder &order_binder, unique_ptr expr); -class BoundCaseExpression : public Expression { -public: - BoundCaseExpression(LogicalType type); - 
BoundCaseExpression(unique_ptr check, unique_ptr res_if_true, - unique_ptr res_if_false); + unique_ptr PlanFilter(unique_ptr condition, unique_ptr root); - unique_ptr check; - unique_ptr result_if_true; - unique_ptr result_if_false; + void PlanSubqueries(unique_ptr *expr, unique_ptr *root); + unique_ptr PlanSubquery(BoundSubqueryExpression &expr, unique_ptr &root); -public: - string ToString() const override; + unique_ptr CastLogicalOperatorToTypes(vector &source_types, + vector &target_types, + unique_ptr op); - bool Equals(const BaseExpression *other) const override; + string FindBinding(const string &using_column, const string &join_side); + bool TryFindBinding(const string &using_column, const string &join_side, string &result); - unique_ptr Copy() override; +public: + // This should really be a private constructor, but make_shared does not allow it... + Binder(bool I_know_what_I_am_doing, ClientContext &context, shared_ptr parent, bool inherit_ctes); }; + } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/planner/expression/bound_cast_expression.hpp +// duckdb/planner/bound_query_node.hpp // // //===----------------------------------------------------------------------===// @@ -13404,39 +15957,10 @@ class BoundCaseExpression : public Expression { - -namespace duckdb { - -class BoundCastExpression : public Expression { -public: - BoundCastExpression(unique_ptr child, LogicalType target_type); - - //! The child type - unique_ptr child; - -public: - LogicalType source_type() { - return child->return_type; - } - - //! Cast an expression to the specified SQL type if required - static unique_ptr AddCastToType(unique_ptr expr, const LogicalType &target_type); - //! Returns true if a cast is invertible (i.e. CAST(s -> t -> s) = s for all values of s). This is not true for e.g. - //! boolean casts, because that can be e.g. -1 -> TRUE -> 1. This is necessary to prevent some optimizer bugs. 
- static bool CastIsInvertible(const LogicalType &source_type, const LogicalType &target_type); - - string ToString() const override; - - bool Equals(const BaseExpression *other) const override; - - unique_ptr Copy() override; -}; -} // namespace duckdb - //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/planner/expression/bound_columnref_expression.hpp +// duckdb/planner/bound_result_modifier.hpp // // //===----------------------------------------------------------------------===// @@ -13446,124 +15970,125 @@ class BoundCastExpression : public Expression { -namespace duckdb { -//! A BoundColumnRef expression represents a ColumnRef expression that was bound to an actual table and column index. It -//! is not yet executable, however. The ColumnBindingResolver transforms the BoundColumnRefExpressions into -//! BoundExpressions, which refer to indexes into the physical chunks that pass through the executor. -class BoundColumnRefExpression : public Expression { -public: - BoundColumnRefExpression(LogicalType type, ColumnBinding binding, idx_t depth = 0); - BoundColumnRefExpression(string alias, LogicalType type, ColumnBinding binding, idx_t depth = 0); - //! Column index set by the binder, used to generate the final BoundExpression - ColumnBinding binding; - //! The subquery depth (i.e. depth 0 = current query, depth 1 = parent query, depth 2 = parent of parent, etc...). - //! This is only non-zero for correlated expressions inside subqueries. - idx_t depth; +namespace duckdb { +//! 
A ResultModifier +class BoundResultModifier { public: - bool IsScalar() const override { - return false; + explicit BoundResultModifier(ResultModifierType type) : type(type) { } - bool IsFoldable() const override { - return false; + virtual ~BoundResultModifier() { } - string ToString() const override; - - bool Equals(const BaseExpression *other) const override; - hash_t Hash() const override; - - unique_ptr Copy() override; + ResultModifierType type; }; -} // namespace duckdb - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/planner/expression/bound_comparison_expression.hpp -// -// -//===----------------------------------------------------------------------===// - - - +struct BoundOrderByNode { + BoundOrderByNode(OrderType type, OrderByNullType null_order, unique_ptr expression) + : type(type), null_order(null_order), expression(move(expression)) { + } -namespace duckdb { - -class BoundComparisonExpression : public Expression { -public: - BoundComparisonExpression(ExpressionType type, unique_ptr left, unique_ptr right); - - unique_ptr left; - unique_ptr right; + OrderType type; + OrderByNullType null_order; + unique_ptr expression; +}; +class BoundLimitModifier : public BoundResultModifier { public: - string ToString() const override; - - bool Equals(const BaseExpression *other) const override; - - unique_ptr Copy() override; + BoundLimitModifier() : BoundResultModifier(ResultModifierType::LIMIT_MODIFIER) { + } + //! LIMIT + int64_t limit_val = NumericLimits::Maximum(); + //! OFFSET + int64_t offset_val = 0; + //! Expression in case limit is not constant + unique_ptr limit; + //! Expression in case limit is not constant + unique_ptr offset; +}; +class BoundOrderModifier : public BoundResultModifier { public: - static LogicalType BindComparison(LogicalType left_type, LogicalType right_type); + BoundOrderModifier() : BoundResultModifier(ResultModifierType::ORDER_MODIFIER) { + } + + //! 
List of order nodes + vector orders; }; -} // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/planner/expression/bound_conjunction_expression.hpp -// -// -//===----------------------------------------------------------------------===// +class BoundDistinctModifier : public BoundResultModifier { +public: + BoundDistinctModifier() : BoundResultModifier(ResultModifierType::DISTINCT_MODIFIER) { + } + //! list of distinct on targets (if any) + vector> target_distincts; +}; +} // namespace duckdb namespace duckdb { -class BoundConjunctionExpression : public Expression { +//! Bound equivalent of QueryNode +class BoundQueryNode { public: - explicit BoundConjunctionExpression(ExpressionType type); - BoundConjunctionExpression(ExpressionType type, unique_ptr left, unique_ptr right); - - vector> children; + explicit BoundQueryNode(QueryNodeType type) : type(type) { + } + virtual ~BoundQueryNode() { + } -public: - string ToString() const override; + //! The type of the query node, either SetOperation or Select + QueryNodeType type; + //! The result modifiers that should be applied to this query node + vector> modifiers; - bool Equals(const BaseExpression *other) const override; + //! The names returned by this QueryNode. + vector names; + //! The types returned by this QueryNode. 
+ vector types; - unique_ptr Copy() override; +public: + virtual idx_t GetRootIndex() = 0; }; -} // namespace duckdb - - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/planner/expression/bound_default_expression.hpp -// -// -//===----------------------------------------------------------------------===// - +} // namespace duckdb namespace duckdb { -class BoundDefaultExpression : public Expression { +class BoundSubqueryExpression : public Expression { public: - explicit BoundDefaultExpression(LogicalType type = LogicalType()) - : Expression(ExpressionType::VALUE_DEFAULT, ExpressionClass::BOUND_DEFAULT, type) { + explicit BoundSubqueryExpression(LogicalType return_type); + + bool IsCorrelated() { + return binder->correlated_columns.size() > 0; } + //! The binder used to bind the subquery node + shared_ptr binder; + //! The bound subquery node + unique_ptr subquery; + //! The subquery type + SubqueryType subquery_type; + //! the child expression to compare with (in case of IN, ANY, ALL operators) + unique_ptr child; + //! The comparison type of the child expression with the subquery (in case of ANY, ALL operators) + ExpressionType comparison_type; + //! The LogicalType of the subquery result. Only used for ANY expressions. + LogicalType child_type; + //! The target LogicalType of the subquery result (i.e. to which type it should be casted, if child_type <> + //! child_target). Only used for ANY expressions. 
+ LogicalType child_target; + public: + bool HasSubquery() const override { + return true; + } bool IsScalar() const override { return false; } @@ -13571,21 +16096,18 @@ class BoundDefaultExpression : public Expression { return false; } - string ToString() const override { - return "DEFAULT"; - } + string ToString() const override; - unique_ptr Copy() override { - return make_unique(return_type); - } + bool Equals(const BaseExpression *other) const override; + + unique_ptr Copy() override; }; } // namespace duckdb - //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/planner/expression/bound_operator_expression.hpp +// duckdb/planner/expression/bound_unnest_expression.hpp // // //===----------------------------------------------------------------------===// @@ -13596,15 +16118,18 @@ class BoundDefaultExpression : public Expression { namespace duckdb { -class BoundOperatorExpression : public Expression { +//! Represents a function call that has been bound to a base function +class BoundUnnestExpression : public Expression { public: - BoundOperatorExpression(ExpressionType type, LogicalType return_type); + explicit BoundUnnestExpression(LogicalType return_type); - vector> children; + unique_ptr child; public: + bool IsFoldable() const override; string ToString() const override; + hash_t Hash() const override; bool Equals(const BaseExpression *other) const override; unique_ptr Copy() override; @@ -13614,43 +16139,17 @@ class BoundOperatorExpression : public Expression { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/planner/expression/bound_parameter_expression.hpp +// duckdb/planner/expression/bound_window_expression.hpp // // //===----------------------------------------------------------------------===// - - - -namespace duckdb { - -class BoundParameterExpression : public Expression { -public: - explicit BoundParameterExpression(idx_t parameter_nr); - - 
idx_t parameter_nr; - Value *value; - -public: - bool IsScalar() const override; - bool HasParameter() const override; - bool IsFoldable() const override; - - string ToString() const override; - - bool Equals(const BaseExpression *other) const override; - hash_t Hash() const override; - - unique_ptr Copy() override; -}; -} // namespace duckdb - //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/planner/expression/bound_reference_expression.hpp +// duckdb/parser/expression/window_expression.hpp // // //===----------------------------------------------------------------------===// @@ -13659,195 +16158,272 @@ class BoundParameterExpression : public Expression { + namespace duckdb { -//! A BoundReferenceExpression represents a physical index into a DataChunk -class BoundReferenceExpression : public Expression { +enum class WindowBoundary : uint8_t { + INVALID = 0, + UNBOUNDED_PRECEDING = 1, + UNBOUNDED_FOLLOWING = 2, + CURRENT_ROW_RANGE = 3, + CURRENT_ROW_ROWS = 4, + EXPR_PRECEDING_ROWS = 5, + EXPR_FOLLOWING_ROWS = 6, + EXPR_PRECEDING_RANGE = 7, + EXPR_FOLLOWING_RANGE = 8 +}; + +//! The WindowExpression represents a window function in the query. They are a special case of aggregates which is why +//! they inherit from them. +class WindowExpression : public ParsedExpression { public: - BoundReferenceExpression(string alias, LogicalType type, idx_t index); - BoundReferenceExpression(LogicalType type, idx_t index); + WindowExpression(ExpressionType type, string schema_name, const string &function_name); - //! Index used to access data in the chunks - idx_t index; + //! Schema of the aggregate function + string schema; + //! Name of the aggregate function + string function_name; + //! The child expression of the main window aggregate + vector> children; + //! The set of expressions to partition by + vector> partitions; + //! The set of ordering clauses + vector orders; + //! 
The window boundaries + WindowBoundary start = WindowBoundary::INVALID; + WindowBoundary end = WindowBoundary::INVALID; + + unique_ptr start_expr; + unique_ptr end_expr; + //! Offset and default expressions for WINDOW_LEAD and WINDOW_LAG functions + unique_ptr offset_expr; + unique_ptr default_expr; public: - bool IsScalar() const override { - return false; - } - bool IsFoldable() const override { - return false; + bool IsWindow() const override { + return true; } + //! Get the name of the expression + string GetName() const override; + //! Convert the Expression to a String string ToString() const override; - hash_t Hash() const override; - bool Equals(const BaseExpression *other) const override; + static bool Equals(const WindowExpression *a, const WindowExpression *b); - unique_ptr Copy() override; + unique_ptr Copy() const override; + + void Serialize(Serializer &serializer) override; + static unique_ptr Deserialize(ExpressionType type, Deserializer &source); }; } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/planner/expression/bound_subquery_expression.hpp -// -// -//===----------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/enums/subquery_type.hpp -// -// -//===----------------------------------------------------------------------===// +namespace duckdb { +class AggregateFunction; + +class BoundWindowExpression : public Expression { +public: + BoundWindowExpression(ExpressionType type, LogicalType return_type, unique_ptr aggregate, + unique_ptr bind_info); + //! The bound aggregate function + unique_ptr aggregate; + //! The bound function info + unique_ptr bind_info; + //! The child expressions of the main window aggregate + vector> children; + //! The set of expressions to partition by + vector> partitions; + //! 
The set of ordering clauses + vector orders; + //! The window boundaries + WindowBoundary start = WindowBoundary::INVALID; + WindowBoundary end = WindowBoundary::INVALID; + unique_ptr start_expr; + unique_ptr end_expr; + //! Offset and default expressions for WINDOW_LEAD and WINDOW_LAG functions + unique_ptr offset_expr; + unique_ptr default_expr; +public: + bool IsWindow() const override { + return true; + } + bool IsFoldable() const override { + return false; + } -namespace duckdb { + string ToString() const override; -//===--------------------------------------------------------------------===// -// Subquery Types -//===--------------------------------------------------------------------===// -enum class SubqueryType : uint8_t { - INVALID = 0, - SCALAR = 1, // Regular scalar subquery - EXISTS = 2, // EXISTS (SELECT...) - NOT_EXISTS = 3, // NOT EXISTS(SELECT...) - ANY = 4, // x = ANY(SELECT...) OR x IN (SELECT...) -}; + bool KeysAreCompatible(const BoundWindowExpression *other) const; + bool Equals(const BaseExpression *other) const override; + unique_ptr Copy() override; +}; } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/planner/binder.hpp -// -// -//===----------------------------------------------------------------------===// +#include +namespace duckdb { +class AdaptiveFilter { +public: + explicit AdaptiveFilter(const Expression &expr); + explicit AdaptiveFilter(TableFilterSet *table_filters); + void AdaptRuntimeStatistics(double duration); + vector permutation; +private: + //! 
used for adaptive expression reordering + idx_t iteration_count; + idx_t swap_idx; + idx_t right_random_border; + idx_t observe_interval; + idx_t execute_interval; + double runtime_sum; + double prev_mean; + bool observe; + bool warmup; + vector swap_likeliness; + std::default_random_engine generator; +}; +} // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/parser/tokens.hpp -// -// -//===----------------------------------------------------------------------===// +namespace duckdb { +class ColumnSegment; +class LocalTableStorage; +class Index; +class RowGroup; +class UpdateSegment; +class TableScanState; +class ColumnSegment; +class ValiditySegment; +class TableFilterSet; +struct SegmentScanState { + virtual ~SegmentScanState() { + } +}; -namespace duckdb { +struct IndexScanState { + virtual ~IndexScanState() { + } +}; -//===--------------------------------------------------------------------===// -// Statements -//===--------------------------------------------------------------------===// -class SQLStatement; +typedef unordered_map> buffer_handle_set_t; -class AlterStatement; -class CallStatement; -class CopyStatement; -class CreateStatement; -class DeleteStatement; -class DropStatement; -class InsertStatement; -class SelectStatement; -class TransactionStatement; -class UpdateStatement; -class PrepareStatement; -class ExecuteStatement; -class PragmaStatement; -class ShowStatement; -class ExplainStatement; -class ExportStatement; -class VacuumStatement; -class RelationStatement; -class SetStatement; -class LoadStatement; +struct ColumnScanState { + //! The column segment that is currently being scanned + ColumnSegment *current; + //! The current row index of the scan + idx_t row_index; + //! The internal row index (i.e. the position of the SegmentScanState) + idx_t internal_index; + //! Segment scan state + unique_ptr scan_state; + //! 
Child states of the vector + vector child_states; + //! Whether or not InitializeState has been called for this segment + bool initialized = false; + //! If this segment has already been checked for skipping purposes + bool segment_checked = false; -//===--------------------------------------------------------------------===// -// Query Node -//===--------------------------------------------------------------------===// -class QueryNode; -class SelectNode; -class SetOperationNode; -class RecursiveCTENode; +public: + //! Move the scan state forward by "count" rows (including all child states) + void Next(idx_t count); + //! Move ONLY this state forward by "count" rows (i.e. not the child states) + void NextInternal(idx_t count); + //! Move the scan state forward by STANDARD_VECTOR_SIZE rows + void NextVector(); +}; -//===--------------------------------------------------------------------===// -// Expressions -//===--------------------------------------------------------------------===// -class ParsedExpression; +struct ColumnFetchState { + //! The set of pinned block handles for this set of fetches + buffer_handle_set_t handles; + //! 
Any child states of the fetch + vector> child_states; +}; -class CaseExpression; -class CastExpression; -class CollateExpression; -class ColumnRefExpression; -class ComparisonExpression; -class ConjunctionExpression; -class ConstantExpression; -class DefaultExpression; -class FunctionExpression; -class LambdaExpression; -class OperatorExpression; -class ParameterExpression; -class PositionalReferenceExpression; -class StarExpression; -class SubqueryExpression; -class WindowExpression; +struct LocalScanState { + ~LocalScanState(); -//===--------------------------------------------------------------------===// -// Constraints -//===--------------------------------------------------------------------===// -class Constraint; + void SetStorage(LocalTableStorage *storage); + LocalTableStorage *GetStorage() { + return storage; + } -class NotNullConstraint; -class CheckConstraint; -class UniqueConstraint; + idx_t chunk_index; + idx_t max_index; + idx_t last_chunk_count; + TableFilterSet *table_filters; -//===--------------------------------------------------------------------===// -// TableRefs -//===--------------------------------------------------------------------===// -class TableRef; +private: + LocalTableStorage *storage = nullptr; +}; -class BaseTableRef; -class CrossProductRef; -class JoinRef; -class SubqueryRef; -class TableFunctionRef; -class EmptyTableRef; -class ExpressionListRef; +class RowGroupScanState { +public: + RowGroupScanState(TableScanState &parent_p) : parent(parent_p), vector_index(0), max_row(0) { + } -//===--------------------------------------------------------------------===// -// Other -//===--------------------------------------------------------------------===// -struct SampleOptions; + //! The parent scan state + TableScanState &parent; + //! The current row_group we are scanning + RowGroup *row_group; + //! The vector index within the row_group + idx_t vector_index; + //! The maximum row index of this row_group scan + idx_t max_row; + //! 
Child column scans + unique_ptr column_scans; -} // namespace duckdb +public: + //! Move to the next vector, skipping past the current one + void NextVector(); +}; -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/planner/bind_context.hpp -// -// -//===----------------------------------------------------------------------===// +class TableScanState { +public: + TableScanState() : row_group_scan_state(*this), max_row(0) {}; + + //! The row_group scan state + RowGroupScanState row_group_scan_state; + //! The total maximum row index + idx_t max_row; + //! The column identifiers of the scan + vector column_ids; + //! The table filters (if any) + TableFilterSet *table_filters = nullptr; + //! Adaptive filter info (if any) + unique_ptr adaptive_filter; + //! Transaction-local scan state + LocalScanState local_state; +public: + //! Move to the next vector + void NextVector(); +}; +class CreateIndexScanState : public TableScanState { +public: + vector> locks; + unique_lock append_lock; + unique_lock delete_lock; +}; +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/catalog/catalog_entry/table_catalog_entry.hpp +// duckdb/storage/statistics/segment_statistics.hpp // // //===----------------------------------------------------------------------===// @@ -13858,81 +16434,28 @@ struct SampleOptions; - - - namespace duckdb { -class ColumnStatistics; -class DataTable; -struct CreateTableInfo; -struct BoundCreateTableInfo; - -struct RenameColumnInfo; -struct AddColumnInfo; -struct RemoveColumnInfo; -struct SetDefaultInfo; -struct ChangeColumnTypeInfo; - -//! A table catalog entry -class TableCatalogEntry : public StandardEntry { -public: - //! 
Create a real TableCatalogEntry and initialize storage for it - TableCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, BoundCreateTableInfo *info, - std::shared_ptr inherited_storage = nullptr); - - //! A reference to the underlying storage unit used for this table - std::shared_ptr storage; - //! A list of columns that are part of this table - vector columns; - //! A list of constraints that are part of this table - vector> constraints; - //! A list of constraints that are part of this table - vector> bound_constraints; - //! A map of column name to column index - unordered_map name_map; - +class SegmentStatistics { public: - unique_ptr AlterEntry(ClientContext &context, AlterInfo *info) override; - //! Returns whether or not a column with the given name exists - bool ColumnExists(const string &name); - //! Returns a reference to the column of the specified name. Throws an - //! exception if the column does not exist. - ColumnDefinition &GetColumn(const string &name); - //! Returns a list of types of the table - vector GetTypes(); - //! Returns a list of types of the specified columns of the table - vector GetTypes(const vector &column_ids); - string ToSQL() override; - - //! Add lower case aliases to a name map (e.g. "Hello" -> "hello" is also acceptable) - static void AddLowerCaseAliases(unordered_map &name_map); - - //! Serialize the meta information of the TableCatalogEntry a serializer - virtual void Serialize(Serializer &serializer); - //! Deserializes to a CreateTableInfo - static unique_ptr Deserialize(Deserializer &source); - - unique_ptr Copy(ClientContext &context) override; + SegmentStatistics(LogicalType type); + SegmentStatistics(LogicalType type, unique_ptr statistics); - void SetAsRoot() override; + LogicalType type; - void CommitAlter(AlterInfo &info); - void CommitDrop(); + //! 
Type-specific statistics of the segment + unique_ptr statistics; -private: - unique_ptr RenameColumn(ClientContext &context, RenameColumnInfo &info); - unique_ptr AddColumn(ClientContext &context, AddColumnInfo &info); - unique_ptr RemoveColumn(ClientContext &context, RemoveColumnInfo &info); - unique_ptr SetDefault(ClientContext &context, SetDefaultInfo &info); - unique_ptr ChangeColumnType(ClientContext &context, ChangeColumnTypeInfo &info); +public: + void Reset(); }; + } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp +// duckdb/common/enums/scan_options.hpp // // //===----------------------------------------------------------------------===// @@ -13941,170 +16464,243 @@ class TableCatalogEntry : public StandardEntry { +namespace duckdb { +enum class TableScanType : uint8_t { + //! Regular table scan: scan all tuples that are relevant for the current transaction + TABLE_SCAN_REGULAR = 0, + //! Scan all rows, including any deleted rows. Committed updates are merged in. + TABLE_SCAN_COMMITTED_ROWS = 1, + //! Scan all rows, including any deleted rows. Throws an exception if there are any uncommitted updates. + TABLE_SCAN_COMMITTED_ROWS_DISALLOW_UPDATES = 2, + //! Scan all rows, excluding any permanently deleted rows. + //! Permanently deleted rows are rows which no transaction will ever need again. + TABLE_SCAN_COMMITTED_ROWS_OMIT_PERMANENTLY_DELETED = 3 +}; -namespace duckdb { +} // namespace duckdb -class Catalog; -class Constraint; -struct CreateTableFunctionInfo; -//! 
A table function in the catalog -class TableFunctionCatalogEntry : public StandardEntry { -public: - TableFunctionCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, CreateTableFunctionInfo *info); +namespace duckdb { +class ColumnData; +class DatabaseInstance; +class DataTable; +struct DataTableInfo; +class ExpressionExecutor; +class TableDataWriter; +class UpdateSegment; +class Vector; +struct RowGroupPointer; +struct VersionNode; - //! The table function - vector functions; -}; -} // namespace duckdb +class RowGroup : public SegmentBase { +public: + friend class ColumnData; + friend class VersionDeleteState; +public: + static constexpr const idx_t ROW_GROUP_VECTOR_COUNT = 120; + static constexpr const idx_t ROW_GROUP_SIZE = STANDARD_VECTOR_SIZE * ROW_GROUP_VECTOR_COUNT; +public: + RowGroup(DatabaseInstance &db, DataTableInfo &table_info, idx_t start, idx_t count); + RowGroup(DatabaseInstance &db, DataTableInfo &table_info, const vector &types, + RowGroupPointer &pointer); + ~RowGroup(); -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/parser/expression/columnref_expression.hpp -// -// -//===----------------------------------------------------------------------===// +private: + //! The database instance + DatabaseInstance &db; + //! The table info of this row_group + DataTableInfo &table_info; + //! The version info of the row_group (inserted and deleted tuple info) + shared_ptr version_info; + //! The column data of the row_group + vector> columns; + //! 
The segment statistics for each of the columns + vector> stats; +public: + DatabaseInstance &GetDatabase() { + return db; + } + DataTableInfo &GetTableInfo() { + return table_info; + } + idx_t GetColumnIndex(ColumnData *data) { + for (idx_t i = 0; i < columns.size(); i++) { + if (columns[i].get() == data) { + return i; + } + } + return 0; + } + unique_ptr AlterType(ClientContext &context, const LogicalType &target_type, idx_t changed_idx, + ExpressionExecutor &executor, TableScanState &scan_state, DataChunk &scan_chunk); + unique_ptr AddColumn(ClientContext &context, ColumnDefinition &new_column, ExpressionExecutor &executor, + Expression *default_value, Vector &intermediate); + unique_ptr RemoveColumn(idx_t removed_column); + void CommitDrop(); + void CommitDropColumn(idx_t index); + void InitializeEmpty(const vector &types); -namespace duckdb { + //! Initialize a scan over this row_group + bool InitializeScan(RowGroupScanState &state); + bool InitializeScanWithOffset(RowGroupScanState &state, idx_t vector_offset); + //! Checks the given set of table filters against the row-group statistics. Returns false if the entire row group + //! can be skipped. + bool CheckZonemap(TableFilterSet &filters, const vector &column_ids); + //! Checks the given set of table filters against the per-segment statistics. Returns false if any segments were + //! skipped. + bool CheckZonemapSegments(RowGroupScanState &state); + void Scan(Transaction &transaction, RowGroupScanState &state, DataChunk &result); + void ScanCommitted(RowGroupScanState &state, DataChunk &result, TableScanType type); + + idx_t GetSelVector(Transaction &transaction, idx_t vector_idx, SelectionVector &sel_vector, idx_t max_count); + idx_t GetCommittedSelVector(transaction_t start_time, transaction_t transaction_id, idx_t vector_idx, + SelectionVector &sel_vector, idx_t max_count); + + //! For a specific row, returns true if it should be used for the transaction and false otherwise. 
+ bool Fetch(Transaction &transaction, idx_t row); + //! Fetch a specific row from the row_group and insert it into the result at the specified index + void FetchRow(Transaction &transaction, ColumnFetchState &state, const vector &column_ids, row_t row_id, + DataChunk &result, idx_t result_idx); + + //! Append count rows to the version info + void AppendVersionInfo(Transaction &transaction, idx_t start, idx_t count, transaction_t commit_id); + //! Commit a previous append made by RowGroup::AppendVersionInfo + void CommitAppend(transaction_t commit_id, idx_t start, idx_t count); + //! Revert a previous append made by RowGroup::AppendVersionInfo + void RevertAppend(idx_t start); + + //! Delete the given set of rows in the version manager + idx_t Delete(Transaction &transaction, DataTable *table, row_t *row_ids, idx_t count); + + RowGroupPointer Checkpoint(TableDataWriter &writer, vector> &global_stats); + static void Serialize(RowGroupPointer &pointer, Serializer &serializer); + static RowGroupPointer Deserialize(Deserializer &source, const vector &columns); + + void InitializeAppend(Transaction &transaction, RowGroupAppendState &append_state, idx_t remaining_append_count); + void Append(RowGroupAppendState &append_state, DataChunk &chunk, idx_t append_count); + + void Update(Transaction &transaction, DataChunk &updates, row_t *ids, idx_t offset, idx_t count, + const vector &column_ids); + //! Update a single column; corresponds to DataTable::UpdateColumn + //! This method should only be called from the WAL + void UpdateColumn(Transaction &transaction, DataChunk &updates, Vector &row_ids, + const vector &column_path); + + void MergeStatistics(idx_t column_idx, BaseStatistics &other); + unique_ptr GetStatistics(idx_t column_idx); + + void GetStorageInfo(idx_t row_group_index, vector> &result); -//! Represents a reference to a column from either the FROM clause or from an -//! alias -class ColumnRefExpression : public ParsedExpression { -public: - //! 
Specify both the column and table name - ColumnRefExpression(string column_name, string table_name); - //! Only specify the column name, the table name will be derived later - explicit ColumnRefExpression(string column_name); + void Verify(); - //! Column name that is referenced - string column_name; - //! Table name of the column name that is referenced (optional) - string table_name; + void NextVector(RowGroupScanState &state); -public: - bool IsScalar() const override { - return false; - } +private: + ChunkInfo *GetChunkInfo(idx_t vector_idx); - string GetName() const override; - string ToString() const override; + template + void TemplatedScan(Transaction *transaction, RowGroupScanState &state, DataChunk &result); - static bool Equals(const ColumnRefExpression *a, const ColumnRefExpression *b); - hash_t Hash() const override; + static void CheckpointDeletes(VersionNode *versions, Serializer &serializer); + static shared_ptr DeserializeDeletes(Deserializer &source); - unique_ptr Copy() const override; +private: + mutex row_group_lock; + mutex stats_lock; +}; - void Serialize(Serializer &serializer) override; - static unique_ptr Deserialize(ExpressionType type, Deserializer &source); +struct VersionNode { + unique_ptr info[RowGroup::ROW_GROUP_VECTOR_COUNT]; }; + } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/parser/qualified_name_set.hpp -// -// -//===----------------------------------------------------------------------===// +namespace duckdb { +struct DataPointer { + uint64_t row_start; + uint64_t tuple_count; + BlockPointer block_pointer; + CompressionType compression_type; + //! 
Type-specific statistics of the segment + unique_ptr statistics; +}; -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/parser/qualified_name.hpp -// -// -//===----------------------------------------------------------------------===// +struct RowGroupPointer { + uint64_t row_start; + uint64_t tuple_count; + //! The data pointers of the column segments stored in the row group + vector data_pointers; + //! The per-column statistics of the row group + vector> statistics; + //! The versions information of the row group (if any) + shared_ptr versions; +}; +} // namespace duckdb +namespace duckdb { +class BaseStatistics; +class PersistentTableData { +public: + explicit PersistentTableData(idx_t column_count); + ~PersistentTableData(); + vector row_groups; + vector> column_stats; +}; -namespace duckdb { +} // namespace duckdb -struct QualifiedName { - string schema; - string name; - //! Parse the (optional) schema and a name from a string in the format of e.g. "schema"."table"; if there is no dot - //! 
the schema will be set to INVALID_SCHEMA - static QualifiedName Parse(string input) { - string schema; - string name; - idx_t idx = 0; - vector entries; - string entry; - normal: - // quote - for (; idx < input.size(); idx++) { - if (input[idx] == '"') { - idx++; - goto quoted; - } else if (input[idx] == '.') { - goto separator; - } - entry += input[idx]; - } - goto end; - separator: - entries.push_back(entry); - entry = ""; - idx++; - goto normal; - quoted: - // look for another quote - for (; idx < input.size(); idx++) { - if (input[idx] == '"') { - // unquote - idx++; - goto normal; - } - entry += input[idx]; - } - throw ParserException("Unterminated quote in qualified name!"); - end: - if (entries.size() == 0) { - schema = INVALID_SCHEMA; - name = entry; - } else if (entries.size() == 1) { - schema = entries[0]; - name = entry; - } else { - throw ParserException("Expected schema.entry or entry: too many entries found"); - } - return QualifiedName {schema, name}; - } -}; +namespace duckdb { +class CatalogEntry; -struct QualifiedColumnName { - QualifiedColumnName() { - } - QualifiedColumnName(string table_p, string column_p) : table(move(table_p)), column(move(column_p)) { +struct BoundCreateTableInfo { + explicit BoundCreateTableInfo(unique_ptr base) : base(move(base)) { } - string schema; - string table; - string column; + //! The schema to create the table in + SchemaCatalogEntry *schema; + //! The base CreateInfo object + unique_ptr base; + //! The map of column names -> column index, used during binding + unordered_map name_map; + //! List of constraints on the table + vector> constraints; + //! List of bound constraints on the table + vector> bound_constraints; + //! Bound default values + vector> bound_defaults; + //! Dependents of the table (in e.g. default values) + unordered_set dependencies; + //! The existing table data on disk (if any) + unique_ptr data; + //! 
CREATE TABLE from QUERY + unique_ptr query; + + CreateTableInfo &Base() { + return (CreateTableInfo &)*base; + } }; } // namespace duckdb - //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/types/hash.hpp +// duckdb/parser/constraints/not_null_constraint.hpp // // //===----------------------------------------------------------------------===// @@ -14113,75 +16709,60 @@ struct QualifiedColumnName { - namespace duckdb { -struct string_t; - -// efficient hash function that maximizes the avalanche effect and minimizes -// bias -// see: https://nullprogram.com/blog/2018/07/31/ +class NotNullConstraint : public Constraint { +public: + explicit NotNullConstraint(column_t index) : Constraint(ConstraintType::NOT_NULL), index(index) {}; + ~NotNullConstraint() override { + } -inline hash_t murmurhash64(uint64_t x) { - return x * UINT64_C(0xbf58476d1ce4e5b9); -} + //! Column index this constraint pertains to + column_t index; -inline hash_t murmurhash32(uint32_t x) { - return murmurhash64(x); -} +public: + string ToString() const override; -template -hash_t Hash(T value) { - return murmurhash32(value); -} + unique_ptr Copy() override; -//! Combine two hashes by XORing them -inline hash_t CombineHash(hash_t left, hash_t right) { - return left ^ right; -} + //! Serialize to a stand-alone binary blob + void Serialize(Serializer &serializer) override; + //! 
Deserializes a NotNullConstraint + static unique_ptr Deserialize(Deserializer &source); +}; -template <> -hash_t Hash(uint64_t val); -template <> -hash_t Hash(int64_t val); -template <> -hash_t Hash(hugeint_t val); -template <> -hash_t Hash(float val); -template <> -hash_t Hash(double val); -template <> -hash_t Hash(const char *val); -template <> -hash_t Hash(char *val); -template <> -hash_t Hash(string_t val); -template <> -hash_t Hash(interval_t val); -hash_t Hash(const char *val, size_t size); -hash_t Hash(char *val, size_t size); -hash_t Hash(uint8_t *val, size_t size); +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/data_table.hpp +// +// +//===----------------------------------------------------------------------===// -} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/enums/index_type.hpp +// +// +//===----------------------------------------------------------------------===// -namespace duckdb { -struct QualifiedColumnHashFunction { - uint64_t operator()(const QualifiedColumnName &a) const { - std::hash str_hasher; - return str_hasher(a.schema) ^ str_hasher(a.table) ^ str_hasher(a.column); - } -}; -struct QualifiedColumnEquality { - bool operator()(const QualifiedColumnName &a, const QualifiedColumnName &b) const { - return a.schema == b.schema && a.table == b.table && a.column == b.column; - } -}; -using qualified_column_set_t = unordered_set; + +namespace duckdb { + +//===--------------------------------------------------------------------===// +// Index Types +//===--------------------------------------------------------------------===// +enum class IndexType { + INVALID = 0, // invalid index type + ART = 1 // Adaptive Radix Tree +}; } // namespace duckdb @@ -14189,7 +16770,7 @@ using qualified_column_set_t = unordered_set expr, unique_ptr parsed_expr) - : 
ParsedExpression(ExpressionType::INVALID, ExpressionClass::BOUND_EXPRESSION), expr(move(expr)), - parsed_expr(move(parsed_expr)) { - } + ExpressionExecutor(); + explicit ExpressionExecutor(const Expression *expression); + explicit ExpressionExecutor(const Expression &expression); + explicit ExpressionExecutor(const vector> &expressions); - unique_ptr expr; - unique_ptr parsed_expr; + //! Add an expression to the set of to-be-executed expressions of the executor + void AddExpression(const Expression &expr); -public: - string ToString() const override { - return expr->ToString(); + //! Execute the set of expressions with the given input chunk and store the result in the output chunk + void Execute(DataChunk *input, DataChunk &result); + void Execute(DataChunk &input, DataChunk &result) { + Execute(&input, result); + } + void Execute(DataChunk &result) { + Execute(nullptr, result); } - bool Equals(const BaseExpression *other) const override { - return parsed_expr->Equals(other); + //! Execute the ExpressionExecutor and put the result in the result vector; this should only be used for expression + //! executors with a single expression + void ExecuteExpression(DataChunk &input, Vector &result); + //! Execute the ExpressionExecutor and put the result in the result vector; this should only be used for expression + //! executors with a single expression + void ExecuteExpression(Vector &result); + //! Execute the ExpressionExecutor and generate a selection vector from all true values in the result; this should + //! only be used with a single boolean expression + idx_t SelectExpression(DataChunk &input, SelectionVector &sel); + + //! Execute the expression with index `expr_idx` and store the result in the result vector + void ExecuteExpression(idx_t expr_idx, Vector &result); + //! Evaluate a scalar expression and fold it into a single value + static Value EvaluateScalar(const Expression &expr); + + //! 
Initialize the state of a given expression + static unique_ptr InitializeState(const Expression &expr, ExpressionExecutorState &state); + + void SetChunk(DataChunk *chunk) { + this->chunk = chunk; } - hash_t Hash() const override { - return parsed_expr->Hash(); + void SetChunk(DataChunk &chunk) { + SetChunk(&chunk); } - unique_ptr Copy() const override { - throw SerializationException("Cannot copy or serialize bound expression"); - } -}; + vector> &GetStates(); -} // namespace duckdb + //! The expressions of the executor + vector expressions; + //! The data chunk of the current physical operator, used to resolve + //! column references and determines the output cardinality + DataChunk *chunk = nullptr; +protected: + void Initialize(const Expression &expr, ExpressionExecutorState &state); + static unique_ptr InitializeState(const BoundReferenceExpression &expr, + ExpressionExecutorState &state); + static unique_ptr InitializeState(const BoundBetweenExpression &expr, + ExpressionExecutorState &state); + static unique_ptr InitializeState(const BoundCaseExpression &expr, ExpressionExecutorState &state); + static unique_ptr InitializeState(const BoundCastExpression &expr, ExpressionExecutorState &state); + static unique_ptr InitializeState(const BoundComparisonExpression &expr, + ExpressionExecutorState &state); + static unique_ptr InitializeState(const BoundConjunctionExpression &expr, + ExpressionExecutorState &state); + static unique_ptr InitializeState(const BoundConstantExpression &expr, + ExpressionExecutorState &state); + static unique_ptr InitializeState(const BoundFunctionExpression &expr, + ExpressionExecutorState &state); + static unique_ptr InitializeState(const BoundOperatorExpression &expr, + ExpressionExecutorState &state); + static unique_ptr InitializeState(const BoundParameterExpression &expr, + ExpressionExecutorState &state); + void Execute(const Expression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, + Vector &result); + 
void Execute(const BoundBetweenExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, + Vector &result); + void Execute(const BoundCaseExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, + Vector &result); + void Execute(const BoundCastExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, + Vector &result); + void Execute(const BoundComparisonExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, + Vector &result); + void Execute(const BoundConjunctionExpression &expr, ExpressionState *state, const SelectionVector *sel, + idx_t count, Vector &result); + void Execute(const BoundConstantExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, + Vector &result); + void Execute(const BoundFunctionExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, + Vector &result); + void Execute(const BoundOperatorExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, + Vector &result); + void Execute(const BoundParameterExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, + Vector &result); + void Execute(const BoundReferenceExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, + Vector &result); -namespace duckdb { + //! Execute the (boolean-returning) expression and generate a selection vector with all entries that are "true" in + //! 
the result + idx_t Select(const Expression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel); + idx_t DefaultSelect(const Expression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel); -class Binder; -class ClientContext; -class QueryNode; + idx_t Select(const BoundBetweenExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel); + idx_t Select(const BoundComparisonExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, + SelectionVector *true_sel, SelectionVector *false_sel); + idx_t Select(const BoundConjunctionExpression &expr, ExpressionState *state, const SelectionVector *sel, + idx_t count, SelectionVector *true_sel, SelectionVector *false_sel); -class ScalarFunctionCatalogEntry; -class AggregateFunctionCatalogEntry; -class MacroCatalogEntry; -class CatalogEntry; -class SimpleFunction; + //! Verify that the output of a step in the ExpressionExecutor is correct + void Verify(const Expression &expr, Vector &result, idx_t count); -struct MacroBinding; +private: + //! The states of the expression executor; this holds any intermediates and temporary states of expressions + vector> states; +}; +} // namespace duckdb -struct BindResult { - explicit BindResult(string error) : error(error) { - } - explicit BindResult(unique_ptr expr) : expression(move(expr)) { - } - bool HasError() { - return !error.empty(); - } +namespace duckdb { - unique_ptr expression; - string error; -}; +class ClientContext; +class Transaction; -class ExpressionBinder { -public: - ExpressionBinder(Binder &binder, ClientContext &context, bool replace_binder = false); - virtual ~ExpressionBinder(); +struct IndexLock; - unique_ptr Bind(unique_ptr &expr, LogicalType *result_type = nullptr, - bool root_expression = true); +//! 
The index is an abstract base class that serves as the basis for indexes +class Index { +public: + Index(IndexType type, const vector &column_ids, const vector> &unbound_expressions, + bool is_unique, bool is_primary); + virtual ~Index() = default; - //! Returns whether or not any columns have been bound by the expression binder - bool BoundColumns() { - return bound_columns; - } + //! The type of the index + IndexType type; + //! Column identifiers to extract from the base table + vector column_ids; + //! unordered_set of column_ids used by the index + unordered_set column_id_set; + //! Unbound expressions used by the index + vector> unbound_expressions; + //! The physical types stored in the index + vector types; + //! The logical types of the expressions + vector logical_types; + //! Whether or not the index is an index built to enforce a UNIQUE or PRIMARY KEY constraint + bool is_unique; + //! Whether or not the index is an index built to enforce a PRIMARY KEY constraint + bool is_primary; - string Bind(unique_ptr *expr, idx_t depth, bool root_expression = false); +public: + //! Initialize a scan on the index with the given expression and column ids + //! to fetch from the base table when we only have one query predicate + virtual unique_ptr InitializeScanSinglePredicate(Transaction &transaction, Value value, + ExpressionType expressionType) = 0; + //! Initialize a scan on the index with the given expression and column ids + //! to fetch from the base table for two query predicates + virtual unique_ptr InitializeScanTwoPredicates(Transaction &transaction, Value low_value, + ExpressionType low_expression_type, Value high_value, + ExpressionType high_expression_type) = 0; + //! Perform a lookup on the index, fetching up to max_count result ids. Returns true if all row ids were fetched, + //! and false otherwise. 
+ virtual bool Scan(Transaction &transaction, DataTable &table, IndexScanState &state, idx_t max_count, + vector &result_ids) = 0; - // Bind table names to ColumnRefExpressions - static void BindTableNames(Binder &binder, ParsedExpression &expr, - unordered_map *alias_map = nullptr); - static unique_ptr PushCollation(ClientContext &context, unique_ptr source, - const string &collation, bool equality_only = false); - static void TestCollation(ClientContext &context, const string &collation); + //! Obtain a lock on the index + virtual void InitializeLock(IndexLock &state); + //! Called when data is appended to the index. The lock obtained from InitializeAppend must be held + virtual bool Append(IndexLock &state, DataChunk &entries, Vector &row_identifiers) = 0; + bool Append(DataChunk &entries, Vector &row_identifiers); + //! Verify that data can be appended to the index + virtual void VerifyAppend(DataChunk &chunk) = 0; - bool BindCorrelatedColumns(unique_ptr &expr); + //! Called when data inside the index is Deleted + virtual void Delete(IndexLock &state, DataChunk &entries, Vector &row_identifiers) = 0; + void Delete(DataChunk &entries, Vector &row_identifiers); - //! The target type that should result from the binder. If the result is not of this type, a cast to this type will - //! be added. Defaults to INVALID. - LogicalType target_type; + //! Insert data into the index. Does not lock the index. + virtual bool Insert(IndexLock &lock, DataChunk &input, Vector &row_identifiers) = 0; - void BindChild(unique_ptr &expr, idx_t depth, string &error); - static void ExtractCorrelatedExpressions(Binder &binder, Expression &expr); + //! 
Returns true if the index is affected by updates on the specified column ids, and false otherwise + bool IndexIsUpdated(const vector &column_ids) const; protected: - virtual BindResult BindExpression(unique_ptr *expr_ptr, idx_t depth, - bool root_expression = false); + void ExecuteExpressions(DataChunk &input, DataChunk &result); - BindResult BindExpression(CaseExpression &expr, idx_t depth); - BindResult BindExpression(CollateExpression &expr, idx_t depth); - BindResult BindExpression(CastExpression &expr, idx_t depth); - BindResult BindExpression(ColumnRefExpression &expr, idx_t depth); - BindResult BindExpression(ComparisonExpression &expr, idx_t depth); - BindResult BindExpression(ConjunctionExpression &expr, idx_t depth); - BindResult BindExpression(ConstantExpression &expr, idx_t depth); - BindResult BindExpression(FunctionExpression &expr, idx_t depth, unique_ptr *expr_ptr); - BindResult BindExpression(LambdaExpression &expr, idx_t depth); - BindResult BindExpression(OperatorExpression &expr, idx_t depth); - BindResult BindExpression(ParameterExpression &expr, idx_t depth); - BindResult BindExpression(PositionalReferenceExpression &ref, idx_t depth); - BindResult BindExpression(StarExpression &expr, idx_t depth); - BindResult BindExpression(SubqueryExpression &expr, idx_t depth); + //! Lock used for updating the index + mutex lock; + +private: + //! Bound expressions used by the index + vector> bound_expressions; + //! 
Expression executor for the index expressions + ExpressionExecutor executor; + + unique_ptr BindExpression(unique_ptr expr); +}; + +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/table_statistics.hpp +// +// +//===----------------------------------------------------------------------===// -protected: - virtual BindResult BindFunction(FunctionExpression &expr, ScalarFunctionCatalogEntry *function, idx_t depth); - virtual BindResult BindAggregate(FunctionExpression &expr, AggregateFunctionCatalogEntry *function, idx_t depth); - virtual BindResult BindUnnest(FunctionExpression &expr, idx_t depth); - virtual BindResult BindMacro(FunctionExpression &expr, MacroCatalogEntry *macro, idx_t depth, - unique_ptr *expr_ptr); - virtual void ReplaceMacroParametersRecursive(unique_ptr &expr); - virtual void ReplaceMacroParametersRecursive(ParsedExpression &expr, QueryNode &node); - virtual void ReplaceMacroParametersRecursive(ParsedExpression &expr, TableRef &ref); - virtual void CheckForSideEffects(FunctionExpression &function, idx_t depth, string &error); - virtual string UnsupportedAggregateMessage(); - virtual string UnsupportedUnnestMessage(); - Binder &binder; - ClientContext &context; - ExpressionBinder *stored_binder; - MacroBinding *macro_binding; - bool bound_columns = false; + + +namespace duckdb { + +struct TableStatistics { + idx_t estimated_cardinality; }; } // namespace duckdb + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/planner/table_binding.hpp +// duckdb/storage/table/column_segment.hpp // // //===----------------------------------------------------------------------===// @@ -14366,183 +17041,159 @@ class ExpressionBinder { +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/buffer_manager.hpp +// +// 
+//===----------------------------------------------------------------------===// -namespace duckdb { -class BindContext; -class BoundQueryNode; -class ColumnRefExpression; -class SubqueryRef; -class LogicalGet; -class TableCatalogEntry; -class TableFunctionCatalogEntry; -class BoundTableFunction; -//! A Binding represents a binding to a table, table-producing function or subquery with a specified table index. -struct Binding { - Binding(const string &alias, vector types, vector names, idx_t index); - virtual ~Binding() = default; - //! The alias of the binding - string alias; - //! The table index of the binding - idx_t index; - vector types; - //! Column names of the subquery - vector names; - //! Name -> index for the names - unordered_map name_map; -public: - bool HasMatchingBinding(const string &column_name); - virtual BindResult Bind(ColumnRefExpression &colref, idx_t depth); -}; -//! TableBinding is exactly like the Binding, except it keeps track of which columns were bound in the linked LogicalGet -//! node for projection pushdown purposes. -struct TableBinding : public Binding { - TableBinding(const string &alias, vector types, vector names, LogicalGet &get, idx_t index, - bool add_row_id = false); - //! the underlying LogicalGet - LogicalGet &get; +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/block_manager.hpp +// +// +//===----------------------------------------------------------------------===// + + + -public: - BindResult Bind(ColumnRefExpression &colref, idx_t depth) override; -}; -//! MacroBinding is like the Binding, except the alias and index are set by default. Used for binding Macro -//! Params/Arguments. -struct MacroBinding : public Binding { - MacroBinding(vector types_p, vector names_p, string macro_name); - //! Arguments - vector> arguments; - //! The name of the macro - string macro_name; +namespace duckdb { +class ClientContext; +class DatabaseInstance; + +//! 
BlockManager is an abstract representation to manage blocks on DuckDB. When writing or reading blocks, the +//! BlockManager creates and accesses blocks. The concrete types implements how blocks are stored. +class BlockManager { public: - BindResult Bind(ColumnRefExpression &colref, idx_t depth) override; + virtual ~BlockManager() = default; - //! Given the parameter colref, returns a copy of the argument that was supplied for this parameter - unique_ptr ParamToArg(ColumnRefExpression &colref); -}; + virtual void StartCheckpoint() = 0; + //! Creates a new block inside the block manager + virtual unique_ptr CreateBlock() = 0; + //! Return the next free block id + virtual block_id_t GetFreeBlockId() = 0; + //! Returns whether or not a specified block is the root block + virtual bool IsRootBlock(block_id_t root) { + return false; + }; + //! Mark a block as "modified"; modified blocks are added to the free list after a checkpoint (i.e. their data is + //! assumed to be rewritten) + virtual void MarkBlockAsModified(block_id_t block_id) { + } + //! Get the first meta block id + virtual block_id_t GetMetaBlock() = 0; + //! Read the content of the block from disk + virtual void Read(Block &block) = 0; + //! Writes the block to disk + virtual void Write(FileBuffer &block, block_id_t block_id) = 0; + //! Writes the block to disk + void Write(Block &block) { + Write(block, block.id); + } + //! Write the header; should be the final step of a checkpoint + virtual void WriteHeader(DatabaseHeader header) = 0; + + //! Returns the number of total blocks + virtual idx_t TotalBlocks() { + return 0; + } + //! 
Returns the number of free blocks + virtual idx_t FreeBlocks() { + return 0; + } + static BlockManager &GetBlockManager(ClientContext &context); + static BlockManager &GetBlockManager(DatabaseInstance &db); +}; } // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/buffer/block_handle.hpp +// +// +//===----------------------------------------------------------------------===// -namespace duckdb { -class Binder; -class LogicalGet; -class BoundQueryNode; -struct UsingColumnSet { - string primary_binding; - unordered_set bindings; -}; -//! The BindContext object keeps track of all the tables and columns that are -//! encountered during the binding process. -class BindContext { -public: - //! Keep track of recursive CTE references - unordered_map> cte_references; -public: - //! Given a column name, find the matching table it belongs to. Throws an - //! exception if no table has a column of the given name. - string GetMatchingBinding(const string &column_name); - //! Like GetMatchingBinding, but instead of throwing an error if multiple tables have the same binding it will - //! return a list of all the matching ones - unordered_set GetMatchingBindings(const string &column_name); - //! Like GetMatchingBindings, but returns the top 3 most similar bindings (in levenshtein distance) instead of the - //! matching ones - vector GetSimilarBindings(const string &column_name); - Binding *GetCTEBinding(const string &ctename); - //! Binds a column expression to the base table. Returns the bound expression - //! or throws an exception if the column could not be bound. - BindResult BindColumn(ColumnRefExpression &colref, idx_t depth); - string BindColumn(PositionalReferenceExpression &ref, string &table_name, string &column_name); - BindResult BindColumn(PositionalReferenceExpression &ref, idx_t depth); - //! Generate column expressions for all columns that are present in the - //! referenced tables. 
This is used to resolve the * expression in a - //! selection list. - void GenerateAllColumnExpressions(vector> &new_select_list, - const string &relation_name = ""); - const vector> &GetBindingsList() { - return bindings_list; - } - //! Adds a base table with the given alias to the BindContext. - void AddBaseTable(idx_t index, const string &alias, const vector &names, const vector &types, - LogicalGet &get); - //! Adds a call to a table function with the given alias to the BindContext. - void AddTableFunction(idx_t index, const string &alias, const vector &names, - const vector &types, LogicalGet &get); - //! Adds a subquery with a given alias to the BindContext. - void AddSubquery(idx_t index, const string &alias, SubqueryRef &ref, BoundQueryNode &subquery); - //! Adds a base table with the given alias to the BindContext. - void AddGenericBinding(idx_t index, const string &alias, const vector &names, - const vector &types); - //! Adds a base table with the given alias to the CTE BindContext. - //! We need this to correctly bind recursive CTEs with multiple references. - void AddCTEBinding(idx_t index, const string &alias, const vector &names, const vector &types); +namespace duckdb { +class BufferHandle; +class BufferManager; +class DatabaseInstance; +class FileBuffer; - //! Add an implicit join condition (e.g. USING (x)) - void AddUsingBinding(const string &column_name, UsingColumnSet set); +enum class BlockState : uint8_t { BLOCK_UNLOADED = 0, BLOCK_LOADED = 1 }; - //! Returns any using column set for the given column name, or nullptr if there is none. On conflict (multiple using - //! column sets with the same name) throw an exception. - UsingColumnSet *GetUsingBinding(const string &column_name); - //! Returns any using column set for the given column name, or nullptr if there is none - UsingColumnSet *GetUsingBinding(const string &column_name, const string &binding_name); - //! 
Erase a using binding from the set of using bindings - void RemoveUsingBinding(const string &column_name, UsingColumnSet *set); +class BlockHandle { + friend struct BufferEvictionNode; + friend class BufferHandle; + friend class BufferManager; - unordered_map> GetCTEBindings() { - return cte_bindings; - } - void SetCTEBindings(unordered_map> bindings) { - cte_bindings = bindings; - } +public: + BlockHandle(DatabaseInstance &db, block_id_t block_id); + BlockHandle(DatabaseInstance &db, block_id_t block_id, unique_ptr buffer, bool can_destroy, + idx_t block_size); + ~BlockHandle(); - //! Alias a set of column names for the specified table, using the original names if there are not enough aliases - //! specified. - static vector AliasColumnNames(const string &table_name, const vector &names, - const vector &column_aliases); + DatabaseInstance &db; - //! Add all the bindings from a BindContext to this BindContext. The other BindContext is destroyed in the process. - void AddContext(BindContext other); +public: + block_id_t BlockId() { + return block_id; + } -private: - void AddBinding(const string &alias, unique_ptr binding); - //! Gets a binding of the specified name. Returns a nullptr and sets the out_error if the binding could not be - //! found. - Binding *GetBinding(const string &name, string &out_error); + int32_t Readers() const { + return readers; + } private: - //! The set of bindings - unordered_map> bindings; - //! The list of bindings in insertion order - vector> bindings_list; - //! The set of columns used in USING join conditions - unordered_map> using_columns; - - //! The set of CTE bindings - unordered_map> cte_bindings; -}; -} // namespace duckdb + static unique_ptr Load(shared_ptr &handle); + void Unload(); + bool CanUnload(); + //! The block-level lock + mutex lock; + //! Whether or not the block is loaded/unloaded + BlockState state; + //! Amount of concurrent readers + atomic readers; + //! 
The block id of the block + const block_id_t block_id; + //! Pointer to loaded data (if any) + unique_ptr buffer; + //! Internal eviction timestamp + atomic eviction_timestamp; + //! Whether or not the buffer can be destroyed (only used for temporary buffers) + const bool can_destroy; + //! The memory usage of the block + idx_t memory_usage; +}; +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/planner/bound_statement.hpp +// duckdb/storage/buffer/managed_buffer.hpp // // //===----------------------------------------------------------------------===// @@ -14551,236 +17202,138 @@ class BindContext { + + namespace duckdb { +class DatabaseInstance; -struct BoundStatement { - unique_ptr plan; - vector types; - vector names; +//! Managed buffer is an arbitrarily-sized buffer that is at least of size >= BLOCK_SIZE +class ManagedBuffer : public FileBuffer { +public: + ManagedBuffer(DatabaseInstance &db, idx_t size, bool can_destroy, block_id_t id); + + DatabaseInstance &db; + //! Whether or not the managed buffer can be freely destroyed when unpinned. + //! - If can_destroy is true, the buffer can be destroyed when unpinned and hence be unrecoverable. After being + //! destroyed, Pin() will return false. + //! - If can_destroy is false, the buffer will instead be written to a temporary file on disk when unloaded from + //! memory, and read back into memory when Pin() is called. + bool can_destroy; + //! 
The internal id of the buffer + block_id_t id; }; } // namespace duckdb namespace duckdb { -class BoundResultModifier; -class ClientContext; -class ExpressionBinder; -class LimitModifier; -class OrderBinder; -class TableCatalogEntry; -class ViewCatalogEntry; - -struct CreateInfo; -struct BoundCreateTableInfo; -struct BoundCreateFunctionInfo; -struct CommonTableExpressionInfo; - -struct CorrelatedColumnInfo { - ColumnBinding binding; - LogicalType type; - string name; - idx_t depth; - - explicit CorrelatedColumnInfo(BoundColumnRefExpression &expr) - : binding(expr.binding), type(expr.return_type), name(expr.GetName()), depth(expr.depth) { - } - - bool operator==(const CorrelatedColumnInfo &rhs) const { - return binding == rhs.binding; - } -}; +class DatabaseInstance; +class TemporaryDirectoryHandle; +struct EvictionQueue; -//! Bind the parsed query tree to the actual columns present in the catalog. -/*! - The binder is responsible for binding tables and columns to actual physical - tables and columns in the catalog. In the process, it also resolves types of - all expressions. -*/ -class Binder : public std::enable_shared_from_this { - friend class ExpressionBinder; - friend class RecursiveSubqueryPlanner; +//! The buffer manager is in charge of handling memory management for the database. It hands out memory buffers that can +//! be used by the database internally. +class BufferManager { + friend class BufferHandle; + friend class BlockHandle; public: - static shared_ptr CreateBinder(ClientContext &context, Binder *parent = nullptr, bool inherit_ctes = true); - - //! The client context - ClientContext &context; - //! A mapping of names to common table expressions - unordered_map CTE_bindings; - //! The CTEs that have already been bound - unordered_set bound_ctes; - //! The bind context - BindContext bind_context; - //! The set of correlated columns bound by this binder (FIXME: this should probably be an unordered_set and not a - //! 
vector) - vector correlated_columns; - //! The set of parameter expressions bound by this binder - vector *parameters; - //! Whether or not the bound statement is read-only - bool read_only; - //! Whether or not the statement requires a valid transaction to run - bool requires_valid_transaction; - //! Whether or not the statement can be streamed to the client - bool allow_stream_result; - //! The alias for the currently processing subquery, if it exists - string alias; - //! Macro parameter bindings (if any) - MacroBinding *macro_binding = nullptr; + BufferManager(DatabaseInstance &db, string temp_directory, idx_t maximum_memory); + ~BufferManager(); -public: - BoundStatement Bind(SQLStatement &statement); - BoundStatement Bind(QueryNode &node); + //! Register a block with the given block id in the base file + shared_ptr RegisterBlock(block_id_t block_id); - unique_ptr BindCreateTableInfo(unique_ptr info); - void BindCreateViewInfo(CreateViewInfo &base); - SchemaCatalogEntry *BindSchema(CreateInfo &info); - SchemaCatalogEntry *BindCreateFunctionInfo(CreateInfo &info); + //! Register an in-memory buffer of arbitrary size, as long as it is >= BLOCK_SIZE. can_destroy signifies whether or + //! not the buffer can be destroyed when unpinned, or whether or not it needs to be written to a temporary file so + //! it can be reloaded. The resulting buffer will already be allocated, but needs to be pinned in order to be used. + shared_ptr RegisterMemory(idx_t block_size, bool can_destroy); - //! Check usage, and cast named parameters to their types - static void BindNamedParameters(unordered_map &types, unordered_map &values, - QueryErrorContext &error_context, string &func_name); + //! Convert an existing in-memory buffer into a persistent disk-backed block + shared_ptr ConvertToPersistent(BlockManager &block_manager, block_id_t block_id, + shared_ptr old_block); - unique_ptr Bind(TableRef &ref); - unique_ptr CreatePlan(BoundTableRef &ref); + //! 
Allocate an in-memory buffer with a single pin. + //! The allocated memory is released when the buffer handle is destroyed. + unique_ptr Allocate(idx_t block_size); - //! Generates an unused index for a table - idx_t GenerateTableIndex(); + //! Reallocate an in-memory buffer that is pinned. + void ReAllocate(shared_ptr &handle, idx_t block_size); - //! Add a common table expression to the binder - void AddCTE(const string &name, CommonTableExpressionInfo *cte); - //! Find a common table expression by name; returns nullptr if none exists - CommonTableExpressionInfo *FindCTE(const string &name, bool skip = false); + unique_ptr Pin(shared_ptr &handle); + void Unpin(shared_ptr &handle); - bool CTEIsAlreadyBound(CommonTableExpressionInfo *cte); + void UnregisterBlock(block_id_t block_id, bool can_destroy); - void PushExpressionBinder(ExpressionBinder *binder); - void PopExpressionBinder(); - void SetActiveBinder(ExpressionBinder *binder); - ExpressionBinder *GetActiveBinder(); - bool HasActiveBinder(); + //! Set a new memory limit to the buffer manager, throws an exception if the new limit is too low and not enough + //! blocks can be evicted + void SetLimit(idx_t limit = (idx_t)-1); - vector &GetActiveBinders(); + static BufferManager &GetBufferManager(ClientContext &context); + static BufferManager &GetBufferManager(DatabaseInstance &db); - void MergeCorrelatedColumns(vector &other); - //! Add a correlated column to this binder (if it does not exist) - void AddCorrelatedColumn(const CorrelatedColumnInfo &info); + idx_t GetUsedMemory() { + return current_memory; + } + idx_t GetMaxMemory() { + return maximum_memory; + } - string FormatError(ParsedExpression &expr_context, const string &message); - string FormatError(TableRef &ref_context, const string &message); - string FormatError(idx_t query_location, const string &message); + const string &GetTemporaryDirectory() { + return temp_directory; + } -private: - //! The parent binder (if any) - shared_ptr parent; - //! 
The vector of active binders - vector active_binders; - //! The count of bound_tables - idx_t bound_tables; - //! Whether or not the binder has any unplanned subqueries that still need to be planned - bool has_unplanned_subqueries = false; - //! Whether or not subqueries should be planned already - bool plan_subquery = true; - //! Whether CTEs should reference the parent binder (if it exists) - bool inherit_ctes = true; - //! The root statement of the query that is currently being parsed - SQLStatement *root_statement = nullptr; + void SetTemporaryDirectory(string new_dir); private: - //! Bind the default values of the columns of a table - void BindDefaultValues(vector &columns, vector> &bound_defaults); - - //! Move correlated expressions from the child binder to this binder - void MoveCorrelatedExpressions(Binder &other); - - BoundStatement Bind(SelectStatement &stmt); - BoundStatement Bind(InsertStatement &stmt); - BoundStatement Bind(CopyStatement &stmt); - BoundStatement Bind(DeleteStatement &stmt); - BoundStatement Bind(UpdateStatement &stmt); - BoundStatement Bind(CreateStatement &stmt); - BoundStatement Bind(DropStatement &stmt); - BoundStatement Bind(AlterStatement &stmt); - BoundStatement Bind(TransactionStatement &stmt); - BoundStatement Bind(PragmaStatement &stmt); - BoundStatement Bind(ExplainStatement &stmt); - BoundStatement Bind(VacuumStatement &stmt); - BoundStatement Bind(RelationStatement &stmt); - BoundStatement Bind(ShowStatement &stmt); - BoundStatement Bind(CallStatement &stmt); - BoundStatement Bind(ExportStatement &stmt); - BoundStatement Bind(SetStatement &stmt); - BoundStatement Bind(LoadStatement &stmt); - - unique_ptr BindNode(SelectNode &node); - unique_ptr BindNode(SetOperationNode &node); - unique_ptr BindNode(RecursiveCTENode &node); - unique_ptr BindNode(QueryNode &node); - - unique_ptr VisitQueryNode(BoundQueryNode &node, unique_ptr root); - unique_ptr CreatePlan(BoundRecursiveCTENode &node); - unique_ptr 
CreatePlan(BoundSelectNode &statement); - unique_ptr CreatePlan(BoundSetOperationNode &node); - unique_ptr CreatePlan(BoundQueryNode &node); - - unique_ptr Bind(BaseTableRef &ref); - unique_ptr Bind(CrossProductRef &ref); - unique_ptr Bind(JoinRef &ref); - unique_ptr Bind(SubqueryRef &ref, CommonTableExpressionInfo *cte = nullptr); - unique_ptr Bind(TableFunctionRef &ref); - unique_ptr Bind(EmptyTableRef &ref); - unique_ptr Bind(ExpressionListRef &ref); - - bool BindFunctionParameters(vector> &expressions, vector &arguments, - vector ¶meters, unordered_map &named_parameters, - unique_ptr &subquery, string &error); - - unique_ptr CreatePlan(BoundBaseTableRef &ref); - unique_ptr CreatePlan(BoundCrossProductRef &ref); - unique_ptr CreatePlan(BoundJoinRef &ref); - unique_ptr CreatePlan(BoundSubqueryRef &ref); - unique_ptr CreatePlan(BoundTableFunction &ref); - unique_ptr CreatePlan(BoundEmptyTableRef &ref); - unique_ptr CreatePlan(BoundExpressionListRef &ref); - unique_ptr CreatePlan(BoundCTERef &ref); - - unique_ptr BindTable(TableCatalogEntry &table, BaseTableRef &ref); - unique_ptr BindView(ViewCatalogEntry &view, BaseTableRef &ref); - unique_ptr BindTableOrView(BaseTableRef &ref); - - BoundStatement BindCopyTo(CopyStatement &stmt); - BoundStatement BindCopyFrom(CopyStatement &stmt); - - void BindModifiers(OrderBinder &order_binder, QueryNode &statement, BoundQueryNode &result); - void BindModifierTypes(BoundQueryNode &result, const vector &sql_types, idx_t projection_index); - - unique_ptr BindLimit(LimitModifier &limit_mod); - unique_ptr BindFilter(unique_ptr condition); - unique_ptr BindOrderExpression(OrderBinder &order_binder, unique_ptr expr); - - unique_ptr PlanFilter(unique_ptr condition, unique_ptr root); + //! Evict blocks until the currently used memory + extra_memory fit, returns false if this was not possible + //! (i.e. 
not enough blocks could be evicted) + bool EvictBlocks(idx_t extra_memory, idx_t memory_limit); - void PlanSubqueries(unique_ptr *expr, unique_ptr *root); - unique_ptr PlanSubquery(BoundSubqueryExpression &expr, unique_ptr &root); + //! Write a temporary buffer to disk + void WriteTemporaryBuffer(ManagedBuffer &buffer); + //! Read a temporary buffer from disk + unique_ptr ReadTemporaryBuffer(block_id_t id); + //! Get the path of the temporary buffer + string GetTemporaryPath(block_id_t id); - unique_ptr CastLogicalOperatorToTypes(vector &source_types, - vector &target_types, - unique_ptr op); + void DeleteTemporaryFile(block_id_t id); - string FindBinding(const string &using_column, const string &join_side); - bool TryFindBinding(const string &using_column, const string &join_side, string &result); + void RequireTemporaryDirectory(); -public: - // This should really be a private constructor, but make_shared does not allow it... - Binder(bool I_know_what_I_am_doing, ClientContext &context, shared_ptr parent, bool inherit_ctes); -}; + void AddToEvictionQueue(shared_ptr &handle); +private: + //! The database instance + DatabaseInstance &db; + //! The current amount of memory that is occupied by the buffer manager (in bytes) + atomic current_memory; + //! The maximum amount of memory that the buffer manager can keep (in bytes) + atomic maximum_memory; + //! The directory name where temporary files are stored + string temp_directory; + //! Lock for creating the temp handle + mutex temp_handle_lock; + //! Handle for the temporary directory + unique_ptr temp_directory_handle; + //! The lock for the set of blocks + mutex manager_lock; + //! A mapping of block id -> BlockHandle + unordered_map> blocks; + //! Eviction queue + unique_ptr queue; + //! 
The temporary id used for managed buffers + atomic temporary_id; +}; } // namespace duckdb + + + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/planner/bound_query_node.hpp +// duckdb/function/compression_function.hpp // // //===----------------------------------------------------------------------===// @@ -14788,157 +17341,280 @@ class Binder : public std::enable_shared_from_this { + + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/planner/bound_result_modifier.hpp +// duckdb/common/map.hpp // // //===----------------------------------------------------------------------===// +#include - +namespace duckdb { +using std::map; +} namespace duckdb { +class DatabaseInstance; +class ColumnData; +class ColumnDataCheckpointer; +class ColumnSegment; +class SegmentStatistics; -//! A ResultModifier -class BoundResultModifier { -public: - explicit BoundResultModifier(ResultModifierType type) : type(type) { - } - virtual ~BoundResultModifier() { - } +struct ColumnFetchState; +struct ColumnScanState; +struct SegmentScanState; - ResultModifierType type; +struct AnalyzeState { + virtual ~AnalyzeState() { + } }; -struct BoundOrderByNode { - BoundOrderByNode(OrderType type, OrderByNullType null_order, unique_ptr expression) - : type(type), null_order(null_order), expression(move(expression)) { +struct CompressionState { + virtual ~CompressionState() { } - - OrderType type; - OrderByNullType null_order; - unique_ptr expression; }; -class BoundLimitModifier : public BoundResultModifier { -public: - BoundLimitModifier() : BoundResultModifier(ResultModifierType::LIMIT_MODIFIER) { +struct CompressedSegmentState { + virtual ~CompressedSegmentState() { } - //! LIMIT - int64_t limit_val = NumericLimits::Maximum(); - //! OFFSET - int64_t offset_val = 0; - //! Expression in case limit is not constant - unique_ptr limit; - //! 
Expression in case limit is not constant - unique_ptr offset; }; -class BoundOrderModifier : public BoundResultModifier { -public: - BoundOrderModifier() : BoundResultModifier(ResultModifierType::ORDER_MODIFIER) { - } +//===--------------------------------------------------------------------===// +// Analyze +//===--------------------------------------------------------------------===// +//! The analyze functions are used to determine whether or not to use this compression method +//! The system first determines the potential compression methods to use based on the physical type of the column +//! After that the following steps are taken: +//! 1. The init_analyze is called to initialize the analyze state of every candidate compression method +//! 2. The analyze method is called with all of the input data in the order in which it must be stored. +//! analyze can return "false". In that case, the compression method is taken out of consideration early. +//! 3. The final_analyze method is called, which should return a score for the compression method + +//! The system then decides which compression function to use based on the analyzed score (returned from final_analyze) +typedef unique_ptr (*compression_init_analyze_t)(ColumnData &col_data, PhysicalType type); +typedef bool (*compression_analyze_t)(AnalyzeState &state, Vector &input, idx_t count); +typedef idx_t (*compression_final_analyze_t)(AnalyzeState &state); - //! 
List of order nodes - vector orders; -}; +//===--------------------------------------------------------------------===// +// Compress +//===--------------------------------------------------------------------===// +typedef unique_ptr (*compression_init_compression_t)(ColumnDataCheckpointer &checkpointer, + unique_ptr state); +typedef void (*compression_compress_data_t)(CompressionState &state, Vector &scan_vector, idx_t count); +typedef void (*compression_compress_finalize_t)(CompressionState &state); -class BoundDistinctModifier : public BoundResultModifier { -public: - BoundDistinctModifier() : BoundResultModifier(ResultModifierType::DISTINCT_MODIFIER) { - } +//===--------------------------------------------------------------------===// +// Uncompress / Scan +//===--------------------------------------------------------------------===// +typedef unique_ptr (*compression_init_segment_scan_t)(ColumnSegment &segment); +typedef void (*compression_scan_vector_t)(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, + Vector &result); +typedef void (*compression_scan_partial_t)(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, + Vector &result, idx_t result_offset); +typedef void (*compression_fetch_row_t)(ColumnSegment &segment, ColumnFetchState &state, row_t row_id, Vector &result, + idx_t result_idx); +typedef void (*compression_skip_t)(ColumnSegment &segment, ColumnScanState &state, idx_t skip_count); - //! 
list of distinct on targets (if any) - vector> target_distincts; +//===--------------------------------------------------------------------===// +// Append (optional) +//===--------------------------------------------------------------------===// +typedef unique_ptr (*compression_init_segment_t)(ColumnSegment &segment, block_id_t block_id); +typedef idx_t (*compression_append_t)(ColumnSegment &segment, SegmentStatistics &stats, VectorData &data, idx_t offset, + idx_t count); +typedef void (*compression_revert_append_t)(ColumnSegment &segment, idx_t start_row); + +class CompressionFunction { +public: + CompressionFunction(CompressionType type, PhysicalType data_type, compression_init_analyze_t init_analyze, + compression_analyze_t analyze, compression_final_analyze_t final_analyze, + compression_init_compression_t init_compression, compression_compress_data_t compress, + compression_compress_finalize_t compress_finalize, compression_init_segment_scan_t init_scan, + compression_scan_vector_t scan_vector, compression_scan_partial_t scan_partial, + compression_fetch_row_t fetch_row, compression_skip_t skip, + compression_init_segment_t init_segment, compression_append_t append, + compression_revert_append_t revert_append) + : type(type), data_type(data_type), init_analyze(init_analyze), analyze(analyze), final_analyze(final_analyze), + init_compression(init_compression), compress(compress), compress_finalize(compress_finalize), + init_scan(init_scan), scan_vector(scan_vector), scan_partial(scan_partial), fetch_row(fetch_row), skip(skip), + init_segment(init_segment), append(append), revert_append(revert_append) { + } + + //! Compression type + CompressionType type; + //! The data type this function can compress + PhysicalType data_type; + + //! Analyze step: determine which compression function is the most effective + //! init_analyze is called once to set up the analyze state + compression_init_analyze_t init_analyze; + //! 
analyze is called several times (once per vector in the row group) + //! analyze should return true, unless compression is no longer possible with this compression method + //! in that case false should be returned + compression_analyze_t analyze; + //! final_analyze should return the score of the compression function + //! ideally this is the exact number of bytes required to store the data + //! this is not required/enforced: it can be an estimate as well + compression_final_analyze_t final_analyze; + + //! Compression step: actually compress the data + //! init_compression is called once to set up the comperssion state + compression_init_compression_t init_compression; + //! compress is called several times (once per vector in the row group) + compression_compress_data_t compress; + //! compress_finalize is called after + compression_compress_finalize_t compress_finalize; + + //! init_scan is called to set up the scan state + compression_init_segment_scan_t init_scan; + //! scan_vector scans an entire vector using the scan state + compression_scan_vector_t scan_vector; + //! scan_partial scans a subset of a vector + //! this can request > vector_size as well + //! this is used if a vector crosses segment boundaries, or for child columns of lists + compression_scan_partial_t scan_partial; + //! fetch an individual row from the compressed vector + //! used for index lookups + compression_fetch_row_t fetch_row; + //! Skip forward in the compressed segment + compression_skip_t skip; + + // Append functions + //! This only really needs to be defined for uncompressed segments + + //! Initialize a compressed segment (optional) + compression_init_segment_t init_segment; + //! Append to the compressed segment (optional) + compression_append_t append; + //! Revert append (optional) + compression_revert_append_t revert_append; }; -} // namespace duckdb +//! 
The set of compression functions +struct CompressionFunctionSet { + map> functions; +}; +} // namespace duckdb namespace duckdb { +class ColumnSegment; +class BlockManager; +class ColumnSegment; +class ColumnData; +class DatabaseInstance; +class Transaction; +class BaseStatistics; +class UpdateSegment; +class TableFilter; +struct ColumnFetchState; +struct ColumnScanState; +struct ColumnAppendState; -//! Bound equivalent of QueryNode -class BoundQueryNode { +enum class ColumnSegmentType : uint8_t { TRANSIENT, PERSISTENT }; +//! TableFilter represents a filter pushed down into the table scan. + +class ColumnSegment : public SegmentBase { public: - explicit BoundQueryNode(QueryNodeType type) : type(type) { - } - virtual ~BoundQueryNode() { - } + ~ColumnSegment() override; - //! The type of the query node, either SetOperation or Select - QueryNodeType type; - //! The result modifiers that should be applied to this query node - vector> modifiers; + //! The database instance + DatabaseInstance &db; + //! The type stored in the column + LogicalType type; + //! The size of the type + idx_t type_size; + //! The column segment type (transient or persistent) + ColumnSegmentType segment_type; + //! The compression function + CompressionFunction *function; + //! The statistics for the segment + SegmentStatistics stats; + //! The block that this segment relates to + shared_ptr block; - //! The names returned by this QueryNode. - vector names; - //! The types returned by this QueryNode. - vector types; + static unique_ptr CreatePersistentSegment(DatabaseInstance &db, block_id_t id, idx_t offset, + const LogicalType &type_p, idx_t start, idx_t count, + CompressionType compression_type, + unique_ptr statistics); + static unique_ptr CreateTransientSegment(DatabaseInstance &db, const LogicalType &type, idx_t start); public: - virtual idx_t GetRootIndex() = 0; -}; - -} // namespace duckdb + void InitializeScan(ColumnScanState &state); + //! 
Scan one vector from this segment + void Scan(ColumnScanState &state, idx_t scan_count, Vector &result, idx_t result_offset, bool entire_vector); + //! Fetch a value of the specific row id and append it to the result + void FetchRow(ColumnFetchState &state, row_t row_id, Vector &result, idx_t result_idx); + static void FilterSelection(SelectionVector &sel, Vector &result, const TableFilter &filter, + idx_t &approved_tuple_count, ValidityMask &mask); + //! Skip a scan forward to the row_index specified in the scan state + void Skip(ColumnScanState &state); -namespace duckdb { + //! Initialize an append of this segment. Appends are only supported on transient segments. + void InitializeAppend(ColumnAppendState &state); + //! Appends a (part of) vector to the segment, returns the amount of entries successfully appended + idx_t Append(ColumnAppendState &state, VectorData &data, idx_t offset, idx_t count); + //! Revert an append made to this segment + void RevertAppend(idx_t start_row); -class BoundSubqueryExpression : public Expression { -public: - explicit BoundSubqueryExpression(LogicalType return_type); + //! Convert a transient in-memory segment into a persistent segment blocked by an on-disk block. + //! Only used during checkpointing. + void ConvertToPersistent(block_id_t block_id, idx_t offset_in_block); - bool IsCorrelated() { - return binder->correlated_columns.size() > 0; + block_id_t GetBlockId() { + D_ASSERT(segment_type == ColumnSegmentType::PERSISTENT); + return block_id; } - //! The binder used to bind the subquery node - shared_ptr binder; - //! The bound subquery node - unique_ptr subquery; - //! The subquery type - SubqueryType subquery_type; - //! the child expression to compare with (in case of IN, ANY, ALL operators) - unique_ptr child; - //! The comparison type of the child expression with the subquery (in case of ANY, ALL operators) - ExpressionType comparison_type; - //! The LogicalType of the subquery result. Only used for ANY expressions. 
- LogicalType child_type; - //! The target LogicalType of the subquery result (i.e. to which type it should be casted, if child_type <> - //! child_target). Only used for ANY expressions. - LogicalType child_target; - -public: - bool HasSubquery() const override { - return true; + idx_t GetBlockOffset() { + D_ASSERT(segment_type == ColumnSegmentType::PERSISTENT); + return offset; } - bool IsScalar() const override { - return false; + + idx_t GetRelativeIndex(idx_t row_index) { + D_ASSERT(row_index >= this->start); + D_ASSERT(row_index <= this->start + this->count); + return row_index - this->start; } - bool IsFoldable() const override { - return false; + + CompressedSegmentState *GetSegmentState() { + return segment_state.get(); } - string ToString() const override; +public: + ColumnSegment(DatabaseInstance &db, LogicalType type, ColumnSegmentType segment_type, idx_t start, idx_t count, + CompressionFunction *function, unique_ptr statistics, block_id_t block_id, + idx_t offset); - bool Equals(const BaseExpression *other) const override; +private: + void Scan(ColumnScanState &state, idx_t scan_count, Vector &result); + void ScanPartial(ColumnScanState &state, idx_t scan_count, Vector &result, idx_t result_offset); - unique_ptr Copy() override; +private: + //! The block id that this segment relates to (persistent segment only) + block_id_t block_id; + //! The offset into the block (persistent segment only) + idx_t offset; + //! 
Storage associated with the compressed segment + unique_ptr segment_state; }; + } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/planner/expression/bound_unnest_expression.hpp +// duckdb/transaction/local_storage.hpp // // //===----------------------------------------------------------------------===// @@ -14947,276 +17623,338 @@ class BoundSubqueryExpression : public Expression { + namespace duckdb { +class DataTable; +class WriteAheadLog; +struct TableAppendState; -//! Represents a function call that has been bound to a base function -class BoundUnnestExpression : public Expression { +class LocalTableStorage { public: - explicit BoundUnnestExpression(LogicalType return_type); + explicit LocalTableStorage(DataTable &table); + ~LocalTableStorage(); - unique_ptr child; + DataTable &table; + //! The main chunk collection holding the data + ChunkCollection collection; + //! The set of unique indexes + vector> indexes; + //! The set of deleted entries + unordered_map> deleted_entries; + //! The number of deleted rows + idx_t deleted_rows; + //! The number of active scans + idx_t active_scans = 0; public: - bool IsFoldable() const override; - string ToString() const override; - - hash_t Hash() const override; - bool Equals(const BaseExpression *other) const override; + void InitializeScan(LocalScanState &state, TableFilterSet *table_filters = nullptr); + idx_t EstimatedSize(); - unique_ptr Copy() override; + void Clear(); }; -} // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/planner/expression/bound_window_expression.hpp -// -// -//===----------------------------------------------------------------------===// +//! 
The LocalStorage class holds appends that have not been committed yet +class LocalStorage { +public: + struct CommitState { + unordered_map> append_states; + }; +public: + explicit LocalStorage(Transaction &transaction) : transaction(transaction) { + } + //! Initialize a scan of the local storage + void InitializeScan(DataTable *table, LocalScanState &state, TableFilterSet *table_filters); + //! Scan + void Scan(LocalScanState &state, const vector &column_ids, DataChunk &result); -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/parser/expression/window_expression.hpp -// -// -//===----------------------------------------------------------------------===// + //! Append a chunk to the local storage + void Append(DataTable *table, DataChunk &chunk); + //! Delete a set of rows from the local storage + idx_t Delete(DataTable *table, Vector &row_ids, idx_t count); + //! Update a set of rows in the local storage + void Update(DataTable *table, Vector &row_ids, const vector &column_ids, DataChunk &data); + + //! 
Commits the local storage, writing it to the WAL and completing the commit + void Commit(LocalStorage::CommitState &commit_state, Transaction &transaction, WriteAheadLog *log, + transaction_t commit_id); + + bool ChangesMade() noexcept { + return table_storage.size() > 0; + } + idx_t EstimatedSize(); + bool Find(DataTable *table) { + return table_storage.find(table) != table_storage.end(); + } + idx_t AddedRows(DataTable *table) { + auto entry = table_storage.find(table); + if (entry == table_storage.end()) { + return 0; + } + return entry->second->collection.Count() - entry->second->deleted_rows; + } + void AddColumn(DataTable *old_dt, DataTable *new_dt, ColumnDefinition &new_column, Expression *default_value); + void ChangeType(DataTable *old_dt, DataTable *new_dt, idx_t changed_idx, const LogicalType &target_type, + const vector &bound_columns, Expression &cast_expr); +private: + LocalTableStorage *GetStorage(DataTable *table); + template + bool ScanTableStorage(DataTable &table, LocalTableStorage &storage, T &&fun); -namespace duckdb { +private: + Transaction &transaction; + unordered_map> table_storage; -enum class WindowBoundary : uint8_t { - INVALID = 0, - UNBOUNDED_PRECEDING = 1, - UNBOUNDED_FOLLOWING = 2, - CURRENT_ROW_RANGE = 3, - CURRENT_ROW_ROWS = 4, - EXPR_PRECEDING = 5, - EXPR_FOLLOWING = 6 + void Flush(DataTable &table, LocalTableStorage &storage); }; -//! The WindowExpression represents a window function in the query. They are a special case of aggregates which is why -//! they inherit from them. -class WindowExpression : public ParsedExpression { -public: - WindowExpression(ExpressionType type, string schema_name, const string &function_name); +} // namespace duckdb - //! Schema of the aggregate function - string schema; - //! Name of the aggregate function - string function_name; - //! The child expression of the main window aggregate - vector> children; - //! The set of expressions to partition by - vector> partitions; - //! 
The set of ordering clauses - vector orders; - //! The window boundaries - WindowBoundary start = WindowBoundary::INVALID; - WindowBoundary end = WindowBoundary::INVALID; - unique_ptr start_expr; - unique_ptr end_expr; - //! Offset and default expressions for WINDOW_LEAD and WINDOW_LAG functions - unique_ptr offset_expr; - unique_ptr default_expr; -public: - bool IsWindow() const override { - return true; - } - string ToString() const override; - static bool Equals(const WindowExpression *a, const WindowExpression *b); - unique_ptr Copy() const override; - void Serialize(Serializer &serializer) override; - static unique_ptr Deserialize(ExpressionType type, Deserializer &source); -}; -} // namespace duckdb +namespace duckdb { +class ClientContext; +class ColumnDefinition; +class DataTable; +class RowGroup; +class StorageManager; +class TableCatalogEntry; +class Transaction; +class WriteAheadLog; +class TableDataWriter; +class TableIndexList { +public: + //! Scan the catalog set, invoking the callback method for every entry + template + void Scan(T &&callback) { + // lock the catalog set + lock_guard lock(indexes_lock); + for (auto &index : indexes) { + if (callback(*index)) { + break; + } + } + } + void AddIndex(unique_ptr index) { + D_ASSERT(index); + lock_guard lock(indexes_lock); + indexes.push_back(move(index)); + } + void RemoveIndex(Index *index) { + D_ASSERT(index); + lock_guard lock(indexes_lock); -namespace duckdb { -class AggregateFunction; + for (idx_t index_idx = 0; index_idx < indexes.size(); index_idx++) { + auto &index_entry = indexes[index_idx]; + if (index_entry.get() == index) { + indexes.erase(indexes.begin() + index_idx); + break; + } + } + } -class BoundWindowExpression : public Expression { -public: - BoundWindowExpression(ExpressionType type, LogicalType return_type, unique_ptr aggregate, - unique_ptr bind_info); + bool Empty() { + lock_guard lock(indexes_lock); + return indexes.empty(); + } - //! 
The bound aggregate function - unique_ptr aggregate; - //! The bound function info - unique_ptr bind_info; - //! The child expressions of the main window aggregate - vector> children; - //! The set of expressions to partition by - vector> partitions; - //! The set of ordering clauses - vector orders; - //! The window boundaries - WindowBoundary start = WindowBoundary::INVALID; - WindowBoundary end = WindowBoundary::INVALID; + idx_t Count() { + lock_guard lock(indexes_lock); + return indexes.size(); + } - unique_ptr start_expr; - unique_ptr end_expr; - //! Offset and default expressions for WINDOW_LEAD and WINDOW_LAG functions - unique_ptr offset_expr; - unique_ptr default_expr; +private: + //! Indexes associated with the current table + mutex indexes_lock; + vector> indexes; +}; -public: - bool IsWindow() const override { - return true; - } - bool IsFoldable() const override { - return false; +struct DataTableInfo { + DataTableInfo(DatabaseInstance &db, string schema, string table) + : db(db), cardinality(0), schema(move(schema)), table(move(table)) { } - string ToString() const override; + //! The database instance of the table + DatabaseInstance &db; + //! The amount of elements in the table. Note that this number signifies the amount of COMMITTED entries in the + //! table. It can be inaccurate inside of transactions. More work is needed to properly support that. + atomic cardinality; + // schema of the table + string schema; + // name of the table + string table; - bool KeysAreCompatible(const BoundWindowExpression *other) const; - bool Equals(const BaseExpression *other) const override; + TableIndexList indexes; - unique_ptr Copy() override; + bool IsTemporary() { + return schema == TEMP_SCHEMA; + } }; -} // namespace duckdb +struct ParallelTableScanState { + RowGroup *current_row_group; + idx_t vector_index; + bool transaction_local_data; +}; +//! DataTable represents a physical table on disk +class DataTable { +public: + //! 
Constructs a new data table from an (optional) set of persistent segments + DataTable(DatabaseInstance &db, const string &schema, const string &table, vector types, + unique_ptr data = nullptr); + //! Constructs a DataTable as a delta on an existing data table with a newly added column + DataTable(ClientContext &context, DataTable &parent, ColumnDefinition &new_column, Expression *default_value); + //! Constructs a DataTable as a delta on an existing data table but with one column removed + DataTable(ClientContext &context, DataTable &parent, idx_t removed_column); + //! Constructs a DataTable as a delta on an existing data table but with one column changed type + DataTable(ClientContext &context, DataTable &parent, idx_t changed_idx, const LogicalType &target_type, + vector bound_columns, Expression &cast_expr); -#include -namespace duckdb { -struct TableFilter; + shared_ptr info; + //! Types managed by data table + vector types; + //! A reference to the database instance + DatabaseInstance &db; -class AdaptiveFilter { public: - explicit AdaptiveFilter(Expression &expr); - explicit AdaptiveFilter(TableFilterSet *table_filters); - void AdaptRuntimeStatistics(double duration); - vector permutation; + void InitializeScan(TableScanState &state, const vector &column_ids, + TableFilterSet *table_filter = nullptr); + void InitializeScan(Transaction &transaction, TableScanState &state, const vector &column_ids, + TableFilterSet *table_filters = nullptr); -private: - //! used for adaptive expression reordering - idx_t iteration_count; - idx_t swap_idx; - idx_t right_random_border; - idx_t observe_interval; - idx_t execute_interval; - double runtime_sum; - double prev_mean; - bool observe; - bool warmup; - vector swap_likeliness; - std::default_random_engine generator; -}; -} // namespace duckdb + //! 
Returns the maximum amount of threads that should be assigned to scan this data table + idx_t MaxThreads(ClientContext &context); + void InitializeParallelScan(ParallelTableScanState &state); + bool NextParallelScan(ClientContext &context, ParallelTableScanState &state, TableScanState &scan_state, + const vector &column_ids); + + //! Scans up to STANDARD_VECTOR_SIZE elements from the table starting + //! from offset and store them in result. Offset is incremented with how many + //! elements were returned. + //! Returns true if all pushed down filters were executed during data fetching + void Scan(Transaction &transaction, DataChunk &result, TableScanState &state, vector &column_ids); + //! Fetch data from the specific row identifiers from the base table + void Fetch(Transaction &transaction, DataChunk &result, const vector &column_ids, Vector &row_ids, + idx_t fetch_count, ColumnFetchState &state); -namespace duckdb { -class LocalTableStorage; -class Index; -class MorselInfo; -class UpdateSegment; -class PersistentSegment; -class TransientSegment; -class ValiditySegment; -struct TableFilterSet; + //! Append a DataChunk to the table. Throws an exception if the columns don't match the tables' columns. + void Append(TableCatalogEntry &table, ClientContext &context, DataChunk &chunk); + //! Delete the entries with the specified row identifier from the table + idx_t Delete(TableCatalogEntry &table, ClientContext &context, Vector &row_ids, idx_t count); + //! Update the entries with the specified row identifier from the table + void Update(TableCatalogEntry &table, ClientContext &context, Vector &row_ids, const vector &column_ids, + DataChunk &data); + //! Update a single (sub-)column along a column path + //! The column_path vector is a *path* towards a column within the table + //! i.e. if we have a table with a single column S STRUCT(A INT, B INT) + //! and we update the validity mask of "S.B" + //! the column path is: + //! 0 (first column of table) + //! 
-> 1 (second subcolumn of struct) + //! -> 0 (first subcolumn of INT) + //! This method should only be used from the WAL replay. It does not verify update constraints. + void UpdateColumn(TableCatalogEntry &table, ClientContext &context, Vector &row_ids, + const vector &column_path, DataChunk &updates); -struct IndexScanState { - virtual ~IndexScanState() { - } -}; + //! Add an index to the DataTable + void AddIndex(unique_ptr index, const vector> &expressions); -typedef unordered_map> buffer_handle_set_t; + //! Begin appending structs to this table, obtaining necessary locks, etc + void InitializeAppend(Transaction &transaction, TableAppendState &state, idx_t append_count); + //! Append a chunk to the table using the AppendState obtained from BeginAppend + void Append(Transaction &transaction, DataChunk &chunk, TableAppendState &state); + //! Commit the append + void CommitAppend(transaction_t commit_id, idx_t row_start, idx_t count); + //! Write a segment of the table to the WAL + void WriteToLog(WriteAheadLog &log, idx_t row_start, idx_t count); + //! Revert a set of appends made by the given AppendState, used to revert appends in the event of an error during + //! commit (e.g. because of an I/O exception) + void RevertAppend(idx_t start_row, idx_t count); + void RevertAppendInternal(idx_t start_row, idx_t count); -struct ColumnScanState { - //! The column segment that is currently being scanned - ColumnSegment *current; - //! The vector index of the transient segment - idx_t vector_index; - //! The primary buffer handle - unique_ptr primary_handle; - //! Child states of the vector - vector child_states; - //! Whether or not InitializeState has been called for this segment - bool initialized = false; - //! If this segment has already been checked for skipping puorposes - bool segment_checked = false; - //! The update segment of the current column - UpdateSegment *updates; - //! FIXME: all these vector offsets should be merged into a single row_index - //! 
The vector index within the current update segment - idx_t vector_index_updates; + void ScanTableSegment(idx_t start_row, idx_t count, const std::function &function); -public: - //! Move on to the next vector in the scan - void Next(); -}; + //! Append a chunk with the row ids [row_start, ..., row_start + chunk.size()] to all indexes of the table, returns + //! whether or not the append succeeded + bool AppendToIndexes(TableAppendState &state, DataChunk &chunk, row_t row_start); + //! Remove a chunk with the row ids [row_start, ..., row_start + chunk.size()] from all indexes of the table + void RemoveFromIndexes(TableAppendState &state, DataChunk &chunk, row_t row_start); + //! Remove the chunk with the specified set of row identifiers from all indexes of the table + void RemoveFromIndexes(TableAppendState &state, DataChunk &chunk, Vector &row_identifiers); + //! Remove the row identifiers from all the indexes of the table + void RemoveFromIndexes(Vector &row_identifiers, idx_t count); -struct ColumnFetchState { - //! The set of pinned block handles for this set of fetches - buffer_handle_set_t handles; - //! Any child states of the fetch - vector> child_states; -}; + void SetAsRoot() { + this->is_root = true; + } + + unique_ptr GetStatistics(ClientContext &context, column_t column_id); -struct LocalScanState { - ~LocalScanState(); + //! Checkpoint the table to the specified table data writer + BlockPointer Checkpoint(TableDataWriter &writer); + void CommitDropTable(); + void CommitDropColumn(idx_t index); - void SetStorage(LocalTableStorage *storage); - LocalTableStorage *GetStorage() { - return storage; - } + idx_t GetTotalRows(); - idx_t chunk_index; - idx_t max_index; - idx_t last_chunk_count; - TableFilterSet *table_filters; + //! Appends an empty row_group to the table + void AppendRowGroup(idx_t start_row); + + vector> GetStorageInfo(); private: - LocalTableStorage *storage = nullptr; -}; + //! 
Verify constraints with a chunk from the Append containing all columns of the table + void VerifyAppendConstraints(TableCatalogEntry &table, DataChunk &chunk); + //! Verify constraints with a chunk from the Update containing only the specified column_ids + void VerifyUpdateConstraints(TableCatalogEntry &table, DataChunk &chunk, const vector &column_ids); -class TableScanState { -public: - TableScanState() {}; - idx_t current_row, max_row; - idx_t base_row; - unique_ptr column_scans; - idx_t column_count; - TableFilterSet *table_filters = nullptr; - unique_ptr adaptive_filter; - LocalScanState local_state; - MorselInfo *version_info; + void InitializeScanWithOffset(TableScanState &state, const vector &column_ids, idx_t start_row, + idx_t end_row); + bool InitializeScanInRowGroup(TableScanState &state, const vector &column_ids, + TableFilterSet *table_filters, RowGroup *row_group, idx_t vector_index, + idx_t max_row); + bool ScanBaseTable(Transaction &transaction, DataChunk &result, TableScanState &state); - //! Move to the next vector - void NextVector(); -}; + //! The CreateIndexScan is a special scan that is used to create an index on the table, it keeps locks on the table + void InitializeCreateIndexScan(CreateIndexScanState &state, const vector &column_ids); + bool ScanCreateIndex(CreateIndexScanState &state, DataChunk &result, TableScanType type); -class CreateIndexScanState : public TableScanState { -public: - vector> locks; - std::unique_lock append_lock; - std::unique_lock delete_lock; +private: + //! Lock for appending entries to the table + mutex append_lock; + //! The number of rows in the table + atomic total_rows; + //! The segment trees holding the various row_groups of the table + shared_ptr row_groups; + //! Column statistics + vector> column_stats; + //! The statistics lock + mutex stats_lock; + //! Whether or not the data table is the root DataTable for this table; the root DataTable is the newest version + //! 
that can be appended to + atomic is_root; }; - } // namespace duckdb - //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/execution/expression_executor.hpp +// duckdb/function/pragma_function.hpp // // //===----------------------------------------------------------------------===// @@ -15224,13 +17962,10 @@ class CreateIndexScanState : public TableScanState { - - - //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/random_engine.hpp +// duckdb/parser/parsed_data/pragma_info.hpp // // //===----------------------------------------------------------------------===// @@ -15239,252 +17974,101 @@ class CreateIndexScanState : public TableScanState { -#include - -namespace duckdb { - -struct RandomEngine { - std::mt19937 random_engine; - explicit RandomEngine(int64_t seed) { - if (seed < 0) { - std::random_device rd; - random_engine.seed(rd()); - } else { - random_engine.seed(seed); - } - } - - //! Generate a random number between min and max - double NextRandom(double min, double max) { - std::uniform_real_distribution dist(min, max); - return dist(random_engine); - } - //! Generate a random number between 0 and 1 - double NextRandom() { - return NextRandom(0, 1); - } - uint32_t NextRandomInteger() { - std::uniform_int_distribution dist(0, NumericLimits::Maximum()); - return dist(random_engine); - } -}; -} // namespace duckdb namespace duckdb { -class ExecutionContext; -//! ExpressionExecutor is responsible for executing a set of expressions and storing the result in a data chunk -class ExpressionExecutor { -public: - ExpressionExecutor(); - explicit ExpressionExecutor(Expression *expression); - explicit ExpressionExecutor(Expression &expression); - explicit ExpressionExecutor(vector> &expressions); - - //! Add an expression to the set of to-be-executed expressions of the executor - void AddExpression(Expression &expr); - - //! 
Execute the set of expressions with the given input chunk and store the result in the output chunk - void Execute(DataChunk *input, DataChunk &result); - void Execute(DataChunk &input, DataChunk &result) { - Execute(&input, result); - } - void Execute(DataChunk &result) { - Execute(nullptr, result); - } - - //! Execute the ExpressionExecutor and put the result in the result vector; this should only be used for expression - //! executors with a single expression - void ExecuteExpression(DataChunk &input, Vector &result); - //! Execute the ExpressionExecutor and put the result in the result vector; this should only be used for expression - //! executors with a single expression - void ExecuteExpression(Vector &result); - //! Execute the ExpressionExecutor and generate a selection vector from all true values in the result; this should - //! only be used with a single boolean expression - idx_t SelectExpression(DataChunk &input, SelectionVector &sel); - //! Execute the expression with index `expr_idx` and store the result in the result vector - void ExecuteExpression(idx_t expr_idx, Vector &result); - //! Evaluate a scalar expression and fold it into a single value - static Value EvaluateScalar(Expression &expr); +enum class PragmaType : uint8_t { PRAGMA_STATEMENT, PRAGMA_ASSIGNMENT, PRAGMA_CALL }; - //! Initialize the state of a given expression - static unique_ptr InitializeState(Expression &expr, ExpressionExecutorState &state); +struct PragmaInfo : public ParseInfo { + //! Name of the PRAGMA statement + string name; + //! Parameter list (if any) + vector parameters; + //! 
Named parameter list (if any) + unordered_map named_parameters; - void SetChunk(DataChunk *chunk) { - this->chunk = chunk; - } - void SetChunk(DataChunk &chunk) { - SetChunk(&chunk); +public: + unique_ptr Copy() const { + auto result = make_unique(); + result->name = name; + result->parameters = parameters; + result->named_parameters = named_parameters; + return result; } - - vector> &GetStates(); - - //! Count the number of time the executor called - uint64_t total_count = 0; - //! Count the number of time the executor called since last sampling - uint64_t current_count = 0; - //! Show the next sample - uint64_t next_sample = 0; - //! Count the number of samples - uint64_t sample_count = 0; - //! Count the number of tuples in all samples - uint64_t sample_tuples_count = 0; - //! Count the number of tuples processed by this executor - uint64_t tuples_count = 0; - - //! The expressions of the executor - vector expressions; - //! The data chunk of the current physical operator, used to resolve - //! 
column references and determines the output cardinality - DataChunk *chunk = nullptr; - -protected: - void Initialize(Expression &expr, ExpressionExecutorState &state); - - static unique_ptr InitializeState(BoundReferenceExpression &expr, ExpressionExecutorState &state); - static unique_ptr InitializeState(BoundBetweenExpression &expr, ExpressionExecutorState &state); - static unique_ptr InitializeState(BoundCaseExpression &expr, ExpressionExecutorState &state); - static unique_ptr InitializeState(BoundCastExpression &expr, ExpressionExecutorState &state); - static unique_ptr InitializeState(BoundComparisonExpression &expr, ExpressionExecutorState &state); - static unique_ptr InitializeState(BoundConjunctionExpression &expr, - ExpressionExecutorState &state); - static unique_ptr InitializeState(BoundConstantExpression &expr, ExpressionExecutorState &state); - static unique_ptr InitializeState(BoundFunctionExpression &expr, ExpressionExecutorState &state); - static unique_ptr InitializeState(BoundOperatorExpression &expr, ExpressionExecutorState &state); - static unique_ptr InitializeState(BoundParameterExpression &expr, ExpressionExecutorState &state); - - void Execute(Expression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, Vector &result); - - void Execute(BoundBetweenExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, - Vector &result); - void Execute(BoundCaseExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, - Vector &result); - void Execute(BoundCastExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, - Vector &result); - - void Execute(BoundComparisonExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, - Vector &result); - void Execute(BoundConjunctionExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, - Vector &result); - void Execute(BoundConstantExpression &expr, ExpressionState 
*state, const SelectionVector *sel, idx_t count, - Vector &result); - void Execute(BoundFunctionExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, - Vector &result); - void Execute(BoundOperatorExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, - Vector &result); - void Execute(BoundParameterExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, - Vector &result); - void Execute(BoundReferenceExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, - Vector &result); - - //! Execute the (boolean-returning) expression and generate a selection vector with all entries that are "true" in - //! the result - idx_t Select(Expression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel); - idx_t DefaultSelect(Expression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel); - - idx_t Select(BoundBetweenExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel); - idx_t Select(BoundComparisonExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel); - idx_t Select(BoundConjunctionExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel); - - //! Verify that the output of a step in the ExpressionExecutor is correct - void Verify(Expression &expr, Vector &result, idx_t count); - -private: - //! The states of the expression executor; this holds any intermediates and temporary states of expressions - vector> states; - //! 
the random number generator used for adaptive expression reordering - RandomEngine random; }; + } // namespace duckdb -namespace duckdb { +namespace duckdb { class ClientContext; -class Transaction; -struct IndexLock; +//! Return a substitute query to execute instead of this pragma statement +typedef string (*pragma_query_t)(ClientContext &context, const FunctionParameters ¶meters); +//! Execute the main pragma function +typedef void (*pragma_function_t)(ClientContext &context, const FunctionParameters ¶meters); -//! The index is an abstract base class that serves as the basis for indexes -class Index { +//! Pragma functions are invoked by calling PRAGMA x +//! Pragma functions come in three types: +//! * Call: function call, e.g. PRAGMA table_info('tbl') +//! -> call statements can take multiple parameters +//! * Statement: statement without parameters, e.g. PRAGMA show_tables +//! -> this is similar to a call pragma but without parameters +//! * Assignment: value assignment, e.g. PRAGMA memory_limit='8GB' +//! -> assignments take a single parameter +//! -> assignments can also be called through SET memory_limit='8GB' +//! Pragma functions can either return a new query to execute (pragma_query_t) +//! 
or they can +class PragmaFunction : public SimpleNamedParameterFunction { public: - Index(IndexType type, vector column_ids, vector> unbound_expressions); - virtual ~Index() = default; + // Call + static PragmaFunction PragmaCall(const string &name, pragma_query_t query, vector arguments, + LogicalType varargs = LogicalType::INVALID); + static PragmaFunction PragmaCall(const string &name, pragma_function_t function, vector arguments, + LogicalType varargs = LogicalType::INVALID); + // Statement + static PragmaFunction PragmaStatement(const string &name, pragma_query_t query); + static PragmaFunction PragmaStatement(const string &name, pragma_function_t function); + // Assignment + static PragmaFunction PragmaAssignment(const string &name, pragma_query_t query, LogicalType type); + static PragmaFunction PragmaAssignment(const string &name, pragma_function_t function, LogicalType type); - //! Lock used for updating the index - std::mutex lock; - //! The type of the index - IndexType type; - //! Column identifiers to extract from the base table - vector column_ids; - //! unordered_set of column_ids used by the index - unordered_set column_id_set; - //! Unbound expressions used by the index - vector> unbound_expressions; - //! The physical types stored in the index - vector types; - //! The logical types of the expressions - vector logical_types; + string ToString() override; public: - //! Initialize a scan on the index with the given expression and column ids - //! to fetch from the base table when we only have one query predicate - virtual unique_ptr InitializeScanSinglePredicate(Transaction &transaction, Value value, - ExpressionType expressionType) = 0; - //! Initialize a scan on the index with the given expression and column ids - //! 
to fetch from the base table for two query predicates - virtual unique_ptr InitializeScanTwoPredicates(Transaction &transaction, Value low_value, - ExpressionType low_expression_type, Value high_value, - ExpressionType high_expression_type) = 0; - //! Perform a lookup on the index, fetching up to max_count result ids. Returns true if all row ids were fetched, - //! and false otherwise. - virtual bool Scan(Transaction &transaction, DataTable &table, IndexScanState &state, idx_t max_count, - vector &result_ids) = 0; + PragmaType type; - //! Obtain a lock on the index - virtual void InitializeLock(IndexLock &state); - //! Called when data is appended to the index. The lock obtained from InitializeAppend must be held - virtual bool Append(IndexLock &state, DataChunk &entries, Vector &row_identifiers) = 0; - bool Append(DataChunk &entries, Vector &row_identifiers); - //! Verify that data can be appended to the index - virtual void VerifyAppend(DataChunk &chunk) { - } + pragma_query_t query; + pragma_function_t function; + unordered_map named_parameters; - //! Called when data inside the index is Deleted - virtual void Delete(IndexLock &state, DataChunk &entries, Vector &row_identifiers) = 0; - void Delete(DataChunk &entries, Vector &row_identifiers); +private: + PragmaFunction(string name, PragmaType pragma_type, pragma_query_t query, pragma_function_t function, + vector arguments, LogicalType varargs); +}; + +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/parser/parser.hpp +// +// +//===----------------------------------------------------------------------===// - //! Insert data into the index. Does not lock the index. - virtual bool Insert(IndexLock &lock, DataChunk &input, Vector &row_identifiers) = 0; - //! 
Returns true if the index is affected by updates on the specified column ids, and false otherwise - bool IndexIsUpdated(vector &column_ids); -protected: - void ExecuteExpressions(DataChunk &input, DataChunk &result); -private: - //! Bound expressions used by the index - vector> bound_expressions; - //! Expression executor for the index expressions - ExpressionExecutor executor; - unique_ptr BindExpression(unique_ptr expr); -}; -} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/table_statistics.hpp +// duckdb/parser/simplified_token.hpp // // //===----------------------------------------------------------------------===// @@ -15493,31 +18077,79 @@ class Index { - namespace duckdb { -struct TableStatistics { - idx_t estimated_cardinality; +//! Simplified tokens are a simplified (dense) representation of the lexer +//! Used for simple syntax highlighting in the tests +enum class SimplifiedTokenType : uint8_t { + SIMPLIFIED_TOKEN_IDENTIFIER, + SIMPLIFIED_TOKEN_NUMERIC_CONSTANT, + SIMPLIFIED_TOKEN_STRING_CONSTANT, + SIMPLIFIED_TOKEN_OPERATOR, + SIMPLIFIED_TOKEN_KEYWORD, + SIMPLIFIED_TOKEN_COMMENT +}; + +struct SimplifiedToken { + SimplifiedTokenType type; + idx_t start; }; } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/storage/column_data.hpp -// -// -//===----------------------------------------------------------------------===// +namespace duckdb_libpgquery { +struct PGNode; +struct PGList; +} // namespace duckdb_libpgquery + +namespace duckdb { + +//! The parser is responsible for parsing the query and converting it into a set +//! of parsed statements. The parsed statements can then be converted into a +//! plan and executed. +class Parser { +public: + Parser(); + + //! Attempts to parse a query into a series of SQL statements. Returns + //! whether or not the parsing was successful. 
If the parsing was + //! successful, the parsed statements will be stored in the statements + //! variable. + void ParseQuery(const string &query); + + //! Tokenize a query, returning the raw tokens together with their locations + static vector Tokenize(const string &query); + //! Returns true if the given text matches a keyword of the parser + static bool IsKeyword(const string &text); + //! Parses a list of expressions (i.e. the list found in a SELECT clause) + static vector> ParseExpressionList(const string &select_list); + //! Parses a list as found in an ORDER BY expression (i.e. including optional ASCENDING/DESCENDING modifiers) + static vector ParseOrderList(const string &select_list); + //! Parses an update list (i.e. the list found in the SET clause of an UPDATE statement) + static void ParseUpdateList(const string &update_list, vector &update_columns, + vector> &expressions); + //! Parses a VALUES list (i.e. the list of expressions after a VALUES clause) + static vector>> ParseValuesList(const string &value_list); + //! Parses a column list (i.e. as found in a CREATE TABLE statement) + static vector ParseColumnList(const string &column_list); + //! The parsed SQL statements from an invocation to ParseQuery. + vector> statements; +private: + //! Transform a Postgres parse tree into a set of SQL Statements + bool TransformList(duckdb_libpgquery::PGList *tree); + //! Transform a single Postgres parse node into a SQL Statement. 
+ unique_ptr TransformNode(duckdb_libpgquery::PGNode *stmt); +}; +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/table/append_state.hpp +// duckdb/storage/object_cache.hpp // // //===----------------------------------------------------------------------===// @@ -15528,41 +18160,49 @@ struct TableStatistics { + + + namespace duckdb { -class UpdateSegment; -class TransientSegment; -class ValiditySegment; +class ClientContext; -struct ColumnAppendState { - //! The current segment of the append - TransientSegment *current; - //! The update segment to append to - UpdateSegment *updates; - //! Child append states - vector child_appends; - //! The write lock that is held by the append - unique_ptr lock; +//! ObjectCache is the base class for objects caches in DuckDB +class ObjectCacheEntry { +public: + virtual ~ObjectCacheEntry() { + } }; -struct IndexLock { - std::unique_lock index_lock; -}; +class ObjectCache { +public: + shared_ptr Get(string key) { + lock_guard glock(lock); + auto entry = cache.find(key); + if (entry == cache.end()) { + return nullptr; + } + return entry->second; + } -struct TableAppendState { - std::unique_lock append_lock; - unique_ptr index_locks; - unique_ptr states; - row_t row_start; - row_t current_row; -}; + void Put(string key, shared_ptr value) { + lock_guard glock(lock); + cache[key] = move(value); + } -} // namespace duckdb + static ObjectCache &GetObjectCache(ClientContext &context); + static bool ObjectCacheEnabled(ClientContext &context); +private: + //! 
Object Cache + unordered_map> cache; + mutex lock; +}; +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/table/persistent_segment.hpp +// duckdb/planner/table_filter.hpp // // //===----------------------------------------------------------------------===// @@ -15571,10 +18211,11 @@ struct TableAppendState { + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/uncompressed_segment.hpp +// duckdb/common/enums/filter_propagate_result.hpp // // //===----------------------------------------------------------------------===// @@ -15583,116 +18224,59 @@ struct TableAppendState { +namespace duckdb { +enum class FilterPropagateResult : uint8_t { + NO_PRUNING_POSSIBLE = 0, + FILTER_ALWAYS_TRUE = 1, + FILTER_ALWAYS_FALSE = 2, + FILTER_TRUE_OR_NULL = 3, + FILTER_FALSE_OR_NULL = 4 +}; +} // namespace duckdb -namespace duckdb { -class BlockHandle; -class ColumnData; -class Transaction; -class StorageManager; - -struct ColumnAppendState; -struct UpdateInfo; -//! An uncompressed segment represents an uncompressed segment of a column residing in a block -class UncompressedSegment { -public: - UncompressedSegment(DatabaseInstance &db, PhysicalType type, idx_t row_start); - virtual ~UncompressedSegment(); +namespace duckdb { +class BaseStatistics; - //! The storage manager - DatabaseInstance &db; - //! Type of the uncompressed segment - PhysicalType type; - //! The block that this segment relates to - shared_ptr block; - //! The size of a vector of this type - idx_t vector_size; - //! The maximum amount of vectors that can be stored in this segment - idx_t max_vector_count; - //! The current amount of tuples that are stored in this segment - idx_t tuple_count; - //! The starting row of this segment - idx_t row_start; +enum class TableFilterType : uint8_t { + CONSTANT_COMPARISON = 0, // constant comparison (e.g. 
=C, >C, >=C, (STANDARD_VECTOR_SIZE, tuple_count - vector_index * STANDARD_VECTOR_SIZE); + virtual ~TableFilter() { } - virtual void Verify(); + TableFilterType filter_type; -protected: - //! Fetch base table data - virtual void FetchBaseData(ColumnScanState &state, idx_t vector_index, Vector &result) = 0; +public: + //! Returns true if the statistics indicate that the segment can contain values that satisfy that filter + virtual FilterPropagateResult CheckStatistics(BaseStatistics &stats) = 0; + virtual string ToString(const string &column_name) = 0; }; -} // namespace duckdb - - -namespace duckdb { -class DatabaseInstance; - -class PersistentSegment : public ColumnSegment { +class TableFilterSet { public: - PersistentSegment(DatabaseInstance &db, block_id_t id, idx_t offset, const LogicalType &type, idx_t start, - idx_t count, unique_ptr statistics); - - //! The storage manager - DatabaseInstance &db; - //! The block id that this segment relates to - block_id_t block_id; - //! The offset into the block - idx_t offset; - //! The uncompressed segment that the data of the persistent segment is loaded into - unique_ptr data; + unordered_map> filters; public: - void InitializeScan(ColumnScanState &state) override; - //! Scan one vector from this persistent segment - void Scan(ColumnScanState &state, idx_t vector_index, Vector &result) override; - //! Fetch the base table vector index that belongs to this row - void Fetch(ColumnScanState &state, idx_t vector_index, Vector &result) override; - //! 
Fetch a value of the specific row id and append it to the result - void FetchRow(ColumnFetchState &state, row_t row_id, Vector &result, idx_t result_idx) override; + void PushFilter(idx_t table_index, unique_ptr filter); }; } // namespace duckdb - - //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/data_pointer.hpp +// duckdb/storage/statistics/string_statistics.hpp // // //===----------------------------------------------------------------------===// @@ -15701,144 +18285,90 @@ class PersistentSegment : public ColumnSegment { +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/statistics/validity_statistics.hpp +// +// +//===----------------------------------------------------------------------===// + + + namespace duckdb { +class Serializer; +class Deserializer; +class Vector; -class DataPointer { +class ValidityStatistics : public BaseStatistics { public: - uint64_t row_start; - uint64_t tuple_count; - block_id_t block_id; - uint32_t offset; - //! Type-specific statistics of the segment - unique_ptr statistics; -}; + ValidityStatistics(bool has_null = false, bool has_no_null = true); -} // namespace duckdb + //! Whether or not the segment can contain NULL values + bool has_null; + //! 
Whether or not the segment can contain values that are not null + bool has_no_null; +public: + void Merge(const BaseStatistics &other) override; + bool IsConstant() override; -namespace duckdb { -class ColumnData; -class DatabaseInstance; -class TableDataWriter; -class PersistentSegment; -class PersistentColumnData; -class Transaction; + unique_ptr Copy() override; + void Serialize(Serializer &serializer) override; + static unique_ptr Deserialize(Deserializer &source); + void Verify(Vector &vector, const SelectionVector &sel, idx_t count) override; -struct DataTableInfo; + static unique_ptr Combine(const unique_ptr &lstats, + const unique_ptr &rstats); + + string ToString() override; +}; -struct ColumnCheckpointState { - ColumnCheckpointState(ColumnData &column_data, TableDataWriter &writer); - virtual ~ColumnCheckpointState(); +} // namespace duckdb - ColumnData &column_data; - TableDataWriter &writer; - SegmentTree new_tree; - vector data_pointers; - unique_ptr global_stats; - unique_ptr current_segment; - unique_ptr segment_stats; +namespace duckdb { +class StringStatistics : public BaseStatistics { public: - virtual void CreateEmptySegment(); - virtual void FlushSegment(); - virtual void AppendData(Vector &data, idx_t count); - virtual void FlushToDisk(); -}; + constexpr static uint32_t MAX_STRING_MINMAX_SIZE = 8; -class ColumnData { public: - ColumnData(DatabaseInstance &db, DataTableInfo &table_info, LogicalType type, idx_t column_idx); - virtual ~ColumnData() { - } - - DataTableInfo &table_info; - //! The type of the column - LogicalType type; - //! The database - DatabaseInstance &db; - //! The column index of the column - idx_t column_idx; - //! The segments holding the data of the column - SegmentTree data; - //! The segments holding the updates of the column - SegmentTree updates; - //! The amount of persistent rows - idx_t persistent_rows; - -public: - virtual bool CheckZonemap(ColumnScanState &state, TableFilter &filter) = 0; - - //! 
Initialize a scan of the column - virtual void InitializeScan(ColumnScanState &state) = 0; - //! Initialize a scan starting at the specified offset - virtual void InitializeScanWithOffset(ColumnScanState &state, idx_t vector_idx) = 0; - //! Scan the next vector from the column - virtual void Scan(Transaction &transaction, ColumnScanState &state, Vector &result) = 0; - //! Scan the next vector from the column and apply a selection vector to filter the data - void FilterScan(Transaction &transaction, ColumnScanState &state, Vector &result, SelectionVector &sel, - idx_t &approved_tuple_count); - //! Scan the next vector from the column, throwing an exception if there are any outstanding updates - virtual void IndexScan(ColumnScanState &state, Vector &result, bool allow_pending_updates) = 0; - //! Executes the filters directly in the table's data - void Select(Transaction &transaction, ColumnScanState &state, Vector &result, SelectionVector &sel, - idx_t &approved_tuple_count, vector &table_filter); - //! Initialize an appending phase for this column - virtual void InitializeAppend(ColumnAppendState &state); - //! Append a vector of type [type] to the end of the column - void Append(ColumnAppendState &state, Vector &vector, idx_t count); - virtual void AppendData(ColumnAppendState &state, VectorData &vdata, idx_t count); - //! Revert a set of appends to the ColumnData - virtual void RevertAppend(row_t start_row); - - //! Update the specified row identifiers - virtual void Update(Transaction &transaction, Vector &updates, Vector &row_ids, idx_t count) = 0; - - //! Fetch the vector from the column data that belongs to this specific row - virtual void Fetch(ColumnScanState &state, row_t row_id, Vector &result); - //! 
Fetch a specific row id and append it to the vector - virtual void FetchRow(ColumnFetchState &state, Transaction &transaction, row_t row_id, Vector &result, - idx_t result_idx); - - void SetStatistics(unique_ptr new_stats); - void MergeStatistics(BaseStatistics &other); - virtual unique_ptr GetStatistics(); - - virtual void CommitDropColumn(); - - virtual unique_ptr CreateCheckpointState(TableDataWriter &writer); - virtual void Checkpoint(TableDataWriter &writer); - - virtual void Initialize(PersistentColumnData &column_data); - - static void BaseDeserialize(DatabaseInstance &db, Deserializer &source, const LogicalType &type, - PersistentColumnData &result); - static unique_ptr Deserialize(DatabaseInstance &db, Deserializer &source, - const LogicalType &type); + explicit StringStatistics(LogicalType type); -protected: - //! Append a transient segment - void AppendTransientSegment(idx_t start_row); - //! Append an update segment segment - void AppendUpdateSegment(idx_t start_row, idx_t count = 0); + //! The minimum value of the segment, potentially truncated + data_t min[MAX_STRING_MINMAX_SIZE]; + //! The maximum value of the segment, potentially truncated + data_t max[MAX_STRING_MINMAX_SIZE]; + //! Whether or not the column can contain unicode characters + bool has_unicode; + //! The maximum string length in bytes + uint32_t max_string_length; + //! Whether or not the segment contains any big strings in overflow blocks + bool has_overflow_strings; -protected: - mutex stats_lock; - //! 
The statistics of the column - unique_ptr statistics; -}; +public: + void Update(const string_t &value); + void Merge(const BaseStatistics &other) override; -} // namespace duckdb + unique_ptr Copy() override; + void Serialize(Serializer &serializer) override; + static unique_ptr Deserialize(Deserializer &source, LogicalType type); + void Verify(Vector &vector, const SelectionVector &sel, idx_t count) override; + FilterPropagateResult CheckZonemap(ExpressionType comparison_type, const string &value); + string ToString() override; +}; +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/transaction/local_storage.hpp +// duckdb/storage/statistics/numeric_statistics.hpp // // //===----------------------------------------------------------------------===// @@ -15848,371 +18378,239 @@ class ColumnData { -namespace duckdb { -class DataTable; -class WriteAheadLog; -struct TableAppendState; -class LocalTableStorage { -public: - explicit LocalTableStorage(DataTable &table); - ~LocalTableStorage(); - DataTable &table; - //! The main chunk collection holding the data - ChunkCollection collection; - //! The set of unique indexes - vector> indexes; - //! The set of deleted entries - unordered_map> deleted_entries; - //! The number of deleted rows - idx_t deleted_rows; - //! The number of active scans - idx_t active_scans = 0; -public: - void InitializeScan(LocalScanState &state, TableFilterSet *table_filters = nullptr); - idx_t EstimatedSize(); +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/string_util.hpp +// +// +//===----------------------------------------------------------------------===// - void Clear(); -}; -//! 
The LocalStorage class holds appends that have not been committed yet -class LocalStorage { -public: - struct CommitState { - unordered_map> append_states; - }; -public: - explicit LocalStorage(Transaction &transaction) : transaction(transaction) { - } - //! Initialize a scan of the local storage - void InitializeScan(DataTable *table, LocalScanState &state, TableFilterSet *table_filters); - //! Scan - void Scan(LocalScanState &state, const vector &column_ids, DataChunk &result); - //! Append a chunk to the local storage - void Append(DataTable *table, DataChunk &chunk); - //! Delete a set of rows from the local storage - void Delete(DataTable *table, Vector &row_ids, idx_t count); - //! Update a set of rows in the local storage - void Update(DataTable *table, Vector &row_ids, vector &column_ids, DataChunk &data); - //! Commits the local storage, writing it to the WAL and completing the commit - void Commit(LocalStorage::CommitState &commit_state, Transaction &transaction, WriteAheadLog *log, - transaction_t commit_id); - bool ChangesMade() noexcept { - return table_storage.size() > 0; +namespace duckdb { +/** + * String Utility Functions + * Note that these are not the most efficient implementations (i.e., they copy + * memory) and therefore they should only be used for debug messages and other + * such things. 
+ */ +class StringUtil { +public: + static bool CharacterIsSpace(char c) { + return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'; } - idx_t EstimatedSize(); - - bool Find(DataTable *table) { - return table_storage.find(table) != table_storage.end(); + static bool CharacterIsNewline(char c) { + return c == '\n' || c == '\r'; } - - idx_t AddedRows(DataTable *table) { - auto entry = table_storage.find(table); - if (entry == table_storage.end()) { - return 0; + static bool CharacterIsDigit(char c) { + return c >= '0' && c <= '9'; + } + static char CharacterToLower(char c) { + if (c >= 'A' && c <= 'Z') { + return c - ('A' - 'a'); } - return entry->second->collection.Count() - entry->second->deleted_rows; + return c; } - void AddColumn(DataTable *old_dt, DataTable *new_dt, ColumnDefinition &new_column, Expression *default_value); - void ChangeType(DataTable *old_dt, DataTable *new_dt, idx_t changed_idx, const LogicalType &target_type, - const vector &bound_columns, Expression &cast_expr); - -private: - LocalTableStorage *GetStorage(DataTable *table); - - template - bool ScanTableStorage(DataTable &table, LocalTableStorage &storage, T &&fun); - -private: - Transaction &transaction; - unordered_map> table_storage; + //! Returns true if the needle string exists in the haystack + static bool Contains(const string &haystack, const string &needle); - void Flush(DataTable &table, LocalTableStorage &storage); -}; + //! Returns true if the target string starts with the given prefix + static bool StartsWith(string str, string prefix); -} // namespace duckdb + //! Returns true if the target string ends with the given suffix. + static bool EndsWith(const string &str, const string &suffix); + //! Repeat a string multiple times + static string Repeat(const string &str, const idx_t n); + //! Split the input string based on newline char + static vector Split(const string &str, char delimiter); -#include -#include + //! Join multiple strings into one string. 
Components are concatenated by the given separator + static string Join(const vector &input, const string &separator); -namespace duckdb { -class ClientContext; -class ColumnDefinition; -class DataTable; -class StorageManager; -class TableCatalogEntry; -class Transaction; -class WriteAheadLog; -class TableDataWriter; + //! Join multiple items of container with given size, transformed to string + //! using function, into one string using the given separator + template + static string Join(const C &input, S count, const string &separator, Func f) { + // The result + std::string result; -struct DataTableInfo { - DataTableInfo(string schema, string table) : cardinality(0), schema(move(schema)), table(move(table)) { - } + // If the input isn't empty, append the first element. We do this so we + // don't need to introduce an if into the loop. + if (count > 0) { + result += f(input[0]); + } - //! The amount of elements in the table. Note that this number signifies the amount of COMMITTED entries in the - //! table. It can be inaccurate inside of transactions. More work is needed to properly support that. - std::atomic cardinality; - // schema of the table - string schema; - // name of the table - string table; - //! Indexes associated with the current table - vector> indexes; + // Append the remaining input components, after the first + for (size_t i = 1; i < count; i++) { + result += separator + f(input[i]); + } - bool IsTemporary() { - return schema == TEMP_SCHEMA; + return result; } -}; - -struct ParallelTableScanState { - idx_t current_row; - bool transaction_local_data; -}; - -//! DataTable represents a physical table on disk -class DataTable { -public: - //! Constructs a new data table from an (optional) set of persistent segments - DataTable(DatabaseInstance &db, const string &schema, const string &table, vector types, - unique_ptr data = nullptr); - //! 
Constructs a DataTable as a delta on an existing data table with a newly added column - DataTable(ClientContext &context, DataTable &parent, ColumnDefinition &new_column, Expression *default_value); - //! Constructs a DataTable as a delta on an existing data table but with one column removed - DataTable(ClientContext &context, DataTable &parent, idx_t removed_column); - //! Constructs a DataTable as a delta on an existing data table but with one column changed type - DataTable(ClientContext &context, DataTable &parent, idx_t changed_idx, const LogicalType &target_type, - vector bound_columns, Expression &cast_expr); - - shared_ptr info; - //! Types managed by data table - vector types; - //! A reference to the database instance - DatabaseInstance &db; - -public: - void InitializeScan(TableScanState &state, const vector &column_ids, - TableFilterSet *table_filter = nullptr); - void InitializeScan(Transaction &transaction, TableScanState &state, const vector &column_ids, - TableFilterSet *table_filters = nullptr); - - //! Returns the maximum amount of threads that should be assigned to scan this data table - idx_t MaxThreads(ClientContext &context); - void InitializeParallelScan(ParallelTableScanState &state); - bool NextParallelScan(ClientContext &context, ParallelTableScanState &state, TableScanState &scan_state, - const vector &column_ids); - - //! Scans up to STANDARD_VECTOR_SIZE elements from the table starting - //! from offset and store them in result. Offset is incremented with how many - //! elements were returned. - //! Returns true if all pushed down filters were executed during data fetching - void Scan(Transaction &transaction, DataChunk &result, TableScanState &state, vector &column_ids); - - //! Fetch data from the specific row identifiers from the base table - void Fetch(Transaction &transaction, DataChunk &result, vector &column_ids, Vector &row_ids, - idx_t fetch_count, ColumnFetchState &state); - - //! Append a DataChunk to the table. 
Throws an exception if the columns don't match the tables' columns. - void Append(TableCatalogEntry &table, ClientContext &context, DataChunk &chunk); - //! Delete the entries with the specified row identifier from the table - void Delete(TableCatalogEntry &table, ClientContext &context, Vector &row_ids, idx_t count); - //! Update the entries with the specified row identifier from the table - void Update(TableCatalogEntry &table, ClientContext &context, Vector &row_ids, vector &column_ids, - DataChunk &data); - - //! Add an index to the DataTable - void AddIndex(unique_ptr index, vector> &expressions); - //! Begin appending structs to this table, obtaining necessary locks, etc - void InitializeAppend(Transaction &transaction, TableAppendState &state, idx_t append_count); - //! Append a chunk to the table using the AppendState obtained from BeginAppend - void Append(Transaction &transaction, DataChunk &chunk, TableAppendState &state); - //! Commit the append - void CommitAppend(transaction_t commit_id, idx_t row_start, idx_t count); - //! Write a segment of the table to the WAL - void WriteToLog(WriteAheadLog &log, idx_t row_start, idx_t count); - //! Revert a set of appends made by the given AppendState, used to revert appends in the event of an error during - //! commit (e.g. because of an I/O exception) - void RevertAppend(idx_t start_row, idx_t count); - void RevertAppendInternal(idx_t start_row, idx_t count); + //! Return a string that formats the give number of bytes + static string BytesToHumanReadableString(idx_t bytes); - void ScanTableSegment(idx_t start_row, idx_t count, const std::function &function); + //! Convert a string to uppercase + static string Upper(const string &str); - //! Append a chunk with the row ids [row_start, ..., row_start + chunk.size()] to all indexes of the table, returns - //! whether or not the append succeeded - bool AppendToIndexes(TableAppendState &state, DataChunk &chunk, row_t row_start); - //! 
Remove a chunk with the row ids [row_start, ..., row_start + chunk.size()] from all indexes of the table - void RemoveFromIndexes(TableAppendState &state, DataChunk &chunk, row_t row_start); - //! Remove the chunk with the specified set of row identifiers from all indexes of the table - void RemoveFromIndexes(TableAppendState &state, DataChunk &chunk, Vector &row_identifiers); - //! Remove the row identifiers from all the indexes of the table - void RemoveFromIndexes(Vector &row_identifiers, idx_t count); + //! Convert a string to lowercase + static string Lower(const string &str); - void SetAsRoot() { - this->is_root = true; + //! Format a string using printf semantics + template + static string Format(const string fmt_str, Args... params) { + return Exception::ConstructMessage(fmt_str, params...); } - unique_ptr GetStatistics(ClientContext &context, column_t column_id); - - //! Checkpoint the table to the specified table data writer - void Checkpoint(TableDataWriter &writer); - void CheckpointDeletes(TableDataWriter &writer); - void CommitDropTable(); - void CommitDropColumn(idx_t index); - - idx_t GetTotalRows(); - -private: - //! Verify constraints with a chunk from the Append containing all columns of the table - void VerifyAppendConstraints(TableCatalogEntry &table, DataChunk &chunk); - //! Verify constraints with a chunk from the Update containing only the specified column_ids - void VerifyUpdateConstraints(TableCatalogEntry &table, DataChunk &chunk, vector &column_ids); + //! 
Split the input string into a vector of strings based on the split string + static vector Split(const string &input, const string &split); - void InitializeScanWithOffset(TableScanState &state, const vector &column_ids, - TableFilterSet *table_filters, idx_t start_row, idx_t end_row); - bool CheckZonemap(TableScanState &state, const vector &column_ids, TableFilterSet *table_filters, - idx_t ¤t_row); - bool ScanBaseTable(Transaction &transaction, DataChunk &result, TableScanState &state, - const vector &column_ids, idx_t ¤t_row, idx_t max_row); - bool ScanCreateIndex(CreateIndexScanState &state, const vector &column_ids, DataChunk &result, - idx_t ¤t_row, idx_t max_row, bool allow_pending_updates = false); + //! Remove the whitespace char in the left end of the string + static void LTrim(string &str); + //! Remove the whitespace char in the right end of the string + static void RTrim(string &str); + //! Remove the whitespace char in the left and right end of the string + static void Trim(string &str); - //! Figure out which of the row ids to use for the given transaction by looking at inserted/deleted data. Returns - //! the amount of rows to use and places the row_ids in the result_rows array. - idx_t FetchRows(Transaction &transaction, Vector &row_identifiers, idx_t fetch_count, row_t result_rows[]); + static string Replace(string source, const string &from, const string &to); - //! The CreateIndexScan is a special scan that is used to create an index on the table, it keeps locks on the table - void InitializeCreateIndexScan(CreateIndexScanState &state, const vector &column_ids); - void CreateIndexScan(CreateIndexScanState &structure, const vector &column_ids, DataChunk &result, - bool allow_pending_updates = false); + //! Get the levenshtein distance from two strings + static idx_t LevenshteinDistance(const string &s1, const string &s2); -private: - //! Lock for appending entries to the table - std::mutex append_lock; - //! 
The segment tree holding the persistent versions - shared_ptr versions; - //! The number of rows in the table - idx_t total_rows; - //! The physical columns of the table - vector> columns; - //! Whether or not the data table is the root DataTable for this table; the root DataTable is the newest version - //! that can be appended to - bool is_root; + //! Get the top-n strings (sorted by the given score distance) from a set of scores. + //! At least one entry is returned (if there is one). + //! Strings are only returned if they have a score less than the threshold. + static vector TopNStrings(vector> scores, idx_t n = 5, idx_t threshold = 5); + //! Computes the levenshtein distance of each string in strings, and compares it to target, then returns TopNStrings + //! with the given params. + static vector TopNLevenshtein(const vector &strings, const string &target, idx_t n = 5, + idx_t threshold = 5); + static string CandidatesMessage(const vector &candidates, const string &candidate = "Candidate bindings"); }; } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/function/pragma_function.hpp -// -// -//===----------------------------------------------------------------------===// - - //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/parsed_data/pragma_info.hpp +// duckdb/common/windows_undefs.hpp // // //===----------------------------------------------------------------------===// +#ifdef WIN32 +#ifdef min +#undef min +#endif +#ifdef max +#undef max +#endif +#ifdef ERROR +#undef ERROR +#endif +#ifdef small +#undef small +#endif -namespace duckdb { +#endif -enum class PragmaType : uint8_t { PRAGMA_STATEMENT, PRAGMA_ASSIGNMENT, PRAGMA_CALL }; -struct PragmaInfo : public ParseInfo { - //! Name of the PRAGMA statement - string name; - //! Parameter list (if any) - vector parameters; - //! 
Named parameter list (if any) - unordered_map named_parameters; -public: - unique_ptr Copy() const { - auto result = make_unique(); - result->name = name; - result->parameters = parameters; - result->named_parameters = named_parameters; - return result; - } -}; +namespace duckdb { -} // namespace duckdb +class NumericStatistics : public BaseStatistics { +public: + explicit NumericStatistics(LogicalType type); + NumericStatistics(LogicalType type, Value min, Value max); + //! The minimum value of the segment + Value min; + //! The maximum value of the segment + Value max; +public: + void Merge(const BaseStatistics &other) override; -namespace duckdb { -class ClientContext; + bool IsConstant() override; -//! Return a substitute query to execute instead of this pragma statement -typedef string (*pragma_query_t)(ClientContext &context, const FunctionParameters ¶meters); -//! Execute the main pragma function -typedef void (*pragma_function_t)(ClientContext &context, const FunctionParameters ¶meters); + FilterPropagateResult CheckZonemap(ExpressionType comparison_type, const Value &constant); -//! Pragma functions are invoked by calling PRAGMA x -//! Pragma functions come in three types: -//! * Call: function call, e.g. PRAGMA table_info('tbl') -//! -> call statements can take multiple parameters -//! * Statement: statement without parameters, e.g. PRAGMA show_tables -//! -> this is similar to a call pragma but without parameters -//! * Assignment: value assignment, e.g. PRAGMA memory_limit='8GB' -//! -> assignments take a single parameter -//! -> assignments can also be called through SET memory_limit='8GB' -//! Pragma functions can either return a new query to execute (pragma_query_t) -//! 
or they can -class PragmaFunction : public SimpleNamedParameterFunction { -public: - // Call - static PragmaFunction PragmaCall(const string &name, pragma_query_t query, vector arguments, - LogicalType varargs = LogicalType::INVALID); - static PragmaFunction PragmaCall(const string &name, pragma_function_t function, vector arguments, - LogicalType varargs = LogicalType::INVALID); - // Statement - static PragmaFunction PragmaStatement(const string &name, pragma_query_t query); - static PragmaFunction PragmaStatement(const string &name, pragma_function_t function); - // Assignment - static PragmaFunction PragmaAssignment(const string &name, pragma_query_t query, LogicalType type); - static PragmaFunction PragmaAssignment(const string &name, pragma_function_t function, LogicalType type); + unique_ptr Copy() override; + void Serialize(Serializer &serializer) override; + static unique_ptr Deserialize(Deserializer &source, LogicalType type); + void Verify(Vector &vector, const SelectionVector &sel, idx_t count) override; string ToString() override; -public: - PragmaType type; +private: + template + void TemplatedVerify(Vector &vector, const SelectionVector &sel, idx_t count); - pragma_query_t query; - pragma_function_t function; - unordered_map named_parameters; +public: + template + static inline void UpdateValue(T new_value, T &min, T &max) { + if (LessThan::Operation(new_value, min)) { + min = new_value; + } + if (GreaterThan::Operation(new_value, max)) { + max = new_value; + } + } -private: - PragmaFunction(string name, PragmaType pragma_type, pragma_query_t query, pragma_function_t function, - vector arguments, LogicalType varargs); + template + static inline void Update(SegmentStatistics &stats, T new_value); }; +template <> +void NumericStatistics::Update(SegmentStatistics &stats, int8_t new_value); +template <> +void NumericStatistics::Update(SegmentStatistics &stats, int16_t new_value); +template <> +void NumericStatistics::Update(SegmentStatistics &stats, 
int32_t new_value); +template <> +void NumericStatistics::Update(SegmentStatistics &stats, int64_t new_value); +template <> +void NumericStatistics::Update(SegmentStatistics &stats, uint8_t new_value); +template <> +void NumericStatistics::Update(SegmentStatistics &stats, uint16_t new_value); +template <> +void NumericStatistics::Update(SegmentStatistics &stats, uint32_t new_value); +template <> +void NumericStatistics::Update(SegmentStatistics &stats, uint64_t new_value); +template <> +void NumericStatistics::Update(SegmentStatistics &stats, hugeint_t new_value); +template <> +void NumericStatistics::Update(SegmentStatistics &stats, float new_value); +template <> +void NumericStatistics::Update(SegmentStatistics &stats, double new_value); +template <> +void NumericStatistics::Update(SegmentStatistics &stats, interval_t new_value); +template <> +void NumericStatistics::Update(SegmentStatistics &stats, list_entry_t new_value); + } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/parser.hpp +// duckdb/planner/filter/conjunction_filter.hpp // // //===----------------------------------------------------------------------===// @@ -16222,11 +18620,37 @@ class PragmaFunction : public SimpleNamedParameterFunction { +namespace duckdb { + +class ConjunctionOrFilter : public TableFilter { +public: + ConjunctionOrFilter(); + + //! The filters to OR together + vector> child_filters; + +public: + FilterPropagateResult CheckStatistics(BaseStatistics &stats) override; + string ToString(const string &column_name) override; +}; + +class ConjunctionAndFilter : public TableFilter { +public: + ConjunctionAndFilter(); + + //! 
The filters to OR together + vector> child_filters; + +public: + FilterPropagateResult CheckStatistics(BaseStatistics &stats) override; + string ToString(const string &column_name) override; +}; +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/simplified_token.hpp +// duckdb/planner/filter/constant_filter.hpp // // //===----------------------------------------------------------------------===// @@ -16235,79 +18659,41 @@ class PragmaFunction : public SimpleNamedParameterFunction { -namespace duckdb { - -//! Simplified tokens are a simplified (dense) representation of the lexer -//! Used for simple syntax highlighting in the tests -enum class SimplifiedTokenType : uint8_t { - SIMPLIFIED_TOKEN_IDENTIFIER, - SIMPLIFIED_TOKEN_NUMERIC_CONSTANT, - SIMPLIFIED_TOKEN_STRING_CONSTANT, - SIMPLIFIED_TOKEN_OPERATOR, - SIMPLIFIED_TOKEN_KEYWORD, - SIMPLIFIED_TOKEN_COMMENT -}; - -struct SimplifiedToken { - SimplifiedTokenType type; - idx_t start; -}; -} // namespace duckdb +namespace duckdb { -namespace duckdb_libpgquery { -struct PGNode; -struct PGList; -} // namespace duckdb_libpgquery +class ConstantFilter : public TableFilter { +public: + ConstantFilter(ExpressionType comparison_type, Value constant); -namespace duckdb { + //! The comparison type (e.g. COMPARE_EQUAL, COMPARE_GREATERTHAN, COMPARE_LESSTHAN, ...) + ExpressionType comparison_type; + //! The constant value to filter on + Value constant; -//! The parser is responsible for parsing the query and converting it into a set -//! of parsed statements. The parsed statements can then be converted into a -//! plan and executed. -class Parser { public: - Parser(); + FilterPropagateResult CheckStatistics(BaseStatistics &stats) override; + string ToString(const string &column_name) override; +}; - //! Attempts to parse a query into a series of SQL statements. Returns - //! whether or not the parsing was successful. If the parsing was - //! 
successful, the parsed statements will be stored in the statements - //! variable. - void ParseQuery(const string &query); +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/operator/persistent/buffered_csv_reader.hpp +// +// +//===----------------------------------------------------------------------===// - //! Tokenize a query, returning the raw tokens together with their locations - static vector Tokenize(const string &query); - //! Returns true if the given text matches a keyword of the parser - static bool IsKeyword(const string &text); - //! Parses a list of expressions (i.e. the list found in a SELECT clause) - static vector> ParseExpressionList(const string &select_list); - //! Parses a list as found in an ORDER BY expression (i.e. including optional ASCENDING/DESCENDING modifiers) - static vector ParseOrderList(const string &select_list); - //! Parses an update list (i.e. the list found in the SET clause of an UPDATE statement) - static void ParseUpdateList(const string &update_list, vector &update_columns, - vector> &expressions); - //! Parses a VALUES list (i.e. the list of expressions after a VALUES clause) - static vector>> ParseValuesList(const string &value_list); - //! Parses a column list (i.e. as found in a CREATE TABLE statement) - static vector ParseColumnList(const string &column_list); - //! The parsed SQL statements from an invocation to ParseQuery. - vector> statements; -private: - //! Transform a Postgres parse tree into a set of SQL Statements - bool TransformList(duckdb_libpgquery::PGList *tree); - //! Transform a single Postgres parse node into a SQL Statement. 
- unique_ptr TransformNode(duckdb_libpgquery::PGNode *stmt); -}; -} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/object_cache.hpp +// duckdb/function/scalar/strftime.hpp // // //===----------------------------------------------------------------------===// @@ -16317,83 +18703,387 @@ class Parser { +namespace duckdb { + +enum class StrTimeSpecifier : uint8_t { + ABBREVIATED_WEEKDAY_NAME = 0, // %a - Abbreviated weekday name. (Sun, Mon, ...) + FULL_WEEKDAY_NAME = 1, // %A Full weekday name. (Sunday, Monday, ...) + WEEKDAY_DECIMAL = 2, // %w - Weekday as a decimal number. (0, 1, ..., 6) + DAY_OF_MONTH_PADDED = 3, // %d - Day of the month as a zero-padded decimal. (01, 02, ..., 31) + DAY_OF_MONTH = 4, // %-d - Day of the month as a decimal number. (1, 2, ..., 30) + ABBREVIATED_MONTH_NAME = 5, // %b - Abbreviated month name. (Jan, Feb, ..., Dec) + FULL_MONTH_NAME = 6, // %B - Full month name. (January, February, ...) + MONTH_DECIMAL_PADDED = 7, // %m - Month as a zero-padded decimal number. (01, 02, ..., 12) + MONTH_DECIMAL = 8, // %-m - Month as a decimal number. (1, 2, ..., 12) + YEAR_WITHOUT_CENTURY_PADDED = 9, // %y - Year without century as a zero-padded decimal number. (00, 01, ..., 99) + YEAR_WITHOUT_CENTURY = 10, // %-y - Year without century as a decimal number. (0, 1, ..., 99) + YEAR_DECIMAL = 11, // %Y - Year with century as a decimal number. (2013, 2019 etc.) + HOUR_24_PADDED = 12, // %H - Hour (24-hour clock) as a zero-padded decimal number. (00, 01, ..., 23) + HOUR_24_DECIMAL = 13, // %-H - Hour (24-hour clock) as a decimal number. (0, 1, ..., 23) + HOUR_12_PADDED = 14, // %I - Hour (12-hour clock) as a zero-padded decimal number. (01, 02, ..., 12) + HOUR_12_DECIMAL = 15, // %-I - Hour (12-hour clock) as a decimal number. (1, 2, ... 12) + AM_PM = 16, // %p - Locale’s AM or PM. (AM, PM) + MINUTE_PADDED = 17, // %M - Minute as a zero-padded decimal number. 
(00, 01, ..., 59) + MINUTE_DECIMAL = 18, // %-M - Minute as a decimal number. (0, 1, ..., 59) + SECOND_PADDED = 19, // %S - Second as a zero-padded decimal number. (00, 01, ..., 59) + SECOND_DECIMAL = 20, // %-S - Second as a decimal number. (0, 1, ..., 59) + MICROSECOND_PADDED = 21, // %f - Microsecond as a decimal number, zero-padded on the left. (000000 - 999999) + MILLISECOND_PADDED = 22, // %g - Millisecond as a decimal number, zero-padded on the left. (000 - 999) + UTC_OFFSET = 23, // %z - UTC offset in the form +HHMM or -HHMM. ( ) + TZ_NAME = 24, // %Z - Time zone name. ( ) + DAY_OF_YEAR_PADDED = 25, // %j - Day of the year as a zero-padded decimal number. (001, 002, ..., 366) + DAY_OF_YEAR_DECIMAL = 26, // %-j - Day of the year as a decimal number. (1, 2, ..., 366) + WEEK_NUMBER_PADDED_SUN_FIRST = + 27, // %U - Week number of the year (Sunday as the first day of the week). All days in a new year preceding the + // first Sunday are considered to be in week 0. (00, 01, ..., 53) + WEEK_NUMBER_PADDED_MON_FIRST = + 28, // %W - Week number of the year (Monday as the first day of the week). All days in a new year preceding the + // first Monday are considered to be in week 0. (00, 01, ..., 53) + LOCALE_APPROPRIATE_DATE_AND_TIME = + 29, // %c - Locale’s appropriate date and time representation. (Mon Sep 30 07:06:05 2013) + LOCALE_APPROPRIATE_DATE = 30, // %x - Locale’s appropriate date representation. (09/30/13) + LOCALE_APPROPRIATE_TIME = 31 // %X - Locale’s appropriate time representation. (07:06:05) +}; + +struct StrTimeFormat { +public: + virtual ~StrTimeFormat() { + } + + static string ParseFormatSpecifier(string format_string, StrTimeFormat &format); + +protected: + //! The format specifiers + vector specifiers; + //! The literals that appear in between the format specifiers + //! The following must hold: literals.size() = specifiers.size() + 1 + //! 
Format is literals[0], specifiers[0], literals[1], ..., specifiers[n - 1], literals[n] + vector literals; + //! The constant size that appears in the format string + idx_t constant_size; + //! The max numeric width of the specifier (if it is parsed as a number), or -1 if it is not a number + vector numeric_width; + void AddLiteral(string literal); + virtual void AddFormatSpecifier(string preceding_literal, StrTimeSpecifier specifier); +}; + +struct StrfTimeFormat : public StrTimeFormat { + idx_t GetLength(date_t date, dtime_t time); + + void FormatString(date_t date, int32_t data[7], char *target); + void FormatString(date_t date, dtime_t time, char *target); + +protected: + //! The variable-length specifiers. To determine total string size, these need to be checked. + vector var_length_specifiers; + //! Whether or not the current specifier is a special "date" specifier (i.e. one that requires a date_t object to + //! generate) + vector is_date_specifier; + + void AddFormatSpecifier(string preceding_literal, StrTimeSpecifier specifier) override; + static idx_t GetSpecifierLength(StrTimeSpecifier specifier, date_t date, dtime_t time); + char *WriteString(char *target, const string_t &str); + char *Write2(char *target, uint8_t value); + char *WritePadded2(char *target, int32_t value); + char *WritePadded3(char *target, uint32_t value); + char *WritePadded(char *target, int32_t value, int32_t padding); + bool IsDateSpecifier(StrTimeSpecifier specifier); + char *WriteDateSpecifier(StrTimeSpecifier specifier, date_t date, char *target); + char *WriteStandardSpecifier(StrTimeSpecifier specifier, int32_t data[], char *target); +}; + +struct StrpTimeFormat : public StrTimeFormat { +public: + //! Type-safe parsing argument + struct ParseResult { + int32_t data[7]; + string error_message; + idx_t error_position = INVALID_INDEX; + + date_t ToDate(); + timestamp_t ToTimestamp(); + string FormatError(string_t input, const string &format_specifier); + }; + //! 
The full format specifier, for error messages + string format_specifier; + + bool Parse(string_t str, ParseResult &result); + + bool TryParseDate(string_t str, date_t &result, string &error_message); + bool TryParseTimestamp(string_t str, timestamp_t &result, string &error_message); + + date_t ParseDate(string_t str); + timestamp_t ParseTimestamp(string_t str); + +protected: + static string FormatStrpTimeError(const string &input, idx_t position); + void AddFormatSpecifier(string preceding_literal, StrTimeSpecifier specifier) override; + int NumericSpecifierWidth(StrTimeSpecifier specifier); + int32_t TryParseCollection(const char *data, idx_t &pos, idx_t size, const string_t collection[], + idx_t collection_count); +}; + +} // namespace duckdb + + +#include +#include namespace duckdb { -class ClientContext; +struct CopyInfo; +struct FileHandle; +struct StrpTimeFormat; -//! ObjectCache is the base class for objects caches in DuckDB -class ObjectCacheEntry { -public: - virtual ~ObjectCacheEntry() { - } -}; +class FileSystem; -class ObjectCache { -public: - shared_ptr Get(std::string key) { - lock_guard glock(lock); - auto entry = cache.find(key); - if (entry == cache.end()) { - return nullptr; +//! The shifts array allows for linear searching of multi-byte values. For each position, it determines the next +//! position given that we encounter a byte with the given value. +/*! For example, if we have a string "ABAC", the shifts array will have the following values: + * [0] --> ['A'] = 1, all others = 0 + * [1] --> ['B'] = 2, ['A'] = 1, all others = 0 + * [2] --> ['A'] = 3, all others = 0 + * [3] --> ['C'] = 4 (match), 'B' = 2, 'A' = 1, all others = 0 + * Suppose we then search in the following string "ABABAC", our progression will be as follows: + * 'A' -> [1], 'B' -> [2], 'A' -> [3], 'B' -> [2], 'A' -> [3], 'C' -> [4] (match!) 
+ */ +struct TextSearchShiftArray { + TextSearchShiftArray(); + explicit TextSearchShiftArray(string search_term); + + inline bool Match(uint8_t &position, uint8_t byte_value) { + if (position >= length) { + return false; } - return entry->second; + position = shifts[position * 255 + byte_value]; + return position == length; } - void Put(std::string key, shared_ptr value) { - lock_guard glock(lock); - cache[key] = move(value); - } + idx_t length; + unique_ptr shifts; +}; - static ObjectCache &GetObjectCache(ClientContext &context); - static bool ObjectCacheEnabled(ClientContext &context); +struct BufferedCSVReaderOptions { + //! The file path of the CSV file to read + string file_path; + //! Whether file is compressed or not, and if so which compression type + //! ("infer" (default; infer from file extention), "gzip", "none") + string compression = "infer"; + //! Whether or not to automatically detect dialect and datatypes + bool auto_detect = false; + //! Whether or not a delimiter was defined by the user + bool has_delimiter = false; + //! Delimiter to separate columns within each line + string delimiter = ","; + //! Whether or not a quote sign was defined by the user + bool has_quote = false; + //! Quote used for columns that contain reserved characters, e.g., delimiter + string quote = "\""; + //! Whether or not an escape character was defined by the user + bool has_escape = false; + //! Escape character to escape quote character + string escape; + //! Whether or not a header information was given by the user + bool has_header = false; + //! Whether or not the file has a header line + bool header = false; + //! Whether or not header names shall be normalized + bool normalize_names = false; + //! How many leading rows to skip + idx_t skip_rows = 0; + //! Expected number of columns + idx_t num_cols = 0; + //! Specifies the string that represents a null value + string null_str; + //! 
True, if column with that index must skip null check + vector force_not_null; + //! Size of sample chunk used for dialect and type detection + idx_t sample_chunk_size = STANDARD_VECTOR_SIZE; + //! Number of sample chunks used for type detection + idx_t sample_chunks = 10; + //! Number of samples to buffer + idx_t buffer_size = STANDARD_VECTOR_SIZE * 100; + //! Consider all columns to be of type varchar + bool all_varchar = false; + //! The date format to use (if any is specified) + std::map date_format = {{LogicalTypeId::DATE, {}}, {LogicalTypeId::TIMESTAMP, {}}}; + //! Whether or not a type format is specified + std::map has_format = {{LogicalTypeId::DATE, false}, {LogicalTypeId::TIMESTAMP, false}}; + + std::string toString() const { + return "DELIMITER='" + delimiter + (has_delimiter ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) + + ", QUOTE='" + quote + (has_quote ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) + + ", ESCAPE='" + escape + (has_escape ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) + + ", HEADER=" + std::to_string(header) + + (has_header ? "" : (auto_detect ? " (auto detected)" : "' (default)")) + + ", SAMPLE_SIZE=" + std::to_string(sample_chunk_size * sample_chunks) + + ", ALL_VARCHAR=" + std::to_string(all_varchar); + } +}; + +enum class ParserMode : uint8_t { PARSING = 0, SNIFFING_DIALECT = 1, SNIFFING_DATATYPES = 2, PARSING_HEADER = 3 }; + +//! Buffered CSV reader is a class that reads values from a stream and parses them as a CSV file +class BufferedCSVReader { + //! Initial buffer read size; can be extended for long lines + static constexpr idx_t INITIAL_BUFFER_SIZE = 16384; + //! 
Maximum CSV line size: specified because if we reach this amount, we likely have the wrong delimiters + static constexpr idx_t MAXIMUM_CSV_LINE_SIZE = 1048576; + ParserMode mode; + +public: + BufferedCSVReader(ClientContext &context, BufferedCSVReaderOptions options, + const vector &requested_types = vector()); + + BufferedCSVReader(FileSystem &fs, BufferedCSVReaderOptions options, + const vector &requested_types = vector()); + + FileSystem &fs; + BufferedCSVReaderOptions options; + vector sql_types; + vector col_names; + unique_ptr file_handle; + bool plain_file_source = false; + idx_t file_size = 0; + FileCompressionType compression = FileCompressionType::UNCOMPRESSED; + + unique_ptr buffer; + idx_t buffer_size; + idx_t position; + idx_t start = 0; + + idx_t linenr = 0; + bool linenr_estimated = false; + + vector sniffed_column_counts; + bool row_empty = false; + idx_t sample_chunk_idx = 0; + bool jumping_samples = false; + bool end_of_file_reached = false; + bool bom_checked = false; + + idx_t bytes_in_chunk = 0; + double bytes_per_line_avg = 0; + + vector> cached_buffers; + + TextSearchShiftArray delimiter_search, escape_search, quote_search; + + DataChunk parse_chunk; + + std::queue> cached_chunks; + +public: + //! Extract a single DataChunk from the CSV file and stores it in insert_chunk + void ParseCSV(DataChunk &insert_chunk); private: - //! Object Cache - std::unordered_map> cache; - std::mutex lock; + //! Initialize Parser + void Initialize(const vector &requested_types); + //! Initializes the parse_chunk with varchar columns and aligns info with new number of cols + void InitParseChunk(idx_t num_cols); + //! Initializes the TextSearchShiftArrays for complex parser + void PrepareComplexParser(); + //! Try to parse a single datachunk from the file. Throws an exception if anything goes wrong. + void ParseCSV(ParserMode mode); + //! Try to parse a single datachunk from the file. 
Returns whether or not the parsing is successful + bool TryParseCSV(ParserMode mode); + //! Extract a single DataChunk from the CSV file and stores it in insert_chunk + bool TryParseCSV(ParserMode mode, DataChunk &insert_chunk, string &error_message); + //! Sniffs CSV dialect and determines skip rows, header row, column types and column names + vector SniffCSV(const vector &requested_types); + //! Change the date format for the type to the string + void SetDateFormat(const string &format_specifier, const LogicalTypeId &sql_type); + //! Try to cast a string value to the specified sql type + bool TryCastValue(const Value &value, const LogicalType &sql_type); + //! Try to cast a vector of values to the specified sql type + bool TryCastVector(Vector &parse_chunk_col, idx_t size, const LogicalType &sql_type); + //! Skips skip_rows, reads header row from input stream + void SkipRowsAndReadHeader(idx_t skip_rows, bool skip_header); + //! Jumps back to the beginning of input stream and resets necessary internal states + void JumpToBeginning(idx_t skip_rows, bool skip_header); + //! Jumps back to the beginning of input stream and resets necessary internal states + bool JumpToNextSample(); + //! Resets the buffer + void ResetBuffer(); + //! Resets the steam + void ResetStream(); + + //! Parses a CSV file with a one-byte delimiter, escape and quote character + bool TryParseSimpleCSV(DataChunk &insert_chunk, string &error_message); + //! Parses more complex CSV files with multi-byte delimiters, escapes or quotes + bool TryParseComplexCSV(DataChunk &insert_chunk, string &error_message); + + //! Adds a value to the current row + void AddValue(char *str_val, idx_t length, idx_t &column, vector &escape_positions); + //! Adds a row to the insert_chunk, returns true if the chunk is filled as a result of this row being added + bool AddRow(DataChunk &insert_chunk, idx_t &column); + //! 
Finalizes a chunk, parsing all values that have been added so far and adding them to the insert_chunk + void Flush(DataChunk &insert_chunk); + //! Reads a new buffer from the CSV file if the current one has been exhausted + bool ReadBuffer(idx_t &start); + + unique_ptr OpenCSV(const BufferedCSVReaderOptions &options); + + //! First phase of auto detection: detect CSV dialect (i.e. delimiter, quote rules, etc) + void DetectDialect(const vector &requested_types, BufferedCSVReaderOptions &original_options, + vector &info_candidates, idx_t &best_num_cols); + //! Second phase of auto detection: detect candidate types for each column + void DetectCandidateTypes(const vector &type_candidates, + const map> &format_template_candidates, + const vector &info_candidates, + BufferedCSVReaderOptions &original_options, idx_t best_num_cols, + vector> &best_sql_types_candidates, + std::map> &best_format_candidates, + DataChunk &best_header_row); + //! Third phase of auto detection: detect header of CSV file + void DetectHeader(const vector> &best_sql_types_candidates, const DataChunk &best_header_row); + //! Fourth phase of auto detection: refine the types of each column and select which types to use for each column + vector RefineTypeDetection(const vector &type_candidates, + const vector &requested_types, + vector> &best_sql_types_candidates, + map> &best_format_candidates); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/planner/table_filter.hpp +// duckdb/common/types/vector_cache.hpp // // //===----------------------------------------------------------------------===// -#include - namespace duckdb { +class Vector; -//! TableFilter represents a filter pushed down into the table scan. -struct TableFilter { - TableFilter(Value constant, ExpressionType comparison_type, idx_t column_index) - : constant(std::move(constant)), comparison_type(comparison_type), column_index(column_index) {}; +//! 
The VectorCache holds cached data for +class VectorCache { +public: + // Instantiate a vector cache with the given type + VectorCache(const LogicalType &type); - Value constant; - ExpressionType comparison_type; - idx_t column_index; -}; + buffer_ptr buffer; + +public: + void ResetFromCache(Vector &result) const; -struct TableFilterSet { - unordered_map> filters; + const LogicalType &GetType() const; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/statistics/string_statistics.hpp +// duckdb/planner/filter/null_filter.hpp // // //===----------------------------------------------------------------------===// @@ -16404,43 +19094,29 @@ struct TableFilterSet { namespace duckdb { -class StringStatistics : public BaseStatistics { +class IsNullFilter : public TableFilter { public: - constexpr static uint32_t MAX_STRING_MINMAX_SIZE = 8; + IsNullFilter(); public: - explicit StringStatistics(LogicalType type); - - //! The minimum value of the segment, potentially truncated - data_t min[MAX_STRING_MINMAX_SIZE]; - //! The maximum value of the segment, potentially truncated - data_t max[MAX_STRING_MINMAX_SIZE]; - //! Whether or not the column can contain unicode characters - bool has_unicode; - //! The maximum string length in bytes - uint32_t max_string_length; - //! 
Whether or not the segment contains any big strings in overflow blocks - bool has_overflow_strings; + FilterPropagateResult CheckStatistics(BaseStatistics &stats) override; + string ToString(const string &column_name) override; +}; +class IsNotNullFilter : public TableFilter { public: - void Update(const string_t &value); - void Merge(const BaseStatistics &other) override; - - unique_ptr Copy() override; - void Serialize(Serializer &serializer) override; - static unique_ptr Deserialize(Deserializer &source, LogicalType type); - void Verify(Vector &vector, idx_t count) override; - - bool CheckZonemap(ExpressionType comparison_type, const string &value); + IsNotNullFilter(); - string ToString() override; +public: + FilterPropagateResult CheckStatistics(BaseStatistics &stats) override; + string ToString(const string &column_name) override; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/statistics/numeric_statistics.hpp +// duckdb/parser/expression/lambda_expression.hpp // // //===----------------------------------------------------------------------===// @@ -16449,10 +19125,35 @@ class StringStatistics : public BaseStatistics { + +namespace duckdb { + +//! LambdaExpression represents a lambda operator that can be used for e.g. mapping an expression to a list +//! Lambda expressions are written in the form of "capture -> expr", e.g. 
"x -> x + 1" +class LambdaExpression : public ParsedExpression { +public: + LambdaExpression(vector parameters, unique_ptr expression); + + vector parameters; + unique_ptr expression; + +public: + string ToString() const override; + + static bool Equals(const LambdaExpression *a, const LambdaExpression *b); + hash_t Hash() const override; + + unique_ptr Copy() const override; + + void Serialize(Serializer &serializer) override; + static unique_ptr Deserialize(ExpressionType type, Deserializer &source); +}; + +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/storage/statistics/validity_statistics.hpp +// duckdb/parser/expression/subquery_expression.hpp // // //===----------------------------------------------------------------------===// @@ -16461,145 +19162,86 @@ class StringStatistics : public BaseStatistics { -namespace duckdb { -class Serializer; -class Deserializer; -class Vector; -class ValidityStatistics : public BaseStatistics { -public: - explicit ValidityStatistics(bool has_null = false); - //! Whether or not the segment can contain NULL values - bool has_null; +namespace duckdb { +//! Represents a subquery +class SubqueryExpression : public ParsedExpression { public: - void Merge(const BaseStatistics &other) override; - unique_ptr Copy() override; - void Serialize(Serializer &serializer) override; - static unique_ptr Deserialize(Deserializer &source); - void Verify(Vector &vector, idx_t count) override; - - static unique_ptr Combine(const unique_ptr &lstats, - const unique_ptr &rstats); - - string ToString() override; -}; - -} // namespace duckdb + SubqueryExpression(); + //! The actual subquery + unique_ptr subquery; + //! The subquery type + SubqueryType subquery_type; + //! the child expression to compare with (in case of IN, ANY, ALL operators, empty for EXISTS queries and scalar + //! subquery) + unique_ptr child; + //! 
The comparison type of the child expression with the subquery (in case of ANY, ALL operators), empty otherwise + ExpressionType comparison_type; +public: + bool HasSubquery() const override { + return true; + } + bool IsScalar() const override { + return false; + } + string ToString() const override; + static bool Equals(const SubqueryExpression *a, const SubqueryExpression *b); + unique_ptr Copy() const override; + void Serialize(Serializer &serializer) override; + static unique_ptr Deserialize(ExpressionType type, Deserializer &source); +}; +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/windows_undefs.hpp +// duckdb/parser/expression/case_expression.hpp // // //===----------------------------------------------------------------------===// -#ifdef WIN32 - -#ifdef min -#undef min -#endif - -#ifdef max -#undef max -#endif - -#ifdef ERROR -#undef ERROR -#endif - -#ifdef GetCValue -#undef GetCValue -#endif - -#ifdef small -#undef small -#endif -#endif namespace duckdb { -class NumericStatistics : public BaseStatistics { -public: - explicit NumericStatistics(LogicalType type); - NumericStatistics(LogicalType type, Value min, Value max); - - //! The minimum value of the segment - Value min; - //! The maximum value of the segment - Value max; +struct CaseCheck { + unique_ptr when_expr; + unique_ptr then_expr; +}; +//! 
The CaseExpression represents a CASE expression in the query +class CaseExpression : public ParsedExpression { public: - void Merge(const BaseStatistics &other) override; - bool CheckZonemap(ExpressionType comparison_type, const Value &constant); - - unique_ptr Copy() override; - void Serialize(Serializer &serializer) override; - static unique_ptr Deserialize(Deserializer &source, LogicalType type); - void Verify(Vector &vector, idx_t count) override; - - string ToString() override; + CaseExpression(); -private: - template - void TemplatedVerify(Vector &vector, idx_t count); + vector case_checks; + unique_ptr else_expr; public: - template - static inline void UpdateValue(T new_value, T &min, T &max) { - if (LessThan::Operation(new_value, min)) { - min = new_value; - } - if (GreaterThan::Operation(new_value, max)) { - max = new_value; - } - } + string ToString() const override; - template - static inline void Update(SegmentStatistics &stats, T new_value); -}; + static bool Equals(const CaseExpression *a, const CaseExpression *b); -template <> -void NumericStatistics::Update(SegmentStatistics &stats, int8_t new_value); -template <> -void NumericStatistics::Update(SegmentStatistics &stats, int16_t new_value); -template <> -void NumericStatistics::Update(SegmentStatistics &stats, int32_t new_value); -template <> -void NumericStatistics::Update(SegmentStatistics &stats, int64_t new_value); -template <> -void NumericStatistics::Update(SegmentStatistics &stats, uint8_t new_value); -template <> -void NumericStatistics::Update(SegmentStatistics &stats, uint16_t new_value); -template <> -void NumericStatistics::Update(SegmentStatistics &stats, uint32_t new_value); -template <> -void NumericStatistics::Update(SegmentStatistics &stats, uint64_t new_value); -template <> -void NumericStatistics::Update(SegmentStatistics &stats, hugeint_t new_value); -template <> -void NumericStatistics::Update(SegmentStatistics &stats, float new_value); -template <> -void 
NumericStatistics::Update(SegmentStatistics &stats, double new_value); -template <> -void NumericStatistics::Update(SegmentStatistics &stats, interval_t new_value); + unique_ptr Copy() const override; + void Serialize(Serializer &serializer) override; + static unique_ptr Deserialize(ExpressionType type, Deserializer &source); +}; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/expression/case_expression.hpp +// duckdb/parser/expression/positional_reference_expression.hpp // // //===----------------------------------------------------------------------===// @@ -16608,28 +19250,23 @@ void NumericStatistics::Update(SegmentStatistics &stats, interval_t - namespace duckdb { - -struct CaseCheck { - unique_ptr when_expr; - unique_ptr then_expr; -}; - -//! The CaseExpression represents a CASE expression in the query -class CaseExpression : public ParsedExpression { +class PositionalReferenceExpression : public ParsedExpression { public: - CaseExpression(); + PositionalReferenceExpression(idx_t index); - vector case_checks; - unique_ptr else_expr; + idx_t index; public: - string ToString() const override; + bool IsScalar() const override { + return false; + } - static bool Equals(const CaseExpression *a, const CaseExpression *b); + string ToString() const override; + static bool Equals(const PositionalReferenceExpression *a, const PositionalReferenceExpression *b); unique_ptr Copy() const override; + hash_t Hash() const override; void Serialize(Serializer &serializer) override; static unique_ptr Deserialize(ExpressionType type, Deserializer &source); @@ -16638,7 +19275,7 @@ class CaseExpression : public ParsedExpression { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/expression/cast_expression.hpp +// duckdb/parser/expression/function_expression.hpp // // 
//===----------------------------------------------------------------------===// @@ -16649,34 +19286,45 @@ class CaseExpression : public ParsedExpression { namespace duckdb { - -//! CastExpression represents a type cast from one SQL type to another SQL type -class CastExpression : public ParsedExpression { +//! Represents a function call +class FunctionExpression : public ParsedExpression { public: - CastExpression(LogicalType target, unique_ptr child, bool try_cast = false); + FunctionExpression(string schema_name, const string &function_name, vector> children, + unique_ptr filter = nullptr, bool distinct = false, bool is_operator = false); + FunctionExpression(const string &function_name, vector> children, + unique_ptr filter = nullptr, bool distinct = false, bool is_operator = false); - //! The child of the cast expression - unique_ptr child; - //! The type to cast to - LogicalType cast_type; - //! Whether or not this is a try_cast expression - bool try_cast; + //! Schema of the function + string schema; + //! Function name + string function_name; + //! Whether or not the function is an operator, only used for rendering + bool is_operator; + //! List of arguments to the function + vector> children; + //! Whether or not the aggregate function is distinct, only used for aggregates + bool distinct; + //! Expression representing a filter, only used for aggregates + unique_ptr filter; public: string ToString() const override; - static bool Equals(const CastExpression *a, const CastExpression *b); - unique_ptr Copy() const override; + static bool Equals(const FunctionExpression *a, const FunctionExpression *b); + hash_t Hash() const override; + + //! Serializes a FunctionExpression to a stand-alone binary blob void Serialize(Serializer &serializer) override; + //! 
Deserializes a blob back into an FunctionExpression static unique_ptr Deserialize(ExpressionType type, Deserializer &source); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/expression/collate_expression.hpp +// duckdb/parser/expression/parameter_expression.hpp // // //===----------------------------------------------------------------------===// @@ -16686,23 +19334,24 @@ class CastExpression : public ParsedExpression { namespace duckdb { - -//! CollateExpression represents a COLLATE statement -class CollateExpression : public ParsedExpression { +class ParameterExpression : public ParsedExpression { public: - CollateExpression(string collation, unique_ptr child); + ParameterExpression(); - //! The child of the cast expression - unique_ptr child; - //! The collation clause - string collation; + idx_t parameter_nr; public: - string ToString() const override; + bool IsScalar() const override { + return true; + } + bool HasParameter() const override { + return true; + } - static bool Equals(const CollateExpression *a, const CollateExpression *b); + string ToString() const override; unique_ptr Copy() const override; + hash_t Hash() const override; void Serialize(Serializer &serializer) override; static unique_ptr Deserialize(ExpressionType type, Deserializer &source); @@ -16711,7 +19360,7 @@ class CollateExpression : public ParsedExpression { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/expression/comparison_expression.hpp +// duckdb/parser/expression/default_expression.hpp // // //===----------------------------------------------------------------------===// @@ -16721,23 +19370,20 @@ class CollateExpression : public ParsedExpression { namespace duckdb { -//! ComparisonExpression represents a boolean comparison (e.g. =, >=, <>). Always returns a boolean -//! and has two children. 
-class ComparisonExpression : public ParsedExpression { +//! Represents the default value of a column +class DefaultExpression : public ParsedExpression { public: - ComparisonExpression(ExpressionType type, unique_ptr left, unique_ptr right); - - unique_ptr left; - unique_ptr right; + DefaultExpression(); public: - string ToString() const override; + bool IsScalar() const override { + return false; + } - static bool Equals(const ComparisonExpression *a, const ComparisonExpression *b); + string ToString() const override; unique_ptr Copy() const override; - void Serialize(Serializer &serializer) override; static unique_ptr Deserialize(ExpressionType type, Deserializer &source); }; } // namespace duckdb @@ -16781,7 +19427,7 @@ class ConjunctionExpression : public ParsedExpression { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/expression/constant_expression.hpp +// duckdb/parser/expression/operator_expression.hpp // // //===----------------------------------------------------------------------===// @@ -16792,19 +19438,19 @@ class ConjunctionExpression : public ParsedExpression { namespace duckdb { -//! ConstantExpression represents a constant value in the query -class ConstantExpression : public ParsedExpression { +//! Represents a built-in operator expression +class OperatorExpression : public ParsedExpression { public: - explicit ConstantExpression(Value val); + explicit OperatorExpression(ExpressionType type, unique_ptr left = nullptr, + unique_ptr right = nullptr); + OperatorExpression(ExpressionType type, vector> children); - //! 
The constant value referenced - Value value; + vector> children; public: string ToString() const override; - static bool Equals(const ConstantExpression *a, const ConstantExpression *b); - hash_t Hash() const override; + static bool Equals(const OperatorExpression *a, const OperatorExpression *b); unique_ptr Copy() const override; @@ -16815,7 +19461,7 @@ class ConstantExpression : public ParsedExpression { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/expression/default_expression.hpp +// duckdb/parser/expression/between_expression.hpp // // //===----------------------------------------------------------------------===// @@ -16825,27 +19471,34 @@ class ConstantExpression : public ParsedExpression { namespace duckdb { -//! Represents the default value of a column -class DefaultExpression : public ParsedExpression { -public: - DefaultExpression(); +class BetweenExpression : public ParsedExpression { public: - bool IsScalar() const override { - return false; - } + BetweenExpression(unique_ptr input, unique_ptr lower, + unique_ptr upper); + + unique_ptr input; + unique_ptr lower; + unique_ptr upper; +public: string ToString() const override; + static bool Equals(const BetweenExpression *a, const BetweenExpression *b); + unique_ptr Copy() const override; + void Serialize(Serializer &serializer) override; static unique_ptr Deserialize(ExpressionType type, Deserializer &source); }; } // namespace duckdb + + + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/expression/function_expression.hpp +// duckdb/parser/expression/cast_expression.hpp // // //===----------------------------------------------------------------------===// @@ -16856,45 +19509,35 @@ class DefaultExpression : public ParsedExpression { namespace duckdb { -//! Represents a function call -class FunctionExpression : public ParsedExpression { + +//! 
CastExpression represents a type cast from one SQL type to another SQL type +class CastExpression : public ParsedExpression { public: - FunctionExpression(string schema_name, const string &function_name, vector> &children, - unique_ptr filter = nullptr, bool distinct = false, bool is_operator = false); - FunctionExpression(const string &function_name, vector> &children, - unique_ptr filter = nullptr, bool distinct = false, bool is_operator = false); + CastExpression(LogicalType target, unique_ptr child, bool try_cast = false); - //! Schema of the function - string schema; - //! Function name - string function_name; - //! Whether or not the function is an operator, only used for rendering - bool is_operator; - //! List of arguments to the function - vector> children; - //! Whether or not the aggregate function is distinct, only used for aggregates - bool distinct; - //! Expression representing a filter, only used for aggregates - unique_ptr filter; + //! The child of the cast expression + unique_ptr child; + //! The type to cast to + LogicalType cast_type; + //! Whether or not this is a try_cast expression + bool try_cast; public: string ToString() const override; - unique_ptr Copy() const override; + static bool Equals(const CastExpression *a, const CastExpression *b); - static bool Equals(const FunctionExpression *a, const FunctionExpression *b); - hash_t Hash() const override; + unique_ptr Copy() const override; - //! Serializes a FunctionExpression to a stand-alone binary blob void Serialize(Serializer &serializer) override; - //! 
Deserializes a blob back into an FunctionExpression static unique_ptr Deserialize(ExpressionType type, Deserializer &source); }; } // namespace duckdb + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/expression/lambda_expression.hpp +// duckdb/parser/expression/collate_expression.hpp // // //===----------------------------------------------------------------------===// @@ -16903,46 +19546,70 @@ class FunctionExpression : public ParsedExpression { - namespace duckdb { -//! LambdaExpression represents a lambda operator that can be used for e.g. mapping an expression to a list -//! Lambda expressions are written in the form of "capture -> expr", e.g. "x -> x + 1" -class LambdaExpression : public ParsedExpression { +//! CollateExpression represents a COLLATE statement +class CollateExpression : public ParsedExpression { public: - LambdaExpression(vector parameters, unique_ptr expression); + CollateExpression(string collation, unique_ptr child); - vector parameters; - unique_ptr expression; + //! The child of the cast expression + unique_ptr child; + //! The collation clause + string collation; public: string ToString() const override; - static bool Equals(const LambdaExpression *a, const LambdaExpression *b); - hash_t Hash() const override; + static bool Equals(const CollateExpression *a, const CollateExpression *b); unique_ptr Copy() const override; void Serialize(Serializer &serializer) override; static unique_ptr Deserialize(ExpressionType type, Deserializer &source); }; - } // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/parser/expression/comparison_expression.hpp +// +// +//===----------------------------------------------------------------------===// + + + +namespace duckdb { +//! ComparisonExpression represents a boolean comparison (e.g. =, >=, <>). Always returns a boolean +//! and has two children. 
+class ComparisonExpression : public ParsedExpression { +public: + ComparisonExpression(ExpressionType type, unique_ptr left, unique_ptr right); + unique_ptr left; + unique_ptr right; +public: + string ToString() const override; + static bool Equals(const ComparisonExpression *a, const ComparisonExpression *b); + unique_ptr Copy() const override; + void Serialize(Serializer &serializer) override; + static unique_ptr Deserialize(ExpressionType type, Deserializer &source); +}; +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/expression/operator_expression.hpp +// duckdb/parser/expression/constant_expression.hpp // // //===----------------------------------------------------------------------===// @@ -16953,31 +19620,68 @@ class LambdaExpression : public ParsedExpression { namespace duckdb { -//! Represents a built-in operator expression -class OperatorExpression : public ParsedExpression { -public: - explicit OperatorExpression(ExpressionType type, unique_ptr left = nullptr, - unique_ptr right = nullptr); - OperatorExpression(ExpressionType type, vector> children); - vector> children; +//! ConstantExpression represents a constant value in the query +class ConstantExpression : public ParsedExpression { +public: + explicit ConstantExpression(Value val); + + //! 
The constant value referenced + Value value; + +public: + string ToString() const override; + + static bool Equals(const ConstantExpression *a, const ConstantExpression *b); + hash_t Hash() const override; + + unique_ptr Copy() const override; + + void Serialize(Serializer &serializer) override; + static unique_ptr Deserialize(ExpressionType type, Deserializer &source); +}; + +} // namespace duckdb + + + + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/parser/expression/star_expression.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { + +//! Represents a * expression in the SELECT clause +class StarExpression : public ParsedExpression { +public: + StarExpression(); public: string ToString() const override; - static bool Equals(const OperatorExpression *a, const OperatorExpression *b); - unique_ptr Copy() const override; - void Serialize(Serializer &serializer) override; static unique_ptr Deserialize(ExpressionType type, Deserializer &source); }; } // namespace duckdb + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/expression/parameter_expression.hpp +// duckdb/parser/expression/table_star_expression.hpp // // //===----------------------------------------------------------------------===// @@ -16987,34 +19691,31 @@ class OperatorExpression : public ParsedExpression { namespace duckdb { -class ParameterExpression : public ParsedExpression { -public: - ParameterExpression(); - - idx_t parameter_nr; +//! 
Represents a table.* expression in the SELECT clause +class TableStarExpression : public ParsedExpression { public: - bool IsScalar() const override { - return true; - } - bool HasParameter() const override { - return true; - } + explicit TableStarExpression(string relation_name); +public: string ToString() const override; + static bool Equals(const TableStarExpression *a, const TableStarExpression *b); + unique_ptr Copy() const override; - hash_t Hash() const override; void Serialize(Serializer &serializer) override; static unique_ptr Deserialize(ExpressionType type, Deserializer &source); + + string relation_name; }; } // namespace duckdb + //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/expression/positional_reference_expression.hpp +// duckdb/parser/parsed_data/create_sequence_info.hpp // // //===----------------------------------------------------------------------===// @@ -17023,33 +19724,50 @@ class ParameterExpression : public ParsedExpression { -namespace duckdb { -class PositionalReferenceExpression : public ParsedExpression { -public: - PositionalReferenceExpression(idx_t index); - idx_t index; +namespace duckdb { -public: - bool IsScalar() const override { - return false; +struct CreateSequenceInfo : public CreateInfo { + CreateSequenceInfo() + : CreateInfo(CatalogType::SEQUENCE_ENTRY), name(string()), usage_count(0), increment(1), min_value(1), + max_value(NumericLimits::Maximum()), start_value(1), cycle(false) { } - string ToString() const override; - - static bool Equals(const PositionalReferenceExpression *a, const PositionalReferenceExpression *b); - unique_ptr Copy() const override; - hash_t Hash() const override; + //! Sequence name to create + string name; + //! Usage count of the sequence + uint64_t usage_count; + //! The increment value + int64_t increment; + //! The minimum value of the sequence + int64_t min_value; + //! 
The maximum value of the sequence + int64_t max_value; + //! The start value of the sequence + int64_t start_value; + //! Whether or not the sequence cycles + bool cycle; - void Serialize(Serializer &serializer) override; - static unique_ptr Deserialize(ExpressionType type, Deserializer &source); +public: + unique_ptr Copy() const override { + auto result = make_unique(); + CopyProperties(*result); + result->name = name; + result->usage_count = usage_count; + result->increment = increment; + result->min_value = min_value; + result->max_value = max_value; + result->start_value = start_value; + result->cycle = cycle; + return move(result); + } }; -} // namespace duckdb +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/expression/star_expression.hpp +// duckdb/parser/parsed_data/show_select_info.hpp // // //===----------------------------------------------------------------------===// @@ -17058,26 +19776,31 @@ class PositionalReferenceExpression : public ParsedExpression { -namespace duckdb { - -//! Represents a * expression in the SELECT clause -class StarExpression : public ParsedExpression { -public: - StarExpression(); -public: - string ToString() const override; +namespace duckdb { - unique_ptr Copy() const override; +struct ShowSelectInfo : public ParseInfo { + //! Types of projected columns + vector types; + //! The QueryNode of select query + unique_ptr query; + //! 
Aliases of projected columns + vector aliases; - static unique_ptr Deserialize(ExpressionType type, Deserializer &source); + unique_ptr Copy() { + auto result = make_unique(); + result->types = types; + result->query = query->Copy(); + result->aliases = aliases; + return result; + } }; -} // namespace duckdb +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/expression/subquery_expression.hpp +// duckdb/parser/parsed_data/drop_info.hpp // // //===----------------------------------------------------------------------===// @@ -17087,47 +19810,41 @@ class StarExpression : public ParsedExpression { - namespace duckdb { -//! Represents a subquery -class SubqueryExpression : public ParsedExpression { -public: - SubqueryExpression(); +struct DropInfo : public ParseInfo { + DropInfo() : schema(INVALID_SCHEMA), if_exists(false), cascade(false) { + } - //! The actual subquery - unique_ptr subquery; - //! The subquery type - SubqueryType subquery_type; - //! the child expression to compare with (in case of IN, ANY, ALL operators, empty for EXISTS queries and scalar - //! subquery) - unique_ptr child; - //! The comparison type of the child expression with the subquery (in case of ANY, ALL operators), empty otherwise - ExpressionType comparison_type; + //! The catalog type to drop + CatalogType type; + //! Schema name to drop from, if any + string schema; + //! Element name to drop + string name; + //! Ignore if the entry does not exist instead of failing + bool if_exists = false; + //! Cascade drop (drop all dependents instead of throwing an error if there + //! 
are any) + bool cascade = false; public: - bool HasSubquery() const override { - return true; - } - bool IsScalar() const override { - return false; + unique_ptr Copy() const { + auto result = make_unique(); + result->type = type; + result->schema = schema; + result->name = name; + result->if_exists = if_exists; + result->cascade = cascade; + return result; } - - string ToString() const override; - - static bool Equals(const SubqueryExpression *a, const SubqueryExpression *b); - - unique_ptr Copy() const override; - - void Serialize(Serializer &serializer) override; - static unique_ptr Deserialize(ExpressionType type, Deserializer &source); }; -} // namespace duckdb +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/expression/table_star_expression.hpp +// duckdb/parser/parsed_data/create_view_info.hpp // // //===----------------------------------------------------------------------===// @@ -17136,28 +19853,37 @@ class SubqueryExpression : public ParsedExpression { -namespace duckdb { - -//! Represents a table.* expression in the SELECT clause -class TableStarExpression : public ParsedExpression { -public: - explicit TableStarExpression(string relation_name); - -public: - string ToString() const override; - static bool Equals(const TableStarExpression *a, const TableStarExpression *b); +namespace duckdb { - unique_ptr Copy() const override; +struct CreateViewInfo : public CreateInfo { + CreateViewInfo() : CreateInfo(CatalogType::VIEW_ENTRY) { + } + CreateViewInfo(string schema, string view_name) + : CreateInfo(CatalogType::VIEW_ENTRY, schema), view_name(view_name) { + } - void Serialize(Serializer &serializer) override; - static unique_ptr Deserialize(ExpressionType type, Deserializer &source); + //! Table name to insert to + string view_name; + //! Aliases of the view + vector aliases; + //! Return types + vector types; + //! 
The SelectStatement of the view + unique_ptr query; - string relation_name; +public: + unique_ptr Copy() const override { + auto result = make_unique(schema, view_name); + CopyProperties(*result); + result->aliases = aliases; + result->types = types; + result->query = unique_ptr_cast(query->Copy()); + return move(result); + } }; -} // namespace duckdb - +} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // @@ -17383,64 +20109,23 @@ struct RenameViewInfo : public AlterViewInfo { RenameViewInfo(string schema, string view, string new_name) : AlterViewInfo(AlterViewType::RENAME_VIEW, schema, view), new_view_name(new_name) { } - ~RenameViewInfo() override { - } - - //! Relation new name - string new_view_name; - -public: - unique_ptr Copy() const override; - void Serialize(Serializer &serializer) override; - static unique_ptr Deserialize(Deserializer &source, string schema, string table); -}; - -} // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/parser/parsed_data/create_aggregate_function_info.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - -namespace duckdb { - -struct CreateAggregateFunctionInfo : public CreateFunctionInfo { - explicit CreateAggregateFunctionInfo(AggregateFunction function) - : CreateFunctionInfo(CatalogType::AGGREGATE_FUNCTION_ENTRY), functions(function.name) { - this->name = function.name; - functions.AddFunction(move(function)); - } - - explicit CreateAggregateFunctionInfo(AggregateFunctionSet set) - : CreateFunctionInfo(CatalogType::AGGREGATE_FUNCTION_ENTRY), functions(move(set)) { - this->name = functions.name; - for (auto &func : functions.functions) { - func.name = functions.name; - } + ~RenameViewInfo() override { } - AggregateFunctionSet functions; + //! 
Relation new name + string new_view_name; public: - unique_ptr Copy() const override { - auto result = make_unique(functions); - CopyProperties(*result); - return move(result); - } + unique_ptr Copy() const override; + void Serialize(Serializer &serializer) override; + static unique_ptr Deserialize(Deserializer &source, string schema, string table); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/parsed_data/create_collation_info.hpp +// duckdb/parser/parsed_data/create_schema_info.hpp // // //===----------------------------------------------------------------------===// @@ -17449,30 +20134,15 @@ struct CreateAggregateFunctionInfo : public CreateFunctionInfo { - namespace duckdb { -struct CreateCollationInfo : public CreateInfo { - CreateCollationInfo(string name_p, ScalarFunction function_p, bool combinable_p, bool not_required_for_equality_p) - : CreateInfo(CatalogType::COLLATION_ENTRY), function(move(function_p)), combinable(combinable_p), - not_required_for_equality(not_required_for_equality_p) { - this->name = move(name_p); +struct CreateSchemaInfo : public CreateInfo { + CreateSchemaInfo() : CreateInfo(CatalogType::SCHEMA_ENTRY) { } - //! The name of the collation - string name; - //! The collation function to push in case collation is required - ScalarFunction function; - //! Whether or not the collation can be combined with other collations. - bool combinable; - //! Whether or not the collation is required for equality comparisons or not. For many collations a binary - //! comparison for equality comparisons is correct, allowing us to skip the collation in these cases which greatly - //! speeds up processing. 
- bool not_required_for_equality; - public: unique_ptr Copy() const override { - auto result = make_unique(name, function, combinable, not_required_for_equality); + auto result = make_unique(); CopyProperties(*result); return move(result); } @@ -17482,20 +20152,7 @@ struct CreateCollationInfo : public CreateInfo { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/parsed_data/create_index_info.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/parser/tableref/basetableref.hpp +// duckdb/parser/parsed_data/create_aggregate_function_info.hpp // // //===----------------------------------------------------------------------===// @@ -17506,65 +20163,28 @@ struct CreateCollationInfo : public CreateInfo { namespace duckdb { -//! Represents a TableReference to a base table in the schema -class BaseTableRef : public TableRef { -public: - BaseTableRef() : TableRef(TableReferenceType::BASE_TABLE), schema_name(INVALID_SCHEMA) { - } - - //! Schema name - string schema_name; - //! Table name - string table_name; - //! Alises for the column names - vector column_name_alias; -public: - string ToString() const override { - return "GET(" + schema_name + "." + table_name + ")"; +struct CreateAggregateFunctionInfo : public CreateFunctionInfo { + explicit CreateAggregateFunctionInfo(AggregateFunction function) + : CreateFunctionInfo(CatalogType::AGGREGATE_FUNCTION_ENTRY), functions(function.name) { + this->name = function.name; + functions.AddFunction(move(function)); } - bool Equals(const TableRef *other_p) const override; - - unique_ptr Copy() override; - - //! Serializes a blob into a BaseTableRef - void Serialize(Serializer &serializer) override; - //! 
Deserializes a blob back into a BaseTableRef - static unique_ptr Deserialize(Deserializer &source); -}; -} // namespace duckdb - - - -namespace duckdb { - -struct CreateIndexInfo : public CreateInfo { - CreateIndexInfo() : CreateInfo(CatalogType::INDEX_ENTRY) { + explicit CreateAggregateFunctionInfo(AggregateFunctionSet set) + : CreateFunctionInfo(CatalogType::AGGREGATE_FUNCTION_ENTRY), functions(move(set)) { + this->name = functions.name; + for (auto &func : functions.functions) { + func.name = functions.name; + } } - //! Index Type (e.g., B+-tree, Skip-List, ...) - IndexType index_type; - //! Name of the Index - string index_name; - //! If it is an unique index - bool unique = false; - //! The table to create the index on - unique_ptr table; - //! Set of expressions to index by - vector> expressions; + AggregateFunctionSet functions; public: unique_ptr Copy() const override { - auto result = make_unique(); + auto result = make_unique(functions); CopyProperties(*result); - result->index_type = index_type; - result->index_name = index_name; - result->unique = unique; - result->table = unique_ptr_cast(table->Copy()); - for (auto &expr : expressions) { - result->expressions.push_back(expr->Copy()); - } return move(result); } }; @@ -17644,7 +20264,7 @@ struct CreateMacroInfo : public CreateFunctionInfo { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/parsed_data/create_pragma_function_info.hpp +// duckdb/parser/parsed_data/transaction_info.hpp // // //===----------------------------------------------------------------------===// @@ -17653,30 +20273,41 @@ struct CreateMacroInfo : public CreateFunctionInfo { - namespace duckdb { -struct CreatePragmaFunctionInfo : public CreateFunctionInfo { - explicit CreatePragmaFunctionInfo(PragmaFunction function) - : CreateFunctionInfo(CatalogType::PRAGMA_FUNCTION_ENTRY) { - functions.push_back(move(function)); - this->name = function.name; - } - 
CreatePragmaFunctionInfo(string name, vector functions_) - : CreateFunctionInfo(CatalogType::PRAGMA_FUNCTION_ENTRY), functions(move(functions_)) { - this->name = name; - for (auto &function : functions) { - function.name = name; - } +enum class TransactionType : uint8_t { INVALID, BEGIN_TRANSACTION, COMMIT, ROLLBACK }; + +struct TransactionInfo : public ParseInfo { + explicit TransactionInfo(TransactionType type) : type(type) { } - vector functions; + //! The type of transaction statement + TransactionType type; +}; + +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/parser/parsed_data/vacuum_info.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { + +struct LoadInfo : public ParseInfo { + std::string filename; public: - unique_ptr Copy() const override { - auto result = make_unique(functions[0].name, functions); - CopyProperties(*result); - return move(result); + unique_ptr Copy() const { + auto result = make_unique(); + result->filename = filename; + return result; } }; @@ -17684,7 +20315,20 @@ struct CreatePragmaFunctionInfo : public CreateFunctionInfo { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/parsed_data/create_schema_info.hpp +// duckdb/parser/parsed_data/create_index_info.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/parser/tableref/basetableref.hpp // // //===----------------------------------------------------------------------===// @@ -17693,16 +20337,64 @@ struct CreatePragmaFunctionInfo : public CreateFunctionInfo { + namespace duckdb { +//! 
Represents a TableReference to a base table in the schema +class BaseTableRef : public TableRef { +public: + BaseTableRef() : TableRef(TableReferenceType::BASE_TABLE), schema_name(INVALID_SCHEMA) { + } -struct CreateSchemaInfo : public CreateInfo { - CreateSchemaInfo() : CreateInfo(CatalogType::SCHEMA_ENTRY) { + //! Schema name + string schema_name; + //! Table name + string table_name; + //! Alises for the column names + vector column_name_alias; + +public: + string ToString() const override; + bool Equals(const TableRef *other_p) const override; + + unique_ptr Copy() override; + + //! Serializes a blob into a BaseTableRef + void Serialize(Serializer &serializer) override; + //! Deserializes a blob back into a BaseTableRef + static unique_ptr Deserialize(Deserializer &source); +}; +} // namespace duckdb + + + +namespace duckdb { + +struct CreateIndexInfo : public CreateInfo { + CreateIndexInfo() : CreateInfo(CatalogType::INDEX_ENTRY) { } + //! Index Type (e.g., B+-tree, Skip-List, ...) + IndexType index_type; + //! Name of the Index + string index_name; + //! If it is an unique index + bool unique = false; + //! The table to create the index on + unique_ptr table; + //! 
Set of expressions to index by + vector> expressions; + public: unique_ptr Copy() const override { - auto result = make_unique(); + auto result = make_unique(); CopyProperties(*result); + result->index_type = index_type; + result->index_name = index_name; + result->unique = unique; + result->table = unique_ptr_cast(table->Copy()); + for (auto &expr : expressions) { + result->expressions.push_back(expr->Copy()); + } return move(result); } }; @@ -17711,7 +20403,7 @@ struct CreateSchemaInfo : public CreateInfo { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/parsed_data/create_sequence_info.hpp +// duckdb/parser/parsed_data/create_collation_info.hpp // // //===----------------------------------------------------------------------===// @@ -17723,38 +20415,28 @@ struct CreateSchemaInfo : public CreateInfo { namespace duckdb { -struct CreateSequenceInfo : public CreateInfo { - CreateSequenceInfo() - : CreateInfo(CatalogType::SEQUENCE_ENTRY), name(string()), usage_count(0), increment(1), min_value(1), - max_value(NumericLimits::Maximum()), start_value(1), cycle(false) { +struct CreateCollationInfo : public CreateInfo { + CreateCollationInfo(string name_p, ScalarFunction function_p, bool combinable_p, bool not_required_for_equality_p) + : CreateInfo(CatalogType::COLLATION_ENTRY), function(move(function_p)), combinable(combinable_p), + not_required_for_equality(not_required_for_equality_p) { + this->name = move(name_p); } - //! Sequence name to create - string name; - //! Usage count of the sequence - uint64_t usage_count; - //! The increment value - int64_t increment; - //! The minimum value of the sequence - int64_t min_value; - //! The maximum value of the sequence - int64_t max_value; - //! The start value of the sequence - int64_t start_value; - //! Whether or not the sequence cycles - bool cycle; + //! The name of the collation + string name; + //! 
The collation function to push in case collation is required + ScalarFunction function; + //! Whether or not the collation can be combined with other collations. + bool combinable; + //! Whether or not the collation is required for equality comparisons or not. For many collations a binary + //! comparison for equality comparisons is correct, allowing us to skip the collation in these cases which greatly + //! speeds up processing. + bool not_required_for_equality; public: unique_ptr Copy() const override { - auto result = make_unique(); + auto result = make_unique(name, function, combinable, not_required_for_equality); CopyProperties(*result); - result->name = name; - result->usage_count = usage_count; - result->increment = increment; - result->min_value = min_value; - result->max_value = max_value; - result->start_value = start_value; - result->cycle = cycle; return move(result); } }; @@ -17763,7 +20445,7 @@ struct CreateSequenceInfo : public CreateInfo { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/parsed_data/create_view_info.hpp +// duckdb/parser/parsed_data/create_pragma_function_info.hpp // // //===----------------------------------------------------------------------===// @@ -17775,29 +20457,26 @@ struct CreateSequenceInfo : public CreateInfo { namespace duckdb { -struct CreateViewInfo : public CreateInfo { - CreateViewInfo() : CreateInfo(CatalogType::VIEW_ENTRY) { +struct CreatePragmaFunctionInfo : public CreateFunctionInfo { + explicit CreatePragmaFunctionInfo(PragmaFunction function) + : CreateFunctionInfo(CatalogType::PRAGMA_FUNCTION_ENTRY) { + functions.push_back(move(function)); + this->name = function.name; } - CreateViewInfo(string schema, string view_name) - : CreateInfo(CatalogType::VIEW_ENTRY, schema), view_name(view_name) { + CreatePragmaFunctionInfo(string name, vector functions_) + : CreateFunctionInfo(CatalogType::PRAGMA_FUNCTION_ENTRY), functions(move(functions_)) { + 
this->name = name; + for (auto &function : functions) { + function.name = name; + } } - //! Table name to insert to - string view_name; - //! Aliases of the view - vector aliases; - //! Return types - vector types; - //! The SelectStatement of the view - unique_ptr query; + vector functions; public: unique_ptr Copy() const override { - auto result = make_unique(schema, view_name); + auto result = make_unique(functions[0].name, functions); CopyProperties(*result); - result->aliases = aliases; - result->types = types; - result->query = unique_ptr_cast(query->Copy()); return move(result); } }; @@ -17806,7 +20485,7 @@ struct CreateViewInfo : public CreateInfo { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/parsed_data/drop_info.hpp +// duckdb/parser/parsed_data/export_table_data.hpp // // //===----------------------------------------------------------------------===// @@ -17818,32 +20497,19 @@ struct CreateViewInfo : public CreateInfo { namespace duckdb { -struct DropInfo : public ParseInfo { - DropInfo() : schema(INVALID_SCHEMA), if_exists(false), cascade(false) { - } +struct ExportedTableData { + //! Name of the exported table + string table_name; - //! The catalog type to drop - CatalogType type; - //! Schema name to drop from, if any - string schema; - //! Element name to drop - string name; - //! Ignore if the entry does not exist instead of failing - bool if_exists = false; - //! Cascade drop (drop all dependents instead of throwing an error if there - //! are any) - bool cascade = false; + //! Name of the schema + string schema_name; -public: - unique_ptr Copy() const { - auto result = make_unique(); - result->type = type; - result->schema = schema; - result->name = name; - result->if_exists = if_exists; - result->cascade = cascade; - return result; - } + //! 
Path to be exported + string file_path; +}; + +struct BoundExportData : public ParseInfo { + unordered_map data; }; } // namespace duckdb @@ -17861,22 +20527,15 @@ struct DropInfo : public ParseInfo { namespace duckdb { -struct LoadInfo : public ParseInfo { - std::string filename; - -public: - unique_ptr Copy() const { - auto result = make_unique(); - result->filename = filename; - return result; - } +struct VacuumInfo : public ParseInfo { + // nothing for now }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/parsed_data/show_select_info.hpp +// duckdb/parser/tableref/expressionlistref.hpp // // //===----------------------------------------------------------------------===// @@ -17886,30 +20545,37 @@ struct LoadInfo : public ParseInfo { -namespace duckdb { -struct ShowSelectInfo : public ParseInfo { - //! Types of projected columns - vector types; - //! The QueryNode of select query - unique_ptr query; - //! Aliases of projected columns - vector aliases; - unique_ptr Copy() { - auto result = make_unique(); - result->types = types; - result->query = query->Copy(); - result->aliases = aliases; - return result; +namespace duckdb { +//! Represents an expression list as generated by a VALUES statement +class ExpressionListRef : public TableRef { +public: + ExpressionListRef() : TableRef(TableReferenceType::EXPRESSION_LIST) { } -}; + //! Value list, only used for VALUES statement + vector>> values; + //! Expected SQL types + vector expected_types; + //! The set of expected names + vector expected_names; + +public: + bool Equals(const TableRef *other_p) const override; + + unique_ptr Copy() override; + + //! Serializes a blob into a ExpressionListRef + void Serialize(Serializer &serializer) override; + //! 
Deserializes a blob back into a ExpressionListRef + static unique_ptr Deserialize(Deserializer &source); +}; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/parser/parsed_data/transaction_info.hpp +// duckdb/parser/tableref/table_function_ref.hpp // // //===----------------------------------------------------------------------===// @@ -17918,37 +20584,34 @@ struct ShowSelectInfo : public ParseInfo { -namespace duckdb { - -enum class TransactionType : uint8_t { INVALID, BEGIN_TRANSACTION, COMMIT, ROLLBACK }; -struct TransactionInfo : public ParseInfo { - explicit TransactionInfo(TransactionType type) : type(type) { - } - //! The type of transaction statement - TransactionType type; -}; -} // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/parser/parsed_data/vacuum_info.hpp -// -// -//===----------------------------------------------------------------------===// +namespace duckdb { +//! Represents a Table producing function +class TableFunctionRef : public TableRef { +public: + TableFunctionRef() : TableRef(TableReferenceType::TABLE_FUNCTION) { + } + unique_ptr function; + vector column_name_alias; + // if the function takes a subquery as argument its in here + unique_ptr subquery; +public: + string ToString() const override; + bool Equals(const TableRef *other_p) const override; -namespace duckdb { + unique_ptr Copy() override; -struct VacuumInfo : public ParseInfo { - // nothing for now + //! Serializes a blob into a BaseTableRef + void Serialize(Serializer &serializer) override; + //! 
Deserializes a blob back into a BaseTableRef + static unique_ptr Deserialize(Deserializer &source); }; - } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB @@ -17985,6 +20648,8 @@ class CrossProductRef : public TableRef { static unique_ptr Deserialize(Deserializer &source); }; } // namespace duckdb + + //===----------------------------------------------------------------------===// // DuckDB // @@ -18015,46 +20680,8 @@ class EmptyTableRef : public TableRef { static unique_ptr Deserialize(Deserializer &source); }; } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/parser/tableref/expressionlistref.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - - - -namespace duckdb { -//! Represents an expression list as generated by a VALUES statement -class ExpressionListRef : public TableRef { -public: - ExpressionListRef() : TableRef(TableReferenceType::EXPRESSION_LIST) { - } - - //! Value list, only used for VALUES statement - vector>> values; - //! Expected SQL types - vector expected_types; - //! The set of expected names - vector expected_names; -public: - bool Equals(const TableRef *other_p) const override; - - unique_ptr Copy() override; - //! Serializes a blob into a ExpressionListRef - void Serialize(Serializer &serializer) override; - //! 
Deserializes a blob back into a ExpressionListRef - static unique_ptr Deserialize(Deserializer &source); -}; -} // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // @@ -18103,10 +20730,6 @@ class JoinRef : public TableRef { }; } // namespace duckdb - - - - //===----------------------------------------------------------------------===// // DuckDB // @@ -18143,47 +20766,4 @@ class SubqueryRef : public TableRef { }; } // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/parser/tableref/table_function_ref.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - - - -namespace duckdb { -//! Represents a Table producing function -class TableFunctionRef : public TableRef { -public: - TableFunctionRef() : TableRef(TableReferenceType::TABLE_FUNCTION) { - } - - unique_ptr function; - vector column_name_alias; - - // if the function takes a subquery as argument its in here - unique_ptr subquery; - -public: - string ToString() const override { - return function->ToString(); - } - - bool Equals(const TableRef *other_p) const override; - - unique_ptr Copy() override; - - //! Serializes a blob into a BaseTableRef - void Serialize(Serializer &serializer) override; - //! 
Deserializes a blob back into a BaseTableRef - static unique_ptr Deserialize(Deserializer &source); -}; -} // namespace duckdb diff --git a/velox/external/duckdb/parquet-amalgamation.cpp b/velox/external/duckdb/parquet-amalgamation.cpp index e9ea8843775d..38678562795c 100644 --- a/velox/external/duckdb/parquet-amalgamation.cpp +++ b/velox/external/duckdb/parquet-amalgamation.cpp @@ -9,11513 +9,10475 @@ // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #1 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 // See the end of this file for a list -/** - * Autogenerated by Thrift Compiler (0.11.0) +/* ****************************************************************** + * Common functions of New Generation Entropy library + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* ************************************* +* Dependencies +***************************************/ // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #1 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 // See the end of this file for a list -/** - * Autogenerated by Thrift Compiler (0.11.0) +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. 
* - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ -#ifndef parquet_CONSTANTS_H -#define parquet_CONSTANTS_H +#ifndef MEM_H_MODULE +#define MEM_H_MODULE +/*-**************************************** +* Dependencies +******************************************/ +#include /* size_t, ptrdiff_t */ +#include /* memcpy */ -namespace duckdb_parquet { namespace format { -class parquetConstants { - public: - parquetConstants(); +/*-**************************************** +* Compiler specifics +******************************************/ +#if defined(_MSC_VER) /* Visual Studio */ +# include /* _byteswap_ulong */ +# include /* _byteswap_* */ +#endif +#if defined(__GNUC__) +# define MEM_STATIC static __inline __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define MEM_STATIC static inline +#elif defined(_MSC_VER) +# define MEM_STATIC static __inline +#else +# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif -}; +#ifndef __has_builtin +# define __has_builtin(x) 0 /* compat. 
with non-clang compilers */ +#endif -extern const parquetConstants g_parquet_constants; +/* code only tested on 32 and 64 bits systems */ +#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; } +MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } -}} // namespace +/* detects whether we are being compiled under msan */ +#if defined (__has_feature) +# if __has_feature(memory_sanitizer) +# define MEMORY_SANITIZER 1 +# endif +#endif +/*-************************************************************** +* Basic Types +*****************************************************************/ +#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef int16_t S16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef int64_t S64; +#else +# include +#if CHAR_BIT != 8 +# error "this implementation requires char to be exactly 8-bit type" +#endif + typedef unsigned char BYTE; +#if USHRT_MAX != 65535 +# error "this implementation requires short to be exactly 16-bit type" +#endif + typedef unsigned short U16; + typedef signed short S16; +#if UINT_MAX != 4294967295 +# error "this implementation requires int to be exactly 32-bit type" +#endif + typedef unsigned int U32; + typedef signed int S32; +/* note : there are no limits defined for long long type in C90. + * limits exist in C99, however, in such case, is preferred */ + typedef unsigned long long U64; + typedef signed long long S64; #endif +namespace duckdb_zstd { -// LICENSE_CHANGE_END +/*-************************************************************** +* Memory I/O +*****************************************************************/ +/* MEM_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. 
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets depending on alignment. + * In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6) + * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. + * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define MEM_FORCE_MEMORY_ACCESS 2 +# elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# define MEM_FORCE_MEMORY_ACCESS 1 +# endif +#endif +MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; } +MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; } -namespace duckdb_parquet { namespace format { +MEM_STATIC unsigned MEM_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} -const parquetConstants g_parquet_constants; +#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) -parquetConstants::parquetConstants() { +/* violates C standard, by lying on structure alignment. 
+Only use if no other choice to achieve best performance on target platform */ +MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } +MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } +MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } +MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } + +#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) + __pragma( pack(push, 1) ) + typedef struct { U16 v; } unalign16; + typedef struct { U32 v; } unalign32; + typedef struct { U64 v; } unalign64; + typedef struct { size_t v; } unalignArch; + __pragma( pack(pop) ) +#else + typedef struct { U16 v; } __attribute__((packed)) unalign16; + typedef struct { U32 v; } __attribute__((packed)) unalign32; + typedef struct { U64 v; } __attribute__((packed)) unalign64; + typedef struct { size_t v; } __attribute__((packed)) unalignArch; +#endif + +MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; } +MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; } +MEM_STATIC void MEM_write64(void* 
memPtr, U64 value) { ((unalign64*)memPtr)->v = value; } + +#else + +/* default method, safe and standard. + can sometimes prove slower */ + +MEM_STATIC U16 MEM_read16(const void* memPtr) +{ + U16 val; memcpy(&val, memPtr, sizeof(val)); return val; } -}} // namespace +MEM_STATIC U32 MEM_read32(const void* memPtr) +{ + U32 val; memcpy(&val, memPtr, sizeof(val)); return val; +} +MEM_STATIC U64 MEM_read64(const void* memPtr) +{ + U64 val; memcpy(&val, memPtr, sizeof(val)); return val; +} +MEM_STATIC size_t MEM_readST(const void* memPtr) +{ + size_t val; memcpy(&val, memPtr, sizeof(val)); return val; +} -// LICENSE_CHANGE_END +MEM_STATIC void MEM_write16(void* memPtr, U16 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} +MEM_STATIC void MEM_write32(void* memPtr, U32 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #1 -// See the end of this file for a list +MEM_STATIC void MEM_write64(void* memPtr, U64 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} -/** - * Autogenerated by Thrift Compiler (0.11.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ +#endif /* MEM_FORCE_MEMORY_ACCESS */ +MEM_STATIC U32 MEM_swap32(U32 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_ulong(in); +#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ + || (defined(__clang__) && __has_builtin(__builtin_bswap32)) + return __builtin_bswap32(in); +#else + return ((in << 24) & 0xff000000 ) | + ((in << 8) & 0x00ff0000 ) | + ((in >> 8) & 0x0000ff00 ) | + ((in >> 24) & 0x000000ff ); +#endif +} -#include -#include +MEM_STATIC U64 MEM_swap64(U64 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_uint64(in); +#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ + || (defined(__clang__) && __has_builtin(__builtin_bswap64)) + return __builtin_bswap64(in); +#else + 
return ((in << 56) & 0xff00000000000000ULL) | + ((in << 40) & 0x00ff000000000000ULL) | + ((in << 24) & 0x0000ff0000000000ULL) | + ((in << 8) & 0x000000ff00000000ULL) | + ((in >> 8) & 0x00000000ff000000ULL) | + ((in >> 24) & 0x0000000000ff0000ULL) | + ((in >> 40) & 0x000000000000ff00ULL) | + ((in >> 56) & 0x00000000000000ffULL); +#endif +} +MEM_STATIC size_t MEM_swapST(size_t in) +{ + if (MEM_32bits()) + return (size_t)MEM_swap32((U32)in); + else + return (size_t)MEM_swap64((U64)in); +} +/*=== Little endian r/w ===*/ -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list +MEM_STATIC U16 MEM_readLE16(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read16(memPtr); + else { + const BYTE* p = (const BYTE*)memPtr; + return (U16)(p[0] + (p[1]<<8)); + } +} -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) +{ + if (MEM_isLittleEndian()) { + MEM_write16(memPtr, val); + } else { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val; + p[1] = (BYTE)(val>>8); + } +} -#ifndef _THRIFT_TOSTRING_H_ -#define _THRIFT_TOSTRING_H_ 1 +MEM_STATIC U32 MEM_readLE24(const void* memPtr) +{ + return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); +} -#include -#include -#include -#include -#include -#include -#include +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val) +{ + MEM_writeLE16(memPtr, (U16)val); + ((BYTE*)memPtr)[2] = (BYTE)(val>>16); +} -namespace duckdb_apache { -namespace thrift { +MEM_STATIC U32 MEM_readLE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read32(memPtr); + else + return MEM_swap32(MEM_read32(memPtr)); +} -template -std::string to_string(const T& t) { - std::ostringstream o; - o << t; - return o.str(); +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + MEM_write32(memPtr, val32); + else + MEM_write32(memPtr, MEM_swap32(val32)); } -// TODO: replace the computations below with std::numeric_limits::max_digits10 once C++11 -// is enabled. 
-inline std::string to_string(const float& t) { - std::ostringstream o; - o.precision(static_cast(std::ceil(static_cast(std::numeric_limits::digits * std::log10(2.0f) + 1)))); - o << t; - return o.str(); +MEM_STATIC U64 MEM_readLE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read64(memPtr); + else + return MEM_swap64(MEM_read64(memPtr)); } -inline std::string to_string(const double& t) { - std::ostringstream o; - o.precision(static_cast(std::ceil(static_cast(std::numeric_limits::digits * std::log10(2.0f) + 1)))); - o << t; - return o.str(); +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + MEM_write64(memPtr, val64); + else + MEM_write64(memPtr, MEM_swap64(val64)); } -inline std::string to_string(const long double& t) { - std::ostringstream o; - o.precision(static_cast(std::ceil(static_cast(std::numeric_limits::digits * std::log10(2.0f) + 1)))); - o << t; - return o.str(); +MEM_STATIC size_t MEM_readLEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readLE32(memPtr); + else + return (size_t)MEM_readLE64(memPtr); } -template -std::string to_string(const std::map& m); +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeLE32(memPtr, (U32)val); + else + MEM_writeLE64(memPtr, (U64)val); +} -template -std::string to_string(const std::set& s); +/*=== Big endian r/w ===*/ -template -std::string to_string(const std::vector& t); +MEM_STATIC U32 MEM_readBE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_swap32(MEM_read32(memPtr)); + else + return MEM_read32(memPtr); +} -template -std::string to_string(const typename std::pair& v) { - std::ostringstream o; - o << to_string(v.first) << ": " << to_string(v.second); - return o.str(); +MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + MEM_write32(memPtr, MEM_swap32(val32)); + else + MEM_write32(memPtr, val32); } -template -std::string to_string(const T& beg, 
const T& end) { - std::ostringstream o; - for (T it = beg; it != end; ++it) { - if (it != beg) - o << ", "; - o << to_string(*it); - } - return o.str(); +MEM_STATIC U64 MEM_readBE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_swap64(MEM_read64(memPtr)); + else + return MEM_read64(memPtr); } -template -std::string to_string(const std::vector& t) { - std::ostringstream o; - o << "[" << to_string(t.begin(), t.end()) << "]"; - return o.str(); +MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + MEM_write64(memPtr, MEM_swap64(val64)); + else + MEM_write64(memPtr, val64); } -template -std::string to_string(const std::map& m) { - std::ostringstream o; - o << "{" << to_string(m.begin(), m.end()) << "}"; - return o.str(); +MEM_STATIC size_t MEM_readBEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readBE32(memPtr); + else + return (size_t)MEM_readBE64(memPtr); } -template -std::string to_string(const std::set& s) { - std::ostringstream o; - o << "{" << to_string(s.begin(), s.end()) << "}"; - return o.str(); +MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeBE32(memPtr, (U32)val); + else + MEM_writeBE64(memPtr, (U64)val); } + } -} // duckdb_apache::thrift -#endif // _THRIFT_TOSTRING_H_ +#endif /* MEM_H_MODULE */ // LICENSE_CHANGE_END -namespace duckdb_parquet { namespace format { -int _kTypeValues[] = { - Type::BOOLEAN, - Type::INT32, - Type::INT64, - Type::INT96, - Type::FLOAT, - Type::DOUBLE, - Type::BYTE_ARRAY, - Type::FIXED_LEN_BYTE_ARRAY -}; -const char* _kTypeNames[] = { - "BOOLEAN", - "INT32", - "INT64", - "INT96", - "FLOAT", - "DOUBLE", - "BYTE_ARRAY", - "FIXED_LEN_BYTE_ARRAY" -}; -const std::map _Type_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(8, _kTypeValues, _kTypeNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 
+// See the end of this file for a list -std::ostream& operator<<(std::ostream& out, const Type::type& val) { - std::map::const_iterator it = _Type_VALUES_TO_NAMES.find(val); - if (it != _Type_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ -int _kConvertedTypeValues[] = { - ConvertedType::UTF8, - ConvertedType::MAP, - ConvertedType::MAP_KEY_VALUE, - ConvertedType::LIST, - ConvertedType::ENUM, - ConvertedType::DECIMAL, - ConvertedType::DATE, - ConvertedType::TIME_MILLIS, - ConvertedType::TIME_MICROS, - ConvertedType::TIMESTAMP_MILLIS, - ConvertedType::TIMESTAMP_MICROS, - ConvertedType::UINT_8, - ConvertedType::UINT_16, - ConvertedType::UINT_32, - ConvertedType::UINT_64, - ConvertedType::INT_8, - ConvertedType::INT_16, - ConvertedType::INT_32, - ConvertedType::INT_64, - ConvertedType::JSON, - ConvertedType::BSON, - ConvertedType::INTERVAL -}; -const char* _kConvertedTypeNames[] = { - "UTF8", - "MAP", - "MAP_KEY_VALUE", - "LIST", - "ENUM", - "DECIMAL", - "DATE", - "TIME_MILLIS", - "TIME_MICROS", - "TIMESTAMP_MILLIS", - "TIMESTAMP_MICROS", - "UINT_8", - "UINT_16", - "UINT_32", - "UINT_64", - "INT_8", - "INT_16", - "INT_32", - "INT_64", - "JSON", - "BSON", - "INTERVAL" -}; -const std::map _ConvertedType_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(22, _kConvertedTypeValues, _kConvertedTypeNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); +/* Note : this module is expected to remain private, do not expose it */ -std::ostream& operator<<(std::ostream& out, const ConvertedType::type& val) { - 
std::map::const_iterator it = _ConvertedType_VALUES_TO_NAMES.find(val); - if (it != _ConvertedType_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} +#ifndef ERROR_H_MODULE +#define ERROR_H_MODULE -int _kFieldRepetitionTypeValues[] = { - FieldRepetitionType::REQUIRED, - FieldRepetitionType::OPTIONAL, - FieldRepetitionType::REPEATED -}; -const char* _kFieldRepetitionTypeNames[] = { - "REQUIRED", - "OPTIONAL", - "REPEATED" -}; -const std::map _FieldRepetitionType_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(3, _kFieldRepetitionTypeValues, _kFieldRepetitionTypeNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); +/* **************************************** +* Dependencies +******************************************/ +#include /* size_t */ -std::ostream& operator<<(std::ostream& out, const FieldRepetitionType::type& val) { - std::map::const_iterator it = _FieldRepetitionType_VALUES_TO_NAMES.find(val); - if (it != _FieldRepetitionType_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} -int _kEncodingValues[] = { - Encoding::PLAIN, - Encoding::PLAIN_DICTIONARY, - Encoding::RLE, - Encoding::BIT_PACKED, - Encoding::DELTA_BINARY_PACKED, - Encoding::DELTA_LENGTH_BYTE_ARRAY, - Encoding::DELTA_BYTE_ARRAY, - Encoding::RLE_DICTIONARY -}; -const char* _kEncodingNames[] = { - "PLAIN", - "PLAIN_DICTIONARY", - "RLE", - "BIT_PACKED", - "DELTA_BINARY_PACKED", - "DELTA_LENGTH_BYTE_ARRAY", - "DELTA_BYTE_ARRAY", - "RLE_DICTIONARY" -}; -const std::map _Encoding_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(8, _kEncodingValues, _kEncodingNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list -std::ostream& operator<<(std::ostream& out, const Encoding::type& val) { - std::map::const_iterator it = 
_Encoding_VALUES_TO_NAMES.find(val); - if (it != _Encoding_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ -int _kCompressionCodecValues[] = { - CompressionCodec::UNCOMPRESSED, - CompressionCodec::SNAPPY, - CompressionCodec::GZIP, - CompressionCodec::LZO, - CompressionCodec::BROTLI, - CompressionCodec::LZ4, - CompressionCodec::ZSTD -}; -const char* _kCompressionCodecNames[] = { - "UNCOMPRESSED", - "SNAPPY", - "GZIP", - "LZO", - "BROTLI", - "LZ4", - "ZSTD" -}; -const std::map _CompressionCodec_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(7, _kCompressionCodecValues, _kCompressionCodecNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); +#ifndef ZSTD_ERRORS_H_398273423 +#define ZSTD_ERRORS_H_398273423 -std::ostream& operator<<(std::ostream& out, const CompressionCodec::type& val) { - std::map::const_iterator it = _CompressionCodec_VALUES_TO_NAMES.find(val); - if (it != _CompressionCodec_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} +/*===== dependency =====*/ +#include /* size_t */ -int _kPageTypeValues[] = { - PageType::DATA_PAGE, - PageType::INDEX_PAGE, - PageType::DICTIONARY_PAGE, - PageType::DATA_PAGE_V2 -}; -const char* _kPageTypeNames[] = { - "DATA_PAGE", - "INDEX_PAGE", - "DICTIONARY_PAGE", - "DATA_PAGE_V2" -}; -const std::map _PageType_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(4, _kPageTypeValues, _kPageTypeNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); -std::ostream& operator<<(std::ostream& out, const 
PageType::type& val) { - std::map::const_iterator it = _PageType_VALUES_TO_NAMES.find(val); - if (it != _PageType_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} +/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ +#ifndef ZSTDERRORLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZSTDERRORLIB_VISIBILITY +# endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY +#endif +namespace duckdb_zstd { +/*-********************************************* + * Error codes list + *-********************************************* + * Error codes _values_ are pinned down since v1.3.1 only. + * Therefore, don't rely on values if you may link to any version < v1.3.1. + * + * Only values < 100 are considered stable. + * + * note 1 : this API shall be used with static linking only. + * dynamic linking is not yet officially supported. + * note 2 : Prefer relying on the enum than on its value whenever possible + * This is the only supported way to use the error list < v1.3.1 + * note 3 : ZSTD_isError() is always correct, whatever the library version. 
+ **********************************************/ +typedef enum { + ZSTD_error_no_error = 0, + ZSTD_error_GENERIC = 1, + ZSTD_error_prefix_unknown = 10, + ZSTD_error_version_unsupported = 12, + ZSTD_error_frameParameter_unsupported = 14, + ZSTD_error_frameParameter_windowTooLarge = 16, + ZSTD_error_corruption_detected = 20, + ZSTD_error_checksum_wrong = 22, + ZSTD_error_dictionary_corrupted = 30, + ZSTD_error_dictionary_wrong = 32, + ZSTD_error_dictionaryCreation_failed = 34, + ZSTD_error_parameter_unsupported = 40, + ZSTD_error_parameter_outOfBound = 42, + ZSTD_error_tableLog_tooLarge = 44, + ZSTD_error_maxSymbolValue_tooLarge = 46, + ZSTD_error_maxSymbolValue_tooSmall = 48, + ZSTD_error_stage_wrong = 60, + ZSTD_error_init_missing = 62, + ZSTD_error_memory_allocation = 64, + ZSTD_error_workSpace_tooSmall= 66, + ZSTD_error_dstSize_tooSmall = 70, + ZSTD_error_srcSize_wrong = 72, + ZSTD_error_dstBuffer_null = 74, + /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ + ZSTD_error_frameIndex_tooLarge = 100, + ZSTD_error_seekableIO = 102, + ZSTD_error_dstBuffer_wrong = 104, + ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ +} ZSTD_ErrorCode; -int _kBoundaryOrderValues[] = { - BoundaryOrder::UNORDERED, - BoundaryOrder::ASCENDING, - BoundaryOrder::DESCENDING -}; -const char* _kBoundaryOrderNames[] = { - "UNORDERED", - "ASCENDING", - "DESCENDING" -}; -const std::map _BoundaryOrder_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(3, _kBoundaryOrderValues, _kBoundaryOrderNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); +/*! 
ZSTD_getErrorCode() : + convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, + which can be used to compare with enum list published above */ +ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); +ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */ -std::ostream& operator<<(std::ostream& out, const BoundaryOrder::type& val) { - std::map::const_iterator it = _BoundaryOrder_VALUES_TO_NAMES.find(val); - if (it != _BoundaryOrder_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; } +#endif /* ZSTD_ERRORS_H_398273423 */ -Statistics::~Statistics() throw() { -} +// LICENSE_CHANGE_END + /* enum list */ -void Statistics::__set_max(const std::string& val) { - this->max = val; -__isset.max = true; -} +namespace duckdb_zstd { +/* **************************************** +* Compiler-specific +******************************************/ +#if defined(__GNUC__) +# define ERR_STATIC static __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define ERR_STATIC static inline +#elif defined(_MSC_VER) +# define ERR_STATIC static __inline +#else +# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif -void Statistics::__set_min(const std::string& val) { - this->min = val; -__isset.min = true; -} -void Statistics::__set_null_count(const int64_t val) { - this->null_count = val; -__isset.null_count = true; -} +/*-**************************************** +* Customization (error_public.h) +******************************************/ +typedef ZSTD_ErrorCode ERR_enum; +#define PREFIX(name) ZSTD_error_##name -void Statistics::__set_distinct_count(const int64_t val) { - this->distinct_count = val; -__isset.distinct_count = true; -} -void 
Statistics::__set_max_value(const std::string& val) { - this->max_value = val; -__isset.max_value = true; -} +/*-**************************************** +* Error codes handling +******************************************/ +#undef ERROR /* already defined on Visual Studio */ +#define ERROR(name) ZSTD_ERROR(name) +#define ZSTD_ERROR(name) ((size_t)-PREFIX(name)) -void Statistics::__set_min_value(const std::string& val) { - this->min_value = val; -__isset.min_value = true; -} -std::ostream& operator<<(std::ostream& out, const Statistics& obj) -{ - obj.printTo(out); - return out; -} +ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } +ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } -uint32_t Statistics::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +/* check and forward error code */ +#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e +#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - xfer += iprot->readStructBegin(fname); +/*-**************************************** +* Error Strings +******************************************/ - using ::duckdb_apache::thrift::protocol::TProtocolException; +const char* ERR_getErrorString(ERR_enum code); /* error_private.c */ +ERR_STATIC const char* ERR_getErrorName(size_t code) +{ + return ERR_getErrorString(ERR_getErrorCode(code)); +} - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->max); - this->__isset.max = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == 
::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->min); - this->__isset.min = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->null_count); - this->__isset.null_count = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->distinct_count); - this->__isset.distinct_count = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->max_value); - this->__isset.max_value = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->min_value); - this->__isset.min_value = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } +} - xfer += iprot->readStructEnd(); +#endif /* ERROR_H_MODULE */ - return xfer; -} -uint32_t Statistics::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("Statistics"); +// LICENSE_CHANGE_END + /* ERR_*, ERROR */ - if (this->__isset.max) { - xfer += oprot->writeFieldBegin("max", ::duckdb_apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeBinary(this->max); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.min) { - xfer += oprot->writeFieldBegin("min", ::duckdb_apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->min); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.null_count) { - xfer += oprot->writeFieldBegin("null_count", ::duckdb_apache::thrift::protocol::T_I64, 3); - xfer += 
oprot->writeI64(this->null_count); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.distinct_count) { - xfer += oprot->writeFieldBegin("distinct_count", ::duckdb_apache::thrift::protocol::T_I64, 4); - xfer += oprot->writeI64(this->distinct_count); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.max_value) { - xfer += oprot->writeFieldBegin("max_value", ::duckdb_apache::thrift::protocol::T_STRING, 5); - xfer += oprot->writeBinary(this->max_value); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.min_value) { - xfer += oprot->writeFieldBegin("min_value", ::duckdb_apache::thrift::protocol::T_STRING, 6); - xfer += oprot->writeBinary(this->min_value); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} -void swap(Statistics &a, Statistics &b) { - using ::std::swap; - swap(a.max, b.max); - swap(a.min, b.min); - swap(a.null_count, b.null_count); - swap(a.distinct_count, b.distinct_count); - swap(a.max_value, b.max_value); - swap(a.min_value, b.min_value); - swap(a.__isset, b.__isset); -} +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list -Statistics::Statistics(const Statistics& other0) { - max = other0.max; - min = other0.min; - null_count = other0.null_count; - distinct_count = other0.distinct_count; - max_value = other0.max_value; - min_value = other0.min_value; - __isset = other0.__isset; -} -Statistics& Statistics::operator=(const Statistics& other1) { - max = other1.max; - min = other1.min; - null_count = other1.null_count; - distinct_count = other1.distinct_count; - max_value = other1.max_value; - min_value = other1.min_value; - __isset = other1.__isset; - return *this; -} -void Statistics::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "Statistics("; - out << "max="; (__isset.max ? 
(out << to_string(max)) : (out << "")); - out << ", " << "min="; (__isset.min ? (out << to_string(min)) : (out << "")); - out << ", " << "null_count="; (__isset.null_count ? (out << to_string(null_count)) : (out << "")); - out << ", " << "distinct_count="; (__isset.distinct_count ? (out << to_string(distinct_count)) : (out << "")); - out << ", " << "max_value="; (__isset.max_value ? (out << to_string(max_value)) : (out << "")); - out << ", " << "min_value="; (__isset.min_value ? (out << to_string(min_value)) : (out << "")); - out << ")"; -} +/* ****************************************************************** + * FSE : Finite State Entropy codec + * Public Prototypes declaration + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ +#ifndef FSE_H +#define FSE_H -StringType::~StringType() throw() { -} -std::ostream& operator<<(std::ostream& out, const StringType& obj) -{ - obj.printTo(out); - return out; -} +/*-***************************************** +* Dependencies +******************************************/ +#include /* size_t, ptrdiff_t */ -uint32_t StringType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +namespace duckdb_zstd { +/*-***************************************** +* FSE_PUBLIC_API : control library symbols visibility +******************************************/ +#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) +# define FSE_PUBLIC_API __attribute__ ((visibility ("default"))) +#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ +# define FSE_PUBLIC_API __declspec(dllexport) +#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) +# define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define FSE_PUBLIC_API +#endif - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +/*------ Version ------*/ +#define FSE_VERSION_MAJOR 0 +#define FSE_VERSION_MINOR 9 +#define FSE_VERSION_RELEASE 0 - xfer += iprot->readStructBegin(fname); +#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE +#define FSE_QUOTE(str) #str +#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str) +#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION) - using ::duckdb_apache::thrift::protocol::TProtocolException; +#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE) +FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version 
number; to be used when checking dll version */ - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } +/*-**************************************** +* FSE simple functions +******************************************/ +/*! FSE_compress() : + Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. + 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize). + @return : size of compressed data (<= dstCapacity). + Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. + if FSE_isError(return), compression failed (more details using FSE_getErrorName()) +*/ +FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); - xfer += iprot->readStructEnd(); +/*! FSE_decompress(): + Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', + into already allocated destination buffer 'dst', of size 'dstCapacity'. + @return : size of regenerated data (<= maxDstSize), + or an error code, which can be tested using FSE_isError() . - return xfer; -} + ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!! + Why ? : making this distinction requires a header. + Header management is intentionally delegated to the user layer, which can better manage special cases. 
+*/ +FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize); -uint32_t StringType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("StringType"); - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} +/*-***************************************** +* Tool functions +******************************************/ +FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */ -void swap(StringType &a, StringType &b) { - using ::std::swap; - (void) a; - (void) b; -} +/* Error Management */ +FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ +FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ -StringType::StringType(const StringType& other2) { - (void) other2; -} -StringType& StringType::operator=(const StringType& other3) { - (void) other3; - return *this; -} -void StringType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "StringType("; - out << ")"; -} +/*-***************************************** +* FSE advanced functions +******************************************/ +/*! FSE_compress2() : + Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' + Both parameters can be defined as '0' to mean : use default value + @return : size of compressed data + Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. + if FSE_isError(return), it's an error code. 
+*/ +FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); -UUIDType::~UUIDType() throw() { -} -std::ostream& operator<<(std::ostream& out, const UUIDType& obj) -{ - obj.printTo(out); - return out; -} +/*-***************************************** +* FSE detailed API +******************************************/ +/*! +FSE_compress() does the following: +1. count symbol occurrence from source[] into table count[] (see hist.h) +2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) +3. save normalized counters to memory buffer using writeNCount() +4. build encoding table 'CTable' from normalized counters +5. encode the data stream using encoding table 'CTable' +FSE_decompress() does the following: +1. read normalized counters with readNCount() +2. build decoding table 'DTable' from normalized counters +3. decode the data stream using decoding table 'DTable' -uint32_t UUIDType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +The following API allows targeting specific sub-functions for advanced tasks. +For example, it's possible to compress several blocks using the same 'CTable', +or to save and provide normalized distribution using external method. +*/ - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +/* *** COMPRESSION *** */ - xfer += iprot->readStructBegin(fname); +/*! FSE_optimalTableLog(): + dynamically downsize 'tableLog' when conditions are met. + It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. + @return : recommended tableLog (necessarily <= 'maxTableLog') */ +FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); - using ::duckdb_apache::thrift::protocol::TProtocolException; +/*! 
FSE_normalizeCount(): + normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) + 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). + @return : tableLog, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t srcSize, unsigned maxSymbolValue); +/*! FSE_NCountWriteBound(): + Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. + Typically useful for allocation purpose. */ +FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } +/*! FSE_writeNCount(): + Compactly save 'normalizedCounter' into 'buffer'. + @return : size of the compressed table, + or an errorCode, which can be tested using FSE_isError(). */ +FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, + const short* normalizedCounter, + unsigned maxSymbolValue, unsigned tableLog); - xfer += iprot->readStructEnd(); +/*! Constructor and Destructor of FSE_CTable. + Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ +typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ +FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog); +FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct); - return xfer; -} +/*! FSE_buildCTable(): + Builds `ct`, which must be already allocated, using FSE_createCTable(). 
+ @return : 0, or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); -uint32_t UUIDType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("UUIDType"); +/*! FSE_compress_usingCTable(): + Compress `src` using `ct` into `dst` which must be already allocated. + @return : size of compressed data (<= `dstCapacity`), + or 0 if compressed data could not fit into `dst`, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} +/*! +Tutorial : +---------- +The first step is to count all symbols. FSE_count() does this job very fast. +Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. +'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] +maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) +FSE_count() will return the number of occurrence of the most frequent symbol. +This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). -void swap(UUIDType &a, UUIDType &b) { - using ::std::swap; - (void) a; - (void) b; -} +The next step is to normalize the frequencies. +FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. +It also guarantees a minimum of 1 to any Symbol with frequency >= 1. 
+You can use 'tableLog'==0 to mean "use default tableLog value". +If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), +which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). -UUIDType::UUIDType(const UUIDType& other4) { - (void) other4; -} -UUIDType& UUIDType::operator=(const UUIDType& other5) { - (void) other5; - return *this; -} -void UUIDType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "UUIDType("; - out << ")"; -} +The result of FSE_normalizeCount() will be saved into a table, +called 'normalizedCounter', which is a table of signed short. +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. +The return value is tableLog if everything proceeded as expected. +It is 0 if there is a single symbol within distribution. +If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). +'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). +'buffer' must be already allocated. +For guaranteed success, buffer size must be at least FSE_headerBound(). +The result of the function is the number of bytes written into 'buffer'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). -MapType::~MapType() throw() { -} +'normalizedCounter' can then be used to create the compression table 'CTable'. +The space required by 'CTable' must be already allocated, using FSE_createCTable(). +You can then use FSE_buildCTable() to fill 'CTable'. +If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). 
-std::ostream& operator<<(std::ostream& out, const MapType& obj) -{ - obj.printTo(out); - return out; -} +'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). +Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' +The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. +If it returns '0', compressed data could not fit into 'dst'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). +*/ -uint32_t MapType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +/* *** DECOMPRESSION *** */ - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +/*! FSE_readNCount(): + Read compactly saved 'normalizedCounter' from 'rBuffer'. + @return : size read from 'rBuffer', + or an errorCode, which can be tested using FSE_isError(). + maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ +FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize); - xfer += iprot->readStructBegin(fname); +/*! Constructor and Destructor of FSE_DTable. + Note that its size depends on 'tableLog' */ +typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ +FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog); +FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt); - using ::duckdb_apache::thrift::protocol::TProtocolException; +/*! FSE_buildDTable(): + Builds 'dt', which must be already allocated, using FSE_createDTable(). 
+ return : 0, or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); +/*! FSE_decompress_usingDTable(): + Decompress compressed source `cSrc` of size `cSrcSize` using `dt` + into `dst` which must be already allocated. + @return : size of regenerated data (necessarily <= `dstCapacity`), + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } +/*! +Tutorial : +---------- +(Note : these functions only decompress FSE-compressed blocks. + If block is uncompressed, use memcpy() instead + If block is a single repeated byte, use memset() instead ) - xfer += iprot->readStructEnd(); +The first step is to obtain the normalized frequencies of symbols. +This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. +In practice, that means it's necessary to know 'maxSymbolValue' beforehand, +or size the table to handle worst case situations (typically 256). +FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. +The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. +Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. +If there is an error, the function will return an error code, which can be tested using FSE_isError(). - return xfer; -} +The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. +This is performed by the function FSE_buildDTable(). 
+The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). -uint32_t MapType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("MapType"); +`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). +`cSrcSize` must be strictly correct, otherwise decompression will fail. +FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small) +*/ - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; } -void swap(MapType &a, MapType &b) { - using ::std::swap; - (void) a; - (void) b; -} +#endif /* FSE_H */ -MapType::MapType(const MapType& other6) { - (void) other6; -} -MapType& MapType::operator=(const MapType& other7) { - (void) other7; - return *this; -} -void MapType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "MapType("; - out << ")"; -} -ListType::~ListType() throw() { -} +// LICENSE_CHANGE_END -std::ostream& operator<<(std::ostream& out, const ListType& obj) -{ - obj.printTo(out); - return out; -} -uint32_t ListType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +/* ****************************************************************** + * FSE : Finite State Entropy codec + * Public 
Prototypes declaration + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ - xfer += iprot->readStructBegin(fname); +#ifndef FSE_H_FSE_STATIC_LINKING_ONLY +#define FSE_H_FSE_STATIC_LINKING_ONLY - using ::duckdb_apache::thrift::protocol::TProtocolException; +/* *** Dependency *** */ - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list - xfer += iprot->readStructEnd(); +/* ****************************************************************** + * bitstream + * Part of FSE library + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ +#ifndef BITSTREAM_H_MODULE +#define BITSTREAM_H_MODULE - return xfer; -} +/* +* This API consists of small unitary functions, which must be inlined for best performance. 
+* Since link-time-optimization is not available for all compilers, +* these functions are defined into a .h to be included. +*/ -uint32_t ListType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ListType"); +/*-**************************************** +* Dependencies +******************************************/ + /* unaligned access routines */ - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} -void swap(ListType &a, ListType &b) { - using ::std::swap; - (void) a; - (void) b; -} +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list -ListType::ListType(const ListType& other8) { - (void) other8; -} -ListType& ListType::operator=(const ListType& other9) { - (void) other9; - return *this; -} -void ListType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "ListType("; - out << ")"; -} +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ +#ifndef ZSTD_COMPILER_H +#define ZSTD_COMPILER_H -EnumType::~EnumType() throw() { -} +/*-******************************************************* +* Compiler specifics +*********************************************************/ +/* force inlining */ -std::ostream& operator<<(std::ostream& out, const EnumType& obj) -{ - obj.printTo(out); - return out; -} +#if !defined(ZSTD_NO_INLINE) +#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# define INLINE_KEYWORD inline +#else +# define INLINE_KEYWORD +#endif +#if defined(__GNUC__) || defined(__ICCARM__) +# define FORCE_INLINE_ATTR __attribute__((always_inline)) +#elif defined(_MSC_VER) +# define FORCE_INLINE_ATTR __forceinline +#else +# define FORCE_INLINE_ATTR +#endif -uint32_t EnumType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +#else - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +#define INLINE_KEYWORD +#define FORCE_INLINE_ATTR - xfer += iprot->readStructBegin(fname); +#endif - using ::duckdb_apache::thrift::protocol::TProtocolException; +/** + * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant + * parameters. They must be inlined for the compiler to eliminate the constant + * branches. + */ +#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR +/** + * HINT_INLINE is used to help the compiler generate better code. It is *not* + * used for "templates", so it can be tweaked based on the compilers + * performance. + * + * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the + * always_inline attribute. + * + * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline + * attribute. 
+ */ +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 +# define HINT_INLINE static INLINE_KEYWORD +#else +# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR +#endif +/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ +#if defined(__GNUC__) +# define UNUSED_ATTR __attribute__((unused)) +#else +# define UNUSED_ATTR +#endif - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } +/* force no inlining */ +#ifdef _MSC_VER +# define FORCE_NOINLINE static __declspec(noinline) +#else +# if defined(__GNUC__) || defined(__ICCARM__) +# define FORCE_NOINLINE static __attribute__((__noinline__)) +# else +# define FORCE_NOINLINE static +# endif +#endif - xfer += iprot->readStructEnd(); +/* target attribute */ +#ifndef __has_attribute + #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ +#endif +#if defined(__GNUC__) || defined(__ICCARM__) +# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) +#else +# define TARGET_ATTRIBUTE(target) +#endif - return xfer; -} +/* Enable runtime BMI2 dispatch based on the CPU. + * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. 
+ */ +#ifndef DYNAMIC_BMI2 + #if ((defined(__clang__) && __has_attribute(__target__)) \ + || (defined(__GNUC__) \ + && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ + && (defined(__x86_64__) || defined(_M_X86)) \ + && !defined(__BMI2__) + # define DYNAMIC_BMI2 1 + #else + # define DYNAMIC_BMI2 0 + #endif +#endif -uint32_t EnumType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EnumType"); +/* prefetch + * can be disabled, by declaring NO_PREFETCH build macro */ +#if defined(NO_PREFETCH) +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ +#else +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ +# include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ +# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) +# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) +# elif defined(__aarch64__) +# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))) +# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))) +# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) +# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) +# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) +# else +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ +# endif +#endif /* NO_PREFETCH */ - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} +#define CACHELINE_SIZE 64 -void swap(EnumType &a, EnumType &b) { - using ::std::swap; - (void) a; - (void) b; +#define PREFETCH_AREA(p, s) { \ + const char* const 
_ptr = (const char*)(p); \ + size_t const _size = (size_t)(s); \ + size_t _pos; \ + for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ + PREFETCH_L2(_ptr + _pos); \ + } \ } -EnumType::EnumType(const EnumType& other10) { - (void) other10; -} -EnumType& EnumType::operator=(const EnumType& other11) { - (void) other11; - return *this; -} -void EnumType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "EnumType("; - out << ")"; -} +/* vectorization + * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */ +#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) +# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5) +# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) +# else +# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")") +# endif +#else +# define DONT_VECTORIZE +#endif +/* Tell the compiler that a branch is likely or unlikely. + * Only use these macros if it causes the compiler to generate better code. + * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc + * and clang, please do. 
+ */ +#if defined(__GNUC__) +#ifndef LIKELY +#define LIKELY(x) (__builtin_expect((x), 1)) +#endif +#ifndef UNLIKELY +#define UNLIKELY(x) (__builtin_expect((x), 0)) +#endif +#else +#ifndef LIKELY +#define LIKELY(x) (x) +#endif +#ifndef UNLIKELY +#define UNLIKELY(x) (x) +#endif +#endif -DateType::~DateType() throw() { -} +#endif /* ZSTD_COMPILER_H */ -std::ostream& operator<<(std::ostream& out, const DateType& obj) -{ - obj.printTo(out); - return out; -} +// LICENSE_CHANGE_END + /* UNLIKELY() */ -uint32_t DateType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list - xfer += iprot->readStructBegin(fname); +/* ****************************************************************** + * debug + * Part of FSE library + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ - using ::duckdb_apache::thrift::protocol::TProtocolException; +/* + * The purpose of this header is to enable debug functions. + * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time, + * and DEBUG_STATIC_ASSERT() for compile-time. + * + * By default, DEBUGLEVEL==0, which means run-time debug is disabled. + * + * Level 1 enables assert() only. 
+ * Starting level 2, traces can be generated and pushed to stderr. + * The higher the level, the more verbose the traces. + * + * It's possible to dynamically adjust level using variable g_debug_level, + * which is only declared if DEBUGLEVEL>=2, + * and is a global variable, not multi-thread protected (use with care) + */ - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } +#ifndef DEBUG_H_12987983217 +#define DEBUG_H_12987983217 - xfer += iprot->readStructEnd(); +#if defined (__cplusplus) +extern "C" { +#endif - return xfer; -} -uint32_t DateType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DateType"); +/* static assert is triggered at compile time, leaving no runtime artefact. + * static assert only works with compile-time constants. + * Also, this variant can only be used inside a function. */ +#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1]) - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} -void swap(DateType &a, DateType &b) { - using ::std::swap; - (void) a; - (void) b; -} +/* DEBUGLEVEL is expected to be defined externally, + * typically through compiler command line. + * Value must be a number. */ +#ifndef DEBUGLEVEL +# define DEBUGLEVEL 0 +#endif -DateType::DateType(const DateType& other12) { - (void) other12; -} -DateType& DateType::operator=(const DateType& other13) { - (void) other13; - return *this; -} -void DateType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "DateType("; - out << ")"; -} +/* DEBUGFILE can be defined externally, + * typically through compiler command line. + * note : currently useless. 
+ * Value must be stderr or stdout */ +#ifndef DEBUGFILE +# define DEBUGFILE stderr +#endif -NullType::~NullType() throw() { -} -std::ostream& operator<<(std::ostream& out, const NullType& obj) -{ - obj.printTo(out); - return out; -} +/* recommended values for DEBUGLEVEL : + * 0 : release mode, no debug, all run-time checks disabled + * 1 : enables assert() only, no display + * 2 : reserved, for currently active debug path + * 3 : events once per object lifetime (CCtx, CDict, etc.) + * 4 : events once per frame + * 5 : events once per block + * 6 : events once per sequence (verbose) + * 7+: events at every position (*very* verbose) + * + * It's generally inconvenient to output traces > 5. + * In which case, it's possible to selectively trigger high verbosity levels + * by modifying g_debug_level. + */ +#if (DEBUGLEVEL>=1) +# include +#else +# ifndef assert /* assert may be already defined, due to prior #include */ +# define assert(condition) ((void)0) /* disable assert (default) */ +# endif +#endif -uint32_t NullType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +#if (DEBUGLEVEL>=2) +# include +extern int g_debuglevel; /* the variable is only declared, + it actually lives in debug.c, + and is shared by the whole process. + It's not thread-safe. + It's useful when enabling very verbose levels + on selective conditions (such as position in src) */ - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +# define RAWLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + fprintf(stderr, __VA_ARGS__); \ + } } +# define DEBUGLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + fprintf(stderr, __FILE__ ": " __VA_ARGS__); \ + fprintf(stderr, " \n"); \ + } } +#else +# define RAWLOG(l, ...) {} /* disabled */ +# define DEBUGLOG(l, ...) 
{} /* disabled */ +#endif - xfer += iprot->readStructBegin(fname); - using ::duckdb_apache::thrift::protocol::TProtocolException; +#if defined (__cplusplus) +} +#endif +#endif /* DEBUG_H_12987983217 */ - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - xfer += iprot->readStructEnd(); +// LICENSE_CHANGE_END + /* assert(), DEBUGLOG(), RAWLOG() */ + /* error codes and messages */ - return xfer; -} -uint32_t NullType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("NullType"); +/*========================================= +* Target specific +=========================================*/ +#if defined(__BMI__) && defined(__GNUC__) +# include /* support for bextr (experimental) */ +#elif defined(__ICCARM__) +# include +#endif - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} +#define STREAM_ACCUMULATOR_MIN_32 25 +#define STREAM_ACCUMULATOR_MIN_64 57 +#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) -void swap(NullType &a, NullType &b) { - using ::std::swap; - (void) a; - (void) b; -} +namespace duckdb_zstd { -NullType::NullType(const NullType& other14) { - (void) other14; -} -NullType& NullType::operator=(const NullType& other15) { - (void) other15; - return *this; -} -void NullType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "NullType("; - out << ")"; -} +/*-****************************************** +* bitStream encoding API (write forward) +********************************************/ +/* bitStream can mix input from multiple sources. 
+ * A critical property of these streams is that they encode and decode in **reverse** direction. + * So the first bit sequence you add will be the last to be read, like a LIFO stack. + */ +typedef struct { + size_t bitContainer; + unsigned bitPos; + char* startPtr; + char* ptr; + char* endPtr; +} BIT_CStream_t; +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); -DecimalType::~DecimalType() throw() { -} +/* Start with initCStream, providing the size of buffer to write into. +* bitStream will never write outside of this buffer. +* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. +* +* bits are first added to a local register. +* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. +* Writing data into memory is an explicit operation, performed by the flushBits function. +* Hence keep track how many bits are potentially stored into local register to avoid register overflow. +* After a flushBits, a maximum of 7 bits might still be stored into local register. +* +* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. +* +* Last operation is to close the bitStream. +* The function returns the final size of CStream in bytes. 
+* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) +*/ -void DecimalType::__set_scale(const int32_t val) { - this->scale = val; -} +/*-******************************************** +* bitStream decoding API (read backward) +**********************************************/ +typedef struct { + size_t bitContainer; + unsigned bitsConsumed; + const char* ptr; + const char* start; + const char* limitPtr; +} BIT_DStream_t; -void DecimalType::__set_precision(const int32_t val) { - this->precision = val; -} -std::ostream& operator<<(std::ostream& out, const DecimalType& obj) -{ - obj.printTo(out); - return out; -} +typedef enum { BIT_DStream_unfinished = 0, + BIT_DStream_endOfBuffer = 1, + BIT_DStream_completed = 2, + BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ + /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); -uint32_t DecimalType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +/* Start by invoking BIT_initDStream(). +* A chunk of the bitStream is then stored into a local register. +* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +* You can then retrieve bitFields stored into the local register, **in reverse order**. +* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. +* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. 
+* Otherwise, it can be less than that, so proceed accordingly. +* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). +*/ - xfer += iprot->readStructBegin(fname); - using ::duckdb_apache::thrift::protocol::TProtocolException; +/*-**************************************** +* unsafe API +******************************************/ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ - bool isset_scale = false; - bool isset_precision = false; +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); +/* unsafe version; does not check buffer overflow */ - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->scale); - isset_scale = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->precision); - isset_precision = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); +/* faster, but works only if nbBits >= 1 */ - xfer += iprot->readStructEnd(); - if (!isset_scale) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_precision) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} -uint32_t DecimalType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DecimalType"); - - xfer += oprot->writeFieldBegin("scale", 
::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->scale); - xfer += oprot->writeFieldEnd(); +/*-************************************************************** +* Internal functions +****************************************************************/ +MEM_STATIC unsigned BIT_highbit32 (U32 val) +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + return _BitScanReverse ( &r, val ) ? (unsigned)r : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ + return __builtin_clz (val) ^ 31; +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return 31 - __CLZ(val); +# else /* Software version */ + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, + 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, + 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; +# endif + } +} - xfer += oprot->writeFieldBegin("precision", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->precision); - xfer += oprot->writeFieldEnd(); +/*===== Local Constants =====*/ +static const unsigned BIT_mask[] = { + 0, 1, 3, 7, 0xF, 0x1F, + 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, + 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, + 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, + 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, + 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */ +#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0])) - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; +/*-************************************************************** +* bitStream encoding +****************************************************************/ +/*! 
BIT_initCStream() : + * `dstCapacity` must be > sizeof(size_t) + * @return : 0 if success, + * otherwise an error code (can be tested using ERR_isError()) */ +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, + void* startPtr, size_t dstCapacity) +{ + bitC->bitContainer = 0; + bitC->bitPos = 0; + bitC->startPtr = (char*)startPtr; + bitC->ptr = bitC->startPtr; + bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); + if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); + return 0; } -void swap(DecimalType &a, DecimalType &b) { - using ::std::swap; - swap(a.scale, b.scale); - swap(a.precision, b.precision); +/*! BIT_addBits() : + * can add up to 31 bits into `bitC`. + * Note : does not check for register overflow ! */ +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32); + assert(nbBits < BIT_MASK_SIZE); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; + bitC->bitPos += nbBits; } -DecimalType::DecimalType(const DecimalType& other16) { - scale = other16.scale; - precision = other16.precision; -} -DecimalType& DecimalType::operator=(const DecimalType& other17) { - scale = other17.scale; - precision = other17.precision; - return *this; -} -void DecimalType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "DecimalType("; - out << "scale=" << to_string(scale); - out << ", " << "precision=" << to_string(precision); - out << ")"; +/*! BIT_addBitsFast() : + * works only if `value` is _clean_, + * meaning all high bits above nbBits are 0 */ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + assert((value>>nbBits) == 0); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= value << bitC->bitPos; + bitC->bitPos += nbBits; } +/*! 
BIT_flushBitsFast() : + * assumption : bitContainer has not overflowed + * unsafe version; does not check buffer overflow */ +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} -MilliSeconds::~MilliSeconds() throw() { +/*! BIT_flushBits() : + * assumption : bitContainer has not overflowed + * safe version; check for buffer overflow, and prevents it. + * note : does not signal buffer overflow. + * overflow will be revealed later on using BIT_closeCStream() */ +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; } -std::ostream& operator<<(std::ostream& out, const MilliSeconds& obj) +/*! BIT_closeCStream() : + * @return : size of CStream, in bytes, + * or 0 if it could not fit into dstBuffer */ +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) { - obj.printTo(out); - return out; + BIT_addBitsFast(bitC, 1, 1); /* endMark */ + BIT_flushBits(bitC); + if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ + return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); } -uint32_t MilliSeconds::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +/*-******************************************************** +* bitStream decoding +**********************************************************/ +/*! BIT_initDStream() : + * Initialize a BIT_DStream_t. + * `bitD` : a pointer to an already allocated BIT_DStream_t structure. 
+ * `srcSize` must be the *exact* size of the bitStream, in bytes. + * @return : size of stream (== srcSize), or an errorCode if a problem is detected + */ +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; + bitD->start = (const char*)srcBuffer; + bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); - xfer += iprot->readStructBegin(fname); + if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); + bitD->bitContainer = MEM_readLEST(bitD->ptr); + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } + } else { + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); + /* fall-through */ - using ::duckdb_apache::thrift::protocol::TProtocolException; + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); + /* fall-through */ + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); + /* fall-through */ - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; + /* fall-through */ - xfer += 
iprot->readStructEnd(); + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; + /* fall-through */ - return xfer; -} + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; + /* fall-through */ -uint32_t MilliSeconds::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("MilliSeconds"); + default: break; + } + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; + if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ + } + bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; + } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; + return srcSize; } -void swap(MilliSeconds &a, MilliSeconds &b) { - using ::std::swap; - (void) a; - (void) b; +MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) +{ + return bitContainer >> start; } -MilliSeconds::MilliSeconds(const MilliSeconds& other18) { - (void) other18; -} -MilliSeconds& MilliSeconds::operator=(const MilliSeconds& other19) { - (void) other19; - return *this; -} -void MilliSeconds::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "MilliSeconds("; - out << ")"; +MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) +{ + U32 const regMask = sizeof(bitContainer)*8 - 1; + /* if start > regMask, bitstream is corrupted, and result is undefined */ + assert(nbBits < BIT_MASK_SIZE); + return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; } - -MicroSeconds::~MicroSeconds() throw() { +MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) +{ + assert(nbBits < BIT_MASK_SIZE); + return bitContainer & BIT_mask[nbBits]; } -std::ostream& operator<<(std::ostream& out, 
const MicroSeconds& obj) +/*! BIT_lookBits() : + * Provides next n bits from local register. + * local register is not modified. + * On 32-bits, maxNbBits==24. + * On 64-bits, maxNbBits==56. + * @return : value extracted */ +MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) { - obj.printTo(out); - return out; + /* arbitrate between double-shift and shift+mask */ +#if 1 + /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8, + * bitstream is likely corrupted, and result is undefined */ + return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); +#else + /* this code path is slower on my os-x laptop */ + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); +#endif } - -uint32_t MicroSeconds::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; +/*! 
BIT_lookBitsFast() : + * unsafe version; only works if nbBits >= 1 */ +MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) +{ + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + assert(nbBits >= 1); + return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); } -uint32_t MicroSeconds::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("MicroSeconds"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; +MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; } -void swap(MicroSeconds &a, MicroSeconds &b) { - using ::std::swap; - (void) a; - (void) b; +/*! BIT_readBits() : + * Read (consume) next n bits from local register and update. + * Pay attention to not read more than nbBits contained into local register. + * @return : extracted value. */ +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) +{ + size_t const value = BIT_lookBits(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; } -MicroSeconds::MicroSeconds(const MicroSeconds& other20) { - (void) other20; -} -MicroSeconds& MicroSeconds::operator=(const MicroSeconds& other21) { - (void) other21; - return *this; +/*! BIT_readBitsFast() : + * unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) +{ + size_t const value = BIT_lookBitsFast(bitD, nbBits); + assert(nbBits >= 1); + BIT_skipBits(bitD, nbBits); + return value; } -void MicroSeconds::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "MicroSeconds("; - out << ")"; + +/*! BIT_reloadDStreamFast() : + * Similar to BIT_reloadDStream(), but with two differences: + * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold! + * 2. 
Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this + * point you must use BIT_reloadDStream() to reload. + */ +MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) +{ + if (UNLIKELY(bitD->ptr < bitD->limitPtr)) + return BIT_DStream_overflow; + assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; } +/*! BIT_reloadDStream() : + * Refill `bitD` from buffer previously set in BIT_initDStream() . + * This function is safe, it guarantees it will not read beyond src buffer. + * @return : status of `BIT_DStream_t` internal register. + * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) +{ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ + return BIT_DStream_overflow; -NanoSeconds::~NanoSeconds() throw() { + if (bitD->ptr >= bitD->limitPtr) { + return BIT_reloadDStreamFast(bitD); + } + if (bitD->ptr == bitD->start) { + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; + return BIT_DStream_completed; + } + /* start < ptr < limitPtr */ + { U32 nbBytes = bitD->bitsConsumed >> 3; + BIT_DStream_status result = BIT_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) { + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ + result = BIT_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes*8; + bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ + return result; + } } -std::ostream& operator<<(std::ostream& out, const NanoSeconds& obj) +/*! BIT_endOfDStream() : + * @return : 1 if DStream has _exactly_ reached its end (all bits consumed). 
+ */ +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) { - obj.printTo(out); - return out; + return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); } +} -uint32_t NanoSeconds::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +#endif /* BITSTREAM_H_MODULE */ - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - xfer += iprot->readStructBegin(fname); +// LICENSE_CHANGE_END - using ::duckdb_apache::thrift::protocol::TProtocolException; +namespace duckdb_zstd { - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } +/* ***************************************** +* Static allocation +*******************************************/ +/* FSE buffer bounds */ +#define FSE_NCOUNTBOUND 512 +#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) +#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ - xfer += iprot->readStructEnd(); +/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ +#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2)) +#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<writeStructBegin("NanoSeconds"); - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} +/* ***************************************** + * FSE advanced API + ***************************************** */ -void swap(NanoSeconds &a, NanoSeconds &b) { - using ::std::swap; - (void) a; - (void) b; -} +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, 
unsigned maxSymbolValue, unsigned minus); +/**< same as FSE_optimalTableLog(), which used `minus==2` */ -NanoSeconds::NanoSeconds(const NanoSeconds& other22) { - (void) other22; -} -NanoSeconds& NanoSeconds::operator=(const NanoSeconds& other23) { - (void) other23; - return *this; -} -void NanoSeconds::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "NanoSeconds("; - out << ")"; -} +/* FSE_compress_wksp() : + * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). + * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable. + */ +#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) ) +size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); +size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); +/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */ -TimeUnit::~TimeUnit() throw() { -} +size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); +/**< build a fake FSE_CTable, designed to compress always the same symbolValue */ +/* FSE_buildCTable_wksp() : + * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). 
+ * `wkspSize` must be >= `(1<MILLIS = val; -__isset.MILLIS = true; -} +size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits); +/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */ -void TimeUnit::__set_MICROS(const MicroSeconds& val) { - this->MICROS = val; -__isset.MICROS = true; -} +size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); +/**< build a fake FSE_DTable, designed to always generate the same symbolValue */ -void TimeUnit::__set_NANOS(const NanoSeconds& val) { - this->NANOS = val; -__isset.NANOS = true; -} -std::ostream& operator<<(std::ostream& out, const TimeUnit& obj) -{ - obj.printTo(out); - return out; -} +size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog); +/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */ +typedef enum { + FSE_repeat_none, /**< Cannot use the previous table */ + FSE_repeat_check, /**< Can use the previous table but it must be checked */ + FSE_repeat_valid /**< Can use the previous table and it is assumed to be valid */ + } FSE_repeat; -uint32_t TimeUnit::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +/* ***************************************** +* FSE symbol compression API +*******************************************/ +/*! + This API consists of small unitary functions, which highly benefit from being inlined. + Hence their body are included in next section. 
+*/ +typedef struct { + ptrdiff_t value; + const void* stateTable; + const void* symbolTT; + unsigned stateLog; +} FSE_CState_t; - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct); - xfer += iprot->readStructBegin(fname); +static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol); - using ::duckdb_apache::thrift::protocol::TProtocolException; +static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr); +/**< +These functions are inner components of FSE_compress_usingCTable(). +They allow the creation of custom streams, mixing multiple tables and bit sources. - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->MILLIS.read(iprot); - this->__isset.MILLIS = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->MICROS.read(iprot); - this->__isset.MICROS = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->NANOS.read(iprot); - this->__isset.NANOS = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } +A key property to keep in mind is that encoding and decoding are done **in reverse direction**. +So the first symbol you will encode is the last you will decode, like a LIFO stack. - xfer += iprot->readStructEnd(); +You will need a few variables to track your CStream. 
They are : - return xfer; -} +FSE_CTable ct; // Provided by FSE_buildCTable() +BIT_CStream_t bitStream; // bitStream tracking structure +FSE_CState_t state; // State tracking structure (can have several) -uint32_t TimeUnit::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TimeUnit"); - if (this->__isset.MILLIS) { - xfer += oprot->writeFieldBegin("MILLIS", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); - xfer += this->MILLIS.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.MICROS) { - xfer += oprot->writeFieldBegin("MICROS", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); - xfer += this->MICROS.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.NANOS) { - xfer += oprot->writeFieldBegin("NANOS", ::duckdb_apache::thrift::protocol::T_STRUCT, 3); - xfer += this->NANOS.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} +The first thing to do is to init bitStream and state. + size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize); + FSE_initCState(&state, ct); -void swap(TimeUnit &a, TimeUnit &b) { - using ::std::swap; - swap(a.MILLIS, b.MILLIS); - swap(a.MICROS, b.MICROS); - swap(a.NANOS, b.NANOS); - swap(a.__isset, b.__isset); -} +Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError(); +You can then encode your input data, byte after byte. +FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time. +Remember decoding will be done in reverse direction. 
+ FSE_encodeByte(&bitStream, &state, symbol); -TimeUnit::TimeUnit(const TimeUnit& other24) { - MILLIS = other24.MILLIS; - MICROS = other24.MICROS; - NANOS = other24.NANOS; - __isset = other24.__isset; -} -TimeUnit& TimeUnit::operator=(const TimeUnit& other25) { - MILLIS = other25.MILLIS; - MICROS = other25.MICROS; - NANOS = other25.NANOS; - __isset = other25.__isset; - return *this; -} -void TimeUnit::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "TimeUnit("; - out << "MILLIS="; (__isset.MILLIS ? (out << to_string(MILLIS)) : (out << "")); - out << ", " << "MICROS="; (__isset.MICROS ? (out << to_string(MICROS)) : (out << "")); - out << ", " << "NANOS="; (__isset.NANOS ? (out << to_string(NANOS)) : (out << "")); - out << ")"; -} +At any time, you can also add any bit sequence. +Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders + BIT_addBits(&bitStream, bitField, nbBits); +The above methods don't commit data to memory, they just store it into local register, for speed. +Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +Writing data to memory is a manual operation, performed by the flushBits function. + BIT_flushBits(&bitStream); -TimestampType::~TimestampType() throw() { -} +Your last FSE encoding operation shall be to flush your last state value(s). + FSE_flushState(&bitStream, &state); +Finally, you must close the bitStream. +The function returns the size of CStream in bytes. +If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible) +If there is an error, it returns an errorCode (which can be tested using FSE_isError()). 
+ size_t size = BIT_closeCStream(&bitStream); +*/ -void TimestampType::__set_isAdjustedToUTC(const bool val) { - this->isAdjustedToUTC = val; -} -void TimestampType::__set_unit(const TimeUnit& val) { - this->unit = val; -} -std::ostream& operator<<(std::ostream& out, const TimestampType& obj) -{ - obj.printTo(out); - return out; -} +/* ***************************************** +* FSE symbol decompression API +*******************************************/ +typedef struct { + size_t state; + const void* table; /* precise table may vary, depending on U16 */ +} FSE_DState_t; -uint32_t TimestampType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt); - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); - xfer += iprot->readStructBegin(fname); +static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr); - using ::duckdb_apache::thrift::protocol::TProtocolException; +/**< +Let's now decompose FSE_decompress_usingDTable() into its unitary components. +You will decode FSE-encoded symbols from the bitStream, +and also any other bitFields you put in, **in reverse order**. - bool isset_isAdjustedToUTC = false; - bool isset_unit = false; +You will need a few variables to track your bitStream. 
They are : - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->isAdjustedToUTC); - isset_isAdjustedToUTC = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->unit.read(iprot); - isset_unit = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } +BIT_DStream_t DStream; // Stream context +FSE_DState_t DState; // State context. Multiple ones are possible +FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable() - xfer += iprot->readStructEnd(); +The first thing to do is to init the bitStream. + errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize); - if (!isset_isAdjustedToUTC) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_unit) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} +You should then retrieve your initial state(s) +(in reverse flushing order if you have several ones) : + errorCode = FSE_initDState(&DState, &DStream, DTablePtr); -uint32_t TimestampType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TimestampType"); +You can then decode your data, symbol after symbol. +For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'. +Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out). 
+ unsigned char symbol = FSE_decodeSymbol(&DState, &DStream); - xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::duckdb_apache::thrift::protocol::T_BOOL, 1); - xfer += oprot->writeBool(this->isAdjustedToUTC); - xfer += oprot->writeFieldEnd(); +You can retrieve any bitfield you eventually stored into the bitStream (in reverse order) +Note : maximum allowed nbBits is 25, for 32-bits compatibility + size_t bitField = BIT_readBits(&DStream, nbBits); - xfer += oprot->writeFieldBegin("unit", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); - xfer += this->unit.write(oprot); - xfer += oprot->writeFieldEnd(); +All above operations only read from local register (which size depends on size_t). +Refueling the register from memory is manually performed by the reload method. + endSignal = FSE_reloadDStream(&DStream); - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} +BIT_reloadDStream() result tells if there is still some more data to read from DStream. +BIT_DStream_unfinished : there is still some data left into the DStream. +BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled. +BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed. +BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted. -void swap(TimestampType &a, TimestampType &b) { - using ::std::swap; - swap(a.isAdjustedToUTC, b.isAdjustedToUTC); - swap(a.unit, b.unit); -} +When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop, +to properly detect the exact end of stream. 
+After each decoded symbol, check if DStream is fully consumed using this simple test : + BIT_reloadDStream(&DStream) >= BIT_DStream_completed -TimestampType::TimestampType(const TimestampType& other26) { - isAdjustedToUTC = other26.isAdjustedToUTC; - unit = other26.unit; -} -TimestampType& TimestampType::operator=(const TimestampType& other27) { - isAdjustedToUTC = other27.isAdjustedToUTC; - unit = other27.unit; - return *this; -} -void TimestampType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "TimestampType("; - out << "isAdjustedToUTC=" << to_string(isAdjustedToUTC); - out << ", " << "unit=" << to_string(unit); - out << ")"; -} +When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. +Checking if DStream has reached its end is performed by : + BIT_endOfDStream(&DStream); +Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. + FSE_endOfDState(&DState); +*/ -TimeType::~TimeType() throw() { -} +/* ***************************************** +* FSE unsafe API +*******************************************/ +static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); +/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ -void TimeType::__set_isAdjustedToUTC(const bool val) { - this->isAdjustedToUTC = val; -} +/* ***************************************** +* Implementation of inlined functions +*******************************************/ +typedef struct { + int deltaFindState; + U32 deltaNbBits; +} FSE_symbolCompressionTransform; /* total 8 bytes */ -void TimeType::__set_unit(const TimeUnit& val) { - this->unit = val; -} -std::ostream& operator<<(std::ostream& out, const TimeType& obj) +MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct) { - obj.printTo(out); - return out; + const void* ptr = ct; + const U16* u16ptr = (const 
U16*) ptr; + const U32 tableLog = MEM_read16(ptr); + statePtr->value = (ptrdiff_t)1<stateTable = u16ptr+2; + statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1); + statePtr->stateLog = tableLog; } -uint32_t TimeType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +/*! FSE_initCState2() : +* Same as FSE_initCState(), but the first symbol to include (which will be the last to be read) +* uses the smallest state value possible, saving the cost of this symbol */ +MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol) +{ + FSE_initCState(statePtr, ct); + { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* stateTable = (const U16*)(statePtr->stateTable); + U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16); + statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; + statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; + } +} - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol) +{ + FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* const stateTable = (const U16*)(statePtr->stateTable); + U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); + BIT_addBits(bitC, statePtr->value, nbBitsOut); + statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; +} - xfer += iprot->readStructBegin(fname); +MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) +{ + BIT_addBits(bitC, statePtr->value, statePtr->stateLog); + BIT_flushBits(bitC); +} - using ::duckdb_apache::thrift::protocol::TProtocolException; - bool 
isset_isAdjustedToUTC = false; - bool isset_unit = false; +/* FSE_getMaxNbBits() : + * Approximate maximum cost of a symbol, in bits. + * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16; +} - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->isAdjustedToUTC); - isset_isAdjustedToUTC = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->unit.read(iprot); - isset_unit = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; +/* FSE_bitCost() : + * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16; + U32 const threshold = (minNbBits+1) << 16; + assert(tableLog < 16); + assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */ + { U32 const tableSize = 1 << tableLog; + U32 
const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize); + U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */ + U32 const bitMultiplier = 1 << accuracyLog; + assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold); + assert(normalizedDeltaFromThreshold <= bitMultiplier); + return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold; } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_isAdjustedToUTC) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_unit) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; } -uint32_t TimeType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TimeType"); - xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::duckdb_apache::thrift::protocol::T_BOOL, 1); - xfer += oprot->writeBool(this->isAdjustedToUTC); - xfer += oprot->writeFieldEnd(); +/* ====== Decompression ====== */ - xfer += oprot->writeFieldBegin("unit", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); - xfer += this->unit.write(oprot); - xfer += oprot->writeFieldEnd(); +typedef struct { + U16 tableLog; + U16 fastMode; +} FSE_DTableHeader; /* sizeof U32 */ - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} +typedef struct +{ + unsigned short newState; + unsigned char symbol; + unsigned char nbBits; +} FSE_decode_t; /* size == U32 */ -void swap(TimeType &a, TimeType &b) { - using ::std::swap; - swap(a.isAdjustedToUTC, b.isAdjustedToUTC); - swap(a.unit, b.unit); +MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; + DStatePtr->state 
= BIT_readBits(bitD, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; } -TimeType::TimeType(const TimeType& other28) { - isAdjustedToUTC = other28.isAdjustedToUTC; - unit = other28.unit; -} -TimeType& TimeType::operator=(const TimeType& other29) { - isAdjustedToUTC = other29.isAdjustedToUTC; - unit = other29.unit; - return *this; -} -void TimeType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "TimeType("; - out << "isAdjustedToUTC=" << to_string(isAdjustedToUTC); - out << ", " << "unit=" << to_string(unit); - out << ")"; +MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + return DInfo.symbol; } - -IntType::~IntType() throw() { +MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.newState + lowBits; } +MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBits(bitD, nbBits); -void IntType::__set_bitWidth(const int8_t val) { - this->bitWidth = val; + DStatePtr->state = DInfo.newState + lowBits; + return symbol; } -void IntType::__set_isSigned(const bool val) { - this->isSigned = val; -} -std::ostream& operator<<(std::ostream& out, const IntType& obj) +/*! 
FSE_decodeSymbolFast() : + unsafe, only works if no symbol has a probability > 50% */ +MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) { - obj.printTo(out); - return out; + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; } +MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) +{ + return DStatePtr->state == 0; +} -uint32_t IntType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - xfer += iprot->readStructBegin(fname); - using ::duckdb_apache::thrift::protocol::TProtocolException; +#ifndef FSE_COMMONDEFS_ONLY - bool isset_bitWidth = false; - bool isset_isSigned = false; +/* ************************************************************** +* Tuning parameters +****************************************************************/ +/*!MEMORY_USAGE : +* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) 
+* Increasing memory usage improves compression ratio +* Reduced memory usage can improve speed, due to cache effect +* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ +#ifndef FSE_MAX_MEMORY_USAGE +# define FSE_MAX_MEMORY_USAGE 14 +#endif +#ifndef FSE_DEFAULT_MEMORY_USAGE +# define FSE_DEFAULT_MEMORY_USAGE 13 +#endif - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_BYTE) { - xfer += iprot->readByte(this->bitWidth); - isset_bitWidth = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->isSigned); - isset_isSigned = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } +/*!FSE_MAX_SYMBOL_VALUE : +* Maximum symbol value authorized. 
+* Required for proper stack allocation */ +#ifndef FSE_MAX_SYMBOL_VALUE +# define FSE_MAX_SYMBOL_VALUE 255 +#endif - xfer += iprot->readStructEnd(); +/* ************************************************************** +* template functions type & suffix +****************************************************************/ +#define FSE_FUNCTION_TYPE BYTE +#define FSE_FUNCTION_EXTENSION +#define FSE_DECODE_TYPE FSE_decode_t - if (!isset_bitWidth) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_isSigned) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} -uint32_t IntType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("IntType"); +#endif /* !FSE_COMMONDEFS_ONLY */ - xfer += oprot->writeFieldBegin("bitWidth", ::duckdb_apache::thrift::protocol::T_BYTE, 1); - xfer += oprot->writeByte(this->bitWidth); - xfer += oprot->writeFieldEnd(); - xfer += oprot->writeFieldBegin("isSigned", ::duckdb_apache::thrift::protocol::T_BOOL, 2); - xfer += oprot->writeBool(this->isSigned); - xfer += oprot->writeFieldEnd(); +/* *************************************************************** +* Constants +*****************************************************************/ +#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) +#define FSE_MAX_TABLESIZE (1U<writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} +#define FSE_TABLELOG_ABSOLUTE_MAX 15 +#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX +# error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" +#endif -void swap(IntType &a, IntType &b) { - using ::std::swap; - swap(a.bitWidth, b.bitWidth); - swap(a.isSigned, b.isSigned); -} +#define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3) -IntType::IntType(const IntType& other30) { - bitWidth = other30.bitWidth; - isSigned = other30.isSigned; -} 
-IntType& IntType::operator=(const IntType& other31) { - bitWidth = other31.bitWidth; - isSigned = other31.isSigned; - return *this; -} -void IntType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "IntType("; - out << "bitWidth=" << to_string(bitWidth); - out << ", " << "isSigned=" << to_string(isSigned); - out << ")"; } +#endif /* FSE_H_FSE_STATIC_LINKING_ONLY */ -JsonType::~JsonType() throw() { -} -std::ostream& operator<<(std::ostream& out, const JsonType& obj) -{ - obj.printTo(out); - return out; -} +// LICENSE_CHANGE_END -uint32_t JsonType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list - xfer += iprot->readStructBegin(fname); +/* ****************************************************************** + * huff0 huffman codec, + * part of Finite State Entropy library + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ - using ::duckdb_apache::thrift::protocol::TProtocolException; +#ifndef HUF_H_298734234 +#define HUF_H_298734234 +/* *** Dependencies *** */ +#include /* size_t */ - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - xfer += iprot->readStructEnd(); +/* *** library symbols visibility *** */ +/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual, + * HUF symbols remain "private" (internal symbols for library only). + * Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */ +#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) +# define HUF_PUBLIC_API __attribute__ ((visibility ("default"))) +#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ +# define HUF_PUBLIC_API __declspec(dllexport) +#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) +# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */ +#else +# define HUF_PUBLIC_API +#endif - return xfer; -} +namespace duckdb_zstd { -uint32_t JsonType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("JsonType"); +/* ========================== */ +/* *** simple functions *** */ +/* ========================== */ - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} +/** HUF_compress() : + * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. + * 'dst' buffer must be already allocated. + * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). 
+ * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. + * @return : size of compressed data (<= `dstCapacity`). + * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! + * if HUF_isError(return), compression failed (more details using HUF_getErrorName()) + */ +HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); -void swap(JsonType &a, JsonType &b) { - using ::std::swap; - (void) a; - (void) b; -} +/** HUF_decompress() : + * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', + * into already allocated buffer 'dst', of minimum size 'dstSize'. + * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. + * Note : in contrast with FSE, HUF_decompress can regenerate + * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, + * because it knows size to regenerate (originalSize). + * @return : size of regenerated data (== originalSize), + * or an error code, which can be tested using HUF_isError() + */ +HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize); -JsonType::JsonType(const JsonType& other32) { - (void) other32; -} -JsonType& JsonType::operator=(const JsonType& other33) { - (void) other33; - return *this; -} -void JsonType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "JsonType("; - out << ")"; -} +/* *** Tool functions *** */ +#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ +HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ -BsonType::~BsonType() throw() { -} +/* Error Management */ +HUF_PUBLIC_API unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ +HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ -std::ostream& 
operator<<(std::ostream& out, const BsonType& obj) -{ - obj.printTo(out); - return out; -} +/* *** Advanced function *** */ -uint32_t BsonType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +/** HUF_compress2() : + * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`. + * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX . + * `tableLog` must be `<= HUF_TABLELOG_MAX` . */ +HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog); - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +/** HUF_compress4X_wksp() : + * Same as HUF_compress2(), but uses externally allocated `workSpace`. + * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */ +#define HUF_WORKSPACE_SIZE ((6 << 10) + 256) +#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) +HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize); - xfer += iprot->readStructBegin(fname); +#endif /* HUF_H_298734234 */ - using ::duckdb_apache::thrift::protocol::TProtocolException; +// LICENSE_CHANGE_END - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - xfer += iprot->readStructEnd(); - return xfer; -} +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list -uint32_t BsonType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker 
tracker(*oprot); - xfer += oprot->writeStructBegin("BsonType"); +/* ****************************************************************** + * WARNING !! + * The following section contains advanced and experimental definitions + * which shall never be used in the context of a dynamic library, + * because they are not guaranteed to remain stable in the future. + * Only consider them in association with static linking. + * **************************************************************** */ +#ifndef HUF_H_HUF_STATIC_LINKING_ONLY +#define HUF_H_HUF_STATIC_LINKING_ONLY - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} +/* *** Dependencies *** */ + /* U32 */ -void swap(BsonType &a, BsonType &b) { - using ::std::swap; - (void) a; - (void) b; -} -BsonType::BsonType(const BsonType& other34) { - (void) other34; -} -BsonType& BsonType::operator=(const BsonType& other35) { - (void) other35; - return *this; -} -void BsonType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "BsonType("; - out << ")"; -} +/* *** Constants *** */ +#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ +#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */ +#define HUF_SYMBOLVALUE_MAX 255 +#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ +#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) +# error "HUF_TABLELOG_MAX is too large !" 
+#endif -LogicalType::~LogicalType() throw() { -} +/* **************************************** +* Static allocation +******************************************/ +/* HUF buffer bounds */ +#define HUF_CTABLEBOUND 129 +#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true when incompressible is pre-filtered with fast heuristic */ +#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ -void LogicalType::__set_STRING(const StringType& val) { - this->STRING = val; -__isset.STRING = true; -} +/* static allocation of HUF's Compression Table */ +#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */ +#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32)) +#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ + U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \ + void* name##hv = &(name##hb); \ + HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */ -void LogicalType::__set_MAP(const MapType& val) { - this->MAP = val; -__isset.MAP = true; -} +/* static allocation of HUF's DTable */ +typedef U32 HUF_DTable; +#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog))) +#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) } +#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) } -void LogicalType::__set_LIST(const ListType& val) { - this->LIST = val; -__isset.LIST = true; -} -void LogicalType::__set_ENUM(const EnumType& val) { - this->ENUM = val; -__isset.ENUM = true; -} +/* **************************************** +* Advanced decompression functions +******************************************/ +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder 
*/ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +#endif -void LogicalType::__set_DECIMAL(const DecimalType& val) { - this->DECIMAL = val; -__isset.DECIMAL = true; -} +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */ +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */ +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */ +size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ +#endif -void LogicalType::__set_DATE(const DateType& val) { - this->DATE = val; -__isset.DATE = true; -} -void LogicalType::__set_TIME(const TimeType& val) { - this->TIME = val; -__isset.TIME = true; -} +/* **************************************** + * HUF detailed API + * ****************************************/ -void LogicalType::__set_TIMESTAMP(const TimestampType& val) { - this->TIMESTAMP = val; -__isset.TIMESTAMP = true; -} +/*! HUF_compress() does the following: + * 1. 
count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "zstd/common/fse.h") + * 2. (optional) refine tableLog using HUF_optimalTableLog() + * 3. build Huffman table from count using HUF_buildCTable() + * 4. save Huffman table to memory buffer using HUF_writeCTable() + * 5. encode the data stream using HUF_compress4X_usingCTable() + * + * The following API allows targeting specific sub-functions for advanced tasks. + * For example, it's possible to compress several blocks using the same 'CTable', + * or to save and regenerate 'CTable' using external methods. + */ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); +typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ +size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); +int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); -void LogicalType::__set_INTEGER(const IntType& val) { - this->INTEGER = val; -__isset.INTEGER = true; -} +typedef enum { + HUF_repeat_none, /**< Cannot use the previous table */ + HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ + HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */ + } HUF_repeat; +/** HUF_compress4X_repeat() : + * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. 
+ * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. + * If preferRepeat then the old table will always be used if valid. */ +size_t HUF_compress4X_repeat(void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); -void LogicalType::__set_UNKNOWN(const NullType& val) { - this->UNKNOWN = val; -__isset.UNKNOWN = true; -} +/** HUF_buildCTable_wksp() : + * Same as HUF_buildCTable(), but using externally allocated scratch buffer. + * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. + */ +#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) +#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_buildCTable_wksp (HUF_CElt* tree, + const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, + void* workSpace, size_t wkspSize); -void LogicalType::__set_JSON(const JsonType& val) { - this->JSON = val; -__isset.JSON = true; -} +/*! HUF_readStats() : + * Read compact Huffman tree, saved by HUF_writeCTable(). + * `huffWeight` is destination buffer. + * @return : size read from `src` , or an error Code . + * Note : Needed by HUF_readCTable() and HUF_readDTableXn() . 
*/ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize); -void LogicalType::__set_BSON(const BsonType& val) { - this->BSON = val; -__isset.BSON = true; -} +/** HUF_readCTable() : + * Loading a CTable saved with HUF_writeCTable() */ +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); -void LogicalType::__set_UUID(const UUIDType& val) { - this->UUID = val; -__isset.UUID = true; -} -std::ostream& operator<<(std::ostream& out, const LogicalType& obj) -{ - obj.printTo(out); - return out; -} +/** HUF_getNbBits() : + * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX + * Note 1 : is not inlined, as HUF_CElt definition is private + * Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */ +U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue); +/* + * HUF_decompress() does the following: + * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics + * 2. build Huffman table from save, using HUF_readDTableX?() + * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable() + */ -uint32_t LogicalType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +/** HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +/** + * The minimum workspace size for the `workSpace` used in + * HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp(). 
+ * + * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when + * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15. + * Buffer overflow errors may potentially occur if code modifications result in + * a required workspace size greater than that specified in the following + * macro. + */ +#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10) +#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) - xfer += iprot->readStructBegin(fname); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +#endif - using ::duckdb_apache::thrift::protocol::TProtocolException; +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->STRING.read(iprot); - this->__isset.STRING = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->MAP.read(iprot); - this->__isset.MAP = 
true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->LIST.read(iprot); - this->__isset.LIST = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->ENUM.read(iprot); - this->__isset.ENUM = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->DECIMAL.read(iprot); - this->__isset.DECIMAL = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->DATE.read(iprot); - this->__isset.DATE = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->TIME.read(iprot); - this->__isset.TIME = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->TIMESTAMP.read(iprot); - this->__isset.TIMESTAMP = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 10: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->INTEGER.read(iprot); - this->__isset.INTEGER = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 11: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->UNKNOWN.read(iprot); - this->__isset.UNKNOWN = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 12: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->JSON.read(iprot); - this->__isset.JSON = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 13: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->BSON.read(iprot); - this->__isset.BSON = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 14: - if 
(ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->UUID.read(iprot); - this->__isset.UUID = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } +/* ====================== */ +/* single stream variants */ +/* ====================== */ - xfer += iprot->readStructEnd(); +size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +/** HUF_compress1X_repeat() : + * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. + * If preferRepeat then the old table will always be used if valid. 
*/ +size_t HUF_compress1X_repeat(void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); - return xfer; -} +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ +#endif -uint32_t LogicalType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("LogicalType"); +size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); +size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ +#endif - if (this->__isset.STRING) { - xfer += oprot->writeFieldBegin("STRING", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); - 
xfer += this->STRING.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.MAP) { - xfer += oprot->writeFieldBegin("MAP", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); - xfer += this->MAP.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.LIST) { - xfer += oprot->writeFieldBegin("LIST", ::duckdb_apache::thrift::protocol::T_STRUCT, 3); - xfer += this->LIST.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.ENUM) { - xfer += oprot->writeFieldBegin("ENUM", ::duckdb_apache::thrift::protocol::T_STRUCT, 4); - xfer += this->ENUM.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.DECIMAL) { - xfer += oprot->writeFieldBegin("DECIMAL", ::duckdb_apache::thrift::protocol::T_STRUCT, 5); - xfer += this->DECIMAL.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.DATE) { - xfer += oprot->writeFieldBegin("DATE", ::duckdb_apache::thrift::protocol::T_STRUCT, 6); - xfer += this->DATE.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.TIME) { - xfer += oprot->writeFieldBegin("TIME", ::duckdb_apache::thrift::protocol::T_STRUCT, 7); - xfer += this->TIME.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.TIMESTAMP) { - xfer += oprot->writeFieldBegin("TIMESTAMP", ::duckdb_apache::thrift::protocol::T_STRUCT, 8); - xfer += this->TIMESTAMP.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.INTEGER) { - xfer += oprot->writeFieldBegin("INTEGER", ::duckdb_apache::thrift::protocol::T_STRUCT, 10); - xfer += this->INTEGER.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.UNKNOWN) { - xfer += oprot->writeFieldBegin("UNKNOWN", ::duckdb_apache::thrift::protocol::T_STRUCT, 11); - xfer += this->UNKNOWN.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.JSON) { - xfer += oprot->writeFieldBegin("JSON", ::duckdb_apache::thrift::protocol::T_STRUCT, 12); - xfer += this->JSON.write(oprot); - xfer += 
oprot->writeFieldEnd(); - } - if (this->__isset.BSON) { - xfer += oprot->writeFieldBegin("BSON", ::duckdb_apache::thrift::protocol::T_STRUCT, 13); - xfer += this->BSON.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.UUID) { - xfer += oprot->writeFieldBegin("UUID", ::duckdb_apache::thrift::protocol::T_STRUCT, 14); - xfer += this->UUID.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif -void swap(LogicalType &a, LogicalType &b) { - using ::std::swap; - swap(a.STRING, b.STRING); - swap(a.MAP, b.MAP); - swap(a.LIST, b.LIST); - swap(a.ENUM, b.ENUM); - swap(a.DECIMAL, b.DECIMAL); - swap(a.DATE, b.DATE); - swap(a.TIME, b.TIME); - swap(a.TIMESTAMP, b.TIMESTAMP); - swap(a.INTEGER, b.INTEGER); - swap(a.UNKNOWN, b.UNKNOWN); - swap(a.JSON, b.JSON); - swap(a.BSON, b.BSON); - swap(a.UUID, b.UUID); - swap(a.__isset, b.__isset); -} +/* BMI2 variants. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. 
+ */ +size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +#endif +size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); +size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); -LogicalType::LogicalType(const LogicalType& other36) { - STRING = other36.STRING; - MAP = other36.MAP; - LIST = other36.LIST; - ENUM = other36.ENUM; - DECIMAL = other36.DECIMAL; - DATE = other36.DATE; - TIME = other36.TIME; - TIMESTAMP = other36.TIMESTAMP; - INTEGER = other36.INTEGER; - UNKNOWN = other36.UNKNOWN; - JSON = other36.JSON; - BSON = other36.BSON; - UUID = other36.UUID; - __isset = other36.__isset; -} -LogicalType& LogicalType::operator=(const LogicalType& other37) { - STRING = other37.STRING; - MAP = other37.MAP; - LIST = other37.LIST; - ENUM = other37.ENUM; - DECIMAL = other37.DECIMAL; - DATE = other37.DATE; - TIME = other37.TIME; - TIMESTAMP = other37.TIMESTAMP; - INTEGER = other37.INTEGER; - UNKNOWN = other37.UNKNOWN; - JSON = other37.JSON; - BSON = other37.BSON; - UUID = other37.UUID; - __isset = other37.__isset; - return *this; -} -void LogicalType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "LogicalType("; - out << "STRING="; (__isset.STRING ? (out << to_string(STRING)) : (out << "")); - out << ", " << "MAP="; (__isset.MAP ? (out << to_string(MAP)) : (out << "")); - out << ", " << "LIST="; (__isset.LIST ? (out << to_string(LIST)) : (out << "")); - out << ", " << "ENUM="; (__isset.ENUM ? 
(out << to_string(ENUM)) : (out << "")); - out << ", " << "DECIMAL="; (__isset.DECIMAL ? (out << to_string(DECIMAL)) : (out << "")); - out << ", " << "DATE="; (__isset.DATE ? (out << to_string(DATE)) : (out << "")); - out << ", " << "TIME="; (__isset.TIME ? (out << to_string(TIME)) : (out << "")); - out << ", " << "TIMESTAMP="; (__isset.TIMESTAMP ? (out << to_string(TIMESTAMP)) : (out << "")); - out << ", " << "INTEGER="; (__isset.INTEGER ? (out << to_string(INTEGER)) : (out << "")); - out << ", " << "UNKNOWN="; (__isset.UNKNOWN ? (out << to_string(UNKNOWN)) : (out << "")); - out << ", " << "JSON="; (__isset.JSON ? (out << to_string(JSON)) : (out << "")); - out << ", " << "BSON="; (__isset.BSON ? (out << to_string(BSON)) : (out << "")); - out << ", " << "UUID="; (__isset.UUID ? (out << to_string(UUID)) : (out << "")); - out << ")"; } - -SchemaElement::~SchemaElement() throw() { -} +#endif /* HUF_STATIC_LINKING_ONLY */ -void SchemaElement::__set_type(const Type::type val) { - this->type = val; -__isset.type = true; -} -void SchemaElement::__set_type_length(const int32_t val) { - this->type_length = val; -__isset.type_length = true; -} +// LICENSE_CHANGE_END -void SchemaElement::__set_repetition_type(const FieldRepetitionType::type val) { - this->repetition_type = val; -__isset.repetition_type = true; -} -void SchemaElement::__set_name(const std::string& val) { - this->name = val; -} +namespace duckdb_zstd { -void SchemaElement::__set_num_children(const int32_t val) { - this->num_children = val; -__isset.num_children = true; -} +/*=== Version ===*/ +unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } -void SchemaElement::__set_converted_type(const ConvertedType::type val) { - this->converted_type = val; -__isset.converted_type = true; -} -void SchemaElement::__set_scale(const int32_t val) { - this->scale = val; -__isset.scale = true; -} +/*=== Error Management ===*/ +unsigned FSE_isError(size_t code) { return ERR_isError(code); } +const char* 
FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } -void SchemaElement::__set_precision(const int32_t val) { - this->precision = val; -__isset.precision = true; -} +unsigned HUF_isError(size_t code) { return ERR_isError(code); } +const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } -void SchemaElement::__set_field_id(const int32_t val) { - this->field_id = val; -__isset.field_id = true; -} -void SchemaElement::__set_logicalType(const LogicalType& val) { - this->logicalType = val; -__isset.logicalType = true; -} -std::ostream& operator<<(std::ostream& out, const SchemaElement& obj) +/*-************************************************************** +* FSE NCount encoding-decoding +****************************************************************/ +size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) { - obj.printTo(out); - return out; -} + const BYTE* const istart = (const BYTE*) headerBuffer; + const BYTE* const iend = istart + hbSize; + const BYTE* ip = istart; + int nbBits; + int remaining; + int threshold; + U32 bitStream; + int bitCount; + unsigned charnum = 0; + int previous0 = 0; + if (hbSize < 4) { + /* This function only works when hbSize >= 4 */ + char buffer[4]; + memset(buffer, 0, sizeof(buffer)); + memcpy(buffer, headerBuffer, hbSize); + { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, + buffer, sizeof(buffer)); + if (FSE_isError(countSize)) return countSize; + if (countSize > hbSize) return ERROR(corruption_detected); + return countSize; + } } + assert(hbSize >= 4); -uint32_t SchemaElement::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { + /* init */ + memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ + bitStream = MEM_readLE32(ip); + nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ 
+ if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); + bitStream >>= 4; + bitCount = 4; + *tableLogPtr = nbBits; + remaining = (1<1) & (charnum<=*maxSVPtr)) { + if (previous0) { + unsigned n0 = charnum; + while ((bitStream & 0xFFFF) == 0xFFFF) { + n0 += 24; + if (ip < iend-5) { + ip += 2; + bitStream = MEM_readLE32(ip) >> bitCount; + } else { + bitStream >>= 16; + bitCount += 16; + } } + while ((bitStream & 3) == 3) { + n0 += 3; + bitStream >>= 2; + bitCount += 2; + } + n0 += bitStream & 3; + bitCount += 2; + if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); + while (charnum < n0) normalizedCounter[charnum++] = 0; + if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + assert((bitCount >> 3) <= 3); /* For first condition to work */ + ip += bitCount>>3; + bitCount &= 7; + bitStream = MEM_readLE32(ip) >> bitCount; + } else { + bitStream >>= 2; + } } + { int const max = (2*threshold-1) - remaining; + int count; - xfer += iprot->readStructBegin(fname); + if ((bitStream & (threshold-1)) < (U32)max) { + count = bitStream & (threshold-1); + bitCount += nbBits-1; + } else { + count = bitStream & (2*threshold-1); + if (count >= threshold) count -= max; + bitCount += nbBits; + } - using ::duckdb_apache::thrift::protocol::TProtocolException; + count--; /* extra accuracy */ + remaining -= count < 0 ? 
-count : count; /* -1 means +1 */ + normalizedCounter[charnum++] = (short)count; + previous0 = !count; + while (remaining < threshold) { + nbBits--; + threshold >>= 1; + } - bool isset_name = false; + if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + ip += bitCount>>3; + bitCount &= 7; + } else { + bitCount -= (int)(8 * (iend - 4 - ip)); + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> (bitCount & 31); + } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ + if (remaining != 1) return ERROR(corruption_detected); + if (bitCount > 32) return ERROR(corruption_detected); + *maxSVPtr = charnum-1; - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast38; - xfer += iprot->readI32(ecast38); - this->type = (Type::type)ecast38; - this->__isset.type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->type_length); - this->__isset.type_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast39; - xfer += iprot->readI32(ecast39); - this->repetition_type = (FieldRepetitionType::type)ecast39; - this->__isset.repetition_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->name); - isset_name = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_children); - this->__isset.num_children = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast40; - xfer += 
iprot->readI32(ecast40); - this->converted_type = (ConvertedType::type)ecast40; - this->__isset.converted_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->scale); - this->__isset.scale = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->precision); - this->__isset.precision = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->field_id); - this->__isset.field_id = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 10: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->logicalType.read(iprot); - this->__isset.logicalType = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_name) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; + ip += (bitCount+7)>>3; + return ip-istart; } -uint32_t SchemaElement::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("SchemaElement"); - - if (this->__isset.type) { - xfer += oprot->writeFieldBegin("type", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32((int32_t)this->type); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.type_length) { - xfer += oprot->writeFieldBegin("type_length", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->type_length); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.repetition_type) { - xfer += oprot->writeFieldBegin("repetition_type", 
::duckdb_apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32((int32_t)this->repetition_type); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldBegin("name", ::duckdb_apache::thrift::protocol::T_STRING, 4); - xfer += oprot->writeString(this->name); - xfer += oprot->writeFieldEnd(); - if (this->__isset.num_children) { - xfer += oprot->writeFieldBegin("num_children", ::duckdb_apache::thrift::protocol::T_I32, 5); - xfer += oprot->writeI32(this->num_children); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.converted_type) { - xfer += oprot->writeFieldBegin("converted_type", ::duckdb_apache::thrift::protocol::T_I32, 6); - xfer += oprot->writeI32((int32_t)this->converted_type); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.scale) { - xfer += oprot->writeFieldBegin("scale", ::duckdb_apache::thrift::protocol::T_I32, 7); - xfer += oprot->writeI32(this->scale); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.precision) { - xfer += oprot->writeFieldBegin("precision", ::duckdb_apache::thrift::protocol::T_I32, 8); - xfer += oprot->writeI32(this->precision); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.field_id) { - xfer += oprot->writeFieldBegin("field_id", ::duckdb_apache::thrift::protocol::T_I32, 9); - xfer += oprot->writeI32(this->field_id); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.logicalType) { - xfer += oprot->writeFieldBegin("logicalType", ::duckdb_apache::thrift::protocol::T_STRUCT, 10); - xfer += this->logicalType.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} +/*! HUF_readStats() : + Read compact Huffman tree, saved by HUF_writeCTable(). + `huffWeight` is destination buffer. + `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. + @return : size read from `src` , or an error Code . + Note : Needed by HUF_readCTable() and HUF_readDTableX?() . 
+*/ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize) +{ + U32 weightTotal; + const BYTE* ip = (const BYTE*) src; + size_t iSize; + size_t oSize; -void swap(SchemaElement &a, SchemaElement &b) { - using ::std::swap; - swap(a.type, b.type); - swap(a.type_length, b.type_length); - swap(a.repetition_type, b.repetition_type); - swap(a.name, b.name); - swap(a.num_children, b.num_children); - swap(a.converted_type, b.converted_type); - swap(a.scale, b.scale); - swap(a.precision, b.precision); - swap(a.field_id, b.field_id); - swap(a.logicalType, b.logicalType); - swap(a.__isset, b.__isset); -} + if (!srcSize) return ERROR(srcSize_wrong); + iSize = ip[0]; + /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */ -SchemaElement::SchemaElement(const SchemaElement& other41) { - type = other41.type; - type_length = other41.type_length; - repetition_type = other41.repetition_type; - name = other41.name; - num_children = other41.num_children; - converted_type = other41.converted_type; - scale = other41.scale; - precision = other41.precision; - field_id = other41.field_id; - logicalType = other41.logicalType; - __isset = other41.__isset; -} -SchemaElement& SchemaElement::operator=(const SchemaElement& other42) { - type = other42.type; - type_length = other42.type_length; - repetition_type = other42.repetition_type; - name = other42.name; - num_children = other42.num_children; - converted_type = other42.converted_type; - scale = other42.scale; - precision = other42.precision; - field_id = other42.field_id; - logicalType = other42.logicalType; - __isset = other42.__isset; - return *this; -} -void SchemaElement::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "SchemaElement("; - out << "type="; (__isset.type ? (out << to_string(type)) : (out << "")); - out << ", " << "type_length="; (__isset.type_length ? 
(out << to_string(type_length)) : (out << "")); - out << ", " << "repetition_type="; (__isset.repetition_type ? (out << to_string(repetition_type)) : (out << "")); - out << ", " << "name=" << to_string(name); - out << ", " << "num_children="; (__isset.num_children ? (out << to_string(num_children)) : (out << "")); - out << ", " << "converted_type="; (__isset.converted_type ? (out << to_string(converted_type)) : (out << "")); - out << ", " << "scale="; (__isset.scale ? (out << to_string(scale)) : (out << "")); - out << ", " << "precision="; (__isset.precision ? (out << to_string(precision)) : (out << "")); - out << ", " << "field_id="; (__isset.field_id ? (out << to_string(field_id)) : (out << "")); - out << ", " << "logicalType="; (__isset.logicalType ? (out << to_string(logicalType)) : (out << "")); - out << ")"; -} + if (iSize >= 128) { /* special header */ + oSize = iSize - 127; + iSize = ((oSize+1)/2); + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + if (oSize >= hwSize) return ERROR(corruption_detected); + ip += 1; + { U32 n; + for (n=0; n> 4; + huffWeight[n+1] = ip[n/2] & 15; + } } } + else { /* header compressed with FSE (normal case) */ + FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */ + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */ + if (FSE_isError(oSize)) return oSize; + } + /* collect weight stats */ + memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); + weightTotal = 0; + { U32 n; for (n=0; n= HUF_TABLELOG_MAX) return ERROR(corruption_detected); + rankStats[huffWeight[n]]++; + weightTotal += (1 << huffWeight[n]) >> 1; + } } + if (weightTotal == 0) return ERROR(corruption_detected); -DataPageHeader::~DataPageHeader() throw() { -} + /* get last non-null symbol weight (implied, total must be 2^n) */ + { U32 const 
tableLog = BIT_highbit32(weightTotal) + 1; + if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); + *tableLogPtr = tableLog; + /* determine last weight */ + { U32 const total = 1 << tableLog; + U32 const rest = total - weightTotal; + U32 const verif = 1 << BIT_highbit32(rest); + U32 const lastWeight = BIT_highbit32(rest) + 1; + if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ + huffWeight[oSize] = (BYTE)lastWeight; + rankStats[lastWeight]++; + } } + /* check tree construction validity */ + if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ -void DataPageHeader::__set_num_values(const int32_t val) { - this->num_values = val; + /* results */ + *nbSymbolsPtr = (U32)(oSize+1); + return iSize+1; } -void DataPageHeader::__set_encoding(const Encoding::type val) { - this->encoding = val; } -void DataPageHeader::__set_definition_level_encoding(const Encoding::type val) { - this->definition_level_encoding = val; -} -void DataPageHeader::__set_repetition_level_encoding(const Encoding::type val) { - this->repetition_level_encoding = val; -} +// LICENSE_CHANGE_END -void DataPageHeader::__set_statistics(const Statistics& val) { - this->statistics = val; -__isset.statistics = true; -} -std::ostream& operator<<(std::ostream& out, const DataPageHeader& obj) -{ - obj.printTo(out); - return out; -} +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list -uint32_t DataPageHeader::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +/* The purpose of this file is to have a single list of error strings embedded in binary */ - xfer += iprot->readStructBegin(fname); - using ::duckdb_apache::thrift::protocol::TProtocolException; - bool isset_num_values = false; - bool isset_encoding = false; - bool isset_definition_level_encoding = false; - bool isset_repetition_level_encoding = false; +namespace duckdb_zstd { - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) +const char* ERR_getErrorString(ERR_enum code) +{ +#ifdef ZSTD_STRIP_ERROR_STRINGS + (void)code; + return "Error strings stripped"; +#else + static const char* const notErrorCode = "Unspecified error code"; + switch( code ) { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast43; - xfer += iprot->readI32(ecast43); - this->encoding = (Encoding::type)ecast43; - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast44; - xfer += iprot->readI32(ecast44); - this->definition_level_encoding = (Encoding::type)ecast44; - isset_definition_level_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if 
(ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast45; - xfer += iprot->readI32(ecast45); - this->repetition_level_encoding = (Encoding::type)ecast45; - isset_repetition_level_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->statistics.read(iprot); - this->__isset.statistics = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; + case PREFIX(no_error): return "No error detected"; + case PREFIX(GENERIC): return "Error (generic)"; + case PREFIX(prefix_unknown): return "Unknown frame descriptor"; + case PREFIX(version_unsupported): return "Version not supported"; + case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; + case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; + case PREFIX(corruption_detected): return "Corrupted block detected"; + case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; + case PREFIX(parameter_unsupported): return "Unsupported parameter"; + case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; + case PREFIX(init_missing): return "Context should be init first"; + case PREFIX(memory_allocation): return "Allocation error : not enough memory"; + case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough"; + case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; + case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; + case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; + case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; + case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; + case PREFIX(dictionary_wrong): return "Dictionary mismatch"; + case PREFIX(dictionaryCreation_failed): 
return "Cannot create Dictionary from provided samples"; + case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; + case PREFIX(srcSize_wrong): return "Src size is incorrect"; + case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer"; + /* following error codes are not stable and may be removed or changed in a future version */ + case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; + case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; + case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; + case PREFIX(maxCode): + default: return notErrorCode; } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); +#endif +} - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_definition_level_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_repetition_level_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; } -uint32_t DataPageHeader::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DataPageHeader"); - xfer += oprot->writeFieldBegin("num_values", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->num_values); - xfer += oprot->writeFieldEnd(); +// LICENSE_CHANGE_END - xfer += oprot->writeFieldBegin("encoding", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32((int32_t)this->encoding); - xfer += oprot->writeFieldEnd(); - xfer += oprot->writeFieldBegin("definition_level_encoding", ::duckdb_apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32((int32_t)this->definition_level_encoding); - xfer += oprot->writeFieldEnd(); +// LICENSE_CHANGE_BEGIN +// The following code 
up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list - xfer += oprot->writeFieldBegin("repetition_level_encoding", ::duckdb_apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32((int32_t)this->repetition_level_encoding); - xfer += oprot->writeFieldEnd(); +/* ****************************************************************** + * FSE : Finite State Entropy decoder + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ - if (this->__isset.statistics) { - xfer += oprot->writeFieldBegin("statistics", ::duckdb_apache::thrift::protocol::T_STRUCT, 5); - xfer += this->statistics.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} -void swap(DataPageHeader &a, DataPageHeader &b) { - using ::std::swap; - swap(a.num_values, b.num_values); - swap(a.encoding, b.encoding); - swap(a.definition_level_encoding, b.definition_level_encoding); - swap(a.repetition_level_encoding, b.repetition_level_encoding); - swap(a.statistics, b.statistics); - swap(a.__isset, b.__isset); -} +/* ************************************************************** +* Includes +****************************************************************/ +#include /* malloc, free, qsort */ +#include /* memcpy, memset */ -DataPageHeader::DataPageHeader(const DataPageHeader& other46) { - num_values = other46.num_values; - encoding 
= other46.encoding; - definition_level_encoding = other46.definition_level_encoding; - repetition_level_encoding = other46.repetition_level_encoding; - statistics = other46.statistics; - __isset = other46.__isset; -} -DataPageHeader& DataPageHeader::operator=(const DataPageHeader& other47) { - num_values = other47.num_values; - encoding = other47.encoding; - definition_level_encoding = other47.definition_level_encoding; - repetition_level_encoding = other47.repetition_level_encoding; - statistics = other47.statistics; - __isset = other47.__isset; - return *this; -} -void DataPageHeader::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "DataPageHeader("; - out << "num_values=" << to_string(num_values); - out << ", " << "encoding=" << to_string(encoding); - out << ", " << "definition_level_encoding=" << to_string(definition_level_encoding); - out << ", " << "repetition_level_encoding=" << to_string(repetition_level_encoding); - out << ", " << "statistics="; (__isset.statistics ? 
(out << to_string(statistics)) : (out << "")); - out << ")"; -} -IndexPageHeader::~IndexPageHeader() throw() { -} -std::ostream& operator<<(std::ostream& out, const IndexPageHeader& obj) -{ - obj.printTo(out); - return out; -} -uint32_t IndexPageHeader::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +/* ************************************************************** +* Error Management +****************************************************************/ +// #define FSE_isError ERR_isError +#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ - xfer += iprot->readStructBegin(fname); - using ::duckdb_apache::thrift::protocol::TProtocolException; +/* ************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) - xfer += iprot->readStructEnd(); +namespace duckdb_zstd { - return xfer; +/* Function templates */ +FSE_DTable* FSE_createDTable (unsigned tableLog) +{ + if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; + return 
(FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); } -uint32_t IndexPageHeader::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("IndexPageHeader"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; +void FSE_freeDTable (FSE_DTable* dt) +{ + free(dt); } -void swap(IndexPageHeader &a, IndexPageHeader &b) { - using ::std::swap; - (void) a; - (void) b; -} +size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ + FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); + U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1]; -IndexPageHeader::IndexPageHeader(const IndexPageHeader& other48) { - (void) other48; -} -IndexPageHeader& IndexPageHeader::operator=(const IndexPageHeader& other49) { - (void) other49; - return *this; -} -void IndexPageHeader::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "IndexPageHeader("; - out << ")"; -} + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + U32 highThreshold = tableSize-1; + /* Sanity Checks */ + if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); -DictionaryPageHeader::~DictionaryPageHeader() throw() { -} + /* Init, lay down lowprob symbols */ + { FSE_DTableHeader DTableH; + DTableH.tableLog = (U16)tableLog; + DTableH.fastMode = 1; + { S16 const largeLimit= (S16)(1 << (tableLog-1)); + U32 s; + for (s=0; s= largeLimit) DTableH.fastMode=0; + symbolNext[s] = normalizedCounter[s]; + } } } + memcpy(dt, &DTableH, sizeof(DTableH)); + } + /* Spread symbols */ + { U32 const tableMask = tableSize-1; + U32 const step = 
FSE_TABLESTEP(tableSize); + U32 s, position = 0; + for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } } + if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } -void DictionaryPageHeader::__set_num_values(const int32_t val) { - this->num_values = val; -} + /* Build Decoding table */ + { U32 u; + for (u=0; uencoding = val; + return 0; } -void DictionaryPageHeader::__set_is_sorted(const bool val) { - this->is_sorted = val; -__isset.is_sorted = true; -} -std::ostream& operator<<(std::ostream& out, const DictionaryPageHeader& obj) + +#ifndef FSE_COMMONDEFS_ONLY + +/*-******************************************************* +* Decompression (Byte symbols) +*********************************************************/ +size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue) { - obj.printTo(out); - return out; -} + void* ptr = dt; + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + void* dPtr = dt + 1; + FSE_decode_t* const cell = (FSE_decode_t*)dPtr; + DTableH->tableLog = 0; + DTableH->fastMode = 0; -uint32_t DictionaryPageHeader::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { + cell->newState = 0; + cell->symbol = symbolValue; + cell->nbBits = 0; - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; + return 0; +} - xfer += iprot->readStructBegin(fname); - using ::duckdb_apache::thrift::protocol::TProtocolException; +size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) +{ + void* ptr = dt; + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + void* dPtr = dt + 1; + FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr; + const unsigned tableSize = 1 << nbBits; + const unsigned tableMask = tableSize - 1; + const unsigned maxSV1 = tableMask+1; + unsigned s; - bool isset_num_values = false; - bool 
isset_encoding = false; + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast50; - xfer += iprot->readI32(ecast50); - this->encoding = (Encoding::type)ecast50; - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->is_sorted); - this->__isset.is_sorted = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; + /* Build Decoding Table */ + DTableH->tableLog = (U16)nbBits; + DTableH->fastMode = 1; + for (s=0; sreadFieldEnd(); - } - - xfer += iprot->readStructEnd(); - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; + return 0; } -uint32_t DictionaryPageHeader::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DictionaryPageHeader"); +FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic( + void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt, const unsigned fast) +{ + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const omax = op + maxDstSize; + BYTE* const olimit = omax-3; - xfer += oprot->writeFieldBegin("num_values", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += 
oprot->writeI32(this->num_values); - xfer += oprot->writeFieldEnd(); + BIT_DStream_t bitD; + FSE_DState_t state1; + FSE_DState_t state2; - xfer += oprot->writeFieldBegin("encoding", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32((int32_t)this->encoding); - xfer += oprot->writeFieldEnd(); + /* Init */ + CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize)); - if (this->__isset.is_sorted) { - xfer += oprot->writeFieldBegin("is_sorted", ::duckdb_apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->is_sorted); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} + FSE_initDState(&state1, &bitD, dt); + FSE_initDState(&state2, &bitD, dt); -void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) { - using ::std::swap; - swap(a.num_values, b.num_values); - swap(a.encoding, b.encoding); - swap(a.is_sorted, b.is_sorted); - swap(a.__isset, b.__isset); -} +#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) -DictionaryPageHeader::DictionaryPageHeader(const DictionaryPageHeader& other51) { - num_values = other51.num_values; - encoding = other51.encoding; - is_sorted = other51.is_sorted; - __isset = other51.__isset; -} -DictionaryPageHeader& DictionaryPageHeader::operator=(const DictionaryPageHeader& other52) { - num_values = other52.num_values; - encoding = other52.encoding; - is_sorted = other52.is_sorted; - __isset = other52.__isset; - return *this; -} -void DictionaryPageHeader::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "DictionaryPageHeader("; - out << "num_values=" << to_string(num_values); - out << ", " << "encoding=" << to_string(encoding); - out << ", " << "is_sorted="; (__isset.is_sorted ? 
(out << to_string(is_sorted)) : (out << "")); - out << ")"; -} + /* 4 symbols per loop */ + for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) & (op sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); -DataPageHeaderV2::~DataPageHeaderV2() throw() { -} + op[1] = FSE_GETSYMBOL(&state2); + if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } } -void DataPageHeaderV2::__set_num_values(const int32_t val) { - this->num_values = val; -} + op[2] = FSE_GETSYMBOL(&state1); -void DataPageHeaderV2::__set_num_nulls(const int32_t val) { - this->num_nulls = val; -} + if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); -void DataPageHeaderV2::__set_num_rows(const int32_t val) { - this->num_rows = val; -} + op[3] = FSE_GETSYMBOL(&state2); + } -void DataPageHeaderV2::__set_encoding(const Encoding::type val) { - this->encoding = val; -} + /* tail */ + /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ + while (1) { + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state1); + if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state2); + break; + } -void DataPageHeaderV2::__set_definition_levels_byte_length(const int32_t val) { - this->definition_levels_byte_length = val; -} + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state2); + if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state1); + break; + } } -void DataPageHeaderV2::__set_repetition_levels_byte_length(const int32_t val) { - this->repetition_levels_byte_length = val; + return op-ostart; } -void DataPageHeaderV2::__set_is_compressed(const bool val) { - this->is_compressed = val; -__isset.is_compressed = true; -} -void 
DataPageHeaderV2::__set_statistics(const Statistics& val) { - this->statistics = val; -__isset.statistics = true; -} -std::ostream& operator<<(std::ostream& out, const DataPageHeaderV2& obj) +size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt) { - obj.printTo(out); - return out; + const void* ptr = dt; + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); + return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); } -uint32_t DataPageHeaderV2::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog) +{ + const BYTE* const istart = (const BYTE*)cSrc; + const BYTE* ip = istart; + short counting[FSE_MAX_SYMBOL_VALUE+1]; + unsigned tableLog; + unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; + /* normal FSE decoding mode */ + size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); + if (FSE_isError(NCountLength)) return NCountLength; + /* if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); */ /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */ + if (tableLog > maxLog) return ERROR(tableLog_tooLarge); + ip += NCountLength; + cSrcSize -= NCountLength; - xfer += iprot->readStructBegin(fname); + CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) ); - using ::duckdb_apache::thrift::protocol::TProtocolException; + return 
FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */ +} - bool isset_num_values = false; - bool isset_num_nulls = false; - bool isset_num_rows = false; - bool isset_encoding = false; - bool isset_definition_levels_byte_length = false; - bool isset_repetition_levels_byte_length = false; - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_nulls); - isset_num_nulls = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_rows); - isset_num_rows = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast53; - xfer += iprot->readI32(ecast53); - this->encoding = (Encoding::type)ecast53; - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->definition_levels_byte_length); - isset_definition_levels_byte_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->repetition_levels_byte_length); - isset_repetition_levels_byte_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->is_compressed); - this->__isset.is_compressed = true; - } else { - xfer += 
iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->statistics.read(iprot); - this->__isset.statistics = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } +typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; - xfer += iprot->readStructEnd(); +size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize) +{ + DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ + return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG); +} - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_nulls) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_rows) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_definition_levels_byte_length) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_repetition_levels_byte_length) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; } -uint32_t DataPageHeaderV2::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DataPageHeaderV2"); +#endif /* FSE_COMMONDEFS_ONLY */ - xfer += oprot->writeFieldBegin("num_values", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->num_values); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_nulls", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->num_nulls); - xfer += oprot->writeFieldEnd(); - xfer += oprot->writeFieldBegin("num_rows", 
::duckdb_apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(this->num_rows); - xfer += oprot->writeFieldEnd(); +// LICENSE_CHANGE_END - xfer += oprot->writeFieldBegin("encoding", ::duckdb_apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32((int32_t)this->encoding); - xfer += oprot->writeFieldEnd(); - xfer += oprot->writeFieldBegin("definition_levels_byte_length", ::duckdb_apache::thrift::protocol::T_I32, 5); - xfer += oprot->writeI32(this->definition_levels_byte_length); - xfer += oprot->writeFieldEnd(); +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list - xfer += oprot->writeFieldBegin("repetition_levels_byte_length", ::duckdb_apache::thrift::protocol::T_I32, 6); - xfer += oprot->writeI32(this->repetition_levels_byte_length); - xfer += oprot->writeFieldEnd(); +/* + * xxHash - Fast Hash algorithm + * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - xxHash homepage: http://www.xxhash.com + * - xxHash source repository : https://github.com/Cyan4973/xxHash + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+*/ - if (this->__isset.is_compressed) { - xfer += oprot->writeFieldBegin("is_compressed", ::duckdb_apache::thrift::protocol::T_BOOL, 7); - xfer += oprot->writeBool(this->is_compressed); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.statistics) { - xfer += oprot->writeFieldBegin("statistics", ::duckdb_apache::thrift::protocol::T_STRUCT, 8); - xfer += this->statistics.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} -void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) { - using ::std::swap; - swap(a.num_values, b.num_values); - swap(a.num_nulls, b.num_nulls); - swap(a.num_rows, b.num_rows); - swap(a.encoding, b.encoding); - swap(a.definition_levels_byte_length, b.definition_levels_byte_length); - swap(a.repetition_levels_byte_length, b.repetition_levels_byte_length); - swap(a.is_compressed, b.is_compressed); - swap(a.statistics, b.statistics); - swap(a.__isset, b.__isset); -} +/* ************************************* +* Tuning parameters +***************************************/ +/*!XXH_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. + * It can generate buggy code on targets which do not support unaligned memory accesses. 
+ * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://stackoverflow.com/a/32095106/646947 for details. + * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define XXH_FORCE_MEMORY_ACCESS 2 +# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \ + defined(__ICCARM__) +# define XXH_FORCE_MEMORY_ACCESS 1 +# endif +#endif -DataPageHeaderV2::DataPageHeaderV2(const DataPageHeaderV2& other54) { - num_values = other54.num_values; - num_nulls = other54.num_nulls; - num_rows = other54.num_rows; - encoding = other54.encoding; - definition_levels_byte_length = other54.definition_levels_byte_length; - repetition_levels_byte_length = other54.repetition_levels_byte_length; - is_compressed = other54.is_compressed; - statistics = other54.statistics; - __isset = other54.__isset; -} -DataPageHeaderV2& DataPageHeaderV2::operator=(const DataPageHeaderV2& other55) { - num_values = other55.num_values; - num_nulls = other55.num_nulls; - num_rows = other55.num_rows; - encoding = other55.encoding; - definition_levels_byte_length = other55.definition_levels_byte_length; - repetition_levels_byte_length = other55.repetition_levels_byte_length; - is_compressed = other55.is_compressed; - statistics = other55.statistics; - __isset = other55.__isset; - return *this; -} -void DataPageHeaderV2::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "DataPageHeaderV2("; - out << "num_values=" << to_string(num_values); - out << ", " << 
"num_nulls=" << to_string(num_nulls); - out << ", " << "num_rows=" << to_string(num_rows); - out << ", " << "encoding=" << to_string(encoding); - out << ", " << "definition_levels_byte_length=" << to_string(definition_levels_byte_length); - out << ", " << "repetition_levels_byte_length=" << to_string(repetition_levels_byte_length); - out << ", " << "is_compressed="; (__isset.is_compressed ? (out << to_string(is_compressed)) : (out << "")); - out << ", " << "statistics="; (__isset.statistics ? (out << to_string(statistics)) : (out << "")); - out << ")"; -} +/*!XXH_ACCEPT_NULL_INPUT_POINTER : + * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. + * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. + * By default, this option is disabled. To enable it, uncomment below define : + */ +/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ +/*!XXH_FORCE_NATIVE_FORMAT : + * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. + * Results are therefore identical for little-endian and big-endian CPU. + * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. + * Should endian-independence be of no importance for your application, you may set the #define below to 1, + * to improve speed for Big-endian CPU. + * This option has no impact on Little_Endian CPU. + */ +#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ +# define XXH_FORCE_NATIVE_FORMAT 0 +#endif -PageHeader::~PageHeader() throw() { -} +/*!XXH_FORCE_ALIGN_CHECK : + * This is a minor performance trick, only useful with lots of very small keys. + * It means : check for aligned/unaligned input. + * The check costs one initial branch per hash; set to 0 when the input data + * is guaranteed to be aligned. 
+ */ +#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ +# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_FORCE_ALIGN_CHECK 0 +# else +# define XXH_FORCE_ALIGN_CHECK 1 +# endif +#endif -void PageHeader::__set_type(const PageType::type val) { - this->type = val; -} +/* ************************************* +* Includes & Memory related functions +***************************************/ +/* Modify the local functions below should you wish to use some other memory routines */ +/* for malloc(), free() */ +#include +#include /* size_t */ +/* for memcpy() */ +#include -void PageHeader::__set_uncompressed_page_size(const int32_t val) { - this->uncompressed_page_size = val; -} -void PageHeader::__set_compressed_page_size(const int32_t val) { - this->compressed_page_size = val; -} -void PageHeader::__set_crc(const int32_t val) { - this->crc = val; -__isset.crc = true; -} +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list -void PageHeader::__set_data_page_header(const DataPageHeader& val) { - this->data_page_header = val; -__isset.data_page_header = true; -} +/* + * xxHash - Extremely Fast Hash algorithm + * Header File + * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - xxHash source repository : https://github.com/Cyan4973/xxHash + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+*/ -void PageHeader::__set_index_page_header(const IndexPageHeader& val) { - this->index_page_header = val; -__isset.index_page_header = true; -} +/* Notice extracted from xxHash homepage : -void PageHeader::__set_dictionary_page_header(const DictionaryPageHeader& val) { - this->dictionary_page_header = val; -__isset.dictionary_page_header = true; -} +xxHash is an extremely fast Hash algorithm, running at RAM speed limits. +It also successfully passes all tests from the SMHasher suite. -void PageHeader::__set_data_page_header_v2(const DataPageHeaderV2& val) { - this->data_page_header_v2 = val; -__isset.data_page_header_v2 = true; -} -std::ostream& operator<<(std::ostream& out, const PageHeader& obj) -{ - obj.printTo(out); - return out; -} +Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) +Name Speed Q.Score Author +xxHash 5.4 GB/s 10 +CrapWow 3.2 GB/s 2 Andrew +MumurHash 3a 2.7 GB/s 10 Austin Appleby +SpookyHash 2.0 GB/s 10 Bob Jenkins +SBox 1.4 GB/s 9 Bret Mulvey +Lookup3 1.2 GB/s 9 Bob Jenkins +SuperFastHash 1.2 GB/s 1 Paul Hsieh +CityHash64 1.05 GB/s 10 Pike & Alakuijala +FNV 0.55 GB/s 5 Fowler, Noll, Vo +CRC32 0.43 GB/s 9 +MD5-32 0.33 GB/s 10 Ronald L. Rivest +SHA1-32 0.28 GB/s 10 -uint32_t PageHeader::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +Q.Score is a measure of quality of the hash function. +It depends on successfully passing SMHasher test set. +10 is a perfect score. - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +A 64-bits version, named XXH64, is available since r35. +It offers much better speed, but for 64-bits applications only. 
+Name Speed on 64 bits Speed on 32 bits +XXH64 13.8 GB/s 1.9 GB/s +XXH32 6.8 GB/s 6.0 GB/s +*/ - xfer += iprot->readStructBegin(fname); +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 - using ::duckdb_apache::thrift::protocol::TProtocolException; - bool isset_type = false; - bool isset_uncompressed_page_size = false; - bool isset_compressed_page_size = false; +/* **************************** +* Definitions +******************************/ +#include /* size_t */ +namespace duckdb_zstd { +typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast56; - xfer += iprot->readI32(ecast56); - this->type = (PageType::type)ecast56; - isset_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->uncompressed_page_size); - isset_uncompressed_page_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->compressed_page_size); - isset_compressed_page_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->crc); - this->__isset.crc = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->data_page_header.read(iprot); - this->__isset.data_page_header = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->index_page_header.read(iprot); - this->__isset.index_page_header = true; - } else { - xfer += 
iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->dictionary_page_header.read(iprot); - this->__isset.dictionary_page_header = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->data_page_header_v2.read(iprot); - this->__isset.data_page_header_v2 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - xfer += iprot->readStructEnd(); +/* **************************** +* API modifier +******************************/ +/** XXH_PRIVATE_API +* This is useful if you want to include xxhash functions in `static` mode +* in order to inline them, and remove their symbol from the public list. +* Methodology : +* #define XXH_PRIVATE_API +* #include "zstd/common/xxhash.h" +* `xxhash.c` is automatically included. +* It's not useful to compile and link it as a separate module anymore. 
+*/ +#ifdef XXH_PRIVATE_API +# ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +# endif +# if defined(__GNUC__) +# define XXH_PUBLIC_API static __inline __attribute__((unused)) +# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define XXH_PUBLIC_API static inline +# elif defined(_MSC_VER) +# define XXH_PUBLIC_API static __inline +# else +# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ +# endif +#else +# define XXH_PUBLIC_API /* do nothing */ +#endif /* XXH_PRIVATE_API */ - if (!isset_type) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_uncompressed_page_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_compressed_page_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} +/*!XXH_NAMESPACE, aka Namespace Emulation : -uint32_t PageHeader::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("PageHeader"); +If you want to include _and expose_ xxHash functions from within your own library, +but also want to avoid symbol collisions with another library which also includes xxHash, - xfer += oprot->writeFieldBegin("type", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32((int32_t)this->type); - xfer += oprot->writeFieldEnd(); +you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library +with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). 
- xfer += oprot->writeFieldBegin("uncompressed_page_size", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->uncompressed_page_size); - xfer += oprot->writeFieldEnd(); +Note that no change is required within the calling program as long as it includes `xxhash.h` : +regular symbol name will be automatically translated by this header. +*/ +#ifdef XXH_NAMESPACE +# define XXH_CAT(A,B) A##B +# define XXH_NAME2(A,B) XXH_CAT(A,B) +# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) +# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) +# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) +# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) +# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) +# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) +# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) +# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) +# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) +# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) +# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) +# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) +# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) +# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) +# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) +# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) +# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) +# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) +# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) +#endif - xfer += oprot->writeFieldBegin("compressed_page_size", ::duckdb_apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(this->compressed_page_size); - xfer += oprot->writeFieldEnd(); - if (this->__isset.crc) { - xfer += 
oprot->writeFieldBegin("crc", ::duckdb_apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32(this->crc); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.data_page_header) { - xfer += oprot->writeFieldBegin("data_page_header", ::duckdb_apache::thrift::protocol::T_STRUCT, 5); - xfer += this->data_page_header.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.index_page_header) { - xfer += oprot->writeFieldBegin("index_page_header", ::duckdb_apache::thrift::protocol::T_STRUCT, 6); - xfer += this->index_page_header.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.dictionary_page_header) { - xfer += oprot->writeFieldBegin("dictionary_page_header", ::duckdb_apache::thrift::protocol::T_STRUCT, 7); - xfer += this->dictionary_page_header.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.data_page_header_v2) { - xfer += oprot->writeFieldBegin("data_page_header_v2", ::duckdb_apache::thrift::protocol::T_STRUCT, 8); - xfer += this->data_page_header_v2.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} +/* ************************************* +* Version +***************************************/ +#define XXH_VERSION_MAJOR 0 +#define XXH_VERSION_MINOR 6 +#define XXH_VERSION_RELEASE 2 +#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) +XXH_PUBLIC_API unsigned XXH_versionNumber (void); -void swap(PageHeader &a, PageHeader &b) { - using ::std::swap; - swap(a.type, b.type); - swap(a.uncompressed_page_size, b.uncompressed_page_size); - swap(a.compressed_page_size, b.compressed_page_size); - swap(a.crc, b.crc); - swap(a.data_page_header, b.data_page_header); - swap(a.index_page_header, b.index_page_header); - swap(a.dictionary_page_header, b.dictionary_page_header); - swap(a.data_page_header_v2, b.data_page_header_v2); - swap(a.__isset, b.__isset); -} 
-PageHeader::PageHeader(const PageHeader& other57) { - type = other57.type; - uncompressed_page_size = other57.uncompressed_page_size; - compressed_page_size = other57.compressed_page_size; - crc = other57.crc; - data_page_header = other57.data_page_header; - index_page_header = other57.index_page_header; - dictionary_page_header = other57.dictionary_page_header; - data_page_header_v2 = other57.data_page_header_v2; - __isset = other57.__isset; -} -PageHeader& PageHeader::operator=(const PageHeader& other58) { - type = other58.type; - uncompressed_page_size = other58.uncompressed_page_size; - compressed_page_size = other58.compressed_page_size; - crc = other58.crc; - data_page_header = other58.data_page_header; - index_page_header = other58.index_page_header; - dictionary_page_header = other58.dictionary_page_header; - data_page_header_v2 = other58.data_page_header_v2; - __isset = other58.__isset; - return *this; -} -void PageHeader::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "PageHeader("; - out << "type=" << to_string(type); - out << ", " << "uncompressed_page_size=" << to_string(uncompressed_page_size); - out << ", " << "compressed_page_size=" << to_string(compressed_page_size); - out << ", " << "crc="; (__isset.crc ? (out << to_string(crc)) : (out << "")); - out << ", " << "data_page_header="; (__isset.data_page_header ? (out << to_string(data_page_header)) : (out << "")); - out << ", " << "index_page_header="; (__isset.index_page_header ? (out << to_string(index_page_header)) : (out << "")); - out << ", " << "dictionary_page_header="; (__isset.dictionary_page_header ? (out << to_string(dictionary_page_header)) : (out << "")); - out << ", " << "data_page_header_v2="; (__isset.data_page_header_v2 ? 
(out << to_string(data_page_header_v2)) : (out << "")); - out << ")"; -} +/* **************************** +* Simple Hash Functions +******************************/ +typedef unsigned int XXH32_hash_t; +typedef unsigned long long XXH64_hash_t; +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); -KeyValue::~KeyValue() throw() { -} - - -void KeyValue::__set_key(const std::string& val) { - this->key = val; -} - -void KeyValue::__set_value(const std::string& val) { - this->value = val; -__isset.value = true; -} -std::ostream& operator<<(std::ostream& out, const KeyValue& obj) -{ - obj.printTo(out); - return out; -} +/*! +XXH32() : + Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". + The memory between input & input+length must be valid (allocated and read-accessible). + "seed" can be used to alter the result predictably. + Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s +XXH64() : + Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". + "seed" can be used to alter the result predictably. + This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark). +*/ -uint32_t KeyValue::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +/* **************************** +* Streaming Hash Functions +******************************/ +typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +/*! 
State allocation, compatible with dynamic libraries */ - xfer += iprot->readStructBegin(fname); +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); - using ::duckdb_apache::thrift::protocol::TProtocolException; +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); - bool isset_key = false; - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->key); - isset_key = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->value); - this->__isset.value = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } +/* hash streaming */ - xfer += iprot->readStructEnd(); +XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); - if (!isset_key) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); -uint32_t KeyValue::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += 
oprot->writeStructBegin("KeyValue"); +/* +These functions generate the xxHash of an input provided in multiple segments. +Note that, for small input, they are slower than single-call functions, due to state management. +For small input, prefer `XXH32()` and `XXH64()` . - xfer += oprot->writeFieldBegin("key", ::duckdb_apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeString(this->key); - xfer += oprot->writeFieldEnd(); +XXH state must first be allocated, using XXH*_createState() . - if (this->__isset.value) { - xfer += oprot->writeFieldBegin("value", ::duckdb_apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeString(this->value); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} +Start a new hash by initializing state with a seed, using XXH*_reset(). -void swap(KeyValue &a, KeyValue &b) { - using ::std::swap; - swap(a.key, b.key); - swap(a.value, b.value); - swap(a.__isset, b.__isset); -} +Then, feed the hash state by calling XXH*_update() as many times as necessary. +Obviously, input must be allocated and read accessible. +The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. -KeyValue::KeyValue(const KeyValue& other59) { - key = other59.key; - value = other59.value; - __isset = other59.__isset; -} -KeyValue& KeyValue::operator=(const KeyValue& other60) { - key = other60.key; - value = other60.value; - __isset = other60.__isset; - return *this; -} -void KeyValue::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "KeyValue("; - out << "key=" << to_string(key); - out << ", " << "value="; (__isset.value ? (out << to_string(value)) : (out << "")); - out << ")"; -} +Finally, a hash value can be produced anytime, by using XXH*_digest(). +This function returns the nn-bits hash as an int or long long. 
+It's still possible to continue inserting input into the hash state after a digest, +and generate some new hashes later on, by calling again XXH*_digest(). -SortingColumn::~SortingColumn() throw() { -} +When done, free XXH state space if it was allocated dynamically. +*/ -void SortingColumn::__set_column_idx(const int32_t val) { - this->column_idx = val; -} +/* ************************** +* Utils +****************************/ +#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */ +# define __restrict /* disable restrict */ +#endif -void SortingColumn::__set_descending(const bool val) { - this->descending = val; -} +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* __restrict dst_state, const XXH32_state_t* __restrict src_state); +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* __restrict dst_state, const XXH64_state_t* __restrict src_state); -void SortingColumn::__set_nulls_first(const bool val) { - this->nulls_first = val; -} -std::ostream& operator<<(std::ostream& out, const SortingColumn& obj) -{ - obj.printTo(out); - return out; -} +/* ************************** +* Canonical representation +****************************/ +/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. +* The canonical representation uses human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. 
+*/ +typedef struct { unsigned char digest[4]; } XXH32_canonical_t; +typedef struct { unsigned char digest[8]; } XXH64_canonical_t; -uint32_t SortingColumn::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); - xfer += iprot->readStructBegin(fname); +} - using ::duckdb_apache::thrift::protocol::TProtocolException; +#endif /* XXHASH_H_5627135585666179 */ - bool isset_column_idx = false; - bool isset_descending = false; - bool isset_nulls_first = false; - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->column_idx); - isset_column_idx = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->descending); - isset_descending = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->nulls_first); - isset_nulls_first = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - xfer += iprot->readStructEnd(); +// LICENSE_CHANGE_END - if (!isset_column_idx) - throw TProtocolException(TProtocolException::INVALID_DATA); - if 
(!isset_descending) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_nulls_first) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} -uint32_t SortingColumn::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("SortingColumn"); - xfer += oprot->writeFieldBegin("column_idx", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->column_idx); - xfer += oprot->writeFieldEnd(); +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list - xfer += oprot->writeFieldBegin("descending", ::duckdb_apache::thrift::protocol::T_BOOL, 2); - xfer += oprot->writeBool(this->descending); - xfer += oprot->writeFieldEnd(); - xfer += oprot->writeFieldBegin("nulls_first", ::duckdb_apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->nulls_first); - xfer += oprot->writeFieldEnd(); +/* ================================================================================================ + This section contains definitions which are not guaranteed to remain stable. + They may change in future versions, becoming incompatible with a different version of the library. + They shall only be used with static linking. + Never use these definitions in association with dynamic linking ! 
+=================================================================================================== */ +#ifndef XXH_STATIC_H_3543687687345 +#define XXH_STATIC_H_3543687687345 - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} +namespace duckdb_zstd { -void swap(SortingColumn &a, SortingColumn &b) { - using ::std::swap; - swap(a.column_idx, b.column_idx); - swap(a.descending, b.descending); - swap(a.nulls_first, b.nulls_first); -} +/* These definitions are only meant to allow allocation of XXH state + statically, on stack, or in a struct for example. + Do not use members directly. */ -SortingColumn::SortingColumn(const SortingColumn& other61) { - column_idx = other61.column_idx; - descending = other61.descending; - nulls_first = other61.nulls_first; -} -SortingColumn& SortingColumn::operator=(const SortingColumn& other62) { - column_idx = other62.column_idx; - descending = other62.descending; - nulls_first = other62.nulls_first; - return *this; -} -void SortingColumn::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "SortingColumn("; - out << "column_idx=" << to_string(column_idx); - out << ", " << "descending=" << to_string(descending); - out << ", " << "nulls_first=" << to_string(nulls_first); - out << ")"; -} + struct XXH32_state_s { + unsigned total_len_32; + unsigned large_len; + unsigned v1; + unsigned v2; + unsigned v3; + unsigned v4; + unsigned mem32[4]; /* buffer defined as U32 for alignment */ + unsigned memsize; + unsigned reserved; /* never read nor write, will be removed in a future version */ + }; /* typedef'd to XXH32_state_t */ + struct XXH64_state_s { + unsigned long long total_len; + unsigned long long v1; + unsigned long long v2; + unsigned long long v3; + unsigned long long v4; + unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ + unsigned memsize; + unsigned reserved[2]; /* never read nor write, will be removed in a future version */ + }; /* 
typedef'd to XXH64_state_t */ -PageEncodingStats::~PageEncodingStats() throw() { } +// # ifdef XXH_PRIVATE_API +// # include "xxhash.cpp" /* include xxhash functions as `static`, for inlining */ +// # endif +#endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */ -void PageEncodingStats::__set_page_type(const PageType::type val) { - this->page_type = val; -} -void PageEncodingStats::__set_encoding(const Encoding::type val) { - this->encoding = val; -} +// LICENSE_CHANGE_END -void PageEncodingStats::__set_count(const int32_t val) { - this->count = val; -} -std::ostream& operator<<(std::ostream& out, const PageEncodingStats& obj) -{ - obj.printTo(out); - return out; -} +/* ************************************* +* Compiler Specific Options +***************************************/ +#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# define INLINE_KEYWORD inline +#else +# define INLINE_KEYWORD +#endif -uint32_t PageEncodingStats::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +#if defined(__GNUC__) || defined(__ICCARM__) +# define FORCE_INLINE_ATTR __attribute__((always_inline)) +#elif defined(_MSC_VER) +# define FORCE_INLINE_ATTR __forceinline +#else +# define FORCE_INLINE_ATTR +#endif - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR - xfer += iprot->readStructBegin(fname); - using ::duckdb_apache::thrift::protocol::TProtocolException; +/* ************************************* +* Basic Types +***************************************/ +#ifndef MEM_MODULE +# define MEM_MODULE +# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include + typedef uint8_t BYTE; + typedef 
uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; +# else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */ +# endif +#endif - bool isset_page_type = false; - bool isset_encoding = false; - bool isset_count = false; +namespace duckdb_zstd { +static void* XXH_malloc(size_t s) { return malloc(s); } +static void XXH_free (void* p) { free(p); } +static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast63; - xfer += iprot->readI32(ecast63); - this->page_type = (PageType::type)ecast63; - isset_page_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast64; - xfer += iprot->readI32(ecast64); - this->encoding = (Encoding::type)ecast64; - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->count); - isset_count = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) - xfer += iprot->readStructEnd(); +/* Force direct memory access. 
Only works on CPU which support unaligned memory access in hardware */ +static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } +static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } - if (!isset_page_type) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_count) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) -uint32_t PageEncodingStats::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("PageEncodingStats"); +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign; - xfer += oprot->writeFieldBegin("page_type", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32((int32_t)this->page_type); - xfer += oprot->writeFieldEnd(); +static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } - xfer += oprot->writeFieldBegin("encoding", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32((int32_t)this->encoding); - xfer += oprot->writeFieldEnd(); +#else - xfer += oprot->writeFieldBegin("count", ::duckdb_apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(this->count); - xfer += oprot->writeFieldEnd(); +/* portable and safe solution. Generally efficient. 
+ * see : http://stackoverflow.com/a/32095106/646947 + */ - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; +static U32 XXH_read32(const void* memPtr) +{ + U32 val; + memcpy(&val, memPtr, sizeof(val)); + return val; } -void swap(PageEncodingStats &a, PageEncodingStats &b) { - using ::std::swap; - swap(a.page_type, b.page_type); - swap(a.encoding, b.encoding); - swap(a.count, b.count); +static U64 XXH_read64(const void* memPtr) +{ + U64 val; + memcpy(&val, memPtr, sizeof(val)); + return val; } -PageEncodingStats::PageEncodingStats(const PageEncodingStats& other65) { - page_type = other65.page_type; - encoding = other65.encoding; - count = other65.count; -} -PageEncodingStats& PageEncodingStats::operator=(const PageEncodingStats& other66) { - page_type = other66.page_type; - encoding = other66.encoding; - count = other66.count; - return *this; -} -void PageEncodingStats::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "PageEncodingStats("; - out << "page_type=" << to_string(page_type); - out << ", " << "encoding=" << to_string(encoding); - out << ", " << "count=" << to_string(count); - out << ")"; -} +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ -ColumnMetaData::~ColumnMetaData() throw() { -} +/* **************************************** +* Compiler-specific Functions and Macros +******************************************/ +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ +#if defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) +#else +#if defined(__ICCARM__) +# include +# define XXH_rotl32(x,r) __ROR(x,(32 - r)) +#else +# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +#endif +# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) +#endif -void ColumnMetaData::__set_type(const Type::type val) { - this->type = val; +#if defined(_MSC_VER) /* Visual Studio */ 
+# define XXH_swap32 _byteswap_ulong +# define XXH_swap64 _byteswap_uint64 +#elif GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +# define XXH_swap64 __builtin_bswap64 +#else +static U32 XXH_swap32 (U32 x) +{ + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); } - -void ColumnMetaData::__set_encodings(const std::vector & val) { - this->encodings = val; +static U64 XXH_swap64 (U64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); } +#endif -void ColumnMetaData::__set_path_in_schema(const std::vector & val) { - this->path_in_schema = val; -} -void ColumnMetaData::__set_codec(const CompressionCodec::type val) { - this->codec = val; -} +/* ************************************* +* Architecture Macros +***************************************/ +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; -void ColumnMetaData::__set_num_values(const int64_t val) { - this->num_values = val; -} +/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ +#ifndef XXH_CPU_LITTLE_ENDIAN + static const int g_one = 1; +# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one)) +#endif -void ColumnMetaData::__set_total_uncompressed_size(const int64_t val) { - this->total_uncompressed_size = val; -} -void ColumnMetaData::__set_total_compressed_size(const int64_t val) { - this->total_compressed_size = val; -} +/* *************************** +* Memory reads +*****************************/ +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; -void ColumnMetaData::__set_key_value_metadata(const std::vector & val) { - this->key_value_metadata = val; -__isset.key_value_metadata = 
true; +FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); + else + return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); } -void ColumnMetaData::__set_data_page_offset(const int64_t val) { - this->data_page_offset = val; +FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE32_align(ptr, endian, XXH_unaligned); } -void ColumnMetaData::__set_index_page_offset(const int64_t val) { - this->index_page_offset = val; -__isset.index_page_offset = true; +static U32 XXH_readBE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); } -void ColumnMetaData::__set_dictionary_page_offset(const int64_t val) { - this->dictionary_page_offset = val; -__isset.dictionary_page_offset = true; +FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); + else + return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); } -void ColumnMetaData::__set_statistics(const Statistics& val) { - this->statistics = val; -__isset.statistics = true; +FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE64_align(ptr, endian, XXH_unaligned); } -void ColumnMetaData::__set_encoding_stats(const std::vector & val) { - this->encoding_stats = val; -__isset.encoding_stats = true; -} -std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj) +static U64 XXH_readBE64(const void* ptr) { - obj.printTo(out); - return out; + return XXH_CPU_LITTLE_ENDIAN ? 
XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); } -uint32_t ColumnMetaData::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +/* ************************************* +* Macros +***************************************/ +#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - xfer += iprot->readStructBegin(fname); +/* ************************************* +* Constants +***************************************/ +static const U32 PRIME32_1 = 2654435761U; +static const U32 PRIME32_2 = 2246822519U; +static const U32 PRIME32_3 = 3266489917U; +static const U32 PRIME32_4 = 668265263U; +static const U32 PRIME32_5 = 374761393U; - using ::duckdb_apache::thrift::protocol::TProtocolException; +static const U64 PRIME64_1 = 11400714785074694791ULL; +static const U64 PRIME64_2 = 14029467366897019727ULL; +static const U64 PRIME64_3 = 1609587929392839161ULL; +static const U64 PRIME64_4 = 9650029242287828579ULL; +static const U64 PRIME64_5 = 2870177450012600261ULL; - bool isset_type = false; - bool isset_encodings = false; - bool isset_path_in_schema = false; - bool isset_codec = false; - bool isset_num_values = false; - bool isset_total_uncompressed_size = false; - bool isset_total_compressed_size = false; - bool isset_data_page_offset = false; +XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast67; - xfer += iprot->readI32(ecast67); - this->type = (Type::type)ecast67; - isset_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - 
if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->encodings.clear(); - uint32_t _size68; - ::duckdb_apache::thrift::protocol::TType _etype71; - xfer += iprot->readListBegin(_etype71, _size68); - this->encodings.resize(_size68); - uint32_t _i72; - for (_i72 = 0; _i72 < _size68; ++_i72) - { - int32_t ecast73; - xfer += iprot->readI32(ecast73); - this->encodings[_i72] = (Encoding::type)ecast73; - } - xfer += iprot->readListEnd(); - } - isset_encodings = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->path_in_schema.clear(); - uint32_t _size74; - ::duckdb_apache::thrift::protocol::TType _etype77; - xfer += iprot->readListBegin(_etype77, _size74); - this->path_in_schema.resize(_size74); - uint32_t _i78; - for (_i78 = 0; _i78 < _size74; ++_i78) - { - xfer += iprot->readString(this->path_in_schema[_i78]); - } - xfer += iprot->readListEnd(); - } - isset_path_in_schema = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast79; - xfer += iprot->readI32(ecast79); - this->codec = (CompressionCodec::type)ecast79; - isset_codec = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_uncompressed_size); - isset_total_uncompressed_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_compressed_size); - isset_total_compressed_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - 
{ - this->key_value_metadata.clear(); - uint32_t _size80; - ::duckdb_apache::thrift::protocol::TType _etype83; - xfer += iprot->readListBegin(_etype83, _size80); - this->key_value_metadata.resize(_size80); - uint32_t _i84; - for (_i84 = 0; _i84 < _size80; ++_i84) - { - xfer += this->key_value_metadata[_i84].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.key_value_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->data_page_offset); - isset_data_page_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 10: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->index_page_offset); - this->__isset.index_page_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 11: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->dictionary_page_offset); - this->__isset.dictionary_page_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 12: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->statistics.read(iprot); - this->__isset.statistics = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 13: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->encoding_stats.clear(); - uint32_t _size85; - ::duckdb_apache::thrift::protocol::TType _etype88; - xfer += iprot->readListBegin(_etype88, _size85); - this->encoding_stats.resize(_size85); - uint32_t _i89; - for (_i89 = 0; _i89 < _size85; ++_i89) - { - xfer += this->encoding_stats[_i89].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.encoding_stats = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - xfer += iprot->readStructEnd(); +/* ************************** +* Utils 
+****************************/ +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* __restrict dstState, const XXH32_state_t* __restrict srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} - if (!isset_type) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encodings) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_path_in_schema) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_codec) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_total_uncompressed_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_total_compressed_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_data_page_offset) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* __restrict dstState, const XXH64_state_t* __restrict srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); } -uint32_t ColumnMetaData::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnMetaData"); - xfer += oprot->writeFieldBegin("type", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32((int32_t)this->type); - xfer += oprot->writeFieldEnd(); +/* *************************** +* Simple Hash Functions +*****************************/ - xfer += oprot->writeFieldBegin("encodings", ::duckdb_apache::thrift::protocol::T_LIST, 2); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_I32, static_cast(this->encodings.size())); - std::vector ::const_iterator _iter90; - for (_iter90 = this->encodings.begin(); _iter90 != this->encodings.end(); ++_iter90) - { - xfer += oprot->writeI32((int32_t)(*_iter90)); - } - 
xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); +static U32 XXH32_round(U32 seed, U32 input) +{ + seed += input * PRIME32_2; + seed = XXH_rotl32(seed, 13); + seed *= PRIME32_1; + return seed; +} - xfer += oprot->writeFieldBegin("path_in_schema", ::duckdb_apache::thrift::protocol::T_LIST, 3); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); - std::vector ::const_iterator _iter91; - for (_iter91 = this->path_in_schema.begin(); _iter91 != this->path_in_schema.end(); ++_iter91) - { - xfer += oprot->writeString((*_iter91)); +FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U32 h32; +#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)16; } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); +#endif - xfer += oprot->writeFieldBegin("codec", ::duckdb_apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32((int32_t)this->codec); - xfer += oprot->writeFieldEnd(); + if (len>=16) { + const BYTE* const limit = bEnd - 16; + U32 v1 = seed + PRIME32_1 + PRIME32_2; + U32 v2 = seed + PRIME32_2; + U32 v3 = seed + 0; + U32 v4 = seed - PRIME32_1; - xfer += oprot->writeFieldBegin("num_values", ::duckdb_apache::thrift::protocol::T_I64, 5); - xfer += oprot->writeI64(this->num_values); - xfer += oprot->writeFieldEnd(); + do { + v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; + v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; + v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; + v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; + } while (p<=limit); - xfer += oprot->writeFieldBegin("total_uncompressed_size", ::duckdb_apache::thrift::protocol::T_I64, 6); - xfer += oprot->writeI64(this->total_uncompressed_size); - xfer += 
oprot->writeFieldEnd(); + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } else { + h32 = seed + PRIME32_5; + } - xfer += oprot->writeFieldBegin("total_compressed_size", ::duckdb_apache::thrift::protocol::T_I64, 7); - xfer += oprot->writeI64(this->total_compressed_size); - xfer += oprot->writeFieldEnd(); + h32 += (U32) len; - if (this->__isset.key_value_metadata) { - xfer += oprot->writeFieldBegin("key_value_metadata", ::duckdb_apache::thrift::protocol::T_LIST, 8); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - std::vector ::const_iterator _iter92; - for (_iter92 = this->key_value_metadata.begin(); _iter92 != this->key_value_metadata.end(); ++_iter92) - { - xfer += (*_iter92).write(oprot); - } - xfer += oprot->writeListEnd(); + while (p+4<=bEnd) { + h32 += XXH_get32bits(p) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + p+=4; } - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldBegin("data_page_offset", ::duckdb_apache::thrift::protocol::T_I64, 9); - xfer += oprot->writeI64(this->data_page_offset); - xfer += oprot->writeFieldEnd(); - if (this->__isset.index_page_offset) { - xfer += oprot->writeFieldBegin("index_page_offset", ::duckdb_apache::thrift::protocol::T_I64, 10); - xfer += oprot->writeI64(this->index_page_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.dictionary_page_offset) { - xfer += oprot->writeFieldBegin("dictionary_page_offset", ::duckdb_apache::thrift::protocol::T_I64, 11); - xfer += oprot->writeI64(this->dictionary_page_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.statistics) { - xfer += oprot->writeFieldBegin("statistics", ::duckdb_apache::thrift::protocol::T_STRUCT, 12); - xfer += this->statistics.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.encoding_stats) { - xfer += oprot->writeFieldBegin("encoding_stats", 
::duckdb_apache::thrift::protocol::T_LIST, 13); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->encoding_stats.size())); - std::vector ::const_iterator _iter93; - for (_iter93 = this->encoding_stats.begin(); _iter93 != this->encoding_stats.end(); ++_iter93) - { - xfer += (*_iter93).write(oprot); - } - xfer += oprot->writeListEnd(); + while (pwriteFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} -void swap(ColumnMetaData &a, ColumnMetaData &b) { - using ::std::swap; - swap(a.type, b.type); - swap(a.encodings, b.encodings); - swap(a.path_in_schema, b.path_in_schema); - swap(a.codec, b.codec); - swap(a.num_values, b.num_values); - swap(a.total_uncompressed_size, b.total_uncompressed_size); - swap(a.total_compressed_size, b.total_compressed_size); - swap(a.key_value_metadata, b.key_value_metadata); - swap(a.data_page_offset, b.data_page_offset); - swap(a.index_page_offset, b.index_page_offset); - swap(a.dictionary_page_offset, b.dictionary_page_offset); - swap(a.statistics, b.statistics); - swap(a.encoding_stats, b.encoding_stats); - swap(a.__isset, b.__isset); + h32 ^= h32 >> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; } -ColumnMetaData::ColumnMetaData(const ColumnMetaData& other94) { - type = other94.type; - encodings = other94.encodings; - path_in_schema = other94.path_in_schema; - codec = other94.codec; - num_values = other94.num_values; - total_uncompressed_size = other94.total_uncompressed_size; - total_compressed_size = other94.total_compressed_size; - key_value_metadata = other94.key_value_metadata; - data_page_offset = other94.data_page_offset; - index_page_offset = other94.index_page_offset; - dictionary_page_offset = other94.dictionary_page_offset; - statistics = other94.statistics; - encoding_stats = other94.encoding_stats; - __isset = other94.__isset; -} -ColumnMetaData& 
ColumnMetaData::operator=(const ColumnMetaData& other95) { - type = other95.type; - encodings = other95.encodings; - path_in_schema = other95.path_in_schema; - codec = other95.codec; - num_values = other95.num_values; - total_uncompressed_size = other95.total_uncompressed_size; - total_compressed_size = other95.total_compressed_size; - key_value_metadata = other95.key_value_metadata; - data_page_offset = other95.data_page_offset; - index_page_offset = other95.index_page_offset; - dictionary_page_offset = other95.dictionary_page_offset; - statistics = other95.statistics; - encoding_stats = other95.encoding_stats; - __isset = other95.__isset; - return *this; -} -void ColumnMetaData::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "ColumnMetaData("; - out << "type=" << to_string(type); - out << ", " << "encodings=" << to_string(encodings); - out << ", " << "path_in_schema=" << to_string(path_in_schema); - out << ", " << "codec=" << to_string(codec); - out << ", " << "num_values=" << to_string(num_values); - out << ", " << "total_uncompressed_size=" << to_string(total_uncompressed_size); - out << ", " << "total_compressed_size=" << to_string(total_compressed_size); - out << ", " << "key_value_metadata="; (__isset.key_value_metadata ? (out << to_string(key_value_metadata)) : (out << "")); - out << ", " << "data_page_offset=" << to_string(data_page_offset); - out << ", " << "index_page_offset="; (__isset.index_page_offset ? (out << to_string(index_page_offset)) : (out << "")); - out << ", " << "dictionary_page_offset="; (__isset.dictionary_page_offset ? (out << to_string(dictionary_page_offset)) : (out << "")); - out << ", " << "statistics="; (__isset.statistics ? (out << to_string(statistics)) : (out << "")); - out << ", " << "encoding_stats="; (__isset.encoding_stats ? 
(out << to_string(encoding_stats)) : (out << "")); - out << ")"; + +XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH32_CREATESTATE_STATIC(state); + XXH32_reset(state, seed); + XXH32_update(state, input, len); + return XXH32_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif } -EncryptionWithFooterKey::~EncryptionWithFooterKey() throw() { +static U64 XXH64_round(U64 acc, U64 input) +{ + acc += input * PRIME64_2; + acc = XXH_rotl64(acc, 31); + acc *= PRIME64_1; + return acc; } -std::ostream& operator<<(std::ostream& out, const EncryptionWithFooterKey& obj) +static U64 XXH64_mergeRound(U64 acc, U64 val) { - obj.printTo(out); - return out; + val = XXH64_round(0, val); + acc ^= val; + acc = acc * PRIME64_1 + PRIME64_4; + return acc; } +FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + U64 h64; +#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) -uint32_t EncryptionWithFooterKey::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { + len=0; + bEnd=p=(const 
BYTE*)(size_t)32; + } +#endif - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; + if (len>=32) { + const BYTE* const limit = bEnd - 32; + U64 v1 = seed + PRIME64_1 + PRIME64_2; + U64 v2 = seed + PRIME64_2; + U64 v3 = seed + 0; + U64 v4 = seed - PRIME64_1; - xfer += iprot->readStructBegin(fname); + do { + v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; + v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; + v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; + v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; + } while (p<=limit); - using ::duckdb_apache::thrift::protocol::TProtocolException; + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + } else { + h64 = seed + PRIME64_5; + } - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; + h64 += (U64) len; + + while (p+8<=bEnd) { + U64 const k1 = XXH64_round(0, XXH_get64bits(p)); + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - xfer += iprot->readStructEnd(); + if (p+4<=bEnd) { + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } - return xfer; -} + while (pwriteStructBegin("EncryptionWithFooterKey"); + h64 ^= h64 >> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; + return h64; } -void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b) { - using ::std::swap; - (void) a; - (void) b; -} -EncryptionWithFooterKey::EncryptionWithFooterKey(const EncryptionWithFooterKey& 
other96) { - (void) other96; -} -EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(const EncryptionWithFooterKey& other97) { - (void) other97; - return *this; -} -void EncryptionWithFooterKey::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "EncryptionWithFooterKey("; - out << ")"; -} +XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH64_CREATESTATE_STATIC(state); + XXH64_reset(state, seed); + XXH64_update(state, input, len); + return XXH64_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } -EncryptionWithColumnKey::~EncryptionWithColumnKey() throw() { + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif } -void EncryptionWithColumnKey::__set_path_in_schema(const std::vector & val) { - this->path_in_schema = val; +/* ************************************************** +* Advanced Hash Functions +****************************************************/ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) +{ + return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; } -void EncryptionWithColumnKey::__set_key_metadata(const std::string& val) { - this->key_metadata = val; 
-__isset.key_metadata = true; +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) +{ + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); } -std::ostream& operator<<(std::ostream& out, const EncryptionWithColumnKey& obj) +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) { - obj.printTo(out); - return out; + XXH_free(statePtr); + return XXH_OK; } -uint32_t EncryptionWithColumnKey::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +/*** Hash feed ***/ - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) +{ + XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */ + state.v1 = seed + PRIME32_1 + PRIME32_2; + state.v2 = seed + PRIME32_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME32_1; + memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; +} - xfer += iprot->readStructBegin(fname); - using ::duckdb_apache::thrift::protocol::TProtocolException; +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) +{ + XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */ + state.v1 = seed + PRIME64_1 + PRIME64_2; + state.v2 = seed + PRIME64_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME64_1; + memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; +} - bool isset_path_in_schema = false; - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; +FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, 
const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len_32 += (unsigned)len; + state->large_len |= (len>=16) | (state->total_len_32>=16); + + if (state->memsize + len < 16) { /* fill in tmp buffer */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); + state->memsize += (unsigned)len; + return XXH_OK; } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->path_in_schema.clear(); - uint32_t _size98; - ::duckdb_apache::thrift::protocol::TType _etype101; - xfer += iprot->readListBegin(_etype101, _size98); - this->path_in_schema.resize(_size98); - uint32_t _i102; - for (_i102 = 0; _i102 < _size98; ++_i102) - { - xfer += iprot->readString(this->path_in_schema[_i102]); - } - xfer += iprot->readListEnd(); - } - isset_path_in_schema = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->key_metadata); - this->__isset.key_metadata = true; - } else { - xfer += iprot->skip(ftype); + + if (state->memsize) { /* some data left from previous update */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); + { const U32* p32 = state->mem32; + state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; + state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; + state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; + state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++; } - break; - default: - xfer += iprot->skip(ftype); - break; + p += 16-state->memsize; + state->memsize = 0; } - xfer += iprot->readFieldEnd(); - } - xfer += iprot->readStructEnd(); + if (p <= bEnd-16) { + const BYTE* const limit = bEnd - 16; + U32 v1 = state->v1; + U32 v2 = state->v2; + 
U32 v3 = state->v3; + U32 v4 = state->v4; - if (!isset_path_in_schema) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} + do { + v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; + v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; + v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; + v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; + } while (p<=limit); -uint32_t EncryptionWithColumnKey::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EncryptionWithColumnKey"); + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } - xfer += oprot->writeFieldBegin("path_in_schema", ::duckdb_apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); - std::vector ::const_iterator _iter103; - for (_iter103 = this->path_in_schema.begin(); _iter103 != this->path_in_schema.end(); ++_iter103) - { - xfer += oprot->writeString((*_iter103)); + if (p < bEnd) { + XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - if (this->__isset.key_metadata) { - xfer += oprot->writeFieldBegin("key_metadata", ::duckdb_apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->key_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; + return XXH_OK; } -void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b) { - using ::std::swap; - swap(a.path_in_schema, b.path_in_schema); - swap(a.key_metadata, b.key_metadata); - swap(a.__isset, b.__isset); -} +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = 
(XXH_endianess)XXH_CPU_LITTLE_ENDIAN; -EncryptionWithColumnKey::EncryptionWithColumnKey(const EncryptionWithColumnKey& other104) { - path_in_schema = other104.path_in_schema; - key_metadata = other104.key_metadata; - __isset = other104.__isset; -} -EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(const EncryptionWithColumnKey& other105) { - path_in_schema = other105.path_in_schema; - key_metadata = other105.key_metadata; - __isset = other105.__isset; - return *this; -} -void EncryptionWithColumnKey::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "EncryptionWithColumnKey("; - out << "path_in_schema=" << to_string(path_in_schema); - out << ", " << "key_metadata="; (__isset.key_metadata ? (out << to_string(key_metadata)) : (out << "")); - out << ")"; + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH32_update_endian(state_in, input, len, XXH_bigEndian); } -ColumnCryptoMetaData::~ColumnCryptoMetaData() throw() { -} +FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem32; + const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; + U32 h32; -void ColumnCryptoMetaData::__set_ENCRYPTION_WITH_FOOTER_KEY(const EncryptionWithFooterKey& val) { - this->ENCRYPTION_WITH_FOOTER_KEY = val; -__isset.ENCRYPTION_WITH_FOOTER_KEY = true; -} + if (state->large_len) { + h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); + } else { + h32 = state->v3 /* == seed */ + PRIME32_5; + } -void ColumnCryptoMetaData::__set_ENCRYPTION_WITH_COLUMN_KEY(const EncryptionWithColumnKey& val) { - this->ENCRYPTION_WITH_COLUMN_KEY = val; -__isset.ENCRYPTION_WITH_COLUMN_KEY = true; + h32 += state->total_len_32; + + while (p+4<=bEnd) { + h32 += XXH_readLE32(p, endian) * PRIME32_3; + h32 
= XXH_rotl32(h32, 17) * PRIME32_4; + p+=4; + } + + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; } -std::ostream& operator<<(std::ostream& out, const ColumnCryptoMetaData& obj) + + +XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) { - obj.printTo(out); - return out; + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_digest_endian(state_in, XXH_littleEndian); + else + return XXH32_digest_endian(state_in, XXH_bigEndian); } -uint32_t ColumnCryptoMetaData::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +/* **** XXH64 **** */ - xfer += iprot->readStructBegin(fname); +FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; - using ::duckdb_apache::thrift::protocol::TProtocolException; +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + state->total_len += len; - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->ENCRYPTION_WITH_FOOTER_KEY.read(iprot); - this->__isset.ENCRYPTION_WITH_FOOTER_KEY = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->ENCRYPTION_WITH_COLUMN_KEY.read(iprot); - this->__isset.ENCRYPTION_WITH_COLUMN_KEY = true; - } else { - xfer += iprot->skip(ftype); + if (state->memsize + len < 32) { /* 
fill in tmp buffer */ + if (input != NULL) { + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); } - break; - default: - xfer += iprot->skip(ftype); - break; + state->memsize += (U32)len; + return XXH_OK; } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - return xfer; -} - -uint32_t ColumnCryptoMetaData::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnCryptoMetaData"); - - if (this->__isset.ENCRYPTION_WITH_FOOTER_KEY) { - xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_FOOTER_KEY", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); - xfer += this->ENCRYPTION_WITH_FOOTER_KEY.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.ENCRYPTION_WITH_COLUMN_KEY) { - xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_COLUMN_KEY", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); - xfer += this->ENCRYPTION_WITH_COLUMN_KEY.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b) { - using ::std::swap; - swap(a.ENCRYPTION_WITH_FOOTER_KEY, b.ENCRYPTION_WITH_FOOTER_KEY); - swap(a.ENCRYPTION_WITH_COLUMN_KEY, b.ENCRYPTION_WITH_COLUMN_KEY); - swap(a.__isset, b.__isset); -} - -ColumnCryptoMetaData::ColumnCryptoMetaData(const ColumnCryptoMetaData& other106) { - ENCRYPTION_WITH_FOOTER_KEY = other106.ENCRYPTION_WITH_FOOTER_KEY; - ENCRYPTION_WITH_COLUMN_KEY = other106.ENCRYPTION_WITH_COLUMN_KEY; - __isset = other106.__isset; -} -ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(const ColumnCryptoMetaData& other107) { - ENCRYPTION_WITH_FOOTER_KEY = other107.ENCRYPTION_WITH_FOOTER_KEY; - ENCRYPTION_WITH_COLUMN_KEY = other107.ENCRYPTION_WITH_COLUMN_KEY; - __isset = other107.__isset; - return *this; -} -void 
ColumnCryptoMetaData::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "ColumnCryptoMetaData("; - out << "ENCRYPTION_WITH_FOOTER_KEY="; (__isset.ENCRYPTION_WITH_FOOTER_KEY ? (out << to_string(ENCRYPTION_WITH_FOOTER_KEY)) : (out << "")); - out << ", " << "ENCRYPTION_WITH_COLUMN_KEY="; (__isset.ENCRYPTION_WITH_COLUMN_KEY ? (out << to_string(ENCRYPTION_WITH_COLUMN_KEY)) : (out << "")); - out << ")"; -} + if (state->memsize) { /* tmp buffer is full */ + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); + state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); + state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); + state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); + state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); + p += 32-state->memsize; + state->memsize = 0; + } + if (p+32 <= bEnd) { + const BYTE* const limit = bEnd - 32; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; -ColumnChunk::~ColumnChunk() throw() { -} + do { + v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; + v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; + v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; + v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; + } while (p<=limit); + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } -void ColumnChunk::__set_file_path(const std::string& val) { - this->file_path = val; -__isset.file_path = true; -} + if (p < bEnd) { + XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } -void ColumnChunk::__set_file_offset(const int64_t val) { - this->file_offset = val; + return XXH_OK; } -void ColumnChunk::__set_meta_data(const ColumnMetaData& val) { - this->meta_data = val; -__isset.meta_data = true; -} +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) +{ + 
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; -void ColumnChunk::__set_offset_index_offset(const int64_t val) { - this->offset_index_offset = val; -__isset.offset_index_offset = true; + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH64_update_endian(state_in, input, len, XXH_bigEndian); } -void ColumnChunk::__set_offset_index_length(const int32_t val) { - this->offset_index_length = val; -__isset.offset_index_length = true; -} -void ColumnChunk::__set_column_index_offset(const int64_t val) { - this->column_index_offset = val; -__isset.column_index_offset = true; -} -void ColumnChunk::__set_column_index_length(const int32_t val) { - this->column_index_length = val; -__isset.column_index_length = true; -} +FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem64; + const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; + U64 h64; -void ColumnChunk::__set_crypto_metadata(const ColumnCryptoMetaData& val) { - this->crypto_metadata = val; -__isset.crypto_metadata = true; -} + if (state->total_len >= 32) { + U64 const v1 = state->v1; + U64 const v2 = state->v2; + U64 const v3 = state->v3; + U64 const v4 = state->v4; -void ColumnChunk::__set_encrypted_column_metadata(const std::string& val) { - this->encrypted_column_metadata = val; -__isset.encrypted_column_metadata = true; -} -std::ostream& operator<<(std::ostream& out, const ColumnChunk& obj) -{ - obj.printTo(out); - return out; -} + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + } else { + h64 = state->v3 + PRIME64_5; + } + h64 += (U64) state->total_len; -uint32_t 
ColumnChunk::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { + while (p+8<=bEnd) { + U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian)); + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; + if (p+4<=bEnd) { + h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } - xfer += iprot->readStructBegin(fname); + while (p> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; - bool isset_file_offset = false; + return h64; +} - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->file_path); - this->__isset.file_path = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->file_offset); - isset_file_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->meta_data.read(iprot); - this->__isset.meta_data = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->offset_index_offset); - this->__isset.offset_index_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->offset_index_length); - this->__isset.offset_index_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == 
::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->column_index_offset); - this->__isset.column_index_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->column_index_length); - this->__isset.column_index_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->crypto_metadata.read(iprot); - this->__isset.crypto_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->encrypted_column_metadata); - this->__isset.encrypted_column_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - xfer += iprot->readStructEnd(); +XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - if (!isset_file_offset) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_digest_endian(state_in, XXH_littleEndian); + else + return XXH64_digest_endian(state_in, XXH_bigEndian); } -uint32_t ColumnChunk::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnChunk"); - if (this->__isset.file_path) { - xfer += oprot->writeFieldBegin("file_path", ::duckdb_apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeString(this->file_path); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldBegin("file_offset", ::duckdb_apache::thrift::protocol::T_I64, 2); - xfer += 
oprot->writeI64(this->file_offset); - xfer += oprot->writeFieldEnd(); +/* ************************** +* Canonical representation +****************************/ - if (this->__isset.meta_data) { - xfer += oprot->writeFieldBegin("meta_data", ::duckdb_apache::thrift::protocol::T_STRUCT, 3); - xfer += this->meta_data.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.offset_index_offset) { - xfer += oprot->writeFieldBegin("offset_index_offset", ::duckdb_apache::thrift::protocol::T_I64, 4); - xfer += oprot->writeI64(this->offset_index_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.offset_index_length) { - xfer += oprot->writeFieldBegin("offset_index_length", ::duckdb_apache::thrift::protocol::T_I32, 5); - xfer += oprot->writeI32(this->offset_index_length); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.column_index_offset) { - xfer += oprot->writeFieldBegin("column_index_offset", ::duckdb_apache::thrift::protocol::T_I64, 6); - xfer += oprot->writeI64(this->column_index_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.column_index_length) { - xfer += oprot->writeFieldBegin("column_index_length", ::duckdb_apache::thrift::protocol::T_I32, 7); - xfer += oprot->writeI32(this->column_index_length); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.crypto_metadata) { - xfer += oprot->writeFieldBegin("crypto_metadata", ::duckdb_apache::thrift::protocol::T_STRUCT, 8); - xfer += this->crypto_metadata.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.encrypted_column_metadata) { - xfer += oprot->writeFieldBegin("encrypted_column_metadata", ::duckdb_apache::thrift::protocol::T_STRING, 9); - xfer += oprot->writeBinary(this->encrypted_column_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; +/*! Default XXH result types are basic unsigned 32 and 64 bits. 
+* The canonical representation follows human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs. +*/ + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); + memcpy(dst, &hash, sizeof(*dst)); } -void swap(ColumnChunk &a, ColumnChunk &b) { - using ::std::swap; - swap(a.file_path, b.file_path); - swap(a.file_offset, b.file_offset); - swap(a.meta_data, b.meta_data); - swap(a.offset_index_offset, b.offset_index_offset); - swap(a.offset_index_length, b.offset_index_length); - swap(a.column_index_offset, b.column_index_offset); - swap(a.column_index_length, b.column_index_length); - swap(a.crypto_metadata, b.crypto_metadata); - swap(a.encrypted_column_metadata, b.encrypted_column_metadata); - swap(a.__isset, b.__isset); +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); + memcpy(dst, &hash, sizeof(*dst)); } -ColumnChunk::ColumnChunk(const ColumnChunk& other108) { - file_path = other108.file_path; - file_offset = other108.file_offset; - meta_data = other108.meta_data; - offset_index_offset = other108.offset_index_offset; - offset_index_length = other108.offset_index_length; - column_index_offset = other108.column_index_offset; - column_index_length = other108.column_index_length; - crypto_metadata = other108.crypto_metadata; - encrypted_column_metadata = other108.encrypted_column_metadata; - __isset = other108.__isset; +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) +{ + return 
XXH_readBE32(src); } -ColumnChunk& ColumnChunk::operator=(const ColumnChunk& other109) { - file_path = other109.file_path; - file_offset = other109.file_offset; - meta_data = other109.meta_data; - offset_index_offset = other109.offset_index_offset; - offset_index_length = other109.offset_index_length; - column_index_offset = other109.column_index_offset; - column_index_length = other109.column_index_length; - crypto_metadata = other109.crypto_metadata; - encrypted_column_metadata = other109.encrypted_column_metadata; - __isset = other109.__isset; - return *this; + +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) +{ + return XXH_readBE64(src); } -void ColumnChunk::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "ColumnChunk("; - out << "file_path="; (__isset.file_path ? (out << to_string(file_path)) : (out << "")); - out << ", " << "file_offset=" << to_string(file_offset); - out << ", " << "meta_data="; (__isset.meta_data ? (out << to_string(meta_data)) : (out << "")); - out << ", " << "offset_index_offset="; (__isset.offset_index_offset ? (out << to_string(offset_index_offset)) : (out << "")); - out << ", " << "offset_index_length="; (__isset.offset_index_length ? (out << to_string(offset_index_length)) : (out << "")); - out << ", " << "column_index_offset="; (__isset.column_index_offset ? (out << to_string(column_index_offset)) : (out << "")); - out << ", " << "column_index_length="; (__isset.column_index_length ? (out << to_string(column_index_length)) : (out << "")); - out << ", " << "crypto_metadata="; (__isset.crypto_metadata ? (out << to_string(crypto_metadata)) : (out << "")); - out << ", " << "encrypted_column_metadata="; (__isset.encrypted_column_metadata ? 
(out << to_string(encrypted_column_metadata)) : (out << "")); - out << ")"; + } -RowGroup::~RowGroup() throw() { -} +// LICENSE_CHANGE_END -void RowGroup::__set_columns(const std::vector & val) { - this->columns = val; -} +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list -void RowGroup::__set_total_byte_size(const int64_t val) { - this->total_byte_size = val; -} +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ -void RowGroup::__set_num_rows(const int64_t val) { - this->num_rows = val; -} -void RowGroup::__set_sorting_columns(const std::vector & val) { - this->sorting_columns = val; -__isset.sorting_columns = true; -} -void RowGroup::__set_file_offset(const int64_t val) { - this->file_offset = val; -__isset.file_offset = true; -} +/*-************************************* +* Dependencies +***************************************/ +#include /* malloc, calloc, free */ +#include /* memset */ -void RowGroup::__set_total_compressed_size(const int64_t val) { - this->total_compressed_size = val; -__isset.total_compressed_size = true; -} -void RowGroup::__set_ordinal(const int16_t val) { - this->ordinal = val; -__isset.ordinal = true; -} -std::ostream& operator<<(std::ostream& out, const RowGroup& obj) -{ - obj.printTo(out); - return out; -} +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list -uint32_t RowGroup::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 
+ * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +#ifndef ZSTD_CCOMMON_H_MODULE +#define ZSTD_CCOMMON_H_MODULE - xfer += iprot->readStructBegin(fname); +/* this module contains definitions which must be identical + * across compression, decompression and dictBuilder. + * It also contains a few functions useful to at least 2 of them + * and which benefit from being inlined */ - using ::duckdb_apache::thrift::protocol::TProtocolException; +/*-************************************* +* Dependencies +***************************************/ +#ifdef __aarch64__ +#include +#endif - bool isset_columns = false; - bool isset_total_byte_size = false; - bool isset_num_rows = false; - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->columns.clear(); - uint32_t _size110; - ::duckdb_apache::thrift::protocol::TType _etype113; - xfer += iprot->readListBegin(_etype113, _size110); - this->columns.resize(_size110); - uint32_t _i114; - for (_i114 = 0; _i114 < _size110; ++_i114) - { - xfer += this->columns[_i114].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_columns = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_byte_size); - isset_total_byte_size = true; - } else { - xfer += iprot->skip(ftype); - } - 
break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->num_rows); - isset_num_rows = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->sorting_columns.clear(); - uint32_t _size115; - ::duckdb_apache::thrift::protocol::TType _etype118; - xfer += iprot->readListBegin(_etype118, _size115); - this->sorting_columns.resize(_size115); - uint32_t _i119; - for (_i119 = 0; _i119 < _size115; ++_i119) - { - xfer += this->sorting_columns[_i119].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.sorting_columns = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->file_offset); - this->__isset.file_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_compressed_size); - this->__isset.total_compressed_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_I16) { - xfer += iprot->readI16(this->ordinal); - this->__isset.ordinal = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } + /* assert, DEBUGLOG, RAWLOG, g_debuglevel */ - xfer += iprot->readStructEnd(); - if (!isset_columns) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_total_byte_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_rows) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} -uint32_t RowGroup::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += 
oprot->writeStructBegin("RowGroup"); +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list - xfer += oprot->writeFieldBegin("columns", ::duckdb_apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->columns.size())); - std::vector ::const_iterator _iter120; - for (_iter120 = this->columns.begin(); _iter120 != this->columns.end(); ++_iter120) - { - xfer += (*_iter120).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ +#ifndef ZSTD_H_235446 +#define ZSTD_H_235446 - xfer += oprot->writeFieldBegin("total_byte_size", ::duckdb_apache::thrift::protocol::T_I64, 2); - xfer += oprot->writeI64(this->total_byte_size); - xfer += oprot->writeFieldEnd(); +/* ====== Dependency ======*/ +#include /* INT_MAX */ +#include /* size_t */ - xfer += oprot->writeFieldBegin("num_rows", ::duckdb_apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->num_rows); - xfer += oprot->writeFieldEnd(); - if (this->__isset.sorting_columns) { - xfer += oprot->writeFieldBegin("sorting_columns", ::duckdb_apache::thrift::protocol::T_LIST, 4); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->sorting_columns.size())); - std::vector ::const_iterator _iter121; - for (_iter121 = this->sorting_columns.begin(); _iter121 != this->sorting_columns.end(); ++_iter121) - { - xfer += (*_iter121).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.file_offset) { - xfer += oprot->writeFieldBegin("file_offset", ::duckdb_apache::thrift::protocol::T_I64, 5); - xfer += oprot->writeI64(this->file_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.total_compressed_size) { - xfer += oprot->writeFieldBegin("total_compressed_size", ::duckdb_apache::thrift::protocol::T_I64, 6); - xfer += oprot->writeI64(this->total_compressed_size); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.ordinal) { - xfer += oprot->writeFieldBegin("ordinal", ::duckdb_apache::thrift::protocol::T_I16, 7); - xfer += oprot->writeI16(this->ordinal); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} +/* ===== ZSTDLIB_API : control library symbols visibility ===== */ +#ifndef ZSTDLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZSTDLIB_VISIBILITY +# 
endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZSTDLIB_API ZSTDLIB_VISIBILITY +#endif -void swap(RowGroup &a, RowGroup &b) { - using ::std::swap; - swap(a.columns, b.columns); - swap(a.total_byte_size, b.total_byte_size); - swap(a.num_rows, b.num_rows); - swap(a.sorting_columns, b.sorting_columns); - swap(a.file_offset, b.file_offset); - swap(a.total_compressed_size, b.total_compressed_size); - swap(a.ordinal, b.ordinal); - swap(a.__isset, b.__isset); -} +namespace duckdb_zstd { -RowGroup::RowGroup(const RowGroup& other122) { - columns = other122.columns; - total_byte_size = other122.total_byte_size; - num_rows = other122.num_rows; - sorting_columns = other122.sorting_columns; - file_offset = other122.file_offset; - total_compressed_size = other122.total_compressed_size; - ordinal = other122.ordinal; - __isset = other122.__isset; -} -RowGroup& RowGroup::operator=(const RowGroup& other123) { - columns = other123.columns; - total_byte_size = other123.total_byte_size; - num_rows = other123.num_rows; - sorting_columns = other123.sorting_columns; - file_offset = other123.file_offset; - total_compressed_size = other123.total_compressed_size; - ordinal = other123.ordinal; - __isset = other123.__isset; - return *this; -} -void RowGroup::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "RowGroup("; - out << "columns=" << to_string(columns); - out << ", " << "total_byte_size=" << to_string(total_byte_size); - out << ", " << "num_rows=" << to_string(num_rows); - out << ", " << "sorting_columns="; (__isset.sorting_columns ? 
(out << to_string(sorting_columns)) : (out << "")); - out << ", " << "file_offset="; (__isset.file_offset ? (out << to_string(file_offset)) : (out << "")); - out << ", " << "total_compressed_size="; (__isset.total_compressed_size ? (out << to_string(total_compressed_size)) : (out << "")); - out << ", " << "ordinal="; (__isset.ordinal ? (out << to_string(ordinal)) : (out << "")); - out << ")"; -} +/******************************************************************************* + Introduction + zstd, short for Zstandard, is a fast lossless compression algorithm, targeting + real-time compression scenarios at zlib-level and better compression ratios. + The zstd compression library provides in-memory compression and decompression + functions. -TypeDefinedOrder::~TypeDefinedOrder() throw() { -} + The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), + which is currently 22. Levels >= 20, labeled `--ultra`, should be used with + caution, as they require more memory. The library also offers negative + compression levels, which extend the range of speed vs. ratio preferences. + The lower the level, the faster the speed (at the cost of compression). -std::ostream& operator<<(std::ostream& out, const TypeDefinedOrder& obj) -{ - obj.printTo(out); - return out; -} + Compression can be done in: + - a single step (described as Simple API) + - a single step, reusing a context (described as Explicit context) + - unbounded multiple steps (described as Streaming compression) + The compression ratio achievable on small data can be highly improved using + a dictionary. Dictionary compression can be performed in: + - a single step (described as Simple dictionary API) + - a single step, reusing a dictionary (described as Bulk-processing + dictionary API) -uint32_t TypeDefinedOrder::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { + Advanced experimental functions can be accessed using + `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. 
- ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; + Advanced experimental APIs should never be used with a dynamically-linked + library. They are not "stable"; their definitions or signatures may change in + the future. Only static linking is allowed. +*******************************************************************************/ - xfer += iprot->readStructBegin(fname); +/*------ Version ------*/ +#define ZSTD_VERSION_MAJOR 1 +#define ZSTD_VERSION_MINOR 4 +#define ZSTD_VERSION_RELEASE 5 - using ::duckdb_apache::thrift::protocol::TProtocolException; +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) +ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */ +#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE +#define ZSTD_QUOTE(str) #str +#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) +#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) +ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */ - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } +/* ************************************* + * Default constant + ***************************************/ +#ifndef ZSTD_CLEVEL_DEFAULT +# define ZSTD_CLEVEL_DEFAULT 3 +#endif - xfer += iprot->readStructEnd(); +/* ************************************* + * Constants + ***************************************/ - return xfer; -} +/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ +#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ +#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ +#define 
ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ +#define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 -uint32_t TypeDefinedOrder::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TypeDefinedOrder"); +#define ZSTD_BLOCKSIZELOG_MAX 17 +#define ZSTD_BLOCKSIZE_MAX (1<writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} -void swap(TypeDefinedOrder &a, TypeDefinedOrder &b) { - using ::std::swap; - (void) a; - (void) b; -} -TypeDefinedOrder::TypeDefinedOrder(const TypeDefinedOrder& other124) { - (void) other124; -} -TypeDefinedOrder& TypeDefinedOrder::operator=(const TypeDefinedOrder& other125) { - (void) other125; - return *this; -} -void TypeDefinedOrder::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "TypeDefinedOrder("; - out << ")"; -} +/*************************************** +* Simple API +***************************************/ +/*! ZSTD_compress() : + * Compresses `src` content as a single zstd compressed frame into already allocated `dst`. + * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); +/*! ZSTD_decompress() : + * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. + * `dstCapacity` is an upper bound of originalSize to regenerate. + * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. 
+ * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); -ColumnOrder::~ColumnOrder() throw() { -} +/*! ZSTD_getFrameContentSize() : requires v1.3.0+ + * `src` should point to the start of a ZSTD encoded frame. + * `srcSize` must be at least as large as the frame header. + * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. + * @return : - decompressed size of `src` frame content, if known + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) + * note 1 : a 0 return value means the frame is valid but "empty". + * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * Optionally, application can rely on some implicit limit, + * as ZSTD_decompress() only needs an upper bound of decompressed size. + * (For example, data could be necessarily cut into blocks <= 16 KB). + * note 3 : decompressed size is always present when compression is completed using single-pass functions, + * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). + * note 4 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure return value fits within application's authorized limits. + * Each application can set its own limits. 
+ * note 6 : This function replaces ZSTD_getDecompressedSize() */ +#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) +ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); +/*! ZSTD_getDecompressedSize() : + * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). + * Both functions work the same way, but ZSTD_getDecompressedSize() blends + * "empty", "unknown" and "error" results to the same return value (0), + * while ZSTD_getFrameContentSize() gives them separate return values. + * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ +ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); -void ColumnOrder::__set_TYPE_ORDER(const TypeDefinedOrder& val) { - this->TYPE_ORDER = val; -__isset.TYPE_ORDER = true; -} -std::ostream& operator<<(std::ostream& out, const ColumnOrder& obj) -{ - obj.printTo(out); - return out; -} +/*! ZSTD_findFrameCompressedSize() : + * `src` should point to the start of a ZSTD frame or skippable frame. + * `srcSize` must be >= first frame size + * @return : the compressed size of the first frame starting at `src`, + * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, + * or an error code if input is invalid */ +ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); -uint32_t ColumnOrder::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +/*====== Helper functions ======*/ +#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? 
(((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ +ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ +ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ +ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ +ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - xfer += iprot->readStructBegin(fname); +/*************************************** +* Explicit context +***************************************/ +/*= Compression context + * When compressing many times, + * it is recommended to allocate a context just once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Note : re-using context is just a speed / resource optimization. + * It doesn't change the compression ratio, which remains identical. + * Note 2 : In multi-threaded environments, + * use one different context per thread for parallel execution. + */ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); +ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); - using ::duckdb_apache::thrift::protocol::TProtocolException; +/*! ZSTD_compressCCtx() : + * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. + * Important : in order to behave similarly to `ZSTD_compress()`, + * this function compresses at requested compression level, + * __ignoring any other parameter__ . 
+ * If any advanced parameter was set using the advanced API, + * they will all be reset. Only `compressionLevel` remains. + */ +ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); +/*= Decompression context + * When decompressing many times, + * it is recommended to allocate a context only once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Use one context per thread for parallel execution. */ +typedef struct ZSTD_DCtx_s ZSTD_DCtx; +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); +ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->TYPE_ORDER.read(iprot); - this->__isset.TYPE_ORDER = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } +/*! ZSTD_decompressDCtx() : + * Same as ZSTD_decompress(), + * requires an allocated ZSTD_DCtx. + * Compatible with sticky parameters. + */ +ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); - xfer += iprot->readStructEnd(); - return xfer; -} +/*************************************** +* Advanced compression API +***************************************/ -uint32_t ColumnOrder::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnOrder"); +/* API design : + * Parameters are pushed one by one into an existing context, + * using ZSTD_CCtx_set*() functions. 
+ * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. + * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! + * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . + * + * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). + * + * This API supercedes all other "advanced" API entry points in the experimental section. + * In the future, we expect to remove from experimental API entry points which are redundant with this API. + */ - if (this->__isset.TYPE_ORDER) { - xfer += oprot->writeFieldBegin("TYPE_ORDER", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); - xfer += this->TYPE_ORDER.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} -void swap(ColumnOrder &a, ColumnOrder &b) { - using ::std::swap; - swap(a.TYPE_ORDER, b.TYPE_ORDER); - swap(a.__isset, b.__isset); -} +/* Compression strategies, listed from fastest to strongest */ +typedef enum { ZSTD_fast=1, + ZSTD_dfast=2, + ZSTD_greedy=3, + ZSTD_lazy=4, + ZSTD_lazy2=5, + ZSTD_btlazy2=6, + ZSTD_btopt=7, + ZSTD_btultra=8, + ZSTD_btultra2=9 + /* note : new strategies _might_ be added in the future. + Only the order (from fast to strong) is guaranteed */ +} ZSTD_strategy; -ColumnOrder::ColumnOrder(const ColumnOrder& other126) { - TYPE_ORDER = other126.TYPE_ORDER; - __isset = other126.__isset; -} -ColumnOrder& ColumnOrder::operator=(const ColumnOrder& other127) { - TYPE_ORDER = other127.TYPE_ORDER; - __isset = other127.__isset; - return *this; -} -void ColumnOrder::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "ColumnOrder("; - out << "TYPE_ORDER="; (__isset.TYPE_ORDER ? 
(out << to_string(TYPE_ORDER)) : (out << "")); - out << ")"; -} +typedef enum { -PageLocation::~PageLocation() throw() { -} + /* compression parameters + * Note: When compressing with a ZSTD_CDict these parameters are superseded + * by the parameters used to construct the ZSTD_CDict. + * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ + ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table. + * Note that exact compression parameters are dynamically determined, + * depending on both compression level and srcSize (when known). + * Default level is ZSTD_CLEVEL_DEFAULT==3. + * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. + * Note 1 : it's possible to pass a negative compression level. + * Note 2 : setting a level does not automatically set all other compression parameters + * to default. Setting this will however eventually dynamically impact the compression + * parameters which have not been manually set. The manually set + * ones will 'stick'. */ + /* Advanced compression parameters : + * It's possible to pin down compression parameters to some specific values. + * In which case, these values are no longer dynamically selected by the compressor */ + ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. + * This will set a memory budget for streaming decompression, + * with larger values requiring more memory + * and typically compressing more. + * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. + * Special: value 0 means "use default windowLog". + * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT + * requires explicitly allowing such size at streaming decompression stage. */ + ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. + * Resulting memory usage is (1 << (hashLog+2)). + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. 
+ * Larger tables improve compression ratio of strategies <= dFast, + * and improve speed of strategies > dFast. + * Special: value 0 means "use default hashLog". */ + ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2. + * Resulting memory usage is (1 << (chainLog+2)). + * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. + * Larger tables result in better and slower compression. + * This parameter is useless for "fast" strategy. + * It's still useful when using "dfast" strategy, + * in which case it defines a secondary probe table. + * Special: value 0 means "use default chainLog". */ + ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2. + * More attempts result in better and slower compression. + * This parameter is useless for "fast" and "dFast" strategies. + * Special: value 0 means "use default searchLog". */ + ZSTD_c_minMatch=105, /* Minimum size of searched matches. + * Note that Zstandard can still find matches of smaller size, + * it just tweaks its search algorithm to look for this size and larger. + * Larger values increase compression and decompression speed, but decrease ratio. + * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX. + * Note that currently, for all strategies < btopt, effective minimum is 4. + * , for all strategies > fast, effective maximum is 6. + * Special: value 0 means "use default minMatchLength". */ + ZSTD_c_targetLength=106, /* Impact of this field depends on strategy. + * For strategies btopt, btultra & btultra2: + * Length of Match considered "good enough" to stop search. + * Larger values make compression stronger, and slower. + * For strategy fast: + * Distance between match sampling. + * Larger values make compression faster, and weaker. + * Special: value 0 means "use default targetLength". */ + ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition. 
+ * The higher the value of selected strategy, the more complex it is, + * resulting in stronger and slower compression. + * Special: value 0 means "use default strategy". */ + /* LDM mode parameters */ + ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. + * This parameter is designed to improve compression ratio + * for large inputs, by finding large matches at long distance. + * It increases memory usage and window size. + * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB + * except when expressly set to a different value. */ + ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2. + * Larger values increase memory usage and compression ratio, + * but decrease compression speed. + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX + * default: windowlog - 7. + * Special: value 0 means "automatically determine hashlog". */ + ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher. + * Larger/too small values usually decrease compression ratio. + * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. + * Special: value 0 means "use default value" (default: 64). */ + ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution. + * Larger values improve collision resolution but decrease compression speed. + * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX. + * Special: value 0 means "use default value" (default: 3). */ + ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table. + * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). + * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage. + * Larger values improve compression speed. + * Deviating far from default value will likely result in a compression ratio decrease. + * Special: value 0 means "automatically determine hashRateLog". 
*/ -void PageLocation::__set_offset(const int64_t val) { - this->offset = val; -} + /* frame parameters */ + ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) + * Content size must be known at the beginning of compression. + * This is automatically the case when using ZSTD_compress2(), + * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ + ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ + ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ -void PageLocation::__set_compressed_page_size(const int32_t val) { - this->compressed_page_size = val; -} + /* multi-threading parameters */ + /* These parameters are only useful if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). + * They return an error otherwise. */ + ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. + * When nbWorkers >= 1, triggers asynchronous mode when used with ZSTD_compressStream*() : + * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, + * while compression work is performed in parallel, within worker threads. + * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : + * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). + * More workers improve speed, but also increase memory usage. + * Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */ + ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. + * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. 
+ * 0 means default, which is dynamically determined based on compression parameters. + * Job size must be a minimum of overlap size, or 1 MB, whichever is largest. + * The minimum size is automatically and transparently enforced. */ + ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. + * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. + * It helps preserve compression ratio, while each job is compressed in parallel. + * This value is enforced only when nbWorkers >= 1. + * Larger values increase compression ratio, but decrease speed. + * Possible values range from 0 to 9 : + * - 0 means "default" : value will be determined by the library, depending on strategy + * - 1 means "no overlap" + * - 9 means "full overlap", using a full window size. + * Each intermediate rank increases/decreases load size by a factor 2 : + * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default + * default value varies between 6 and 9, depending on strategy */ -void PageLocation::__set_first_row_index(const int64_t val) { - this->first_row_index = val; -} -std::ostream& operator<<(std::ostream& out, const PageLocation& obj) -{ - obj.printTo(out); - return out; -} + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_c_rsyncable + * ZSTD_c_format + * ZSTD_c_forceMaxWindow + * ZSTD_c_forceAttachDict + * ZSTD_c_literalCompressionMode + * ZSTD_c_targetCBlockSize + * ZSTD_c_srcSizeHint + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly; + * also, the enums values themselves are unstable and can still change. 
+ */ + ZSTD_c_experimentalParam1=500, + ZSTD_c_experimentalParam2=10, + ZSTD_c_experimentalParam3=1000, + ZSTD_c_experimentalParam4=1001, + ZSTD_c_experimentalParam5=1002, + ZSTD_c_experimentalParam6=1003, + ZSTD_c_experimentalParam7=1004 +} ZSTD_cParameter; +typedef struct { + size_t error; + int lowerBound; + int upperBound; +} ZSTD_bounds; -uint32_t PageLocation::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +/*! ZSTD_cParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - lower and upper bounds, both inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam); - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +/*! ZSTD_CCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_cParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is generally only possible during frame initialization (before starting compression). + * Exception : when using multi-threading mode (nbWorkers >= 1), + * the following parameters can be updated _during_ compression (within same frame): + * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. + * new parameters will be active for next job only (after a flush()). + * @return : an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value); - xfer += iprot->readStructBegin(fname); +/*! 
ZSTD_CCtx_setPledgedSrcSize() : + * Total input data size to be compressed as a single frame. + * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag. + * This value will also be controlled at end of frame, and trigger an error if not respected. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame. + * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. + * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame. + * Note 2 : pledgedSrcSize is only valid once, for the next frame. + * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN. + * Note 3 : Whenever all input data is provided and consumed in a single round, + * for example with ZSTD_compress2(), + * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), + * this value is automatically overridden by srcSize instead. + */ +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); - using ::duckdb_apache::thrift::protocol::TProtocolException; +typedef enum { + ZSTD_reset_session_only = 1, + ZSTD_reset_parameters = 2, + ZSTD_reset_session_and_parameters = 3 +} ZSTD_ResetDirective; - bool isset_offset = false; - bool isset_compressed_page_size = false; - bool isset_first_row_index = false; +/*! ZSTD_CCtx_reset() : + * There are 2 different things that can be reset, independently or jointly : + * - The session : will stop compressing current frame, and make CCtx ready to start a new one. + * Useful after an error, or to interrupt any ongoing compression. + * Any internal data not yet flushed is cancelled. + * Compression parameters and dictionary remain unchanged. + * They will be used to compress next frame. + * Resetting session never fails. + * - The parameters : changes all parameters back to "default". + * This removes any reference to any dictionary too. 
+ * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) + * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) + * - Both : similar to resetting the session, followed by resetting parameters. + */ +ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->offset); - isset_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->compressed_page_size); - isset_compressed_page_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->first_row_index); - isset_first_row_index = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } +/*! ZSTD_compress2() : + * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. + * ZSTD_compress2() always starts a new frame. + * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - The function is always blocking, returns when compression is completed. + * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). 
+ */ +ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); - xfer += iprot->readStructEnd(); - if (!isset_offset) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_compressed_page_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_first_row_index) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} +/*************************************** +* Advanced decompression API +***************************************/ -uint32_t PageLocation::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("PageLocation"); +/* The advanced API pushes parameters one by one into an existing DCtx context. + * Parameters are sticky, and remain valid for all following frames + * using the same DCtx context. + * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). + * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). + * Therefore, no new decompression function is necessary. + */ - xfer += oprot->writeFieldBegin("offset", ::duckdb_apache::thrift::protocol::T_I64, 1); - xfer += oprot->writeI64(this->offset); - xfer += oprot->writeFieldEnd(); +typedef enum { - xfer += oprot->writeFieldBegin("compressed_page_size", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->compressed_page_size); - xfer += oprot->writeFieldEnd(); + ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which + * the streaming API will refuse to allocate memory buffer + * in order to protect the host from unreasonable memory requirements. + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. 
+ * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). + * Special: value 0 means "use default maximum windowLog". */ - xfer += oprot->writeFieldBegin("first_row_index", ::duckdb_apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->first_row_index); - xfer += oprot->writeFieldEnd(); + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_d_format + * ZSTD_d_stableOutBuffer + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly + */ + ZSTD_d_experimentalParam1=1000, + ZSTD_d_experimentalParam2=1001 - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} +} ZSTD_dParameter; -void swap(PageLocation &a, PageLocation &b) { - using ::std::swap; - swap(a.offset, b.offset); - swap(a.compressed_page_size, b.compressed_page_size); - swap(a.first_row_index, b.first_row_index); -} +/*! ZSTD_dParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. 
+ * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - both lower and upper bounds, inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); -PageLocation::PageLocation(const PageLocation& other128) { - offset = other128.offset; - compressed_page_size = other128.compressed_page_size; - first_row_index = other128.first_row_index; -} -PageLocation& PageLocation::operator=(const PageLocation& other129) { - offset = other129.offset; - compressed_page_size = other129.compressed_page_size; - first_row_index = other129.first_row_index; - return *this; -} -void PageLocation::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "PageLocation("; - out << "offset=" << to_string(offset); - out << ", " << "compressed_page_size=" << to_string(compressed_page_size); - out << ", " << "first_row_index=" << to_string(first_row_index); - out << ")"; -} +/*! ZSTD_DCtx_setParameter() : + * Set one decompression parameter, selected by enum ZSTD_dParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is only possible during frame initialization (before starting decompression). + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); +/*! ZSTD_DCtx_reset() : + * Return a DCtx to clean state. + * Session and parameters can be reset jointly or separately. + * Parameters can only be reset when no active frame is being decompressed.
+ * @return : 0, or an error code, which can be tested with ZSTD_isError() + */ +ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); -OffsetIndex::~OffsetIndex() throw() { -} +/**************************** +* Streaming +****************************/ -void OffsetIndex::__set_page_locations(const std::vector & val) { - this->page_locations = val; -} -std::ostream& operator<<(std::ostream& out, const OffsetIndex& obj) -{ - obj.printTo(out); - return out; -} +typedef struct ZSTD_inBuffer_s { + const void* src; /**< start of input buffer */ + size_t size; /**< size of input buffer */ + size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_inBuffer; +typedef struct ZSTD_outBuffer_s { + void* dst; /**< start of output buffer */ + size_t size; /**< size of output buffer */ + size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_outBuffer; -uint32_t OffsetIndex::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - xfer += iprot->readStructBegin(fname); +/*-*********************************************************************** +* Streaming compression - HowTo +* +* A ZSTD_CStream object is required to track streaming operation. +* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. +* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. +* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. +* +* For parallel execution, use one separate ZSTD_CStream per thread. +* +* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. 
+* +* Parameters are sticky : when starting a new compression on the same context, +* it will re-use the same sticky parameters as previous compression session. +* When in doubt, it's recommended to fully initialize the context before usage. +* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), +* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to +* set more specific parameters, the pledged source size, or load a dictionary. +* +* Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to +* consume input stream. The function will automatically update both `pos` +* fields within `input` and `output`. +* Note that the function may not consume the entire input, for example, because +* the output buffer is already full, in which case `input.pos < input.size`. +* The caller must check if input has been entirely consumed. +* If not, the caller must make some room to receive more compressed data, +* and then present again remaining input data. +* note: ZSTD_e_continue is guaranteed to make some forward progress when called, +* but doesn't guarantee maximal forward progress. This is especially relevant +* when compressing with multiple threads. The call won't block if it can +* consume some input, but if it can't it will wait for some, but not all, +* output to be flushed. +* @return : provides a minimum amount of data remaining to be flushed from internal buffers +* or an error code, which can be tested using ZSTD_isError(). +* +* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, +* using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. +* Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). +* In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. 
+* You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the +* operation. +* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if internal buffers are entirely flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). +* +* Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* The epilogue is required for decoders to consider a frame completed. +* flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to +* start a new frame. +* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if frame fully completed and fully flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). 
+* +* *******************************************************************/ - using ::duckdb_apache::thrift::protocol::TProtocolException; +typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */ + /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ +/*===== ZSTD_CStream management functions =====*/ +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); +ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); - bool isset_page_locations = false; +/*===== Streaming compression functions =====*/ +typedef enum { + ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ + ZSTD_e_flush=1, /* flush any data provided so far, + * it creates (at least) one new block, that can be decoded immediately on reception; + * frame will continue: any future data can still reference previously compressed data, improving compression. + * note : multithreaded compression will block to flush as much output as possible. */ + ZSTD_e_end=2 /* flush any remaining data _and_ close current frame. + * note that frame is only closed after compressed data is fully flushed (return value == 0). + * After that point, any additional data starts a new frame. + * note : each frame is independent (does not reference any content from previous frame). + * note : multithreaded compression will block to flush as much output as possible.
*/ +} ZSTD_EndDirective; - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->page_locations.clear(); - uint32_t _size130; - ::duckdb_apache::thrift::protocol::TType _etype133; - xfer += iprot->readListBegin(_etype133, _size130); - this->page_locations.resize(_size130); - uint32_t _i134; - for (_i134 = 0; _i134 < _size130; ++_i134) - { - xfer += this->page_locations[_i134].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_page_locations = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } +/*! ZSTD_compressStream2() : + * Behaves about the same as ZSTD_compressStream, with additional control on end directive. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) + * - output->pos must be <= dstCapacity, input->pos must be <= srcSize + * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. + * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. + * - When nbWorkers>=1, function is non-blocking : it just acquires a copy of input, and distributes jobs to internal worker threads, flush whatever is available, + * and then immediately returns, just indicating that there is some data remaining to be flushed. + * The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte. + * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. 
+ * - @return provides a minimum amount of data remaining to be flushed from internal buffers + * or an error code, which can be tested using ZSTD_isError(). + * if @return != 0, flush is not fully completed, there is still some data left within internal buffers. + * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers. + * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed. + * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), + * only ZSTD_e_end or ZSTD_e_flush operations are allowed. + * Before starting a new compression job, or changing compression parameters, + * it is required to fully flush internal buffers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); - xfer += iprot->readStructEnd(); - if (!isset_page_locations) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} +/* These buffer sizes are softly recommended. + * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output. + * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(), + * reducing the amount of memory shuffling and buffering, resulting in minor performance savings. + * + * However, note that these recommendations are from the perspective of a C caller program. + * If the streaming interface is invoked from some other language, + * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo, + * a major performance rule is to reduce crossing such interface to an absolute minimum. + * It's not rare that performance ends being spent more into the interface, rather than compression itself. + * In which cases, prefer using large buffers, as large as practical, + * for both input and output, to reduce the nb of roundtrips. 
+ */ +ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */ -uint32_t OffsetIndex::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("OffsetIndex"); - xfer += oprot->writeFieldBegin("page_locations", ::duckdb_apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->page_locations.size())); - std::vector ::const_iterator _iter135; - for (_iter135 = this->page_locations.begin(); _iter135 != this->page_locations.end(); ++_iter135) - { - xfer += (*_iter135).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); +/* ***************************************************************************** + * This following is a legacy streaming API. + * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). + * It is redundant, but remains fully supported. + * Advanced parameters and dictionary compression can only be used through the + * new API. + ******************************************************************************/ - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} +/*! + * Equivalent to: + * + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + */ +ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); +/*! + * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). + * NOTE: The return value is different. 
ZSTD_compressStream() returns a hint for + * the next read size (if non-zero and not an error). ZSTD_compressStream2() + * returns the minimum nb of bytes left to flush (if non-zero and not an error). + */ +ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ +ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ +ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); -void swap(OffsetIndex &a, OffsetIndex &b) { - using ::std::swap; - swap(a.page_locations, b.page_locations); -} -OffsetIndex::OffsetIndex(const OffsetIndex& other136) { - page_locations = other136.page_locations; -} -OffsetIndex& OffsetIndex::operator=(const OffsetIndex& other137) { - page_locations = other137.page_locations; - return *this; -} -void OffsetIndex::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "OffsetIndex("; - out << "page_locations=" << to_string(page_locations); - out << ")"; -} +/*-*************************************************************************** +* Streaming decompression - HowTo +* +* A ZSTD_DStream object is required to track streaming operations. +* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. +* ZSTD_DStream objects can be re-used multiple times. +* +* Use ZSTD_initDStream() to start a new decompression operation. +* @return : recommended first input size +* Alternatively, use advanced API to set specific properties. +* +* Use ZSTD_decompressStream() repetitively to consume your input. +* The function will update both `pos` fields. +* If `input.pos < input.size`, some input has not been consumed. +* It's up to the caller to present again remaining data. 
+* The function tries to flush all data decoded immediately, respecting output buffer size. +* If `output.pos < output.size`, decoder has flushed everything it could. +* But if `output.pos == output.size`, there might be some data left within internal buffers. +* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. +* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. +* @return : 0 when a frame is completely decoded and fully flushed, +* or an error code, which can be tested using ZSTD_isError(), +* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : +* the return value is a suggested next input size (just a hint for better latency) +* that will never request more than the remaining frame size. +* *******************************************************************************/ +typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ + /* For compatibility with versions <= v1.2.0, prefer differentiating them.
*/ +/*===== ZSTD_DStream management functions =====*/ +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); +ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); -ColumnIndex::~ColumnIndex() throw() { -} +/*===== Streaming decompression functions =====*/ +/* This function is redundant with the advanced API and equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, NULL); + */ +ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); -void ColumnIndex::__set_null_pages(const std::vector & val) { - this->null_pages = val; -} +ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); -void ColumnIndex::__set_min_values(const std::vector & val) { - this->min_values = val; -} +ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ -void ColumnIndex::__set_max_values(const std::vector & val) { - this->max_values = val; -} -void ColumnIndex::__set_boundary_order(const BoundaryOrder::type val) { - this->boundary_order = val; -} +/************************** +* Simple dictionary API +***************************/ +/*! ZSTD_compress_usingDict() : + * Compression at an explicit compression level using a Dictionary. + * A dictionary can be any arbitrary data segment (also called a prefix), + * or a buffer with specified information (see dict/zdict.h). + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. 
*/ +ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + int compressionLevel); -void ColumnIndex::__set_null_counts(const std::vector & val) { - this->null_counts = val; -__isset.null_counts = true; -} -std::ostream& operator<<(std::ostream& out, const ColumnIndex& obj) -{ - obj.printTo(out); - return out; -} +/*! ZSTD_decompress_usingDict() : + * Decompression using a known Dictionary. + * Dictionary must be identical to the one used during compression. + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); -uint32_t ColumnIndex::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +/*********************************** + * Bulk processing dictionary API + **********************************/ +typedef struct ZSTD_CDict_s ZSTD_CDict; - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +/*! ZSTD_createCDict() : + * When compressing multiple messages or blocks using the same dictionary, + * it's recommended to digest the dictionary only once, since it's a costly operation. + * ZSTD_createCDict() will create a state from digesting a dictionary. + * The resulting state can be used for future compression operations with very limited startup cost. + * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. + * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. 
+ * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. + * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, + * in which case the only thing that it transports is the @compressionLevel. + * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, + * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, + int compressionLevel); - xfer += iprot->readStructBegin(fname); +/*! ZSTD_freeCDict() : + * Function frees memory allocated by ZSTD_createCDict(). */ +ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); - using ::duckdb_apache::thrift::protocol::TProtocolException; +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. + * Note : compression level is _decided at dictionary creation time_, + * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict); - bool isset_null_pages = false; - bool isset_min_values = false; - bool isset_max_values = false; - bool isset_boundary_order = false; - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->null_pages.clear(); - uint32_t _size138; - ::duckdb_apache::thrift::protocol::TType _etype141; - xfer += iprot->readListBegin(_etype141, _size138); - this->null_pages.resize(_size138); - uint32_t _i142; - for (_i142 = 0; _i142 < _size138; ++_i142) - { - xfer += iprot->readBool(this->null_pages[_i142]); - } - xfer += iprot->readListEnd(); - } - 
isset_null_pages = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->min_values.clear(); - uint32_t _size143; - ::duckdb_apache::thrift::protocol::TType _etype146; - xfer += iprot->readListBegin(_etype146, _size143); - this->min_values.resize(_size143); - uint32_t _i147; - for (_i147 = 0; _i147 < _size143; ++_i147) - { - xfer += iprot->readBinary(this->min_values[_i147]); - } - xfer += iprot->readListEnd(); - } - isset_min_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->max_values.clear(); - uint32_t _size148; - ::duckdb_apache::thrift::protocol::TType _etype151; - xfer += iprot->readListBegin(_etype151, _size148); - this->max_values.resize(_size148); - uint32_t _i152; - for (_i152 = 0; _i152 < _size148; ++_i152) - { - xfer += iprot->readBinary(this->max_values[_i152]); - } - xfer += iprot->readListEnd(); - } - isset_max_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast153; - xfer += iprot->readI32(ecast153); - this->boundary_order = (BoundaryOrder::type)ecast153; - isset_boundary_order = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->null_counts.clear(); - uint32_t _size154; - ::duckdb_apache::thrift::protocol::TType _etype157; - xfer += iprot->readListBegin(_etype157, _size154); - this->null_counts.resize(_size154); - uint32_t _i158; - for (_i158 = 0; _i158 < _size154; ++_i158) - { - xfer += iprot->readI64(this->null_counts[_i158]); - } - xfer += iprot->readListEnd(); - } - this->__isset.null_counts = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } +typedef struct 
ZSTD_DDict_s ZSTD_DDict; - xfer += iprot->readStructEnd(); +/*! ZSTD_createDDict() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); - if (!isset_null_pages) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_min_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_max_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_boundary_order) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} +/*! ZSTD_freeDDict() : + * Function frees memory allocated with ZSTD_createDDict() */ +ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); -uint32_t ColumnIndex::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnIndex"); +/*! ZSTD_decompress_usingDDict() : + * Decompression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. 
*/ +ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict); - xfer += oprot->writeFieldBegin("null_pages", ::duckdb_apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_BOOL, static_cast(this->null_pages.size())); - std::vector ::const_iterator _iter159; - for (_iter159 = this->null_pages.begin(); _iter159 != this->null_pages.end(); ++_iter159) - { - xfer += oprot->writeBool((*_iter159)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - xfer += oprot->writeFieldBegin("min_values", ::duckdb_apache::thrift::protocol::T_LIST, 2); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRING, static_cast(this->min_values.size())); - std::vector ::const_iterator _iter160; - for (_iter160 = this->min_values.begin(); _iter160 != this->min_values.end(); ++_iter160) - { - xfer += oprot->writeBinary((*_iter160)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); +/******************************** + * Dictionary helper functions + *******************************/ - xfer += oprot->writeFieldBegin("max_values", ::duckdb_apache::thrift::protocol::T_LIST, 3); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRING, static_cast(this->max_values.size())); - std::vector ::const_iterator _iter161; - for (_iter161 = this->max_values.begin(); _iter161 != this->max_values.end(); ++_iter161) - { - xfer += oprot->writeBinary((*_iter161)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); +/*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. 
*/ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); - xfer += oprot->writeFieldBegin("boundary_order", ::duckdb_apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32((int32_t)this->boundary_order); - xfer += oprot->writeFieldEnd(); +/*! ZSTD_getDictID_fromDDict() : + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); - if (this->__isset.null_counts) { - xfer += oprot->writeFieldBegin("null_counts", ::duckdb_apache::thrift::protocol::T_LIST, 5); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_I64, static_cast(this->null_counts.size())); - std::vector ::const_iterator _iter162; - for (_iter162 = this->null_counts.begin(); _iter162 != this->null_counts.end(); ++_iter162) - { - xfer += oprot->writeI64((*_iter162)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} +/*! ZSTD_getDictID_fromFrame() : + * Provides the dictID required to decompressed the frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This could for one of the following reasons : + * - The frame does not require a dictionary to be decoded (most common case). + * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). + * - This is not a Zstandard frame. 
+ * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); -void swap(ColumnIndex &a, ColumnIndex &b) { - using ::std::swap; - swap(a.null_pages, b.null_pages); - swap(a.min_values, b.min_values); - swap(a.max_values, b.max_values); - swap(a.boundary_order, b.boundary_order); - swap(a.null_counts, b.null_counts); - swap(a.__isset, b.__isset); -} -ColumnIndex::ColumnIndex(const ColumnIndex& other163) { - null_pages = other163.null_pages; - min_values = other163.min_values; - max_values = other163.max_values; - boundary_order = other163.boundary_order; - null_counts = other163.null_counts; - __isset = other163.__isset; -} -ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other164) { - null_pages = other164.null_pages; - min_values = other164.min_values; - max_values = other164.max_values; - boundary_order = other164.boundary_order; - null_counts = other164.null_counts; - __isset = other164.__isset; - return *this; -} -void ColumnIndex::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "ColumnIndex("; - out << "null_pages=" << to_string(null_pages); - out << ", " << "min_values=" << to_string(min_values); - out << ", " << "max_values=" << to_string(max_values); - out << ", " << "boundary_order=" << to_string(boundary_order); - out << ", " << "null_counts="; (__isset.null_counts ? 
(out << to_string(null_counts)) : (out << "")); - out << ")"; -} - - -AesGcmV1::~AesGcmV1() throw() { -} - - -void AesGcmV1::__set_aad_prefix(const std::string& val) { - this->aad_prefix = val; -__isset.aad_prefix = true; -} - -void AesGcmV1::__set_aad_file_unique(const std::string& val) { - this->aad_file_unique = val; -__isset.aad_file_unique = true; -} - -void AesGcmV1::__set_supply_aad_prefix(const bool val) { - this->supply_aad_prefix = val; -__isset.supply_aad_prefix = true; -} -std::ostream& operator<<(std::ostream& out, const AesGcmV1& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t AesGcmV1::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +/******************************************************************************* + * Advanced dictionary and prefix API + * + * This API allows dictionaries to be used with ZSTD_compress2(), + * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and + * only reset with the context is reset with ZSTD_reset_parameters or + * ZSTD_reset_session_and_parameters. Prefixes are single-use. + ******************************************************************************/ - xfer += iprot->readStructBegin(fname); - using ::duckdb_apache::thrift::protocol::TProtocolException; +/*! ZSTD_CCtx_loadDictionary() : + * Create an internal CDict from `dict` buffer. + * Decompression will have to use same dictionary. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. + * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). 
+ * Note 2 : Loading a dictionary involves building tables. + * It's also a CPU consuming operation, with non-negligible impact on latency. + * Tables are dependent on compression parameters, and for this reason, + * compression parameters can no longer be changed after loading a dictionary. + * Note 3 :`dict` content will be copied internally. + * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. + * In such a case, dictionary buffer must outlive its users. + * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() + * to precisely select how dictionary content must be interpreted. */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); +/*! ZSTD_CCtx_refCDict() : + * Reference a prepared dictionary, to be used for all next compressed frames. + * Note that compression parameters are enforced from within CDict, + * and supersede any compression parameter previously set within CCtx. + * The parameters ignored are labled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. + * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. + * The dictionary will remain valid for future compressed frames using same CCtx. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Referencing a NULL CDict means "return to no-dictionary mode". + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. 
*/ +ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_prefix); - this->__isset.aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_file_unique); - this->__isset.aad_file_unique = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->supply_aad_prefix); - this->__isset.supply_aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } +/*! ZSTD_CCtx_refPrefix() : + * Reference a prefix (single-usage dictionary) for next compressed frame. + * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). + * Decompression will need same prefix to properly regenerate data. + * Compressing with a prefix is similar in outcome as performing a diff and compressing it, + * but performs much faster, especially during decompression (compression speed is tunable with compression level). + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary + * Note 1 : Prefix buffer is referenced. It **must** outlive compression. + * Its content must remain unmodified during compression. + * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, + * ensure that the window size is large enough to contain the entire source. + * See ZSTD_c_windowLog. 
+ * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. + * It's a CPU consuming operation, with non-negligible impact on latency. + * If there is a need to use the same prefix multiple times, consider loadDictionary instead. + * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent). + * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, + const void* prefix, size_t prefixSize); - xfer += iprot->readStructEnd(); +/*! ZSTD_DCtx_loadDictionary() : + * Create an internal DDict from dict buffer, + * to be used to decompress next frames. + * The dictionary remains valid for all future frames, until explicitly invalidated. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Loading a dictionary involves building tables, + * which has a non-negligible impact on CPU usage and latency. + * It's recommended to "load once, use many times", to amortize the cost + * Note 2 :`dict` content will be copied internally, so `dict` can be released after loading. + * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead. + * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of + * how dictionary content is loaded and interpreted. + */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); - return xfer; -} +/*! ZSTD_DCtx_refDDict() : + * Reference a prepared dictionary, to be used to decompress next frames. + * The dictionary remains active for decompression of future frames using same DCtx. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Currently, only one dictionary can be managed. 
+ * Referencing a new dictionary effectively "discards" any previous one. + * Special: referencing a NULL DDict means "return to no-dictionary mode". + * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); -uint32_t AesGcmV1::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("AesGcmV1"); +/*! ZSTD_DCtx_refPrefix() : + * Reference a prefix (single-usage dictionary) to decompress next frame. + * This is the reverse operation of ZSTD_CCtx_refPrefix(), + * and must use the same prefix as the one used during compression. + * Prefix is **only used once**. Reference is discarded at end of frame. + * End of frame is reached when ZSTD_decompressStream() returns 0. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary + * Note 2 : Prefix buffer is referenced. It **must** outlive decompression. + * Prefix buffer must remain unmodified up to the end of frame, + * reached when ZSTD_decompressStream() returns 0. + * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent). + * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section) + * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. + * A full dictionary is more costly, as it requires building tables. 
+ */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, + const void* prefix, size_t prefixSize); - if (this->__isset.aad_prefix) { - xfer += oprot->writeFieldBegin("aad_prefix", ::duckdb_apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeBinary(this->aad_prefix); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.aad_file_unique) { - xfer += oprot->writeFieldBegin("aad_file_unique", ::duckdb_apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->aad_file_unique); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.supply_aad_prefix) { - xfer += oprot->writeFieldBegin("supply_aad_prefix", ::duckdb_apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->supply_aad_prefix); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} +/* === Memory management === */ -void swap(AesGcmV1 &a, AesGcmV1 &b) { - using ::std::swap; - swap(a.aad_prefix, b.aad_prefix); - swap(a.aad_file_unique, b.aad_file_unique); - swap(a.supply_aad_prefix, b.supply_aad_prefix); - swap(a.__isset, b.__isset); -} +/*! ZSTD_sizeof_*() : + * These functions give the _current_ memory usage of selected object. + * Note that object memory usage can evolve (increase or decrease) over time. 
*/ +ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); +ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); +ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); +ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); -AesGcmV1::AesGcmV1(const AesGcmV1& other165) { - aad_prefix = other165.aad_prefix; - aad_file_unique = other165.aad_file_unique; - supply_aad_prefix = other165.supply_aad_prefix; - __isset = other165.__isset; -} -AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other166) { - aad_prefix = other166.aad_prefix; - aad_file_unique = other166.aad_file_unique; - supply_aad_prefix = other166.supply_aad_prefix; - __isset = other166.__isset; - return *this; -} -void AesGcmV1::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "AesGcmV1("; - out << "aad_prefix="; (__isset.aad_prefix ? (out << to_string(aad_prefix)) : (out << "")); - out << ", " << "aad_file_unique="; (__isset.aad_file_unique ? (out << to_string(aad_file_unique)) : (out << "")); - out << ", " << "supply_aad_prefix="; (__isset.supply_aad_prefix ? 
(out << to_string(supply_aad_prefix)) : (out << "")); - out << ")"; } +#endif /* ZSTD_H_235446 */ -AesGcmCtrV1::~AesGcmCtrV1() throw() { -} - +// LICENSE_CHANGE_END -void AesGcmCtrV1::__set_aad_prefix(const std::string& val) { - this->aad_prefix = val; -__isset.aad_prefix = true; -} -void AesGcmCtrV1::__set_aad_file_unique(const std::string& val) { - this->aad_file_unique = val; -__isset.aad_file_unique = true; -} -void AesGcmCtrV1::__set_supply_aad_prefix(const bool val) { - this->supply_aad_prefix = val; -__isset.supply_aad_prefix = true; -} -std::ostream& operator<<(std::ostream& out, const AesGcmCtrV1& obj) -{ - obj.printTo(out); - return out; -} +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list -uint32_t AesGcmCtrV1::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +/* ************************************************************************************** + * ADVANCED AND EXPERIMENTAL FUNCTIONS + **************************************************************************************** + * The definitions in the following section are considered experimental. + * They are provided for advanced scenarios. + * They should never be used with a dynamic library, as prototypes may change in the future. + * Use them only in association with static linking. 
+ * ***************************************************************************************/ - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +#ifndef ZSTD_H_ZSTD_STATIC_LINKING_ONLY +#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY - xfer += iprot->readStructBegin(fname); +namespace duckdb_zstd { - using ::duckdb_apache::thrift::protocol::TProtocolException; +/**************************************************************************************** + * experimental API (static linking only) + **************************************************************************************** + * The following symbols and constants + * are not planned to join "stable API" status in the near future. + * They can still change in future versions. + * Some of them are planned to remain in the static_only section indefinitely. + * Some of them might be removed in the future (especially when redundant with existing stable functions) + * ***************************************************************************************/ +#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */ +#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 
6 : 2) +#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */ +#define ZSTD_SKIPPABLEHEADERSIZE 8 - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_prefix); - this->__isset.aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_file_unique); - this->__isset.aad_file_unique = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->supply_aad_prefix); - this->__isset.supply_aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } +/* compression parameter bounds */ +#define ZSTD_WINDOWLOG_MAX_32 30 +#define ZSTD_WINDOWLOG_MAX_64 31 +#define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) +#define ZSTD_WINDOWLOG_MIN 10 +#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30) +#define ZSTD_HASHLOG_MIN 6 +#define ZSTD_CHAINLOG_MAX_32 29 +#define ZSTD_CHAINLOG_MAX_64 30 +#define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? 
ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64)) +#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN +#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) +#define ZSTD_SEARCHLOG_MIN 1 +#define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ +#define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */ +#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX +#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */ +#define ZSTD_STRATEGY_MIN ZSTD_fast +#define ZSTD_STRATEGY_MAX ZSTD_btultra2 - xfer += iprot->readStructEnd(); - return xfer; -} +#define ZSTD_OVERLAPLOG_MIN 0 +#define ZSTD_OVERLAPLOG_MAX 9 -uint32_t AesGcmCtrV1::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("AesGcmCtrV1"); +#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame + * requiring larger than (1<__isset.aad_prefix) { - xfer += oprot->writeFieldBegin("aad_prefix", ::duckdb_apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeBinary(this->aad_prefix); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.aad_file_unique) { - xfer += oprot->writeFieldBegin("aad_file_unique", ::duckdb_apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->aad_file_unique); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.supply_aad_prefix) { - xfer += oprot->writeFieldBegin("supply_aad_prefix", ::duckdb_apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->supply_aad_prefix); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} -void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b) { - using ::std::swap; - swap(a.aad_prefix, b.aad_prefix); - swap(a.aad_file_unique, b.aad_file_unique); - 
swap(a.supply_aad_prefix, b.supply_aad_prefix); - swap(a.__isset, b.__isset); -} +/* LDM parameter bounds */ +#define ZSTD_LDM_HASHLOG_MIN ZSTD_HASHLOG_MIN +#define ZSTD_LDM_HASHLOG_MAX ZSTD_HASHLOG_MAX +#define ZSTD_LDM_MINMATCH_MIN 4 +#define ZSTD_LDM_MINMATCH_MAX 4096 +#define ZSTD_LDM_BUCKETSIZELOG_MIN 1 +#define ZSTD_LDM_BUCKETSIZELOG_MAX 8 +#define ZSTD_LDM_HASHRATELOG_MIN 0 +#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) -AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other167) { - aad_prefix = other167.aad_prefix; - aad_file_unique = other167.aad_file_unique; - supply_aad_prefix = other167.supply_aad_prefix; - __isset = other167.__isset; -} -AesGcmCtrV1& AesGcmCtrV1::operator=(const AesGcmCtrV1& other168) { - aad_prefix = other168.aad_prefix; - aad_file_unique = other168.aad_file_unique; - supply_aad_prefix = other168.supply_aad_prefix; - __isset = other168.__isset; - return *this; -} -void AesGcmCtrV1::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "AesGcmCtrV1("; - out << "aad_prefix="; (__isset.aad_prefix ? (out << to_string(aad_prefix)) : (out << "")); - out << ", " << "aad_file_unique="; (__isset.aad_file_unique ? (out << to_string(aad_file_unique)) : (out << "")); - out << ", " << "supply_aad_prefix="; (__isset.supply_aad_prefix ? 
(out << to_string(supply_aad_prefix)) : (out << "")); - out << ")"; -} +/* Advanced parameter bounds */ +#define ZSTD_TARGETCBLOCKSIZE_MIN 64 +#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX +#define ZSTD_SRCSIZEHINT_MIN 0 +#define ZSTD_SRCSIZEHINT_MAX INT_MAX +/* internal */ +#define ZSTD_HASHLOG3_MAX 17 -EncryptionAlgorithm::~EncryptionAlgorithm() throw() { -} +/* --- Advanced types --- */ -void EncryptionAlgorithm::__set_AES_GCM_V1(const AesGcmV1& val) { - this->AES_GCM_V1 = val; -__isset.AES_GCM_V1 = true; -} +typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params; -void EncryptionAlgorithm::__set_AES_GCM_CTR_V1(const AesGcmCtrV1& val) { - this->AES_GCM_CTR_V1 = val; -__isset.AES_GCM_CTR_V1 = true; -} -std::ostream& operator<<(std::ostream& out, const EncryptionAlgorithm& obj) -{ - obj.printTo(out); - return out; -} +typedef struct { + unsigned int matchPos; /* Match pos in dst */ + /* If seqDef.offset > 3, then this is seqDef.offset - 3 + * If seqDef.offset < 3, then this is the corresponding repeat offset + * But if seqDef.offset < 3 and litLength == 0, this is the + * repeat offset before the corresponding repeat offset + * And if seqDef.offset == 3 and litLength == 0, this is the + * most recent repeat offset - 1 + */ + unsigned int offset; + unsigned int litLength; /* Literal length */ + unsigned int matchLength; /* Match length */ + /* 0 when seq not rep and seqDef.offset otherwise + * when litLength == 0 this will be <= 4, otherwise <= 3 like normal + */ + unsigned int rep; +} ZSTD_Sequence; +typedef struct { + unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ + unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ + unsigned hashLog; /**< dispatch table : larger == faster, more memory */ + unsigned searchLog; /**< nb of searches : larger == more compression, slower */ + unsigned minMatch; /**< match length searched : 
larger == faster decompression, sometimes less compression */ + unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */ + ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */ +} ZSTD_compressionParameters; -uint32_t EncryptionAlgorithm::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +typedef struct { + int contentSizeFlag; /**< 1: content size will be in frame header (when known) */ + int checksumFlag; /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */ + int noDictIDFlag; /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */ +} ZSTD_frameParameters; - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +typedef struct { + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; +} ZSTD_parameters; - xfer += iprot->readStructBegin(fname); +typedef enum { + ZSTD_dct_auto = 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */ + ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */ + ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */ +} ZSTD_dictContentType_e; - using ::duckdb_apache::thrift::protocol::TProtocolException; +typedef enum { + ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */ + ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. */ +} ZSTD_dictLoadMethod_e; +typedef enum { + ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */ + ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number. 
+ * Useful to save 4 bytes per generated frame. + * Decoder cannot recognise automatically this format, requiring this instruction. */ +} ZSTD_format_e; - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->AES_GCM_V1.read(iprot); - this->__isset.AES_GCM_V1 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->AES_GCM_CTR_V1.read(iprot); - this->__isset.AES_GCM_CTR_V1 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); +typedef enum { + /* Note: this enum and the behavior it controls are effectively internal + * implementation details of the compressor. They are expected to continue + * to evolve and should be considered only in the context of extremely + * advanced performance tuning. + * + * Zstd currently supports the use of a CDict in three ways: + * + * - The contents of the CDict can be copied into the working context. This + * means that the compression can search both the dictionary and input + * while operating on a single set of internal tables. This makes + * the compression faster per-byte of input. However, the initial copy of + * the CDict's tables incurs a fixed cost at the beginning of the + * compression. For small compressions (< 8 KB), that copy can dominate + * the cost of the compression. + * + * - The CDict's tables can be used in-place. In this model, compression is + * slower per input byte, because the compressor has to search two sets of + * tables. However, this model incurs no start-up cost (as long as the + * working context's tables can be reused). 
For small inputs, this can be + * faster than copying the CDict's tables. + * + * - The CDict's tables are not used at all, and instead we use the working + * context alone to reload the dictionary and use params based on the source + * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). + * This method is effective when the dictionary sizes are very small relative + * to the input size, and the input size is fairly large to begin with. + * + * Zstd has a simple internal heuristic that selects which strategy to use + * at the beginning of a compression. However, if experimentation shows that + * Zstd is making poor choices, it is possible to override that choice with + * this enum. + */ + ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ + ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ + ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ + ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ +} ZSTD_dictAttachPref_e; - return xfer; -} +typedef enum { + ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level. + * Negative compression levels will be uncompressed, and positive compression + * levels will be compressed. */ + ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be + * emitted if Huffman compression is not profitable. */ + ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. 
*/ +} ZSTD_literalCompressionMode_e; -uint32_t EncryptionAlgorithm::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EncryptionAlgorithm"); - if (this->__isset.AES_GCM_V1) { - xfer += oprot->writeFieldBegin("AES_GCM_V1", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); - xfer += this->AES_GCM_V1.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.AES_GCM_CTR_V1) { - xfer += oprot->writeFieldBegin("AES_GCM_CTR_V1", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); - xfer += this->AES_GCM_CTR_V1.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} +/*************************************** +* Frame size functions +***************************************/ -void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b) { - using ::std::swap; - swap(a.AES_GCM_V1, b.AES_GCM_V1); - swap(a.AES_GCM_CTR_V1, b.AES_GCM_CTR_V1); - swap(a.__isset, b.__isset); -} +/*! ZSTD_findDecompressedSize() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. there should be a frame boundary at `src + srcSize`) + * @return : - decompressed size of all data in all successive frames + * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. 
+ * note 2 : decompressed size is always present when compression is done with ZSTD_compress() + * note 3 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure result fits within application's authorized limits. + * Each application can set its own limits. + * note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to + * read each contained frame header. This is fast as most of the data is skipped, + * however it does mean that all frame data must be present and valid. */ +ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); -EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other169) { - AES_GCM_V1 = other169.AES_GCM_V1; - AES_GCM_CTR_V1 = other169.AES_GCM_CTR_V1; - __isset = other169.__isset; -} -EncryptionAlgorithm& EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other170) { - AES_GCM_V1 = other170.AES_GCM_V1; - AES_GCM_CTR_V1 = other170.AES_GCM_CTR_V1; - __isset = other170.__isset; - return *this; -} -void EncryptionAlgorithm::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "EncryptionAlgorithm("; - out << "AES_GCM_V1="; (__isset.AES_GCM_V1 ? (out << to_string(AES_GCM_V1)) : (out << "")); - out << ", " << "AES_GCM_CTR_V1="; (__isset.AES_GCM_CTR_V1 ? (out << to_string(AES_GCM_CTR_V1)) : (out << "")); - out << ")"; -} +/*! ZSTD_decompressBound() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. 
there should be a frame boundary at `src + srcSize`) + * @return : - upper-bound for the decompressed size of all data in all successive frames + * - if an error occured: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. + * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`. + * in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value. + * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by: + * upper-bound = # blocks * min(128 KB, Window_Size) + */ +ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); +/*! ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ +ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); -FileMetaData::~FileMetaData() throw() { -} +/*! ZSTD_getSequences() : + * Extract sequences from the sequence store + * zc can be used to insert custom compression params. + * This function invokes ZSTD_compress2 + * @return : number of sequences extracted + */ +ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize); -void FileMetaData::__set_version(const int32_t val) { - this->version = val; -} +/*************************************** +* Memory management +***************************************/ -void FileMetaData::__set_schema(const std::vector & val) { - this->schema = val; -} +/*! ZSTD_estimate*() : + * These functions make it possible to estimate memory usage + * of a future {D,C}Ctx, before its creation. + * + * ZSTD_estimateCCtxSize() will provide a memory budget large enough + * for any compression level up to selected one. 
+ * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate + * does not include space for a window buffer. + * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. + * The estimate will assume the input may be arbitrarily large, + * which is the worst case. + * + * When srcSize can be bound by a known and rather "small" value, + * this fact can be used to provide a tighter estimation + * because the CCtx compression context will need less memory. + * This tighter estimation can be provided by more advanced functions + * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), + * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). + * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. + * + * Note 2 : only single-threaded compression is supported. + * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. + */ +ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); -void FileMetaData::__set_num_rows(const int64_t val) { - this->num_rows = val; -} +/*! ZSTD_estimateCStreamSize() : + * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. + * It will also consider src size to be arbitrarily "large", which is worst case. + * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. + * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. 
+ * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. + * Note : CStream size estimation is only correct for single-threaded compression. + * ZSTD_DStream memory budget depends on window Size. + * This information can be passed manually, using ZSTD_estimateDStreamSize, + * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); + * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), + * an internal ?Dict will be created, which additional size is not estimated here. + * In this case, get total size by adding ZSTD_estimate?DictSize */ +ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); -void FileMetaData::__set_row_groups(const std::vector & val) { - this->row_groups = val; -} +/*! ZSTD_estimate?DictSize() : + * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict(). + * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced(). + * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller. 
+ */ +ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); +ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); -void FileMetaData::__set_key_value_metadata(const std::vector & val) { - this->key_value_metadata = val; -__isset.key_value_metadata = true; -} +/*! ZSTD_initStatic*() : + * Initialize an object using a pre-allocated fixed-size buffer. + * workspace: The memory area to emplace the object into. + * Provided pointer *must be 8-bytes aligned*. + * Buffer must outlive object. + * workspaceSize: Use ZSTD_estimate*Size() to determine + * how large workspace must be to support target scenario. + * @return : pointer to object (same address as workspace, just different type), + * or NULL if error (size too small, incorrect alignment, etc.) + * Note : zstd will never resize nor malloc() when using a static buffer. + * If the object requires more memory than available, + * zstd will just error out (typically ZSTD_error_memory_allocation). + * Note 2 : there is no corresponding "free" function. + * Since workspace is allocated externally, it must be freed externally too. + * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level + * into its associated cParams. + * Limitation 1 : currently not compatible with internal dictionary creation, triggered by + * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict(). + * Limitation 2 : static cctx currently not compatible with multi-threading. + * Limitation 3 : static dctx is incompatible with legacy support. 
+ */ +ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */ -void FileMetaData::__set_created_by(const std::string& val) { - this->created_by = val; -__isset.created_by = true; -} +ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */ -void FileMetaData::__set_column_orders(const std::vector & val) { - this->column_orders = val; -__isset.column_orders = true; -} +ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams); -void FileMetaData::__set_encryption_algorithm(const EncryptionAlgorithm& val) { - this->encryption_algorithm = val; -__isset.encryption_algorithm = true; -} +ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType); -void FileMetaData::__set_footer_signing_key_metadata(const std::string& val) { - this->footer_signing_key_metadata = val; -__isset.footer_signing_key_metadata = true; -} -std::ostream& operator<<(std::ostream& out, const FileMetaData& obj) -{ - obj.printTo(out); - return out; -} +/*! Custom memory allocation : + * These prototypes make it possible to pass your own allocation/free functions. + * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. + * All allocation/free operations will be completed using these custom variants instead of regular ones. 
+ */ +typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); +typedef void (*ZSTD_freeFunction) (void* opaque, void* address); +typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; -uint32_t FileMetaData::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, + ZSTD_customMem customMem); - xfer += iprot->readStructBegin(fname); +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem); - using ::duckdb_apache::thrift::protocol::TProtocolException; - bool isset_version = false; - bool isset_schema = false; - bool isset_num_rows = false; - bool isset_row_groups = false; - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->version); - isset_version = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->schema.clear(); - uint32_t _size171; - 
::duckdb_apache::thrift::protocol::TType _etype174; - xfer += iprot->readListBegin(_etype174, _size171); - this->schema.resize(_size171); - uint32_t _i175; - for (_i175 = 0; _i175 < _size171; ++_i175) - { - xfer += this->schema[_i175].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_schema = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->num_rows); - isset_num_rows = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->row_groups.clear(); - uint32_t _size176; - ::duckdb_apache::thrift::protocol::TType _etype179; - xfer += iprot->readListBegin(_etype179, _size176); - this->row_groups.resize(_size176); - uint32_t _i180; - for (_i180 = 0; _i180 < _size176; ++_i180) - { - xfer += this->row_groups[_i180].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_row_groups = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->key_value_metadata.clear(); - uint32_t _size181; - ::duckdb_apache::thrift::protocol::TType _etype184; - xfer += iprot->readListBegin(_etype184, _size181); - this->key_value_metadata.resize(_size181); - uint32_t _i185; - for (_i185 = 0; _i185 < _size181; ++_i185) - { - xfer += this->key_value_metadata[_i185].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.key_value_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->created_by); - this->__isset.created_by = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->column_orders.clear(); - uint32_t _size186; - ::duckdb_apache::thrift::protocol::TType _etype189; - xfer += 
iprot->readListBegin(_etype189, _size186); - this->column_orders.resize(_size186); - uint32_t _i190; - for (_i190 = 0; _i190 < _size186; ++_i190) - { - xfer += this->column_orders[_i190].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.column_orders = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->encryption_algorithm.read(iprot); - this->__isset.encryption_algorithm = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->footer_signing_key_metadata); - this->__isset.footer_signing_key_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } +/*************************************** +* Advanced compression functions +***************************************/ - xfer += iprot->readStructEnd(); +/*! ZSTD_createCDict_byReference() : + * Create a digested dictionary for compression + * Dictionary content is just referenced, not duplicated. + * As a consequence, `dictBuffer` **must** outlive CDict, + * and its content must remain unmodified throughout the lifetime of CDict. + * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); - if (!isset_version) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_schema) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_rows) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_row_groups) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} +/*! 
ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. + * `estimatedSrcSize` value is optional, select 0 if not known */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); -uint32_t FileMetaData::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("FileMetaData"); +/*! ZSTD_getParams() : + * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. + * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ +ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); - xfer += oprot->writeFieldBegin("version", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->version); - xfer += oprot->writeFieldEnd(); +/*! ZSTD_checkCParams() : + * Ensure param values remain within authorized range. + * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); - xfer += oprot->writeFieldBegin("schema", ::duckdb_apache::thrift::protocol::T_LIST, 2); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->schema.size())); - std::vector ::const_iterator _iter191; - for (_iter191 = this->schema.begin(); _iter191 != this->schema.end(); ++_iter191) - { - xfer += (*_iter191).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); +/*! ZSTD_adjustCParams() : + * optimize params for a given `srcSize` and `dictSize`. + * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. 
+ * `dictSize` must be `0` when there is no dictionary. + * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. + * This function never fails (wide contract) */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); - xfer += oprot->writeFieldBegin("num_rows", ::duckdb_apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->num_rows); - xfer += oprot->writeFieldEnd(); +/*! ZSTD_compress_advanced() : + * Note : this function is now DEPRECATED. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. + * This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */ +ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); - xfer += oprot->writeFieldBegin("row_groups", ::duckdb_apache::thrift::protocol::T_LIST, 4); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->row_groups.size())); - std::vector ::const_iterator _iter192; - for (_iter192 = this->row_groups.begin(); _iter192 != this->row_groups.end(); ++_iter192) - { - xfer += (*_iter192).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); +/*! ZSTD_compress_usingCDict_advanced() : + * Note : this function is now REDUNDANT. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. 
+ * This prototype will be marked as deprecated and generate compilation warning in some future version */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams); - if (this->__isset.key_value_metadata) { - xfer += oprot->writeFieldBegin("key_value_metadata", ::duckdb_apache::thrift::protocol::T_LIST, 5); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - std::vector ::const_iterator _iter193; - for (_iter193 = this->key_value_metadata.begin(); _iter193 != this->key_value_metadata.end(); ++_iter193) - { - xfer += (*_iter193).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.created_by) { - xfer += oprot->writeFieldBegin("created_by", ::duckdb_apache::thrift::protocol::T_STRING, 6); - xfer += oprot->writeString(this->created_by); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.column_orders) { - xfer += oprot->writeFieldBegin("column_orders", ::duckdb_apache::thrift::protocol::T_LIST, 7); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->column_orders.size())); - std::vector ::const_iterator _iter194; - for (_iter194 = this->column_orders.begin(); _iter194 != this->column_orders.end(); ++_iter194) - { - xfer += (*_iter194).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.encryption_algorithm) { - xfer += oprot->writeFieldBegin("encryption_algorithm", ::duckdb_apache::thrift::protocol::T_STRUCT, 8); - xfer += this->encryption_algorithm.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.footer_signing_key_metadata) { - xfer += oprot->writeFieldBegin("footer_signing_key_metadata", ::duckdb_apache::thrift::protocol::T_STRING, 9); - xfer += 
oprot->writeBinary(this->footer_signing_key_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} -void swap(FileMetaData &a, FileMetaData &b) { - using ::std::swap; - swap(a.version, b.version); - swap(a.schema, b.schema); - swap(a.num_rows, b.num_rows); - swap(a.row_groups, b.row_groups); - swap(a.key_value_metadata, b.key_value_metadata); - swap(a.created_by, b.created_by); - swap(a.column_orders, b.column_orders); - swap(a.encryption_algorithm, b.encryption_algorithm); - swap(a.footer_signing_key_metadata, b.footer_signing_key_metadata); - swap(a.__isset, b.__isset); -} +/*! ZSTD_CCtx_loadDictionary_byReference() : + * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. + * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); -FileMetaData::FileMetaData(const FileMetaData& other195) { - version = other195.version; - schema = other195.schema; - num_rows = other195.num_rows; - row_groups = other195.row_groups; - key_value_metadata = other195.key_value_metadata; - created_by = other195.created_by; - column_orders = other195.column_orders; - encryption_algorithm = other195.encryption_algorithm; - footer_signing_key_metadata = other195.footer_signing_key_metadata; - __isset = other195.__isset; -} -FileMetaData& FileMetaData::operator=(const FileMetaData& other196) { - version = other196.version; - schema = other196.schema; - num_rows = other196.num_rows; - row_groups = other196.row_groups; - key_value_metadata = other196.key_value_metadata; - created_by = other196.created_by; - column_orders = other196.column_orders; - encryption_algorithm = other196.encryption_algorithm; - footer_signing_key_metadata = other196.footer_signing_key_metadata; - __isset = other196.__isset; - return *this; 
-} +/*! ZSTD_CCtx_loadDictionary_advanced() : + * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); -void FileMetaData::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "FileMetaData("; - out << "version=" << to_string(version); - out << ", " << "schema=" << to_string(schema); - out << ", " << "num_rows=" << to_string(num_rows); - out << ", " << "row_groups=" << to_string(row_groups); - out << ", " << "key_value_metadata="; (__isset.key_value_metadata ? (out << to_string(key_value_metadata)) : (out << "")); - out << ", " << "created_by="; (__isset.created_by ? (out << to_string(created_by)) : (out << "")); - out << ", " << "column_orders="; (__isset.column_orders ? (out << to_string(column_orders)) : (out << "")); - out << ", " << "encryption_algorithm="; (__isset.encryption_algorithm ? (out << to_string(encryption_algorithm)) : (out << "")); - out << ", " << "footer_signing_key_metadata="; (__isset.footer_signing_key_metadata ? (out << to_string(footer_signing_key_metadata)) : (out << "")); - out << ")"; -} +/*! ZSTD_CCtx_refPrefix_advanced() : + * Same as ZSTD_CCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) 
*/ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); +/* === experimental parameters === */ +/* these parameters can be used with ZSTD_setParameter() + * they are not guaranteed to remain supported in the future */ -FileCryptoMetaData::~FileCryptoMetaData() throw() { -} + /* Enables rsyncable mode, + * which makes compressed files more rsync friendly + * by adding periodic synchronization points to the compressed data. + * The target average block size is ZSTD_c_jobSize / 2. + * It's possible to modify the job size to increase or decrease + * the granularity of the synchronization point. + * Once the jobSize is smaller than the window size, + * it will result in compression ratio degradation. + * NOTE 1: rsyncable mode only works when multithreading is enabled. + * NOTE 2: rsyncable performs poorly in combination with long range mode, + * since it will decrease the effectiveness of synchronization points, + * though mileage may vary. + * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s. + * If the selected compression level is already running significantly slower, + * the overall speed won't be significantly impacted. + */ + #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1 +/* Select a compression format. + * The value must be of type ZSTD_format_e. 
+ * See ZSTD_format_e enum definition for details */ +#define ZSTD_c_format ZSTD_c_experimentalParam2 -void FileCryptoMetaData::__set_encryption_algorithm(const EncryptionAlgorithm& val) { - this->encryption_algorithm = val; -} +/* Force back-reference distances to remain < windowSize, + * even when referencing into Dictionary content (default:0) */ +#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3 -void FileCryptoMetaData::__set_key_metadata(const std::string& val) { - this->key_metadata = val; -__isset.key_metadata = true; -} -std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj) -{ - obj.printTo(out); - return out; -} +/* Controls whether the contents of a CDict + * are used in place, or copied into the working context. + * Accepts values from the ZSTD_dictAttachPref_e enum. + * See the comments on that enum for an explanation of the feature. */ +#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 +/* Controls how the literals are compressed (default is auto). + * The value must be of type ZSTD_literalCompressionMode_e. + * See ZSTD_literalCompressionMode_t enum definition for details. + */ +#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 -uint32_t FileCryptoMetaData::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { +/* Tries to fit compressed block size to be around targetCBlockSize. + * No target when targetCBlockSize == 0. + * There is no guarantee on compressed block size (default:0) */ +#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; +/* User's best guess of source size. + * Hint is not valid when srcSizeHint == 0. 
+ * There is no guarantee that hint is close to actual source size, + * but compression ratio may regress significantly if guess considerably underestimates */ +#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 - xfer += iprot->readStructBegin(fname); +/*! ZSTD_CCtx_getParameter() : + * Get the requested compression parameter value, selected by enum ZSTD_cParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); - using ::duckdb_apache::thrift::protocol::TProtocolException; - bool isset_encryption_algorithm = false; +/*! ZSTD_CCtx_params : + * Quick howto : + * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure + * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into + * an existing ZSTD_CCtx_params structure. + * This is similar to + * ZSTD_CCtx_setParameter(). + * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to + * an existing CCtx. + * These parameters will be applied to + * all subsequent frames. + * - ZSTD_compressStream2() : Do compression using the CCtx. + * - ZSTD_freeCCtxParams() : Free the memory. + * + * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() + * for static allocation of CCtx for single-threaded compression. 
+ */ +ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); +ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->encryption_algorithm.read(iprot); - isset_encryption_algorithm = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->key_metadata); - this->__isset.key_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } +/*! ZSTD_CCtxParams_reset() : + * Reset params to default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); - xfer += iprot->readStructEnd(); +/*! ZSTD_CCtxParams_init() : + * Initializes the compression parameters of cctxParams according to + * compression level. All other parameters are reset to their default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); - if (!isset_encryption_algorithm) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} +/*! ZSTD_CCtxParams_init_advanced() : + * Initializes the compression and frame parameters of cctxParams according to + * params. All other parameters are reset to their default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); -uint32_t FileCryptoMetaData::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("FileCryptoMetaData"); +/*! 
ZSTD_CCtxParams_setParameter() : + * Similar to ZSTD_CCtx_setParameter. + * Set one compression parameter, selected by enum ZSTD_cParameter. + * Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams(). + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); - xfer += oprot->writeFieldBegin("encryption_algorithm", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); - xfer += this->encryption_algorithm.write(oprot); - xfer += oprot->writeFieldEnd(); +/*! ZSTD_CCtxParams_getParameter() : + * Similar to ZSTD_CCtx_getParameter. + * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); - if (this->__isset.key_metadata) { - xfer += oprot->writeFieldBegin("key_metadata", ::duckdb_apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->key_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} +/*! ZSTD_CCtx_setParametersUsingCCtxParams() : + * Apply a set of ZSTD_CCtx_params to the compression context. + * This can be done even after compression is started, + * if nbWorkers==0, this will have no impact until a new compression is started. + * if nbWorkers>=1, new parameters will be picked up at next job, + * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated). 
+ */ +ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); -void swap(FileCryptoMetaData &a, FileCryptoMetaData &b) { - using ::std::swap; - swap(a.encryption_algorithm, b.encryption_algorithm); - swap(a.key_metadata, b.key_metadata); - swap(a.__isset, b.__isset); -} +/*! ZSTD_compressStream2_simpleArgs() : + * Same as ZSTD_compressStream2(), + * but using only integral types as arguments. + * This variant might be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp); -FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other197) { - encryption_algorithm = other197.encryption_algorithm; - key_metadata = other197.key_metadata; - __isset = other197.__isset; -} -FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other198) { - encryption_algorithm = other198.encryption_algorithm; - key_metadata = other198.key_metadata; - __isset = other198.__isset; - return *this; -} -void FileCryptoMetaData::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "FileCryptoMetaData("; - out << "encryption_algorithm=" << to_string(encryption_algorithm); - out << ", " << "key_metadata="; (__isset.key_metadata ? (out << to_string(key_metadata)) : (out << "")); - out << ")"; -} -}} // namespace +/*************************************** +* Advanced decompression functions +***************************************/ +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. 
+ * Note 3 : Skippable Frame Identifiers are considered valid. */ +ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); -// LICENSE_CHANGE_END +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * Dictionary content is referenced, and therefore stays in dictBuffer. + * It is important that dictBuffer outlives DDict, + * it must remain read accessible throughout the lifetime of DDict */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); +/*! ZSTD_DCtx_loadDictionary_byReference() : + * Same as ZSTD_DCtx_loadDictionary(), + * but references `dict` content instead of copying it into `dctx`. + * This saves memory if `dict` remains around., + * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 -// See the end of this file for a list +/*! ZSTD_DCtx_loadDictionary_advanced() : + * Same as ZSTD_DCtx_loadDictionary(), + * but gives direct control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?). */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); -// Copyright 2011 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/*! ZSTD_DCtx_refPrefix_advanced() : + * Same as ZSTD_DCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); -#include +/*! ZSTD_DCtx_setMaxWindowSize() : + * Refuses allocating internal buffers for frames requiring a window size larger than provided limit. + * This protects a decoder context from reserving too much memory for itself (potential attack scenario). + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. 
+ * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); +/* ZSTD_d_format + * experimental parameter, + * allowing selection between ZSTD_format_e input compression formats + */ +#define ZSTD_d_format ZSTD_d_experimentalParam1 +/* ZSTD_d_stableOutBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the decompressor, and + * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer + * MUST be large enough to fit the entire decompressed frame. This will be + * checked when the frame content size is known. The data in the ZSTD_outBuffer + * in the range [dst, dst + pos) MUST not be modified during decompression + * or you will get data corruption. + * + * When this flags is enabled zstd won't allocate an output buffer, because + * it can write directly to the ZSTD_outBuffer, but it will still allocate + * an input buffer large enough to fit any compressed block. This will also + * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer. + * If you need to avoid the input buffer allocation use the buffer-less + * streaming API. + * + * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using + * this flag is ALWAYS memory safe, and will never access out-of-bounds + * memory. However, decompression WILL fail if you violate the preconditions. + * + * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST + * not be modified during decompression or you will get data corruption. 
This + * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate + * matches. Normally zstd maintains its own buffer for this purpose, but passing + * this flag tells zstd to use the user provided buffer. + */ +#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2 +/*! ZSTD_DCtx_setFormat() : + * Instruct the decoder context about what kind of data to decode next. + * This instruction is mandatory to decode data without a fully-formed header, + * such ZSTD_f_zstd1_magicless for example. + * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 -// See the end of this file for a list +/*! ZSTD_decompressStream_simpleArgs() : + * Same as ZSTD_decompressStream(), + * but using only integral types as arguments. + * This can be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos); -// Copyright 2011 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. 
nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#ifndef THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_ -#define THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_ +/******************************************************************** +* Advanced streaming functions +* Warning : most of these functions are now redundant with the Advanced API. +* Once Advanced API reaches "stable" status, +* redundant functions will be deprecated, and then at some point removed. +********************************************************************/ -#include +/*===== Advanced Streaming compression functions =====*/ +/**! ZSTD_initCStream_srcSize() : + * This function is deprecated, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * pledgedSrcSize must be correct. If it is not known at init time, use + * ZSTD_CONTENTSIZE_UNKNOWN. 
Note that, for compatibility with older programs, + * "0" also disables frame content size field. It may be enabled in the future. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, + int compressionLevel, + unsigned long long pledgedSrcSize); -namespace snappy { +/**! ZSTD_initCStream_usingDict() : + * This function is deprecated, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * Creates of an internal CDict (incompatible with static CCtx), except if + * dict == NULL or dictSize < 8, in which case no dict is used. + * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if + * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + int compressionLevel); -// A Sink is an interface that consumes a sequence of bytes. -class Sink { - public: - Sink() { } - virtual ~Sink(); +/**! ZSTD_initCStream_advanced() : + * This function is deprecated, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd parameter and leave the rest as-is. + * for ((param, value) : params) { + * ZSTD_CCtx_setParameter(zcs, param, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. + * pledgedSrcSize must be correct. + * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. 
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, + unsigned long long pledgedSrcSize); - // Append "bytes[0,n-1]" to this. - virtual void Append(const char* bytes, size_t n) = 0; +/**! ZSTD_initCStream_usingCDict() : + * This function is deprecated, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * note : cdict will just be referenced, and must outlive compression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); - // Returns a writable buffer of the specified length for appending. - // May return a pointer to the caller-owned scratch buffer which - // must have at least the indicated length. The returned buffer is - // only valid until the next operation on this Sink. - // - // After writing at most "length" bytes, call Append() with the - // pointer returned from this function and the number of bytes - // written. Many Append() implementations will avoid copying - // bytes if this function returned an internal buffer. - // - // If a non-scratch buffer is returned, the caller may only pass a - // prefix of it to Append(). That is, it is not correct to pass an - // interior pointer of the returned array to Append(). - // - // The default implementation always returns the scratch buffer. - virtual char* GetAppendBuffer(size_t length, char* scratch); +/**! ZSTD_initCStream_usingCDict_advanced() : + * This function is DEPRECATED, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. 
+ * for ((fParam, value) : fParams) { + * ZSTD_CCtx_setParameter(zcs, fParam, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. + * pledgedSrcSize must be correct. If srcSize is not known at init time, use + * value ZSTD_CONTENTSIZE_UNKNOWN. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize); - // For higher performance, Sink implementations can provide custom - // AppendAndTakeOwnership() and GetAppendBufferVariable() methods. - // These methods can reduce the number of copies done during - // compression/decompression. +/*! ZSTD_resetCStream() : + * This function is deprecated, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * start a new frame, using same parameters from previous frame. + * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. + * Note that zcs must be init at least once before using ZSTD_resetCStream(). + * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. + * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. + * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, + * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. 
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()) + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); - // Append "bytes[0,n-1] to the sink. Takes ownership of "bytes" - // and calls the deleter function as (*deleter)(deleter_arg, bytes, n) - // to free the buffer. deleter function must be non NULL. - // - // The default implementation just calls Append and frees "bytes". - // Other implementations may avoid a copy while appending the buffer. - virtual void AppendAndTakeOwnership( - char* bytes, size_t n, void (*deleter)(void*, const char*, size_t), - void *deleter_arg); - // Returns a writable buffer for appending and writes the buffer's capacity to - // *allocated_size. Guarantees *allocated_size >= min_size. - // May return a pointer to the caller-owned scratch buffer which must have - // scratch_size >= min_size. - // - // The returned buffer is only valid until the next operation - // on this ByteSink. - // - // After writing at most *allocated_size bytes, call Append() with the - // pointer returned from this function and the number of bytes written. - // Many Append() implementations will avoid copying bytes if this function - // returned an internal buffer. - // - // If the sink implementation allocates or reallocates an internal buffer, - // it should use the desired_size_hint if appropriate. If a caller cannot - // provide a reasonable guess at the desired capacity, it should set - // desired_size_hint = 0. - // - // If a non-scratch buffer is returned, the caller may only pass - // a prefix to it to Append(). That is, it is not correct to pass an - // interior pointer to Append(). - // - // The default implementation always returns the scratch buffer. 
- virtual char* GetAppendBufferVariable( - size_t min_size, size_t desired_size_hint, char* scratch, - size_t scratch_size, size_t* allocated_size); +typedef struct { + unsigned long long ingested; /* nb input bytes read and buffered */ + unsigned long long consumed; /* nb input bytes actually compressed */ + unsigned long long produced; /* nb of compressed bytes generated and buffered */ + unsigned long long flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */ + unsigned currentJobID; /* MT only : latest started job nb */ + unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */ +} ZSTD_frameProgression; - private: - // No copying - Sink(const Sink&); - void operator=(const Sink&); -}; +/* ZSTD_getFrameProgression() : + * tells how much data has been ingested (read from input) + * consumed (input actually compressed) and produced (output) for current frame. + * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed. + * Aggregates progression inside active worker threads. + */ +ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); -// A Source is an interface that yields a sequence of bytes -class Source { - public: - Source() { } - virtual ~Source(); +/*! ZSTD_toFlushNow() : + * Tell how many bytes are ready to be flushed immediately. + * Useful for multithreading scenarios (nbWorkers >= 1). + * Probe the oldest active job, defined as oldest job not yet entirely flushed, + * and check its output buffer. + * @return : amount of data stored in oldest job and ready to be flushed immediately. 
+ * if @return == 0, it means either : + * + there is no active job (could be checked with ZSTD_frameProgression()), or + * + oldest job is still actively compressing data, + * but everything it has produced has also been flushed so far, + * therefore flush speed is limited by production speed of oldest job + * irrespective of the speed of concurrent (and newer) jobs. + */ +ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); - // Return the number of bytes left to read from the source - virtual size_t Available() const = 0; - // Peek at the next flat region of the source. Does not reposition - // the source. The returned region is empty iff Available()==0. - // - // Returns a pointer to the beginning of the region and store its - // length in *len. - // - // The returned region is valid until the next call to Skip() or - // until this object is destroyed, whichever occurs first. - // - // The returned region may be larger than Available() (for example - // if this ByteSource is a view on a substring of a larger source). - // The caller is responsible for ensuring that it only reads the - // Available() bytes. - virtual const char* Peek(size_t* len) = 0; +/*===== Advanced Streaming decompression functions =====*/ +/** + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); + * + * note: no dictionary will be used if dict == NULL or dictSize < 8 + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); - // Skip the next n bytes. Invalidates any buffer returned by - // a previous call to Peek(). 
- // REQUIRES: Available() >= n - virtual void Skip(size_t n) = 0; +/** + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, ddict); + * + * note : ddict is referenced, it must outlive decompression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); - private: - // No copying - Source(const Source&); - void operator=(const Source&); -}; +/** + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * + * re-use decompression parameters from previous init; saves dictionary loading + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); -// A Source implementation that yields the contents of a flat array -class ByteArraySource : public Source { - public: - ByteArraySource(const char* p, size_t n) : ptr_(p), left_(n) { } - virtual ~ByteArraySource(); - virtual size_t Available() const; - virtual const char* Peek(size_t* len); - virtual void Skip(size_t n); - private: - const char* ptr_; - size_t left_; -}; -// A Sink implementation that writes to a flat array without any bound checks. 
-class UncheckedByteArraySink : public Sink { - public: - explicit UncheckedByteArraySink(char* dest) : dest_(dest) { } - virtual ~UncheckedByteArraySink(); - virtual void Append(const char* data, size_t n); - virtual char* GetAppendBuffer(size_t len, char* scratch); - virtual char* GetAppendBufferVariable( - size_t min_size, size_t desired_size_hint, char* scratch, - size_t scratch_size, size_t* allocated_size); - virtual void AppendAndTakeOwnership( - char* bytes, size_t n, void (*deleter)(void*, const char*, size_t), - void *deleter_arg); +/********************************************************************* +* Buffer-less and synchronous inner streaming functions +* +* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. +* But it's also a complex one, with several restrictions, documented below. +* Prefer normal streaming API for an easier experience. +********************************************************************* */ - // Return the current output pointer so that a caller can see how - // many bytes were produced. - // Note: this is not a Sink method. - char* CurrentDestination() const { return dest_; } - private: - char* dest_; -}; +/** + Buffer-less streaming compression (synchronous mode) -} // namespace snappy + A ZSTD_CCtx object is required to track streaming operations. + Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. + ZSTD_CCtx object can be re-used multiple times within successive compression operations. -#endif // THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_ + Start by initializing a context. + Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression, + or ZSTD_compressBegin_advanced(), for finer parameter control. + It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() + Then, consume your input using ZSTD_compressContinue(). 
+ There are some important considerations to keep in mind when using this advanced function : + - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only. + - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks. + - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. + Worst case evaluation is provided by ZSTD_compressBound(). + ZSTD_compressContinue() doesn't guarantee recover after a failed compression. + - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog). + It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks) + - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps. + In which case, it will "discard" the relevant memory section from its history. -// LICENSE_CHANGE_END + Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. + It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. + Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. + `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. 
+*/ -namespace snappy { +/*===== Buffer-less streaming compression functions =====*/ +ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */ +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ -Source::~Source() { } +ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -Sink::~Sink() { } -char* Sink::GetAppendBuffer(size_t length, char* scratch) { - return scratch; -} +/*- + Buffer-less streaming decompression (synchronous mode) -char* Sink::GetAppendBufferVariable( - size_t min_size, size_t desired_size_hint, char* scratch, - size_t scratch_size, size_t* allocated_size) { - *allocated_size = scratch_size; - return scratch; -} + A ZSTD_DCtx object is required to track streaming operations. + Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. 
+ A ZSTD_DCtx object can be re-used multiple times. -void Sink::AppendAndTakeOwnership( - char* bytes, size_t n, - void (*deleter)(void*, const char*, size_t), - void *deleter_arg) { - Append(bytes, n); - (*deleter)(deleter_arg, bytes, n); -} + First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). + Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. + Data fragment must be large enough to ensure successful decoding. + `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. + @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. + >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. + errorCode, which can be tested using ZSTD_isError(). -ByteArraySource::~ByteArraySource() { } + It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, + such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). + Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. + As a consequence, check that values remain within valid application range. + For example, do not allocate memory blindly, check that `windowSize` is within expectation. + Each application can set its own limits, depending on local restrictions. + For extended interoperability, it is recommended to support `windowSize` of at least 8 MB. -size_t ByteArraySource::Available() const { return left_; } + ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes. + ZSTD_decompressContinue() is very sensitive to contiguity, + if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, + or that previous contiguous segment is large enough to properly handle maximum back-reference distance. 
+ There are multiple ways to guarantee this condition. -const char* ByteArraySource::Peek(size_t* len) { - *len = left_; - return ptr_; -} + The most memory efficient way is to use a round buffer of sufficient size. + Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), + which can @return an error code if required value is too large for current system (in 32-bits mode). + In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, + up to the moment there is not enough room left in the buffer to guarantee decoding another full block, + which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. + At which point, decoding can resume from the beginning of the buffer. + Note that already decoded data stored in the buffer should be flushed before being overwritten. -void ByteArraySource::Skip(size_t n) { - left_ -= n; - ptr_ += n; -} + There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory. -UncheckedByteArraySink::~UncheckedByteArraySink() { } + Finally, if you control the compression process, you can also ignore all buffer size rules, + as long as the encoder and decoder progress in "lock-step", + aka use exactly the same buffer sizes, break contiguity at the same place, etc. -void UncheckedByteArraySink::Append(const char* data, size_t n) { - // Do no copying if the caller filled in the result of GetAppendBuffer() - if (data != dest_) { - memcpy(dest_, data, n); - } - dest_ += n; -} + Once buffers are setup, start decompression, with ZSTD_decompressBegin(). + If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict(). -char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) { - return dest_; -} + Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. 
+ ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. -void UncheckedByteArraySink::AppendAndTakeOwnership( - char* data, size_t n, - void (*deleter)(void*, const char*, size_t), - void *deleter_arg) { - if (data != dest_) { - memcpy(dest_, data, n); - (*deleter)(deleter_arg, data, n); - } - dest_ += n; -} + @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). + It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. + It can also be an error code, which can be tested with ZSTD_isError(). -char* UncheckedByteArraySink::GetAppendBufferVariable( - size_t min_size, size_t desired_size_hint, char* scratch, - size_t scratch_size, size_t* allocated_size) { - *allocated_size = desired_size_hint; - return dest_; -} + A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. + Context can then be reset to start a new decompression. -} // namespace snappy + Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). + This information is not required to properly decode a frame. + == Special case : skippable frames == -// LICENSE_CHANGE_END + Skippable frames allow integration of user-defined data into a flow of concatenated frames. + Skippable frames will be ignored (skipped) by decompressor. + The format of skippable frames is as follows : + a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F + b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits + c) Frame Content - any content (User Data) of length equal to Frame Size + For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame. + For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content. 
+*/ +/*===== Buffer-less streaming decompression functions =====*/ +typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; +typedef struct { + unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ + unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ + unsigned blockSizeMax; + ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ + unsigned headerSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTD_frameHeader; -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 -// See the end of this file for a list - -// Copyright 2011 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include -#include +/*! ZSTD_getFrameHeader() : + * decode Frame Header, or requires larger `srcSize`. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */ +/*! ZSTD_getFrameHeader_advanced() : + * same as ZSTD_getFrameHeader(), + * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ +ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); +ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); +ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD 
PARTY LICENSE #3 -// See the end of this file for a list +/* misc */ +ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); +typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; +ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); -// Copyright 2011 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-// -// Various stubs for the open-source version of Snappy. -#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ -#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ -// #ifdef HAVE_CONFIG_H -// #include "config.h" -// #endif -#include +/* ============================ */ +/** Block level API */ +/* ============================ */ -#include -#include -#include +/*! + Block functions produce and decode raw zstd blocks, without frame metadata. + Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). + But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. -#ifdef HAVE_SYS_MMAN_H -#include -#endif + A few rules to respect : + - Compressing and decompressing require a context structure + + Use ZSTD_createCCtx() and ZSTD_createDCtx() + - It is necessary to init context before starting + + compression : any ZSTD_compressBegin*() variant, including with dictionary + + decompression : any ZSTD_decompressBegin*() variant, including with dictionary + + copyCCtx() and copyDCtx() can be used too + - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB + + If input is larger than a block size, it's necessary to split input data into multiple blocks + + For inputs larger than a single block, consider using regular ZSTD_compress() instead. + Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. + - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! + ===> In which case, nothing is produced into `dst` ! + + User __must__ test for such outcome and deal directly with uncompressed data + + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. + Doing so would mess up with statistics history, leading to potential data corruption. 
+ + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! + + In case of multiple successive blocks, should some of them be uncompressed, + decoder must be informed of their existence in order to follow proper history. + Use ZSTD_insertBlock() for such a case. +*/ -#ifdef HAVE_UNISTD_H -#include -#endif +/*===== Raw zstd block functions =====*/ +ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ -#if defined(_MSC_VER) -#include -#endif // defined(_MSC_VER) +} -#ifndef __has_feature -#define __has_feature(x) 0 -#endif +#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ -#if __has_feature(memory_sanitizer) -#include -#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) \ - __msan_unpoison((address), (size)) -#else -#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) /* empty */ -#endif // __has_feature(memory_sanitizer) +// LICENSE_CHANGE_END -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 -// See the end of this file for a list -// Copyright 2011 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Various type stubs for the open-source version of Snappy. -// -// This file cannot include config.h, as it is included from snappy.h, -// which is a public header. Instead, snappy-stubs-public.h is generated by -// from snappy-stubs-public.h.in at configure time. 
-#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ -#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ -#include -#include -#include + /* XXH_reset, update, digest */ + /* XXH_reset, update, digest */ -#ifndef _WIN32 // HAVE_SYS_UIO_H -#include -#endif // HAVE_SYS_UIO_H +namespace duckdb_zstd { -#define SNAPPY_MAJOR 1 -#define SNAPPY_MINOR 1 -#define SNAPPY_PATCHLEVEL 7 -#define SNAPPY_VERSION \ - ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL) +/* ---- static assert (debug) --- */ +#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) +#define ZSTD_isError ERR_isError /* for inlining */ +// #define FSE_isError ERR_isError +// #define HUF_isError ERR_isError -namespace snappy { -using int8 = std::int8_t; -using uint8 = std::uint8_t; -using int16 = std::int16_t; -using uint16 = std::uint16_t; -using int32 = std::int32_t; -using uint32 = std::uint32_t; -using int64 = std::int64_t; -using uint64 = std::uint64_t; +/*-************************************* +* shared macros +***************************************/ +#undef MIN +#undef MAX +#define MIN(a,b) ((a)<(b) ? (a) : (b)) +#define MAX(a,b) ((a)>(b) ? (a) : (b)) -using string = std::string; +/** + * Ignore: this is an internal helper. + * + * This is a helper function to help force C99-correctness during compilation. + * Under strict compilation modes, variadic macro arguments can't be empty. + * However, variadic function arguments can be. Using a function therefore lets + * us statically check that at least one (string) argument was passed, + * independent of the compilation flags. + */ +static INLINE_KEYWORD UNUSED_ATTR +void _force_has_format_string(const char *format, ...) { + (void)format; +} -#ifdef _WIN32 // !HAVE_SYS_UIO_H -// Windows does not have an iovec type, yet the concept is universally useful. -// It is simple to define it ourselves, so we put it inside our own namespace. 
-struct iovec { - void* iov_base; - size_t iov_len; -}; -#endif // !HAVE_SYS_UIO_H +/** + * Ignore: this is an internal helper. + * + * We want to force this function invocation to be syntactically correct, but + * we don't want to force runtime evaluation of its arguments. + */ +#define _FORCE_HAS_FORMAT_STRING(...) \ + if (0) { \ + _force_has_format_string(__VA_ARGS__); \ + } -} // namespace snappy +/** + * Return the specified error if the condition evaluates to true. + * + * In debug modes, prints additional information. + * In order to do that (particularly, printing the conditional that failed), + * this can't just wrap RETURN_ERROR(). + */ +#define RETURN_ERROR_IF(cond, err, ...) \ + if (cond) { \ + RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } -#endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ +/** + * Unconditionally return the specified error. + * + * In debug modes, prints additional information. + */ +#define RETURN_ERROR(err, ...) \ + do { \ + RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } while(0); +/** + * If the provided expression evaluates to an error code, returns that error code. + * + * In debug modes, prints additional information. + */ +#define FORWARD_IF_ERROR(err, ...) 
\ + do { \ + size_t const err_code = (err); \ + if (ERR_isError(err_code)) { \ + RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return err_code; \ + } \ + } while(0); -// LICENSE_CHANGE_END +/*-************************************* +* Common constants +***************************************/ +#define ZSTD_OPT_NUM (1<<12) -#if defined(__x86_64__) +#define ZSTD_REP_NUM 3 /* number of repcodes */ +#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) -// Enable 64-bit optimized versions of some routines. -#define ARCH_K8 1 +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) -#elif defined(__ppc64__) +#define BIT7 128 +#define BIT6 64 +#define BIT5 32 +#define BIT4 16 +#define BIT1 2 +#define BIT0 1 -#define ARCH_PPC 1 +#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 -#elif defined(__aarch64__) +#define ZSTD_FRAMEIDSIZE 4 /* magic number size */ -#define ARCH_ARM 1 +#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ -#endif +typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; -// Needed by OS X, among others. -#ifndef MAP_ANONYMOUS -#define MAP_ANONYMOUS MAP_ANON -#endif +#define ZSTD_FRAMECHECKSUMSIZE 4 -// The size of an array, if known at compile-time. -// Will give unexpected results if used on a pointer. -// We undefine it first, since some compilers already have a definition. -#ifdef ARRAYSIZE -#undef ARRAYSIZE -#endif -#define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a))) +#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ +#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ -// Static prediction hints. 
-#ifdef HAVE_BUILTIN_EXPECT -#define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(x, 0)) -#define SNAPPY_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) -#else -#define SNAPPY_PREDICT_FALSE(x) x -#define SNAPPY_PREDICT_TRUE(x) x -#endif +#define HufLog 12 +typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; -// This is only used for recomputing the tag byte table used during -// decompression; for simplicity we just remove it from the open-source -// version (anyone who wants to regenerate it can just do the call -// themselves within main()). -#define DEFINE_bool(flag_name, default_value, description) \ - bool FLAGS_ ## flag_name = default_value -#define DECLARE_bool(flag_name) \ - extern bool FLAGS_ ## flag_name +#define LONGNBSEQ 0x7F00 -namespace snappy { +#define MINMATCH 3 -//static const uint32 kuint32max = static_cast(0xFFFFFFFF); -//static const int64 kint64max = static_cast(0x7FFFFFFFFFFFFFFFLL); +#define Litbits 8 +#define MaxLit ((1< -#endif + assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); -#ifdef HAVE_SYS_ENDIAN_H -#include + if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { + /* Handle short offset copies. */ + do { + COPY8(op, ip) + } while (op < oend); + } else { + assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); + /* Separate out the first COPY16() call because the copy length is + * almost certain to be short, so the branches have different + * probabilities. Since it is almost certain to be short, only do + * one COPY16() in the first call. Then, do two calls per loop since + * at that point it is more likely to have a high trip count. 
+ */ +#ifndef __aarch64__ + do { + COPY16(op, ip); + } + while (op < oend); +#else + COPY16(op, ip); + if (op >= oend) return; + do { + COPY16(op, ip); + COPY16(op, ip); + } + while (op < oend); #endif + } +} -#ifdef _MSC_VER -#include -#define bswap_16(x) _byteswap_ushort(x) -#define bswap_32(x) _byteswap_ulong(x) -#define bswap_64(x) _byteswap_uint64(x) +MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const length = MIN(dstCapacity, srcSize); + if (length > 0) { + memcpy(dst, src, length); + } + return length; +} -#elif defined(__APPLE__) -// Mac OS X / Darwin features -#include -#define bswap_16(x) OSSwapInt16(x) -#define bswap_32(x) OSSwapInt32(x) -#define bswap_64(x) OSSwapInt64(x) +/* define "workspace is too large" as this number of times larger than needed */ +#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 -#elif defined(HAVE_BYTESWAP_H) -#include +/* when workspace is continuously too large + * during at least this number of times, + * context's memory usage is considered wasteful, + * because it's sized to handle a worst case scenario which rarely happens. + * In which case, resize it down to free some memory */ +#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 -#elif defined(bswap32) -// FreeBSD defines bswap{16,32,64} in (already #included). -#define bswap_16(x) bswap16(x) -#define bswap_32(x) bswap32(x) -#define bswap_64(x) bswap64(x) -#elif defined(BSWAP_64) -// Solaris 10 defines BSWAP_{16,32,64} in (already #included). 
-#define bswap_16(x) BSWAP_16(x) -#define bswap_32(x) BSWAP_32(x) -#define bswap_64(x) BSWAP_64(x) +/*-******************************************* +* Private declarations +*********************************************/ +typedef struct seqDef_s { + U32 offset; + U16 litLength; + U16 matchLength; +} seqDef; -#else +typedef struct { + seqDef* sequencesStart; + seqDef* sequences; + BYTE* litStart; + BYTE* lit; + BYTE* llCode; + BYTE* mlCode; + BYTE* ofCode; + size_t maxNbSeq; + size_t maxNbLit; + U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */ + U32 longLengthPos; +} seqStore_t; -inline uint16 bswap_16(uint16 x) { - return (x << 8) | (x >> 8); -} +typedef struct { + U32 litLength; + U32 matchLength; +} ZSTD_sequenceLength; -inline uint32 bswap_32(uint32 x) { - x = ((x & 0xff00ff00UL) >> 8) | ((x & 0x00ff00ffUL) << 8); - return (x >> 16) | (x << 16); +/** + * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences + * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength. + */ +MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) +{ + ZSTD_sequenceLength seqLen; + seqLen.litLength = seq->litLength; + seqLen.matchLength = seq->matchLength + MINMATCH; + if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { + if (seqStore->longLengthID == 1) { + seqLen.litLength += 0xFFFF; + } + if (seqStore->longLengthID == 2) { + seqLen.matchLength += 0xFFFF; + } + } + return seqLen; } -inline uint64 bswap_64(uint64 x) { - x = ((x & 0xff00ff00ff00ff00ULL) >> 8) | ((x & 0x00ff00ff00ff00ffULL) << 8); - x = ((x & 0xffff0000ffff0000ULL) >> 16) | ((x & 0x0000ffff0000ffffULL) << 16); - return (x >> 32) | (x << 32); +/** + * Contains the compressed frame size and an upper-bound for the decompressed frame size. + * Note: before using `compressedSize`, check for errors using ZSTD_isError(). 
+ * similarly, before using `decompressedBound`, check for errors using: + * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` + */ +typedef struct { + size_t compressedSize; + unsigned long long decompressedBound; +} ZSTD_frameSizeInfo; /* decompress & legacy */ + +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ + +/* custom memory allocation functions */ +void* ZSTD_malloc(size_t size, ZSTD_customMem customMem); +void* ZSTD_calloc(size_t size, ZSTD_customMem customMem); +void ZSTD_free(void* ptr, ZSTD_customMem customMem); + + +MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + return _BitScanReverse(&r, val) ? (unsigned)r : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ + return __builtin_clz (val) ^ 31; +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return 31 - __CLZ(val); +# else /* Software version */ + static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; +# endif + } } -#endif -#endif // defined(SNAPPY_IS_BIG_ENDIAN) +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! */ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */ -// Convert to little-endian storage, opposite of network format. 
-// Convert x from host to little endian: x = LittleEndian.FromHost(x); -// convert x from little endian to host: x = LittleEndian.ToHost(x); -// -// Store values into unaligned memory converting to little endian order: -// LittleEndian.Store16(p, x); -// -// Load unaligned values stored in little endian converting to host order: -// x = LittleEndian.Load16(p); -class LittleEndian { - public: - // Conversion functions. -#if defined(SNAPPY_IS_BIG_ENDIAN) - static uint16 FromHost16(uint16 x) { return bswap_16(x); } - static uint16 ToHost16(uint16 x) { return bswap_16(x); } +typedef struct { + blockType_e blockType; + U32 lastBlock; + U32 origSize; +} blockProperties_t; /* declared here for decompress and fullbench */ - static uint32 FromHost32(uint32 x) { return bswap_32(x); } - static uint32 ToHost32(uint32 x) { return bswap_32(x); } +/*! ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr); - static bool IsLittleEndian() { return false; } +/*! ZSTD_decodeSeqHeaders() : + * decode sequence header from src */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize); -#else // !defined(SNAPPY_IS_BIG_ENDIAN) +} - static uint16 FromHost16(uint16 x) { return x; } - static uint16 ToHost16(uint16 x) { return x; } +#endif /* ZSTD_CCOMMON_H_MODULE */ - static uint32 FromHost32(uint32 x) { return x; } - static uint32 ToHost32(uint32 x) { return x; } - static bool IsLittleEndian() { return true; } +// LICENSE_CHANGE_END -#endif // !defined(SNAPPY_IS_BIG_ENDIAN) - // Functions to do unaligned loads and stores in little-endian order. 
- static uint16 Load16(const void *p) { - return ToHost16(UNALIGNED_LOAD16(p)); - } +namespace duckdb_zstd { - static void Store16(void *p, uint16 v) { - UNALIGNED_STORE16(p, FromHost16(v)); - } +/*-**************************************** +* Version +******************************************/ +unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; } - static uint32 Load32(const void *p) { - return ToHost32(UNALIGNED_LOAD32(p)); - } +const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } - static void Store32(void *p, uint32 v) { - UNALIGNED_STORE32(p, FromHost32(v)); - } -}; -// Some bit-manipulation functions. -class Bits { - public: - // Return floor(log2(n)) for positive integer n. - static int Log2FloorNonZero(uint32 n); +/*-**************************************** +* ZSTD Error Management +******************************************/ +#undef ZSTD_isError /* defined within zstd_internal.h */ +/*! ZSTD_isError() : + * tells if a return value is an error code + * symbol is required for external callers */ +unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } - // Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. - static int Log2Floor(uint32 n); +/*! ZSTD_getErrorName() : + * provides error code string from function result (useful for debugging) */ +const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } - // Return the first set least / most significant bit, 0-indexed. Returns an - // undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except - // that it's 0-indexed. - static int FindLSBSetNonZero(uint32 n); +/*! ZSTD_getError() : + * convert a `size_t` function result into a proper ZSTD_errorCode enum */ +ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } -#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) - static int FindLSBSetNonZero64(uint64 n); -#endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) +/*! 
ZSTD_getErrorString() : + * provides error code string from enum */ +const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } - private: - // No copying - Bits(const Bits&); - void operator=(const Bits&); -}; -#ifdef HAVE_BUILTIN_CTZ -inline int Bits::Log2FloorNonZero(uint32 n) { - assert(n != 0); - // (31 ^ x) is equivalent to (31 - x) for x in [0, 31]. An easy proof - // represents subtraction in base 2 and observes that there's no carry. - // - // GCC and Clang represent __builtin_clz on x86 as 31 ^ _bit_scan_reverse(x). - // Using "31 ^" here instead of "31 -" allows the optimizer to strip the - // function body down to _bit_scan_reverse(x). - return 31 ^ __builtin_clz(n); +/*=************************************************************** +* Custom allocator +****************************************************************/ +void* ZSTD_malloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) + return customMem.customAlloc(customMem.opaque, size); + return malloc(size); } -inline int Bits::Log2Floor(uint32 n) { - return (n == 0) ? 
-1 : Bits::Log2FloorNonZero(n); +void* ZSTD_calloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) { + /* calloc implemented as malloc+memset; + * not as efficient as calloc, but next best guess for custom malloc */ + void* const ptr = customMem.customAlloc(customMem.opaque, size); + memset(ptr, 0, size); + return ptr; + } + return calloc(1, size); } -inline int Bits::FindLSBSetNonZero(uint32 n) { - assert(n != 0); - return __builtin_ctz(n); +void ZSTD_free(void* ptr, ZSTD_customMem customMem) +{ + if (ptr!=NULL) { + if (customMem.customFree) + customMem.customFree(customMem.opaque, ptr); + else + free(ptr); + } } -#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) -inline int Bits::FindLSBSetNonZero64(uint64 n) { - assert(n != 0); - return __builtin_ctzll(n); } -#endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) -#elif defined(_MSC_VER) -inline int Bits::Log2FloorNonZero(uint32 n) { - assert(n != 0); - unsigned long where; - _BitScanReverse(&where, n); - return static_cast(where); -} +// LICENSE_CHANGE_END -inline int Bits::Log2Floor(uint32 n) { - unsigned long where; - if (_BitScanReverse(&where, n)) - return static_cast(where); - return -1; -} -inline int Bits::FindLSBSetNonZero(uint32 n) { - assert(n != 0); - unsigned long where; - if (_BitScanForward(&where, n)) - return static_cast(where); - return 32; -} +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list -#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) -inline int Bits::FindLSBSetNonZero64(uint64 n) { - assert(n != 0); - unsigned long where; - if (_BitScanForward64(&where, n)) - return static_cast(where); - return 64; -} -#endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) +/* ****************************************************************** + * FSE : Finite State Entropy encoder + * Copyright (c) 2013-2020, Yann Collet, 
Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ -#else // Portable versions. +/* ************************************************************** +* Includes +****************************************************************/ +#include /* malloc, free, qsort */ +#include /* memcpy, memset */ -inline int Bits::Log2FloorNonZero(uint32 n) { - assert(n != 0); + /* U32, U16, etc. */ + /* assert, DEBUGLOG */ - int log = 0; - uint32 value = n; - for (int i = 4; i >= 0; --i) { - int shift = (1 << i); - uint32 x = value >> shift; - if (x != 0) { - value = x; - log += shift; - } - } - assert(value == 1); - return log; -} -inline int Bits::Log2Floor(uint32 n) { - return (n == 0) ? -1 : Bits::Log2FloorNonZero(n); -} +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list -inline int Bits::FindLSBSetNonZero(uint32 n) { - assert(n != 0); +/* ****************************************************************** + * hist : Histogram functions + * part of Finite State Entropy project + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. 
+ * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ - int rc = 31; - for (int i = 4, shift = 1 << 4; i >= 0; --i) { - const uint32 x = n << shift; - if (x != 0) { - n = x; - rc -= shift; - } - shift >>= 1; - } - return rc; -} +/* --- dependencies --- */ +#include /* size_t */ -#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) -// FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero(). -inline int Bits::FindLSBSetNonZero64(uint64 n) { - assert(n != 0); - const uint32 bottombits = static_cast(n); - if (bottombits == 0) { - // Bottom bits are zero, so scan in top bits - return 32 + FindLSBSetNonZero(static_cast(n >> 32)); - } else { - return FindLSBSetNonZero(bottombits); - } -} -#endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) +namespace duckdb_zstd { +/* --- simple histogram functions --- */ -#endif // End portable versions. +/*! HIST_count(): + * Provides the precise count of each byte within a table 'count'. + * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). + * Updates *maxSymbolValuePtr with actual largest symbol value detected. + * @return : count of the most frequent symbol (which isn't identified). + * or an error code, which can be tested using HIST_isError(). + * note : if return == srcSize, there is only one symbol. + */ +size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); -// Variable-length integer encoding. 
-class Varint { - public: - // Maximum lengths of varint encoding of uint32. - static const int kMax32 = 5; +unsigned HIST_isError(size_t code); /**< tells if a return value is an error code */ - // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1]. - // Never reads a character at or beyond limit. If a valid/terminated varint32 - // was found in the range, stores it in *OUTPUT and returns a pointer just - // past the last byte of the varint32. Else returns NULL. On success, - // "result <= limit". - static const char* Parse32WithLimit(const char* ptr, const char* limit, - uint32* OUTPUT); - // REQUIRES "ptr" points to a buffer of length sufficient to hold "v". - // EFFECTS Encodes "v" into "ptr" and returns a pointer to the - // byte just past the last encoded byte. - static char* Encode32(char* ptr, uint32 v); +/* --- advanced histogram functions --- */ - // EFFECTS Appends the varint representation of "value" to "*s". - static void Append32(string* s, uint32 value); -}; +#define HIST_WKSP_SIZE_U32 1024 +#define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned)) +/** HIST_count_wksp() : + * Same as HIST_count(), but using an externally provided scratch buffer. + * Benefit is this function will use very little stack space. 
+ * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize, + void* workSpace, size_t workSpaceSize); -inline const char* Varint::Parse32WithLimit(const char* p, - const char* l, - uint32* OUTPUT) { - const unsigned char* ptr = reinterpret_cast(p); - const unsigned char* limit = reinterpret_cast(l); - uint32 b, result; - if (ptr >= limit) return NULL; - b = *(ptr++); result = b & 127; if (b < 128) goto done; - if (ptr >= limit) return NULL; - b = *(ptr++); result |= (b & 127) << 7; if (b < 128) goto done; - if (ptr >= limit) return NULL; - b = *(ptr++); result |= (b & 127) << 14; if (b < 128) goto done; - if (ptr >= limit) return NULL; - b = *(ptr++); result |= (b & 127) << 21; if (b < 128) goto done; - if (ptr >= limit) return NULL; - b = *(ptr++); result |= (b & 127) << 28; if (b < 16) goto done; - return NULL; // Value is too long to be a varint32 - done: - *OUTPUT = result; - return reinterpret_cast(ptr); -} +/** HIST_countFast() : + * same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr. 
+ * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` + */ +size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); -inline char* Varint::Encode32(char* sptr, uint32 v) { - // Operate on characters as unsigneds - unsigned char* ptr = reinterpret_cast(sptr); - static const int B = 128; - if (v < (1<<7)) { - *(ptr++) = v; - } else if (v < (1<<14)) { - *(ptr++) = v | B; - *(ptr++) = v>>7; - } else if (v < (1<<21)) { - *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = v>>14; - } else if (v < (1<<28)) { - *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = (v>>14) | B; - *(ptr++) = v>>21; - } else { - *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = (v>>14) | B; - *(ptr++) = (v>>21) | B; - *(ptr++) = v>>28; - } - return reinterpret_cast(ptr); -} +/** HIST_countFast_wksp() : + * Same as HIST_countFast(), but using an externally provided scratch buffer. + * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize, + void* workSpace, size_t workSpaceSize); -// If you know the internal layout of the std::string in use, you can -// replace this function with one that resizes the string without -// filling the new space with zeros (if applicable) -- -// it will be non-portable but faster. -inline void STLStringResizeUninitialized(string* s, size_t new_size) { - s->resize(new_size); -} +/*! HIST_count_simple() : + * Same as HIST_countFast(), this function is unsafe, + * and will segfault if any value within `src` is `> *maxSymbolValuePtr`. + * It is also a bit slower for large inputs. + * However, it does not need any additional memory (not even on stack). + * @return : count of the most frequent symbol. + * Note this function doesn't produce any error (i.e. it must succeed). 
+ */ +unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); -// Return a mutable char* pointing to a string's internal buffer, -// which may not be null-terminated. Writing through this pointer will -// modify the string. -// -// string_as_array(&str)[i] is valid for 0 <= i < str.size() until the -// next call to a string method that invalidates iterators. -// -// As of 2006-04, there is no standard-blessed way of getting a -// mutable reference to a string's internal buffer. However, issue 530 -// (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530) -// proposes this as the method. It will officially be part of the standard -// for C++0x. This should already work on all current implementations. -inline char* string_as_array(string* str) { - return str->empty() ? NULL : &*str->begin(); } -} // namespace snappy - -#endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ - // LICENSE_CHANGE_END + /* HIST_count_wksp */ -namespace snappy { -void Varint::Append32(string* s, uint32 value) { - char buf[Varint::kMax32]; - const char* p = Varint::Encode32(buf, value); - s->append(buf, p - buf); -} -} // namespace snappy -// LICENSE_CHANGE_END +/* ************************************************************** +* Error Management +****************************************************************/ +// #define FSE_isError ERR_isError -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 -// See the end of this file for a list +/* ************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ -// Copyright 2005 Google Inc. All Rights Reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) +namespace duckdb_zstd { -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 -// See the end of this file for a list +/* Function templates */ -// Copyright 2005 and onwards Google Inc. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// A light-weight compression algorithm. It is designed for speed of -// compression and decompression, rather than for the utmost in space -// savings. -// -// For getting better compression ratios when you are compressing data -// with long repeated sequences or compressing data that is similar to -// other data, while still compressing fast, you might look at first -// using BMDiff and then compressing the output of BMDiff with -// Snappy. +/* FSE_buildCTable_wksp() : + * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). 
+ * wkspSize should be sized to handle worst case situation, which is `1<>1 : 1) ; + FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + U32 const step = FSE_TABLESTEP(tableSize); + U32 cumul[FSE_MAX_SYMBOL_VALUE+2]; -#ifndef THIRD_PARTY_SNAPPY_SNAPPY_H__ -#define THIRD_PARTY_SNAPPY_SNAPPY_H__ + FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace; + U32 highThreshold = tableSize-1; -#include -#include + /* CTable header */ + if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge); + tableU16[-2] = (U16) tableLog; + tableU16[-1] = (U16) maxSymbolValue; + assert(tableLog < 16); /* required for threshold strategy to work */ + /* For explanations on how to distribute symbol values over the table : + * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ + #ifdef __clang_analyzer__ + memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ + #endif -namespace snappy { - class Source; - class Sink; + /* symbol start positions */ + { U32 u; + cumul[0] = 0; + for (u=1; u <= maxSymbolValue+1; u++) { + if (normalizedCounter[u-1]==-1) { /* Low proba symbol */ + cumul[u] = cumul[u-1] + 1; + tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); + } else { + cumul[u] = cumul[u-1] + normalizedCounter[u-1]; + } } + cumul[maxSymbolValue+1] = tableSize+1; + } - // ------------------------------------------------------------------------ - // Generic compression/decompression routines. 
- // ------------------------------------------------------------------------ + /* Spread symbols */ + { U32 position = 0; + U32 symbol; + for (symbol=0; symbol<=maxSymbolValue; symbol++) { + int nbOccurrences; + int const freq = normalizedCounter[symbol]; + for (nbOccurrences=0; nbOccurrences highThreshold) + position = (position + step) & tableMask; /* Low proba area */ + } } - // Compress the bytes read from "*source" and append to "*sink". Return the - // number of bytes written. - size_t Compress(Source* source, Sink* sink); + assert(position==0); /* Must have initialized all positions */ + } - // Find the uncompressed length of the given stream, as given by the header. - // Note that the true length could deviate from this; the stream could e.g. - // be truncated. - // - // Also note that this leaves "*source" in a state that is unsuitable for - // further operations, such as RawUncompress(). You will need to rewind - // or recreate the source yourself before attempting any further calls. - bool GetUncompressedLength(Source* source, uint32* result); + /* Build table */ + { U32 u; for (u=0; u> 3) + 3; + return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? 
use default */ +} - // Returns the maximal size of the compressed representation of - // input data that is "source_bytes" bytes in length; - size_t MaxCompressedLength(size_t source_bytes); +static size_t +FSE_writeNCount_generic (void* header, size_t headerBufferSize, + const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, + unsigned writeIsSafe) +{ + BYTE* const ostart = (BYTE*) header; + BYTE* out = ostart; + BYTE* const oend = ostart + headerBufferSize; + int nbBits; + const int tableSize = 1 << tableLog; + int remaining; + int threshold; + U32 bitStream = 0; + int bitCount = 0; + unsigned symbol = 0; + unsigned const alphabetSize = maxSymbolValue + 1; + int previousIs0 = 0; - // REQUIRES: "compressed[]" was produced by RawCompress() or Compress() - // Returns true and stores the length of the uncompressed data in - // *result normally. Returns false on parsing error. - // This operation takes O(1) time. - bool GetUncompressedLength(const char* compressed, size_t compressed_length, - size_t* result); + /* Table Size */ + bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount; + bitCount += 4; - // Returns true iff the contents of "compressed[]" can be uncompressed - // successfully. Does not return the uncompressed data. Takes - // time proportional to compressed_length, but is usually at least - // a factor of four faster than actual decompression. - bool IsValidCompressedBuffer(const char* compressed, - size_t compressed_length); + /* Init */ + remaining = tableSize+1; /* +1 for extra accuracy */ + threshold = tableSize; + nbBits = tableLog+1; - // Returns true iff the contents of "compressed" can be uncompressed - // successfully. Does not return the uncompressed data. Takes - // time proportional to *compressed length, but is usually at least - // a factor of four faster than actual decompression. - // On success, consumes all of *compressed. On failure, consumes an - // unspecified prefix of *compressed. 
- bool IsValidCompressed(Source* compressed); + while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */ + if (previousIs0) { + unsigned start = symbol; + while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++; + if (symbol == alphabetSize) break; /* incorrect distribution */ + while (symbol >= start+24) { + start+=24; + bitStream += 0xFFFFU << bitCount; + if ((!writeIsSafe) && (out > oend-2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE) bitStream; + out[1] = (BYTE)(bitStream>>8); + out+=2; + bitStream>>=16; + } + while (symbol >= start+3) { + start+=3; + bitStream += 3 << bitCount; + bitCount += 2; + } + bitStream += (symbol-start) << bitCount; + bitCount += 2; + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + { int count = normalizedCounter[symbol++]; + int const max = (2*threshold-1) - remaining; + remaining -= count < 0 ? -count : count; + count++; /* +1 for extra accuracy */ + if (count>=threshold) + count += max; /* [0..max[ [max..threshold[ (...) 
[threshold+max 2*threshold[ */ + bitStream += count << bitCount; + bitCount += nbBits; + bitCount -= (count>=1; } + } + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } -} // end namespace snappy + if (remaining != 1) + return ERROR(GENERIC); /* incorrect normalized distribution */ + assert(symbol <= alphabetSize); -#endif // THIRD_PARTY_SNAPPY_SNAPPY_H__ + /* flush remaining bitStream */ + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out+= (bitCount+7) /8; + return (out-ostart); +} -// LICENSE_CHANGE_END +size_t FSE_writeNCount (void* buffer, size_t bufferSize, + const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */ + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */ + if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 -// See the end of this file for a list + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */); +} -// Copyright 2008 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Internals shared between the Snappy implementation and its unittest. 
-#ifndef THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ -#define THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ +/*-************************************************************** +* FSE Compression Code +****************************************************************/ +FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) +{ + size_t size; + if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; + size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); + return (FSE_CTable*)malloc(size); +} +void FSE_freeCTable (FSE_CTable* ct) { free(ct); } -namespace snappy { -namespace internal { +/* provides the minimum logSize to safely represent a distribution */ +static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) +{ + U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1; + U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; + U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; + assert(srcSize > 1); /* Not supported, RLE should be used instead */ + return minBits; +} -// Working memory performs a single allocation to hold all scratch space -// required for compression. 
-class WorkingMemory { - public: - explicit WorkingMemory(size_t input_size); - ~WorkingMemory(); +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) +{ + U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; + U32 tableLog = maxTableLog; + U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); + assert(srcSize > 1); /* Not supported, RLE should be used instead */ + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */ + if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */ + if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG; + if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG; + return tableLog; +} - // Allocates and clears a hash table using memory in "*this", - // stores the number of buckets in "*table_size" and returns a pointer to - // the base of the hash table. - uint16* GetHashTable(size_t fragment_size, int* table_size) const; - char* GetScratchInput() const { return input_; } - char* GetScratchOutput() const { return output_; } +unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); +} - private: - char* mem_; // the allocated memory, never nullptr - size_t size_; // the size of the allocated memory, never 0 - uint16* table_; // the pointer to the hashtable - char* input_; // the pointer to the input scratch buffer - char* output_; // the pointer to the output scratch buffer - // No copying - WorkingMemory(const WorkingMemory&); - void operator=(const WorkingMemory&); -}; +/* Secondary normalization method. + To be used when primary method fails. */ -// Flat array compression that does not emit the "uncompressed length" -// prefix. Compresses "input" string to the "*op" buffer. 
-// -// REQUIRES: "input_length <= kBlockSize" -// REQUIRES: "op" points to an array of memory that is at least -// "MaxCompressedLength(input_length)" in size. -// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. -// REQUIRES: "table_size" is a power of two -// -// Returns an "end" pointer into "op" buffer. -// "end - op" is the compressed size of "input". -char* CompressFragment(const char* input, - size_t input_length, - char* op, - uint16* table, - const int table_size); +static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue) +{ + short const NOT_YET_ASSIGNED = -2; + U32 s; + U32 distributed = 0; + U32 ToDistribute; -// Find the largest n such that -// -// s1[0,n-1] == s2[0,n-1] -// and n <= (s2_limit - s2). -// -// Return make_pair(n, n < 8). -// Does not read *s2_limit or beyond. -// Does not read *(s1 + (s2_limit - s2)) or beyond. -// Requires that s2_limit >= s2. -// -// Separate implementation for 64-bit, little-endian cpus. -#if !defined(SNAPPY_IS_BIG_ENDIAN) && \ - (defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)) -static inline std::pair FindMatchLength(const char* s1, - const char* s2, - const char* s2_limit) { - assert(s2_limit >= s2); - size_t matched = 0; + /* Init */ + U32 const lowThreshold = (U32)(total >> tableLog); + U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); - // This block isn't necessary for correctness; we could just start looping - // immediately. As an optimization though, it is useful. It creates some not - // uncommon code paths that determine, without extra effort, whether the match - // length is less than 8. In short, we are hoping to avoid a conditional - // branch, and perhaps get better code layout from the C++ compiler. 
- if (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) { - uint64 a1 = UNALIGNED_LOAD64(s1); - uint64 a2 = UNALIGNED_LOAD64(s2); - if (a1 != a2) { - return std::pair(Bits::FindLSBSetNonZero64(a1 ^ a2) >> 3, - true); - } else { - matched = 8; - s2 += 8; + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == 0) { + norm[s]=0; + continue; + } + if (count[s] <= lowThreshold) { + norm[s] = -1; + distributed++; + total -= count[s]; + continue; + } + if (count[s] <= lowOne) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } + + norm[s]=NOT_YET_ASSIGNED; } - } + ToDistribute = (1 << tableLog) - distributed; - // Find out how long the match is. We loop over the data 64 bits at a - // time until we find a 64-bit block that doesn't match; then we find - // the first non-matching bit and use that to calculate the total - // length of the match. - while (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) { - if (UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) { - s2 += 8; - matched += 8; - } else { - uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched); - int matching_bits = Bits::FindLSBSetNonZero64(x); - matched += matching_bits >> 3; - assert(matched >= 8); - return std::pair(matched, false); + if (ToDistribute == 0) + return 0; + + if ((total / ToDistribute) > lowOne) { + /* risk of rounding to zero */ + lowOne = (U32)((total * 3) / (ToDistribute * 2)); + for (s=0; s<=maxSymbolValue; s++) { + if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } } + ToDistribute = (1 << tableLog) - distributed; } - } - while (SNAPPY_PREDICT_TRUE(s2 < s2_limit)) { - if (s1[matched] == *s2) { - ++s2; - ++matched; - } else { - return std::pair(matched, matched < 8); + + if (distributed == maxSymbolValue+1) { + /* all values are pretty poor; + probably incompressible data (should have already been detected); + find max, then give all remaining points to max */ + U32 maxV = 0, maxC = 0; + for (s=0; 
s<=maxSymbolValue; s++) + if (count[s] > maxC) { maxV=s; maxC=count[s]; } + norm[maxV] += (short)ToDistribute; + return 0; } - } - return std::pair(matched, matched < 8); -} -#else -static inline std::pair FindMatchLength(const char* s1, - const char* s2, - const char* s2_limit) { - // Implementation based on the x86-64 version, above. - assert(s2_limit >= s2); - int matched = 0; - while (s2 <= s2_limit - 4 && - UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) { - s2 += 4; - matched += 4; - } - if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 4) { - uint32 x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched); - int matching_bits = Bits::FindLSBSetNonZero(x); - matched += matching_bits >> 3; - } else { - while ((s2 < s2_limit) && (s1[matched] == *s2)) { - ++s2; - ++matched; + if (total == 0) { + /* all of the symbols were low enough for the lowOne or lowThreshold */ + for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1)) + if (norm[s] > 0) { ToDistribute--; norm[s]++; } + return 0; } - } - return std::pair(matched, matched < 8); + + { U64 const vStepLog = 62 - tableLog; + U64 const mid = (1ULL << (vStepLog-1)) - 1; + U64 const rStep = ((((U64)1<> vStepLog); + U32 const sEnd = (U32)(end >> vStepLog); + U32 const weight = sEnd - sStart; + if (weight < 1) + return ERROR(GENERIC); + norm[s] = (short)weight; + tmpTotal = end; + } } } + + return 0; } + + +size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t total, + unsigned maxSymbolValue) +{ + /* Sanity checks */ + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ + if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ + + { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 
700000, 750000, 830000 }; + U64 const scale = 62 - tableLog; + U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */ + U64 const vStep = 1ULL<<(scale-20); + int stillToDistribute = 1<> tableLog); + + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == total) return 0; /* rle special case */ + if (count[s] == 0) { normalizedCounter[s]=0; continue; } + if (count[s] <= lowThreshold) { + normalizedCounter[s] = -1; + stillToDistribute--; + } else { + short proba = (short)((count[s]*step) >> scale); + if (proba<8) { + U64 restToBeat = vStep * rtbTable[proba]; + proba += (count[s]*step) - ((U64)proba< restToBeat; + } + if (proba > largestP) { largestP=proba; largest=s; } + normalizedCounter[s] = proba; + stillToDistribute -= proba; + } } + if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { + /* corner case, need another normalization method */ + size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); + if (FSE_isError(errorCode)) return errorCode; + } + else normalizedCounter[largest] += (short)stillToDistribute; + } + +#if 0 + { /* Print Table (debug) */ + U32 s; + U32 nTotal = 0; + for (s=0; s<=maxSymbolValue; s++) + RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]); + for (s=0; s<=maxSymbolValue; s++) + nTotal += abs(normalizedCounter[s]); + if (nTotal != (1U<>1); /* assumption : tableLog >= 1 */ + FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + unsigned s; -} // end namespace internal + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + /* header */ + tableU16[-2] = (U16) nbBits; + tableU16[-1] = (U16) maxSymbolValue; -// The size of a compression block. Note that many parts of the compression -// code assumes that kBlockSize <= 65536; in particular, the hash table -// can only store 16-bit offsets, and EmitCopy() also assumes the offset -// is 65535 bytes or less. 
Note also that if you change this, it will -// affect the framing format (see framing_format.txt). -// -// Note that there might be older data around that is compressed with larger -// block sizes, so the decompression code should not rely on the -// non-existence of long backreferences. -static const int kBlockLog = 16; -static const size_t kBlockSize = 1 << kBlockLog; + /* Build table */ + for (s=0; s. or with headers that assume more -// advanced SSE versions without checking with all the OWNERS. -#include -#endif + /* init */ + if (srcSize <= 2) return 0; + { size_t const initError = BIT_initCStream(&bitC, dst, dstSize); + if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ } -#if SNAPPY_HAVE_BMI2 -// Please do not replace with . or with headers that assume more -// advanced SSE versions without checking with all the OWNERS. -#include -#endif +#define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) -#include + if (srcSize & 1) { + FSE_initCState2(&CState1, ct, *--ip); + FSE_initCState2(&CState2, ct, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + FSE_FLUSHBITS(&bitC); + } else { + FSE_initCState2(&CState2, ct, *--ip); + FSE_initCState2(&CState1, ct, *--ip); + } -#include -#include -#include + /* join to mod 4 */ + srcSize -= 2; + if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + FSE_FLUSHBITS(&bitC); + } -namespace snappy { + /* 2 or 4 encoding per loop */ + while ( ip>istart ) { -using internal::COPY_1_BYTE_OFFSET; -using internal::COPY_2_BYTE_OFFSET; -using internal::LITERAL; -using internal::char_table; -using internal::kMaximumTagLength; + FSE_encodeSymbol(&bitC, &CState2, *--ip); -// Any hash function will produce a valid compressed bitstream, but a good -// hash function reduces the number of collisions and thus yields better -// compression for compressible 
input, and more speed for incompressible -// input. Of course, it doesn't hurt if the hash function is reasonably fast -// either, as it gets called a lot. -static inline uint32 HashBytes(uint32 bytes, int shift) { - uint32 kMul = 0x1e35a7bd; - return (bytes * kMul) >> shift; -} -static inline uint32 Hash(const char* p, int shift) { - return HashBytes(UNALIGNED_LOAD32(p), shift); -} + if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */ + FSE_FLUSHBITS(&bitC); -size_t MaxCompressedLength(size_t source_len) { - // Compressed data can be defined as: - // compressed := item* literal* - // item := literal* copy - // - // The trailing literal sequence has a space blowup of at most 62/60 - // since a literal of length 60 needs one tag byte + one extra byte - // for length information. - // - // Item blowup is trickier to measure. Suppose the "copy" op copies - // 4 bytes of data. Because of a special check in the encoding code, - // we produce a 4-byte copy only if the offset is < 65536. Therefore - // the copy op takes 3 bytes to encode, and this type of item leads - // to at most the 62/60 blowup for representing literals. - // - // Suppose the "copy" op copies 5 bytes of data. If the offset is big - // enough, it will take 5 bytes to encode the copy op. Therefore the - // worst case here is a one-byte literal followed by a five-byte copy. - // I.e., 6 bytes of input turn into 7 bytes of "compressed" data. 
- // - // This last factor dominates the blowup, so the final estimate is: - return 32 + source_len + source_len/6; -} + FSE_encodeSymbol(&bitC, &CState1, *--ip); -namespace { + if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + } -void UnalignedCopy64(const void* src, void* dst) { - char tmp[8]; - memcpy(tmp, src, 8); - memcpy(dst, tmp, 8); + FSE_FLUSHBITS(&bitC); + } + + FSE_flushCState(&bitC, &CState2); + FSE_flushCState(&bitC, &CState1); + return BIT_closeCStream(&bitC); } -void UnalignedCopy128(const void* src, void* dst) { - // memcpy gets vectorized when the appropriate compiler options are used. - // For example, x86 compilers targeting SSE2+ will optimize to an SSE2 load - // and store. - char tmp[16]; - memcpy(tmp, src, 16); - memcpy(dst, tmp, 16); +size_t FSE_compress_usingCTable (void* dst, size_t dstSize, + const void* src, size_t srcSize, + const FSE_CTable* ct) +{ + unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); + + if (fast) + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); + else + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); } -// Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) a byte at a time. Used -// for handling COPY operations where the input and output regions may overlap. -// For example, suppose: -// src == "ab" -// op == src + 2 -// op_limit == op + 20 -// After IncrementalCopySlow(src, op, op_limit), the result will have eleven -// copies of "ab" -// ababababababababababab -// Note that this does not match the semantics of either memcpy() or memmove(). -inline char* IncrementalCopySlow(const char* src, char* op, - char* const op_limit) { - // TODO: Remove pragma when LLVM is aware this - // function is only called in cold regions and when cold regions don't get - // vectorized or unrolled. 
-#ifdef __clang__ -#pragma clang loop unroll(disable) -#endif - while (op < op_limit) { - *op++ = *src++; - } - return op_limit; -} - -#if SNAPPY_HAVE_SSSE3 -// This is a table of shuffle control masks that can be used as the source -// operand for PSHUFB to permute the contents of the destination XMM register -// into a repeating byte pattern. -alignas(16) const char pshufb_fill_patterns[7][16] = { - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}, - {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0}, - {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}, - {0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0}, - {0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3}, - {0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1}, -}; +size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } -#endif // SNAPPY_HAVE_SSSE3 +/* FSE_compress_wksp() : + * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). + * `wkspSize` size must be `(1< 16 - // ------------------------------------------ - // html|html4|cp 0% 1.01% 27.73% - // urls 0% 0.88% 14.79% - // jpg 0% 64.29% 7.14% - // pdf 0% 2.56% 58.06% - // txt[1-4] 0% 0.23% 0.97% - // pb 0% 0.96% 13.88% - // bin 0.01% 22.27% 41.17% - // - // It is very rare that we don't have enough slop for doing block copies. It - // is also rare that we need to expand a pattern. Small patterns are common - // for incompressible formats and for those we are plenty fast already. - // Lengths are normally not greater than 16 but they vary depending on the - // input. In general if we always predict len <= 16 it would be an ok - // prediction. - // - // In order to be fast we want a pattern >= 8 bytes and an unrolled loop - // copying 2x 8 bytes at a time. 
+ /* init conditions */ + if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge); + if (srcSize <= 1) return 0; /* Not compressible */ + if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG; - // Handle the uncommon case where pattern is less than 8 bytes. - if (SNAPPY_PREDICT_FALSE(pattern_size < 8)) { -#if SNAPPY_HAVE_SSSE3 - // Load the first eight bytes into an 128-bit XMM register, then use PSHUFB - // to permute the register's contents in-place into a repeating sequence of - // the first "pattern_size" bytes. - // For example, suppose: - // src == "abc" - // op == op + 3 - // After _mm_shuffle_epi8(), "pattern" will have five copies of "abc" - // followed by one byte of slop: abcabcabcabcabca. - // - // The non-SSE fallback implementation suffers from store-forwarding stalls - // because its loads and stores partly overlap. By expanding the pattern - // in-place, we avoid the penalty. - if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 16)) { - const __m128i shuffle_mask = _mm_load_si128( - reinterpret_cast(pshufb_fill_patterns) - + pattern_size - 1); - const __m128i pattern = _mm_shuffle_epi8( - _mm_loadl_epi64(reinterpret_cast(src)), shuffle_mask); - // Uninitialized bytes are masked out by the shuffle mask. - // TODO: remove annotation and macro defs once MSan is fixed. - SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(&pattern, sizeof(pattern)); - pattern_size *= 16 / pattern_size; - char* op_end = std::min(op_limit, buf_limit - 15); - while (op < op_end) { - _mm_storeu_si128(reinterpret_cast<__m128i*>(op), pattern); - op += pattern_size; - } - if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit; - } - return IncrementalCopySlow(src, op, op_limit); -#else // !SNAPPY_HAVE_SSSE3 - // If plenty of buffer space remains, expand the pattern to at least 8 - // bytes. 
The way the following loop is written, we need 8 bytes of buffer - // space if pattern_size >= 4, 11 bytes if pattern_size is 1 or 3, and 10 - // bytes if pattern_size is 2. Precisely encoding that is probably not - // worthwhile; instead, invoke the slow path if we cannot write 11 bytes - // (because 11 are required in the worst case). - if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 11)) { - while (pattern_size < 8) { - UnalignedCopy64(src, op); - op += pattern_size; - pattern_size *= 2; - } - if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit; - } else { - return IncrementalCopySlow(src, op, op_limit); + /* Scan input and build symbol stats */ + { CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) ); + if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */ + if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ + if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ } -#endif // SNAPPY_HAVE_SSSE3 - } - assert(pattern_size >= 8); - - // Copy 2x 8 bytes at a time. Because op - src can be < 16, a single - // UnalignedCopy128 might overwrite data in op. UnalignedCopy64 is safe - // because expanding the pattern to at least 8 bytes guarantees that - // op - src >= 8. - // - // Typically, the op_limit is the gating factor so try to simplify the loop - // based on that. - if (SNAPPY_PREDICT_TRUE(op_limit <= buf_limit - 16)) { - // Factor the displacement from op to the source into a variable. This helps - // simplify the loop below by only varying the op pointer which we need to - // test for the end. Note that this was done after carefully examining the - // generated code to allow the addressing modes in the loop below to - // maximize micro-op fusion where possible on modern Intel processors. The - // generated code should be checked carefully for new processors or with - // major changes to the compiler. 
- // TODO: Simplify this code when the compiler reliably produces - // the correct x86 instruction sequence. - ptrdiff_t op_to_src = src - op; - // The trip count of this loop is not large and so unrolling will only hurt - // code size without helping performance. - // - // TODO: Replace with loop trip count hint. -#ifdef __clang__ -#pragma clang loop unroll(disable) -#endif - do { - UnalignedCopy64(op + op_to_src, op); - UnalignedCopy64(op + op_to_src + 8, op + 8); - op += 16; - } while (op < op_limit); - return op_limit; - } + tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue); + CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) ); - // Fall back to doing as much as we can with the available slop in the - // buffer. This code path is relatively cold however so we save code size by - // avoiding unrolling and vectorizing. - // - // TODO: Remove pragma when when cold regions don't get vectorized - // or unrolled. -#ifdef __clang__ -#pragma clang loop unroll(disable) -#endif - for (char *op_end = buf_limit - 16; op < op_end; op += 16, src += 16) { - UnalignedCopy64(src, op); - UnalignedCopy64(src + 8, op + 8); - } - if (op >= op_limit) - return op_limit; + /* Write table description header */ + { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); + op += nc_err; + } - // We only take this branch if we didn't have enough slop and we can do a - // single 8 byte copy. 
- if (SNAPPY_PREDICT_FALSE(op <= buf_limit - 8)) { - UnalignedCopy64(src, op); - src += 8; - op += 8; - } - return IncrementalCopySlow(src, op, op_limit); -} + /* Compress */ + CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) ); + { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) ); + if (cSize == 0) return 0; /* not enough space for compressed data */ + op += cSize; + } -} // namespace + /* check compressibility */ + if ( (size_t)(op-ostart) >= srcSize-1 ) return 0; -template -static inline char* EmitLiteral(char* op, - const char* literal, - int len) { - // The vast majority of copies are below 16 bytes, for which a - // call to memcpy is overkill. This fast path can sometimes - // copy up to 15 bytes too much, but that is okay in the - // main loop, since we have a bit to go on for both sides: - // - // - The input will always have kInputMarginBytes = 15 extra - // available bytes, as long as we're in the main loop, and - // if not, allow_fast_path = false. - // - The output will always have 32 spare bytes (see - // MaxCompressedLength). - assert(len > 0); // Zero-length literals are disallowed - int n = len - 1; - if (allow_fast_path && len <= 16) { - // Fits in tag byte - *op++ = LITERAL | (n << 2); + return op-ostart; +} - UnalignedCopy128(literal, op); - return op + len; - } +typedef struct { + FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; + BYTE scratchBuffer[1 << FSE_MAX_TABLELOG]; +} fseWkspMax_t; - if (n < 60) { - // Fits in tag byte - *op++ = LITERAL | (n << 2); - } else { - int count = (Bits::Log2Floor(n) >> 3) + 1; - assert(count >= 1); - assert(count <= 4); - *op++ = LITERAL | ((59 + count) << 2); - // Encode in upcoming bytes. - // Write 4 bytes, though we may care about only 1 of them. 
The output buffer - // is guaranteed to have at least 3 more spaces left as 'len >= 61' holds - // here and there is a memcpy of size 'len' below. - LittleEndian::Store32(op, n); - op += count; - } - memcpy(op, literal, len); - return op + len; +size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) +{ + fseWkspMax_t scratchBuffer; + DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer)); } -template -static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len) { - assert(len <= 64); - assert(len >= 4); - assert(offset < 65536); - assert(len_less_than_12 == (len < 12)); +size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG); +} - if (len_less_than_12 && SNAPPY_PREDICT_TRUE(offset < 2048)) { - // offset fits in 11 bits. The 3 highest go in the top of the first byte, - // and the rest go in the second byte. - *op++ = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0); - *op++ = offset & 0xff; - } else { - // Write 4 bytes, though we only care about 3 of them. The output buffer - // is required to have some slack, so the extra byte won't overrun it. 
- uint32 u = COPY_2_BYTE_OFFSET + ((len - 1) << 2) + (offset << 8); - LittleEndian::Store32(op, u); - op += 3; - } - return op; } -template -static inline char* EmitCopy(char* op, size_t offset, size_t len) { - assert(len_less_than_12 == (len < 12)); - if (len_less_than_12) { - return EmitCopyAtMost64(op, offset, len); - } else { - // A special case for len <= 64 might help, but so far measurements suggest - // it's in the noise. +#endif /* FSE_COMMONDEFS_ONLY */ - // Emit 64 byte copies but make sure to keep at least four bytes reserved. - while (SNAPPY_PREDICT_FALSE(len >= 68)) { - op = EmitCopyAtMost64(op, offset, 64); - len -= 64; - } - // One or two copies will now finish the job. - if (len > 64) { - op = EmitCopyAtMost64(op, offset, 60); - len -= 60; - } +// LICENSE_CHANGE_END - // Emit remainder. - if (len < 12) { - op = EmitCopyAtMost64(op, offset, len); - } else { - op = EmitCopyAtMost64(op, offset, len); - } - return op; - } -} -bool GetUncompressedLength(const char* start, size_t n, size_t* result) { - uint32 v = 0; - const char* limit = start + n; - if (Varint::Parse32WithLimit(start, limit, &v) != NULL) { - *result = v; - return true; - } else { - return false; - } -} +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list -namespace { -uint32 CalculateTableSize(uint32 input_size) { - assert(kMaxHashTableSize >= 256); - if (input_size > kMaxHashTableSize) { - return kMaxHashTableSize; - } - if (input_size < 256) { - return 256; - } - // This is equivalent to Log2Ceiling(input_size), assuming input_size > 1. - // 2 << Log2Floor(x - 1) is equivalent to 1 << (1 + Log2Floor(x - 1)). - return 2u << Bits::Log2Floor(input_size - 1); -} -} // namespace +/* ****************************************************************** + * hist : Histogram functions + * part of Finite State Entropy project + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. 
+ * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ -namespace internal { -WorkingMemory::WorkingMemory(size_t input_size) { - const size_t max_fragment_size = std::min(input_size, kBlockSize); - const size_t table_size = CalculateTableSize(max_fragment_size); - size_ = table_size * sizeof(*table_) + max_fragment_size + - MaxCompressedLength(max_fragment_size); - mem_ = std::allocator().allocate(size_); - table_ = reinterpret_cast(mem_); - input_ = mem_ + table_size * sizeof(*table_); - output_ = input_ + max_fragment_size; -} +/* --- dependencies --- */ + /* U32, BYTE, etc. */ + /* assert, DEBUGLOG */ + /* ERROR */ -WorkingMemory::~WorkingMemory() { - std::allocator().deallocate(mem_, size_); -} -uint16* WorkingMemory::GetHashTable(size_t fragment_size, - int* table_size) const { - const size_t htsize = CalculateTableSize(fragment_size); - memset(table_, 0, htsize * sizeof(*table_)); - *table_size = htsize; - return table_; -} -} // end namespace internal -// For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will -// equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have -// empirically found that overlapping loads such as -// UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2) -// are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32. 
-// -// We have different versions for 64- and 32-bit; ideally we would avoid the -// two functions and just inline the UNALIGNED_LOAD64 call into -// GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever -// enough to avoid loading the value multiple times then. For 64-bit, the load -// is done when GetEightBytesAt() is called, whereas for 32-bit, the load is -// done at GetUint32AtOffset() time. +namespace duckdb_zstd { -#ifdef ARCH_K8 +/* --- Error management --- */ +unsigned HIST_isError(size_t code) { return ERR_isError(code); } -typedef uint64 EightBytesReference; +/*-************************************************************** + * Histogram functions + ****************************************************************/ +unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + const BYTE* const end = ip + srcSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned largestCount=0; -static inline EightBytesReference GetEightBytesAt(const char* ptr) { - return UNALIGNED_LOAD64(ptr); -} + memset(count, 0, (maxSymbolValue+1) * sizeof(*count)); + if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } -static inline uint32 GetUint32AtOffset(uint64 v, int offset) { - assert(offset >= 0); - assert(offset <= 4); - return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset); -} + while (ip largestCount) largestCount = count[s]; + } -static inline EightBytesReference GetEightBytesAt(const char* ptr) { - return ptr; + return largestCount; } -static inline uint32 GetUint32AtOffset(const char* v, int offset) { - assert(offset >= 0); - assert(offset <= 4); - return UNALIGNED_LOAD32(v + offset); -} +typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e; -#endif +/* HIST_count_parallel_wksp() : + * store histogram into 4 intermediate tables, recombined at the end. 
+ * this design makes better use of OoO cpus, + * and is noticeably faster when some values are heavily repeated. + * But it needs some additional workspace for intermediate tables. + * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32. + * @return : largest histogram frequency, + * or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */ +static size_t HIST_count_parallel_wksp( + unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + HIST_checkInput_e check, + U32* const workSpace) +{ + const BYTE* ip = (const BYTE*)source; + const BYTE* const iend = ip+sourceSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned max=0; + U32* const Counting1 = workSpace; + U32* const Counting2 = Counting1 + 256; + U32* const Counting3 = Counting2 + 256; + U32* const Counting4 = Counting3 + 256; -// Flat array compression that does not emit the "uncompressed length" -// prefix. Compresses "input" string to the "*op" buffer. -// -// REQUIRES: "input" is at most "kBlockSize" bytes long. -// REQUIRES: "op" points to an array of memory that is at least -// "MaxCompressedLength(input.size())" in size. -// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. -// REQUIRES: "table_size" is a power of two -// -// Returns an "end" pointer into "op" buffer. -// "end - op" is the compressed size of "input". -namespace internal { -char* CompressFragment(const char* input, - size_t input_size, - char* op, - uint16* table, - const int table_size) { - // "ip" is the input pointer, and "op" is the output pointer. 
- const char* ip = input; - assert(input_size <= kBlockSize); - assert((table_size & (table_size - 1)) == 0); // table must be power of two - const int shift = 32 - Bits::Log2Floor(table_size); - // assert(static_cast(kuint32max >> shift) == table_size - 1); - const char* ip_end = input + input_size; - const char* base_ip = ip; - // Bytes in [next_emit, ip) will be emitted as literal bytes. Or - // [next_emit, ip_end) after the main loop. - const char* next_emit = ip; + memset(workSpace, 0, 4*256*sizeof(unsigned)); - const size_t kInputMarginBytes = 15; - if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) { - const char* ip_limit = input + input_size - kInputMarginBytes; + /* safety checks */ + if (!sourceSize) { + memset(count, 0, maxSymbolValue + 1); + *maxSymbolValuePtr = 0; + return 0; + } + if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ - for (uint32 next_hash = Hash(++ip, shift); ; ) { - assert(next_emit < ip); - // The body of this loop calls EmitLiteral once and then EmitCopy one or - // more times. (The exception is that when we're close to exhausting - // the input we goto emit_remainder.) - // - // In the first iteration of this loop we're just starting, so - // there's nothing to copy, so calling EmitLiteral once is - // necessary. And we only start a new iteration when the - // current iteration has determined that a call to EmitLiteral will - // precede the next call to EmitCopy (if any). - // - // Step 1: Scan forward in the input looking for a 4-byte-long match. - // If we get close to exhausting the input then goto emit_remainder. - // - // Heuristic match skipping: If 32 bytes are scanned with no matches - // found, start looking only at every other byte. If 32 more bytes are - // scanned (or skipped), look at every third byte, etc.. When a match is - // found, immediately go back to looking at every byte. 
This is a small - // loss (~5% performance, ~0.1% density) for compressible data due to more - // bookkeeping, but for non-compressible data (such as JPEG) it's a huge - // win since the compressor quickly "realizes" the data is incompressible - // and doesn't bother looking for matches everywhere. - // - // The "skip" variable keeps track of how many bytes there are since the - // last match; dividing it by 32 (ie. right-shifting by five) gives the - // number of bytes to move ahead for each iteration. - uint32 skip = 32; - - const char* next_ip = ip; - const char* candidate; - do { - ip = next_ip; - uint32 hash = next_hash; - assert(hash == Hash(ip, shift)); - uint32 bytes_between_hash_lookups = skip >> 5; - skip += bytes_between_hash_lookups; - next_ip = ip + bytes_between_hash_lookups; - if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) { - goto emit_remainder; + /* by stripes of 16 bytes */ + { U32 cached = MEM_read32(ip); ip += 4; + while (ip < iend-15) { + U32 c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; } - next_hash = Hash(next_ip, shift); - candidate = base_ip + table[hash]; - assert(candidate >= base_ip); - assert(candidate < ip); - - table[hash] = ip - base_ip; - } while (SNAPPY_PREDICT_TRUE(UNALIGNED_LOAD32(ip) != - UNALIGNED_LOAD32(candidate))); - - // Step 2: A 4-byte match has been found. We'll later see if more - // than 4 bytes match. 
But, prior to the match, input - // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes." - assert(next_emit + 16 <= ip_end); - op = EmitLiteral(op, next_emit, ip - next_emit); - - // Step 3: Call EmitCopy, and then see if another EmitCopy could - // be our next move. Repeat until we find no match for the - // input immediately after what was consumed by the last EmitCopy call. - // - // If we exit this loop normally then we need to call EmitLiteral next, - // though we don't yet know how big the literal will be. We handle that - // by proceeding to the next iteration of the main loop. We also can exit - // this loop via goto if we get close to exhausting the input. - EightBytesReference input_bytes; - uint32 candidate_bytes = 0; + ip-=4; + } - do { - // We have a 4-byte match at ip, and no need to emit any - // "literal bytes" prior to ip. - const char* base = ip; - std::pair p = - FindMatchLength(candidate + 4, ip + 4, ip_end); - size_t matched = 4 + p.first; - ip += matched; - size_t offset = base - candidate; - assert(0 == memcmp(base, candidate, matched)); - if (p.second) { - op = EmitCopy(op, offset, matched); - } else { - op = EmitCopy(op, offset, matched); - } - next_emit = ip; - if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) { - goto emit_remainder; - } - // We are now looking for a 4-byte match again. We read - // table[Hash(ip, shift)] for that. To improve compression, - // we also update table[Hash(ip - 1, shift)] and table[Hash(ip, shift)]. 
- input_bytes = GetEightBytesAt(ip - 1); - uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift); - table[prev_hash] = ip - base_ip - 1; - uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift); - candidate = base_ip + table[cur_hash]; - candidate_bytes = UNALIGNED_LOAD32(candidate); - table[cur_hash] = ip - base_ip; - } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes); + /* finish last symbols */ + while (ipmaxSymbolValue; s--) { + Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; + if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); + } } - emit_remainder: - // Emit the remaining bytes as a literal - if (next_emit < ip_end) { - op = EmitLiteral(op, next_emit, - ip_end - next_emit); - } + { U32 s; + if (maxSymbolValue > 255) maxSymbolValue = 255; + for (s=0; s<=maxSymbolValue; s++) { + count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; + if (count[s] > max) max = count[s]; + } } - return op; + while (!count[maxSymbolValue]) maxSymbolValue--; + *maxSymbolValuePtr = maxSymbolValue; + return (size_t)max; } -} // end namespace internal -// Called back at avery compression call to trace parameters and sizes. -static inline void Report(const char *algorithm, size_t compressed_size, - size_t uncompressed_size) {} +/* HIST_countFast_wksp() : + * Same as HIST_countFast(), but using an externally provided scratch buffer. 
+ * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + void* workSpace, size_t workSpaceSize) +{ + if (sourceSize < 1500) /* heuristic threshold */ + return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize); + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); + return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace); +} -// Signature of output types needed by decompression code. -// The decompression code is templatized on a type that obeys this -// signature so that we do not pay virtual function call overhead in -// the middle of a tight decompression loop. -// -// class DecompressionWriter { -// public: -// // Called before decompression -// void SetExpectedLength(size_t length); -// -// // Called after decompression -// bool CheckLength() const; -// -// // Called repeatedly during decompression -// bool Append(const char* ip, size_t length); -// bool AppendFromSelf(uint32 offset, size_t length); -// -// // The rules for how TryFastAppend differs from Append are somewhat -// // convoluted: -// // -// // - TryFastAppend is allowed to decline (return false) at any -// // time, for any reason -- just "return false" would be -// // a perfectly legal implementation of TryFastAppend. -// // The intention is for TryFastAppend to allow a fast path -// // in the common case of a small append. -// // - TryFastAppend is allowed to read up to bytes -// // from the input buffer, whereas Append is allowed to read -// // . 
However, if it returns true, it must leave -// // at least five (kMaximumTagLength) bytes in the input buffer -// // afterwards, so that there is always enough space to read the -// // next tag without checking for a refill. -// // - TryFastAppend must always return decline (return false) -// // if is 61 or more, as in this case the literal length is not -// // decoded fully. In practice, this should not be a big problem, -// // as it is unlikely that one would implement a fast path accepting -// // this much data. -// // -// bool TryFastAppend(const char* ip, size_t available, size_t length); -// }; +/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ +size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize) +{ + unsigned tmpCounters[HIST_WKSP_SIZE_U32]; + return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters)); +} -static inline uint32 ExtractLowBytes(uint32 v, int n) { - assert(n >= 0); - assert(n <= 4); -#if SNAPPY_HAVE_BMI2 - return _bzhi_u32(v, 8 * n); -#else - // This needs to be wider than uint32 otherwise `mask << 32` will be - // undefined. - uint64 mask = 0xffffffff; - return v & ~(mask << (8 * n)); -#endif +/* HIST_count_wksp() : + * Same as HIST_count(), but using an externally provided scratch buffer. 
+ * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ +size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + void* workSpace, size_t workSpaceSize) +{ + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); + if (*maxSymbolValuePtr < 255) + return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace); + *maxSymbolValuePtr = 255; + return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize); } -static inline bool LeftShiftOverflows(uint8 value, uint32 shift) { - assert(shift < 32); - static const uint8 masks[] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // - 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe}; - return (value & masks[shift]) != 0; +size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize) +{ + unsigned tmpCounters[HIST_WKSP_SIZE_U32]; + return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters)); } -// Helper class for decompression -class SnappyDecompressor { - private: - Source* reader_; // Underlying source of bytes to decompress - const char* ip_; // Points to next buffered byte - const char* ip_limit_; // Points just past buffered bytes - uint32 peeked_; // Bytes peeked from reader (need to skip) - bool eof_; // Hit end of input without an error? - char scratch_[kMaximumTagLength]; // See RefillTag(). +} - // Ensure that all of the tag metadata for the next tag is available - // in [ip_..ip_limit_-1]. Also ensures that [ip,ip+4] is readable even - // if (ip_limit_ - ip_ < 5). - // - // Returns true on success, false on error or end of input. 
- bool RefillTag(); - public: - explicit SnappyDecompressor(Source* reader) - : reader_(reader), - ip_(NULL), - ip_limit_(NULL), - peeked_(0), - eof_(false) { - } +// LICENSE_CHANGE_END - ~SnappyDecompressor() { - // Advance past any bytes we peeked at from the reader - reader_->Skip(peeked_); - } - // Returns true iff we have hit the end of the input without an error. - bool eof() const { - return eof_; - } +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list - // Read the uncompressed length stored at the start of the compressed data. - // On success, stores the length in *result and returns true. - // On failure, returns false. - bool ReadUncompressedLength(uint32* result) { - assert(ip_ == NULL); // Must not have read anything yet - // Length is encoded in 1..5 bytes - *result = 0; - uint32 shift = 0; - while (true) { - if (shift >= 32) return false; - size_t n; - const char* ip = reader_->Peek(&n); - if (n == 0) return false; - const unsigned char c = *(reinterpret_cast(ip)); - reader_->Skip(1); - uint32 val = c & 0x7f; - if (LeftShiftOverflows(static_cast(val), shift)) return false; - *result |= val << shift; - if (c < 128) { - break; - } - shift += 7; - } - return true; - } +/* ****************************************************************** + * Huffman encoder, part of New Generation Entropy library + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ - // Process the next item found in the input. - // Returns true if successful, false on error or end of input. - template -#if defined(__GNUC__) && defined(__x86_64__) - __attribute__((aligned(32))) -#endif - void DecompressAllTags(Writer* writer) { - // In x86, pad the function body to start 16 bytes later. This function has - // a couple of hotspots that are highly sensitive to alignment: we have - // observed regressions by more than 20% in some metrics just by moving the - // exact same code to a different position in the benchmark binary. - // - // Putting this code on a 32-byte-aligned boundary + 16 bytes makes us hit - // the "lucky" case consistently. Unfortunately, this is a very brittle - // workaround, and future differences in code generation may reintroduce - // this regression. If you experience a big, difficult to explain, benchmark - // performance regression here, first try removing this hack. -#if defined(__GNUC__) && defined(__x86_64__) - // Two 8-byte "NOP DWORD ptr [EAX + EAX*1 + 00000000H]" instructions. - asm(".byte 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00"); - asm(".byte 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00"); +/* ************************************************************** +* Compiler specifics +****************************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ #endif - const char* ip = ip_; - // We could have put this refill fragment only at the beginning of the loop. - // However, duplicating it at the end of each branch gives the compiler more - // scope to optimize the expression based on the local - // context, which overall increases speed. 
- #define MAYBE_REFILL() \ - if (ip_limit_ - ip < kMaximumTagLength) { \ - ip_ = ip; \ - if (!RefillTag()) return; \ - ip = ip_; \ - } - - MAYBE_REFILL(); - for ( ;; ) { - const unsigned char c = *(reinterpret_cast(ip++)); - // Ratio of iterations that have LITERAL vs non-LITERAL for different - // inputs. - // - // input LITERAL NON_LITERAL - // ----------------------------------- - // html|html4|cp 23% 77% - // urls 36% 64% - // jpg 47% 53% - // pdf 19% 81% - // txt[1-4] 25% 75% - // pb 24% 76% - // bin 24% 76% - if (SNAPPY_PREDICT_FALSE((c & 0x3) == LITERAL)) { - size_t literal_length = (c >> 2) + 1u; - if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) { - assert(literal_length < 61); - ip += literal_length; - // NOTE: There is no MAYBE_REFILL() here, as TryFastAppend() - // will not return true unless there's already at least five spare - // bytes in addition to the literal. - continue; - } - if (SNAPPY_PREDICT_FALSE(literal_length >= 61)) { - // Long literal. - const size_t literal_length_length = literal_length - 60; - literal_length = - ExtractLowBytes(LittleEndian::Load32(ip), literal_length_length) + - 1; - ip += literal_length_length; - } +/* ************************************************************** +* Includes +****************************************************************/ +#include /* memcpy, memset */ +#include /* printf (debug) */ - size_t avail = ip_limit_ - ip; - while (avail < literal_length) { - if (!writer->Append(ip, avail)) return; - literal_length -= avail; - reader_->Skip(peeked_); - size_t n; - ip = reader_->Peek(&n); - avail = n; - peeked_ = avail; - if (avail == 0) return; // Premature end of input - ip_limit_ = ip + avail; - } - if (!writer->Append(ip, literal_length)) { - return; - } - ip += literal_length; - MAYBE_REFILL(); - } else { - const size_t entry = char_table[c]; - const size_t trailer = - ExtractLowBytes(LittleEndian::Load32(ip), entry >> 11); - const size_t length = entry & 0xff; - ip += entry >> 11; - // 
copy_offset/256 is encoded in bits 8..10. By just fetching - // those bits, we get copy_offset (since the bit-field starts at - // bit 8). - const size_t copy_offset = entry & 0x700; - if (!writer->AppendFromSelf(copy_offset + trailer, length)) { - return; - } - MAYBE_REFILL(); - } - } -#undef MAYBE_REFILL - } -}; + /* header compression */ -bool SnappyDecompressor::RefillTag() { - const char* ip = ip_; - if (ip == ip_limit_) { - // Fetch a new fragment from the reader - reader_->Skip(peeked_); // All peeked bytes are used up - size_t n; - ip = reader_->Peek(&n); - peeked_ = n; - eof_ = (n == 0); - if (eof_) return false; - ip_limit_ = ip + n; - } - // Read the tag character - assert(ip < ip_limit_); - const unsigned char c = *(reinterpret_cast(ip)); - const uint32 entry = char_table[c]; - const uint32 needed = (entry >> 11) + 1; // +1 byte for 'c' - assert(needed <= sizeof(scratch_)); - // Read more bytes from reader if needed - uint32 nbuf = ip_limit_ - ip; - if (nbuf < needed) { - // Stitch together bytes from ip and reader to form the word - // contents. We store the needed bytes in "scratch_". They - // will be consumed immediately by the caller since we do not - // read more than we need. - memmove(scratch_, ip, nbuf); - reader_->Skip(peeked_); // All peeked bytes are used up - peeked_ = 0; - while (nbuf < needed) { - size_t length; - const char* src = reader_->Peek(&length); - if (length == 0) return false; - uint32 to_add = std::min(needed - nbuf, length); - memcpy(scratch_ + nbuf, src, to_add); - nbuf += to_add; - reader_->Skip(to_add); - } - assert(nbuf == needed); - ip_ = scratch_; - ip_limit_ = scratch_ + needed; - } else if (nbuf < kMaximumTagLength) { - // Have enough bytes, but move into scratch_ so that we do not - // read past end of input - memmove(scratch_, ip, nbuf); - reader_->Skip(peeked_); // All peeked bytes are used up - peeked_ = 0; - ip_ = scratch_; - ip_limit_ = scratch_ + nbuf; - } else { - // Pass pointer to buffer returned by reader_. 
- ip_ = ip; - } - return true; -} -template -static bool InternalUncompress(Source* r, Writer* writer) { - // Read the uncompressed length from the front of the compressed input - SnappyDecompressor decompressor(r); - uint32 uncompressed_len = 0; - if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false; - return InternalUncompressAllTags(&decompressor, writer, r->Available(), - uncompressed_len); -} -template -static bool InternalUncompressAllTags(SnappyDecompressor* decompressor, - Writer* writer, - uint32 compressed_len, - uint32 uncompressed_len) { - Report("snappy_uncompress", compressed_len, uncompressed_len); +/* ************************************************************** +* Error Management +****************************************************************/ +// #define HUF_isError ERR_isError +#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ - writer->SetExpectedLength(uncompressed_len); - // Process the entire input - decompressor->DecompressAllTags(writer); - writer->Flush(); - return (decompressor->eof() && writer->CheckLength()); -} - -bool GetUncompressedLength(Source* source, uint32* result) { - SnappyDecompressor decompressor(source); - return decompressor.ReadUncompressedLength(result); +namespace duckdb_zstd { +/* ************************************************************** +* Utils +****************************************************************/ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); } -size_t Compress(Source* reader, Sink* writer) { - size_t written = 0; - size_t N = reader->Available(); - const size_t uncompressed_size = N; - char ulength[Varint::kMax32]; - char* p = Varint::Encode32(ulength, N); - writer->Append(ulength, p-ulength); - written += (p - ulength); - - internal::WorkingMemory wmem(N); - while (N > 0) { - // Get next block to 
compress (without copying if possible) - size_t fragment_size; - const char* fragment = reader->Peek(&fragment_size); - assert(fragment_size != 0); // premature end of input - const size_t num_to_read = std::min(N, kBlockSize); - size_t bytes_read = fragment_size; +/* ******************************************************* +* HUF : Huffman block compression +*********************************************************/ +/* HUF_compressWeights() : + * Same as FSE_compress(), but dedicated to huff0's weights compression. + * The use case needs much less stack memory. + * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. + */ +#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 +static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize) +{ + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const oend = ostart + dstSize; - size_t pending_advance = 0; - if (bytes_read >= num_to_read) { - // Buffer returned by reader is large enough - pending_advance = num_to_read; - fragment_size = num_to_read; - } else { - char* scratch = wmem.GetScratchInput(); - memcpy(scratch, fragment, bytes_read); - reader->Skip(bytes_read); + unsigned maxSymbolValue = HUF_TABLELOG_MAX; + U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; - while (bytes_read < num_to_read) { - fragment = reader->Peek(&fragment_size); - size_t n = std::min(fragment_size, num_to_read - bytes_read); - memcpy(scratch + bytes_read, fragment, n); - bytes_read += n; - reader->Skip(n); - } - assert(bytes_read == num_to_read); - fragment = scratch; - fragment_size = num_to_read; - } - assert(fragment_size == num_to_read); + FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; + BYTE scratchBuffer[1< not compressible */ + } - // Since we encode kBlockSize regions followed by a region - // which is <= kBlockSize in length, a previously allocated - // scratch_output[] region is big enough for this iteration. 
- char* dest = writer->GetAppendBuffer(max_output, wmem.GetScratchOutput()); - char* end = internal::CompressFragment(fragment, fragment_size, dest, table, - table_size); - writer->Append(dest, end - dest); - written += (end - dest); + tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); + CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) ); - N -= num_to_read; - reader->Skip(pending_advance); - } + /* Write table description header */ + { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) ); + op += hSize; + } - Report("snappy_compress", written, uncompressed_size); + /* Compress */ + CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) ); + { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) ); + if (cSize == 0) return 0; /* not enough space for compressed data */ + op += cSize; + } - return written; + return (size_t)(op-ostart); } -// ----------------------------------------------------------------------- -// IOVec interfaces -// ----------------------------------------------------------------------- - -// A type that writes to an iovec. -// Note that this is not a "ByteSink", but a type that matches the -// Writer template argument to SnappyDecompressor::DecompressAllTags(). -class SnappyIOVecWriter { - private: - // output_iov_end_ is set to iov + count and used to determine when - // the end of the iovs is reached. - const struct iovec* output_iov_end_; - -#if !defined(NDEBUG) - const struct iovec* output_iov_; -#endif // !defined(NDEBUG) - // Current iov that is being written into. - const struct iovec* curr_iov_; +struct HUF_CElt_s { + U16 val; + BYTE nbBits; +}; /* typedef'd to HUF_CElt within "zstd/common/huf.h" */ - // Pointer to current iov's write location. - char* curr_iov_output_; +/*! HUF_writeCTable() : + `CTable` : Huffman tree to save, using huf representation. 
+ @return : size of saved CTable */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, + const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog) +{ + BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ + BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; + BYTE* op = (BYTE*)dst; + U32 n; - // Remaining bytes to write into curr_iov_output. - size_t curr_iov_remaining_; + /* check conditions */ + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); - // Total bytes decompressed into output_iov_ so far. - size_t total_written_; + /* convert to weight */ + bitsToWeight[0] = 0; + for (n=1; n1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */ + op[0] = (BYTE)hSize; + return hSize+1; + } } - static inline char* GetIOVecPointer(const struct iovec* iov, size_t offset) { - return reinterpret_cast(iov->iov_base) + offset; - } + /* write raw values as 4-bits (max : 15) */ + if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ + if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ + op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1)); + huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */ + for (n=0; n(iov->iov_base) - : nullptr), - curr_iov_remaining_(iov_count ? 
iov->iov_len : 0), - total_written_(0), - output_limit_(-1) {} - inline void SetExpectedLength(size_t len) { - output_limit_ = len; - } +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights) +{ + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */ + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ + U32 tableLog = 0; + U32 nbSymbols = 0; - inline bool CheckLength() const { - return total_written_ == output_limit_; - } + /* get symbol weights */ + CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize)); - inline bool Append(const char* ip, size_t len) { - if (total_written_ + len > output_limit_) { - return false; - } + /* check result */ + if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); - return AppendNoCheck(ip, len); - } + /* Prepare base value per rank */ + { U32 n, nextRankStart = 0; + for (n=1; n<=tableLog; n++) { + U32 current = nextRankStart; + nextRankStart += (rankVal[n] << (n-1)); + rankVal[n] = current; + } } - inline bool AppendNoCheck(const char* ip, size_t len) { - while (len > 0) { - if (curr_iov_remaining_ == 0) { - // This iovec is full. Go to the next one. 
- if (curr_iov_ + 1 >= output_iov_end_) { - return false; - } - ++curr_iov_; - curr_iov_output_ = reinterpret_cast(curr_iov_->iov_base); - curr_iov_remaining_ = curr_iov_->iov_len; - } + /* fill nbBits */ + *hasZeroWeights = 0; + { U32 n; for (n=0; nn=tableLog+1 */ + U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; + { U32 n; for (n=0; n0; n--) { /* start at n=tablelog <-> w=1 */ + valPerRank[n] = min; /* get starting value within each rank */ + min += nbPerRank[n]; + min >>= 1; + } } + /* assign value within rank, symbol order */ + { U32 n; for (n=0; n= 16 + kMaximumTagLength && space_left >= 16 && - curr_iov_remaining_ >= 16) { - // Fast path, used for the majority (about 95%) of invocations. - UnalignedCopy128(ip, curr_iov_output_); - curr_iov_output_ += len; - curr_iov_remaining_ -= len; - total_written_ += len; - return true; - } +U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue) +{ + const HUF_CElt* table = (const HUF_CElt*)symbolTable; + assert(symbolValue <= HUF_SYMBOLVALUE_MAX); + return table[symbolValue].nbBits; +} - return false; - } - inline bool AppendFromSelf(size_t offset, size_t len) { - // See SnappyArrayWriter::AppendFromSelf for an explanation of - // the "offset - 1u" trick. - if (offset - 1u >= total_written_) { - return false; - } - const size_t space_left = output_limit_ - total_written_; - if (len > space_left) { - return false; - } +typedef struct nodeElt_s { + U32 count; + U16 parent; + BYTE byte; + BYTE nbBits; +} nodeElt; - // Locate the iovec from which we need to start the copy. 
- const iovec* from_iov = curr_iov_; - size_t from_iov_offset = curr_iov_->iov_len - curr_iov_remaining_; - while (offset > 0) { - if (from_iov_offset >= offset) { - from_iov_offset -= offset; - break; - } +static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) +{ + const U32 largestBits = huffNode[lastNonNull].nbBits; + if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */ - offset -= from_iov_offset; - --from_iov; -#if !defined(NDEBUG) - assert(from_iov >= output_iov_); -#endif // !defined(NDEBUG) - from_iov_offset = from_iov->iov_len; - } + /* there are several too large elements (at least >= 2) */ + { int totalCost = 0; + const U32 baseCost = 1 << (largestBits - maxNbBits); + int n = (int)lastNonNull; - // Copy bytes starting from the iovec pointed to by from_iov_index to - // the current iovec. - while (len > 0) { - assert(from_iov <= curr_iov_); - if (from_iov != curr_iov_) { - const size_t to_copy = - std::min((unsigned long)(from_iov->iov_len - from_iov_offset), (unsigned long)len); - AppendNoCheck(GetIOVecPointer(from_iov, from_iov_offset), to_copy); - len -= to_copy; - if (len > 0) { - ++from_iov; - from_iov_offset = 0; - } - } else { - size_t to_copy = curr_iov_remaining_; - if (to_copy == 0) { - // This iovec is full. Go to the next one. 
- if (curr_iov_ + 1 >= output_iov_end_) { - return false; - } - ++curr_iov_; - curr_iov_output_ = reinterpret_cast(curr_iov_->iov_base); - curr_iov_remaining_ = curr_iov_->iov_len; - continue; - } - if (to_copy > len) { - to_copy = len; - } + while (huffNode[n].nbBits > maxNbBits) { + totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); + huffNode[n].nbBits = (BYTE)maxNbBits; + n --; + } /* n stops at huffNode[n].nbBits <= maxNbBits */ + while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */ - IncrementalCopy(GetIOVecPointer(from_iov, from_iov_offset), - curr_iov_output_, curr_iov_output_ + to_copy, - curr_iov_output_ + curr_iov_remaining_); - curr_iov_output_ += to_copy; - curr_iov_remaining_ -= to_copy; - from_iov_offset += to_copy; - total_written_ += to_copy; - len -= to_copy; - } - } + /* renorm totalCost */ + totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */ - return true; - } + /* repay normalized cost */ + { U32 const noSymbol = 0xF0F0F0F0; + U32 rankLast[HUF_TABLELOG_MAX+2]; - inline void Flush() {} -}; + /* Get pos of last (smallest) symbol per rank */ + memset(rankLast, 0xF0, sizeof(rankLast)); + { U32 currentNbBits = maxNbBits; + int pos; + for (pos=n ; pos >= 0; pos--) { + if (huffNode[pos].nbBits >= currentNbBits) continue; + currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ + rankLast[maxNbBits-currentNbBits] = (U32)pos; + } } -bool RawUncompressToIOVec(const char* compressed, size_t compressed_length, - const struct iovec* iov, size_t iov_cnt) { - ByteArraySource reader(compressed, compressed_length); - return RawUncompressToIOVec(&reader, iov, iov_cnt); -} + while (totalCost > 0) { + U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1; + for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { + U32 const highPos = rankLast[nBitsToDecrease]; + U32 const lowPos = rankLast[nBitsToDecrease-1]; + if (highPos == noSymbol) continue; + if 
(lowPos == noSymbol) break; + { U32 const highTotal = huffNode[highPos].count; + U32 const lowTotal = 2 * huffNode[lowPos].count; + if (highTotal <= lowTotal) break; + } } + /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */ + /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ + while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) + nBitsToDecrease ++; + totalCost -= 1 << (nBitsToDecrease-1); + if (rankLast[nBitsToDecrease-1] == noSymbol) + rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */ + huffNode[rankLast[nBitsToDecrease]].nbBits ++; + if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ + rankLast[nBitsToDecrease] = noSymbol; + else { + rankLast[nBitsToDecrease]--; + if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) + rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ + } } /* while (totalCost > 0) */ -bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov, - size_t iov_cnt) { - SnappyIOVecWriter output(iov, iov_cnt); - return InternalUncompress(compressed, &output); + while (totalCost < 0) { /* Sometimes, cost correction overshoot */ + if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ + while (huffNode[n].nbBits == maxNbBits) n--; + huffNode[n+1].nbBits--; + assert(n >= 0); + rankLast[1] = (U32)(n+1); + totalCost++; + continue; + } + huffNode[ rankLast[1] + 1 ].nbBits--; + rankLast[1]++; + totalCost ++; + } } } /* there are several too large elements (at least >= 2) */ + + return maxNbBits; } -// ----------------------------------------------------------------------- -// Flat array interfaces -// ----------------------------------------------------------------------- +typedef struct { + U32 base; + U32 current; 
+} rankPos; -// A type that writes to a flat array. -// Note that this is not a "ByteSink", but a type that matches the -// Writer template argument to SnappyDecompressor::DecompressAllTags(). -class SnappyArrayWriter { - private: - char* base_; - char* op_; - char* op_limit_; +typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; - public: - inline explicit SnappyArrayWriter(char* dst) - : base_(dst), - op_(dst), - op_limit_(dst) { - } +#define RANK_POSITION_TABLE_SIZE 32 - inline void SetExpectedLength(size_t len) { - op_limit_ = op_ + len; - } +typedef struct { + huffNodeTable huffNodeTbl; + rankPos rankPosition[RANK_POSITION_TABLE_SIZE]; +} HUF_buildCTable_wksp_tables; - inline bool CheckLength() const { - return op_ == op_limit_; - } +static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition) +{ + U32 n; - inline bool Append(const char* ip, size_t len) { - char* op = op_; - const size_t space_left = op_limit_ - op; - if (space_left < len) { - return false; + memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE); + for (n=0; n<=maxSymbolValue; n++) { + U32 r = BIT_highbit32(count[n] + 1); + rankPosition[r].base ++; } - memcpy(op, ip, len); - op_ = op + len; - return true; - } - - inline bool TryFastAppend(const char* ip, size_t available, size_t len) { - char* op = op_; - const size_t space_left = op_limit_ - op; - if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) { - // Fast path, used for the majority (about 95%) of invocations. 
- UnalignedCopy128(ip, op); - op_ = op + len; - return true; - } else { - return false; + for (n=30; n>0; n--) rankPosition[n-1].base += rankPosition[n].base; + for (n=0; n<32; n++) rankPosition[n].current = rankPosition[n].base; + for (n=0; n<=maxSymbolValue; n++) { + U32 const c = count[n]; + U32 const r = BIT_highbit32(c+1) + 1; + U32 pos = rankPosition[r].current++; + while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) { + huffNode[pos] = huffNode[pos-1]; + pos--; + } + huffNode[pos].count = c; + huffNode[pos].byte = (BYTE)n; } - } +} - inline bool AppendFromSelf(size_t offset, size_t len) { - char* const op_end = op_ + len; - - // Check if we try to append from before the start of the buffer. - // Normally this would just be a check for "produced < offset", - // but "produced <= offset - 1u" is equivalent for every case - // except the one where offset==0, where the right side will wrap around - // to a very big number. This is convenient, as offset==0 is another - // invalid case that we also want to catch, so that we do not go - // into an infinite loop. - if (Produced() <= offset - 1u || op_end > op_limit_) return false; - op_ = IncrementalCopy(op_ - offset, op_, op_end, op_limit_); - return true; - } - inline size_t Produced() const { - assert(op_ >= base_); - return op_ - base_; - } - inline void Flush() {} -}; +/** HUF_buildCTable_wksp() : + * Same as HUF_buildCTable(), but using externally allocated scratch buffer. + * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables). 
+ */ +#define STARTNODE (HUF_SYMBOLVALUE_MAX+1) -bool RawUncompress(const char* compressed, size_t n, char* uncompressed) { - ByteArraySource reader(compressed, n); - return RawUncompress(&reader, uncompressed); -} +size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) +{ + HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace; + nodeElt* const huffNode0 = wksp_tables->huffNodeTbl; + nodeElt* const huffNode = huffNode0+1; + int nonNullRank; + int lowS, lowN; + int nodeNb = STARTNODE; + int n, nodeRoot; -bool RawUncompress(Source* compressed, char* uncompressed) { - SnappyArrayWriter output(uncompressed); - return InternalUncompress(compressed, &output); -} + /* safety checks */ + if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) + return ERROR(workSpace_tooSmall); + if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) + return ERROR(maxSymbolValue_tooLarge); + memset(huffNode0, 0, sizeof(huffNodeTable)); -bool Uncompress(const char* compressed, size_t n, string* uncompressed) { - size_t ulength; - if (!GetUncompressedLength(compressed, n, &ulength)) { - return false; - } - // On 32-bit builds: max_size() < kuint32max. Check for that instead - // of crashing (e.g., consider externally specified compressed data). 
- if (ulength > uncompressed->max_size()) { - return false; - } - STLStringResizeUninitialized(uncompressed, ulength); - return RawUncompress(compressed, n, string_as_array(uncompressed)); -} + /* sort, decreasing order */ + HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition); -// A Writer that drops everything on the floor and just does validation -class SnappyDecompressionValidator { - private: - size_t expected_; - size_t produced_; + /* init for parents */ + nonNullRank = (int)maxSymbolValue; + while(huffNode[nonNullRank].count == 0) nonNullRank--; + lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; + huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; + huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb; + nodeNb++; lowS-=2; + for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); + huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */ - public: - inline SnappyDecompressionValidator() : expected_(0), produced_(0) { } - inline void SetExpectedLength(size_t len) { - expected_ = len; - } - inline bool CheckLength() const { - return expected_ == produced_; - } - inline bool Append(const char* ip, size_t len) { - produced_ += len; - return produced_ <= expected_; - } - inline bool TryFastAppend(const char* ip, size_t available, size_t length) { - return false; - } - inline bool AppendFromSelf(size_t offset, size_t len) { - // See SnappyArrayWriter::AppendFromSelf for an explanation of - // the "offset - 1u" trick. - if (produced_ <= offset - 1u) return false; - produced_ += len; - return produced_ <= expected_; - } - inline void Flush() {} -}; + /* create parents */ + while (nodeNb <= nodeRoot) { + int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? 
lowS-- : lowN++; + huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; + huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb; + nodeNb++; + } -bool IsValidCompressedBuffer(const char* compressed, size_t n) { - ByteArraySource reader(compressed, n); - SnappyDecompressionValidator writer; - return InternalUncompress(&reader, &writer); -} + /* distribute weights (unlimited tree height) */ + huffNode[nodeRoot].nbBits = 0; + for (n=nodeRoot-1; n>=STARTNODE; n--) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + for (n=0; n<=nonNullRank; n++) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; -bool IsValidCompressed(Source* compressed) { - SnappyDecompressionValidator writer; - return InternalUncompress(compressed, &writer); -} + /* enforce maxTableLog */ + maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits); -void RawCompress(const char* input, - size_t input_length, - char* compressed, - size_t* compressed_length) { - ByteArraySource reader(input, input_length); - UncheckedByteArraySink writer(compressed); - Compress(&reader, &writer); + /* fill result into tree (val, nbBits) */ + { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; + U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; + int const alphabetSize = (int)(maxSymbolValue + 1); + if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ + for (n=0; n<=nonNullRank; n++) + nbPerRank[huffNode[n].nbBits]++; + /* determine stating value per rank */ + { U16 min = 0; + for (n=(int)maxNbBits; n>0; n--) { + valPerRank[n] = min; /* get starting value within each rank */ + min += nbPerRank[n]; + min >>= 1; + } } + for (n=0; nresize(compressed_length); - return compressed_length; +/** HUF_buildCTable() : + * @return : maxNbBits + * Note : count is used before tree is written, so they can safely overlap + */ +size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits) +{ + HUF_buildCTable_wksp_tables 
workspace; + return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace)); } -// ----------------------------------------------------------------------- -// Sink interface -// ----------------------------------------------------------------------- - -// A type that decompresses into a Sink. The template parameter -// Allocator must export one method "char* Allocate(int size);", which -// allocates a buffer of "size" and appends that to the destination. -template -class SnappyScatteredWriter { - Allocator allocator_; - - // We need random access into the data generated so far. Therefore - // we keep track of all of the generated data as an array of blocks. - // All of the blocks except the last have length kBlockSize. - std::vector blocks_; - size_t expected_; +size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) +{ + size_t nbBits = 0; + int s; + for (s = 0; s <= (int)maxSymbolValue; ++s) { + nbBits += CTable[s].nbBits * count[s]; + } + return nbBits >> 3; +} - // Total size of all fully generated blocks so far - size_t full_size_; +int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { + int bad = 0; + int s; + for (s = 0; s <= (int)maxSymbolValue; ++s) { + bad |= (count[s] != 0) & (CTable[s].nbBits == 0); + } + return !bad; +} - // Pointer into current output block - char* op_base_; // Base of output block - char* op_ptr_; // Pointer to next unfilled byte in block - char* op_limit_; // Pointer just past block +size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } - inline size_t Size() const { - return full_size_ + (op_ptr_ - op_base_); - } +FORCE_INLINE_TEMPLATE void +HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) +{ + BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); +} - bool SlowAppend(const char* ip, size_t len); - bool SlowAppendFromSelf(size_t offset, size_t 
len); +#define HUF_FLUSHBITS(s) BIT_flushBits(s) - public: - inline explicit SnappyScatteredWriter(const Allocator& allocator) - : allocator_(allocator), - full_size_(0), - op_base_(NULL), - op_ptr_(NULL), - op_limit_(NULL) { - } +#define HUF_FLUSHBITS_1(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream) - inline void SetExpectedLength(size_t len) { - assert(blocks_.empty()); - expected_ = len; - } +#define HUF_FLUSHBITS_2(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) - inline bool CheckLength() const { - return Size() == expected_; - } +FORCE_INLINE_TEMPLATE size_t +HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + const BYTE* ip = (const BYTE*) src; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + size_t n; + BIT_CStream_t bitC; - // Return the number of bytes actually uncompressed so far - inline size_t Produced() const { - return Size(); - } + /* init */ + if (dstSize < 8) return 0; /* not enough space to compress */ + { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op)); + if (HUF_isError(initErr)) return 0; } - inline bool Append(const char* ip, size_t len) { - size_t avail = op_limit_ - op_ptr_; - if (len <= avail) { - // Fast path - memcpy(op_ptr_, ip, len); - op_ptr_ += len; - return true; - } else { - return SlowAppend(ip, len); + n = srcSize & ~3; /* join to mod 4 */ + switch (srcSize & 3) + { + case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); + HUF_FLUSHBITS_2(&bitC); + /* fall-through */ + case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); + HUF_FLUSHBITS_1(&bitC); + /* fall-through */ + case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); + HUF_FLUSHBITS(&bitC); + /* fall-through */ + case 0 : /* fall-through */ + default: break; } - } - inline bool TryFastAppend(const char* ip, size_t available, size_t 
length) { - char* op = op_ptr_; - const int space_left = op_limit_ - op; - if (length <= 16 && available >= 16 + kMaximumTagLength && - space_left >= 16) { - // Fast path, used for the majority (about 95%) of invocations. - UnalignedCopy128(ip, op); - op_ptr_ = op + length; - return true; - } else { - return false; + for (; n>0; n-=4) { /* note : n&3==0 at this stage */ + HUF_encodeSymbol(&bitC, ip[n- 1], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 2], CTable); + HUF_FLUSHBITS_2(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 3], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 4], CTable); + HUF_FLUSHBITS(&bitC); } - } - inline bool AppendFromSelf(size_t offset, size_t len) { - char* const op_end = op_ptr_ + len; - // See SnappyArrayWriter::AppendFromSelf for an explanation of - // the "offset - 1u" trick. - if (SNAPPY_PREDICT_TRUE(offset - 1u < (size_t)(op_ptr_ - op_base_) && - op_end <= op_limit_)) { - // Fast path: src and dst in current block. - op_ptr_ = IncrementalCopy(op_ptr_ - offset, op_ptr_, op_end, op_limit_); - return true; - } - return SlowAppendFromSelf(offset, len); - } + return BIT_closeCStream(&bitC); +} - // Called at the end of the decompress. We ask the allocator - // write all blocks to the sink. 
- inline void Flush() { allocator_.Flush(Produced()); } -}; +#if DYNAMIC_BMI2 -template -bool SnappyScatteredWriter::SlowAppend(const char* ip, size_t len) { - size_t avail = op_limit_ - op_ptr_; - while (len > avail) { - // Completely fill this block - memcpy(op_ptr_, ip, avail); - op_ptr_ += avail; - assert(op_limit_ - op_ptr_ == 0); - full_size_ += (op_ptr_ - op_base_); - len -= avail; - ip += avail; +static TARGET_ATTRIBUTE("bmi2") size_t +HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} - // Bounds check - if (full_size_ + len > expected_) { - return false; +static size_t +HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +static size_t +HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, const int bmi2) +{ + if (bmi2) { + return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); } + return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); +} - // Make new block - size_t bsize = std::min(kBlockSize, expected_ - full_size_); - op_base_ = allocator_.Allocate(bsize); - op_ptr_ = op_base_; - op_limit_ = op_base_ + bsize; - blocks_.push_back(op_base_); - avail = bsize; - } +#else - memcpy(op_ptr_, ip, len); - op_ptr_ += len; - return true; +static size_t +HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, const int bmi2) +{ + (void)bmi2; + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); } -template -bool SnappyScatteredWriter::SlowAppendFromSelf(size_t offset, - size_t len) { - // 
Overflow check - // See SnappyArrayWriter::AppendFromSelf for an explanation of - // the "offset - 1u" trick. - const size_t cur = Size(); - if (offset - 1u >= cur) return false; - if (expected_ - cur < len) return false; +#endif - // Currently we shouldn't ever hit this path because Compress() chops the - // input into blocks and does not create cross-block copies. However, it is - // nice if we do not rely on that, since we can get better compression if we - // allow cross-block copies and thus might want to change the compressor in - // the future. - size_t src = cur - offset; - while (len-- > 0) { - char c = blocks_[src >> kBlockLog][src & (kBlockSize-1)]; - Append(&c, 1); - src++; - } - return true; +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); } -class SnappySinkAllocator { - public: - explicit SnappySinkAllocator(Sink* dest): dest_(dest) {} - ~SnappySinkAllocator() {} - char* Allocate(int size) { - Datablock block(new char[size], size); - blocks_.push_back(block); - return block.data; - } +static size_t +HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, int bmi2) +{ + size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ + const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; - // We flush only at the end, because the writer wants - // random access to the blocks and once we hand the - // block over to the sink, we can't access it anymore. - // Also we don't write more than has been actually written - // to the blocks. 
- void Flush(size_t size) { - size_t size_written = 0; - size_t block_size; - for (size_t i = 0; i < blocks_.size(); ++i) { - block_size = std::min(blocks_[i].size, size - size_written); - dest_->AppendAndTakeOwnership(blocks_[i].data, block_size, - &SnappySinkAllocator::Deleter, NULL); - size_written += block_size; - } - blocks_.clear(); - } + if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ + if (srcSize < 12) return 0; /* no saving possible : too small input */ + op += 6; /* jumpTable */ - private: - struct Datablock { - char* data; - size_t size; - Datablock(char* p, size_t s) : data(p), size(s) {} - }; + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart, (U16)cSize); + op += cSize; + } - static void Deleter(void* arg, const char* bytes, size_t size) { - delete[] bytes; - } + ip += segmentSize; + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart+2, (U16)cSize); + op += cSize; + } - Sink* dest_; - std::vector blocks_; + ip += segmentSize; + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart+4, (U16)cSize); + op += cSize; + } - // Note: copying this object is allowed -}; + ip += segmentSize; + assert(op <= oend); + assert(ip <= iend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) ); + if (cSize==0) return 0; + op += cSize; + } -size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed) { - SnappySinkAllocator allocator(uncompressed); - SnappyScatteredWriter 
writer(allocator); - InternalUncompress(compressed, &writer); - return writer.Produced(); + return (size_t)(op-ostart); } -bool Uncompress(Source* compressed, Sink* uncompressed) { - // Read the uncompressed length from the front of the compressed input - SnappyDecompressor decompressor(compressed); - uint32 uncompressed_len = 0; - if (!decompressor.ReadUncompressedLength(&uncompressed_len)) { - return false; - } +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); +} - char c; - size_t allocated_size; - char* buf = uncompressed->GetAppendBufferVariable( - 1, uncompressed_len, &c, 1, &allocated_size); +typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; - const size_t compressed_len = compressed->Available(); - // If we can get a flat buffer, then use it, otherwise do block by block - // uncompression - if (allocated_size >= uncompressed_len) { - SnappyArrayWriter writer(buf); - bool result = InternalUncompressAllTags(&decompressor, &writer, - compressed_len, uncompressed_len); - uncompressed->Append(buf, writer.Produced()); - return result; - } else { - SnappySinkAllocator allocator(uncompressed); - SnappyScatteredWriter writer(allocator); - return InternalUncompressAllTags(&decompressor, &writer, compressed_len, - uncompressed_len); - } +static size_t HUF_compressCTable_internal( + BYTE* const ostart, BYTE* op, BYTE* const oend, + const void* src, size_t srcSize, + HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) +{ + size_t const cSize = (nbStreams==HUF_singleStream) ? 
+ HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) : + HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2); + if (HUF_isError(cSize)) { return cSize; } + if (cSize==0) { return 0; } /* uncompressible */ + op += cSize; + /* check compressibility */ + assert(op >= ostart); + if ((size_t)(op-ostart) >= srcSize-1) { return 0; } + return (size_t)(op-ostart); } -} // namespace snappy +typedef struct { + unsigned count[HUF_SYMBOLVALUE_MAX + 1]; + HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; + HUF_buildCTable_wksp_tables buildCTable_wksp; +} HUF_compress_tables_t; +/* HUF_compress_internal() : + * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ +static size_t +HUF_compress_internal (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + HUF_nbStreams_e nbStreams, + void* workSpace, size_t wkspSize, + HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, + const int bmi2) +{ + HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; -// LICENSE_CHANGE_END + HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE); + /* checks & inits */ + if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall); + if (!srcSize) return 0; /* Uncompressed */ + if (!dstSize) return 0; /* cannot fit anything within dst budget */ + if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ + if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); + if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; + if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; -// LICENSE_CHANGE_BEGIN -// The following 
code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list + /* Heuristic : If old table is valid, use it for small inputs */ + if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); + } -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ + /* Scan input and build symbol stats */ + { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) ); + if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ + if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ + } + /* Check validity of previous table */ + if ( repeat + && *repeat == HUF_repeat_check + && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { + *repeat = HUF_repeat_none; + } + /* Heuristic : use existing table for small inputs */ + if (preferRepeat && repeat && *repeat != HUF_repeat_none) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); + } + /* Build Huffman Tree */ + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, + maxSymbolValue, huffLog, + &table->buildCTable_wksp, sizeof(table->buildCTable_wksp)); + CHECK_F(maxBits); + huffLog = (U32)maxBits; + /* Zero unused symbols in CTable, so we can check it for validity */ + memset(table->CTable + (maxSymbolValue + 1), 0, + sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt))); + } -namespace duckdb_apache { -namespace thrift { -namespace protocol { + /* Write table description header */ + { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) ); + /* Check if using previous huffman table is beneficial */ + if (repeat && *repeat != HUF_repeat_none) { + size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); + size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue); + if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); + } } -TProtocol::~TProtocol() = default; 
-uint32_t TProtocol::skip_virt(TType type) { - return ::duckdb_apache::thrift::protocol::skip(*this, type); + /* Use the new huffman table */ + if (hSize + 12ul >= srcSize) { return 0; } + op += hSize; + if (repeat) { *repeat = HUF_repeat_none; } + if (oldHufTable) + memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */ + } + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, table->CTable, bmi2); } -TProtocolFactory::~TProtocolFactory() = default; - -}}} // duckdb_apache::thrift::protocol - -// LICENSE_CHANGE_END +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_singleStream, + workSpace, wkspSize, + NULL, NULL, 0, 0 /*bmi2*/); +} -//===----------------------------------------------------------------------===// -// DuckDB -// -// parquet_timestamp.hpp -// -// -//===----------------------------------------------------------------------===// +size_t HUF_compress1X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_singleStream, + workSpace, wkspSize, hufTable, + repeat, preferRepeat, bmi2); +} +size_t HUF_compress1X (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) +{ + unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; + return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); +} +/* HUF_compress4X_repeat(): + * compress input using 4 streams. 
+ * provide workspace to generate compression tables */ +size_t HUF_compress4X_wksp (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_fourStreams, + workSpace, wkspSize, + NULL, NULL, 0, 0 /*bmi2*/); +} -#include "duckdb.hpp" +/* HUF_compress4X_repeat(): + * compress input using 4 streams. + * re-use an existing huffman compression table */ +size_t HUF_compress4X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_fourStreams, + workSpace, wkspSize, + hufTable, repeat, preferRepeat, bmi2); +} -namespace duckdb { +size_t HUF_compress2 (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) +{ + unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; + return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); +} -struct Int96 { - uint32_t value[3]; -}; +size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT); +} -int64_t ImpalaTimestampToNanoseconds(const Int96 &impala_timestamp); -timestamp_t ImpalaTimestampToTimestamp(const Int96 &raw_ts); -Int96 TimestampToImpalaTimestamp(timestamp_t &ts); -timestamp_t ParquetTimestampMicrosToTimestamp(const int64_t &raw_ts); -timestamp_t ParquetTimestampMsToTimestamp(const int64_t &raw_ts); -date_t ParquetIntToDate(const int32_t &raw_date); +} -} // namespace duckdb +// LICENSE_CHANGE_END // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY 
LICENSE #4 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 // See the end of this file for a list +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ +/*-************************************* +* Dependencies +***************************************/ +#include /* INT_MAX */ +#include /* memset */ -#include -#include -#include - -namespace duckdb { - -enum class UnicodeType {INVALID, ASCII, UNICODE}; - - -class Utf8Proc { -public: - //! Distinguishes ASCII, Valid UTF8 and Invalid UTF8 strings - static UnicodeType Analyze(const char *s, size_t len); - //! Performs UTF NFC normalization of string, return value needs to be free'd - static char* Normalize(const char* s, size_t len); - //! Returns whether or not the UTF8 string is valid - static bool IsValid(const char *s, size_t len); - //! Returns the position (in bytes) of the next grapheme cluster - static size_t NextGraphemeCluster(const char *s, size_t len, size_t pos); - //! Returns the position (in bytes) of the previous grapheme cluster - static size_t PreviousGraphemeCluster(const char *s, size_t len, size_t pos); - - //! Transform a codepoint to utf8 and writes it to "c", sets "sz" to the size of the codepoint - static bool CodepointToUtf8(int cp, int &sz, char *c); - //! Returns the codepoint length in bytes when encoded in UTF8 - static int CodepointLength(int cp); - //! 
Transform a UTF8 string to a codepoint; returns the codepoint and writes the length of the codepoint (in UTF8) to sz - static int32_t UTF8ToCodepoint(const char *c, int &sz); - static size_t RenderWidth(const char *s, size_t len, size_t pos); - -}; - -} - + /* HIST_countFast_wksp */ -// LICENSE_CHANGE_END // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #5 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 // See the end of this file for a list -//===----------------------------------------------------------------------===// -// DuckDB -// -// miniz_wrapper.hpp -// -// -//===----------------------------------------------------------------------===// +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ +/* This header contains definitions + * that shall **only** be used by modules within lib/compress. + */ + +#ifndef ZSTD_COMPRESS_H +#define ZSTD_COMPRESS_H +/*-************************************* +* Dependencies +***************************************/ // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #5 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 // See the end of this file for a list -/* miniz.c 2.0.8 - public domain deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing - See "unlicense" statement at the end of this file. - Rich Geldreich , last updated Oct. 13, 2013 - Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt - - Most API's defined in miniz.c are optional. 
For example, to disable the archive related functions just define - MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO (see the list below for more macros). - - * Low-level Deflate/Inflate implementation notes: +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ - Compression: Use the "tdefl" API's. The compressor supports raw, static, and dynamic blocks, lazy or - greedy parsing, match length filtering, RLE-only, and Huffman-only streams. It performs and compresses - approximately as well as zlib. +#ifndef ZSTD_CWKSP_H +#define ZSTD_CWKSP_H - Decompression: Use the "tinfl" API's. The entire decompressor is implemented as a single function - coroutine: see tinfl_decompress(). It supports decompression into a 32KB (or larger power of 2) wrapping buffer, or into a memory - block large enough to hold the entire file. +/*-************************************* +* Dependencies +***************************************/ - The low-level tdefl/tinfl API's do not make any use of dynamic memory allocation. - * zlib-style API notes: +/*-************************************* +* Constants +***************************************/ - miniz.c implements a fairly large subset of zlib. There's enough functionality present for it to be a drop-in - zlib replacement in many apps: - The z_stream struct, optional memory allocation callbacks - deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound - inflateInit/inflateInit2/inflate/inflateEnd - compress, compress2, compressBound, uncompress - CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly routines. 
- Supports raw deflate streams or standard zlib streams with adler-32 checking. +/* Since the workspace is effectively its own little malloc implementation / + * arena, when we run under ASAN, we should similarly insert redzones between + * each internal element of the workspace, so ASAN will catch overruns that + * reach outside an object but that stay inside the workspace. + * + * This defines the size of that redzone. + */ +#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE +#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128 +#endif - Limitations: - The callback API's are not implemented yet. No support for gzip headers or zlib static dictionaries. - I've tried to closely emulate zlib's various flavors of stream flushing and return status codes, but - there are no guarantees that miniz.c pulls this off perfectly. +namespace duckdb_zstd { - * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, originally written by - Alex Evans. Supports 1-4 bytes/pixel images. +/*-************************************* +* Structures +***************************************/ +typedef enum { + ZSTD_cwksp_alloc_objects, + ZSTD_cwksp_alloc_buffers, + ZSTD_cwksp_alloc_aligned +} ZSTD_cwksp_alloc_phase_e; - * ZIP archive API notes: +/** + * Zstd fits all its internal datastructures into a single continuous buffer, + * so that it only needs to perform a single OS allocation (or so that a buffer + * can be provided to it and it can perform no allocations at all). This buffer + * is called the workspace. + * + * Several optimizations complicate that process of allocating memory ranges + * from this workspace for each internal datastructure: + * + * - These different internal datastructures have different setup requirements: + * + * - The static objects need to be cleared once and can then be trivially + * reused for each compression. + * + * - Various buffers don't need to be initialized at all--they are always + * written into before they're read. 
+ * + * - The matchstate tables have a unique requirement that they don't need + * their memory to be totally cleared, but they do need the memory to have + * some bound, i.e., a guarantee that all values in the memory they've been + * allocated is less than some maximum value (which is the starting value + * for the indices that they will then use for compression). When this + * guarantee is provided to them, they can use the memory without any setup + * work. When it can't, they have to clear the area. + * + * - These buffers also have different alignment requirements. + * + * - We would like to reuse the objects in the workspace for multiple + * compressions without having to perform any expensive reallocation or + * reinitialization work. + * + * - We would like to be able to efficiently reuse the workspace across + * multiple compressions **even when the compression parameters change** and + * we need to resize some of the objects (where possible). + * + * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp + * abstraction was created. It works as follows: + * + * Workspace Layout: + * + * [ ... workspace ... ] + * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers] + * + * The various objects that live in the workspace are divided into the + * following categories, and are allocated separately: + * + * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict, + * so that literally everything fits in a single buffer. Note: if present, + * this must be the first object in the workspace, since ZSTD_free{CCtx, + * CDict}() rely on a pointer comparison to see whether one or two frees are + * required. + * + * - Fixed size objects: these are fixed-size, fixed-count objects that are + * nonetheless "dynamically" allocated in the workspace so that we can + * control how they're initialized separately from the broader ZSTD_CCtx. 
+ * Examples: + * - Entropy Workspace + * - 2 x ZSTD_compressedBlockState_t + * - CDict dictionary contents + * + * - Tables: these are any of several different datastructures (hash tables, + * chain tables, binary trees) that all respect a common format: they are + * uint32_t arrays, all of whose values are between 0 and (nextSrc - base). + * Their sizes depend on the cparams. + * + * - Aligned: these buffers are used for various purposes that require 4 byte + * alignment, but don't require any initialization before they're used. + * + * - Buffers: these buffers are used for various purposes that don't require + * any alignment or initialization before they're used. This means they can + * be moved around at no cost for a new compression. + * + * Allocating Memory: + * + * The various types of objects must be allocated in order, so they can be + * correctly packed into the workspace buffer. That order is: + * + * 1. Objects + * 2. Buffers + * 3. Aligned + * 4. Tables + * + * Attempts to reserve objects of different types out of order will fail. + */ +typedef struct { + void* workspace; + void* workspaceEnd; - The ZIP archive API's where designed with simplicity and efficiency in mind, with just enough abstraction to - get the job done with minimal fuss. There are simple API's to retrieve file information, read files from - existing archives, create new archives, append new files to existing archives, or clone archive data from - one archive to another. It supports archives located in memory or the heap, on disk (using stdio.h), - or you can specify custom file read/write callbacks. 
+ void* objectEnd; + void* tableEnd; + void* tableValidEnd; + void* allocStart; - - Archive reading: Just call this function to read a single file from a disk archive: + int allocFailed; + int workspaceOversizedDuration; + ZSTD_cwksp_alloc_phase_e phase; +} ZSTD_cwksp; - void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, - size_t *pSize, mz_uint zip_flags); +/*-************************************* +* Functions +***************************************/ - For more complex cases, use the "mz_zip_reader" functions. Upon opening an archive, the entire central - directory is located and read as-is into memory, and subsequent file access only occurs when reading individual files. +MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws); - - Archives file scanning: The simple way is to use this function to scan a loaded archive for a specific file: +MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) { + (void)ws; + assert(ws->workspace <= ws->objectEnd); + assert(ws->objectEnd <= ws->tableEnd); + assert(ws->objectEnd <= ws->tableValidEnd); + assert(ws->tableEnd <= ws->allocStart); + assert(ws->tableValidEnd <= ws->allocStart); + assert(ws->allocStart <= ws->workspaceEnd); +} - int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags); +/** + * Align must be a power of 2. + */ +MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) { + size_t const mask = align - 1; + assert((align & mask) == 0); + return (size + mask) & ~mask; +} - The locate operation can optionally check file comments too, which (as one example) can be used to identify - multiple versions of the same file in an archive. This function uses a simple linear search through the central - directory, so it's not very fast. +/** + * Use this to determine how much space in the workspace we will consume to + * allocate this object. 
(Normally it should be exactly the size of the object, + * but under special conditions, like ASAN, where we pad each object, it might + * be larger.) + * + * Since tables aren't currently redzoned, you don't need to call through this + * to figure out how much space you need for the matchState tables. Everything + * else is though. + */ +MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) { +#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; +#else + return size; +#endif +} - Alternately, you can iterate through all the files in an archive (using mz_zip_reader_get_num_files()) and - retrieve detailed info on each file by calling mz_zip_reader_file_stat(). +MEM_STATIC void ZSTD_cwksp_internal_advance_phase( + ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) { + assert(phase >= ws->phase); + if (phase > ws->phase) { + if (ws->phase < ZSTD_cwksp_alloc_buffers && + phase >= ZSTD_cwksp_alloc_buffers) { + ws->tableValidEnd = ws->objectEnd; + } + if (ws->phase < ZSTD_cwksp_alloc_aligned && + phase >= ZSTD_cwksp_alloc_aligned) { + /* If unaligned allocations down from a too-large top have left us + * unaligned, we need to realign our alloc ptr. Technically, this + * can consume space that is unaccounted for in the neededSpace + * calculation. However, I believe this can only happen when the + * workspace is too large, and specifically when it is too large + * by a larger margin than the space that will be consumed. */ + /* TODO: cleaner, compiler warning friendly way to do this??? */ + ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1)); + if (ws->allocStart < ws->tableValidEnd) { + ws->tableValidEnd = ws->allocStart; + } + } + ws->phase = phase; + } +} - - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer immediately writes compressed file data - to disk and builds an exact image of the central directory in memory. 
The central directory image is written - all at once at the end of the archive file when the archive is finalized. +/** + * Returns whether this object/buffer/etc was allocated in this workspace. + */ +MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) { + return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd); +} - The archive writer can optionally align each file's local header and file data to any power of 2 alignment, - which can be useful when the archive will be read from optical media. Also, the writer supports placing - arbitrary data blobs at the very beginning of ZIP archives. Archives written using either feature are still - readable by any ZIP tool. +/** + * Internal function. Do not use directly. + */ +MEM_STATIC void* ZSTD_cwksp_reserve_internal( + ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) { + void* alloc; + void* bottom = ws->tableEnd; + ZSTD_cwksp_internal_advance_phase(ws, phase); + alloc = (BYTE *)ws->allocStart - bytes; - - Archive appending: The simple way to add a single file to an archive is to call this function: +#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* over-reserve space */ + alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; +#endif - mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, - const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); + DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining", + alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); + ZSTD_cwksp_assert_internal_consistency(ws); + assert(alloc >= bottom); + if (alloc < bottom) { + DEBUGLOG(4, "cwksp: alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + if (alloc < ws->tableValidEnd) { + ws->tableValidEnd = alloc; + } + ws->allocStart = alloc; - The archive will be created if it doesn't already exist, otherwise it'll be appended to. 
- Note the appending is done in-place and is not an atomic operation, so if something goes wrong - during the operation it's possible the archive could be left without a central directory (although the local - file headers and file data will be fine, so the archive will be recoverable). +#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on + * either size. */ + alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; + __asan_unpoison_memory_region(alloc, bytes); +#endif - For more complex archive modification scenarios: - 1. The safest way is to use a mz_zip_reader to read the existing archive, cloning only those bits you want to - preserve into a new archive using using the mz_zip_writer_add_from_zip_reader() function (which compiles the - compressed file data as-is). When you're done, delete the old archive and rename the newly written archive, and - you're done. This is safe but requires a bunch of temporary disk space or heap memory. + return alloc; +} - 2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using mz_zip_writer_init_from_reader(), - append new files as needed, then finalize the archive which will write an updated central directory to the - original archive. (This is basically what mz_zip_add_mem_to_archive_file_in_place() does.) There's a - possibility that the archive's central directory could be lost with this method if anything goes wrong, though. +/** + * Reserves and returns unaligned memory. + */ +MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) { + return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers); +} - - ZIP archive support limitations: - No zip64 or spanning support. Extraction functions can only handle unencrypted, stored or deflated files. - Requires streams capable of seeking. +/** + * Reserves and returns memory sized on and aligned on sizeof(unsigned). 
+ */ +MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) { + assert((bytes & (sizeof(U32)-1)) == 0); + return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned); +} - * This is a header file library, like stb_image.c. To get only a header file, either cut and paste the - below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then include miniz.c from it. +/** + * Aligned on sizeof(unsigned). These buffers have the special property that + * their values remain constrained, allowing us to re-use them without + * memset()-ing them. + */ +MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) { + const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned; + void* alloc = ws->tableEnd; + void* end = (BYTE *)alloc + bytes; + void* top = ws->allocStart; - * Important: For best perf. be sure to customize the below macros for your target platform: - #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 - #define MINIZ_LITTLE_ENDIAN 1 - #define MINIZ_HAS_64BIT_REGISTERS 1 + DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining", + alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); + assert((bytes & (sizeof(U32)-1)) == 0); + ZSTD_cwksp_internal_advance_phase(ws, phase); + ZSTD_cwksp_assert_internal_consistency(ws); + assert(end <= top); + if (end > top) { + DEBUGLOG(4, "cwksp: table alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + ws->tableEnd = end; - * On platforms using glibc, Be sure to "#define _LARGEFILE64_SOURCE 1" before including miniz.c to ensure miniz - uses the 64-bit variants: fopen64(), stat64(), etc. Otherwise you won't be able to process large files - (i.e. 32-bit stat() fails for me on files > 0x7FFFFFFF bytes). -*/ +#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + __asan_unpoison_memory_region(alloc, bytes); +#endif + return alloc; +} +/** + * Aligned on sizeof(void*). 
+ */ +MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) { + size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*)); + void* alloc = ws->objectEnd; + void* end = (BYTE*)alloc + roundedBytes; +#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* over-reserve space */ + end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; +#endif + DEBUGLOG(5, + "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining", + alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes); + assert(((size_t)alloc & (sizeof(void*)-1)) == 0); + assert((bytes & (sizeof(void*)-1)) == 0); + ZSTD_cwksp_assert_internal_consistency(ws); + /* we must be in the first phase, no advance is possible */ + if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) { + DEBUGLOG(4, "cwksp: object alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + ws->objectEnd = end; + ws->tableEnd = end; + ws->tableValidEnd = end; -/* Defines to completely disable specific portions of miniz.c: - If all macros here are defined the only functionality remaining will be CRC-32, adler-32, tinfl, and tdefl. */ +#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on + * either size. */ + alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; + __asan_unpoison_memory_region(alloc, bytes); +#endif -/* Define MINIZ_NO_STDIO to disable all usage and any functions which rely on stdio for file I/O. */ -#define MINIZ_NO_STDIO + return alloc; +} -/* If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able to get the current time, or */ -/* get/set file times, and the C run-time funcs that get/set times won't be called. */ -/* The current downside is the times written to your archives will be from 1979. 
*/ -#define MINIZ_NO_TIME +MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty"); -/* Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. */ -/* #define MINIZ_NO_ARCHIVE_APIS */ +#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the table re-use logic is sound, and that we don't + * access table space that we haven't cleaned, we re-"poison" the table + * space every time we mark it dirty. */ + { + size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd; + assert(__msan_test_shadow(ws->objectEnd, size) == -1); + __msan_poison(ws->objectEnd, size); + } +#endif -/* Define MINIZ_NO_ARCHIVE_WRITING_APIS to disable all writing related ZIP archive API's. */ -/* #define MINIZ_NO_ARCHIVE_WRITING_APIS */ + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + ws->tableValidEnd = ws->objectEnd; + ZSTD_cwksp_assert_internal_consistency(ws); +} -/* Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression API's. */ -/*#define MINIZ_NO_ZLIB_APIS */ +MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean"); + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + if (ws->tableValidEnd < ws->tableEnd) { + ws->tableValidEnd = ws->tableEnd; + } + ZSTD_cwksp_assert_internal_consistency(ws); +} -/* Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent conflicts against stock zlib. */ -#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES +/** + * Zero the part of the allocated tables not already marked clean. 
+ */ +MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables"); + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + if (ws->tableValidEnd < ws->tableEnd) { + memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd); + } + ZSTD_cwksp_mark_tables_clean(ws); +} -/* Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. - Note if MINIZ_NO_MALLOC is defined then the user must always provide custom user alloc/free/realloc - callbacks to the zlib and archive API's, and a few stand-alone helper API's which don't provide custom user - functions (such as tdefl_compress_mem_to_heap() and tinfl_decompress_mem_to_heap()) won't work. */ -/*#define MINIZ_NO_MALLOC */ +/** + * Invalidates table allocations. + * All other allocations remain valid. + */ +MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: clearing tables!"); -#if defined(__TINYC__) && (defined(__linux) || defined(__linux__)) -/* TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc on Linux */ -#define MINIZ_NO_TIME +#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + { + size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd; + __asan_poison_memory_region(ws->objectEnd, size); + } #endif -#include - - + ws->tableEnd = ws->objectEnd; + ZSTD_cwksp_assert_internal_consistency(ws); +} -#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) -#include -#endif +/** + * Invalidates all buffer, aligned, and table allocations. + * Object allocations remain valid. + */ +MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: clearing!"); -#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__) -/* MINIZ_X86_OR_X64_CPU is only used to help set the below macros. 
*/ -#define MINIZ_X86_OR_X64_CPU 1 -#else -#define MINIZ_X86_OR_X64_CPU 0 +#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the context re-use logic is sound, and that we don't + * access stuff that this compression hasn't initialized, we re-"poison" + * the workspace (or at least the non-static, non-table parts of it) + * every time we start a new compression. */ + { + size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd; + __msan_poison(ws->tableValidEnd, size); + } #endif -#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU -/* Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. */ -#define MINIZ_LITTLE_ENDIAN 1 -#else -#define MINIZ_LITTLE_ENDIAN 0 +#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + { + size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd; + __asan_poison_memory_region(ws->objectEnd, size); + } #endif -#if MINIZ_X86_OR_X64_CPU -/* Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses. */ -#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 0 // always 0 because alignment -#else -#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 0 -#endif + ws->tableEnd = ws->objectEnd; + ws->allocStart = ws->workspaceEnd; + ws->allocFailed = 0; + if (ws->phase > ZSTD_cwksp_alloc_buffers) { + ws->phase = ZSTD_cwksp_alloc_buffers; + } + ZSTD_cwksp_assert_internal_consistency(ws); +} -#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__) -/* Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions). */ -#define MINIZ_HAS_64BIT_REGISTERS 1 -#else -#define MINIZ_HAS_64BIT_REGISTERS 0 -#endif +/** + * The provided workspace takes ownership of the buffer [start, start+size). 
+ * Any existing values in the workspace are ignored (the previously managed + * buffer, if present, must be separately freed). + */ +MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) { + DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size); + assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */ + ws->workspace = start; + ws->workspaceEnd = (BYTE*)start + size; + ws->objectEnd = ws->workspace; + ws->tableValidEnd = ws->objectEnd; + ws->phase = ZSTD_cwksp_alloc_objects; + ZSTD_cwksp_clear(ws); + ws->workspaceOversizedDuration = 0; + ZSTD_cwksp_assert_internal_consistency(ws); +} -namespace duckdb_miniz { +MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) { + void* workspace = ZSTD_malloc(size, customMem); + DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size); + RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!"); + ZSTD_cwksp_init(ws, workspace, size); + return 0; +} -/* ------------------- zlib-style API Definitions. */ +MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) { + void *ptr = ws->workspace; + DEBUGLOG(4, "cwksp: freeing workspace"); + memset(ws, 0, sizeof(ZSTD_cwksp)); + ZSTD_free(ptr, customMem); +} -/* For more compatibility with zlib, miniz.c uses unsigned long for some parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits! */ -typedef unsigned long mz_ulong; +/** + * Moves the management of a workspace from one cwksp to another. The src cwksp + * is left in an invalid state (src must be re-init()'ed before its used again). + */ +MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) { + *dst = *src; + memset(src, 0, sizeof(ZSTD_cwksp)); +} -/* mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap. 
*/ -void mz_free(void *p); +MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace); +} -#define MZ_ADLER32_INIT (1) -/* mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL. */ -mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len); +MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { + return ws->allocFailed; +} -#define MZ_CRC32_INIT (0) -/* mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL. */ -mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len); +/*-************************************* +* Functions Checking Free Space +***************************************/ -/* Compression strategies. */ -enum { MZ_DEFAULT_STRATEGY = 0, MZ_FILTERED = 1, MZ_HUFFMAN_ONLY = 2, MZ_RLE = 3, MZ_FIXED = 4 }; +MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd); +} -/* Method */ -#define MZ_DEFLATED 8 +MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace; +} -/* Heap allocation callbacks. -Note that mz_alloc_func parameter types purpsosely differ from zlib's: items/size is size_t, not unsigned long. */ -typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size); -typedef void (*mz_free_func)(void *opaque, void *address); -typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, size_t size); +MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_check_available( + ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR); +} -/* Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL. 
*/ -enum { - MZ_NO_COMPRESSION = 0, - MZ_BEST_SPEED = 1, - MZ_BEST_COMPRESSION = 9, - MZ_UBER_COMPRESSION = 10, - MZ_DEFAULT_LEVEL = 6, - MZ_DEFAULT_COMPRESSION = -1 -}; +MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace) + && ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION; +} -#define MZ_VERSION "10.0.3" -#define MZ_VERNUM 0xA030 -#define MZ_VER_MAJOR 10 -#define MZ_VER_MINOR 0 -#define MZ_VER_REVISION 3 -#define MZ_VER_SUBREVISION 0 +MEM_STATIC void ZSTD_cwksp_bump_oversized_duration( + ZSTD_cwksp* ws, size_t additionalNeededSpace) { + if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) { + ws->workspaceOversizedDuration++; + } else { + ws->workspaceOversizedDuration = 0; + } +} -#ifndef MINIZ_NO_ZLIB_APIS +} -/* Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs). */ -enum { MZ_NO_FLUSH = 0, MZ_PARTIAL_FLUSH = 1, MZ_SYNC_FLUSH = 2, MZ_FULL_FLUSH = 3, MZ_FINISH = 4, MZ_BLOCK = 5 }; +#endif /* ZSTD_CWKSP_H */ -/* Return status codes. MZ_PARAM_ERROR is non-standard. */ -enum { - MZ_OK = 0, - MZ_STREAM_END = 1, - MZ_NEED_DICT = 2, - MZ_ERRNO = -1, - MZ_STREAM_ERROR = -2, - MZ_DATA_ERROR = -3, - MZ_MEM_ERROR = -4, - MZ_BUF_ERROR = -5, - MZ_VERSION_ERROR = -6, - MZ_PARAM_ERROR = -10000 -}; -/* Window bits */ -#define MZ_DEFAULT_WINDOW_BITS 15 +// LICENSE_CHANGE_END -struct mz_internal_state; +// #ifdef ZSTD_MULTITHREAD +// # include "zstdmt_compress.h" +// #endif -/* Compression/decompression stream struct. 
*/ -typedef struct mz_stream_s { - const unsigned char *next_in; /* pointer to next byte to read */ - unsigned int avail_in; /* number of bytes available at next_in */ - mz_ulong total_in; /* total number of bytes consumed so far */ +/*-************************************* +* Constants +***************************************/ +#define kSearchStrength 8 +#define HASH_READ_SIZE 8 +#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted". + It could be confused for a real successor at index "1", if sorted as larger than its predecessor. + It's not a big deal though : candidate will just be sorted again. + Additionally, candidate position 1 will be lost. + But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. + The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. + This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ - unsigned char *next_out; /* pointer to next byte to write */ - unsigned int avail_out; /* number of bytes that can be written to next_out */ - mz_ulong total_out; /* total number of bytes produced so far */ - char *msg; /* error msg (unused) */ - struct mz_internal_state *state; /* internal state, allocated by zalloc/zfree */ +namespace duckdb_zstd { +/*-************************************* +* Context memory management +***************************************/ +typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; +typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage; - mz_alloc_func zalloc; /* optional heap allocation function (defaults to malloc) */ - mz_free_func zfree; /* optional heap free function (defaults to free) */ - void *opaque; /* heap alloc function user pointer */ +typedef struct ZSTD_prefixDict_s { + const void* dict; + size_t dictSize; + ZSTD_dictContentType_e dictContentType; +} ZSTD_prefixDict; 
- int data_type; /* data_type (unused) */ - mz_ulong adler; /* adler32 of the source or uncompressed data */ - mz_ulong reserved; /* not used */ -} mz_stream; +typedef struct { + void* dictBuffer; + void const* dict; + size_t dictSize; + ZSTD_dictContentType_e dictContentType; + ZSTD_CDict* cdict; +} ZSTD_localDict; -typedef mz_stream *mz_streamp; +typedef struct { + U32 CTable[HUF_CTABLE_SIZE_U32(255)]; + HUF_repeat repeatMode; +} ZSTD_hufCTables_t; -/* Returns the version string of miniz.c. */ -const char *mz_version(void); - -/* mz_deflateInit() initializes a compressor with default options: */ -/* Parameters: */ -/* pStream must point to an initialized mz_stream struct. */ -/* level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION]. */ -/* level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio. - */ -/* (This special func. is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.) */ -/* Return values: */ -/* MZ_OK on success. */ -/* MZ_STREAM_ERROR if the stream is bogus. */ -/* MZ_PARAM_ERROR if the input parameters are bogus. */ -/* MZ_MEM_ERROR on out of memory. 
*/ -int mz_deflateInit(mz_streamp pStream, int level); +typedef struct { + FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; + FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; + FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; + FSE_repeat offcode_repeatMode; + FSE_repeat matchlength_repeatMode; + FSE_repeat litlength_repeatMode; +} ZSTD_fseCTables_t; -/* mz_deflateInit2() is like mz_deflate(), except with more control: */ -/* Additional parameters: */ -/* method must be MZ_DEFLATED */ -/* window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer) */ -/* mem_level must be between [1, 9] (it's checked but ignored by miniz.c) */ -int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy); +typedef struct { + ZSTD_hufCTables_t huf; + ZSTD_fseCTables_t fse; +} ZSTD_entropyCTables_t; -/* Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2(). */ -int mz_deflateReset(mz_streamp pStream); +typedef struct { + U32 off; + U32 len; +} ZSTD_match_t; -/* mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible. - */ -/* Parameters: */ -/* pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. */ -/* flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH. */ -/* Return values: */ -/* MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full). */ -/* MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore. */ -/* MZ_STREAM_ERROR if the stream is bogus. 
*/ -/* MZ_PARAM_ERROR if one of the parameters is invalid. */ -/* MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.) */ -int mz_deflate(mz_streamp pStream, int flush); +typedef struct { + int price; + U32 off; + U32 mlen; + U32 litlen; + U32 rep[ZSTD_REP_NUM]; +} ZSTD_optimal_t; -/* mz_deflateEnd() deinitializes a compressor: */ -/* Return values: */ -/* MZ_OK on success. */ -/* MZ_STREAM_ERROR if the stream is bogus. */ -int mz_deflateEnd(mz_streamp pStream); +typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e; -/* mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH. */ -mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len); +typedef struct { + /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ + unsigned* litFreq; /* table of literals statistics, of size 256 */ + unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */ + unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ + unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ + ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */ + ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */ -/* Single-call compression functions mz_compress() and mz_compress2(): */ -/* Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure. 
*/ -int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); -int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, - int level); + U32 litSum; /* nb of literals */ + U32 litLengthSum; /* nb of litLength codes */ + U32 matchLengthSum; /* nb of matchLength codes */ + U32 offCodeSum; /* nb of offset codes */ + U32 litSumBasePrice; /* to compare to log2(litfreq) */ + U32 litLengthSumBasePrice; /* to compare to log2(llfreq) */ + U32 matchLengthSumBasePrice;/* to compare to log2(mlfreq) */ + U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ + ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ + const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ + ZSTD_literalCompressionMode_e literalCompressionMode; +} optState_t; -/* mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress(). */ -mz_ulong mz_compressBound(mz_ulong source_len); +typedef struct { + ZSTD_entropyCTables_t entropy; + U32 rep[ZSTD_REP_NUM]; +} ZSTD_compressedBlockState_t; -/* Initializes a decompressor. */ -int mz_inflateInit(mz_streamp pStream); +typedef struct { + BYTE const* nextSrc; /* next block here to continue on current prefix */ + BYTE const* base; /* All regular indexes relative to this position */ + BYTE const* dictBase; /* extDict indexes relative to this position */ + U32 dictLimit; /* below that point, need extDict */ + U32 lowLimit; /* below that point, no more valid data */ +} ZSTD_window_t; -/* mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer: */ -/* window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate). 
*/ -int mz_inflateInit2(mz_streamp pStream, int window_bits); +typedef struct ZSTD_matchState_t ZSTD_matchState_t; +struct ZSTD_matchState_t { + ZSTD_window_t window; /* State for window round buffer management */ + U32 loadedDictEnd; /* index of end of dictionary, within context's referential. + * When loadedDictEnd != 0, a dictionary is in use, and still valid. + * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance. + * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity(). + * When dict referential is copied into active context (i.e. not attached), + * loadedDictEnd == dictSize, since referential starts from zero. + */ + U32 nextToUpdate; /* index from which to continue table update */ + U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ + U32* hashTable; + U32* hashTable3; + U32* chainTable; + optState_t opt; /* optimal parser state */ + const ZSTD_matchState_t* dictMatchState; + ZSTD_compressionParameters cParams; +}; -/* Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible. */ -/* Parameters: */ -/* pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. */ -/* flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH. */ -/* On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster). */ -/* MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data. */ -/* Return values: */ -/* MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full. 
*/ -/* MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified. */ -/* MZ_STREAM_ERROR if the stream is bogus. */ -/* MZ_DATA_ERROR if the deflate stream is invalid. */ -/* MZ_PARAM_ERROR if one of the parameters is invalid. */ -/* MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again */ -/* with more input data, or with more room in the output buffer (except when using single call decompression, described above). */ -int mz_inflate(mz_streamp pStream, int flush); +typedef struct { + ZSTD_compressedBlockState_t* prevCBlock; + ZSTD_compressedBlockState_t* nextCBlock; + ZSTD_matchState_t matchState; +} ZSTD_blockState_t; -/* Deinitializes a decompressor. */ -int mz_inflateEnd(mz_streamp pStream); +typedef struct { + U32 offset; + U32 checksum; +} ldmEntry_t; -/* Single-call decompression. */ -/* Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure. */ -int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); +typedef struct { + ZSTD_window_t window; /* State for the window round buffer management */ + ldmEntry_t* hashTable; + U32 loadedDictEnd; + BYTE* bucketOffsets; /* Next position in bucket to insert entry */ + U64 hashPower; /* Used to compute the rolling hash. + * Depends on ldmParams.minMatchLength */ +} ldmState_t; -/* Returns a string description of the specified error code, or NULL if the error code is invalid. 
*/ -const char *mz_error(int err); +typedef struct { + U32 enableLdm; /* 1 if enable long distance matching */ + U32 hashLog; /* Log size of hashTable */ + U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ + U32 minMatchLength; /* Minimum match length */ + U32 hashRateLog; /* Log number of entries to skip */ + U32 windowLog; /* Window log for the LDM */ +} ldmParams_t; -/* Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports. */ -/* Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project. */ -#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES -typedef unsigned char Byte; -typedef unsigned int uInt; -typedef mz_ulong uLong; -typedef Byte Bytef; -typedef uInt uIntf; -typedef char charf; -typedef int intf; -typedef void *voidpf; -typedef uLong uLongf; -typedef void *voidp; -typedef void *const voidpc; -#define Z_NULL 0 -#define Z_NO_FLUSH MZ_NO_FLUSH -#define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH -#define Z_SYNC_FLUSH MZ_SYNC_FLUSH -#define Z_FULL_FLUSH MZ_FULL_FLUSH -#define Z_FINISH MZ_FINISH -#define Z_BLOCK MZ_BLOCK -#define Z_OK MZ_OK -#define Z_STREAM_END MZ_STREAM_END -#define Z_NEED_DICT MZ_NEED_DICT -#define Z_ERRNO MZ_ERRNO -#define Z_STREAM_ERROR MZ_STREAM_ERROR -#define Z_DATA_ERROR MZ_DATA_ERROR -#define Z_MEM_ERROR MZ_MEM_ERROR -#define Z_BUF_ERROR MZ_BUF_ERROR -#define Z_VERSION_ERROR MZ_VERSION_ERROR -#define Z_PARAM_ERROR MZ_PARAM_ERROR -#define Z_NO_COMPRESSION MZ_NO_COMPRESSION -#define Z_BEST_SPEED MZ_BEST_SPEED -#define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION -#define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION -#define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY -#define Z_FILTERED MZ_FILTERED -#define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY -#define Z_RLE MZ_RLE -#define Z_FIXED MZ_FIXED -#define Z_DEFLATED MZ_DEFLATED -#define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS -#define alloc_func mz_alloc_func 
-#define free_func mz_free_func -#define internal_state mz_internal_state -#define z_stream mz_stream -#define deflateInit mz_deflateInit -#define deflateInit2 mz_deflateInit2 -#define deflateReset mz_deflateReset -#define deflate mz_deflate -#define deflateEnd mz_deflateEnd -#define deflateBound mz_deflateBound -#define compress mz_compress -#define compress2 mz_compress2 -#define compressBound mz_compressBound -#define inflateInit mz_inflateInit -#define inflateInit2 mz_inflateInit2 -#define inflate mz_inflate -#define inflateEnd mz_inflateEnd -#define uncompress mz_uncompress -#define crc32 mz_crc32 -#define adler32 mz_adler32 -#define MAX_WBITS 15 -#define MAX_MEM_LEVEL 9 -#define zError mz_error -#define ZLIB_VERSION MZ_VERSION -#define ZLIB_VERNUM MZ_VERNUM -#define ZLIB_VER_MAJOR MZ_VER_MAJOR -#define ZLIB_VER_MINOR MZ_VER_MINOR -#define ZLIB_VER_REVISION MZ_VER_REVISION -#define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION -#define zlibVersion mz_version -#define zlib_version mz_version() -#endif /* #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES */ +typedef struct { + U32 offset; + U32 litLength; + U32 matchLength; +} rawSeq; -#endif /* MINIZ_NO_ZLIB_APIS */ +typedef struct { + rawSeq* seq; /* The start of the sequences */ + size_t pos; /* The position where reading stopped. <= size. */ + size_t size; /* The number of sequences. <= capacity. 
*/ + size_t capacity; /* The capacity starting from `seq` pointer */ +} rawSeqStore_t; -} +typedef struct { + int collectSequences; + ZSTD_Sequence* seqStart; + size_t seqIndex; + size_t maxSequences; +} SeqCollector; +struct ZSTD_CCtx_params_s { + ZSTD_format_e format; + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; -#include -#include -#include -#include + int compressionLevel; + int forceWindow; /* force back-references to respect limit of + * 1< -#define MZ_FILE FILE -#endif /* #ifdef MINIZ_NO_STDIO */ +struct ZSTD_CCtx_s { + ZSTD_compressionStage_e stage; + int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */ + int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ + ZSTD_CCtx_params requestedParams; + ZSTD_CCtx_params appliedParams; + U32 dictID; -#ifdef MINIZ_NO_TIME -typedef struct mz_dummy_time_t_tag -{ - int m_dummy; -} mz_dummy_time_t; -#define MZ_TIME_T mz_dummy_time_t -#else -#define MZ_TIME_T time_t -#endif + ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */ + size_t blockSize; + unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */ + unsigned long long consumedSrcSize; + unsigned long long producedCSize; + XXH64_state_t xxhState; + ZSTD_customMem customMem; + size_t staticSize; + SeqCollector seqCollector; + int isFirstBlock; + int initialized; -#define MZ_ASSERT(x) assert(x) + seqStore_t seqStore; /* sequences storage ptrs */ + ldmState_t ldmState; /* long distance matching state */ + rawSeq* ldmSequences; /* Storage for the ldm output sequences */ + size_t maxNbLdmSequences; + rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */ + ZSTD_blockState_t blockState; + U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ -#ifdef MINIZ_NO_MALLOC 
-#define MZ_MALLOC(x) NULL -#define MZ_FREE(x) (void)x, ((void)0) -#define MZ_REALLOC(p, x) NULL -#else -#define MZ_MALLOC(x) malloc(x) -#define MZ_FREE(x) free(x) -#define MZ_REALLOC(p, x) realloc(p, x) -#endif + /* streaming */ + char* inBuff; + size_t inBuffSize; + size_t inToCompress; + size_t inBuffPos; + size_t inBuffTarget; + char* outBuff; + size_t outBuffSize; + size_t outBuffContentSize; + size_t outBuffFlushedSize; + ZSTD_cStreamStage streamStage; + U32 frameEnded; -#define MZ_MAX(a, b) (((a) > (b)) ? (a) : (b)) -#define MZ_MIN(a, b) (((a) < (b)) ? (a) : (b)) -#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj)) + /* Dictionary */ + ZSTD_localDict localDict; + const ZSTD_CDict* cdict; + ZSTD_prefixDict prefixDict; /* single-usage dictionary */ -#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN -#define MZ_READ_LE16(p) *((const mz_uint16 *)(p)) -#define MZ_READ_LE32(p) *((const mz_uint32 *)(p)) -#else -#define MZ_READ_LE16(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U)) -#define MZ_READ_LE32(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U)) + /* Multi-threading */ +#ifdef ZSTD_MULTITHREAD + ZSTDMT_CCtx* mtctx; #endif +}; -#define MZ_READ_LE64(p) (((mz_uint64)MZ_READ_LE32(p)) | (((mz_uint64)MZ_READ_LE32((const mz_uint8 *)(p) + sizeof(mz_uint32))) << 32U)) +typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; -#ifdef _MSC_VER -#define MZ_FORCEINLINE __forceinline -#elif defined(__GNUC__) -#define MZ_FORCEINLINE __inline__ __attribute__((__always_inline__)) -#else -#define MZ_FORCEINLINE inline -#endif +typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e; -extern void *miniz_def_alloc_func(void *opaque, size_t items, size_t size); -extern void miniz_def_free_func(void *opaque, void 
*address); -extern void *miniz_def_realloc_func(void *opaque, void *address, size_t items, size_t size); -#define MZ_UINT16_MAX (0xFFFFU) -#define MZ_UINT32_MAX (0xFFFFFFFFU) +typedef size_t (*ZSTD_blockCompressor) ( + ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode); +MEM_STATIC U32 ZSTD_LLcode(U32 litLength) +{ + static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 16, 17, 17, 18, 18, 19, 19, + 20, 20, 20, 20, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24 }; + static const U32 LL_deltaCode = 19; + return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; +} +/* ZSTD_MLcode() : + * note : mlBase = matchLength - MINMATCH; + * because it's the format it's stored in seqStore->sequences */ +MEM_STATIC U32 ZSTD_MLcode(U32 mlBase) +{ + static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, + 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; + static const U32 ML_deltaCode = 36; + return (mlBase > 127) ? 
ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; +} +typedef struct repcodes_s { + U32 rep[3]; +} repcodes_t; -/* ------------------- Low-level Compression API Definitions */ +MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0) +{ + repcodes_t newReps; + if (offset >= ZSTD_REP_NUM) { /* full offset */ + newReps.rep[2] = rep[1]; + newReps.rep[1] = rep[0]; + newReps.rep[0] = offset - ZSTD_REP_MOVE; + } else { /* repcode */ + U32 const repCode = offset + ll0; + if (repCode > 0) { /* note : if repCode==0, no change */ + U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2]; + newReps.rep[1] = rep[0]; + newReps.rep[0] = currentOffset; + } else { /* repCode == 0 */ + memcpy(&newReps, rep, sizeof(newReps)); + } + } + return newReps; +} -/* Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently). */ -#define TDEFL_LESS_MEMORY 0 +/* ZSTD_cParam_withinBounds: + * @return 1 if value is within cParam bounds, + * 0 otherwise */ +MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) +{ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); + if (ZSTD_isError(bounds.error)) return 0; + if (value < bounds.lowerBound) return 0; + if (value > bounds.upperBound) return 0; + return 1; +} -/* tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search): */ -/* TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression). */ -enum +/* ZSTD_noCompressBlock() : + * Writes uncompressed block to dst buffer from given src. 
+ * Returns the size of the block */ +MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) { - TDEFL_HUFFMAN_ONLY = 0, - TDEFL_DEFAULT_MAX_PROBES = 128, - TDEFL_MAX_PROBES_MASK = 0xFFF -}; + U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); + RETURN_ERROR_IF(srcSize + ZSTDInternalConstants::ZSTD_blockHeaderSize > dstCapacity, + dstSize_tooSmall, "dst buf too small for uncompressed block"); + MEM_writeLE24(dst, cBlockHeader24); + memcpy((BYTE*)dst + ZSTDInternalConstants::ZSTD_blockHeaderSize, src, srcSize); + return ZSTDInternalConstants::ZSTD_blockHeaderSize + srcSize; +} -/* TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data. */ -/* TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers). */ -/* TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing. */ -/* TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory). */ -/* TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1) */ -/* TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled. */ -/* TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables. */ -/* TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks. */ -/* The low 12 bits are reserved to control the max # of hash probes per dictionary lookup (see TDEFL_MAX_PROBES_MASK). 
*/ -enum +MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) { - TDEFL_WRITE_ZLIB_HEADER = 0x01000, - TDEFL_COMPUTE_ADLER32 = 0x02000, - TDEFL_GREEDY_PARSING_FLAG = 0x04000, - TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000, - TDEFL_RLE_MATCHES = 0x10000, - TDEFL_FILTER_MATCHES = 0x20000, - TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000, - TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000 -}; - -/* High level compression functions: */ -/* tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc(). */ -/* On entry: */ -/* pSrc_buf, src_buf_len: Pointer and size of source block to compress. */ -/* flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression. */ -/* On return: */ -/* Function returns a pointer to the compressed data, or NULL on failure. */ -/* *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data. */ -/* The caller must free() the returned block when it's no longer needed. */ -void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); - -/* tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory. */ -/* Returns 0 on failure. */ -size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); - -/* Compresses an image to a compressed PNG file in memory. */ -/* On entry: */ -/* pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4. */ -/* The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory. */ -/* level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. 
or a decent default is MZ_DEFAULT_LEVEL */ -/* If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps). */ -/* On return: */ -/* Function returns a pointer to the compressed data, or NULL on failure. */ -/* *pLen_out will be set to the size of the PNG image file. */ -/* The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed. */ -void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip); -void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out); - -/* Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time. */ -typedef mz_bool (*tdefl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser); + BYTE* const op = (BYTE*)dst; + U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3); + RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, ""); + MEM_writeLE24(op, cBlockHeader); + op[3] = src; + return 4; +} -/* tdefl_compress_mem_to_output() compresses a block to an output stream. The above helpers use this function internally. */ -mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); -enum +/* ZSTD_minGain() : + * minimum compression required + * to generate a compress block or a compressed literals section. + * note : use same formula for both situations */ +MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) { - TDEFL_MAX_HUFF_TABLES = 3, - TDEFL_MAX_HUFF_SYMBOLS_0 = 288, - TDEFL_MAX_HUFF_SYMBOLS_1 = 32, - TDEFL_MAX_HUFF_SYMBOLS_2 = 19, - TDEFL_LZ_DICT_SIZE = 32768, - TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, - TDEFL_MIN_MATCH_LEN = 3, - TDEFL_MAX_MATCH_LEN = 258 -}; + U32 const minlog = (strat>=ZSTD_btultra) ? 
(U32)(strat) - 1 : 6; + ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); + assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); + return (srcSize >> minlog) + 2; +} -/* TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes). */ -#if TDEFL_LESS_MEMORY -enum -{ - TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, - TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, - TDEFL_MAX_HUFF_SYMBOLS = 288, - TDEFL_LZ_HASH_BITS = 12, - TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, - TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, - TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS -}; -#else -enum +MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) { - TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, - TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, - TDEFL_MAX_HUFF_SYMBOLS = 288, - TDEFL_LZ_HASH_BITS = 15, - TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, - TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, - TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS -}; -#endif - -/* The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions. */ -typedef enum { - TDEFL_STATUS_BAD_PARAM = -2, - TDEFL_STATUS_PUT_BUF_FAILED = -1, - TDEFL_STATUS_OKAY = 0, - TDEFL_STATUS_DONE = 1 -} tdefl_status; + switch (cctxParams->literalCompressionMode) { + case ZSTD_lcm_huffman: + return 0; + case ZSTD_lcm_uncompressed: + return 1; + default: + assert(0 /* impossible: pre-validated */); + /* fall-through */ + case ZSTD_lcm_auto: + return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); + } +} -/* Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums */ -typedef enum { - TDEFL_NO_FLUSH = 0, - TDEFL_SYNC_FLUSH = 2, - TDEFL_FULL_FLUSH = 3, - TDEFL_FINISH = 4 -} tdefl_flush; +/*! 
ZSTD_safecopyLiterals() : + * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w. + * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single + * large copies. + */ +static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) { + assert(iend > ilimit_w); + if (ip <= ilimit_w) { + ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap); + op += ilimit_w - ip; + ip = ilimit_w; + } + while (ip < iend) *op++ = *ip++; +} -/* tdefl's compression state structure. */ -typedef struct +/*! ZSTD_storeSeq() : + * Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t. + * `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes). + * `mlBase` : matchLength - MINMATCH + * Allowed to overread literals up to litLimit. +*/ +HINT_INLINE UNUSED_ATTR +void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase) { - tdefl_put_buf_func_ptr m_pPut_buf_func; - void *m_pPut_buf_user; - mz_uint m_flags, m_max_probes[2]; - int m_greedy_parsing; - mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size; - mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end; - mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer; - mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish; - tdefl_status m_prev_return_status; - const void *m_pIn_buf; - void *m_pOut_buf; - size_t *m_pIn_buf_size, *m_pOut_buf_size; - tdefl_flush m_flush; - const mz_uint8 *m_pSrc; - size_t m_src_buf_left, m_out_buf_ofs; - mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1]; - mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; - mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; - 
mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; - mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE]; - mz_uint16 m_next[TDEFL_LZ_DICT_SIZE]; - mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE]; - mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE]; -} tdefl_compressor; - -/* Initializes the compressor. */ -/* There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory. */ -/* pBut_buf_func: If NULL, output data will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression. */ -/* If pBut_buf_func is NULL the user should always call the tdefl_compress() API. */ -/* flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.) */ -tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); - -/* Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible. */ -tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush); - -/* tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr. */ -/* tdefl_compress_buffer() always consumes the entire input buffer. */ -tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush); - -tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d); -mz_uint32 tdefl_get_adler32(tdefl_compressor *d); - -/* Create tdefl_compress() flags given zlib-style compression parameters. 
*/ -/* level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files) */ -/* window_bits may be -15 (raw deflate) or 15 (zlib) */ -/* strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED */ -mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy); - -/* Allocate the tdefl_compressor structure in C so that */ -/* non-C language bindings to tdefl_ API don't need to worry about */ -/* structure size and allocation mechanism. */ -tdefl_compressor *tdefl_compressor_alloc(); -void tdefl_compressor_free(tdefl_compressor *pComp); + BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; + BYTE const* const litEnd = literals + litLength; +#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6) + static const BYTE* g_start = NULL; + if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ + { U32 const pos = (U32)((const BYTE*)literals - g_start); + DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", + pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode); + } +#endif + assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); + /* copy Literals */ + assert(seqStorePtr->maxNbLit <= 128 KB); + assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); + assert(literals + litLength <= litLimit); + if (litEnd <= litLimit_w) { + /* Common case we can use wildcopy. + * First copy 16 bytes, because literals are likely short. 
+ */ + assert(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(seqStorePtr->lit, literals); + if (litLength > 16) { + ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap); + } + } else { + ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w); + } + seqStorePtr->lit += litLength; + /* literal Length */ + if (litLength>0xFFFF) { + assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ + seqStorePtr->longLengthID = 1; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].litLength = (U16)litLength; + /* match offset */ + seqStorePtr->sequences[0].offset = offCode + 1; + /* match Length */ + if (mlBase>0xFFFF) { + assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ + seqStorePtr->longLengthID = 2; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].matchLength = (U16)mlBase; -/* ------------------- Low-level Decompression API Definitions */ + seqStorePtr->sequences++; +} -/* Decompression flags used by tinfl_decompress(). */ -/* TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the input is a raw deflate stream. */ -/* TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input. */ -/* TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB). */ -/* TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes. 
*/ -enum +/*-************************************* +* Match length counter +***************************************/ +static unsigned ZSTD_NbCommonBytes (size_t val) { - TINFL_FLAG_PARSE_ZLIB_HEADER = 1, - TINFL_FLAG_HAS_MORE_INPUT = 2, - TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4, - TINFL_FLAG_COMPUTE_ADLER32 = 8 -}; - -/* High level decompression functions: */ -/* tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc(). */ -/* On entry: */ -/* pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress. */ -/* On return: */ -/* Function returns a pointer to the decompressed data, or NULL on failure. */ -/* *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data. */ -/* The caller must call mz_free() on the returned block when it's no longer needed. */ -void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, + 0, 3, 1, 3, 1, 4, 2, 7, + 0, 2, 3, 6, 1, 5, 3, 5, + 1, 3, 4, 4, 2, 5, 6, 7, + 7, 0, 1, 2, 3, 3, 4, 6, + 2, 6, 5, 5, 3, 4, 5, 6, + 7, 1, 2, 4, 6, 4, 4, 5, + 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r=0; + return _BitScanForward( &r, (U32)val ) ? 
(unsigned)(r >> 3) : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, + 3, 2, 2, 1, 3, 2, 0, 1, + 3, 3, 1, 2, 2, 2, 2, 0, + 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + return _BitScanReverse64( &r, val ) ? (unsigned)(r >> 3) : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r = 0; + return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} -/* tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory. */ -/* Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success. */ -#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1)) -size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); -/* tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer. */ -/* Returns 1 on success or 0 on failure. 
*/ -typedef int (*tinfl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser); -int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); +MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) +{ + const BYTE* const pStart = pIn; + const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1); -struct tinfl_decompressor_tag; -typedef struct tinfl_decompressor_tag tinfl_decompressor; + if (pIn < pInLoopLimit) { + { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (diff) return ZSTD_NbCommonBytes(diff); } + pIn+=sizeof(size_t); pMatch+=sizeof(size_t); + while (pIn < pInLoopLimit) { + size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } + pIn += ZSTD_NbCommonBytes(diff); + return (size_t)(pIn - pStart); + } } + if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } + if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; } + if ((pIn> (32-h) ; } +MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ -/* Return status. */ -typedef enum { - /* This flags indicates the inflator needs 1 or more input bytes to make forward progress, but the caller is indicating that no more are available. The compressed data */ - /* is probably corrupted. If you call the inflator again with more bytes it'll try to continue processing the input but this is a BAD sign (either the data is corrupted or you called it incorrectly). */ - /* If you call it again with no input you'll just get TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS again. 
*/ - TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS = -4, +static const U32 prime4bytes = 2654435761U; +static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } +static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } - /* This flag indicates that one or more of the input parameters was obviously bogus. (You can try calling it again, but if you get this error the calling code is wrong.) */ - TINFL_STATUS_BAD_PARAM = -3, +static const U64 prime5bytes = 889523592379ULL; +static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } +static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } - /* This flags indicate the inflator is finished but the adler32 check of the uncompressed data didn't match. If you call it again it'll return TINFL_STATUS_DONE. */ - TINFL_STATUS_ADLER32_MISMATCH = -2, +static const U64 prime6bytes = 227718039650203ULL; +static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } +static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } - /* This flags indicate the inflator has somehow failed (bad code, corrupted input, etc.). If you call it again without resetting via tinfl_init() it it'll just keep on returning the same status failure code. */ - TINFL_STATUS_FAILED = -1, +static const U64 prime7bytes = 58295818150454627ULL; +static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } +static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } - /* Any status code less than TINFL_STATUS_DONE must indicate a failure. 
*/ +static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; +static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } +static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } - /* This flag indicates the inflator has returned every byte of uncompressed data that it can, has consumed every byte that it needed, has successfully reached the end of the deflate stream, and */ - /* if zlib headers and adler32 checking enabled that it has successfully checked the uncompressed data's adler32. If you call it again you'll just get TINFL_STATUS_DONE over and over again. */ - TINFL_STATUS_DONE = 0, +MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) +{ + switch(mls) + { + default: + case 4: return ZSTD_hash4Ptr(p, hBits); + case 5: return ZSTD_hash5Ptr(p, hBits); + case 6: return ZSTD_hash6Ptr(p, hBits); + case 7: return ZSTD_hash7Ptr(p, hBits); + case 8: return ZSTD_hash8Ptr(p, hBits); + } +} - /* This flag indicates the inflator MUST have more input data (even 1 byte) before it can make any more forward progress, or you need to clear the TINFL_FLAG_HAS_MORE_INPUT */ - /* flag on the next call if you don't have any more source data. If the source data was somehow corrupted it's also possible (but unlikely) for the inflator to keep on demanding input to */ - /* proceed, so be sure to properly set the TINFL_FLAG_HAS_MORE_INPUT flag. */ - TINFL_STATUS_NEEDS_MORE_INPUT = 1, +/** ZSTD_ipow() : + * Return base^exponent. + */ +static U64 ZSTD_ipow(U64 base, U64 exponent) +{ + U64 power = 1; + while (exponent) { + if (exponent & 1) power *= base; + exponent >>= 1; + base *= base; + } + return power; +} - /* This flag indicates the inflator definitely has 1 or more bytes of uncompressed data available, but it cannot write this data into the output buffer. */ - /* Note if the source compressed data was corrupted it's possible for the inflator to return a lot of uncompressed data to the caller. 
I've been assuming you know how much uncompressed data to expect */ - /* (either exact or worst case) and will stop calling the inflator and fail after receiving too much. In pure streaming scenarios where you have no idea how many bytes to expect this may not be possible */ - /* so I may need to add some code to address this. */ - TINFL_STATUS_HAS_MORE_OUTPUT = 2 -} tinfl_status; +#define ZSTD_ROLL_HASH_CHAR_OFFSET 10 -/* Initializes the decompressor to its initial state. */ -#define tinfl_init(r) \ - do \ - { \ - (r)->m_state = 0; \ - } \ - MZ_MACRO_END -#define tinfl_get_adler32(r) (r)->m_check_adler32 +/** ZSTD_rollingHash_append() : + * Add the buffer to the hash value. + */ +static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size) +{ + BYTE const* istart = (BYTE const*)buf; + size_t pos; + for (pos = 0; pos < size; ++pos) { + hash *= prime8bytes; + hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET; + } + return hash; +} -/* Main low-level decompressor coroutine function. This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability. */ -/* This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output. */ -tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags); +/** ZSTD_rollingHash_compute() : + * Compute the rolling hash value of the buffer. + */ +MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size) +{ + return ZSTD_rollingHash_append(0, buf, size); +} -/* Internal/private bits follow. */ -enum +/** ZSTD_rollingHash_primePower() : + * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash + * over a window of length bytes. 
+ */ +MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length) { - TINFL_MAX_HUFF_TABLES = 3, - TINFL_MAX_HUFF_SYMBOLS_0 = 288, - TINFL_MAX_HUFF_SYMBOLS_1 = 32, - TINFL_MAX_HUFF_SYMBOLS_2 = 19, - TINFL_FAST_LOOKUP_BITS = 10, - TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS -}; + return ZSTD_ipow(prime8bytes, length - 1); +} -typedef struct +/** ZSTD_rollingHash_rotate() : + * Rotate the rolling hash by one byte. + */ +MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower) { - mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0]; - mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2]; -} tinfl_huff_table; + hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower; + hash *= prime8bytes; + hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET; + return hash; +} -#if MINIZ_HAS_64BIT_REGISTERS -#define TINFL_USE_64BIT_BITBUF 1 -#else -#define TINFL_USE_64BIT_BITBUF 0 -#endif - -#if TINFL_USE_64BIT_BITBUF -typedef mz_uint64 tinfl_bit_buf_t; -#define TINFL_BITBUF_SIZE (64) -#else -typedef mz_uint32 tinfl_bit_buf_t; -#define TINFL_BITBUF_SIZE (32) +/*-************************************* +* Round buffer management +***************************************/ +#if (ZSTD_WINDOWLOG_MAX_64 > 31) +# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX" #endif +/* Max current allowed */ +#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)) +/* Maximum chunk size before overflow correction needs to be called again */ +#define ZSTD_CHUNKSIZE_MAX \ + ( ((U32)-1) /* Maximum ending current index */ \ + - ZSTD_CURRENT_MAX) /* Maximum beginning lowLimit */ -struct tinfl_decompressor_tag +/** + * ZSTD_window_clear(): + * Clears the window containing the history by simply setting it to empty. 
+ */ +MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window) { - mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES]; - tinfl_bit_buf_t m_bit_buf; - size_t m_dist_from_out_buf_start; - tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES]; - mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137]; -}; + size_t const endT = (size_t)(window->nextSrc - window->base); + U32 const end = (U32)endT; + window->lowLimit = end; + window->dictLimit = end; +} +/** + * ZSTD_window_hasExtDict(): + * Returns non-zero if the window has a non-empty extDict. + */ +MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window) +{ + return window.lowLimit < window.dictLimit; +} +/** + * ZSTD_matchState_dictMode(): + * Inspects the provided matchState and figures out what dictMode should be + * passed to the compressor. + */ +MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) +{ + return ZSTD_window_hasExtDict(ms->window) ? + ZSTD_extDict : + ms->dictMatchState != NULL ? + ZSTD_dictMatchState : + ZSTD_noDict; +} +/** + * ZSTD_window_needOverflowCorrection(): + * Returns non-zero if the indices are getting too large and need overflow + * protection. + */ +MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, + void const* srcEnd) +{ + U32 const current = (U32)((BYTE const*)srcEnd - window.base); + return current > ZSTD_CURRENT_MAX; +} +/** + * ZSTD_window_correctOverflow(): + * Reduces the indices to protect from index overflow. + * Returns the correction made to the indices, which must be applied to every + * stored index. + * + * The least significant cycleLog bits of the indices must remain the same, + * which may be 0. Every index up to maxDist in the past must be valid. + * NOTE: (maxDist & cycleMask) must be zero. 
+ */ +MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, + U32 maxDist, void const* src) +{ + /* preemptive overflow correction: + * 1. correction is large enough: + * lowLimit > (3<<29) ==> current > 3<<29 + 1< (3<<29 + 1< (3<<29) - (1< (3<<29) - (1<<30) (NOTE: chainLog <= 30) + * > 1<<29 + * + * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow: + * After correction, current is less than (1<base < 1<<32. + * 3. (cctx->lowLimit + 1< 3<<29 + 1<base); + U32 const currentCycle0 = current & cycleMask; + /* Exclude zero so that newCurrent - maxDist >= 1. */ + U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0; + U32 const newCurrent = currentCycle1 + maxDist; + U32 const correction = current - newCurrent; + assert((maxDist & cycleMask) == 0); + assert(current > newCurrent); + /* Loose bound, should be around 1<<29 (see above) */ + assert(correction > 1<<28); -/* ------------------- ZIP archive reading/writing */ + window->base += correction; + window->dictBase += correction; + if (window->lowLimit <= correction) window->lowLimit = 1; + else window->lowLimit -= correction; + if (window->dictLimit <= correction) window->dictLimit = 1; + else window->dictLimit -= correction; -#ifndef MINIZ_NO_ARCHIVE_APIS + /* Ensure we can still reference the full window. */ + assert(newCurrent >= maxDist); + assert(newCurrent - maxDist >= 1); + /* Ensure that lowLimit and dictLimit didn't underflow. */ + assert(window->lowLimit <= newCurrent); + assert(window->dictLimit <= newCurrent); + DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction, + window->lowLimit); + return correction; +} -enum +/** + * ZSTD_window_enforceMaxDist(): + * Updates lowLimit so that: + * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd + * + * It ensures index is valid as long as index >= lowLimit. + * This must be called before a block compression call. 
+ * + * loadedDictEnd is only defined if a dictionary is in use for current compression. + * As the name implies, loadedDictEnd represents the index at end of dictionary. + * The value lies within context's referential, it can be directly compared to blockEndIdx. + * + * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0. + * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit. + * This is because dictionaries are allowed to be referenced fully + * as long as the last byte of the dictionary is in the window. + * Once input has progressed beyond window size, dictionary cannot be referenced anymore. + * + * In normal dict mode, the dictionary lies between lowLimit and dictLimit. + * In dictMatchState mode, lowLimit and dictLimit are the same, + * and the dictionary is below them. + * forceWindow and dictMatchState are therefore incompatible. + */ +MEM_STATIC void +ZSTD_window_enforceMaxDist(ZSTD_window_t* window, + const void* blockEnd, + U32 maxDist, + U32* loadedDictEndPtr, + const ZSTD_matchState_t** dictMatchStatePtr) { - /* Note: These enums can be reduced as needed to save memory or stack space - they are pretty conservative. */ - MZ_ZIP_MAX_IO_BUF_SIZE = 64 * 1024, - MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 512, - MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 512 -}; + U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); + U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; + DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", + (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); -typedef struct -{ - /* Central directory file index. */ - mz_uint32 m_file_index; + /* - When there is no dictionary : loadedDictEnd == 0. + In which case, the test (blockEndIdx > maxDist) is merely to avoid + overflowing next operation `newLowLimit = blockEndIdx - maxDist`. 
+ - When there is a standard dictionary : + Index referential is copied from the dictionary, + which means it starts from 0. + In which case, loadedDictEnd == dictSize, + and it makes sense to compare `blockEndIdx > maxDist + dictSize` + since `blockEndIdx` also starts from zero. + - When there is an attached dictionary : + loadedDictEnd is expressed within the referential of the context, + so it can be directly compared against blockEndIdx. + */ + if (blockEndIdx > maxDist + loadedDictEnd) { + U32 const newLowLimit = blockEndIdx - maxDist; + if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit; + if (window->dictLimit < window->lowLimit) { + DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u", + (unsigned)window->dictLimit, (unsigned)window->lowLimit); + window->dictLimit = window->lowLimit; + } + /* On reaching window size, dictionaries are invalidated */ + if (loadedDictEndPtr) *loadedDictEndPtr = 0; + if (dictMatchStatePtr) *dictMatchStatePtr = NULL; + } +} - /* Byte offset of this entry in the archive's central directory. Note we currently only support up to UINT_MAX or less bytes in the central dir. */ - mz_uint64 m_central_dir_ofs; +/* Similar to ZSTD_window_enforceMaxDist(), + * but only invalidates dictionary + * when input progresses beyond window size. 
+ * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL) + * loadedDictEnd uses same referential as window->base + * maxDist is the window size */ +MEM_STATIC void +ZSTD_checkDictValidity(const ZSTD_window_t* window, + const void* blockEnd, + U32 maxDist, + U32* loadedDictEndPtr, + const ZSTD_matchState_t** dictMatchStatePtr) +{ + assert(loadedDictEndPtr != NULL); + assert(dictMatchStatePtr != NULL); + { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); + U32 const loadedDictEnd = *loadedDictEndPtr; + DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", + (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); + assert(blockEndIdx >= loadedDictEnd); - /* These fields are copied directly from the zip's central dir. */ - mz_uint16 m_version_made_by; - mz_uint16 m_version_needed; - mz_uint16 m_bit_flag; - mz_uint16 m_method; + if (blockEndIdx > loadedDictEnd + maxDist) { + /* On reaching window size, dictionaries are invalidated. + * For simplification, if window size is reached anywhere within next block, + * the dictionary is invalidated for the full block. + */ + DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)"); + *loadedDictEndPtr = 0; + *dictMatchStatePtr = NULL; + } else { + if (*loadedDictEndPtr != 0) { + DEBUGLOG(6, "dictionary considered valid for current block"); + } } } +} -#ifndef MINIZ_NO_TIME - MZ_TIME_T m_time; -#endif +MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { + memset(window, 0, sizeof(*window)); + window->base = (BYTE const*)""; + window->dictBase = (BYTE const*)""; + window->dictLimit = 1; /* start from 1, so that 1st position is valid */ + window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ + window->nextSrc = window->base + 1; /* see issue #1241 */ +} - /* CRC-32 of uncompressed data. 
*/ - mz_uint32 m_crc32; +/** + * ZSTD_window_update(): + * Updates the window by appending [src, src + srcSize) to the window. + * If it is not contiguous, the current prefix becomes the extDict, and we + * forget about the extDict. Handles overlap of the prefix and extDict. + * Returns non-zero if the segment is contiguous. + */ +MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, + void const* src, size_t srcSize) +{ + BYTE const* const ip = (BYTE const*)src; + U32 contiguous = 1; + DEBUGLOG(5, "ZSTD_window_update"); + if (srcSize == 0) + return contiguous; + assert(window->base != NULL); + assert(window->dictBase != NULL); + /* Check if blocks follow each other */ + if (src != window->nextSrc) { + /* not contiguous */ + size_t const distanceFromBase = (size_t)(window->nextSrc - window->base); + DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit); + window->lowLimit = window->dictLimit; + assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */ + window->dictLimit = (U32)distanceFromBase; + window->dictBase = window->base; + window->base = ip - distanceFromBase; + /* ms->nextToUpdate = window->dictLimit; */ + if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */ + contiguous = 0; + } + window->nextSrc = ip + srcSize; + /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */ + if ( (ip+srcSize > window->dictBase + window->lowLimit) + & (ip < window->dictBase + window->dictLimit)) { + ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase; + U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx; + window->lowLimit = lowLimitMax; + DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit); + } + return contiguous; +} - /* File's compressed size. 
*/ - mz_uint64 m_comp_size; +/** + * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix. + */ +MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog) +{ + U32 const maxDistance = 1U << windowLog; + U32 const lowestValid = ms->window.lowLimit; + U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid; + U32 const isDictionary = (ms->loadedDictEnd != 0); + U32 const matchLowest = isDictionary ? lowestValid : withinWindow; + return matchLowest; +} - /* File's uncompressed size. Note, I've seen some old archives where directory entries had 512 bytes for their uncompressed sizes, but when you try to unpack them you actually get 0 bytes. */ - mz_uint64 m_uncomp_size; +/** + * Returns the lowest allowed match index in the prefix. + */ +MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog) +{ + U32 const maxDistance = 1U << windowLog; + U32 const lowestValid = ms->window.dictLimit; + U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid; + U32 const isDictionary = (ms->loadedDictEnd != 0); + U32 const matchLowest = isDictionary ? lowestValid : withinWindow; + return matchLowest; +} - /* Zip internal and external file attributes. */ - mz_uint16 m_internal_attr; - mz_uint32 m_external_attr; - /* Entry's local header file offset in bytes. */ - mz_uint64 m_local_header_ofs; - /* Size of comment in bytes. */ - mz_uint32 m_comment_size; +/* debug functions */ +#if (DEBUGLEVEL>=2) - /* MZ_TRUE if the entry appears to be a directory. 
*/ - mz_bool m_is_directory; +MEM_STATIC double ZSTD_fWeight(U32 rawStat) +{ + U32 const fp_accuracy = 8; + U32 const fp_multiplier = (1 << fp_accuracy); + U32 const newStat = rawStat + 1; + U32 const hb = ZSTD_highbit32(newStat); + U32 const BWeight = hb * fp_multiplier; + U32 const FWeight = (newStat << fp_accuracy) >> hb; + U32 const weight = BWeight + FWeight; + assert(hb + fp_accuracy < 31); + return (double)weight / fp_multiplier; +} - /* MZ_TRUE if the entry uses encryption/strong encryption (which miniz_zip doesn't support) */ - mz_bool m_is_encrypted; +/* display a table content, + * listing each element, its frequency, and its predicted bit cost */ +MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) +{ + unsigned u, sum; + for (u=0, sum=0; u<=max; u++) sum += table[u]; + DEBUGLOG(2, "total nb elts: %u", sum); + for (u=0; u<=max; u++) { + DEBUGLOG(2, "%2u: %5u (%.2f)", + u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) ); + } +} - /* MZ_TRUE if the file is not encrypted, a patch file, and if it uses a compression method we support. */ - mz_bool m_is_supported; +#endif - /* Filename. If string ends in '/' it's a subdirectory entry. */ - /* Guaranteed to be zero terminated, may be truncated to fit. */ - char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE]; +/* =============================================================== + * Shared internal declarations + * These prototypes may be called from sources not in lib/compress + * =============================================================== */ - /* Comment field. */ - /* Guaranteed to be zero terminated, may be truncated to fit. */ - char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE]; +/* ZSTD_loadCEntropy() : + * dict : must point at beginning of a valid zstd dictionary. 
+ * return : size of dictionary header (size of magic number + dict ID + entropy tables) + * assumptions : magic number supposed already checked + * and dictSize >= 8 */ +size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, + short* offcodeNCount, unsigned* offcodeMaxValue, + const void* const dict, size_t dictSize); -} mz_zip_archive_file_stat; +void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs); -typedef size_t (*mz_file_read_func)(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n); -typedef size_t (*mz_file_write_func)(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n); -typedef mz_bool (*mz_file_needs_keepalive)(void *pOpaque); +/* ============================================================== + * Private declarations + * These prototypes shall only be called from within lib/compress + * ============================================================== */ -struct mz_zip_internal_state_tag; -typedef struct mz_zip_internal_state_tag mz_zip_internal_state; +/* ZSTD_getCParamsFromCCtxParams() : + * cParams are built depending on compressionLevel, src size hints, + * LDM and manually set compression parameters. + * Note: srcSizeHint == 0 means 0! + */ +ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize); -typedef enum { - MZ_ZIP_MODE_INVALID = 0, - MZ_ZIP_MODE_READING = 1, - MZ_ZIP_MODE_WRITING = 2, - MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3 -} mz_zip_mode; +/*! ZSTD_initCStream_internal() : + * Private use only. Init streaming operation. + * expects params to be valid. + * must receive dict, or cdict, or none, but not both. 
+ * @return : 0, or an error code */ +size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize); -typedef enum { - MZ_ZIP_FLAG_CASE_SENSITIVE = 0x0100, - MZ_ZIP_FLAG_IGNORE_PATH = 0x0200, - MZ_ZIP_FLAG_COMPRESSED_DATA = 0x0400, - MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800, - MZ_ZIP_FLAG_VALIDATE_LOCATE_FILE_FLAG = 0x1000, /* if enabled, mz_zip_reader_locate_file() will be called on each file as its validated to ensure the func finds the file in the central dir (intended for testing) */ - MZ_ZIP_FLAG_VALIDATE_HEADERS_ONLY = 0x2000, /* validate the local headers, but don't decompress the entire file and check the crc32 */ - MZ_ZIP_FLAG_WRITE_ZIP64 = 0x4000, /* always use the zip64 file format, instead of the original zip file format with automatic switch to zip64. Use as flags parameter with mz_zip_writer_init*_v2 */ - MZ_ZIP_FLAG_WRITE_ALLOW_READING = 0x8000, - MZ_ZIP_FLAG_ASCII_FILENAME = 0x10000 -} mz_zip_flags; +void ZSTD_resetSeqStore(seqStore_t* ssPtr); -typedef enum { - MZ_ZIP_TYPE_INVALID = 0, - MZ_ZIP_TYPE_USER, - MZ_ZIP_TYPE_MEMORY, - MZ_ZIP_TYPE_HEAP, - MZ_ZIP_TYPE_FILE, - MZ_ZIP_TYPE_CFILE, - MZ_ZIP_TOTAL_TYPES -} mz_zip_type; +/*! ZSTD_getCParamsFromCDict() : + * as the name implies */ +ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); -/* miniz error codes. Be sure to update mz_zip_get_error_string() if you add or modify this enum. 
*/ -typedef enum { - MZ_ZIP_NO_ERROR = 0, - MZ_ZIP_UNDEFINED_ERROR, - MZ_ZIP_TOO_MANY_FILES, - MZ_ZIP_FILE_TOO_LARGE, - MZ_ZIP_UNSUPPORTED_METHOD, - MZ_ZIP_UNSUPPORTED_ENCRYPTION, - MZ_ZIP_UNSUPPORTED_FEATURE, - MZ_ZIP_FAILED_FINDING_CENTRAL_DIR, - MZ_ZIP_NOT_AN_ARCHIVE, - MZ_ZIP_INVALID_HEADER_OR_CORRUPTED, - MZ_ZIP_UNSUPPORTED_MULTIDISK, - MZ_ZIP_DECOMPRESSION_FAILED, - MZ_ZIP_COMPRESSION_FAILED, - MZ_ZIP_UNEXPECTED_DECOMPRESSED_SIZE, - MZ_ZIP_CRC_CHECK_FAILED, - MZ_ZIP_UNSUPPORTED_CDIR_SIZE, - MZ_ZIP_ALLOC_FAILED, - MZ_ZIP_FILE_OPEN_FAILED, - MZ_ZIP_FILE_CREATE_FAILED, - MZ_ZIP_FILE_WRITE_FAILED, - MZ_ZIP_FILE_READ_FAILED, - MZ_ZIP_FILE_CLOSE_FAILED, - MZ_ZIP_FILE_SEEK_FAILED, - MZ_ZIP_FILE_STAT_FAILED, - MZ_ZIP_INVALID_PARAMETER, - MZ_ZIP_INVALID_FILENAME, - MZ_ZIP_BUF_TOO_SMALL, - MZ_ZIP_INTERNAL_ERROR, - MZ_ZIP_FILE_NOT_FOUND, - MZ_ZIP_ARCHIVE_TOO_LARGE, - MZ_ZIP_VALIDATION_FAILED, - MZ_ZIP_WRITE_CALLBACK_FAILED, - MZ_ZIP_TOTAL_ERRORS -} mz_zip_error; +/* ZSTD_compressBegin_advanced_internal() : + * Private use only. To be called from zstdmt_compress.c. */ +size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + unsigned long long pledgedSrcSize); -typedef struct -{ - mz_uint64 m_archive_size; - mz_uint64 m_central_directory_file_ofs; +/* ZSTD_compress_advanced_internal() : + * Private use only. To be called from zstdmt_compress.c. */ +size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + const ZSTD_CCtx_params* params); - /* We only support up to UINT32_MAX files in zip64 mode. 
*/ - mz_uint32 m_total_files; - mz_zip_mode m_zip_mode; - mz_zip_type m_zip_type; - mz_zip_error m_last_error; - mz_uint64 m_file_offset_alignment; +/* ZSTD_writeLastEmptyBlock() : + * output an empty Block with end-of-frame mark to complete a frame + * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) + * or an error code if `dstCapacity` is too small ( 1 */ +U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat); -mz_uint64 mz_zip_get_archive_size(mz_zip_archive *pZip); -mz_uint64 mz_zip_get_archive_file_start_offset(mz_zip_archive *pZip); -MZ_FILE *mz_zip_get_cfile(mz_zip_archive *pZip); +} -/* Reads n bytes of raw archive data, starting at file offset file_ofs, to pBuf. */ -size_t mz_zip_read_archive_data(mz_zip_archive *pZip, mz_uint64 file_ofs, void *pBuf, size_t n); +#endif /* ZSTD_COMPRESS_H */ -/* All mz_zip funcs set the m_last_error field in the mz_zip_archive struct. These functions retrieve/manipulate this field. */ -/* Note that the m_last_error functionality is not thread safe. */ -mz_zip_error mz_zip_set_last_error(mz_zip_archive *pZip, mz_zip_error err_num); -mz_zip_error mz_zip_peek_last_error(mz_zip_archive *pZip); -mz_zip_error mz_zip_clear_last_error(mz_zip_archive *pZip); -mz_zip_error mz_zip_get_last_error(mz_zip_archive *pZip); -const char *mz_zip_get_error_string(mz_zip_error mz_err); -/* MZ_TRUE if the archive file entry is a directory entry. */ -mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index); +// LICENSE_CHANGE_END -/* MZ_TRUE if the file is encrypted/strong encrypted. */ -mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index); -/* MZ_TRUE if the compression method is supported, and the file is not encrypted, and the file is not a compressed patch file. */ -mz_bool mz_zip_reader_is_file_supported(mz_zip_archive *pZip, mz_uint file_index); -/* Retrieves the filename of an archive file entry. 
*/ -/* Returns the number of bytes written to pFilename, or if filename_buf_size is 0 this function returns the number of bytes needed to fully store the filename. */ -mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size); +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list -/* Attempts to locates a file in the archive's central directory. */ -/* Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH */ -/* Returns -1 if the file cannot be found. */ -int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags); -int mz_zip_reader_locate_file_v2(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags, mz_uint32 *file_index); +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ -/* Returns detailed information about an archive file entry. */ -mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat); +#ifndef ZSTD_COMPRESS_SEQUENCES_H +#define ZSTD_COMPRESS_SEQUENCES_H -/* MZ_TRUE if the file is in zip64 format. */ -/* A file is considered zip64 if it contained a zip64 end of central directory marker, or if it contained any zip64 extended file information fields in the central directory. */ -mz_bool mz_zip_is_zip64(mz_zip_archive *pZip); + /* FSE_repeat, FSE_CTable */ + /* symbolEncodingType_e, ZSTD_strategy */ -/* Returns the total central directory size in bytes. */ -/* The current max supported size is <= MZ_UINT32_MAX. 
*/ -size_t mz_zip_get_central_dir_size(mz_zip_archive *pZip); +namespace duckdb_zstd { -/* Extracts a archive file to a memory buffer using no memory allocation. */ -/* There must be at least enough room on the stack to store the inflator's state (~34KB or so). */ -mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size); -mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size); +typedef enum { + ZSTD_defaultDisallowed = 0, + ZSTD_defaultAllowed = 1 +} ZSTD_defaultPolicy_e; -/* Extracts a archive file to a memory buffer. */ -mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags); -mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags); +symbolEncodingType_e +ZSTD_selectEncodingType( + FSE_repeat* repeatMode, unsigned const* count, unsigned const max, + size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, + FSE_CTable const* prevCTable, + short const* defaultNorm, U32 defaultNormLog, + ZSTD_defaultPolicy_e const isDefaultAllowed, + ZSTD_strategy const strategy); -/* Extracts a archive file to a dynamically allocated heap buffer. */ -/* The memory will be allocated via the mz_zip_archive's alloc/realloc functions. */ -/* Returns NULL and sets the last error on failure. 
*/ -void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags); -void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags); +size_t +ZSTD_buildCTable(void* dst, size_t dstCapacity, + FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, + unsigned* count, U32 max, + const BYTE* codeTable, size_t nbSeq, + const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, + const FSE_CTable* prevCTable, size_t prevCTableSize, + void* entropyWorkspace, size_t entropyWorkspaceSize); -/* Extracts a archive file using a callback function to output the file's data. */ -mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags); -mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags); +size_t ZSTD_encodeSequences( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2); -/* Extract a file iteratively */ -mz_zip_reader_extract_iter_state* mz_zip_reader_extract_iter_new(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags); -mz_zip_reader_extract_iter_state* mz_zip_reader_extract_file_iter_new(mz_zip_archive *pZip, const char *pFilename, mz_uint flags); -size_t mz_zip_reader_extract_iter_read(mz_zip_reader_extract_iter_state* pState, void* pvBuf, size_t buf_size); -mz_bool mz_zip_reader_extract_iter_free(mz_zip_reader_extract_iter_state* pState); +size_t ZSTD_fseBitCost( + FSE_CTable const* ctable, + unsigned const* count, + unsigned const max); -#ifndef MINIZ_NO_STDIO -/* Extracts a archive file to a disk file and sets its last accessed and modified 
times. */ -/* This function only extracts files, not archive directory records. */ -mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags); -mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags); +size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max); -/* Extracts a archive file starting at the current position in the destination FILE stream. */ -mz_bool mz_zip_reader_extract_to_cfile(mz_zip_archive *pZip, mz_uint file_index, MZ_FILE *File, mz_uint flags); -mz_bool mz_zip_reader_extract_file_to_cfile(mz_zip_archive *pZip, const char *pArchive_filename, MZ_FILE *pFile, mz_uint flags); -#endif +} -#if 0 -/* TODO */ - typedef void *mz_zip_streaming_extract_state_ptr; - mz_zip_streaming_extract_state_ptr mz_zip_streaming_extract_begin(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags); - uint64_t mz_zip_streaming_extract_get_size(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState); - uint64_t mz_zip_streaming_extract_get_cur_ofs(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState); - mz_bool mz_zip_streaming_extract_seek(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState, uint64_t new_ofs); - size_t mz_zip_streaming_extract_read(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState, void *pBuf, size_t buf_size); - mz_bool mz_zip_streaming_extract_end(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState); -#endif +#endif /* ZSTD_COMPRESS_SEQUENCES_H */ -/* This function compares the archive's local headers, the optional local zip64 extended information block, and the optional descriptor following the compressed data vs. the data in the central directory. */ -/* It also validates that each file can be successfully uncompressed unless the MZ_ZIP_FLAG_VALIDATE_HEADERS_ONLY is specified. 
*/ -mz_bool mz_zip_validate_file(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags); -/* Validates an entire archive by calling mz_zip_validate_file() on each file. */ -mz_bool mz_zip_validate_archive(mz_zip_archive *pZip, mz_uint flags); +// LICENSE_CHANGE_END -/* Misc utils/helpers, valid for ZIP reading or writing */ -mz_bool mz_zip_validate_mem_archive(const void *pMem, size_t size, mz_uint flags, mz_zip_error *pErr); -mz_bool mz_zip_validate_file_archive(const char *pFilename, mz_uint flags, mz_zip_error *pErr); -/* Universal end function - calls either mz_zip_reader_end() or mz_zip_writer_end(). */ -mz_bool mz_zip_end(mz_zip_archive *pZip); -/* -------- ZIP writing */ +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list -#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ -/* Inits a ZIP archive writer. */ -/*Set pZip->m_pWrite (and pZip->m_pIO_opaque) before calling mz_zip_writer_init or mz_zip_writer_init_v2*/ -/*The output is streamable, i.e. 
file_ofs in mz_file_write_func always increases only by n*/ -mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size); -mz_bool mz_zip_writer_init_v2(mz_zip_archive *pZip, mz_uint64 existing_size, mz_uint flags); +#ifndef ZSTD_COMPRESS_LITERALS_H +#define ZSTD_COMPRESS_LITERALS_H -mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size); -mz_bool mz_zip_writer_init_heap_v2(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size, mz_uint flags); + /* ZSTD_hufCTables_t, ZSTD_minGain() */ -#ifndef MINIZ_NO_STDIO -mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning); -mz_bool mz_zip_writer_init_file_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning, mz_uint flags); -mz_bool mz_zip_writer_init_cfile(mz_zip_archive *pZip, MZ_FILE *pFile, mz_uint flags); -#endif +namespace duckdb_zstd { -/* Converts a ZIP archive reader object into a writer object, to allow efficient in-place file appends to occur on an existing archive. */ -/* For archives opened using mz_zip_reader_init_file, pFilename must be the archive's filename so it can be reopened for writing. If the file can't be reopened, mz_zip_reader_end() will be called. */ -/* For archives opened using mz_zip_reader_init_mem, the memory block must be growable using the realloc callback (which defaults to realloc unless you've overridden it). */ -/* Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's user provided m_pWrite function cannot be NULL. */ -/* Note: In-place archive modification is not recommended unless you know what you're doing, because if execution stops or something goes wrong before */ -/* the archive is finalized the file's central directory will be hosed. 
*/ -mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename); -mz_bool mz_zip_writer_init_from_reader_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint flags); +size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize); -/* Adds the contents of a memory buffer to an archive. These functions record the current local time into the archive. */ -/* To add a directory entry, call this method with an archive name ending in a forwardslash with an empty buffer. */ -/* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. */ -mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags); +size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); -/* Like mz_zip_writer_add_mem(), except you can specify a file comment field, and optionally supply the function with already compressed data. */ -/* uncomp_size/uncomp_crc32 are only used if the MZ_ZIP_FLAG_COMPRESSED_DATA flag is specified. 
*/ -mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, - mz_uint64 uncomp_size, mz_uint32 uncomp_crc32); +size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_strategy strategy, int disableLiteralCompression, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + void* entropyWorkspace, size_t entropyWorkspaceSize, + const int bmi2); -mz_bool mz_zip_writer_add_mem_ex_v2(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, - mz_uint64 uncomp_size, mz_uint32 uncomp_crc32, MZ_TIME_T *last_modified, const char *user_extra_data_local, mz_uint user_extra_data_local_len, - const char *user_extra_data_central, mz_uint user_extra_data_central_len); +} -#ifndef MINIZ_NO_STDIO -/* Adds the contents of a disk file to an archive. This function also records the disk file's modified time into the archive. */ -/* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. */ -mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); +#endif /* ZSTD_COMPRESS_LITERALS_H */ -/* Like mz_zip_writer_add_file(), except the file data is read from the specified FILE stream. 
*/ -mz_bool mz_zip_writer_add_cfile(mz_zip_archive *pZip, const char *pArchive_name, MZ_FILE *pSrc_file, mz_uint64 size_to_add, - const MZ_TIME_T *pFile_time, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, const char *user_extra_data_local, mz_uint user_extra_data_local_len, - const char *user_extra_data_central, mz_uint user_extra_data_central_len); -#endif -/* Adds a file to an archive by fully cloning the data from another archive. */ -/* This function fully clones the source file's compressed data (no recompression), along with its full filename, extra data (it may add or modify the zip64 local header extra data field), and the optional descriptor following the compressed data. */ -mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint src_file_index); +// LICENSE_CHANGE_END -/* Finalizes the archive by writing the central directory records followed by the end of central directory record. */ -/* After an archive is finalized, the only valid call on the mz_zip_archive struct is mz_zip_writer_end(). */ -/* An archive must be manually finalized by calling this function for it to be valid. */ -mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip); -/* Finalizes a heap archive, returning a poiner to the heap block and its size. */ -/* The heap block will be allocated using the mz_zip_archive's alloc/realloc callbacks. */ -mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **ppBuf, size_t *pSize); -/* Ends archive writing, freeing all allocations, and closing the output file if mz_zip_writer_init_file() was used. */ -/* Note for the archive to be valid, it *must* have been finalized before ending (this function will not do it for you). */ -mz_bool mz_zip_writer_end(mz_zip_archive *pZip); +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list -/* -------- Misc. 
high-level helper functions: */ +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ -/* mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) appends a memory blob to a ZIP archive. */ -/* Note this is NOT a fully safe operation. If it crashes or dies in some way your archive can be left in a screwed up state (without a central directory). */ -/* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. */ -/* TODO: Perhaps add an option to leave the existing central dir in place in case the add dies? We could then truncate the file (so the old central dir would be at the end) if something goes wrong. */ -mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); -mz_bool mz_zip_add_mem_to_archive_file_in_place_v2(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_zip_error *pErr); +#ifndef ZSTD_FAST_H +#define ZSTD_FAST_H -/* Reads a single file from an archive into a heap block. */ -/* If pComment is not NULL, only the file with the specified comment will be extracted. */ -/* Returns NULL on failure. 
*/ -void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint flags); -void *mz_zip_extract_archive_file_to_heap_v2(const char *pZip_filename, const char *pArchive_name, const char *pComment, size_t *pSize, mz_uint flags, mz_zip_error *pErr); + /* U32 */ -#endif /* #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS */ +namespace duckdb_zstd { +void ZSTD_fillHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm); +size_t ZSTD_compressBlock_fast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); -#endif /* MINIZ_NO_ARCHIVE_APIS */ +} -} // namespace duckdb_miniz +#endif /* ZSTD_FAST_H */ // LICENSE_CHANGE_END -#include -#include - -namespace duckdb { - -enum class MiniZStreamType { - MINIZ_TYPE_NONE, - MINIZ_TYPE_INFLATE, - MINIZ_TYPE_DEFLATE -}; - -struct MiniZStream { - MiniZStream() : type(MiniZStreamType::MINIZ_TYPE_NONE) { - memset(&stream, 0, sizeof(duckdb_miniz::mz_stream)); - } - ~MiniZStream() { - switch(type) { - case MiniZStreamType::MINIZ_TYPE_INFLATE: - duckdb_miniz::mz_inflateEnd(&stream); - break; - case MiniZStreamType::MINIZ_TYPE_DEFLATE: - duckdb_miniz::mz_deflateEnd(&stream); - break; - default: - break; - } - } - void FormatException(std::string error_msg) { - throw std::runtime_error(error_msg); - } - void FormatException(const char *error_msg, int mz_ret) { - auto err = duckdb_miniz::mz_error(mz_ret); - FormatException(error_msg + std::string(": ") + (err ? 
err : "Unknown error code")); - } - void Decompress(const char *compressed_data, size_t compressed_size, char *out_data, size_t out_size) { - auto mz_ret = mz_inflateInit2(&stream, -MZ_DEFAULT_WINDOW_BITS); - if (mz_ret != duckdb_miniz::MZ_OK) { - FormatException("Failed to initialize miniz", mz_ret); - } - type = MiniZStreamType::MINIZ_TYPE_INFLATE; - - if (compressed_size < GZIP_HEADER_MINSIZE) { - FormatException("Failed to decompress GZIP block: compressed size is less than gzip header size"); - } - auto gzip_hdr = (const unsigned char *)compressed_data; - if (gzip_hdr[0] != 0x1F || gzip_hdr[1] != 0x8B || gzip_hdr[2] != GZIP_COMPRESSION_DEFLATE || - gzip_hdr[3] & GZIP_FLAG_UNSUPPORTED) { - FormatException("Input is invalid/unsupported GZIP stream"); - } - - stream.next_in = (const unsigned char *)compressed_data + GZIP_HEADER_MINSIZE; - stream.avail_in = compressed_size - GZIP_HEADER_MINSIZE; - stream.next_out = (unsigned char *)out_data; - stream.avail_out = out_size; - mz_ret = mz_inflate(&stream, duckdb_miniz::MZ_FINISH); - if (mz_ret != duckdb_miniz::MZ_OK && mz_ret != duckdb_miniz::MZ_STREAM_END) { - FormatException("Failed to decompress GZIP block", mz_ret); - } - } - size_t MaxCompressedLength(size_t input_size) { - return duckdb_miniz::mz_compressBound(input_size) + GZIP_HEADER_MINSIZE + GZIP_FOOTER_SIZE; - } - void Compress(const char *uncompressed_data, size_t uncompressed_size, char *out_data, size_t *out_size) { - auto mz_ret = mz_deflateInit2(&stream, duckdb_miniz::MZ_DEFAULT_LEVEL, MZ_DEFLATED, -MZ_DEFAULT_WINDOW_BITS, 1, 0); - if (mz_ret != duckdb_miniz::MZ_OK) { - FormatException("Failed to initialize miniz", mz_ret); - } - type = MiniZStreamType::MINIZ_TYPE_DEFLATE; - auto gzip_header = (unsigned char*) out_data; - memset(gzip_header, 0, GZIP_HEADER_MINSIZE); - gzip_header[0] = 0x1F; - gzip_header[1] = 0x8B; - gzip_header[2] = GZIP_COMPRESSION_DEFLATE; - gzip_header[3] = 0; - gzip_header[4] = 0; - gzip_header[5] = 0; - gzip_header[6] = 0; - 
gzip_header[7] = 0; - gzip_header[8] = 0; - gzip_header[9] = 0xFF; +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list - auto gzip_body = gzip_header + GZIP_HEADER_MINSIZE; +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ - stream.next_in = (const unsigned char*) uncompressed_data; - stream.avail_in = uncompressed_size; - stream.next_out = gzip_body; - stream.avail_out = *out_size - GZIP_HEADER_MINSIZE; +#ifndef ZSTD_DOUBLE_FAST_H +#define ZSTD_DOUBLE_FAST_H - mz_ret = mz_deflate(&stream, duckdb_miniz::MZ_FINISH); - if (mz_ret != duckdb_miniz::MZ_OK && mz_ret != duckdb_miniz::MZ_STREAM_END) { - FormatException("Failed to compress GZIP block", mz_ret); - } - auto gzip_footer = gzip_body + stream.total_out; - auto crc = duckdb_miniz::mz_crc32(MZ_CRC32_INIT, (const unsigned char*) uncompressed_data, uncompressed_size); - gzip_footer[0] = crc & 0xFF; - gzip_footer[1] = (crc >> 8) & 0xFF; - gzip_footer[2] = (crc >> 16) & 0xFF; - gzip_footer[3] = (crc >> 24) & 0xFF; - gzip_footer[4] = uncompressed_size & 0xFF; - gzip_footer[5] = (uncompressed_size >> 8) & 0xFF; - gzip_footer[6] = (uncompressed_size >> 16) & 0xFF; - gzip_footer[7] = (uncompressed_size >> 24) & 0xFF; + /* U32 */ + /* ZSTD_CCtx, size_t */ - *out_size = stream.total_out + GZIP_HEADER_MINSIZE + GZIP_FOOTER_SIZE; - } +namespace duckdb_zstd { -private: - static constexpr uint8_t GZIP_HEADER_MINSIZE = 10; - static constexpr uint8_t GZIP_FOOTER_SIZE = 8; - static constexpr uint8_t GZIP_COMPRESSION_DEFLATE = 0x08; - static constexpr unsigned char GZIP_FLAG_UNSUPPORTED = 0x1 | 0x2 
| 0x4 | 0x10 | 0x20; - duckdb_miniz::mz_stream stream; - MiniZStreamType type; -}; +void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm); +size_t ZSTD_compressBlock_doubleFast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_doubleFast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_doubleFast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); } +#endif /* ZSTD_DOUBLE_FAST_H */ + // LICENSE_CHANGE_END // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 // See the end of this file for a list /* @@ -11527,7376 +10489,8184 @@ struct MiniZStream { * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses. 
*/ -#ifndef ZSTD_H_235446 -#define ZSTD_H_235446 -/* ====== Dependency ======*/ -#include /* INT_MAX */ -#include /* size_t */ +#ifndef ZSTD_LAZY_H +#define ZSTD_LAZY_H -/* ===== ZSTDLIB_API : control library symbols visibility ===== */ -#ifndef ZSTDLIB_VISIBILITY -# if defined(__GNUC__) && (__GNUC__ >= 4) -# define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default"))) -# else -# define ZSTDLIB_VISIBILITY -# endif -#endif -#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) -# define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY -#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) -# define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ -#else -# define ZSTDLIB_API ZSTDLIB_VISIBILITY -#endif namespace duckdb_zstd { -/******************************************************************************* - Introduction +U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); - zstd, short for Zstandard, is a fast lossless compression algorithm, targeting - real-time compression scenarios at zlib-level and better compression ratios. - The zstd compression library provides in-memory compression and decompression - functions. +void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ - The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), - which is currently 22. Levels >= 20, labeled `--ultra`, should be used with - caution, as they require more memory. The library also offers negative - compression levels, which extend the range of speed vs. ratio preferences. - The lower the level, the faster the speed (at the cost of compression). 
+size_t ZSTD_compressBlock_btlazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); - Compression can be done in: - - a single step (described as Simple API) - - a single step, reusing a context (described as Explicit context) - - unbounded multiple steps (described as Streaming compression) +size_t ZSTD_compressBlock_btlazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); - The compression ratio achievable on small data can be highly improved using - a dictionary. 
Dictionary compression can be performed in: - - a single step (described as Simple dictionary API) - - a single step, reusing a dictionary (described as Bulk-processing - dictionary API) +size_t ZSTD_compressBlock_greedy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btlazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); - Advanced experimental functions can be accessed using - `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. +} - Advanced experimental APIs should never be used with a dynamically-linked - library. They are not "stable"; their definitions or signatures may change in - the future. Only static linking is allowed. 
-*******************************************************************************/ +#endif /* ZSTD_LAZY_H */ -/*------ Version ------*/ -#define ZSTD_VERSION_MAJOR 1 -#define ZSTD_VERSION_MINOR 4 -#define ZSTD_VERSION_RELEASE 5 -#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) -ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */ +// LICENSE_CHANGE_END -#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE -#define ZSTD_QUOTE(str) #str -#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) -#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) -ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */ -/* ************************************* - * Default constant - ***************************************/ -#ifndef ZSTD_CLEVEL_DEFAULT -# define ZSTD_CLEVEL_DEFAULT 3 -#endif -/* ************************************* - * Constants - ***************************************/ +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list -/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ -#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ -#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ -#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ -#define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ -#define ZSTD_BLOCKSIZELOG_MAX 17 -#define ZSTD_BLOCKSIZE_MAX (1<= `ZSTD_compressBound(srcSize)`. - * @return : compressed size written into `dst` (<= `dstCapacity), - * or an error code if it fails (which can be tested using ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - int compressionLevel); +namespace duckdb_zstd { -/*! ZSTD_decompress() : - * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. - * `dstCapacity` is an upper bound of originalSize to regenerate. - * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. - * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), - * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, - const void* src, size_t compressedSize); +/* used in ZSTD_loadDictionaryContent() */ +void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend); -/*! ZSTD_getFrameContentSize() : requires v1.3.0+ - * `src` should point to the start of a ZSTD encoded frame. - * `srcSize` must be at least as large as the frame header. - * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. - * @return : - decompressed size of `src` frame content, if known - * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined - * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) - * note 1 : a 0 return value means the frame is valid but "empty". - * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. - * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. - * In which case, it's necessary to use streaming mode to decompress data. 
- * Optionally, application can rely on some implicit limit, - * as ZSTD_decompress() only needs an upper bound of decompressed size. - * (For example, data could be necessarily cut into blocks <= 16 KB). - * note 3 : decompressed size is always present when compression is completed using single-pass functions, - * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). - * note 4 : decompressed size can be very large (64-bits value), - * potentially larger than what local system can handle as a single memory segment. - * In which case, it's necessary to use streaming mode to decompress data. - * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. - * Always ensure return value fits within application's authorized limits. - * Each application can set its own limits. - * note 6 : This function replaces ZSTD_getDecompressedSize() */ -#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) -#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) -ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); +size_t ZSTD_compressBlock_btopt( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); -/*! ZSTD_getDecompressedSize() : - * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). - * Both functions work the same way, but ZSTD_getDecompressedSize() blends - * "empty", "unknown" and "error" results to the same return value (0), - * while ZSTD_getFrameContentSize() gives them separate return values. - * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. 
*/ -ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); -/*! ZSTD_findFrameCompressedSize() : - * `src` should point to the start of a ZSTD frame or skippable frame. - * `srcSize` must be >= first frame size - * @return : the compressed size of the first frame starting at `src`, - * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, - * or an error code if input is invalid */ -ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); +size_t ZSTD_compressBlock_btopt_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btopt_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); -/*====== Helper functions ======*/ -#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? 
(((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ -ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ -ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ -ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ -ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ -ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ + /* note : no btultra2 variant for extDict nor dictMatchState, + * because btultra2 is not meant to work with dictionaries + * and is only specific for the first block (no prefix) */ +} -/*************************************** -* Explicit context -***************************************/ -/*= Compression context - * When compressing many times, - * it is recommended to allocate a context just once, - * and re-use it for each successive compression operation. - * This will make workload friendlier for system's memory. - * Note : re-using context is just a speed / resource optimization. - * It doesn't change the compression ratio, which remains identical. - * Note 2 : In multi-threaded environments, - * use one different context per thread for parallel execution. - */ -typedef struct ZSTD_CCtx_s ZSTD_CCtx; -ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); -ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); +#endif /* ZSTD_OPT_H */ -/*! ZSTD_compressCCtx() : - * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. - * Important : in order to behave similarly to `ZSTD_compress()`, - * this function compresses at requested compression level, - * __ignoring any other parameter__ . - * If any advanced parameter was set using the advanced API, - * they will all be reset. Only `compressionLevel` remains. 
- */ -ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - int compressionLevel); -/*= Decompression context - * When decompressing many times, - * it is recommended to allocate a context only once, - * and re-use it for each successive compression operation. - * This will make workload friendlier for system's memory. - * Use one context per thread for parallel execution. */ -typedef struct ZSTD_DCtx_s ZSTD_DCtx; -ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); -ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); +// LICENSE_CHANGE_END -/*! ZSTD_decompressDCtx() : - * Same as ZSTD_decompress(), - * requires an allocated ZSTD_DCtx. - * Compatible with sticky parameters. - */ -ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize); -/*************************************** -* Advanced compression API -***************************************/ +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list -/* API design : - * Parameters are pushed one by one into an existing context, - * using ZSTD_CCtx_set*() functions. - * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. - * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! - * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . - * - * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. * - * This API supercedes all other "advanced" API entry points in the experimental section. - * In the future, we expect to remove from experimental API entry points which are redundant with this API. 
+ * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ +#ifndef ZSTD_LDM_H +#define ZSTD_LDM_H -/* Compression strategies, listed from fastest to strongest */ -typedef enum { ZSTD_fast=1, - ZSTD_dfast=2, - ZSTD_greedy=3, - ZSTD_lazy=4, - ZSTD_lazy2=5, - ZSTD_btlazy2=6, - ZSTD_btopt=7, - ZSTD_btultra=8, - ZSTD_btultra2=9 - /* note : new strategies _might_ be added in the future. - Only the order (from fast to strong) is guaranteed */ -} ZSTD_strategy; + /* ldmParams_t, U32 */ + /* ZSTD_CCtx, size_t */ +/*-************************************* +* Long distance matching +***************************************/ -typedef enum { +#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT - /* compression parameters - * Note: When compressing with a ZSTD_CDict these parameters are superseded - * by the parameters used to construct the ZSTD_CDict. - * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ - ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table. - * Note that exact compression parameters are dynamically determined, - * depending on both compression level and srcSize (when known). - * Default level is ZSTD_CLEVEL_DEFAULT==3. - * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. - * Note 1 : it's possible to pass a negative compression level. - * Note 2 : setting a level does not automatically set all other compression parameters - * to default. Setting this will however eventually dynamically impact the compression - * parameters which have not been manually set. The manually set - * ones will 'stick'. */ - /* Advanced compression parameters : - * It's possible to pin down compression parameters to some specific values. 
- * In which case, these values are no longer dynamically selected by the compressor */ - ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. - * This will set a memory budget for streaming decompression, - * with larger values requiring more memory - * and typically compressing more. - * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. - * Special: value 0 means "use default windowLog". - * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT - * requires explicitly allowing such size at streaming decompression stage. */ - ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. - * Resulting memory usage is (1 << (hashLog+2)). - * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. - * Larger tables improve compression ratio of strategies <= dFast, - * and improve speed of strategies > dFast. - * Special: value 0 means "use default hashLog". */ - ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2. - * Resulting memory usage is (1 << (chainLog+2)). - * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. - * Larger tables result in better and slower compression. - * This parameter is useless for "fast" strategy. - * It's still useful when using "dfast" strategy, - * in which case it defines a secondary probe table. - * Special: value 0 means "use default chainLog". */ - ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2. - * More attempts result in better and slower compression. - * This parameter is useless for "fast" and "dFast" strategies. - * Special: value 0 means "use default searchLog". */ - ZSTD_c_minMatch=105, /* Minimum size of searched matches. - * Note that Zstandard can still find matches of smaller size, - * it just tweaks its search algorithm to look for this size and larger. - * Larger values increase compression and decompression speed, but decrease ratio. 
- * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX. - * Note that currently, for all strategies < btopt, effective minimum is 4. - * , for all strategies > fast, effective maximum is 6. - * Special: value 0 means "use default minMatchLength". */ - ZSTD_c_targetLength=106, /* Impact of this field depends on strategy. - * For strategies btopt, btultra & btultra2: - * Length of Match considered "good enough" to stop search. - * Larger values make compression stronger, and slower. - * For strategy fast: - * Distance between match sampling. - * Larger values make compression faster, and weaker. - * Special: value 0 means "use default targetLength". */ - ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition. - * The higher the value of selected strategy, the more complex it is, - * resulting in stronger and slower compression. - * Special: value 0 means "use default strategy". */ +namespace duckdb_zstd { - /* LDM mode parameters */ - ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. - * This parameter is designed to improve compression ratio - * for large inputs, by finding large matches at long distance. - * It increases memory usage and window size. - * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB - * except when expressly set to a different value. */ - ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2. - * Larger values increase memory usage and compression ratio, - * but decrease compression speed. - * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX - * default: windowlog - 7. - * Special: value 0 means "automatically determine hashlog". */ - ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher. - * Larger/too small values usually decrease compression ratio. - * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. - * Special: value 0 means "use default value" (default: 64). 
*/ - ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution. - * Larger values improve collision resolution but decrease compression speed. - * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX. - * Special: value 0 means "use default value" (default: 3). */ - ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table. - * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). - * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage. - * Larger values improve compression speed. - * Deviating far from default value will likely result in a compression ratio decrease. - * Special: value 0 means "automatically determine hashRateLog". */ +void ZSTD_ldm_fillHashTable( + ldmState_t* state, const BYTE* ip, + const BYTE* iend, ldmParams_t const* params); - /* frame parameters */ - ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) - * Content size must be known at the beginning of compression. - * This is automatically the case when using ZSTD_compress2(), - * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ - ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ - ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ +/** + * ZSTD_ldm_generateSequences(): + * + * Generates the sequences using the long distance match finder. + * Generates long range matching sequences in `sequences`, which parse a prefix + * of the source. `sequences` must be large enough to store every sequence, + * which can be checked with `ZSTD_ldm_getMaxNbSeq()`. + * @returns 0 or an error code. + * + * NOTE: The user must have called ZSTD_window_update() for all of the input + * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks. 
+ * NOTE: This function returns an error if it runs out of space to store + * sequences. + */ +size_t ZSTD_ldm_generateSequences( + ldmState_t* ldms, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize); - /* multi-threading parameters */ - /* These parameters are only useful if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). - * They return an error otherwise. */ - ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. - * When nbWorkers >= 1, triggers asynchronous mode when used with ZSTD_compressStream*() : - * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, - * while compression work is performed in parallel, within worker threads. - * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : - * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). - * More workers improve speed, but also increase memory usage. - * Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */ - ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. - * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. - * 0 means default, which is dynamically determined based on compression parameters. - * Job size must be a minimum of overlap size, or 1 MB, whichever is largest. - * The minimum size is automatically and transparently enforced. */ - ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. - * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. - * It helps preserve compression ratio, while each job is compressed in parallel. 
- * This value is enforced only when nbWorkers >= 1. - * Larger values increase compression ratio, but decrease speed. - * Possible values range from 0 to 9 : - * - 0 means "default" : value will be determined by the library, depending on strategy - * - 1 means "no overlap" - * - 9 means "full overlap", using a full window size. - * Each intermediate rank increases/decreases load size by a factor 2 : - * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default - * default value varies between 6 and 9, depending on strategy */ +/** + * ZSTD_ldm_blockCompress(): + * + * Compresses a block using the predefined sequences, along with a secondary + * block compressor. The literals section of every sequence is passed to the + * secondary block compressor, and those sequences are interspersed with the + * predefined sequences. Returns the length of the last literals. + * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed. + * `rawSeqStore.seq` may also be updated to split the last sequence between two + * blocks. + * @return The length of the last literals. + * + * NOTE: The source must be at most the maximum block size, but the predefined + * sequences can be any size, and may be longer than the block. In the case that + * they are longer than the block, the last sequences may need to be split into + * two. We handle that case correctly, and update `rawSeqStore` appropriately. + * NOTE: This function does not return any errors. + */ +size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); - /* note : additional experimental parameters are also available - * within the experimental section of the API. 
- * At the time of this writing, they include : - * ZSTD_c_rsyncable - * ZSTD_c_format - * ZSTD_c_forceMaxWindow - * ZSTD_c_forceAttachDict - * ZSTD_c_literalCompressionMode - * ZSTD_c_targetCBlockSize - * ZSTD_c_srcSizeHint - * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. - * note : never ever use experimentalParam? names directly; - * also, the enums values themselves are unstable and can still change. - */ - ZSTD_c_experimentalParam1=500, - ZSTD_c_experimentalParam2=10, - ZSTD_c_experimentalParam3=1000, - ZSTD_c_experimentalParam4=1001, - ZSTD_c_experimentalParam5=1002, - ZSTD_c_experimentalParam6=1003, - ZSTD_c_experimentalParam7=1004 -} ZSTD_cParameter; +/** + * ZSTD_ldm_skipSequences(): + * + * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`. + * Avoids emitting matches less than `minMatch` bytes. + * Must be called for data that is not passed to ZSTD_ldm_blockCompress(). + */ +void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, + U32 const minMatch); -typedef struct { - size_t error; - int lowerBound; - int upperBound; -} ZSTD_bounds; -/*! ZSTD_cParam_getBounds() : - * All parameters must belong to an interval with lower and upper bounds, - * otherwise they will either trigger an error or be automatically clamped. - * @return : a structure, ZSTD_bounds, which contains - * - an error status field, which must be tested using ZSTD_isError() - * - lower and upper bounds, both inclusive +/** ZSTD_ldm_getTableSize() : + * Estimate the space needed for long distance matching tables or 0 if LDM is + * disabled. */ -ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam); +size_t ZSTD_ldm_getTableSize(ldmParams_t params); -/*! ZSTD_CCtx_setParameter() : - * Set one compression parameter, selected by enum ZSTD_cParameter. - * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds().
- * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). - * Setting a parameter is generally only possible during frame initialization (before starting compression). - * Exception : when using multi-threading mode (nbWorkers >= 1), - * the following parameters can be updated _during_ compression (within same frame): - * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. - * new parameters will be active for next job only (after a flush()). - * @return : an error code (which can be tested using ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value); - -/*! ZSTD_CCtx_setPledgedSrcSize() : - * Total input data size to be compressed as a single frame. - * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag. - * This value will also be controlled at end of frame, and trigger an error if not respected. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame. - * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. - * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame. - * Note 2 : pledgedSrcSize is only valid once, for the next frame. - * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN. - * Note 3 : Whenever all input data is provided and consumed in a single round, - * for example with ZSTD_compress2(), - * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), - * this value is automatically overridden by srcSize instead. - */ -ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); - -typedef enum { - ZSTD_reset_session_only = 1, - ZSTD_reset_parameters = 2, - ZSTD_reset_session_and_parameters = 3 -} ZSTD_ResetDirective; - -/*! 
ZSTD_CCtx_reset() : - * There are 2 different things that can be reset, independently or jointly : - * - The session : will stop compressing current frame, and make CCtx ready to start a new one. - * Useful after an error, or to interrupt any ongoing compression. - * Any internal data not yet flushed is cancelled. - * Compression parameters and dictionary remain unchanged. - * They will be used to compress next frame. - * Resetting session never fails. - * - The parameters : changes all parameters back to "default". - * This removes any reference to any dictionary too. - * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) - * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) - * - Both : similar to resetting the session, followed by resetting parameters. +/** ZSTD_ldm_getMaxNbSeq() : + * Return an upper bound on the number of sequences that can be produced by + * the long distance matcher, or 0 if LDM is disabled. */ -ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); +size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize); -/*! ZSTD_compress2() : - * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. - * ZSTD_compress2() always starts a new frame. - * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. - * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() - * - The function is always blocking, returns when compression is completed. - * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. - * @return : compressed size written into `dst` (<= `dstCapacity), - * or an error code if it fails (which can be tested using ZSTD_isError()).
+/** ZSTD_ldm_adjustParameters() : + * If the params->hashRateLog is not set, set it to its default value based on + * windowLog and params->hashLog. + * + * Ensures that params->bucketSizeLog is <= params->hashLog (setting it to + * params->hashLog if it is not). + * + * Ensures that the minMatchLength >= targetLength during optimal parsing. */ -ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize); +void ZSTD_ldm_adjustParameters(ldmParams_t* params, + ZSTD_compressionParameters const* cParams); +} -/*************************************** -* Advanced decompression API -***************************************/ +#endif /* ZSTD_LDM_H */ -/* The advanced API pushes parameters one by one into an existing DCtx context. - * Parameters are sticky, and remain valid for all following frames - * using the same DCtx context. - * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). - * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). - * Therefore, no new decompression function is necessary. - */ -typedef enum { +// LICENSE_CHANGE_END - ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which - * the streaming API will refuse to allocate memory buffer - * in order to protect the host from unreasonable memory requirements. - * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. - * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). - * Special: value 0 means "use default maximum windowLog". */ - /* note : additional experimental parameters are also available - * within the experimental section of the API. - * At the time of this writing, they include : - * ZSTD_d_format - * ZSTD_d_stableOutBuffer - * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
- * note : never ever use experimentalParam? names directly - */ - ZSTD_d_experimentalParam1=1000, - ZSTD_d_experimentalParam2=1001 -} ZSTD_dParameter; +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list -/*! ZSTD_dParam_getBounds() : - * All parameters must belong to an interval with lower and upper bounds, - * otherwise they will either trigger an error or be automatically clamped. - * @return : a structure, ZSTD_bounds, which contains - * - an error status field, which must be tested using ZSTD_isError() - * - both lower and upper bounds, inclusive +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ -ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); -/*! ZSTD_DCtx_setParameter() : - * Set one compression parameter, selected by enum ZSTD_dParameter. - * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). - * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). - * Setting a parameter is only possible during frame initialization (before starting decompression). - * @return : 0, or an error code (which can be tested using ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); +#ifndef ZSTD_COMPRESS_ADVANCED_H +#define ZSTD_COMPRESS_ADVANCED_H -/*! ZSTD_DCtx_reset() : - * Return a DCtx to clean state. - * Session and parameters can be reset jointly or separately. - * Parameters can only be reset when no active frame is being decompressed. 
- * @return : 0, or an error code, which can be tested with ZSTD_isError() - */ -ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); +/*-************************************* +* Dependencies +***************************************/ + /* ZSTD_CCtx */ -/**************************** -* Streaming -****************************/ +namespace duckdb_zstd { +/*-************************************* +* Target Compressed Block Size +***************************************/ -typedef struct ZSTD_inBuffer_s { - const void* src; /**< start of input buffer */ - size_t size; /**< size of input buffer */ - size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ -} ZSTD_inBuffer; +/* ZSTD_compressSuperBlock() : + * Used to compress a super block when targetCBlockSize is being used. + * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */ +size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + void const* src, size_t srcSize, + unsigned lastBlock); +} -typedef struct ZSTD_outBuffer_s { - void* dst; /**< start of output buffer */ - size_t size; /**< size of output buffer */ - size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ -} ZSTD_outBuffer; +#endif /* ZSTD_COMPRESS_ADVANCED_H */ -/*-*********************************************************************** -* Streaming compression - HowTo -* -* A ZSTD_CStream object is required to track streaming operation. -* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. -* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. -* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. -* -* For parallel execution, use one separate ZSTD_CStream per thread. -* -* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. 
-* -* Parameters are sticky : when starting a new compression on the same context, -* it will re-use the same sticky parameters as previous compression session. -* When in doubt, it's recommended to fully initialize the context before usage. -* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), -* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to -* set more specific parameters, the pledged source size, or load a dictionary. -* -* Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to -* consume input stream. The function will automatically update both `pos` -* fields within `input` and `output`. -* Note that the function may not consume the entire input, for example, because -* the output buffer is already full, in which case `input.pos < input.size`. -* The caller must check if input has been entirely consumed. -* If not, the caller must make some room to receive more compressed data, -* and then present again remaining input data. -* note: ZSTD_e_continue is guaranteed to make some forward progress when called, -* but doesn't guarantee maximal forward progress. This is especially relevant -* when compressing with multiple threads. The call won't block if it can -* consume some input, but if it can't it will wait for some, but not all, -* output to be flushed. -* @return : provides a minimum amount of data remaining to be flushed from internal buffers -* or an error code, which can be tested using ZSTD_isError(). -* -* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, -* using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. -* Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). -* In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. 
-* You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the -* operation. -* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will -* block until the flush is complete or the output buffer is full. -* @return : 0 if internal buffers are entirely flushed, -* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), -* or an error code, which can be tested using ZSTD_isError(). -* -* Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. -* It will perform a flush and write frame epilogue. -* The epilogue is required for decoders to consider a frame completed. -* flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. -* You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to -* start a new frame. -* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will -* block until the flush is complete or the output buffer is full. -* @return : 0 if frame fully completed and fully flushed, -* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), -* or an error code, which can be tested using ZSTD_isError(). 
-* -* *******************************************************************/ +// LICENSE_CHANGE_END -typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */ - /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ -/*===== ZSTD_CStream management functions =====*/ -ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); -ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); -/*===== Streaming compression functions =====*/ -typedef enum { - ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ - ZSTD_e_flush=1, /* flush any data provided so far, - * it creates (at least) one new block, that can be decoded immediately on reception; - * frame will continue: any future data can still reference previously compressed data, improving compression. - * note : multithreaded compression will block to flush as much output as possible. */ - ZSTD_e_end=2 /* flush any remaining data _and_ close current frame. - * note that frame is only closed after compressed data is fully flushed (return value == 0). - * After that point, any additional data starts a new frame. - * note : each frame is independent (does not reference any content from previous frame). - : note : multithreaded compression will block to flush as much output as possible. */ -} ZSTD_EndDirective; -/*! ZSTD_compressStream2() : - * Behaves about the same as ZSTD_compressStream, with additional control on end directive. - * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() - * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) - * - output->pos must be <= dstCapacity, input->pos must be <= srcSize - * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. 
- * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. - * - When nbWorkers>=1, function is non-blocking : it just acquires a copy of input, and distributes jobs to internal worker threads, flush whatever is available, - * and then immediately returns, just indicating that there is some data remaining to be flushed. - * The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte. - * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. - * - @return provides a minimum amount of data remaining to be flushed from internal buffers - * or an error code, which can be tested using ZSTD_isError(). - * if @return != 0, flush is not fully completed, there is still some data left within internal buffers. - * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers. - * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed. - * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), - * only ZSTD_e_end or ZSTD_e_flush operations are allowed. - * Before starting a new compression job, or changing compression parameters, - * it is required to fully flush internal buffers. +namespace duckdb_zstd { +/*-************************************* +* Helper functions +***************************************/ +/* ZSTD_compressBound() + * Note that the result from this function is only compatible with the "normal" + * full-block strategy. + * When there are a lot of small blocks due to frequent flush in streaming mode + * the overhead of headers can make the compressed data to be larger than the + * return value of ZSTD_compressBound(). 
*/ -ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, - ZSTD_outBuffer* output, - ZSTD_inBuffer* input, - ZSTD_EndDirective endOp); +size_t ZSTD_compressBound(size_t srcSize) { + return ZSTD_COMPRESSBOUND(srcSize); +} -/* These buffer sizes are softly recommended. - * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output. - * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(), - * reducing the amount of memory shuffling and buffering, resulting in minor performance savings. - * - * However, note that these recommendations are from the perspective of a C caller program. - * If the streaming interface is invoked from some other language, - * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo, - * a major performance rule is to reduce crossing such interface to an absolute minimum. - * It's not rare that performance ends being spent more into the interface, rather than compression itself. - * In which cases, prefer using large buffers, as large as practical, - * for both input and output, to reduce the nb of roundtrips. - */ -ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ -ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. 
*/ +/*-************************************* +* Context memory management +***************************************/ +struct ZSTD_CDict_s { + const void* dictContent; + size_t dictContentSize; + U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ + ZSTD_cwksp workspace; + ZSTD_matchState_t matchState; + ZSTD_compressedBlockState_t cBlockState; + ZSTD_customMem customMem; + U32 dictID; + int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ +}; /* typedef'd to ZSTD_CDict within "zstd.h" */ +ZSTD_CCtx* ZSTD_createCCtx(void) +{ + return ZSTD_createCCtx_advanced({NULL, NULL, NULL}); +} -/* ***************************************************************************** - * This following is a legacy streaming API. - * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). - * It is redundant, but remains fully supported. - * Advanced parameters and dictionary compression can only be used through the - * new API. - ******************************************************************************/ +static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager) +{ + assert(cctx != NULL); + memset(cctx, 0, sizeof(*cctx)); + cctx->customMem = memManager; + cctx->bmi2 = 0; + { size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters); + assert(!ZSTD_isError(err)); + (void)err; + } +} -/*! - * Equivalent to: - * - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) - * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); - */ -ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); -/*! - * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). - * NOTE: The return value is different. ZSTD_compressStream() returns a hint for - * the next read size (if non-zero and not an error). 
ZSTD_compressStream2() - * returns the minimum nb of bytes left to flush (if non-zero and not an error). - */ -ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); -/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ -ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); -/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ -ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); +ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) +{ + ZSTD_STATIC_ASSERT(zcss_init==0); + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1)); + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_malloc(sizeof(ZSTD_CCtx), customMem); + if (!cctx) return NULL; + ZSTD_initCCtx(cctx, customMem); + return cctx; + } +} +ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize) +{ + ZSTD_cwksp ws; + ZSTD_CCtx* cctx; + if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */ + if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */ + ZSTD_cwksp_init(&ws, workspace, workspaceSize); -/*-*************************************************************************** -* Streaming decompression - HowTo -* -* A ZSTD_DStream object is required to track streaming operations. -* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. -* ZSTD_DStream objects can be re-used multiple times. -* -* Use ZSTD_initDStream() to start a new decompression operation. -* @return : recommended first input size -* Alternatively, use advanced API to set specific properties. -* -* Use ZSTD_decompressStream() repetitively to consume your input. -* The function will update both `pos` fields. -* If `input.pos < input.size`, some input has not been consumed. -* It's up to the caller to present again remaining data. 
-* The function tries to flush all data decoded immediately, respecting output buffer size. -* If `output.pos < output.size`, decoder has flushed everything it could. -* But if `output.pos == output.size`, there might be some data left within internal buffers., -* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. -* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. -* @return : 0 when a frame is completely decoded and fully flushed, -* or an error code, which can be tested using ZSTD_isError(), -* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : -* the return value is a suggested next input size (just a hint for better latency) -* that will never request more than the remaining frame size. -* *******************************************************************************/ + cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx)); + if (cctx == NULL) return NULL; -typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ - /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ -/*===== ZSTD_DStream management functions =====*/ -ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); -ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); + memset(cctx, 0, sizeof(ZSTD_CCtx)); + ZSTD_cwksp_move(&cctx->workspace, &ws); + cctx->staticSize = workspaceSize; -/*===== Streaming decompression functions =====*/ + /* statically sized space. 
entropyWorkspace never moves (but prev/next block swap places) */ + if (!ZSTD_cwksp_check_available(&cctx->workspace, HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL; + cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); + cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); + cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, HUF_WORKSPACE_SIZE); + cctx->bmi2 = 0; + return cctx; +} -/* This function is redundant with the advanced API and equivalent to: - * - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * ZSTD_DCtx_refDDict(zds, NULL); +/** + * Clears and frees all of the dictionaries in the CCtx. */ -ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); +static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx) +{ + ZSTD_free(cctx->localDict.dictBuffer, cctx->customMem); + ZSTD_freeCDict(cctx->localDict.cdict); + memset(&cctx->localDict, 0, sizeof(cctx->localDict)); + memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); + cctx->cdict = NULL; +} -ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict) +{ + size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0; + size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict); + return bufferSize + cdictSize; +} -ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ -ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. 
*/ +static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) +{ + assert(cctx != NULL); + assert(cctx->staticSize == 0); + ZSTD_clearAllDicts(cctx); +#ifdef ZSTD_MULTITHREAD + ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; +#endif + ZSTD_cwksp_free(&cctx->workspace, cctx->customMem); +} +size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return 0; /* support free on NULL */ + RETURN_ERROR_IF(cctx->staticSize, memory_allocation, + "not compatible with static CCtx"); + { + int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx); + ZSTD_freeCCtxContent(cctx); + if (!cctxInWorkspace) { + ZSTD_free(cctx, cctx->customMem); + } + } + return 0; +} -/************************** -* Simple dictionary API -***************************/ -/*! ZSTD_compress_usingDict() : - * Compression at an explicit compression level using a Dictionary. - * A dictionary can be any arbitrary data segment (also called a prefix), - * or a buffer with specified information (see dict/zdict.h). - * Note : This function loads the dictionary, resulting in significant startup delay. - * It's intended for a dictionary used only once. - * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ -ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - int compressionLevel); -/*! ZSTD_decompress_usingDict() : - * Decompression using a known Dictionary. - * Dictionary must be identical to the one used during compression. - * Note : This function loads the dictionary, resulting in significant startup delay. - * It's intended for a dictionary used only once. - * Note : When `dict == NULL || dictSize < 8` no dictionary is used. 
*/ -ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize); +static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + return ZSTDMT_sizeof_CCtx(cctx->mtctx); +#else + (void)cctx; + return 0; +#endif +} -/*********************************** - * Bulk processing dictionary API - **********************************/ -typedef struct ZSTD_CDict_s ZSTD_CDict; +size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return 0; /* support sizeof on NULL */ + /* cctx may be in the workspace */ + return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx)) + + ZSTD_cwksp_sizeof(&cctx->workspace) + + ZSTD_sizeof_localDict(cctx->localDict) + + ZSTD_sizeof_mtctx(cctx); +} -/*! ZSTD_createCDict() : - * When compressing multiple messages or blocks using the same dictionary, - * it's recommended to digest the dictionary only once, since it's a costly operation. - * ZSTD_createCDict() will create a state from digesting a dictionary. - * The resulting state can be used for future compression operations with very limited startup cost. - * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. - * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. - * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. - * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, - * in which case the only thing that it transports is the @compressionLevel. - * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, - * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */ -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, - int compressionLevel); - -/*! 
ZSTD_freeCDict() : - * Function frees memory allocated by ZSTD_createCDict(). */ -ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); - -/*! ZSTD_compress_usingCDict() : - * Compression using a digested Dictionary. - * Recommended when same dictionary is used multiple times. - * Note : compression level is _decided at dictionary creation time_, - * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ -ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict); - - -typedef struct ZSTD_DDict_s ZSTD_DDict; +size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) +{ + return ZSTD_sizeof_CCtx(zcs); /* same object */ +} -/*! ZSTD_createDDict() : - * Create a digested dictionary, ready to start decompression operation without startup delay. - * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); +/* private API call, for dictBuilder only */ +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } -/*! ZSTD_freeDDict() : - * Function frees memory allocated with ZSTD_createDDict() */ -ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); +static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( + ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params cctxParams; + memset(&cctxParams, 0, sizeof(cctxParams)); + cctxParams.cParams = cParams; + cctxParams.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ + assert(!ZSTD_checkCParams(cParams)); + cctxParams.fParams.contentSizeFlag = 1; + return cctxParams; +} -/*! ZSTD_decompress_usingDDict() : - * Decompression using a digested Dictionary. - * Recommended when same dictionary is used multiple times. 
*/ -ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_DDict* ddict); +static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced( + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params* params; + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + params = (ZSTD_CCtx_params*)ZSTD_calloc( + sizeof(ZSTD_CCtx_params), customMem); + if (!params) { return NULL; } + params->customMem = customMem; + params->compressionLevel = ZSTD_CLEVEL_DEFAULT; + params->fParams.contentSizeFlag = 1; + return params; +} +ZSTD_CCtx_params* ZSTD_createCCtxParams(void) +{ + return ZSTD_createCCtxParams_advanced(ZSTDInternalConstants::ZSTD_defaultCMem); +} -/******************************** - * Dictionary helper functions - *******************************/ +size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params) +{ + if (params == NULL) { return 0; } + ZSTD_free(params, params->customMem); + return 0; +} -/*! ZSTD_getDictID_fromDict() : - * Provides the dictID stored within dictionary. - * if @return == 0, the dictionary is not conformant with Zstandard specification. - * It can still be loaded, but as a content-only dictionary. */ -ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); +size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params) +{ + return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT); +} -/*! ZSTD_getDictID_fromDDict() : - * Provides the dictID of the dictionary loaded into `ddict`. - * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. - * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. 
*/ -ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); +size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) { + RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); + memset(cctxParams, 0, sizeof(*cctxParams)); + cctxParams->compressionLevel = compressionLevel; + cctxParams->fParams.contentSizeFlag = 1; + return 0; +} -/*! ZSTD_getDictID_fromFrame() : - * Provides the dictID required to decompressed the frame stored within `src`. - * If @return == 0, the dictID could not be decoded. - * This could for one of the following reasons : - * - The frame does not require a dictionary to be decoded (most common case). - * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. - * Note : this use case also happens when using a non-conformant dictionary. - * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). - * - This is not a Zstandard frame. - * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. 
*/ -ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); +size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) +{ + RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); + FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); + memset(cctxParams, 0, sizeof(*cctxParams)); + assert(!ZSTD_checkCParams(params.cParams)); + cctxParams->cParams = params.cParams; + cctxParams->fParams = params.fParams; + cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ + return 0; +} +/* ZSTD_assignParamsToCCtxParams() : + * params is presumed valid at this stage */ +static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams( + const ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params) +{ + ZSTD_CCtx_params ret = *cctxParams; + assert(!ZSTD_checkCParams(params->cParams)); + ret.cParams = params->cParams; + ret.fParams = params->fParams; + ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ + return ret; +} -/******************************************************************************* - * Advanced dictionary and prefix API - * - * This API allows dictionaries to be used with ZSTD_compress2(), - * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and - * only reset with the context is reset with ZSTD_reset_parameters or - * ZSTD_reset_session_and_parameters. Prefixes are single-use. - ******************************************************************************/ +ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) +{ + ZSTD_bounds bounds = { 0, 0, 0 }; + switch(param) + { + case ZSTD_c_compressionLevel: + bounds.lowerBound = ZSTD_minCLevel(); + bounds.upperBound = ZSTD_maxCLevel(); + return bounds; -/*! ZSTD_CCtx_loadDictionary() : - * Create an internal CDict from `dict` buffer. - * Decompression will have to use same dictionary. 
- * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, - * meaning "return to no-dictionary mode". - * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. - * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). - * Note 2 : Loading a dictionary involves building tables. - * It's also a CPU consuming operation, with non-negligible impact on latency. - * Tables are dependent on compression parameters, and for this reason, - * compression parameters can no longer be changed after loading a dictionary. - * Note 3 :`dict` content will be copied internally. - * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. - * In such a case, dictionary buffer must outlive its users. - * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() - * to precisely select how dictionary content must be interpreted. */ -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + case ZSTD_c_windowLog: + bounds.lowerBound = ZSTD_WINDOWLOG_MIN; + bounds.upperBound = ZSTD_WINDOWLOG_MAX; + return bounds; -/*! ZSTD_CCtx_refCDict() : - * Reference a prepared dictionary, to be used for all next compressed frames. - * Note that compression parameters are enforced from within CDict, - * and supersede any compression parameter previously set within CCtx. - * The parameters ignored are labled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. - * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. - * The dictionary will remain valid for future compressed frames using same CCtx. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special : Referencing a NULL CDict means "return to no-dictionary mode". - * Note 1 : Currently, only one dictionary can be managed. 
- * Referencing a new dictionary effectively "discards" any previous one. - * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */ -ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); + case ZSTD_c_hashLog: + bounds.lowerBound = ZSTD_HASHLOG_MIN; + bounds.upperBound = ZSTD_HASHLOG_MAX; + return bounds; -/*! ZSTD_CCtx_refPrefix() : - * Reference a prefix (single-usage dictionary) for next compressed frame. - * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). - * Decompression will need same prefix to properly regenerate data. - * Compressing with a prefix is similar in outcome as performing a diff and compressing it, - * but performs much faster, especially during decompression (compression speed is tunable with compression level). - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary - * Note 1 : Prefix buffer is referenced. It **must** outlive compression. - * Its content must remain unmodified during compression. - * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, - * ensure that the window size is large enough to contain the entire source. - * See ZSTD_c_windowLog. - * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. - * It's a CPU consuming operation, with non-negligible impact on latency. - * If there is a need to use the same prefix multiple times, consider loadDictionary instead. - * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent). - * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. 
*/ -ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, - const void* prefix, size_t prefixSize); + case ZSTD_c_chainLog: + bounds.lowerBound = ZSTD_CHAINLOG_MIN; + bounds.upperBound = ZSTD_CHAINLOG_MAX; + return bounds; -/*! ZSTD_DCtx_loadDictionary() : - * Create an internal DDict from dict buffer, - * to be used to decompress next frames. - * The dictionary remains valid for all future frames, until explicitly invalidated. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, - * meaning "return to no-dictionary mode". - * Note 1 : Loading a dictionary involves building tables, - * which has a non-negligible impact on CPU usage and latency. - * It's recommended to "load once, use many times", to amortize the cost - * Note 2 :`dict` content will be copied internally, so `dict` can be released after loading. - * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead. - * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of - * how dictionary content is loaded and interpreted. - */ -ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + case ZSTD_c_searchLog: + bounds.lowerBound = ZSTD_SEARCHLOG_MIN; + bounds.upperBound = ZSTD_SEARCHLOG_MAX; + return bounds; -/*! ZSTD_DCtx_refDDict() : - * Reference a prepared dictionary, to be used to decompress next frames. - * The dictionary remains active for decompression of future frames using same DCtx. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Note 1 : Currently, only one dictionary can be managed. - * Referencing a new dictionary effectively "discards" any previous one. - * Special: referencing a NULL DDict means "return to no-dictionary mode". - * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. 
- */ -ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + case ZSTD_c_minMatch: + bounds.lowerBound = ZSTD_MINMATCH_MIN; + bounds.upperBound = ZSTD_MINMATCH_MAX; + return bounds; -/*! ZSTD_DCtx_refPrefix() : - * Reference a prefix (single-usage dictionary) to decompress next frame. - * This is the reverse operation of ZSTD_CCtx_refPrefix(), - * and must use the same prefix as the one used during compression. - * Prefix is **only used once**. Reference is discarded at end of frame. - * End of frame is reached when ZSTD_decompressStream() returns 0. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary - * Note 2 : Prefix buffer is referenced. It **must** outlive decompression. - * Prefix buffer must remain unmodified up to the end of frame, - * reached when ZSTD_decompressStream() returns 0. - * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent). - * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section) - * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. - * A full dictionary is more costly, as it requires building tables. - */ -ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, - const void* prefix, size_t prefixSize); + case ZSTD_c_targetLength: + bounds.lowerBound = ZSTD_TARGETLENGTH_MIN; + bounds.upperBound = ZSTD_TARGETLENGTH_MAX; + return bounds; -/* === Memory management === */ + case ZSTD_c_strategy: + bounds.lowerBound = ZSTD_STRATEGY_MIN; + bounds.upperBound = ZSTD_STRATEGY_MAX; + return bounds; -/*! ZSTD_sizeof_*() : - * These functions give the _current_ memory usage of selected object. - * Note that object memory usage can evolve (increase or decrease) over time. 
*/ -ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); -ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); -ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); -ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); -ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); + case ZSTD_c_contentSizeFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; -} -#endif /* ZSTD_H_235446 */ + case ZSTD_c_checksumFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + case ZSTD_c_dictIDFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; -// LICENSE_CHANGE_END + case ZSTD_c_nbWorkers: + bounds.lowerBound = 0; +#ifdef ZSTD_MULTITHREAD + bounds.upperBound = ZSTDMT_NBWORKERS_MAX; +#else + bounds.upperBound = 0; +#endif + return bounds; -#include + case ZSTD_c_jobSize: + bounds.lowerBound = 0; +#ifdef ZSTD_MULTITHREAD + bounds.upperBound = ZSTDMT_JOBSIZE_MAX; +#else + bounds.upperBound = 0; +#endif + return bounds; -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/types/chunk_collection.hpp" + case ZSTD_c_overlapLog: +#ifdef ZSTD_MULTITHREAD + bounds.lowerBound = ZSTD_OVERLAPLOG_MIN; + bounds.upperBound = ZSTD_OVERLAPLOG_MAX; +#else + bounds.lowerBound = 0; + bounds.upperBound = 0; #endif + return bounds; -namespace duckdb { + case ZSTD_c_enableLongDistanceMatching: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; -using duckdb_parquet::format::CompressionCodec; -using duckdb_parquet::format::ConvertedType; -using duckdb_parquet::format::Encoding; -using duckdb_parquet::format::PageType; -using duckdb_parquet::format::Type; + case ZSTD_c_ldmHashLog: + bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN; + bounds.upperBound = ZSTD_LDM_HASHLOG_MAX; + return bounds; -const uint32_t RleBpDecoder::BITPACK_MASKS[] = { - 0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, - 2047, 4095, 8191, 
16383, 32767, 65535, 131071, 262143, 524287, 1048575, 2097151, - 4194303, 8388607, 16777215, 33554431, 67108863, 134217727, 268435455, 536870911, 1073741823, 2147483647}; + case ZSTD_c_ldmMinMatch: + bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN; + bounds.upperBound = ZSTD_LDM_MINMATCH_MAX; + return bounds; -const uint8_t RleBpDecoder::BITPACK_DLEN = 8; + case ZSTD_c_ldmBucketSizeLog: + bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN; + bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX; + return bounds; -ColumnReader::~ColumnReader() { -} + case ZSTD_c_ldmHashRateLog: + bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN; + bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX; + return bounds; -unique_ptr ColumnReader::CreateReader(const LogicalType &type_p, const SchemaElement &schema_p, - idx_t file_idx_p, idx_t max_define, idx_t max_repeat) { - switch (type_p.id()) { - case LogicalTypeId::BOOLEAN: - return make_unique(type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::UTINYINT: - return make_unique>>( - type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::USMALLINT: - return make_unique>>( - type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::UINTEGER: - return make_unique>>( - type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::UBIGINT: - return make_unique>>( - type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::INTEGER: - return make_unique>>( - type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::BIGINT: - return make_unique>>( - type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::FLOAT: - return make_unique>>( - type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::DOUBLE: - return make_unique>>( - type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::TIMESTAMP: - switch (schema_p.type) { - case Type::INT96: - return make_unique>( - type_p, schema_p, file_idx_p, 
max_define, max_repeat); - case Type::INT64: - switch (schema_p.converted_type) { - case ConvertedType::TIMESTAMP_MICROS: - return make_unique>( - type_p, schema_p, file_idx_p, max_define, max_repeat); - case ConvertedType::TIMESTAMP_MILLIS: - return make_unique>( - type_p, schema_p, file_idx_p, max_define, max_repeat); - default: - break; - } - default: - break; - } - break; - case LogicalTypeId::DATE: - return make_unique>(type_p, schema_p, file_idx_p, - max_define, max_repeat); - case LogicalTypeId::BLOB: - case LogicalTypeId::VARCHAR: - return make_unique(type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::DECIMAL: - // we have to figure out what kind of int we need - switch (type_p.InternalType()) { - case PhysicalType::INT16: - return make_unique>(type_p, schema_p, file_idx_p, max_define, max_repeat); - case PhysicalType::INT32: - return make_unique>(type_p, schema_p, file_idx_p, max_define, max_repeat); - case PhysicalType::INT64: - return make_unique>(type_p, schema_p, file_idx_p, max_define, max_repeat); - case PhysicalType::INT128: - return make_unique>(type_p, schema_p, file_idx_p, max_define, max_repeat); + /* experimental parameters */ + case ZSTD_c_rsyncable: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; - default: - break; - } - break; - default: - break; - } - throw NotImplementedException(type_p.ToString()); -} + case ZSTD_c_forceMaxWindow : + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; -void ColumnReader::PrepareRead(parquet_filter_t &filter) { - dict_decoder.reset(); - defined_decoder.reset(); - block.reset(); + case ZSTD_c_format: + ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); + bounds.lowerBound = ZSTD_f_zstd1; + bounds.upperBound = ZSTD_f_zstd1_magicless; /* note : how to ensure at compile time that this is the highest value enum ? 
*/ + return bounds; - PageHeader page_hdr; - page_hdr.read(protocol); + case ZSTD_c_forceAttachDict: + ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy); + bounds.lowerBound = ZSTD_dictDefaultAttach; + bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */ + return bounds; - // page_hdr.printTo(std::cout); - // std::cout << '\n'; + case ZSTD_c_literalCompressionMode: + ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed); + bounds.lowerBound = ZSTD_lcm_auto; + bounds.upperBound = ZSTD_lcm_uncompressed; + return bounds; - PreparePage(page_hdr.compressed_page_size, page_hdr.uncompressed_page_size); + case ZSTD_c_targetCBlockSize: + bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN; + bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX; + return bounds; - switch (page_hdr.type) { - case PageType::DATA_PAGE_V2: - case PageType::DATA_PAGE: - PrepareDataPage(page_hdr); - break; - case PageType::DICTIONARY_PAGE: - Dictionary(move(block), page_hdr.dictionary_page_header.num_values); - break; - default: - break; // ignore INDEX page type and any other custom extensions - } + case ZSTD_c_srcSizeHint: + bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN; + bounds.upperBound = ZSTD_SRCSIZEHINT_MAX; + return bounds; + + default: + bounds.error = ERROR(parameter_unsupported); + return bounds; + } } -void ColumnReader::PreparePage(idx_t compressed_page_size, idx_t uncompressed_page_size) { - auto &trans = (ThriftFileTransport &)*protocol->getTransport(); +/* ZSTD_cParam_clampBounds: + * Clamps the value into the bounded range. 
+ */ +static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value) +{ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); + if (ZSTD_isError(bounds.error)) return bounds.error; + if (*value < bounds.lowerBound) *value = bounds.lowerBound; + if (*value > bounds.upperBound) *value = bounds.upperBound; + return 0; +} - block = make_shared(compressed_page_size + 1); - trans.read((uint8_t *)block->ptr, compressed_page_size); +#define BOUNDCHECK(cParam, val) { \ + RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ + parameter_outOfBound, "Param out of bounds"); \ +} - shared_ptr unpacked_block; - if (chunk->meta_data.codec != CompressionCodec::UNCOMPRESSED) { - unpacked_block = make_shared(uncompressed_page_size + 1); - } - switch (chunk->meta_data.codec) { - case CompressionCodec::UNCOMPRESSED: - break; - case CompressionCodec::GZIP: { - MiniZStream s; +static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) +{ + switch(param) + { + case ZSTD_c_compressionLevel: + case ZSTD_c_hashLog: + case ZSTD_c_chainLog: + case ZSTD_c_searchLog: + case ZSTD_c_minMatch: + case ZSTD_c_targetLength: + case ZSTD_c_strategy: + return 1; - s.Decompress((const char *)block->ptr, compressed_page_size, (char *)unpacked_block->ptr, - uncompressed_page_size); - block = move(unpacked_block); - - break; - } - case CompressionCodec::SNAPPY: { - auto res = snappy::RawUncompress((const char *)block->ptr, compressed_page_size, (char *)unpacked_block->ptr); - if (!res) { - throw std::runtime_error("Decompression failure"); - } - block = move(unpacked_block); - break; - } - case CompressionCodec::ZSTD: { - auto res = duckdb_zstd::ZSTD_decompress((char *)unpacked_block->ptr, uncompressed_page_size, - (const char *)block->ptr, compressed_page_size); - if (duckdb_zstd::ZSTD_isError(res) || res != (size_t)uncompressed_page_size) { - throw std::runtime_error("ZSTD Decompression failure"); - } - block = move(unpacked_block); - break; - } - - default: { - std::stringstream 
codec_name; - codec_name << chunk->meta_data.codec; - throw std::runtime_error("Unsupported compression codec \"" + codec_name.str() + - "\". Supported options are uncompressed, gzip or snappy"); - break; - } - } -} - -static uint8_t ComputeBitWidth(idx_t val) { - if (val == 0) { - return 0; - } - uint8_t ret = 1; - while (((idx_t)(1 << ret) - 1) < val) { - ret++; - } - return ret; + case ZSTD_c_format: + case ZSTD_c_windowLog: + case ZSTD_c_contentSizeFlag: + case ZSTD_c_checksumFlag: + case ZSTD_c_dictIDFlag: + case ZSTD_c_forceMaxWindow : + case ZSTD_c_nbWorkers: + case ZSTD_c_jobSize: + case ZSTD_c_overlapLog: + case ZSTD_c_rsyncable: + case ZSTD_c_enableLongDistanceMatching: + case ZSTD_c_ldmHashLog: + case ZSTD_c_ldmMinMatch: + case ZSTD_c_ldmBucketSizeLog: + case ZSTD_c_ldmHashRateLog: + case ZSTD_c_forceAttachDict: + case ZSTD_c_literalCompressionMode: + case ZSTD_c_targetCBlockSize: + case ZSTD_c_srcSizeHint: + default: + return 0; + } } -void ColumnReader::PrepareDataPage(PageHeader &page_hdr) { - if (page_hdr.type == PageType::DATA_PAGE && !page_hdr.__isset.data_page_header) { - throw std::runtime_error("Missing data page header from data page"); - } - if (page_hdr.type == PageType::DATA_PAGE_V2 && !page_hdr.__isset.data_page_header_v2) { - throw std::runtime_error("Missing data page header from data page v2"); - } - - page_rows_available = page_hdr.type == PageType::DATA_PAGE ? page_hdr.data_page_header.num_values - : page_hdr.data_page_header_v2.num_values; - auto page_encoding = page_hdr.type == PageType::DATA_PAGE ? page_hdr.data_page_header.encoding - : page_hdr.data_page_header_v2.encoding; - - if (HasRepeats()) { - uint32_t rep_length = page_hdr.type == PageType::DATA_PAGE - ? 
block->read() - : page_hdr.data_page_header_v2.repetition_levels_byte_length; - block->available(rep_length); - repeated_decoder = - make_unique((const uint8_t *)block->ptr, rep_length, ComputeBitWidth(max_repeat)); - block->inc(rep_length); - } +size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) +{ + DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value); + if (cctx->streamStage != zcss_init) { + if (ZSTD_isUpdateAuthorized(param)) { + cctx->cParamsChanged = 1; + } else { + RETURN_ERROR(stage_wrong, "can only set params in ctx init stage"); + } } - if (HasDefines()) { - uint32_t def_length = page_hdr.type == PageType::DATA_PAGE - ? block->read() - : page_hdr.data_page_header_v2.definition_levels_byte_length; - block->available(def_length); - defined_decoder = - make_unique((const uint8_t *)block->ptr, def_length, ComputeBitWidth(max_define)); - block->inc(def_length); - } + switch(param) + { + case ZSTD_c_nbWorkers: + RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported, + "MT not compatible with static alloc"); + break; - switch (page_encoding) { - case Encoding::RLE_DICTIONARY: - case Encoding::PLAIN_DICTIONARY: { - // TODO there seems to be some confusion whether this is in the bytes for v2 - // where is it otherwise?? - auto dict_width = block->read(); - // TODO somehow dict_width can be 0 ? 
- dict_decoder = make_unique((const uint8_t *)block->ptr, block->len, dict_width); - block->inc(block->len); - break; - } - case Encoding::PLAIN: - // nothing to do here, will be read directly below - break; + case ZSTD_c_compressionLevel: + case ZSTD_c_windowLog: + case ZSTD_c_hashLog: + case ZSTD_c_chainLog: + case ZSTD_c_searchLog: + case ZSTD_c_minMatch: + case ZSTD_c_targetLength: + case ZSTD_c_strategy: + case ZSTD_c_ldmHashRateLog: + case ZSTD_c_format: + case ZSTD_c_contentSizeFlag: + case ZSTD_c_checksumFlag: + case ZSTD_c_dictIDFlag: + case ZSTD_c_forceMaxWindow: + case ZSTD_c_forceAttachDict: + case ZSTD_c_literalCompressionMode: + case ZSTD_c_jobSize: + case ZSTD_c_overlapLog: + case ZSTD_c_rsyncable: + case ZSTD_c_enableLongDistanceMatching: + case ZSTD_c_ldmHashLog: + case ZSTD_c_ldmMinMatch: + case ZSTD_c_ldmBucketSizeLog: + case ZSTD_c_targetCBlockSize: + case ZSTD_c_srcSizeHint: + break; - default: - throw std::runtime_error("Unsupported page encoding"); - } + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } + return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value); } -idx_t ColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, uint8_t *define_out, uint8_t *repeat_out, - Vector &result) { - // we need to reset the location because multiple column readers share the same protocol - auto &trans = (ThriftFileTransport &)*protocol->getTransport(); - trans.SetLocation(chunk_read_offset); - - idx_t result_offset = 0; - auto to_read = num_values; - - while (to_read > 0) { - while (page_rows_available == 0) { - PrepareRead(filter); - } - - D_ASSERT(block); - auto read_now = MinValue(to_read, page_rows_available); - - D_ASSERT(read_now <= STANDARD_VECTOR_SIZE); - - if (HasRepeats()) { - D_ASSERT(repeated_decoder); - repeated_decoder->GetBatch((char *)repeat_out + result_offset, read_now); - } - - if (HasDefines()) { - D_ASSERT(defined_decoder); - defined_decoder->GetBatch((char *)define_out + result_offset, 
read_now); - } - - if (dict_decoder) { - // we need the null count because the offsets and plain values have no entries for nulls - idx_t null_count = 0; - if (HasDefines()) { - for (idx_t i = 0; i < read_now; i++) { - if (define_out[i + result_offset] != max_define) { - null_count++; - } - } - } - - offset_buffer.resize(sizeof(uint32_t) * (read_now - null_count)); - dict_decoder->GetBatch(offset_buffer.ptr, read_now - null_count); - DictReference(result); - Offsets((uint32_t *)offset_buffer.ptr, define_out, read_now, filter, result_offset, result); - } else { - PlainReference(block, result); - Plain(block, define_out, read_now, filter, result_offset, result); - } +size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, + ZSTD_cParameter param, int value) +{ + DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value); + switch(param) + { + case ZSTD_c_format : + BOUNDCHECK(ZSTD_c_format, value); + CCtxParams->format = (ZSTD_format_e)value; + return (size_t)CCtxParams->format; - result_offset += read_now; - page_rows_available -= read_now; - to_read -= read_now; - } - group_rows_available -= num_values; - chunk_read_offset = trans.GetLocation(); + case ZSTD_c_compressionLevel : { + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); + if (value) { /* 0 : does not change current level */ + CCtxParams->compressionLevel = value; + } + if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel; + return 0; /* return type (size_t) cannot represent negative values */ + } - return num_values; -} + case ZSTD_c_windowLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_windowLog, value); + CCtxParams->cParams.windowLog = (U32)value; + return CCtxParams->cParams.windowLog; -void ColumnReader::Skip(idx_t num_values) { - dummy_define.zero(); - dummy_repeat.zero(); + case ZSTD_c_hashLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_hashLog, value); + CCtxParams->cParams.hashLog = (U32)value; + 
return CCtxParams->cParams.hashLog; - // TODO this can be optimized, for example we dont actually have to bitunpack offsets - auto values_read = - Read(num_values, none_filter, (uint8_t *)dummy_define.ptr, (uint8_t *)dummy_repeat.ptr, dummy_result); - if (values_read != num_values) { - throw std::runtime_error("Row count mismatch when skipping rows"); - } -} + case ZSTD_c_chainLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_chainLog, value); + CCtxParams->cParams.chainLog = (U32)value; + return CCtxParams->cParams.chainLog; -void StringColumnReader::VerifyString(const char *str_data, idx_t str_len) { - if (Type() != LogicalTypeId::VARCHAR) { - return; - } - // verify if a string is actually UTF8, and if there are no null bytes in the middle of the string - // technically Parquet should guarantee this, but reality is often disappointing - auto utf_type = Utf8Proc::Analyze(str_data, str_len); - if (utf_type == UnicodeType::INVALID) { - throw InternalException("Invalid string encoding found in Parquet file: value is not valid UTF8!"); - } -} + case ZSTD_c_searchLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_searchLog, value); + CCtxParams->cParams.searchLog = (U32)value; + return (size_t)value; -void StringColumnReader::Dictionary(shared_ptr data, idx_t num_entries) { - dict = move(data); - dict_strings = unique_ptr(new string_t[num_entries]); - for (idx_t dict_idx = 0; dict_idx < num_entries; dict_idx++) { - uint32_t str_len = dict->read(); - dict->available(str_len); + case ZSTD_c_minMatch : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_minMatch, value); + CCtxParams->cParams.minMatch = value; + return CCtxParams->cParams.minMatch; - VerifyString(dict->ptr, str_len); - dict_strings[dict_idx] = string_t(dict->ptr, str_len); - dict->inc(str_len); - } -} + case ZSTD_c_targetLength : + BOUNDCHECK(ZSTD_c_targetLength, value); + CCtxParams->cParams.targetLength = value; + return CCtxParams->cParams.targetLength; -class 
ParquetStringVectorBuffer : public VectorBuffer { -public: - explicit ParquetStringVectorBuffer(shared_ptr buffer_p) - : VectorBuffer(VectorBufferType::OPAQUE_BUFFER), buffer(move(buffer_p)) { - } + case ZSTD_c_strategy : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_strategy, value); + CCtxParams->cParams.strategy = (ZSTD_strategy)value; + return (size_t)CCtxParams->cParams.strategy; -private: - shared_ptr buffer; -}; + case ZSTD_c_contentSizeFlag : + /* Content size written in frame header _when known_ (default:1) */ + DEBUGLOG(4, "set content size flag = %u", (value!=0)); + CCtxParams->fParams.contentSizeFlag = value != 0; + return CCtxParams->fParams.contentSizeFlag; -void StringColumnReader::DictReference(Vector &result) { - StringVector::AddBuffer(result, make_buffer(dict)); -} -void StringColumnReader::PlainReference(shared_ptr plain_data, Vector &result) { - StringVector::AddBuffer(result, make_buffer(move(plain_data))); -} + case ZSTD_c_checksumFlag : + /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */ + CCtxParams->fParams.checksumFlag = value != 0; + return CCtxParams->fParams.checksumFlag; -string_t StringParquetValueConversion::DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { - auto &dict_strings = ((StringColumnReader &)reader).dict_strings; - return dict_strings[offset]; -} + case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */ + DEBUGLOG(4, "set dictIDFlag = %u", (value!=0)); + CCtxParams->fParams.noDictIDFlag = !value; + return !CCtxParams->fParams.noDictIDFlag; -string_t StringParquetValueConversion::PlainRead(ByteBuffer &plain_data, ColumnReader &reader) { - auto &scr = ((StringColumnReader &)reader); - uint32_t str_len = scr.fixed_width_string_length == 0 ? 
plain_data.read() : scr.fixed_width_string_length; - plain_data.available(str_len); - ((StringColumnReader &)reader).VerifyString(plain_data.ptr, str_len); - auto ret_str = string_t(plain_data.ptr, str_len); - plain_data.inc(str_len); - return ret_str; -} + case ZSTD_c_forceMaxWindow : + CCtxParams->forceWindow = (value != 0); + return CCtxParams->forceWindow; -void StringParquetValueConversion::PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { - auto &scr = ((StringColumnReader &)reader); - uint32_t str_len = scr.fixed_width_string_length == 0 ? plain_data.read() : scr.fixed_width_string_length; - plain_data.available(str_len); - plain_data.inc(str_len); -} + case ZSTD_c_forceAttachDict : { + const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value; + BOUNDCHECK(ZSTD_c_forceAttachDict, pref); + CCtxParams->attachDictPref = pref; + return CCtxParams->attachDictPref; + } -idx_t ListColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, uint8_t *define_out, uint8_t *repeat_out, - Vector &result_out) { - if (!ListVector::HasEntry(result_out)) { - auto list_child = make_unique(result_out.GetType().child_types()[0].second); - ListVector::SetEntry(result_out, move(list_child)); - } + case ZSTD_c_literalCompressionMode : { + const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value; + BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm); + CCtxParams->literalCompressionMode = lcm; + return CCtxParams->literalCompressionMode; + } - idx_t result_offset = 0; - auto result_ptr = FlatVector::GetData(result_out); + case ZSTD_c_nbWorkers : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); + CCtxParams->nbWorkers = value; + return CCtxParams->nbWorkers; +#endif - while (result_offset < num_values) { - auto child_req_num_values = MinValue(STANDARD_VECTOR_SIZE, 
child_column_reader->GroupRowsAvailable()); + case ZSTD_c_jobSize : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + /* Adjust to the minimum non-default value. */ + if (value != 0 && value < ZSTDMT_JOBSIZE_MIN) + value = ZSTDMT_JOBSIZE_MIN; + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); + assert(value >= 0); + CCtxParams->jobSize = value; + return CCtxParams->jobSize; +#endif - if (child_req_num_values == 0) { - break; - } + case ZSTD_c_overlapLog : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); + CCtxParams->overlapLog = value; + return CCtxParams->overlapLog; +#endif - child_defines.zero(); - child_repeats.zero(); + case ZSTD_c_rsyncable : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); + CCtxParams->rsyncable = value; + return CCtxParams->rsyncable; +#endif - idx_t child_actual_num_values = 0; + case ZSTD_c_enableLongDistanceMatching : + CCtxParams->ldmParams.enableLdm = (value!=0); + return CCtxParams->ldmParams.enableLdm; - if (overflow_child_count == 0) { - child_actual_num_values = child_column_reader->Read(child_req_num_values, child_filter, child_defines_ptr, - child_repeats_ptr, child_result); - } else { - child_actual_num_values = overflow_child_count; - overflow_child_count = 0; - child_result.Reference(overflow_child_vector); - } + case ZSTD_c_ldmHashLog : + if (value!=0) /* 0 ==> auto */ + BOUNDCHECK(ZSTD_c_ldmHashLog, value); + CCtxParams->ldmParams.hashLog = value; + return CCtxParams->ldmParams.hashLog; - append_chunk.data[0].Reference(child_result); - append_chunk.SetCardinality(child_actual_num_values); - append_chunk.Verify(); 
+ case ZSTD_c_ldmMinMatch : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_ldmMinMatch, value); + CCtxParams->ldmParams.minMatchLength = value; + return CCtxParams->ldmParams.minMatchLength; - idx_t current_chunk_offset = ListVector::GetListSize(result_out); - ListVector::Append(result_out, append_chunk.data[0], append_chunk.size()); - // hard-won piece of code this, modify at your own risk - // the intuition is that we have to only collapse values into lists that are repeated *on this level* - // the rest is pretty much handed up as-is as a single-valued list or NULL - idx_t child_idx; - for (child_idx = 0; child_idx < child_actual_num_values; child_idx++) { - if (child_repeats_ptr[child_idx] == max_repeat) { // value repeats on this level, append - D_ASSERT(result_offset > 0); - result_ptr[result_offset - 1].length++; - continue; - } - if (result_offset >= num_values) { // we ran out of output space - break; - } - if (child_defines_ptr[child_idx] >= max_define) { - // value has been defined down the stack, hence its NOT NULL - result_ptr[result_offset].offset = child_idx + current_chunk_offset; - result_ptr[result_offset].length = 1; - } else { - // value is NULL somewhere up the stack - FlatVector::SetNull(result_out, result_offset, true); - result_ptr[result_offset].offset = 0; - result_ptr[result_offset].length = 0; - } + case ZSTD_c_ldmBucketSizeLog : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value); + CCtxParams->ldmParams.bucketSizeLog = value; + return CCtxParams->ldmParams.bucketSizeLog; - repeat_out[result_offset] = child_repeats_ptr[child_idx]; - define_out[result_offset] = child_defines_ptr[child_idx]; + case ZSTD_c_ldmHashRateLog : + RETURN_ERROR_IF(value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN, + parameter_outOfBound, "Param out of bounds!"); + CCtxParams->ldmParams.hashRateLog = value; + return CCtxParams->ldmParams.hashRateLog; - result_offset++; - } + case ZSTD_c_targetCBlockSize : + if (value!=0) /* 0 ==> 
default */ + BOUNDCHECK(ZSTD_c_targetCBlockSize, value); + CCtxParams->targetCBlockSize = value; + return CCtxParams->targetCBlockSize; - // we have read more values from the child reader than we can fit into the result for this read - // we have to pass everything from child_idx to child_actual_num_values into the next call - if (child_idx < child_actual_num_values && result_offset == num_values) { - overflow_child_vector.Slice(child_result, child_idx); - overflow_child_count = child_actual_num_values - child_idx; - overflow_child_vector.Verify(overflow_child_count); + case ZSTD_c_srcSizeHint : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_srcSizeHint, value); + CCtxParams->srcSizeHint = value; + return CCtxParams->srcSizeHint; - // move values in the child repeats and defines *backward* by child_idx - for (idx_t repdef_idx = 0; repdef_idx < overflow_child_count; repdef_idx++) { - child_defines_ptr[repdef_idx] = child_defines_ptr[child_idx + repdef_idx]; - child_repeats_ptr[repdef_idx] = child_repeats_ptr[child_idx + repdef_idx]; - } - } - } - result_out.Verify(result_offset); - return result_offset; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } } -} // namespace duckdb -#include -#include -#include -#include - - - -#include "duckdb.hpp" - -namespace duckdb { - -class ParquetExtension : public Extension { -public: - void Load(DuckDB &db) override; -}; - -} // namespace duckdb - - - - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb.hpp" -#include "duckdb/common/types/chunk_collection.hpp" -#include "duckdb/function/copy_function.hpp" -#include "duckdb/function/table_function.hpp" -#include "duckdb/common/file_system.hpp" -#include "duckdb/parallel/parallel_state.hpp" -#include "duckdb/parser/parsed_data/create_copy_function_info.hpp" -#include "duckdb/parser/parsed_data/create_table_function_info.hpp" +size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value) +{ + return 
ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value); +} -#include "duckdb/main/config.hpp" -#include "duckdb/parser/expression/constant_expression.hpp" -#include "duckdb/parser/expression/function_expression.hpp" -#include "duckdb/parser/tableref/table_function_ref.hpp" - -#include "duckdb/storage/statistics/base_statistics.hpp" - -#include "duckdb/main/client_context.hpp" -#include "duckdb/catalog/catalog.hpp" +size_t ZSTD_CCtxParams_getParameter( + ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, int* value) +{ + switch(param) + { + case ZSTD_c_format : + *value = CCtxParams->format; + break; + case ZSTD_c_compressionLevel : + *value = CCtxParams->compressionLevel; + break; + case ZSTD_c_windowLog : + *value = (int)CCtxParams->cParams.windowLog; + break; + case ZSTD_c_hashLog : + *value = (int)CCtxParams->cParams.hashLog; + break; + case ZSTD_c_chainLog : + *value = (int)CCtxParams->cParams.chainLog; + break; + case ZSTD_c_searchLog : + *value = CCtxParams->cParams.searchLog; + break; + case ZSTD_c_minMatch : + *value = CCtxParams->cParams.minMatch; + break; + case ZSTD_c_targetLength : + *value = CCtxParams->cParams.targetLength; + break; + case ZSTD_c_strategy : + *value = (unsigned)CCtxParams->cParams.strategy; + break; + case ZSTD_c_contentSizeFlag : + *value = CCtxParams->fParams.contentSizeFlag; + break; + case ZSTD_c_checksumFlag : + *value = CCtxParams->fParams.checksumFlag; + break; + case ZSTD_c_dictIDFlag : + *value = !CCtxParams->fParams.noDictIDFlag; + break; + case ZSTD_c_forceMaxWindow : + *value = CCtxParams->forceWindow; + break; + case ZSTD_c_forceAttachDict : + *value = CCtxParams->attachDictPref; + break; + case ZSTD_c_literalCompressionMode : + *value = CCtxParams->literalCompressionMode; + break; + case ZSTD_c_nbWorkers : +#ifndef ZSTD_MULTITHREAD + assert(CCtxParams->nbWorkers == 0); +#endif + *value = CCtxParams->nbWorkers; + break; + case ZSTD_c_jobSize : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR(parameter_unsupported, 
"not compiled with multithreading"); +#else + assert(CCtxParams->jobSize <= INT_MAX); + *value = (int)CCtxParams->jobSize; + break; +#endif + case ZSTD_c_overlapLog : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); +#else + *value = CCtxParams->overlapLog; + break; +#endif + case ZSTD_c_rsyncable : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); +#else + *value = CCtxParams->rsyncable; + break; #endif + case ZSTD_c_enableLongDistanceMatching : + *value = CCtxParams->ldmParams.enableLdm; + break; + case ZSTD_c_ldmHashLog : + *value = CCtxParams->ldmParams.hashLog; + break; + case ZSTD_c_ldmMinMatch : + *value = CCtxParams->ldmParams.minMatchLength; + break; + case ZSTD_c_ldmBucketSizeLog : + *value = CCtxParams->ldmParams.bucketSizeLog; + break; + case ZSTD_c_ldmHashRateLog : + *value = CCtxParams->ldmParams.hashRateLog; + break; + case ZSTD_c_targetCBlockSize : + *value = (int)CCtxParams->targetCBlockSize; + break; + case ZSTD_c_srcSizeHint : + *value = (int)CCtxParams->srcSizeHint; + break; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } + return 0; +} -namespace duckdb { +/** ZSTD_CCtx_setParametersUsingCCtxParams() : + * just applies `params` into `cctx` + * no action is performed, parameters are merely stored. + * If ZSTDMT is enabled, parameters are pushed to cctx->mtctx. + * This is possible even if a compression is ongoing. + * In which case, new parameters will be applied on the fly, starting with next compression job. 
+ */ +size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) +{ + DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams"); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "The context is in the wrong stage!"); + RETURN_ERROR_IF(cctx->cdict, stage_wrong, + "Can't override parameters with cdict attached (some must " + "be inherited from the cdict)."); -struct ParquetReadBindData : public FunctionData { - shared_ptr initial_reader; - vector files; - vector column_ids; - std::atomic chunk_count; - idx_t cur_file; -}; + cctx->requestedParams = *params; + return 0; +} -struct ParquetReadOperatorData : public FunctionOperatorData { - shared_ptr reader; - ParquetReaderScanState scan_state; - bool is_parallel; - idx_t file_index; - vector column_ids; - TableFilterSet *table_filters; -}; +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't set pledgedSrcSize when not in init stage."); + cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; + return 0; +} -struct ParquetReadParallelState : public ParallelState { - std::mutex lock; - shared_ptr current_reader; - idx_t file_index; - idx_t row_group_index; -}; +/** + * Initializes the local dict using the requested parameters. + * NOTE: This does not use the pledged src size, because it may be used for more + * than one compression. + */ +static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) +{ + ZSTD_localDict* const dl = &cctx->localDict; + ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams( + &cctx->requestedParams, ZSTD_CONTENTSIZE_UNKNOWN, dl->dictSize); + if (dl->dict == NULL) { + /* No local dictionary. 
*/ + assert(dl->dictBuffer == NULL); + assert(dl->cdict == NULL); + assert(dl->dictSize == 0); + return 0; + } + if (dl->cdict != NULL) { + assert(cctx->cdict == dl->cdict); + /* Local dictionary already initialized. */ + return 0; + } + assert(dl->dictSize > 0); + assert(cctx->cdict == NULL); + assert(cctx->prefixDict.dict == NULL); -class ParquetScanFunction : public TableFunction { -public: - ParquetScanFunction() - : TableFunction("parquet_scan", {LogicalType::VARCHAR}, ParquetScanImplementation, ParquetScanBind, - ParquetScanInit, /* statistics */ ParquetScanStats, /* cleanup */ nullptr, - /* dependency */ nullptr, ParquetCardinality, - /* pushdown_complex_filter */ nullptr, /* to_string */ nullptr, ParquetScanMaxThreads, - ParquetInitParallelState, ParquetScanParallelInit, ParquetParallelStateNext, true, true, - ParquetProgress) { - } + dl->cdict = ZSTD_createCDict_advanced( + dl->dict, + dl->dictSize, + ZSTD_dlm_byRef, + dl->dictContentType, + cParams, + cctx->customMem); + RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed"); + cctx->cdict = dl->cdict; + return 0; +} - static unique_ptr ParquetReadBind(ClientContext &context, CopyInfo &info, - vector &expected_names, - vector &expected_types) { - for (auto &option : info.options) { - throw NotImplementedException("Unsupported option for COPY FROM parquet: %s", option.first); - } - auto result = make_unique(); +size_t ZSTD_CCtx_loadDictionary_advanced( + ZSTD_CCtx* cctx, const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't load a dictionary when ctx is not in init stage."); + RETURN_ERROR_IF(cctx->staticSize, memory_allocation, + "no malloc for static CCtx"); + DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); + ZSTD_clearAllDicts(cctx); /* in case one already exists */ + if (dict == NULL || dictSize == 0) /* no 
dictionary mode */ + return 0; + if (dictLoadMethod == ZSTD_dlm_byRef) { + cctx->localDict.dict = dict; + } else { + void* dictBuffer = ZSTD_malloc(dictSize, cctx->customMem); + RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!"); + memcpy(dictBuffer, dict, dictSize); + cctx->localDict.dictBuffer = dictBuffer; + cctx->localDict.dict = dictBuffer; + } + cctx->localDict.dictSize = dictSize; + cctx->localDict.dictContentType = dictContentType; + return 0; +} - FileSystem &fs = FileSystem::GetFileSystem(context); - result->files = fs.Glob(info.file_path); - if (result->files.empty()) { - throw IOException("No files found that match the pattern \"%s\"", info.file_path); - } - result->initial_reader = make_shared(context, result->files[0], expected_types); - return move(result); - } +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference( + ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + return ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); +} - static unique_ptr ParquetScanStats(ClientContext &context, const FunctionData *bind_data_p, - column_t column_index) { - auto &bind_data = (ParquetReadBindData &)*bind_data_p; +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + return ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); +} - if (column_index == COLUMN_IDENTIFIER_ROW_ID) { - return nullptr; - } - // we do not want to parse the Parquet metadata for the sole purpose of getting column statistics +size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a dict when ctx not in init stage."); + /* Free the existing local cdict (if any) to save memory. */ + ZSTD_clearAllDicts(cctx); + cctx->cdict = cdict; + return 0; +} - // We already parsed the metadata for the first file in a glob because we need some type info. 
- auto overall_stats = - ParquetReader::ReadStatistics(bind_data.initial_reader->return_types[column_index], column_index, - bind_data.initial_reader->metadata->metadata.get()); +size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize) +{ + return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent); +} - if (!overall_stats) { - return nullptr; - } +size_t ZSTD_CCtx_refPrefix_advanced( + ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a prefix when ctx not in init stage."); + ZSTD_clearAllDicts(cctx); + if (prefix != NULL && prefixSize > 0) { + cctx->prefixDict.dict = prefix; + cctx->prefixDict.dictSize = prefixSize; + cctx->prefixDict.dictContentType = dictContentType; + } + return 0; +} - // if there is only one file in the glob (quite common case), we are done - auto &config = DBConfig::GetConfig(context); - if (bind_data.files.size() < 2) { - return overall_stats; - } else if (config.object_cache_enable) { - auto &cache = ObjectCache::GetObjectCache(context); - // for more than one file, we could be lucky and metadata for *every* file is in the object cache (if - // enabled at all) - FileSystem &fs = FileSystem::GetFileSystem(context); - for (idx_t file_idx = 1; file_idx < bind_data.files.size(); file_idx++) { - auto &file_name = bind_data.files[file_idx]; - auto metadata = std::dynamic_pointer_cast(cache.Get(file_name)); - auto handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ); - // but we need to check if the metadata cache entries are current - if (!metadata || (fs.GetLastModifiedTime(*handle) >= metadata->read_time)) { - // missing or invalid metadata entry in cache, no usable stats overall - return nullptr; - } - // get and merge stats for file - auto file_stats = ParquetReader::ReadStatistics(bind_data.initial_reader->return_types[column_index], - column_index, 
metadata->metadata.get()); - if (!file_stats) { - return nullptr; - } - overall_stats->Merge(*file_stats); - } - // success! - return overall_stats; - } - // we have more than one file and no object cache so no statistics overall - return nullptr; - } +/*! ZSTD_CCtx_reset() : + * Also dumps dictionary */ +size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) +{ + if ( (reset == ZSTD_reset_session_only) + || (reset == ZSTD_reset_session_and_parameters) ) { + cctx->streamStage = zcss_init; + cctx->pledgedSrcSizePlusOne = 0; + } + if ( (reset == ZSTD_reset_parameters) + || (reset == ZSTD_reset_session_and_parameters) ) { + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't reset parameters only when not in init stage."); + ZSTD_clearAllDicts(cctx); + return ZSTD_CCtxParams_reset(&cctx->requestedParams); + } + return 0; +} - static unique_ptr ParquetScanBind(ClientContext &context, vector &inputs, - unordered_map &named_parameters, - vector &input_table_types, - vector &input_table_names, - vector &return_types, vector &names) { - auto file_name = inputs[0].GetValue(); - auto result = make_unique(); - FileSystem &fs = FileSystem::GetFileSystem(context); - result->files = fs.Glob(file_name); - if (result->files.empty()) { - throw IOException("No files found that match the pattern \"%s\"", file_name); - } +/** ZSTD_checkCParams() : + control CParam values remain within authorized range. 
+ @return : 0, or an error code if one value is beyond authorized range */ +size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) +{ + BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog); + BOUNDCHECK(ZSTD_c_chainLog, (int)cParams.chainLog); + BOUNDCHECK(ZSTD_c_hashLog, (int)cParams.hashLog); + BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog); + BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch); + BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength); + BOUNDCHECK(ZSTD_c_strategy, cParams.strategy); + return 0; +} - result->initial_reader = make_shared(context, result->files[0]); - return_types = result->initial_reader->return_types; +/** ZSTD_clampCParams() : + * make CParam values within valid range. + * @return : valid CParams */ +static ZSTD_compressionParameters +ZSTD_clampCParams(ZSTD_compressionParameters cParams) +{ +# define CLAMP_TYPE(cParam, val, type) { \ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \ + if ((int)valbounds.upperBound) val=(type)bounds.upperBound; \ + } +# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned) + CLAMP(ZSTD_c_windowLog, cParams.windowLog); + CLAMP(ZSTD_c_chainLog, cParams.chainLog); + CLAMP(ZSTD_c_hashLog, cParams.hashLog); + CLAMP(ZSTD_c_searchLog, cParams.searchLog); + CLAMP(ZSTD_c_minMatch, cParams.minMatch); + CLAMP(ZSTD_c_targetLength,cParams.targetLength); + CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy); + return cParams; +} - names = result->initial_reader->names; - return move(result); - } +/** ZSTD_cycleLog() : + * condition for correct operation : hashLog > 1 */ +U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) +{ + U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); + return hashLog - btScale; +} - static unique_ptr ParquetScanInit(ClientContext &context, const FunctionData *bind_data_p, - vector &column_ids, - TableFilterCollection *filters) { - auto &bind_data = (ParquetReadBindData &)*bind_data_p; - bind_data.chunk_count = 0; - bind_data.cur_file = 0; - 
auto result = make_unique(); - result->column_ids = column_ids; +/** ZSTD_adjustCParams_internal() : + * optimize `cPar` for a specified input (`srcSize` and `dictSize`). + * mostly downsize to reduce memory consumption and initialization latency. + * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known. + * note : `srcSize==0` means 0! + * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */ +static ZSTD_compressionParameters +ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, + unsigned long long srcSize, + size_t dictSize) +{ + static const U64 minSrcSize = 513; /* (1<<9) + 1 */ + static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); + assert(ZSTD_checkCParams(cPar)==0); - result->is_parallel = false; - result->file_index = 0; - result->table_filters = filters->table_filters; - // single-threaded: one thread has to read all groups - vector group_ids; - for (idx_t i = 0; i < bind_data.initial_reader->NumRowGroups(); i++) { - group_ids.push_back(i); - } - result->reader = bind_data.initial_reader; - result->reader->InitializeScan(result->scan_state, column_ids, move(group_ids), filters->table_filters); - return move(result); - } + if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN) + srcSize = minSrcSize; - static int ParquetProgress(ClientContext &context, const FunctionData *bind_data_p) { - auto &bind_data = (ParquetReadBindData &)*bind_data_p; - if (bind_data.initial_reader->NumRows() == 0) { - return (100 * (bind_data.cur_file + 1)) / bind_data.files.size(); - } - auto percentage = (bind_data.chunk_count * STANDARD_VECTOR_SIZE * 100 / bind_data.initial_reader->NumRows()) / - bind_data.files.size(); - percentage += 100 * bind_data.cur_file / bind_data.files.size(); - return percentage; - } + /* resize windowLog if input is small enough, to use less memory */ + if ( (srcSize < maxWindowResize) + && (dictSize < maxWindowResize) ) { + U32 const tSize = (U32)(srcSize + dictSize); + static U32 const 
hashSizeMin = 1 << ZSTD_HASHLOG_MIN; + U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN : + ZSTD_highbit32(tSize-1) + 1; + if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; + } + if (cPar.hashLog > cPar.windowLog+1) cPar.hashLog = cPar.windowLog+1; + { U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); + if (cycleLog > cPar.windowLog) + cPar.chainLog -= (cycleLog - cPar.windowLog); + } - static unique_ptr - ParquetScanParallelInit(ClientContext &context, const FunctionData *bind_data_p, ParallelState *parallel_state_p, - vector &column_ids, TableFilterCollection *filters) { - auto result = make_unique(); - result->column_ids = column_ids; - result->is_parallel = true; - result->table_filters = filters->table_filters; - if (!ParquetParallelStateNext(context, bind_data_p, result.get(), parallel_state_p)) { - return nullptr; - } - return move(result); - } + if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) + cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */ - static void ParquetScanImplementation(ClientContext &context, const FunctionData *bind_data_p, - FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { - auto &data = (ParquetReadOperatorData &)*operator_state; - auto &bind_data = (ParquetReadBindData &)*bind_data_p; + return cPar; +} - do { - data.reader->Scan(data.scan_state, output); - bind_data.chunk_count++; - if (output.size() == 0 && !data.is_parallel) { - auto &bind_data = (ParquetReadBindData &)*bind_data_p; - // check if there is another file - if (data.file_index + 1 < bind_data.files.size()) { - data.file_index++; - bind_data.cur_file++; - bind_data.chunk_count = 0; - string file = bind_data.files[data.file_index]; - // move to the next file - data.reader = - make_shared(context, file, data.reader->return_types, bind_data.files[0]); - vector group_ids; - for (idx_t i = 0; i < data.reader->NumRowGroups(); i++) { - group_ids.push_back(i); - } - 
data.reader->InitializeScan(data.scan_state, data.column_ids, move(group_ids), data.table_filters); - } else { - // exhausted all the files: done - break; - } - } else { - break; - } - } while (true); - } +ZSTD_compressionParameters +ZSTD_adjustCParams(ZSTD_compressionParameters cPar, + unsigned long long srcSize, + size_t dictSize) +{ + cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ + if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize); +} - static unique_ptr ParquetCardinality(ClientContext &context, const FunctionData *bind_data) { - auto &data = (ParquetReadBindData &)*bind_data; - return make_unique(data.initial_reader->NumRows() * data.files.size()); - } +static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize); +static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize); - static idx_t ParquetScanMaxThreads(ClientContext &context, const FunctionData *bind_data) { - auto &data = (ParquetReadBindData &)*bind_data; - return data.initial_reader->NumRowGroups() * data.files.size(); - } +ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize) +{ + ZSTD_compressionParameters cParams; + if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) { + srcSizeHint = CCtxParams->srcSizeHint; + } + cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize); + if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; + if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog; + if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog; + if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog; + if 
(CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog; + if (CCtxParams->cParams.minMatch) cParams.minMatch = CCtxParams->cParams.minMatch; + if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength; + if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy; + assert(!ZSTD_checkCParams(cParams)); + /* srcSizeHint == 0 means 0 */ + return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize); +} - static unique_ptr ParquetInitParallelState(ClientContext &context, const FunctionData *bind_data_p) { - auto &bind_data = (ParquetReadBindData &)*bind_data_p; - auto result = make_unique(); - result->current_reader = bind_data.initial_reader; - result->row_group_index = 0; - result->file_index = 0; - return move(result); - } +static size_t +ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, + const U32 forCCtx) +{ + size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); + size_t const hSize = ((size_t)1) << cParams->hashLog; + U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; + size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; + /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't + * surrounded by redzones in ASAN. */ + size_t const tableSpace = chainSize * sizeof(U32) + + hSize * sizeof(U32) + + h3Size * sizeof(U32); + size_t const optPotentialSpace = + ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32)) + + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32)) + + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32)) + + ZSTD_cwksp_alloc_size((1<strategy >= ZSTD_btopt)) + ? 
optPotentialSpace + : 0; + DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u", + (U32)chainSize, (U32)hSize, (U32)h3Size); + return tableSpace + optSpace; +} - static bool ParquetParallelStateNext(ClientContext &context, const FunctionData *bind_data_p, - FunctionOperatorData *state_p, ParallelState *parallel_state_p) { - auto &bind_data = (ParquetReadBindData &)*bind_data_p; - auto ¶llel_state = (ParquetReadParallelState &)*parallel_state_p; - auto &scan_data = (ParquetReadOperatorData &)*state_p; +size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) +{ + RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); + { ZSTD_compressionParameters const cParams = + ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); + U32 const divider = (cParams.minMatch==3) ? 3 : 4; + size_t const maxNbSeq = blockSize / divider; + size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) + + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef)) + + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); + size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE); + size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); + size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1); - lock_guard parallel_lock(parallel_state.lock); - if (parallel_state.row_group_index < parallel_state.current_reader->NumRowGroups()) { - // groups remain in the current parquet file: read the next group - scan_data.reader = parallel_state.current_reader; - vector group_indexes {parallel_state.row_group_index}; - scan_data.reader->InitializeScan(scan_data.scan_state, scan_data.column_ids, group_indexes, - scan_data.table_filters); - parallel_state.row_group_index++; - return true; - } else { - // no groups remain in the current parquet 
file: check if there are more files to read - while (parallel_state.file_index + 1 < bind_data.files.size()) { - // read the next file - string file = bind_data.files[++parallel_state.file_index]; - parallel_state.current_reader = - make_shared(context, file, parallel_state.current_reader->return_types); - if (parallel_state.current_reader->NumRowGroups() == 0) { - // empty parquet file, move to next file - continue; - } - // set up the scan state to read the first group - scan_data.reader = parallel_state.current_reader; - vector group_indexes {0}; - scan_data.reader->InitializeScan(scan_data.scan_state, scan_data.column_ids, group_indexes, - scan_data.table_filters); - parallel_state.row_group_index = 1; - return true; - } - } - return false; - } -}; + size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams); + size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq)); -struct ParquetWriteBindData : public FunctionData { - vector sql_types; - string file_name; - vector column_names; - duckdb_parquet::format::CompressionCodec::type codec = duckdb_parquet::format::CompressionCodec::SNAPPY; -}; + /* estimateCCtxSize is for one-shot compression. So no buffers should + * be needed. However, we still allocate two 0-sized buffers, which can + * take space under ASAN. 
*/ + size_t const bufferSpace = ZSTD_cwksp_alloc_size(0) + + ZSTD_cwksp_alloc_size(0); -struct ParquetWriteGlobalState : public GlobalFunctionData { - unique_ptr writer; -}; + size_t const cctxSpace = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)); -struct ParquetWriteLocalState : public LocalFunctionData { - ParquetWriteLocalState() { - buffer = make_unique(); - } + size_t const neededSpace = + cctxSpace + + entropySpace + + blockStateSpace + + ldmSpace + + ldmSeqSpace + + matchStateSize + + tokenSpace + + bufferSpace; - unique_ptr buffer; -}; + DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace); + return neededSpace; + } +} -unique_ptr ParquetWriteBind(ClientContext &context, CopyInfo &info, vector &names, - vector &sql_types) { - auto bind_data = make_unique(); - for (auto &option : info.options) { - auto loption = StringUtil::Lower(option.first); - if (loption == "compression" || loption == "codec") { - if (!option.second.empty()) { - auto roption = StringUtil::Lower(option.second[0].ToString()); - if (roption == "uncompressed") { - bind_data->codec = duckdb_parquet::format::CompressionCodec::UNCOMPRESSED; - continue; - } else if (roption == "snappy") { - bind_data->codec = duckdb_parquet::format::CompressionCodec::SNAPPY; - continue; - } else if (roption == "gzip") { - bind_data->codec = duckdb_parquet::format::CompressionCodec::GZIP; - continue; - } else if (roption == "zstd") { - bind_data->codec = duckdb_parquet::format::CompressionCodec::ZSTD; - continue; - } - } - throw ParserException("Expected %s argument to be either [uncompressed, snappy, gzip or zstd]", loption); - } else { - throw NotImplementedException("Unrecognized option for PARQUET: %s", option.first.c_str()); - } - } - bind_data->sql_types = sql_types; - bind_data->column_names = names; - bind_data->file_name = info.file_path; - return move(bind_data); +size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params const params = 
ZSTD_makeCCtxParamsFromCParams(cParams); + return ZSTD_estimateCCtxSize_usingCCtxParams(¶ms); } -unique_ptr ParquetWriteInitializeGlobal(ClientContext &context, FunctionData &bind_data) { - auto global_state = make_unique(); - auto &parquet_bind = (ParquetWriteBindData &)bind_data; +static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel) +{ + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0); + return ZSTD_estimateCCtxSize_usingCParams(cParams); +} - auto &fs = FileSystem::GetFileSystem(context); - global_state->writer = make_unique(fs, parquet_bind.file_name, parquet_bind.sql_types, - parquet_bind.column_names, parquet_bind.codec); - return move(global_state); +size_t ZSTD_estimateCCtxSize(int compressionLevel) +{ + int level; + size_t memBudget = 0; + for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) { + size_t const newMB = ZSTD_estimateCCtxSize_internal(level); + if (newMB > memBudget) memBudget = newMB; + } + return memBudget; } -void ParquetWriteSink(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate, - LocalFunctionData &lstate, DataChunk &input) { - auto &global_state = (ParquetWriteGlobalState &)gstate; - auto &local_state = (ParquetWriteLocalState &)lstate; +size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) +{ + RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); + { ZSTD_compressionParameters const cParams = + ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0); + size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); + size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize; + size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1; + size_t const streamingSize = ZSTD_cwksp_alloc_size(inBuffSize) + + 
ZSTD_cwksp_alloc_size(outBuffSize); - // append data to the local (buffered) chunk collection - local_state.buffer->Append(input); - if (local_state.buffer->Count() > 100000) { - // if the chunk collection exceeds a certain size we flush it to the parquet file - global_state.writer->Flush(*local_state.buffer); - // and reset the buffer - local_state.buffer = make_unique(); - } + return CCtxSize + streamingSize; + } } -void ParquetWriteCombine(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate, - LocalFunctionData &lstate) { - auto &global_state = (ParquetWriteGlobalState &)gstate; - auto &local_state = (ParquetWriteLocalState &)lstate; - // flush any data left in the local state to the file - global_state.writer->Flush(*local_state.buffer); +size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); + return ZSTD_estimateCStreamSize_usingCCtxParams(¶ms); } -void ParquetWriteFinalize(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate) { - auto &global_state = (ParquetWriteGlobalState &)gstate; - // finalize: write any additional metadata to the file here - global_state.writer->Finalize(); +static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) +{ + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0); + return ZSTD_estimateCStreamSize_usingCParams(cParams); } -unique_ptr ParquetWriteInitializeLocal(ClientContext &context, FunctionData &bind_data) { - return make_unique(); +size_t ZSTD_estimateCStreamSize(int compressionLevel) +{ + int level; + size_t memBudget = 0; + for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) { + size_t const newMB = ZSTD_estimateCStreamSize_internal(level); + if (newMB > memBudget) memBudget = newMB; + } + return memBudget; } -unique_ptr ParquetScanReplacement(const string &table_name, void *data) 
{ - if (!StringUtil::EndsWith(table_name, ".parquet")) { - return nullptr; - } - auto table_function = make_unique(); - vector> children; - children.push_back(make_unique(Value(table_name))); - table_function->function = make_unique("parquet_scan", children); - return table_function; -} +/* ZSTD_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads (non-blocking mode). + */ +ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers > 0) { + return ZSTDMT_getFrameProgression(cctx->mtctx); + } +#endif + { ZSTD_frameProgression fp; + size_t const buffered = (cctx->inBuff == NULL) ? 0 : + cctx->inBuffPos - cctx->inToCompress; + if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress); + assert(buffered <= ZSTD_BLOCKSIZE_MAX); + fp.ingested = cctx->consumedSrcSize + buffered; + fp.consumed = cctx->consumedSrcSize; + fp.produced = cctx->producedCSize; + fp.flushed = cctx->producedCSize; /* simplified; some data might still be left within streaming output buffer */ + fp.currentJobID = 0; + fp.nbActiveWorkers = 0; + return fp; +} } -void ParquetExtension::Load(DuckDB &db) { - ParquetScanFunction scan_fun; - CreateTableFunctionInfo cinfo(scan_fun); - cinfo.name = "read_parquet"; - CreateTableFunctionInfo pq_scan = cinfo; - pq_scan.name = "parquet_scan"; +/*! ZSTD_toFlushNow() + * Only useful for multithreading scenarios currently (nbWorkers >= 1). 
+ */ +size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers > 0) { + return ZSTDMT_toFlushNow(cctx->mtctx); + } +#endif + (void)cctx; + return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */ +} - CopyFunction function("parquet"); - function.copy_to_bind = ParquetWriteBind; - function.copy_to_initialize_global = ParquetWriteInitializeGlobal; - function.copy_to_initialize_local = ParquetWriteInitializeLocal; - function.copy_to_sink = ParquetWriteSink; - function.copy_to_combine = ParquetWriteCombine; - function.copy_to_finalize = ParquetWriteFinalize; - function.copy_from_bind = ParquetScanFunction::ParquetReadBind; - function.copy_from_function = scan_fun; +static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, + ZSTD_compressionParameters cParams2) +{ + (void)cParams1; + (void)cParams2; + assert(cParams1.windowLog == cParams2.windowLog); + assert(cParams1.chainLog == cParams2.chainLog); + assert(cParams1.hashLog == cParams2.hashLog); + assert(cParams1.searchLog == cParams2.searchLog); + assert(cParams1.minMatch == cParams2.minMatch); + assert(cParams1.targetLength == cParams2.targetLength); + assert(cParams1.strategy == cParams2.strategy); +} - function.extension = "parquet"; - CreateCopyFunctionInfo info(function); +void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) +{ + int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + bs->rep[i] = ZSTDInternalConstants::repStartValue[i]; + bs->entropy.huf.repeatMode = HUF_repeat_none; + bs->entropy.fse.offcode_repeatMode = FSE_repeat_none; + bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none; + bs->entropy.fse.litlength_repeatMode = FSE_repeat_none; +} - Connection con(db); - con.BeginTransaction(); - auto &context = *con.context; - auto &catalog = Catalog::GetCatalog(context); - 
catalog.CreateCopyFunction(context, &info); - catalog.CreateTableFunction(context, &cinfo); - catalog.CreateTableFunction(context, &pq_scan); - con.Commit(); +/*! ZSTD_invalidateMatchState() + * Invalidate all the matches in the match finder tables. + * Requires nextSrc and base to be set (can be NULL). + */ +static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) +{ + ZSTD_window_clear(&ms->window); - auto &config = DBConfig::GetConfig(*db.instance); - config.replacement_scans.emplace_back(ParquetScanReplacement); + ms->nextToUpdate = ms->window.dictLimit; + ms->loadedDictEnd = 0; + ms->opt.litLengthSum = 0; /* force reset of btopt stats */ + ms->dictMatchState = NULL; } -} // namespace duckdb +/** + * Indicates whether this compression proceeds directly from user-provided + * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or + * whether the context needs to buffer the input/output (ZSTDb_buffered). + */ +typedef enum { + ZSTDb_not_buffered, + ZSTDb_buffered +} ZSTD_buffered_policy_e; + +/** + * Controls, for this matchState reset, whether the tables need to be cleared / + * prepared for the coming compression (ZSTDcrp_makeClean), or whether the + * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a + * subsequent operation will overwrite the table space anyways (e.g., copying + * the matchState contents in from a CDict). + */ +typedef enum { + ZSTDcrp_makeClean, + ZSTDcrp_leaveDirty +} ZSTD_compResetPolicy_e; +/** + * Controls, for this matchState reset, whether indexing can continue where it + * left off (ZSTDirp_continue), or whether it needs to be restarted from zero + * (ZSTDirp_reset). 
+ */ +typedef enum { + ZSTDirp_continue, + ZSTDirp_reset +} ZSTD_indexResetPolicy_e; +typedef enum { + ZSTD_resetTarget_CDict, + ZSTD_resetTarget_CCtx +} ZSTD_resetTarget_e; +static size_t +ZSTD_reset_matchState(ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + const ZSTD_compressionParameters* cParams, + const ZSTD_compResetPolicy_e crp, + const ZSTD_indexResetPolicy_e forceResetIndex, + const ZSTD_resetTarget_e forWho) +{ + size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); + size_t const hSize = ((size_t)1) << cParams->hashLog; + U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; + size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; + DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset); + if (forceResetIndex == ZSTDirp_reset) { + ZSTD_window_init(&ms->window); + ZSTD_cwksp_mark_tables_dirty(ws); + } + ms->hashLog3 = hashLog3; + ZSTD_invalidateMatchState(ms); + assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */ + ZSTD_cwksp_clear_tables(ws); -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/planner/table_filter.hpp" -#include "duckdb/common/file_system.hpp" -#include "duckdb/common/string_util.hpp" -#include "duckdb/common/types/date.hpp" -#include "duckdb/common/pair.hpp" + DEBUGLOG(5, "reserving table space"); + /* table Space */ + ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32)); + ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32)); + ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32)); + RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, + "failed a workspace allocation in ZSTD_reset_matchState"); -#include "duckdb/storage/object_cache.hpp" -#endif + DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty); + if (crp!=ZSTDcrp_leaveDirty) { + /* reset tables only */ + 
ZSTD_cwksp_clean_tables(ws); + } -#include -#include -#include -#include -#include + /* opt parser space */ + if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { + DEBUGLOG(4, "reserving optimal parser space"); + ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned)); + ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned)); + ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned)); + ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)); + ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); + } -namespace duckdb { + ms->cParams = *cParams; -using duckdb_parquet::format::ColumnChunk; -using duckdb_parquet::format::ConvertedType; -using duckdb_parquet::format::FieldRepetitionType; -using duckdb_parquet::format::FileMetaData; -using duckdb_parquet::format::RowGroup; -using duckdb_parquet::format::SchemaElement; -using duckdb_parquet::format::Statistics; -using duckdb_parquet::format::Type; + RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, + "failed a workspace allocation in ZSTD_reset_matchState"); -static unique_ptr CreateThriftProtocol(FileHandle &file_handle) { - shared_ptr trans(new ThriftFileTransport(file_handle)); - return make_unique>(trans); + return 0; } -static shared_ptr LoadMetadata(FileHandle &file_handle) { - auto current_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); +/* ZSTD_indexTooCloseToMax() : + * minor optimization : prefer memset() rather than reduceIndex() + * which is measurably slow in some circumstances (reported for Visual Studio). 
+ * Works when re-using a context for a lot of smallish inputs : + * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN, + * memset() will be triggered before reduceIndex(). + */ +#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB) +static int ZSTD_indexTooCloseToMax(ZSTD_window_t w) +{ + return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN); +} - auto proto = CreateThriftProtocol(file_handle); - auto &transport = ((ThriftFileTransport &)*proto->getTransport()); - auto file_size = transport.GetSize(); - if (file_size < 12) { - throw InvalidInputException("File '%s' too small to be a Parquet file", file_handle.path); - } +/*! ZSTD_resetCCtx_internal() : + note : `params` are assumed fully validated at this stage */ +static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, + ZSTD_CCtx_params params, + U64 const pledgedSrcSize, + ZSTD_compResetPolicy_e const crp, + ZSTD_buffered_policy_e const zbuff) +{ + ZSTD_cwksp* const ws = &zc->workspace; + DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u", + (U32)pledgedSrcSize, params.cParams.windowLog); + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); - ResizeableBuffer buf; - buf.resize(8); - buf.zero(); + zc->isFirstBlock = 1; - transport.SetLocation(file_size - 8); - transport.read((uint8_t *)buf.ptr, 8); + if (params.ldmParams.enableLdm) { + /* Adjust long distance matching parameters */ + ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams); + assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); + assert(params.ldmParams.hashRateLog < 32); + zc->ldmState.hashPower = ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength); + } - if (strncmp(buf.ptr + 4, "PAR1", 4) != 0) { - throw InvalidInputException("No magic bytes found at end of file '%s'", file_handle.path); - } - // read four-byte footer length from just before the end magic bytes - auto footer_len = *(uint32_t *)buf.ptr; - if (footer_len <= 0 || file_size < 12 + footer_len) { - throw 
InvalidInputException("Footer length error in file '%s'", file_handle.path); - } - auto metadata_pos = file_size - (footer_len + 8); - transport.SetLocation(metadata_pos); + { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); + U32 const divider = (params.cParams.minMatch==3) ? 3 : 4; + size_t const maxNbSeq = blockSize / divider; + size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) + + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef)) + + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); + size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0; + size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0; + size_t const matchStateSize = ZSTD_sizeof_matchState(¶ms.cParams, /* forCCtx */ 1); + size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize); - auto metadata = make_unique(); - metadata->read(proto.get()); - return make_shared(move(metadata), current_time); -} + ZSTD_indexResetPolicy_e needsIndexReset = zc->initialized ? 
ZSTDirp_continue : ZSTDirp_reset; -static LogicalType DeriveLogicalType(const SchemaElement &s_ele) { - // inner node - D_ASSERT(s_ele.__isset.type && s_ele.num_children == 0); - switch (s_ele.type) { - case Type::BOOLEAN: - return LogicalType::BOOLEAN; - case Type::INT32: - if (s_ele.__isset.converted_type) { - switch (s_ele.converted_type) { - case ConvertedType::DATE: - return LogicalType::DATE; - case ConvertedType::UINT_8: - return LogicalType::UTINYINT; - case ConvertedType::UINT_16: - return LogicalType::USMALLINT; - default: - return LogicalType::INTEGER; - } - } - return LogicalType::INTEGER; - case Type::INT64: - if (s_ele.__isset.converted_type) { - switch (s_ele.converted_type) { - case ConvertedType::TIMESTAMP_MICROS: - case ConvertedType::TIMESTAMP_MILLIS: - return LogicalType::TIMESTAMP; - case ConvertedType::UINT_32: - return LogicalType::UINTEGER; - case ConvertedType::UINT_64: - return LogicalType::UBIGINT; - default: - return LogicalType::BIGINT; - } - } - return LogicalType::BIGINT; + if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) { + needsIndexReset = ZSTDirp_reset; + } - case Type::INT96: // always a timestamp it would seem - return LogicalType::TIMESTAMP; - case Type::FLOAT: - return LogicalType::FLOAT; - case Type::DOUBLE: - return LogicalType::DOUBLE; - case Type::BYTE_ARRAY: - case Type::FIXED_LEN_BYTE_ARRAY: - if (s_ele.type == Type::FIXED_LEN_BYTE_ARRAY && !s_ele.__isset.type_length) { - return LogicalType::INVALID; - } - if (s_ele.__isset.converted_type) { - switch (s_ele.converted_type) { - case ConvertedType::DECIMAL: - if (s_ele.type == Type::FIXED_LEN_BYTE_ARRAY && s_ele.__isset.scale && s_ele.__isset.type_length) { - return LogicalType(LogicalTypeId::DECIMAL, s_ele.precision, s_ele.scale); - } - return LogicalType::INVALID; + if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0); - case ConvertedType::UTF8: - return LogicalType::VARCHAR; - default: - return LogicalType::BLOB; - } - } - return 
LogicalType::BLOB; - default: - return LogicalType::INVALID; - } -} + /* Check if workspace is large enough, alloc a new one if needed */ + { size_t const cctxSpace = zc->staticSize ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; + size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE); + size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); + size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) + ZSTD_cwksp_alloc_size(buffOutSize); + size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams); + size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)); -static unique_ptr CreateReaderRecursive(const FileMetaData *file_meta_data, idx_t depth, idx_t max_define, - idx_t max_repeat, idx_t &next_schema_idx, idx_t &next_file_idx) { - D_ASSERT(file_meta_data); - D_ASSERT(next_schema_idx < file_meta_data->schema.size()); - auto &s_ele = file_meta_data->schema[next_schema_idx]; - auto this_idx = next_schema_idx; + size_t const neededSpace = + cctxSpace + + entropySpace + + blockStateSpace + + ldmSpace + + ldmSeqSpace + + matchStateSize + + tokenSpace + + bufferSpace; - if (s_ele.__isset.repetition_type) { - if (s_ele.repetition_type != FieldRepetitionType::REQUIRED) { - max_define++; - } - if (s_ele.repetition_type == FieldRepetitionType::REPEATED) { - max_repeat++; - } - } + int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace; + int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace); - if (!s_ele.__isset.type) { // inner node - if (s_ele.num_children == 0) { - throw std::runtime_error("Node has no children but should"); - } - child_list_t child_types; - vector> child_readers; + DEBUGLOG(4, "Need %zuKB workspace, including %zuKB for match state, and %zuKB for buffers", + neededSpace>>10, matchStateSize>>10, bufferSpace>>10); + DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); - idx_t c_idx = 0; - while (c_idx < 
(idx_t)s_ele.num_children) { - next_schema_idx++; + if (workspaceTooSmall || workspaceWasteful) { + DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB", + ZSTD_cwksp_sizeof(ws) >> 10, + neededSpace >> 10); - auto &child_ele = file_meta_data->schema[next_schema_idx]; + RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize"); - auto child_reader = CreateReaderRecursive(file_meta_data, depth + 1, max_define, max_repeat, - next_schema_idx, next_file_idx); - child_types.push_back(make_pair(child_ele.name, child_reader->Type())); - child_readers.push_back(move(child_reader)); + needsIndexReset = ZSTDirp_reset; - c_idx++; - } - D_ASSERT(!child_types.empty()); - unique_ptr result; - LogicalType result_type; - // if we only have a single child no reason to create a struct ay - if (child_types.size() > 1 || depth == 0) { - result_type = LogicalType(LogicalTypeId::STRUCT, child_types); - result = make_unique(result_type, s_ele, this_idx, max_define, max_repeat, - move(child_readers)); - } else { - // if we have a struct with only a single type, pull up - result_type = child_types[0].second; - result = move(child_readers[0]); - } - if (s_ele.repetition_type == FieldRepetitionType::REPEATED) { - result_type = LogicalType(LogicalTypeId::LIST, {make_pair("", result_type)}); - return make_unique(result_type, s_ele, this_idx, max_define, max_repeat, move(result)); - } - return result; - } else { // leaf node - // TODO check return value of derive type or should we only do this on read() - return ColumnReader::CreateReader(DeriveLogicalType(s_ele), s_ele, next_file_idx++, max_define, max_repeat); - } -} + ZSTD_cwksp_free(ws, zc->customMem); + FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), ""); -// TODO we don't need readers for columns we are not going to read ay -static unique_ptr CreateReader(const FileMetaData *file_meta_data) { - idx_t next_schema_idx = 0; - idx_t next_file_idx = 0; + DEBUGLOG(5, "reserving object space"); + /* 
Statically sized space. + * entropyWorkspace never moves, + * though prev/next block swap places */ + assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t))); + zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); + RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock"); + zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); + RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock"); + zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, HUF_WORKSPACE_SIZE); + RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace"); + } } - auto ret = CreateReaderRecursive(file_meta_data, 0, 0, 0, next_schema_idx, next_file_idx); - D_ASSERT(next_schema_idx == file_meta_data->schema.size() - 1); - D_ASSERT(file_meta_data->row_groups.empty() || next_file_idx == file_meta_data->row_groups[0].columns.size()); - return ret; -} + ZSTD_cwksp_clear(ws); -void ParquetReader::InitializeSchema(const vector &expected_types_p, const string &initial_filename_p) { - auto file_meta_data = GetFileMetadata(); + /* init params */ + zc->appliedParams = params; + zc->blockState.matchState.cParams = params.cParams; + zc->pledgedSrcSizePlusOne = pledgedSrcSize+1; + zc->consumedSrcSize = 0; + zc->producedCSize = 0; + if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) + zc->appliedParams.fParams.contentSizeFlag = 0; + DEBUGLOG(4, "pledged content size : %u ; flag : %u", + (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag); + zc->blockSize = blockSize; - if (file_meta_data->__isset.encryption_algorithm) { - throw FormatException("Encrypted Parquet files are not supported"); - } - // check if we like this schema - if (file_meta_data->schema.size() < 2) { - throw 
FormatException("Need at least one non-root column in the file"); - } + XXH64_reset(&zc->xxhState, 0); + zc->stage = ZSTDcs_init; + zc->dictID = 0; - bool has_expected_types = !expected_types_p.empty(); - auto root_reader = CreateReader(file_meta_data); + ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); - auto root_type = root_reader->Type(); - D_ASSERT(root_type.id() == LogicalTypeId::STRUCT); - idx_t col_idx = 0; - for (auto &type_pair : root_type.child_types()) { - if (has_expected_types && expected_types_p[col_idx] != type_pair.second) { - if (initial_filename_p.empty()) { - throw FormatException("column \"%d\" in parquet file is of type %s, could not auto cast to " - "expected type %s for this column", - col_idx, type_pair.second, expected_types_p[col_idx].ToString()); - } else { - throw FormatException("schema mismatch in Parquet glob: column \"%d\" in parquet file is of type " - "%s, but in the original file \"%s\" this column is of type \"%s\"", - col_idx, type_pair.second, initial_filename_p, - expected_types_p[col_idx].ToString()); - } - } else { - names.push_back(type_pair.first); - return_types.push_back(type_pair.second); - } - col_idx++; - } - D_ASSERT(!names.empty()); - D_ASSERT(!return_types.empty()); -} + /* ZSTD_wildcopy() is used to copy into the literals buffer, + * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes. 
+ */ + zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH); + zc->seqStore.maxNbLit = blockSize; -ParquetReader::ParquetReader(unique_ptr file_handle_p, const vector &expected_types_p, - const string &initial_filename_p) { - file_name = file_handle_p->path; - file_handle = move(file_handle_p); - metadata = LoadMetadata(*file_handle); - InitializeSchema(expected_types_p, initial_filename_p); -} + /* buffers */ + zc->inBuffSize = buffInSize; + zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize); + zc->outBuffSize = buffOutSize; + zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize); -ParquetReader::ParquetReader(ClientContext &context_p, string file_name_p, const vector &expected_types_p, - const string &initial_filename_p) { - auto &fs = FileSystem::GetFileSystem(context_p); - file_name = move(file_name_p); - file_handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ); - // If object cached is disabled - // or if this file has cached metadata - // or if the cached version already expired + /* ldm bucketOffsets table */ + if (params.ldmParams.enableLdm) { + /* TODO: avoid memset? 
*/ + size_t const ldmBucketSize = + ((size_t)1) << (params.ldmParams.hashLog - + params.ldmParams.bucketSizeLog); + zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize); + memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize); + } - auto last_modify_time = fs.GetLastModifiedTime(*file_handle); - if (!ObjectCache::ObjectCacheEnabled(context_p)) { - metadata = LoadMetadata(*file_handle); - } else { - metadata = - std::dynamic_pointer_cast(ObjectCache::GetObjectCache(context_p).Get(file_name)); - if (!metadata || (last_modify_time + 10 >= metadata->read_time)) { - metadata = LoadMetadata(*file_handle); - ObjectCache::GetObjectCache(context_p).Put(file_name, metadata); - } - } + /* sequences storage */ + ZSTD_referenceExternalSequences(zc, NULL, 0); + zc->seqStore.maxNbSeq = maxNbSeq; + zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef)); - InitializeSchema(expected_types_p, initial_filename_p); -} + FORWARD_IF_ERROR(ZSTD_reset_matchState( + &zc->blockState.matchState, + ws, + ¶ms.cParams, + crp, + needsIndexReset, + ZSTD_resetTarget_CCtx), ""); -ParquetReader::~ParquetReader() { -} + /* ldm hash table */ + if (params.ldmParams.enableLdm) { + /* TODO: avoid memset? 
*/ + size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog; + zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t)); + memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); + zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq)); + zc->maxNbLdmSequences = maxNbLdmSeq; -const FileMetaData *ParquetReader::GetFileMetadata() { - D_ASSERT(metadata); - D_ASSERT(metadata->metadata); - return metadata->metadata.get(); -} + ZSTD_window_init(&zc->ldmState.window); + ZSTD_window_clear(&zc->ldmState.window); + zc->ldmState.loadedDictEnd = 0; + } -// TODO also somewhat ugly, perhaps this can be moved to the column reader too -unique_ptr ParquetReader::ReadStatistics(LogicalType &type, column_t file_col_idx, - const FileMetaData *file_meta_data) { - unique_ptr column_stats; - auto root_reader = CreateReader(file_meta_data); - auto column_reader = ((StructColumnReader *)root_reader.get())->GetChildReader(file_col_idx); + DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); + zc->initialized = 1; - for (auto &row_group : file_meta_data->row_groups) { - auto chunk_stats = column_reader->Stats(row_group.columns); - if (!chunk_stats) { - return nullptr; - } - if (!column_stats) { - column_stats = move(chunk_stats); - } else { - column_stats->Merge(*chunk_stats); - } - } - return column_stats; + return 0; + } } -const RowGroup &ParquetReader::GetGroup(ParquetReaderScanState &state) { - auto file_meta_data = GetFileMetadata(); - D_ASSERT(state.current_group >= 0 && (idx_t)state.current_group < state.group_idx_list.size()); - D_ASSERT(state.group_idx_list[state.current_group] >= 0 && - state.group_idx_list[state.current_group] < file_meta_data->row_groups.size()); - return file_meta_data->row_groups[state.group_idx_list[state.current_group]]; +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous 
block. + * Note : only works with regular variant; + * do not use with extDict variant ! */ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) { + int i; + for (i=0; iblockState.prevCBlock->rep[i] = 0; + assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window)); } -void ParquetReader::PrepareRowGroupBuffer(ParquetReaderScanState &state, idx_t out_col_idx) { - auto &group = GetGroup(state); - - auto column_reader = ((StructColumnReader *)state.root_reader.get())->GetChildReader(state.column_ids[out_col_idx]); - - // TODO move this to columnreader too - if (state.filters) { - auto stats = column_reader->Stats(group.columns); - // filters contain output chunk index, not file col idx! - auto filter_entry = state.filters->filters.find(out_col_idx); - if (stats && filter_entry != state.filters->filters.end()) { - bool skip_chunk = false; - switch (column_reader->Type().id()) { - case LogicalTypeId::UTINYINT: - case LogicalTypeId::USMALLINT: - case LogicalTypeId::UINTEGER: - case LogicalTypeId::UBIGINT: - case LogicalTypeId::INTEGER: - case LogicalTypeId::BIGINT: - case LogicalTypeId::FLOAT: - case LogicalTypeId::TIMESTAMP: - case LogicalTypeId::DOUBLE: { - auto &num_stats = (NumericStatistics &)*stats; - for (auto &filter : filter_entry->second) { - skip_chunk = !num_stats.CheckZonemap(filter.comparison_type, filter.constant); - if (skip_chunk) { - break; - } - } - break; - } - case LogicalTypeId::BLOB: - case LogicalTypeId::VARCHAR: { - auto &str_stats = (StringStatistics &)*stats; - for (auto &filter : filter_entry->second) { - skip_chunk = !str_stats.CheckZonemap(filter.comparison_type, filter.constant.str_value); - if (skip_chunk) { - break; - } - } - break; - } - default: - break; - } - if (skip_chunk) { - state.group_offset = group.num_rows; - return; - // this effectively will skip this chunk - } - } - } - - state.root_reader->IntializeRead(group.columns, *state.thrift_file_proto); -} +/* These are the approximate sizes for each strategy past which copying 
the + * dictionary tables into the working context is faster than using them + * in-place. + */ +static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = { + 8 KB, /* unused */ + 8 KB, /* ZSTD_fast */ + 16 KB, /* ZSTD_dfast */ + 32 KB, /* ZSTD_greedy */ + 32 KB, /* ZSTD_lazy */ + 32 KB, /* ZSTD_lazy2 */ + 32 KB, /* ZSTD_btlazy2 */ + 32 KB, /* ZSTD_btopt */ + 8 KB, /* ZSTD_btultra */ + 8 KB /* ZSTD_btultra2 */ +}; -idx_t ParquetReader::NumRows() { - return GetFileMetadata()->num_rows; +static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + U64 pledgedSrcSize) +{ + size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy]; + return ( pledgedSrcSize <= cutoff + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || params->attachDictPref == ZSTD_dictForceAttach ) + && params->attachDictPref != ZSTD_dictForceCopy + && !params->forceWindow; /* dictMatchState isn't correctly + * handled in _enforceMaxDist */ } -idx_t ParquetReader::NumRowGroups() { - return GetFileMetadata()->row_groups.size(); -} +static size_t +ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + { const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams; + unsigned const windowLog = params.cParams.windowLog; + assert(windowLog != 0); + /* Resize working context table params for input only, since the dict + * has its own tables. */ + /* pledgeSrcSize == 0 means 0! 
*/ + params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0); + params.cParams.windowLog = windowLog; + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + ZSTDcrp_makeClean, zbuff), ""); + assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); + } -void ParquetReader::InitializeScan(ParquetReaderScanState &state, vector column_ids, - vector groups_to_read, TableFilterSet *filters) { - state.current_group = -1; - state.finished = false; - state.column_ids = move(column_ids); - state.group_offset = 0; - state.group_idx_list = move(groups_to_read); - state.filters = filters; - state.sel.Initialize(STANDARD_VECTOR_SIZE); - state.file_handle = file_handle->file_system.OpenFile(file_handle->path, FileFlags::FILE_FLAGS_READ); - state.thrift_file_proto = CreateThriftProtocol(*state.file_handle); - state.root_reader = CreateReader(GetFileMetadata()); + { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc + - cdict->matchState.window.base); + const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit; + if (cdictLen == 0) { + /* don't even attach dictionaries with no contents */ + DEBUGLOG(4, "skipping attaching empty dictionary"); + } else { + DEBUGLOG(4, "attaching dictionary into context"); + cctx->blockState.matchState.dictMatchState = &cdict->matchState; - state.define_buf.resize(STANDARD_VECTOR_SIZE); - state.repeat_buf.resize(STANDARD_VECTOR_SIZE); -} + /* prep working match state so dict matches never have negative indices + * when they are translated to the working context's index space. 
*/ + if (cctx->blockState.matchState.window.dictLimit < cdictEnd) { + cctx->blockState.matchState.window.nextSrc = + cctx->blockState.matchState.window.base + cdictEnd; + ZSTD_window_clear(&cctx->blockState.matchState.window); + } + /* loadedDictEnd is expressed within the referential of the active context */ + cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; + } } -template -void TemplatedFilterOperation(Vector &v, T constant, parquet_filter_t &filter_mask, idx_t count) { - D_ASSERT(v.GetVectorType() == VectorType::FLAT_VECTOR); // we just created the damn thing it better be + cctx->dictID = cdict->dictID; - auto v_ptr = FlatVector::GetData(v); - auto &mask = FlatVector::Validity(v); + /* copy block state */ + memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); - if (!mask.AllValid()) { - for (idx_t i = 0; i < count; i++) { - filter_mask[i] = filter_mask[i] && mask.RowIsValid(i) && OP::Operation(v_ptr[i], constant); - } - } else { - for (idx_t i = 0; i < count; i++) { - filter_mask[i] = filter_mask[i] && OP::Operation(v_ptr[i], constant); - } - } + return 0; } -template -static void FilterOperationSwitch(Vector &v, Value &constant, parquet_filter_t &filter_mask, idx_t count) { - if (filter_mask.none() || count == 0) { - return; - } - switch (v.GetType().id()) { - case LogicalTypeId::BOOLEAN: - TemplatedFilterOperation(v, constant.value_.boolean, filter_mask, count); - break; +static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; - case LogicalTypeId::UTINYINT: - TemplatedFilterOperation(v, constant.value_.utinyint, filter_mask, count); - break; + DEBUGLOG(4, "copying dictionary into context"); - case LogicalTypeId::USMALLINT: - TemplatedFilterOperation(v, constant.value_.usmallint, filter_mask, 
count); - break; + { unsigned const windowLog = params.cParams.windowLog; + assert(windowLog != 0); + /* Copy only compression parameters related to tables. */ + params.cParams = *cdict_cParams; + params.cParams.windowLog = windowLog; + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + ZSTDcrp_leaveDirty, zbuff), ""); + assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); + assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); + assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog); + } - case LogicalTypeId::UINTEGER: - TemplatedFilterOperation(v, constant.value_.uinteger, filter_mask, count); - break; + ZSTD_cwksp_mark_tables_dirty(&cctx->workspace); - case LogicalTypeId::UBIGINT: - TemplatedFilterOperation(v, constant.value_.ubigint, filter_mask, count); - break; - - case LogicalTypeId::INTEGER: - TemplatedFilterOperation(v, constant.value_.integer, filter_mask, count); - break; + /* copy tables */ + { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog); + size_t const hSize = (size_t)1 << cdict_cParams->hashLog; - case LogicalTypeId::BIGINT: - TemplatedFilterOperation(v, constant.value_.bigint, filter_mask, count); - break; + memcpy(cctx->blockState.matchState.hashTable, + cdict->matchState.hashTable, + hSize * sizeof(U32)); + memcpy(cctx->blockState.matchState.chainTable, + cdict->matchState.chainTable, + chainSize * sizeof(U32)); + } - case LogicalTypeId::FLOAT: - TemplatedFilterOperation(v, constant.value_.float_, filter_mask, count); - break; + /* Zero the hashTable3, since the cdict never fills it */ + { int const h3log = cctx->blockState.matchState.hashLog3; + size_t const h3Size = h3log ? 
((size_t)1 << h3log) : 0; + assert(cdict->matchState.hashLog3 == 0); + memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); + } - case LogicalTypeId::DOUBLE: - TemplatedFilterOperation(v, constant.value_.double_, filter_mask, count); - break; + ZSTD_cwksp_mark_tables_clean(&cctx->workspace); - case LogicalTypeId::TIMESTAMP: - TemplatedFilterOperation(v, constant.value_.bigint, filter_mask, count); - break; + /* copy dictionary offsets */ + { ZSTD_matchState_t const* srcMatchState = &cdict->matchState; + ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; + } - case LogicalTypeId::BLOB: - case LogicalTypeId::VARCHAR: - TemplatedFilterOperation(v, string_t(constant.str_value), filter_mask, count); - break; + cctx->dictID = cdict->dictID; - default: - throw NotImplementedException("Unsupported type for filter %s", v.ToString()); - } -} + /* copy block state */ + memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); -void ParquetReader::Scan(ParquetReaderScanState &state, DataChunk &result) { - while (ScanInternal(state, result)) { - if (result.size() > 0) { - break; - } - result.Reset(); - } + return 0; } -bool ParquetReader::ScanInternal(ParquetReaderScanState &state, DataChunk &result) { - if (state.finished) { - return false; - } - - // see if we have to switch to the next row group in the parquet file - if (state.current_group < 0 || (int64_t)state.group_offset >= GetGroup(state).num_rows) { - state.current_group++; - state.group_offset = 0; - - if ((idx_t)state.current_group == state.group_idx_list.size()) { - state.finished = true; - return false; - } - - for (idx_t out_col_idx = 0; out_col_idx < result.ColumnCount(); out_col_idx++) { - // this is a special case where we are not interested in the actual contents of the file - if 
(state.column_ids[out_col_idx] == COLUMN_IDENTIFIER_ROW_ID) { - continue; - } - - PrepareRowGroupBuffer(state, out_col_idx); - } - return true; - } - - auto this_output_chunk_rows = MinValue(STANDARD_VECTOR_SIZE, GetGroup(state).num_rows - state.group_offset); - result.SetCardinality(this_output_chunk_rows); - - if (this_output_chunk_rows == 0) { - state.finished = true; - return false; // end of last group, we are done - } - - // we evaluate simple table filters directly in this scan so we can skip decoding column data that's never going to - // be relevant - parquet_filter_t filter_mask; - filter_mask.set(); - - state.define_buf.zero(); - state.repeat_buf.zero(); - - auto define_ptr = (uint8_t *)state.define_buf.ptr; - auto repeat_ptr = (uint8_t *)state.repeat_buf.ptr; - - auto root_reader = ((StructColumnReader *)state.root_reader.get()); - - if (state.filters) { - vector need_to_read(result.ColumnCount(), true); +/* We have a choice between copying the dictionary context into the working + * context, or referencing the dictionary context from the working context + * in-place. We decide here which strategy to use. 
*/ +static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ - // first load the columns that are used in filters - for (auto &filter_col : state.filters->filters) { - auto file_col_idx = state.column_ids[filter_col.first]; + DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)", + (unsigned)pledgedSrcSize); - if (filter_mask.none()) { // if no rows are left we can stop checking filters - break; - } + if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) { + return ZSTD_resetCCtx_byAttachingCDict( + cctx, cdict, *params, pledgedSrcSize, zbuff); + } else { + return ZSTD_resetCCtx_byCopyingCDict( + cctx, cdict, *params, pledgedSrcSize, zbuff); + } +} - root_reader->GetChildReader(file_col_idx) - ->Read(result.size(), filter_mask, define_ptr, repeat_ptr, result.data[filter_col.first]); +/*! ZSTD_copyCCtx_internal() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). + * The "context", in this case, refers to the hash and chain tables, + * entropy tables, and dictionary references. + * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx. + * @return : 0, or an error code */ +static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, + const ZSTD_CCtx* srcCCtx, + ZSTD_frameParameters fParams, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + DEBUGLOG(5, "ZSTD_copyCCtx_internal"); + RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong, + "Can't copy a ctx that's not in init stage."); - need_to_read[filter_col.first] = false; + memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); + { ZSTD_CCtx_params params = dstCCtx->requestedParams; + /* Copy only compression parameters related to tables. 
*/ + params.cParams = srcCCtx->appliedParams.cParams; + params.fParams = fParams; + ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, + ZSTDcrp_leaveDirty, zbuff); + assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); + assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); + assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog); + assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog); + assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3); + } - for (auto &filter : filter_col.second) { - switch (filter.comparison_type) { - case ExpressionType::COMPARE_EQUAL: - FilterOperationSwitch(result.data[filter_col.first], filter.constant, filter_mask, - this_output_chunk_rows); - break; - case ExpressionType::COMPARE_LESSTHAN: - FilterOperationSwitch(result.data[filter_col.first], filter.constant, filter_mask, - this_output_chunk_rows); - break; - case ExpressionType::COMPARE_LESSTHANOREQUALTO: - FilterOperationSwitch(result.data[filter_col.first], filter.constant, filter_mask, - this_output_chunk_rows); - break; - case ExpressionType::COMPARE_GREATERTHAN: - FilterOperationSwitch(result.data[filter_col.first], filter.constant, filter_mask, - this_output_chunk_rows); - break; - case ExpressionType::COMPARE_GREATERTHANOREQUALTO: - FilterOperationSwitch(result.data[filter_col.first], filter.constant, - filter_mask, this_output_chunk_rows); - break; - default: - D_ASSERT(0); - } - } - } + ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace); - // we still may have to read some cols - for (idx_t out_col_idx = 0; out_col_idx < result.ColumnCount(); out_col_idx++) { - if (!need_to_read[out_col_idx]) { - continue; - } - auto file_col_idx = state.column_ids[out_col_idx]; + /* copy tables */ + { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 
0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog); + size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; + int const h3log = srcCCtx->blockState.matchState.hashLog3; + size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; - if (filter_mask.none()) { - root_reader->GetChildReader(file_col_idx)->Skip(result.size()); - continue; - } - // TODO handle ROWID here, too - root_reader->GetChildReader(file_col_idx) - ->Read(result.size(), filter_mask, define_ptr, repeat_ptr, result.data[out_col_idx]); - } + memcpy(dstCCtx->blockState.matchState.hashTable, + srcCCtx->blockState.matchState.hashTable, + hSize * sizeof(U32)); + memcpy(dstCCtx->blockState.matchState.chainTable, + srcCCtx->blockState.matchState.chainTable, + chainSize * sizeof(U32)); + memcpy(dstCCtx->blockState.matchState.hashTable3, + srcCCtx->blockState.matchState.hashTable3, + h3Size * sizeof(U32)); + } - idx_t sel_size = 0; - for (idx_t i = 0; i < this_output_chunk_rows; i++) { - if (filter_mask[i]) { - state.sel.set_index(sel_size++, i); - } - } + ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace); - result.Slice(state.sel, sel_size); - result.Verify(); + /* copy dictionary offsets */ + { + const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState; + ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; + } + dstCCtx->dictID = srcCCtx->dictID; - } else { // #nofilter, just fricking load the data - for (idx_t out_col_idx = 0; out_col_idx < result.ColumnCount(); out_col_idx++) { - auto file_col_idx = state.column_ids[out_col_idx]; + /* copy block state */ + memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock)); - if (file_col_idx == COLUMN_IDENTIFIER_ROW_ID) { - Value constant_42 = Value::BIGINT(42); - 
result.data[out_col_idx].Reference(constant_42); - continue; - } + return 0; +} - root_reader->GetChildReader(file_col_idx) - ->Read(result.size(), filter_mask, define_ptr, repeat_ptr, result.data[out_col_idx]); - } - } +/*! ZSTD_copyCCtx() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). + * pledgedSrcSize==0 means "unknown". +* @return : 0, or an error code */ +size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize) +{ + ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + ZSTD_buffered_policy_e const zbuff = (ZSTD_buffered_policy_e)(srcCCtx->inBuffSize>0); + ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1); + if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; + fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN); - state.group_offset += this_output_chunk_rows; - return true; + return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, + fParams, pledgedSrcSize, + zbuff); } -} // namespace duckdb - +#define ZSTD_ROWSIZE 16 +/*! ZSTD_reduceTable() : + * reduce table indexes by `reducerValue`, or squash to zero. + * PreserveMark preserves "unsorted mark" for btlazy2 strategy. + * It must be set to a clear 0/1 value, to remove branch during inlining. 
+ * Presume table size is a multiple of ZSTD_ROWSIZE + * to help auto-vectorization */ +FORCE_INLINE_TEMPLATE void +ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark) +{ + int const nbRows = (int)size / ZSTD_ROWSIZE; + int cellNb = 0; + int rowNb; + assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ + assert(size < (1U<<31)); /* can be casted to int */ -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/types/value.hpp" -#include "duckdb/storage/statistics/string_statistics.hpp" -#include "duckdb/storage/statistics/numeric_statistics.hpp" +#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the table re-use logic is sound, and that we don't + * access table space that we haven't cleaned, we re-"poison" the table + * space every time we mark it dirty. + * + * This function however is intended to operate on those dirty tables and + * re-clean them. So when this function is used correctly, we can unpoison + * the memory it operated on. This introduces a blind spot though, since + * if we now try to operate on __actually__ poisoned memory, we will not + * detect that. */ + __msan_unpoison(table, size * sizeof(U32)); #endif -namespace duckdb { - -using duckdb_parquet::format::ConvertedType; -using duckdb_parquet::format::Type; - -template -static unique_ptr TemplatedGetNumericStats(const LogicalType &type, - const duckdb_parquet::format::Statistics &parquet_stats) { - auto stats = make_unique(type); - - // for reasons unknown to science, Parquet defines *both* `min` and `min_value` as well as `max` and - // `max_value`. All are optional. such elegance. 
- if (parquet_stats.__isset.min) { - stats->min = FUNC((const_data_ptr_t)parquet_stats.min.data()); - } else if (parquet_stats.__isset.min_value) { - stats->min = FUNC((const_data_ptr_t)parquet_stats.min_value.data()); - } else { - stats->min.is_null = true; - } - if (parquet_stats.__isset.max) { - stats->max = FUNC((const_data_ptr_t)parquet_stats.max.data()); - } else if (parquet_stats.__isset.max_value) { - stats->max = FUNC((const_data_ptr_t)parquet_stats.max_value.data()); - } else { - stats->max.is_null = true; - } - // GCC 4.x insists on a move() here - return move(stats); + for (rowNb=0 ; rowNb < nbRows ; rowNb++) { + int column; + for (column=0; column -static Value TransformStatisticsPlain(const_data_ptr_t input) { - return Value::CreateValue(Load(input)); +static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue) +{ + ZSTD_reduceTable_internal(table, size, reducerValue, 0); } -static Value TransformStatisticsFloat(const_data_ptr_t input) { - auto val = Load(input); - if (!Value::FloatIsValid(val)) { - return Value(LogicalType::FLOAT); - } - return Value::CreateValue(val); +static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue) +{ + ZSTD_reduceTable_internal(table, size, reducerValue, 1); } -static Value TransformStatisticsDouble(const_data_ptr_t input) { - auto val = Load(input); - if (!Value::DoubleIsValid(val)) { - return Value(LogicalType::DOUBLE); - } - return Value::CreateValue(val); -} +/*! 
ZSTD_reduceIndex() : +* rescale all indexes to avoid future overflow (indexes are U32) */ +static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue) +{ + { U32 const hSize = (U32)1 << params->cParams.hashLog; + ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); + } -static Value TransformStatisticsTimestampMs(const_data_ptr_t input) { - return Value::TIMESTAMP(ParquetTimestampMsToTimestamp(Load(input))); -} + if (params->cParams.strategy != ZSTD_fast) { + U32 const chainSize = (U32)1 << params->cParams.chainLog; + if (params->cParams.strategy == ZSTD_btlazy2) + ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); + else + ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); + } -static Value TransformStatisticsTimestampMicros(const_data_ptr_t input) { - return Value::TIMESTAMP(ParquetTimestampMicrosToTimestamp(Load(input))); + if (ms->hashLog3) { + U32 const h3Size = (U32)1 << ms->hashLog3; + ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue); + } } -static Value TransformStatisticsTimestampImpala(const_data_ptr_t input) { - return Value::TIMESTAMP(ImpalaTimestampToTimestamp(Load(input))); -} -unique_ptr ParquetTransformColumnStatistics(const SchemaElement &s_ele, const LogicalType &type, - const ColumnChunk &column_chunk) { - if (!column_chunk.__isset.meta_data || !column_chunk.meta_data.__isset.statistics) { - // no stats present for row group - return nullptr; - } - auto &parquet_stats = column_chunk.meta_data.statistics; - unique_ptr row_group_stats; +/*-******************************************************* +* Block entropic compression +*********************************************************/ - switch (type.id()) { +/* See doc/zstd_compression_format.md for detailed format description */ - case LogicalTypeId::UTINYINT: - row_group_stats = TemplatedGetNumericStats>(type, parquet_stats); - break; +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) +{ + const seqDef* const 
sequences = seqStorePtr->sequencesStart; + BYTE* const llCodeTable = seqStorePtr->llCode; + BYTE* const ofCodeTable = seqStorePtr->ofCode; + BYTE* const mlCodeTable = seqStorePtr->mlCode; + U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + U32 u; + assert(nbSeq <= seqStorePtr->maxNbSeq); + for (u=0; ulongLengthID==1) + llCodeTable[seqStorePtr->longLengthPos] = MaxLL; + if (seqStorePtr->longLengthID==2) + mlCodeTable[seqStorePtr->longLengthPos] = MaxML; +} - case LogicalTypeId::USMALLINT: - row_group_stats = TemplatedGetNumericStats>(type, parquet_stats); - break; +/* ZSTD_useTargetCBlockSize(): + * Returns if target compressed block size param is being used. + * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize. + * Returns 1 if true, 0 otherwise. */ +static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) +{ + DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize); + return (cctxParams->targetCBlockSize != 0); +} - case LogicalTypeId::UINTEGER: - row_group_stats = TemplatedGetNumericStats>(type, parquet_stats); - break; +/* ZSTD_compressSequences_internal(): + * actually compresses both literals and sequences */ +MEM_STATIC size_t +ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + void* entropyWorkspace, size_t entropyWkspSize, + const int bmi2) +{ + const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + unsigned count[MaxSeq+1]; + FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; + U32 LLtype, Offtype, MLtype; /* compressed, raw or 
rle */ + const seqDef* const sequences = seqStorePtr->sequencesStart; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + BYTE* seqHead; + BYTE* lastNCount = NULL; - case LogicalTypeId::UBIGINT: - row_group_stats = TemplatedGetNumericStats>(type, parquet_stats); - break; - case LogicalTypeId::INTEGER: - row_group_stats = TemplatedGetNumericStats>(type, parquet_stats); - break; + DEBUGLOG(5, "ZSTD_compressSequences_internal (nbSeq=%zu)", nbSeq); + ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<>(type, parquet_stats); - break; + /* Compress literals */ + { const BYTE* const literals = seqStorePtr->litStart; + size_t const litSize = (size_t)(seqStorePtr->lit - literals); + size_t const cSize = ZSTD_compressLiterals( + &prevEntropy->huf, &nextEntropy->huf, + cctxParams->cParams.strategy, + ZSTD_disableLiteralsCompression(cctxParams), + op, dstCapacity, + literals, litSize, + entropyWorkspace, entropyWkspSize, + bmi2); + FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); + assert(cSize <= dstCapacity); + op += cSize; + } - case LogicalTypeId::FLOAT: - row_group_stats = TemplatedGetNumericStats(type, parquet_stats); - break; + /* Sequences Header */ + RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, + dstSize_tooSmall, "Can't fit seq hdr in output buf!"); + if (nbSeq < 128) { + *op++ = (BYTE)nbSeq; + } else if (nbSeq < LONGNBSEQ) { + op[0] = (BYTE)((nbSeq>>8) + 0x80); + op[1] = (BYTE)nbSeq; + op+=2; + } else { + op[0]=0xFF; + MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)); + op+=3; + } + assert(op <= oend); + if (nbSeq==0) { + /* Copy the old tables over as if we repeated them */ + memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); + 
return (size_t)(op - ostart); + } - case LogicalTypeId::DOUBLE: - row_group_stats = TemplatedGetNumericStats(type, parquet_stats); - break; + /* seqHead : flags for FSE encoding type */ + seqHead = op++; + assert(op <= oend); - // here we go, our favorite type - case LogicalTypeId::TIMESTAMP: { - switch (s_ele.type) { - case Type::INT64: - // arrow timestamp - switch (s_ele.converted_type) { - case ConvertedType::TIMESTAMP_MICROS: - row_group_stats = TemplatedGetNumericStats(type, parquet_stats); - break; - case ConvertedType::TIMESTAMP_MILLIS: - row_group_stats = TemplatedGetNumericStats(type, parquet_stats); - break; - default: - return nullptr; - } - break; - case Type::INT96: - // impala timestamp - row_group_stats = TemplatedGetNumericStats(type, parquet_stats); - break; - default: - return nullptr; - } - break; - } - case LogicalTypeId::VARCHAR: { - auto string_stats = make_unique(type); - if (parquet_stats.__isset.min) { - string_stats->Update(parquet_stats.min); - } else if (parquet_stats.__isset.min_value) { - string_stats->Update(parquet_stats.min_value); - } else { - return nullptr; - } - if (parquet_stats.__isset.max) { - string_stats->Update(parquet_stats.max); - } else if (parquet_stats.__isset.max_value) { - string_stats->Update(parquet_stats.max_value); - } else { - return nullptr; - } + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + /* build CTable for Literal Lengths */ + { unsigned max = MaxLL; + size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + DEBUGLOG(5, "Building LL table"); + nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode; + LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, + count, max, mostFrequent, nbSeq, + LLFSELog, prevEntropy->fse.litlengthCTable, + ZSTDInternalConstants::LL_defaultNorm, ZSTDInternalConstants::LL_defaultNormLog, + ZSTD_defaultAllowed, strategy); + 
assert(set_basic < set_compressed && set_rle < set_compressed); + assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, + count, max, llCodeTable, nbSeq, + ZSTDInternalConstants::LL_defaultNorm, ZSTDInternalConstants::LL_defaultNormLog, MaxLL, + prevEntropy->fse.litlengthCTable, + sizeof(prevEntropy->fse.litlengthCTable), + entropyWorkspace, entropyWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); + if (LLtype == set_compressed) + lastNCount = op; + op += countSize; + assert(op <= oend); + } } + /* build CTable for Offsets */ + { unsigned max = MaxOff; + size_t const mostFrequent = HIST_countFast_wksp( + count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ + ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? 
ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + DEBUGLOG(5, "Building OF table"); + nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode; + Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, + count, max, mostFrequent, nbSeq, + OffFSELog, prevEntropy->fse.offcodeCTable, + ZSTDInternalConstants::OF_defaultNorm, ZSTDInternalConstants::OF_defaultNormLog, + defaultPolicy, strategy); + assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, + count, max, ofCodeTable, nbSeq, + ZSTDInternalConstants::OF_defaultNorm, ZSTDInternalConstants::OF_defaultNormLog, DefaultMaxOff, + prevEntropy->fse.offcodeCTable, + sizeof(prevEntropy->fse.offcodeCTable), + entropyWorkspace, entropyWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); + if (Offtype == set_compressed) + lastNCount = op; + op += countSize; + assert(op <= oend); + } } + /* build CTable for MatchLengths */ + { unsigned max = MaxML; + size_t const mostFrequent = HIST_countFast_wksp( + count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); + nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode; + MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, + count, max, mostFrequent, nbSeq, + MLFSELog, prevEntropy->fse.matchlengthCTable, + ZSTDInternalConstants::ML_defaultNorm, ZSTDInternalConstants::ML_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_MatchLength, MLFSELog, 
(symbolEncodingType_e)MLtype, + count, max, mlCodeTable, nbSeq, + ZSTDInternalConstants::ML_defaultNorm, ZSTDInternalConstants::ML_defaultNormLog, MaxML, + prevEntropy->fse.matchlengthCTable, + sizeof(prevEntropy->fse.matchlengthCTable), + entropyWorkspace, entropyWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); + if (MLtype == set_compressed) + lastNCount = op; + op += countSize; + assert(op <= oend); + } } - string_stats->has_unicode = true; // we dont know better - row_group_stats = move(string_stats); - break; - } - default: - // no stats for you - break; - } // end of type switch + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); - // null count is generic - if (row_group_stats) { - if (parquet_stats.__isset.null_count) { - row_group_stats->validity_stats = make_unique(parquet_stats.null_count != 0); - } else { - row_group_stats->validity_stats = make_unique(true); - } - } else { - // if stats are missing from any row group we know squat - return nullptr; - } + { size_t const bitstreamSize = ZSTD_encodeSequences( + op, (size_t)(oend - op), + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, + longOffsets, bmi2); + FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); + op += bitstreamSize; + assert(op <= oend); + /* zstd versions <= 1.3.4 mistakenly report corruption when + * FSE_readNCount() receives a buffer < 4 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1146. + * This can happen when the last set_compressed table present is 2 + * bytes and the bitstream is only one byte. + * In this exceedingly rare case, we will simply emit an uncompressed + * block, since it isn't worth optimizing. 
+ */ + if (lastNCount && (op - lastNCount) < 4) { + /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(op - lastNCount == 3); + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " + "emitting an uncompressed block."); + return 0; + } + } - return row_group_stats; + DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart)); + return (size_t)(op - ostart); } -} // namespace duckdb - - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/types/date.hpp" -#include "duckdb/common/types/time.hpp" -#include "duckdb/common/types/timestamp.hpp" -#endif - -namespace duckdb { +MEM_STATIC size_t +ZSTD_compressSequences(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + size_t srcSize, + void* entropyWorkspace, size_t entropyWkspSize, + int bmi2) +{ + size_t const cSize = ZSTD_compressSequences_internal( + seqStorePtr, prevEntropy, nextEntropy, cctxParams, + dst, dstCapacity, + entropyWorkspace, entropyWkspSize, bmi2); + if (cSize == 0) return 0; + /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. + * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. 
+ */ + if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) + return 0; /* block not compressed */ + FORWARD_IF_ERROR(cSize, "ZSTD_compressSequences_internal failed"); -// surely they are joking -static constexpr int64_t JULIAN_TO_UNIX_EPOCH_DAYS = 2440588LL; -static constexpr int64_t MILLISECONDS_PER_DAY = 86400000LL; -static constexpr int64_t NANOSECONDS_PER_DAY = MILLISECONDS_PER_DAY * 1000LL * 1000LL; + /* Check compressibility */ + { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); + if (cSize >= maxCSize) return 0; /* block not compressed */ + } -int64_t ImpalaTimestampToNanoseconds(const Int96 &impala_timestamp) { - int64_t days_since_epoch = impala_timestamp.value[2] - JULIAN_TO_UNIX_EPOCH_DAYS; - auto nanoseconds = Load((data_ptr_t)impala_timestamp.value); - return days_since_epoch * NANOSECONDS_PER_DAY + nanoseconds; + return cSize; } -timestamp_t ImpalaTimestampToTimestamp(const Int96 &raw_ts) { - auto impala_ns = ImpalaTimestampToNanoseconds(raw_ts); - return Timestamp::FromEpochMs(impala_ns / 1000000); -} +/* ZSTD_selectBlockCompressor() : + * Not static, but internal use only (used by long distance matcher) + * assumption : strat is a valid strategy */ +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode) +{ + static const ZSTD_blockCompressor blockCompressor[3][ZSTD_STRATEGY_MAX+1] = { + { ZSTD_compressBlock_fast /* default for 0 */, + ZSTD_compressBlock_fast, + ZSTD_compressBlock_doubleFast, + ZSTD_compressBlock_greedy, + ZSTD_compressBlock_lazy, + ZSTD_compressBlock_lazy2, + ZSTD_compressBlock_btlazy2, + ZSTD_compressBlock_btopt, + ZSTD_compressBlock_btultra, + ZSTD_compressBlock_btultra2 }, + { ZSTD_compressBlock_fast_extDict /* default for 0 */, + ZSTD_compressBlock_fast_extDict, + ZSTD_compressBlock_doubleFast_extDict, + ZSTD_compressBlock_greedy_extDict, + ZSTD_compressBlock_lazy_extDict, + ZSTD_compressBlock_lazy2_extDict, + 
ZSTD_compressBlock_btlazy2_extDict, + ZSTD_compressBlock_btopt_extDict, + ZSTD_compressBlock_btultra_extDict, + ZSTD_compressBlock_btultra_extDict }, + { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, + ZSTD_compressBlock_fast_dictMatchState, + ZSTD_compressBlock_doubleFast_dictMatchState, + ZSTD_compressBlock_greedy_dictMatchState, + ZSTD_compressBlock_lazy_dictMatchState, + ZSTD_compressBlock_lazy2_dictMatchState, + ZSTD_compressBlock_btlazy2_dictMatchState, + ZSTD_compressBlock_btopt_dictMatchState, + ZSTD_compressBlock_btultra_dictMatchState, + ZSTD_compressBlock_btultra_dictMatchState } + }; + ZSTD_blockCompressor selectedCompressor; + ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); -Int96 TimestampToImpalaTimestamp(timestamp_t &ts) { - int32_t hour, min, sec, msec; - Time::Convert(Timestamp::GetTime(ts), hour, min, sec, msec); - uint64_t ms_since_midnight = hour * 60 * 60 * 1000 + min * 60 * 1000 + sec * 1000 + msec; - auto days_since_epoch = Date::Epoch(Timestamp::GetDate(ts)) / (24 * 60 * 60); - // first two uint32 in Int96 are nanoseconds since midnights - // last uint32 is number of days since year 4713 BC ("Julian date") - Int96 impala_ts; - Store(ms_since_midnight * 1000000, (data_ptr_t)impala_ts.value); - impala_ts.value[2] = days_since_epoch + JULIAN_TO_UNIX_EPOCH_DAYS; - return impala_ts; + assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); + selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; + assert(selectedCompressor != NULL); + return selectedCompressor; } -timestamp_t ParquetTimestampMicrosToTimestamp(const int64_t &raw_ts) { - return Timestamp::FromEpochMicroSeconds(raw_ts); -} -timestamp_t ParquetTimestampMsToTimestamp(const int64_t &raw_ts) { - return Timestamp::FromEpochMs(raw_ts); +static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, + const BYTE* anchor, size_t lastLLSize) +{ + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; } -date_t ParquetIntToDate(const int32_t 
&raw_date) { - return raw_date; +void ZSTD_resetSeqStore(seqStore_t* ssPtr) +{ + ssPtr->lit = ssPtr->litStart; + ssPtr->sequences = ssPtr->sequencesStart; + ssPtr->longLengthID = 0; } -} // namespace duckdb - - +typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/function/table_function.hpp" -#include "duckdb/parser/parsed_data/create_table_function_info.hpp" -#include "duckdb/parser/parsed_data/create_copy_function_info.hpp" -#include "duckdb/main/client_context.hpp" -#include "duckdb/main/connection.hpp" -#include "duckdb/common/file_system.hpp" -#include "duckdb/common/string_util.hpp" -#include "duckdb/common/types/date.hpp" -#include "duckdb/common/types/time.hpp" -#include "duckdb/common/types/timestamp.hpp" -#include "duckdb/common/serializer/buffered_file_writer.hpp" -#include "duckdb/common/serializer/buffered_serializer.hpp" -#endif +static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) +{ + ZSTD_matchState_t* const ms = &zc->blockState.matchState; + DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize); + assert(srcSize <= ZSTD_BLOCKSIZE_MAX); + /* Assert that we have correctly flushed the ctx params into the ms's copy */ + ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); + if (srcSize < MIN_CBLOCK_SIZE+ZSTDInternalConstants::ZSTD_blockHeaderSize+1) { + ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); + return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */ + } + ZSTD_resetSeqStore(&(zc->seqStore)); + /* required for optimal parser to read stats from dictionary */ + ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; + /* tell the optimal parser how we expect to compress literals */ + ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode; + /* a gap between an attached dict and the current window is not safe, + * they 
must remain adjacent, + * and when that stops being the case, the dict must be unset */ + assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit); + /* limited update after a very long match */ + { const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const U32 current = (U32)(istart-base); + if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */ + if (current > ms->nextToUpdate + 384) + ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384)); + } + /* select and store sequences */ + { ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms); + size_t lastLLSize; + { int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; + } + if (zc->externSeqStore.pos < zc->externSeqStore.size) { + assert(!zc->appliedParams.ldmParams.enableLdm); + /* Updates ldmSeqStore.pos */ + lastLLSize = + ZSTD_ldm_blockCompress(&zc->externSeqStore, + ms, &zc->seqStore, + zc->blockState.nextCBlock->rep, + src, srcSize); + assert(zc->externSeqStore.pos <= zc->externSeqStore.size); + } else if (zc->appliedParams.ldmParams.enableLdm) { + rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0}; + ldmSeqStore.seq = zc->ldmSequences; + ldmSeqStore.capacity = zc->maxNbLdmSequences; + /* Updates ldmSeqStore.size */ + FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, + &zc->appliedParams.ldmParams, + src, srcSize), ""); + /* Updates ldmSeqStore.pos */ + lastLLSize = + ZSTD_ldm_blockCompress(&ldmSeqStore, + ms, &zc->seqStore, + zc->blockState.nextCBlock->rep, + src, srcSize); + assert(ldmSeqStore.pos == ldmSeqStore.size); + } else { /* not long range mode */ + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode); + lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); + } + { const BYTE* const 
lastLiterals = (const BYTE*)src + srcSize - lastLLSize; + ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); + } } + return ZSTDbss_compress; +} +static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) +{ + const seqStore_t* seqStore = ZSTD_getSeqStore(zc); + const seqDef* seqs = seqStore->sequencesStart; + size_t seqsSize = seqStore->sequences - seqs; -namespace duckdb { + ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; + size_t i; size_t position; int repIdx; -using namespace duckdb_parquet; // NOLINT -using namespace duckdb_apache::thrift; // NOLINT -using namespace duckdb_apache::thrift::protocol; // NOLINT -using namespace duckdb_apache::thrift::transport; // NOLINT -using namespace duckdb_miniz; // NOLINT + assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); + for (i = 0, position = 0; i < seqsSize; ++i) { + outSeqs[i].offset = seqs[i].offset; + outSeqs[i].litLength = seqs[i].litLength; + outSeqs[i].matchLength = seqs[i].matchLength + MINMATCH; -using duckdb_parquet::format::CompressionCodec; -using duckdb_parquet::format::ConvertedType; -using duckdb_parquet::format::Encoding; -using duckdb_parquet::format::FieldRepetitionType; -using duckdb_parquet::format::FileMetaData; -using duckdb_parquet::format::PageHeader; -using duckdb_parquet::format::PageType; -using duckdb_parquet::format::RowGroup; -using duckdb_parquet::format::Type; + if (i == seqStore->longLengthPos) { + if (seqStore->longLengthID == 1) { + outSeqs[i].litLength += 0x10000; + } else if (seqStore->longLengthID == 2) { + outSeqs[i].matchLength += 0x10000; + } + } -class MyTransport : public TTransport { -public: - explicit MyTransport(Serializer &serializer) : serializer(serializer) { - } + if (outSeqs[i].offset <= ZSTD_REP_NUM) { + outSeqs[i].rep = outSeqs[i].offset; + repIdx = (unsigned int)i - outSeqs[i].offset; - bool isOpen() const override { - return true; - } + if (outSeqs[i].litLength == 0) { + if (outSeqs[i].offset < 3) { + 
--repIdx; + } else { + repIdx = (unsigned int)i - 1; + } + ++outSeqs[i].rep; + } + assert(repIdx >= -3); + outSeqs[i].offset = repIdx >= 0 ? outSeqs[repIdx].offset : ZSTDInternalConstants::repStartValue[-repIdx - 1]; + if (outSeqs[i].rep == 4) { + --outSeqs[i].offset; + } + } else { + outSeqs[i].offset -= ZSTD_REP_NUM; + } - void open() override { - } + position += outSeqs[i].litLength; + outSeqs[i].matchPos = (unsigned int)position; + position += outSeqs[i].matchLength; + } + zc->seqCollector.seqIndex += seqsSize; +} - void close() override { - } +size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize) +{ + const size_t dstCapacity = ZSTD_compressBound(srcSize); + void* dst = ZSTD_malloc(dstCapacity, ZSTDInternalConstants::ZSTD_defaultCMem); + SeqCollector seqCollector; - void write_virt(const uint8_t *buf, uint32_t len) override { - serializer.WriteData((const_data_ptr_t)buf, len); - } + RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!"); -private: - Serializer &serializer; -}; + seqCollector.collectSequences = 1; + seqCollector.seqStart = outSeqs; + seqCollector.seqIndex = 0; + seqCollector.maxSequences = outSeqsSize; + zc->seqCollector = seqCollector; -static Type::type DuckDBTypeToParquetType(const LogicalType &duckdb_type) { - switch (duckdb_type.id()) { - case LogicalTypeId::BOOLEAN: - return Type::BOOLEAN; - case LogicalTypeId::TINYINT: - case LogicalTypeId::SMALLINT: - case LogicalTypeId::INTEGER: - return Type::INT32; - case LogicalTypeId::BIGINT: - return Type::INT64; - case LogicalTypeId::FLOAT: - return Type::FLOAT; - case LogicalTypeId::DECIMAL: // for now... 
- case LogicalTypeId::DOUBLE: - return Type::DOUBLE; - case LogicalTypeId::VARCHAR: - case LogicalTypeId::BLOB: - return Type::BYTE_ARRAY; - case LogicalTypeId::DATE: - case LogicalTypeId::TIMESTAMP: - return Type::INT96; - default: - throw NotImplementedException(duckdb_type.ToString()); - } + ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); + ZSTD_free(dst, ZSTDInternalConstants::ZSTD_defaultCMem); + return zc->seqCollector.seqIndex; } -static bool DuckDBTypeToConvertedType(const LogicalType &duckdb_type, ConvertedType::type &result) { - switch (duckdb_type.id()) { - case LogicalTypeId::VARCHAR: - result = ConvertedType::UTF8; - return true; - default: - return false; - } +/* Returns true if the given block is a RLE block */ +static int ZSTD_isRLE(const BYTE *ip, size_t length) { + size_t i; + if (length < 2) return 1; + for (i = 1; i < length; ++i) { + if (ip[0] != ip[i]) return 0; + } + return 1; } -static void VarintEncode(uint32_t val, Serializer &ser) { - do { - uint8_t byte = val & 127; - val >>= 7; - if (val != 0) { - byte |= 128; - } - ser.Write(byte); - } while (val != 0); -} +/* Returns true if the given block may be RLE. + * This is just a heuristic based on the compressibility. + * It may return both false positives and false negatives. 
+ */ +static int ZSTD_maybeRLE(seqStore_t const* seqStore) +{ + size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); + size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); -static uint8_t GetVarintSize(uint32_t val) { - uint8_t res = 0; - do { - uint8_t byte = val & 127; - val >>= 7; - if (val != 0) { - byte |= 128; - } - res++; - } while (val != 0); - return res; + return nbSeqs < 4 && nbLits < 10; } -template -static void TemplatedWritePlain(Vector &col, idx_t length, ValidityMask &mask, Serializer &ser) { - auto *ptr = FlatVector::GetData(col); - for (idx_t r = 0; r < length; r++) { - if (mask.RowIsValid(r)) { - ser.Write((TGT)ptr[r]); - } - } +static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc) +{ + ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; + zc->blockState.prevCBlock = zc->blockState.nextCBlock; + zc->blockState.nextCBlock = tmp; } -ParquetWriter::ParquetWriter(FileSystem &fs, string file_name_p, vector types_p, vector names_p, - CompressionCodec::type codec) - : file_name(move(file_name_p)), sql_types(move(types_p)), column_names(move(names_p)), codec(codec) { - // initialize the file writer - writer = make_unique(fs, file_name.c_str(), - FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW); - // parquet files start with the string "PAR1" - writer->WriteData((const_data_ptr_t) "PAR1", 4); - TCompactProtocolFactoryT tproto_factory; - protocol = tproto_factory.getProtocol(make_shared(*writer)); - file_meta_data.num_rows = 0; - file_meta_data.schema.resize(sql_types.size() + 1); +static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, U32 frame) +{ + /* This the upper bound for the length of an rle block. + * This isn't the actual upper bound. Finding the real threshold + * needs further investigation. 
+ */ + const U32 rleMaxLength = 25; + size_t cSize; + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, + (unsigned)zc->blockState.matchState.nextToUpdate); - file_meta_data.schema[0].num_children = sql_types.size(); - file_meta_data.schema[0].__isset.num_children = true; - file_meta_data.version = 1; + { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); + if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } + } - for (idx_t i = 0; i < sql_types.size(); i++) { - auto &schema_element = file_meta_data.schema[i + 1]; + if (zc->seqCollector.collectSequences) { + ZSTD_copyBlockSequences(zc); + return 0; + } - schema_element.type = DuckDBTypeToParquetType(sql_types[i]); - schema_element.repetition_type = FieldRepetitionType::OPTIONAL; - schema_element.num_children = 0; - schema_element.__isset.num_children = true; - schema_element.__isset.type = true; - schema_element.__isset.repetition_type = true; - schema_element.name = column_names[i]; - schema_element.__isset.converted_type = DuckDBTypeToConvertedType(sql_types[i], schema_element.converted_type); - } -} + /* encode sequences and literals */ + cSize = ZSTD_compressSequences(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + dst, dstCapacity, + srcSize, + zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + zc->bmi2); -void ParquetWriter::Flush(ChunkCollection &buffer) { - if (buffer.Count() == 0) { - return; - } - std::lock_guard glock(lock); + if (frame && + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." 
+ * This is only an issue for zstd <= v1.4.3 + */ + !zc->isFirstBlock && + cSize < rleMaxLength && + ZSTD_isRLE(ip, srcSize)) + { + cSize = 1; + op[0] = ip[0]; + } - // set up a new row group for this chunk collection - RowGroup row_group; - row_group.num_rows = 0; - row_group.file_offset = writer->GetTotalWritten(); - row_group.__isset.file_offset = true; - row_group.columns.resize(buffer.ColumnCount()); +out: + if (!ZSTD_isError(cSize) && cSize > 1) { + ZSTD_confirmRepcodesAndEntropyTables(zc); + } + /* We check that dictionaries have offset codes available for the first + * block. After the first block, the offcode table might not have large + * enough codes to represent the offsets in the data. + */ + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; - // iterate over each of the columns of the chunk collection and write them - for (idx_t i = 0; i < buffer.ColumnCount(); i++) { - // we start off by writing everything into a temporary buffer - // this is necessary to (1) know the total written size, and (2) to compress it afterwards - BufferedSerializer temp_writer; + return cSize; +} - // set up some metadata - PageHeader hdr; - hdr.compressed_page_size = 0; - hdr.uncompressed_page_size = 0; - hdr.type = PageType::DATA_PAGE; - hdr.__isset.data_page_header = true; +static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const size_t bss, U32 lastBlock) +{ + DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()"); + if (bss == ZSTDbss_compress) { + if (/* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." 
+ * This is only an issue for zstd <= v1.4.3 + */ + !zc->isFirstBlock && + ZSTD_maybeRLE(&zc->seqStore) && + ZSTD_isRLE((BYTE const*)src, srcSize)) + { + return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock); + } + /* Attempt superblock compression. + * + * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the + * standard ZSTD_compressBound(). This is a problem, because even if we have + * space now, taking an extra byte now could cause us to run out of space later + * and violate ZSTD_compressBound(). + * + * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize. + * + * In order to respect ZSTD_compressBound() we must attempt to emit a raw + * uncompressed block in these cases: + * * cSize == 0: Return code for an uncompressed block. + * * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize). + * ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of + * output space. + * * cSize >= blockBound(srcSize): We have expanded the block too much so + * emit an uncompressed block. + */ + { + size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); + if (cSize != ERROR(dstSize_tooSmall)) { + size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed"); + if (cSize != 0 && cSize < maxCSize + ZSTDInternalConstants::ZSTD_blockHeaderSize) { + ZSTD_confirmRepcodesAndEntropyTables(zc); + return cSize; + } + } + } + } - hdr.data_page_header.num_values = buffer.Count(); - hdr.data_page_header.encoding = Encoding::PLAIN; - hdr.data_page_header.definition_level_encoding = Encoding::RLE; - hdr.data_page_header.repetition_level_encoding = Encoding::BIT_PACKED; + DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()"); + /* Superblock compression failed, attempt to emit a single no compress block. 
+ * The decoder will be able to stream this block since it is uncompressed. + */ + return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock); +} - // record the current offset of the writer into the file - // this is the starting position of the current page - auto start_offset = writer->GetTotalWritten(); +static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastBlock) +{ + size_t cSize = 0; + const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); - // write the definition levels (i.e. the inverse of the nullmask) - // we always bit pack everything + cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed"); - // first figure out how many bytes we need (1 byte per 8 rows, rounded up) - auto define_byte_count = (buffer.Count() + 7) / 8; - // we need to set up the count as a varint, plus an added marker for the RLE scheme - // for this marker we shift the count left 1 and set low bit to 1 to indicate bit packed literals - uint32_t define_header = (define_byte_count << 1) | 1; - uint32_t define_size = GetVarintSize(define_header) + define_byte_count; + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; - // we write the actual definitions into the temp_writer for now - temp_writer.Write(define_size); - VarintEncode(define_header, temp_writer); + return cSize; +} - for (auto &chunk : buffer.Chunks()) { - auto &validity = 
FlatVector::Validity(chunk->data[i]); - auto validity_data = validity.GetData(); - auto chunk_define_byte_count = (chunk->size() + 7) / 8; - if (!validity_data) { - ValidityMask nop_mask(chunk->size()); - temp_writer.WriteData((const_data_ptr_t)nop_mask.GetData(), chunk_define_byte_count); - } else { - // write the bits of the nullmask - temp_writer.WriteData((const_data_ptr_t)validity_data, chunk_define_byte_count); - } - } +static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + void const* ip, + void const* iend) +{ + if (ZSTD_window_needOverflowCorrection(ms->window, iend)) { + U32 const maxDist = (U32)1 << params->cParams.windowLog; + U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy); + U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); + ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); + ZSTD_cwksp_mark_tables_dirty(ws); + ZSTD_reduceIndex(ms, params, correction); + ZSTD_cwksp_mark_tables_clean(ws); + if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; + else ms->nextToUpdate -= correction; + /* invalidate dictionaries on overflow correction */ + ms->loadedDictEnd = 0; + ms->dictMatchState = NULL; + } +} - // now write the actual payload: we write this as PLAIN values (for now? possibly for ever?) - for (auto &chunk : buffer.Chunks()) { - auto &input = *chunk; - auto &input_column = input.data[i]; - auto &mask = FlatVector::Validity(input_column); +/*! ZSTD_compress_frameChunk() : +* Compress a chunk of data into one or multiple blocks. +* All blocks will be terminated, all input will be consumed. +* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. 
+* Frame is supposed already started (header already produced) +* @return : compressed size, or an error code +*/ +static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastFrameChunk) +{ + size_t blockSize = cctx->blockSize; + size_t remaining = srcSize; + const BYTE* ip = (const BYTE*)src; + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; - // write actual payload data - switch (sql_types[i].id()) { - case LogicalTypeId::BOOLEAN: { - auto *ptr = FlatVector::GetData(input_column); - uint8_t byte = 0; - uint8_t byte_pos = 0; - for (idx_t r = 0; r < input.size(); r++) { - if (mask.RowIsValid(r)) { // only encode if non-null - byte |= (ptr[r] & 1) << byte_pos; - byte_pos++; + assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX); - temp_writer.Write(byte); - if (byte_pos == 8) { - temp_writer.Write(byte); - byte = 0; - byte_pos = 0; - } - } - } - // flush last byte if req - if (byte_pos > 0) { - temp_writer.Write(byte); - } - break; - } - case LogicalTypeId::TINYINT: - TemplatedWritePlain(input_column, input.size(), mask, temp_writer); - break; - case LogicalTypeId::SMALLINT: - TemplatedWritePlain(input_column, input.size(), mask, temp_writer); - break; - case LogicalTypeId::INTEGER: - TemplatedWritePlain(input_column, input.size(), mask, temp_writer); - break; - case LogicalTypeId::BIGINT: - TemplatedWritePlain(input_column, input.size(), mask, temp_writer); - break; - case LogicalTypeId::FLOAT: - TemplatedWritePlain(input_column, input.size(), mask, temp_writer); - break; - case LogicalTypeId::DECIMAL: { - // FIXME: fixed length byte array... 
- Vector double_vec(LogicalType::DOUBLE); - VectorOperations::Cast(input_column, double_vec, input.size()); - TemplatedWritePlain(double_vec, input.size(), mask, temp_writer); - break; - } - case LogicalTypeId::DOUBLE: - TemplatedWritePlain(input_column, input.size(), mask, temp_writer); - break; - case LogicalTypeId::DATE: { - auto *ptr = FlatVector::GetData(input_column); - for (idx_t r = 0; r < input.size(); r++) { - if (mask.RowIsValid(r)) { - auto ts = Timestamp::FromDatetime(ptr[r], 0); - temp_writer.Write(TimestampToImpalaTimestamp(ts)); - } - } - break; - } - case LogicalTypeId::TIMESTAMP: { - auto *ptr = FlatVector::GetData(input_column); - for (idx_t r = 0; r < input.size(); r++) { - if (mask.RowIsValid(r)) { - temp_writer.Write(TimestampToImpalaTimestamp(ptr[r])); - } - } - break; - } - case LogicalTypeId::BLOB: - case LogicalTypeId::VARCHAR: { - auto *ptr = FlatVector::GetData(input_column); - for (idx_t r = 0; r < input.size(); r++) { - if (mask.RowIsValid(r)) { - temp_writer.Write(ptr[r].GetSize()); - temp_writer.WriteData((const_data_ptr_t)ptr[r].GetDataUnsafe(), ptr[r].GetSize()); - } - } - break; - } - default: - throw NotImplementedException((sql_types[i].ToString())); - } - } + DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); + if (cctx->appliedParams.fParams.checksumFlag && srcSize) + XXH64_update(&cctx->xxhState, src, srcSize); - // now that we have finished writing the data we know the uncompressed size - hdr.uncompressed_page_size = temp_writer.blob.size; + while (remaining) { + ZSTD_matchState_t* const ms = &cctx->blockState.matchState; + U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); - // compress the data based - size_t compressed_size; - data_ptr_t compressed_data; - unique_ptr compressed_buf; - switch (codec) { - case CompressionCodec::UNCOMPRESSED: - compressed_size = temp_writer.blob.size; - compressed_data = temp_writer.blob.data.get(); - break; - case CompressionCodec::SNAPPY: { - 
compressed_size = snappy::MaxCompressedLength(temp_writer.blob.size); - compressed_buf = unique_ptr(new data_t[compressed_size]); - snappy::RawCompress((const char *)temp_writer.blob.data.get(), temp_writer.blob.size, - (char *)compressed_buf.get(), &compressed_size); - compressed_data = compressed_buf.get(); - break; - } - case CompressionCodec::GZIP: { - MiniZStream s; - compressed_size = s.MaxCompressedLength(temp_writer.blob.size); - compressed_buf = unique_ptr(new data_t[compressed_size]); - s.Compress((const char *)temp_writer.blob.data.get(), temp_writer.blob.size, (char *)compressed_buf.get(), - &compressed_size); - compressed_data = compressed_buf.get(); - break; - } - case CompressionCodec::ZSTD: { - compressed_size = duckdb_zstd::ZSTD_compressBound(temp_writer.blob.size); - compressed_buf = unique_ptr(new data_t[compressed_size]); - compressed_size = duckdb_zstd::ZSTD_compress((void *)compressed_buf.get(), compressed_size, - (const void *)temp_writer.blob.data.get(), - temp_writer.blob.size, ZSTD_CLEVEL_DEFAULT); - compressed_data = compressed_buf.get(); - break; - } - default: - throw InternalException("Unsupported codec for Parquet Writer"); - } + RETURN_ERROR_IF(dstCapacity < ZSTDInternalConstants::ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE, + dstSize_tooSmall, + "not enough space to store compressed block"); + if (remaining < blockSize) blockSize = remaining; - hdr.compressed_page_size = compressed_size; - // now finally write the data to the actual file - hdr.write(protocol.get()); - writer->WriteData(compressed_data, compressed_size); + ZSTD_overflowCorrectIfNeeded( + ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize); + ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); - auto &column_chunk = row_group.columns[i]; - column_chunk.__isset.meta_data = true; - column_chunk.meta_data.data_page_offset = start_offset; - column_chunk.meta_data.total_compressed_size = writer->GetTotalWritten() - 
start_offset; - column_chunk.meta_data.codec = codec; - column_chunk.meta_data.path_in_schema.push_back(file_meta_data.schema[i + 1].name); - column_chunk.meta_data.num_values = buffer.Count(); - column_chunk.meta_data.type = file_meta_data.schema[i + 1].type; - } - row_group.num_rows += buffer.Count(); + /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ + if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; - // append the row group to the file meta data - file_meta_data.row_groups.push_back(row_group); - file_meta_data.num_rows += buffer.Count(); -} + { size_t cSize; + if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) { + cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed"); + assert(cSize > 0); + assert(cSize <= blockSize + ZSTDInternalConstants::ZSTD_blockHeaderSize); + } else { + cSize = ZSTD_compressBlock_internal(cctx, + op+ZSTDInternalConstants::ZSTD_blockHeaderSize, dstCapacity-ZSTDInternalConstants::ZSTD_blockHeaderSize, + ip, blockSize, 1 /* frame */); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed"); -void ParquetWriter::Finalize() { - auto start_offset = writer->GetTotalWritten(); - file_meta_data.write(protocol.get()); + if (cSize == 0) { /* block is not compressible */ + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + } else { + U32 const cBlockHeader = cSize == 1 ? 
+ lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : + lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader); + cSize += ZSTDInternalConstants::ZSTD_blockHeaderSize; + } + } - writer->Write(writer->GetTotalWritten() - start_offset); - // parquet files also end with the string "PAR1" - writer->WriteData((const_data_ptr_t) "PAR1", 4); + ip += blockSize; + assert(remaining >= blockSize); + remaining -= blockSize; + op += cSize; + assert(dstCapacity >= cSize); + dstCapacity -= cSize; + cctx->isFirstBlock = 0; + DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", + (unsigned)cSize); + } } - // flush to disk - writer->Sync(); - writer.reset(); + if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; + return (size_t)(op-ostart); } -} // namespace duckdb +static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, + const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID) +{ BYTE* const op = (BYTE*)dst; + U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ + U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */ + U32 const checksumFlag = params->fParams.checksumFlag>0; + U32 const windowSize = (U32)1 << params->cParams.windowLog; + U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); + BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); + U32 const fcsCode = params->fParams.contentSizeFlag ? 
+ (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ + BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); + size_t pos=0; -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list + assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); + RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall, + "dst buf is too small to fit worst-case frame header size."); + DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", + !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); -/* ****************************************************************** - * FSE : Finite State Entropy encoder - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - Public forum : https://groups.google.com/forum/#!forum/lz4c - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
-****************************************************************** */ - -/* ************************************************************** -* Includes -****************************************************************/ -#include /* malloc, free, qsort */ -#include /* memcpy, memset */ - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list + if (params->format == ZSTD_f_zstd1) { + MEM_writeLE32(dst, ZSTD_MAGICNUMBER); + pos = 4; + } + op[pos++] = frameHeaderDescriptionByte; + if (!singleSegment) op[pos++] = windowLogByte; + switch(dictIDSizeCode) + { + default: assert(0); /* impossible */ + case 0 : break; + case 1 : op[pos] = (BYTE)(dictID); pos++; break; + case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break; + case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break; + } + switch(fcsCode) + { + default: assert(0); /* impossible */ + case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break; + case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break; + case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break; + case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break; + } + return pos; +} -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
+/* ZSTD_writeLastEmptyBlock() : + * output an empty Block with end-of-frame mark to complete a frame + * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) + * or an error code if `dstCapacity` is too small (stage != ZSTDcs_init, stage_wrong, + "wrong cctx stage"); + RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm, + parameter_unsupported, + "incompatible with ldm"); + cctx->externSeqStore.seq = seq; + cctx->externSeqStore.size = nbSeq; + cctx->externSeqStore.capacity = nbSeq; + cctx->externSeqStore.pos = 0; + return 0; +} -#if !defined(ZSTD_NO_INLINE) -#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# define INLINE_KEYWORD inline -#else -# define INLINE_KEYWORD -#endif -#if defined(__GNUC__) || defined(__ICCARM__) -# define FORCE_INLINE_ATTR __attribute__((always_inline)) -#elif defined(_MSC_VER) -# define FORCE_INLINE_ATTR __forceinline -#else -# define FORCE_INLINE_ATTR -#endif +static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 frame, U32 lastFrameChunk) +{ + ZSTD_matchState_t* const ms = &cctx->blockState.matchState; + size_t fhSize = 0; -#else + DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u", + cctx->stage, (unsigned)srcSize); + RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong, + "missing init (ZSTD_compressBegin)"); -#define INLINE_KEYWORD -#define FORCE_INLINE_ATTR + if (frame && (cctx->stage==ZSTDcs_init)) { + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, + cctx->pledgedSrcSizePlusOne-1, cctx->dictID); + FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); + assert(fhSize <= dstCapacity); + dstCapacity -= fhSize; + dst = (char*)dst + fhSize; + cctx->stage = ZSTDcs_ongoing; + } -#endif + if (!srcSize) return fhSize; /* do not generate an empty block if no input 
*/ -/** - * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant - * parameters. They must be inlined for the compiler to eliminate the constant - * branches. - */ -#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR -/** - * HINT_INLINE is used to help the compiler generate better code. It is *not* - * used for "templates", so it can be tweaked based on the compilers - * performance. - * - * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the - * always_inline attribute. - * - * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline - * attribute. - */ -#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 -# define HINT_INLINE static INLINE_KEYWORD -#else -# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR -#endif + if (!ZSTD_window_update(&ms->window, src, srcSize)) { + ms->nextToUpdate = ms->window.dictLimit; + } + if (cctx->appliedParams.ldmParams.enableLdm) { + ZSTD_window_update(&cctx->ldmState.window, src, srcSize); + } -/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ -#if defined(__GNUC__) -# define UNUSED_ATTR __attribute__((unused)) -#else -# define UNUSED_ATTR -#endif + if (!frame) { + /* overflow check and correction for block mode */ + ZSTD_overflowCorrectIfNeeded( + ms, &cctx->workspace, &cctx->appliedParams, + src, (BYTE const*)src + srcSize); + } -/* force no inlining */ -#ifdef _MSC_VER -# define FORCE_NOINLINE static __declspec(noinline) -#else -# if defined(__GNUC__) || defined(__ICCARM__) -# define FORCE_NOINLINE static __attribute__((__noinline__)) -# else -# define FORCE_NOINLINE static -# endif -#endif + DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); + { size_t const cSize = frame ? 
+ ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : + ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */); + FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed"); + cctx->consumedSrcSize += srcSize; + cctx->producedCSize += (cSize + fhSize); + assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); + if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); + RETURN_ERROR_IF( + cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne, + srcSize_wrong, + "error : pledgedSrcSize = %u, while realSrcSize >= %u", + (unsigned)cctx->pledgedSrcSizePlusOne-1, + (unsigned)cctx->consumedSrcSize); + } + return cSize + fhSize; + } +} -/* target attribute */ -#ifndef __has_attribute - #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ -#endif -#if defined(__GNUC__) || defined(__ICCARM__) -# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) -#else -# define TARGET_ATTRIBUTE(target) -#endif +size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize); + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */); +} -/* Enable runtime BMI2 dispatch based on the CPU. - * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. 
- */ -#ifndef DYNAMIC_BMI2 - #if ((defined(__clang__) && __has_attribute(__target__)) \ - || (defined(__GNUC__) \ - && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ - && (defined(__x86_64__) || defined(_M_X86)) \ - && !defined(__BMI2__) - # define DYNAMIC_BMI2 1 - #else - # define DYNAMIC_BMI2 0 - #endif -#endif -/* prefetch - * can be disabled, by declaring NO_PREFETCH build macro */ -#if defined(NO_PREFETCH) -# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ -# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ -#else -# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ -# include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ -# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) -# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) -# elif defined(__aarch64__) -# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))) -# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))) -# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) -# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) -# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) -# else -# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ -# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ -# endif -#endif /* NO_PREFETCH */ +size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) +{ + ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams; + assert(!ZSTD_checkCParams(cParams)); + return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog); +} -#define CACHELINE_SIZE 64 +size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize); + { size_t const blockSizeMax = 
ZSTD_getBlockSize(cctx); + RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); } -#define PREFETCH_AREA(p, s) { \ - const char* const _ptr = (const char*)(p); \ - size_t const _size = (size_t)(s); \ - size_t _pos; \ - for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ - PREFETCH_L2(_ptr + _pos); \ - } \ + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); } -/* vectorization - * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */ -#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) -# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5) -# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) -# else -# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")") -# endif -#else -# define DONT_VECTORIZE -#endif - -/* Tell the compiler that a branch is likely or unlikely. - * Only use these macros if it causes the compiler to generate better code. - * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc - * and clang, please do. +/*! ZSTD_loadDictionaryContent() : + * @return : 0, or an error code */ -#if defined(__GNUC__) -#ifndef LIKELY -#define LIKELY(x) (__builtin_expect((x), 1)) -#endif -#ifndef UNLIKELY -#define UNLIKELY(x) (__builtin_expect((x), 0)) -#endif -#else -#ifndef LIKELY -#define LIKELY(x) (x) -#endif -#ifndef UNLIKELY -#define UNLIKELY(x) (x) -#endif -#endif - -#endif /* ZSTD_COMPILER_H */ - - -// LICENSE_CHANGE_END - +static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, + ldmState_t* ls, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + const void* src, size_t srcSize, + ZSTD_dictTableLoadMethod_e dtlm) +{ + const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + ZSTD_window_update(&ms->window, src, srcSize); + ms->loadedDictEnd = params->forceWindow ? 
0 : (U32)(iend - ms->window.base); -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list + if (params->ldmParams.enableLdm && ls != NULL) { + ZSTD_window_update(&ls->window, src, srcSize); + ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base); + } -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ + /* Assert that we the ms params match the params we're being given */ + ZSTD_assertEqualCParams(params->cParams, ms->cParams); -#ifndef MEM_H_MODULE -#define MEM_H_MODULE + if (srcSize <= HASH_READ_SIZE) return 0; -/*-**************************************** -* Dependencies -******************************************/ -#include /* size_t, ptrdiff_t */ -#include /* memcpy */ + while (iend - ip > HASH_READ_SIZE) { + size_t const remaining = (size_t)(iend - ip); + size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX); + const BYTE* const ichunk = ip + chunk; + ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk); -/*-**************************************** -* Compiler specifics -******************************************/ -#if defined(_MSC_VER) /* Visual Studio */ -# include /* _byteswap_ulong */ -# include /* _byteswap_* */ -#endif -#if defined(__GNUC__) -# define MEM_STATIC static __inline __attribute__((unused)) -#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define MEM_STATIC static inline -#elif defined(_MSC_VER) -# define MEM_STATIC static __inline -#else -# define MEM_STATIC static /* this version may generate warnings for unused static functions; 
disable the relevant warning */ -#endif + if (params->ldmParams.enableLdm && ls != NULL) + ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, ¶ms->ldmParams); -#ifndef __has_builtin -# define __has_builtin(x) 0 /* compat. with non-clang compilers */ -#endif + switch(params->cParams.strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable(ms, ichunk, dtlm); + break; + case ZSTD_dfast: + ZSTD_fillDoubleHashTable(ms, ichunk, dtlm); + break; -/* code only tested on 32 and 64 bits systems */ -#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; } -MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + if (chunk >= HASH_READ_SIZE) + ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE); + break; -/* detects whether we are being compiled under msan */ -#if defined (__has_feature) -# if __has_feature(memory_sanitizer) -# define MEMORY_SANITIZER 1 -# endif -#endif + case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + if (chunk >= HASH_READ_SIZE) + ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk); + break; -/*-************************************************************** -* Basic Types -*****************************************************************/ -#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef int16_t S16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; - typedef int64_t S64; -#else -# include -#if CHAR_BIT != 8 -# error "this implementation requires char to be exactly 8-bit type" -#endif - typedef unsigned char BYTE; -#if USHRT_MAX != 65535 -# error "this implementation requires short to be exactly 16-bit type" -#endif - typedef unsigned short U16; - typedef signed short S16; 
-#if UINT_MAX != 4294967295 -# error "this implementation requires int to be exactly 32-bit type" -#endif - typedef unsigned int U32; - typedef signed int S32; -/* note : there are no limits defined for long long type in C90. - * limits exist in C99, however, in such case, is preferred */ - typedef unsigned long long U64; - typedef signed long long S64; -#endif + default: + assert(0); /* not possible : not a valid strategy id */ + } -namespace duckdb_zstd { + ip = ichunk; + } -/*-************************************************************** -* Memory I/O -*****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets depending on alignment. - * In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 
- * Prefer these methods in priority order (0 > 1 > 2) - */ -#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) -# define MEM_FORCE_MEMORY_ACCESS 2 -# elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) -# define MEM_FORCE_MEMORY_ACCESS 1 -# endif -#endif + ms->nextToUpdate = (U32)(iend - ms->window.base); + return 0; +} -MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; } -MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; } -MEM_STATIC unsigned MEM_isLittleEndian(void) -{ - const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ - return one.c[0]; +/* Dictionaries that assign zero probability to symbols that show up causes problems + when FSE encoding. Refuse dictionaries that assign zero probability to symbols + that we may encounter during compression. + NOTE: This behavior is not standard and could be improved in the future. 
*/ +static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) { + U32 s; + RETURN_ERROR_IF(dictMaxSymbolValue < maxSymbolValue, dictionary_corrupted, "dict fse tables don't have all symbols"); + for (s = 0; s <= maxSymbolValue; ++s) { + RETURN_ERROR_IF(normalizedCounter[s] == 0, dictionary_corrupted, "dict fse tables don't have all symbols"); + } + return 0; } -#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) +size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, + short* offcodeNCount, unsigned* offcodeMaxValue, + const void* const dict, size_t dictSize) +{ + const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */ + const BYTE* const dictEnd = dictPtr + dictSize; + dictPtr += 8; + bs->entropy.huf.repeatMode = HUF_repeat_check; -/* violates C standard, by lying on structure alignment. -Only use if no other choice to achieve best performance on target platform */ -MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } -MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } -MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } -MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; } + { unsigned maxSymbolValue = 255; + unsigned hasZeroWeights = 1; + size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, + dictEnd-dictPtr, &hasZeroWeights); -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } + /* We only set the loaded table as valid if it contains all non-zero + * weights. 
Otherwise, we set it to check */ + if (!hasZeroWeights) + bs->entropy.huf.repeatMode = HUF_repeat_valid; -#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) + RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, ""); + dictPtr += hufHeaderSize; + } -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) - __pragma( pack(push, 1) ) - typedef struct { U16 v; } unalign16; - typedef struct { U32 v; } unalign32; - typedef struct { U64 v; } unalign64; - typedef struct { size_t v; } unalignArch; - __pragma( pack(pop) ) -#else - typedef struct { U16 v; } __attribute__((packed)) unalign16; - typedef struct { U32 v; } __attribute__((packed)) unalign32; - typedef struct { U64 v; } __attribute__((packed)) unalign64; - typedef struct { size_t v; } __attribute__((packed)) unalignArch; -#endif + { unsigned offcodeLog; + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); + /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ + /* fill all offset symbols to avoid garbage at end of table */ + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.offcodeCTable, + offcodeNCount, MaxOff, offcodeLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + dictPtr += offcodeHeaderSize; + } -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; } 
-MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; } + { short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); + /* Every match length code must have non-zero probability */ + FORWARD_IF_ERROR( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML), ""); + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.matchlengthCTable, + matchlengthNCount, matchlengthMaxValue, matchlengthLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + dictPtr += matchlengthHeaderSize; + } -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; } + { short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog; + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); + /* Every literal length code must have non-zero probability */ + FORWARD_IF_ERROR( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL), ""); + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.litlengthCTable, + litlengthNCount, litlengthMaxValue, litlengthLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + dictPtr += litlengthHeaderSize; + } -#else + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); + 
bs->rep[0] = MEM_readLE32(dictPtr+0); + bs->rep[1] = MEM_readLE32(dictPtr+4); + bs->rep[2] = MEM_readLE32(dictPtr+8); + dictPtr += 12; -/* default method, safe and standard. - can sometimes prove slower */ + return dictPtr - (const BYTE*)dict; +} -MEM_STATIC U16 MEM_read16(const void* memPtr) +/* Dictionary format : + * See : + * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format + */ +/*! ZSTD_loadZstdDictionary() : + * @return : dictID, or an error code + * assumptions : magic number supposed already checked + * dictSize supposed >= 8 + */ +static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, + ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + const void* dict, size_t dictSize, + ZSTD_dictTableLoadMethod_e dtlm, + void* workspace) { - U16 val; memcpy(&val, memPtr, sizeof(val)); return val; + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; + short offcodeNCount[MaxOff+1]; + unsigned offcodeMaxValue = MaxOff; + size_t dictID; + size_t eSize; + + ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= 8); + assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); + + dictID = params->fParams.noDictIDFlag ? 
0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ ); + eSize = ZSTD_loadCEntropy(bs, workspace, offcodeNCount, &offcodeMaxValue, dict, dictSize); + FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed"); + dictPtr += eSize; + + { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); + U32 offcodeMax = MaxOff; + if (dictContentSize <= ((U32)-1) - 128 KB) { + U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */ + offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ + } + /* All offset values <= dictContentSize + 128 KB must be representable */ + FORWARD_IF_ERROR(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)), ""); + /* All repCodes must be <= dictContentSize and != 0*/ + { U32 u; + for (u=0; u<3; u++) { + RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, ""); + RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, ""); + } } + + bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid; + bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid; + bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid; + FORWARD_IF_ERROR(ZSTD_loadDictionaryContent( + ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), ""); + return dictID; + } } -MEM_STATIC U32 MEM_read32(const void* memPtr) +/** ZSTD_compress_insertDictionary() : +* @return : dictID, or an error code */ +static size_t +ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, + ZSTD_matchState_t* ms, + ldmState_t* ls, + ZSTD_cwksp* ws, + const ZSTD_CCtx_params* params, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + void* workspace) { - U32 val; memcpy(&val, memPtr, sizeof(val)); return val; + DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); + if ((dict==NULL) || (dictSize<8)) { + RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); + 
return 0; + } + + ZSTD_reset_compressedBlockState(bs); + + /* dict restricted modes */ + if (dictContentType == ZSTD_dct_rawContent) + return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm); + + if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { + if (dictContentType == ZSTD_dct_auto) { + DEBUGLOG(4, "raw content dictionary detected"); + return ZSTD_loadDictionaryContent( + ms, ls, ws, params, dict, dictSize, dtlm); + } + RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); + assert(0); /* impossible */ + } + + /* dict as full zstd dictionary */ + return ZSTD_loadZstdDictionary( + bs, ms, ws, params, dict, dictSize, dtlm, workspace); } -MEM_STATIC U64 MEM_read64(const void* memPtr) +#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB) +#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6) + +/*! ZSTD_compressBegin_internal() : + * @return : 0, or an error code */ +static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) { - U64 val; memcpy(&val, memPtr, sizeof(val)); return val; + DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog); + /* params are supposed to be fully validated at this point */ + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + if ( (cdict) + && (cdict->dictContentSize > 0) + && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF + || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || cdict->compressionLevel == 0) + && (params->attachDictPref != ZSTD_dictForceLoad) ) { + return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); + } + + FORWARD_IF_ERROR( 
ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize, + ZSTDcrp_makeClean, zbuff) , ""); + { size_t const dictID = cdict ? + ZSTD_compress_insertDictionary( + cctx->blockState.prevCBlock, &cctx->blockState.matchState, + &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent, + cdict->dictContentSize, dictContentType, dtlm, + cctx->entropyWorkspace) + : ZSTD_compress_insertDictionary( + cctx->blockState.prevCBlock, &cctx->blockState.matchState, + &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize, + dictContentType, dtlm, cctx->entropyWorkspace); + FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); + assert(dictID <= UINT_MAX); + cctx->dictID = (U32)dictID; + } + return 0; } -MEM_STATIC size_t MEM_readST(const void* memPtr) +size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + unsigned long long pledgedSrcSize) { - size_t val; memcpy(&val, memPtr, sizeof(val)); return val; + DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog); + /* compression parameters verification and optimization */ + FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , ""); + return ZSTD_compressBegin_internal(cctx, + dict, dictSize, dictContentType, dtlm, + cdict, + params, pledgedSrcSize, + ZSTDb_not_buffered); } -MEM_STATIC void MEM_write16(void* memPtr, U16 value) +/*! 
ZSTD_compressBegin_advanced() : +* @return : 0, or an error code */ +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize) { - memcpy(memPtr, &value, sizeof(value)); + ZSTD_CCtx_params const cctxParams = + ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); + return ZSTD_compressBegin_advanced_internal(cctx, + dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, + NULL /*cdict*/, + &cctxParams, pledgedSrcSize); } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) +size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) { - memcpy(memPtr, &value, sizeof(value)); + ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); + ZSTD_CCtx_params const cctxParams = + ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); + DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); + return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) +size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) { - memcpy(memPtr, &value, sizeof(value)); + return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); } -#endif /* MEM_FORCE_MEMORY_ACCESS */ -MEM_STATIC U32 MEM_swap32(U32 in) +/*! ZSTD_writeEpilogue() : +* Ends a frame. 
+* @return : nb of bytes written into dst (or an error code) */ +static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) { -#if defined(_MSC_VER) /* Visual Studio */ - return _byteswap_ulong(in); -#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ - || (defined(__clang__) && __has_builtin(__builtin_bswap32)) - return __builtin_bswap32(in); -#else - return ((in << 24) & 0xff000000 ) | - ((in << 8) & 0x00ff0000 ) | - ((in >> 8) & 0x0000ff00 ) | - ((in >> 24) & 0x000000ff ); -#endif -} + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + size_t fhSize = 0; -MEM_STATIC U64 MEM_swap64(U64 in) -{ -#if defined(_MSC_VER) /* Visual Studio */ - return _byteswap_uint64(in); -#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ - || (defined(__clang__) && __has_builtin(__builtin_bswap64)) - return __builtin_bswap64(in); -#else - return ((in << 56) & 0xff00000000000000ULL) | - ((in << 40) & 0x00ff000000000000ULL) | - ((in << 24) & 0x0000ff0000000000ULL) | - ((in << 8) & 0x000000ff00000000ULL) | - ((in >> 8) & 0x00000000ff000000ULL) | - ((in >> 24) & 0x0000000000ff0000ULL) | - ((in >> 40) & 0x000000000000ff00ULL) | - ((in >> 56) & 0x00000000000000ffULL); -#endif -} + DEBUGLOG(4, "ZSTD_writeEpilogue"); + RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing"); -MEM_STATIC size_t MEM_swapST(size_t in) -{ - if (MEM_32bits()) - return (size_t)MEM_swap32((U32)in); - else - return (size_t)MEM_swap64((U64)in); -} + /* special case : empty frame */ + if (cctx->stage == ZSTDcs_init) { + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0); + FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); + dstCapacity -= fhSize; + op += fhSize; + cctx->stage = ZSTDcs_ongoing; + } -/*=== Little endian r/w ===*/ + if (cctx->stage != ZSTDcs_ending) { + /* write one last empty block, make it the "last" block */ + U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; 
+ RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue"); + MEM_writeLE32(op, cBlockHeader24); + op += ZSTDInternalConstants::ZSTD_blockHeaderSize; + dstCapacity -= ZSTDInternalConstants::ZSTD_blockHeaderSize; + } -MEM_STATIC U16 MEM_readLE16(const void* memPtr) -{ - if (MEM_isLittleEndian()) - return MEM_read16(memPtr); - else { - const BYTE* p = (const BYTE*)memPtr; - return (U16)(p[0] + (p[1]<<8)); + if (cctx->appliedParams.fParams.checksumFlag) { + U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum"); + DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum); + MEM_writeLE32(op, checksum); + op += 4; } + + cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ + return op-ostart; } -MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) +size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) { - if (MEM_isLittleEndian()) { - MEM_write16(memPtr, val); - } else { - BYTE* p = (BYTE*)memPtr; - p[0] = (BYTE)val; - p[1] = (BYTE)(val>>8); + size_t endResult; + size_t const cSize = ZSTD_compressContinue_internal(cctx, + dst, dstCapacity, src, srcSize, + 1 /* frame mode */, 1 /* last chunk */); + FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed"); + endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); + FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed"); + assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); + if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); + DEBUGLOG(4, "end of frame : controlling src size"); + RETURN_ERROR_IF( + cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1, + srcSize_wrong, + "error : pledgedSrcSize = %u, while realSrcSize = %u", + (unsigned)cctx->pledgedSrcSizePlusOne-1, + 
(unsigned)cctx->consumedSrcSize); } + return cSize + endResult; } -MEM_STATIC U32 MEM_readLE24(const void* memPtr) -{ - return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); -} -MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val) +static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + const ZSTD_parameters* params) { - MEM_writeLE16(memPtr, (U16)val); - ((BYTE*)memPtr)[2] = (BYTE)(val>>16); + ZSTD_CCtx_params const cctxParams = + ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); + DEBUGLOG(4, "ZSTD_compress_internal"); + return ZSTD_compress_advanced_internal(cctx, + dst, dstCapacity, + src, srcSize, + dict, dictSize, + &cctxParams); } -MEM_STATIC U32 MEM_readLE32(const void* memPtr) +size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params) { - if (MEM_isLittleEndian()) - return MEM_read32(memPtr); - else - return MEM_swap32(MEM_read32(memPtr)); + DEBUGLOG(4, "ZSTD_compress_advanced"); + FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), ""); + return ZSTD_compress_internal(cctx, + dst, dstCapacity, + src, srcSize, + dict, dictSize, + ¶ms); } -MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32) +/* Internal */ +size_t ZSTD_compress_advanced_internal( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + const ZSTD_CCtx_params* params) { - if (MEM_isLittleEndian()) - MEM_write32(memPtr, val32); - else - MEM_write32(memPtr, MEM_swap32(val32)); + DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize); + FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, + dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, + params, srcSize, ZSTDb_not_buffered) , ""); + return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); } 
-MEM_STATIC U64 MEM_readLE64(const void* memPtr) +size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel) { - if (MEM_isLittleEndian()) - return MEM_read64(memPtr); - else - return MEM_swap64(MEM_read64(memPtr)); + ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0); + ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); + DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize); + assert(params.fParams.contentSizeFlag == 1); + return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams); } -MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64) +size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) { - if (MEM_isLittleEndian()) - MEM_write64(memPtr, val64); - else - MEM_write64(memPtr, MEM_swap64(val64)); + DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize); + assert(cctx != NULL); + return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); } -MEM_STATIC size_t MEM_readLEST(const void* memPtr) +size_t ZSTD_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) { - if (MEM_32bits()) - return (size_t)MEM_readLE32(memPtr); - else - return (size_t)MEM_readLE64(memPtr); + size_t result; + ZSTD_CCtx ctxBody; + ZSTD_initCCtx(&ctxBody, ZSTDInternalConstants::ZSTD_defaultCMem); + result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel); + ZSTD_freeCCtxContent(&ctxBody); /* can't free ctxBody itself, as it's on stack; free only heap content */ + return result; } -MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) -{ - if (MEM_32bits()) - MEM_writeLE32(memPtr, (U32)val); - else - MEM_writeLE64(memPtr, 
(U64)val); -} -/*=== Big endian r/w ===*/ +/* ===== Dictionary API ===== */ -MEM_STATIC U32 MEM_readBE32(const void* memPtr) +/*! ZSTD_estimateCDictSize_advanced() : + * Estimate amount of memory that will be needed to create a dictionary with following arguments */ +size_t ZSTD_estimateCDictSize_advanced( + size_t dictSize, ZSTD_compressionParameters cParams, + ZSTD_dictLoadMethod_e dictLoadMethod) { - if (MEM_isLittleEndian()) - return MEM_swap32(MEM_read32(memPtr)); - else - return MEM_read32(memPtr); + DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict)); + return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *)))); } -MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32) +size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) { - if (MEM_isLittleEndian()) - MEM_write32(memPtr, MEM_swap32(val32)); - else - MEM_write32(memPtr, val32); + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); + return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); } -MEM_STATIC U64 MEM_readBE64(const void* memPtr) +size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) { - if (MEM_isLittleEndian()) - return MEM_swap64(MEM_read64(memPtr)); - else - return MEM_read64(memPtr); + if (cdict==NULL) return 0; /* support sizeof on NULL */ + DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict)); + /* cdict may be in the workspace */ + return (cdict->workspace.workspace == cdict ? 
0 : sizeof(*cdict)) + + ZSTD_cwksp_sizeof(&cdict->workspace); } -MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64) +static size_t ZSTD_initCDict_internal( + ZSTD_CDict* cdict, + const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams) { - if (MEM_isLittleEndian()) - MEM_write64(memPtr, MEM_swap64(val64)); - else - MEM_write64(memPtr, val64); -} - -MEM_STATIC size_t MEM_readBEST(const void* memPtr) -{ - if (MEM_32bits()) - return (size_t)MEM_readBE32(memPtr); - else - return (size_t)MEM_readBE64(memPtr); -} - -MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val) -{ - if (MEM_32bits()) - MEM_writeBE32(memPtr, (U32)val); - else - MEM_writeBE64(memPtr, (U64)val); -} - -} - -#endif /* MEM_H_MODULE */ + DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType); + assert(!ZSTD_checkCParams(cParams)); + cdict->matchState.cParams = cParams; + if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { + cdict->dictContent = dictBuffer; + } else { + void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*))); + RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!"); + cdict->dictContent = internalBuffer; + memcpy(internalBuffer, dictBuffer, dictSize); + } + cdict->dictContentSize = dictSize; + cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE); -// LICENSE_CHANGE_END - /* U32, U16, etc. */ + /* Reset the state to no dictionary */ + ZSTD_reset_compressedBlockState(&cdict->cBlockState); + FORWARD_IF_ERROR(ZSTD_reset_matchState( + &cdict->matchState, + &cdict->workspace, + &cParams, + ZSTDcrp_makeClean, + ZSTDirp_reset, + ZSTD_resetTarget_CDict), ""); + /* (Maybe) load the dictionary + * Skips loading the dictionary if it is < 8 bytes. 
+ */ + { ZSTD_CCtx_params params; + memset(¶ms, 0, sizeof(params)); + params.compressionLevel = ZSTD_CLEVEL_DEFAULT; + params.fParams.contentSizeFlag = 1; + params.cParams = cParams; + { size_t const dictID = ZSTD_compress_insertDictionary( + &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace, + ¶ms, cdict->dictContent, cdict->dictContentSize, + dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace); + FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); + assert(dictID <= (size_t)(U32)-1); + cdict->dictID = (U32)dictID; + } + } -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list + return 0; +} -/* ****************************************************************** - * debug - * Part of FSE library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ +ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, ZSTD_customMem customMem) +{ + DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (unsigned)dictContentType); + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + { size_t const workspaceSize = + ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + + (dictLoadMethod == ZSTD_dlm_byRef ? 
0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))); + void* const workspace = ZSTD_malloc(workspaceSize, customMem); + ZSTD_cwksp ws; + ZSTD_CDict* cdict; -/* - * The purpose of this header is to enable debug functions. - * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time, - * and DEBUG_STATIC_ASSERT() for compile-time. - * - * By default, DEBUGLEVEL==0, which means run-time debug is disabled. - * - * Level 1 enables assert() only. - * Starting level 2, traces can be generated and pushed to stderr. - * The higher the level, the more verbose the traces. - * - * It's possible to dynamically adjust level using variable g_debug_level, - * which is only declared if DEBUGLEVEL>=2, - * and is a global variable, not multi-thread protected (use with care) - */ + if (!workspace) { + ZSTD_free(workspace, customMem); + return NULL; + } -#ifndef DEBUG_H_12987983217 -#define DEBUG_H_12987983217 + ZSTD_cwksp_init(&ws, workspace, workspaceSize); -#if defined (__cplusplus) -extern "C" { -#endif + cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); + assert(cdict != NULL); + ZSTD_cwksp_move(&cdict->workspace, &ws); + cdict->customMem = customMem; + cdict->compressionLevel = 0; /* signals advanced API usage */ + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, + dictBuffer, dictSize, + dictLoadMethod, dictContentType, + cParams) )) { + ZSTD_freeCDict(cdict); + return NULL; + } -/* static assert is triggered at compile time, leaving no runtime artefact. - * static assert only works with compile-time constants. - * Also, this variant can only be used inside a function. */ -#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 
1 : -1]) + return cdict; + } +} +ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); + ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byCopy, ZSTD_dct_auto, + cParams, ZSTDInternalConstants::ZSTD_defaultCMem); + if (cdict) + cdict->compressionLevel = compressionLevel == 0 ? ZSTD_CLEVEL_DEFAULT : compressionLevel; + return cdict; +} -/* DEBUGLEVEL is expected to be defined externally, - * typically through compiler command line. - * Value must be a number. */ -#ifndef DEBUGLEVEL -# define DEBUGLEVEL 0 -#endif +ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); + return ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byRef, ZSTD_dct_auto, + cParams, ZSTDInternalConstants::ZSTD_defaultCMem); +} +size_t ZSTD_freeCDict(ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = cdict->customMem; + int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict); + ZSTD_cwksp_free(&cdict->workspace, cMem); + if (!cdictInWorkspace) { + ZSTD_free(cdict, cMem); + } + return 0; + } +} -/* DEBUGFILE can be defined externally, - * typically through compiler command line. - * note : currently useless. - * Value must be stderr or stdout */ -#ifndef DEBUGFILE -# define DEBUGFILE stderr -#endif +/*! ZSTD_initStaticCDict_advanced() : + * Generate a digested dictionary in provided memory area. + * workspace: The memory area to emplace the dictionary into. + * Provided pointer must 8-bytes aligned. + * It must outlive dictionary usage. + * workspaceSize: Use ZSTD_estimateCDictSize() + * to determine how large workspace must be. 
+ * cParams : use ZSTD_getCParams() to transform a compression level + * into its relevants cParams. + * @return : pointer to ZSTD_CDict*, or NULL if error (size too small) + * Note : there is no corresponding "free" function. + * Since workspace was allocated externally, it must be freed externally. + */ +const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams) +{ + size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); + size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + + matchStateSize; + ZSTD_CDict* cdict; + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ -/* recommended values for DEBUGLEVEL : - * 0 : release mode, no debug, all run-time checks disabled - * 1 : enables assert() only, no display - * 2 : reserved, for currently active debug path - * 3 : events once per object lifetime (CCtx, CDict, etc.) - * 4 : events once per frame - * 5 : events once per block - * 6 : events once per sequence (verbose) - * 7+: events at every position (*very* verbose) - * - * It's generally inconvenient to output traces > 5. - * In which case, it's possible to selectively trigger high verbosity levels - * by modifying g_debug_level. 
- */ + { + ZSTD_cwksp ws; + ZSTD_cwksp_init(&ws, workspace, workspaceSize); + cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); + if (cdict == NULL) return NULL; + ZSTD_cwksp_move(&cdict->workspace, &ws); + } -#if (DEBUGLEVEL>=1) -# include -#else -# ifndef assert /* assert may be already defined, due to prior #include */ -# define assert(condition) ((void)0) /* disable assert (default) */ -# endif -#endif + DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u", + (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize)); + if (workspaceSize < neededSize) return NULL; -#if (DEBUGLEVEL>=2) -# include -extern int g_debuglevel; /* the variable is only declared, - it actually lives in debug.c, - and is shared by the whole process. - It's not thread-safe. - It's useful when enabling very verbose levels - on selective conditions (such as position in src) */ + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, + dict, dictSize, + dictLoadMethod, dictContentType, + cParams) )) + return NULL; -# define RAWLOG(l, ...) { \ - if (l<=g_debuglevel) { \ - fprintf(stderr, __VA_ARGS__); \ - } } -# define DEBUGLOG(l, ...) { \ - if (l<=g_debuglevel) { \ - fprintf(stderr, __FILE__ ": " __VA_ARGS__); \ - fprintf(stderr, " \n"); \ - } } -#else -# define RAWLOG(l, ...) {} /* disabled */ -# define DEBUGLOG(l, ...) 
{} /* disabled */ -#endif + return cdict; +} +ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict) +{ + assert(cdict != NULL); + return cdict->matchState.cParams; +} -#if defined (__cplusplus) +/* ZSTD_compressBegin_usingCDict_advanced() : + * cdict must be != NULL */ +size_t ZSTD_compressBegin_usingCDict_advanced( + ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, + ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); + RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!"); + { ZSTD_CCtx_params params = cctx->requestedParams; + params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF + || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || cdict->compressionLevel == 0 ) + && (params.attachDictPref != ZSTD_dictForceLoad) ? + ZSTD_getCParamsFromCDict(cdict) + : ZSTD_getCParams(cdict->compressionLevel, + pledgedSrcSize, + cdict->dictContentSize); + /* Increase window log to fit the entire dictionary and source if the + * source size is known. Limit the increase to 19, which is the + * window log for compression level 1 with the largest source size. + */ + if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) { + U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19); + U32 const limitedSrcLog = limitedSrcSize > 1 ? 
ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1; + params.cParams.windowLog = MAX(params.cParams.windowLog, limitedSrcLog); + } + params.fParams = fParams; + return ZSTD_compressBegin_internal(cctx, + NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, + cdict, + ¶ms, pledgedSrcSize, + ZSTDb_not_buffered); + } } -#endif -#endif /* DEBUG_H_12987983217 */ +/* ZSTD_compressBegin_usingCDict() : + * pledgedSrcSize=0 means "unknown" + * if pledgedSrcSize>0, it will enable contentSizeFlag */ +size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag); + return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); +} +size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) +{ + FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */ + return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); +} -// LICENSE_CHANGE_END - /* assert, DEBUGLOG */ +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. 
+ * Note that compression parameters are decided at CDict creation time + * while frame parameters are hardcoded */ +size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); +} -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list /* ****************************************************************** - * hist : Histogram functions - * part of Finite State Entropy project - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - Public forum : https://groups.google.com/forum/#!forum/lz4c - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
-****************************************************************** */ +* Streaming +********************************************************************/ -/* --- dependencies --- */ -#include /* size_t */ +ZSTD_CStream* ZSTD_createCStream(void) +{ + DEBUGLOG(3, "ZSTD_createCStream"); + return ZSTD_createCStream_advanced(ZSTDInternalConstants::ZSTD_defaultCMem); +} +ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize) +{ + return ZSTD_initStaticCCtx(workspace, workspaceSize); +} -namespace duckdb_zstd { -/* --- simple histogram functions --- */ +ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem) +{ /* CStream and CCtx are now same object */ + return ZSTD_createCCtx_advanced(customMem); +} -/*! HIST_count(): - * Provides the precise count of each byte within a table 'count'. - * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). - * Updates *maxSymbolValuePtr with actual largest symbol value detected. - * @return : count of the most frequent symbol (which isn't identified). - * or an error code, which can be tested using HIST_isError(). - * note : if return == srcSize, there is only one symbol. - */ -size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize); +size_t ZSTD_freeCStream(ZSTD_CStream* zcs) +{ + return ZSTD_freeCCtx(zcs); /* same object */ +} -unsigned HIST_isError(size_t code); /**< tells if a return value is an error code */ -/* --- advanced histogram functions --- */ +/*====== Initialization ======*/ -#define HIST_WKSP_SIZE_U32 1024 -#define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned)) -/** HIST_count_wksp() : - * Same as HIST_count(), but using an externally provided scratch buffer. - * Benefit is this function will use very little stack space. 
- * `workSpace` is a writable buffer which must be 4-bytes aligned, - * `workSpaceSize` must be >= HIST_WKSP_SIZE - */ -size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize, - void* workSpace, size_t workSpaceSize); +size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; } -/** HIST_countFast() : - * same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr. - * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` - */ -size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize); +size_t ZSTD_CStreamOutSize(void) +{ + return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTDInternalConstants::ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; +} -/** HIST_countFast_wksp() : - * Same as HIST_countFast(), but using an externally provided scratch buffer. - * `workSpace` is a writable buffer which must be 4-bytes aligned, - * `workSpaceSize` must be >= HIST_WKSP_SIZE - */ -size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize, - void* workSpace, size_t workSpaceSize); +static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx, + const void* const dict, size_t const dictSize, ZSTD_dictContentType_e const dictContentType, + const ZSTD_CDict* const cdict, + ZSTD_CCtx_params params, unsigned long long const pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_resetCStream_internal"); + /* Finalize the compression parameters */ + params.cParams = ZSTD_getCParamsFromCCtxParams(¶ms, pledgedSrcSize, dictSize); + /* params are supposed to be fully validated at this point */ + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ -/*! HIST_count_simple() : - * Same as HIST_countFast(), this function is unsafe, - * and will segfault if any value within `src` is `> *maxSymbolValuePtr`. 
- * It is also a bit slower for large inputs. - * However, it does not need any additional memory (not even on stack). - * @return : count of the most frequent symbol. - * Note this function doesn't produce any error (i.e. it must succeed). - */ -unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize); + FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, + dict, dictSize, dictContentType, ZSTD_dtlm_fast, + cdict, + ¶ms, pledgedSrcSize, + ZSTDb_buffered) , ""); + cctx->inToCompress = 0; + cctx->inBuffPos = 0; + cctx->inBuffTarget = cctx->blockSize + + (cctx->blockSize == pledgedSrcSize); /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */ + cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0; + cctx->streamStage = zcss_load; + cctx->frameEnded = 0; + return 0; /* ready to go */ } +/* ZSTD_resetCStream(): + * pledgedSrcSize == 0 means "unknown" */ +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss) +{ + /* temporary : 0 interpreted as "unknown" during transition period. + * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. + * 0 will be interpreted as "empty" in the future. + */ + U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + return 0; +} -// LICENSE_CHANGE_END - /* HIST_count_wksp */ +/*! ZSTD_initCStream_internal() : + * Note : for lib/compress only. Used by zstdmt_compress.c. 
+ * Assumption 1 : params are valid + * Assumption 2 : either dict, or cdict, is defined, not both */ +size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_initCStream_internal"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); + zcs->requestedParams = *params; + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + if (dict) { + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); + } else { + /* Dictionary is cleared if !cdict */ + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); + } + return 0; +} +/* ZSTD_initCStream_usingCDict_advanced() : + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */ +size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + zcs->requestedParams.fParams = fParams; + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); + return 0; +} -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list - -/* ****************************************************************** - * bitstream - * Part of FSE library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. 
- * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ -#ifndef BITSTREAM_H_MODULE -#define BITSTREAM_H_MODULE - -/* -* This API consists of small unitary functions, which must be inlined for best performance. -* Since link-time-optimization is not available for all compilers, -* these functions are defined into a .h to be included. -*/ +/* note : cdict must outlive compression session */ +size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); + return 0; +} -/*-**************************************** -* Dependencies -******************************************/ - /* unaligned access routines */ - /* UNLIKELY() */ - /* assert(), DEBUGLOG(), RAWLOG() */ +/* ZSTD_initCStream_advanced() : + * pledgedSrcSize must be exact. + * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */ +size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pss) +{ + /* for compatibility with older programs relying on this behavior. + * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. + * This line will be removed in the future. + */ + U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? 
ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_initCStream_advanced"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); + zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, ¶ms); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); + return 0; +} -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list +size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingDict"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); + return 0; +} -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ +size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss) +{ + /* temporary : 0 interpreted as "unknown" during transition period. + * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. + * 0 will be interpreted as "empty" in the future. + */ + U64 const pledgedSrcSize = (pss==0) ? 
ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_initCStream_srcSize"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + return 0; +} -/* Note : this module is expected to remain private, do not expose it */ +size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_initCStream"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + return 0; +} -#ifndef ERROR_H_MODULE -#define ERROR_H_MODULE +/*====== Compression ======*/ -/* **************************************** -* Dependencies -******************************************/ -#include /* size_t */ +static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx) +{ + size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos; + if (hintInSize==0) hintInSize = cctx->blockSize; + return hintInSize; +} +/** ZSTD_compressStream_generic(): + * internal function for all *compressStream*() variants + * non-static, because can be called from zstdmt_compress.c + * @return : hint size for next input */ +static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective const flushMode) +{ + const char* const istart = (const char*)input->src; + const char* const iend = input->size != 0 ? istart + input->size : istart; + const char* ip = input->pos != 0 ? istart + input->pos : istart; + char* const ostart = (char*)output->dst; + char* const oend = output->size != 0 ? ostart + output->size : ostart; + char* op = output->pos != 0 ? 
ostart + output->pos : ostart; + U32 someMoreWork = 1; -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list + /* check expectations */ + DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode); + assert(zcs->inBuff != NULL); + assert(zcs->inBuffSize > 0); + assert(zcs->outBuff != NULL); + assert(zcs->outBuffSize > 0); + assert(output->pos <= output->size); + assert(input->pos <= input->size); -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ + while (someMoreWork) { + switch(zcs->streamStage) + { + case zcss_init: + RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!"); -#ifndef ZSTD_ERRORS_H_398273423 -#define ZSTD_ERRORS_H_398273423 + case zcss_load: + if ( (flushMode == ZSTD_e_end) + && ((size_t)(oend-op) >= ZSTD_compressBound(iend-ip)) /* enough dstCapacity */ + && (zcs->inBuffPos == 0) ) { + /* shortcut to compression pass directly into output buffer */ + size_t const cSize = ZSTD_compressEnd(zcs, + op, oend-op, ip, iend-ip); + DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); + FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed"); + ip = iend; + op += cSize; + zcs->frameEnded = 1; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + someMoreWork = 0; break; + } + /* complete loading into inBuffer */ + { size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; + size_t const loaded = ZSTD_limitCopy( + zcs->inBuff + zcs->inBuffPos, toLoad, + ip, iend-ip); + zcs->inBuffPos += loaded; + if (loaded != 0) + ip += loaded; + if ( (flushMode == ZSTD_e_continue) + && (zcs->inBuffPos < zcs->inBuffTarget) 
) { + /* not enough input to fill full block : stop here */ + someMoreWork = 0; break; + } + if ( (flushMode == ZSTD_e_flush) + && (zcs->inBuffPos == zcs->inToCompress) ) { + /* empty */ + someMoreWork = 0; break; + } + } + /* compress current block (note : this stage cannot be stopped in the middle) */ + DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode); + { void* cDst; + size_t cSize; + size_t const iSize = zcs->inBuffPos - zcs->inToCompress; + size_t oSize = oend-op; + unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); + if (oSize >= ZSTD_compressBound(iSize)) + cDst = op; /* compress into output buffer, to skip flush stage */ + else + cDst = zcs->outBuff, oSize = zcs->outBuffSize; + cSize = lastBlock ? + ZSTD_compressEnd(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize) : + ZSTD_compressContinue(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize); + FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); + zcs->frameEnded = lastBlock; + /* prepare next block */ + zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; + if (zcs->inBuffTarget > zcs->inBuffSize) + zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; + DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", + (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize); + if (!lastBlock) + assert(zcs->inBuffTarget <= zcs->inBuffSize); + zcs->inToCompress = zcs->inBuffPos; + if (cDst == op) { /* no need to flush */ + op += cSize; + if (zcs->frameEnded) { + DEBUGLOG(5, "Frame completed directly in outBuffer"); + someMoreWork = 0; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + } + break; + } + zcs->outBuffContentSize = cSize; + zcs->outBuffFlushedSize = 0; + zcs->streamStage = zcss_flush; /* pass-through to flush stage */ + } + /* fall-through */ + case zcss_flush: + DEBUGLOG(5, "flush stage"); + { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; + size_t const flushed = 
ZSTD_limitCopy(op, (size_t)(oend-op), + zcs->outBuff + zcs->outBuffFlushedSize, toFlush); + DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", + (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); + if (flushed) + op += flushed; + zcs->outBuffFlushedSize += flushed; + if (toFlush!=flushed) { + /* flush not fully completed, presumably because dst is too small */ + assert(op==oend); + someMoreWork = 0; + break; + } + zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; + if (zcs->frameEnded) { + DEBUGLOG(5, "Frame completed on flush"); + someMoreWork = 0; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + break; + } + zcs->streamStage = zcss_load; + break; + } -/*===== dependency =====*/ -#include /* size_t */ + default: /* impossible */ + assert(0); + } + } + input->pos = ip - istart; + output->pos = op - ostart; + if (zcs->frameEnded) return 0; + return ZSTD_nextInputSizeHint(zcs); +} -/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ -#ifndef ZSTDERRORLIB_VISIBILITY -# if defined(__GNUC__) && (__GNUC__ >= 4) -# define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default"))) -# else -# define ZSTDERRORLIB_VISIBILITY -# endif -#endif -#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) -# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY -#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) -# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ -#else -# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY +static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers >= 1) { + assert(cctx->mtctx != NULL); + return ZSTDMT_nextInputSizeHint(cctx->mtctx); + } #endif -namespace duckdb_zstd { -/*-********************************************* - * Error codes list - 
*-********************************************* - * Error codes _values_ are pinned down since v1.3.1 only. - * Therefore, don't rely on values if you may link to any version < v1.3.1. - * - * Only values < 100 are considered stable. - * - * note 1 : this API shall be used with static linking only. - * dynamic linking is not yet officially supported. - * note 2 : Prefer relying on the enum than on its value whenever possible - * This is the only supported way to use the error list < v1.3.1 - * note 3 : ZSTD_isError() is always correct, whatever the library version. - **********************************************/ -typedef enum { - ZSTD_error_no_error = 0, - ZSTD_error_GENERIC = 1, - ZSTD_error_prefix_unknown = 10, - ZSTD_error_version_unsupported = 12, - ZSTD_error_frameParameter_unsupported = 14, - ZSTD_error_frameParameter_windowTooLarge = 16, - ZSTD_error_corruption_detected = 20, - ZSTD_error_checksum_wrong = 22, - ZSTD_error_dictionary_corrupted = 30, - ZSTD_error_dictionary_wrong = 32, - ZSTD_error_dictionaryCreation_failed = 34, - ZSTD_error_parameter_unsupported = 40, - ZSTD_error_parameter_outOfBound = 42, - ZSTD_error_tableLog_tooLarge = 44, - ZSTD_error_maxSymbolValue_tooLarge = 46, - ZSTD_error_maxSymbolValue_tooSmall = 48, - ZSTD_error_stage_wrong = 60, - ZSTD_error_init_missing = 62, - ZSTD_error_memory_allocation = 64, - ZSTD_error_workSpace_tooSmall= 66, - ZSTD_error_dstSize_tooSmall = 70, - ZSTD_error_srcSize_wrong = 72, - ZSTD_error_dstBuffer_null = 74, - /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ - ZSTD_error_frameIndex_tooLarge = 100, - ZSTD_error_seekableIO = 102, - ZSTD_error_dstBuffer_wrong = 104, - ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ -} ZSTD_ErrorCode; - -/*! 
ZSTD_getErrorCode() : - convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, - which can be used to compare with enum list published above */ -ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); -ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */ + return ZSTD_nextInputSizeHint(cctx); } -#endif /* ZSTD_ERRORS_H_398273423 */ - - -// LICENSE_CHANGE_END - /* enum list */ - -namespace duckdb_zstd { -/* **************************************** -* Compiler-specific -******************************************/ -#if defined(__GNUC__) -# define ERR_STATIC static __attribute__((unused)) -#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define ERR_STATIC static inline -#elif defined(_MSC_VER) -# define ERR_STATIC static __inline -#else -# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ -#endif - +size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , ""); + return ZSTD_nextInputSizeHint_MTorST(zcs); +} -/*-**************************************** -* Customization (error_public.h) -******************************************/ -typedef ZSTD_ErrorCode ERR_enum; -#define PREFIX(name) ZSTD_error_##name +size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp) +{ + DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp); + /* check conditions */ + RETURN_ERROR_IF(output->pos > output->size, GENERIC, "invalid buffer"); + RETURN_ERROR_IF(input->pos > input->size, GENERIC, "invalid buffer"); + assert(cctx!=NULL); -/*-**************************************** -* Error codes handling -******************************************/ 
-#undef ERROR /* already defined on Visual Studio */ -#define ERROR(name) ZSTD_ERROR(name) -#define ZSTD_ERROR(name) ((size_t)-PREFIX(name)) + /* transparent initialization stage */ + if (cctx->streamStage == zcss_init) { + ZSTD_CCtx_params params = cctx->requestedParams; + ZSTD_prefixDict const prefixDict = cctx->prefixDict; + FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */ + memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ + assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ + DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); + if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1; /* auto-fix pledgedSrcSize */ + params.cParams = ZSTD_getCParamsFromCCtxParams( + &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/); -ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } -ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } +#ifdef ZSTD_MULTITHREAD + if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { + params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */ + } + if (params.nbWorkers > 0) { + /* mt context creation */ + if (cctx->mtctx == NULL) { + DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", + params.nbWorkers); + cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem); + RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!"); + } + /* mt compression */ + DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers); + FORWARD_IF_ERROR( ZSTDMT_initCStream_internal( + cctx->mtctx, + prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, + cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , ""); + cctx->streamStage = zcss_load; + cctx->appliedParams.nbWorkers = params.nbWorkers; + } else 
+#endif + { FORWARD_IF_ERROR( ZSTD_resetCStream_internal(cctx, + prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, + cctx->cdict, + params, cctx->pledgedSrcSizePlusOne-1) , ""); + assert(cctx->streamStage == zcss_load); + assert(cctx->appliedParams.nbWorkers == 0); + } } + /* end of transparent initialization stage */ -/* check and forward error code */ -#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e -#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } + /* compression stage */ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers > 0) { + int const forceMaxProgress = (endOp == ZSTD_e_flush || endOp == ZSTD_e_end); + size_t flushMin; + assert(forceMaxProgress || endOp == ZSTD_e_continue /* Protection for a new flush type */); + if (cctx->cParamsChanged) { + ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams); + cctx->cParamsChanged = 0; + } + do { + flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); + if ( ZSTD_isError(flushMin) + || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + } + FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed"); + } while (forceMaxProgress && flushMin != 0 && output->pos < output->size); + DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic"); + /* Either we don't require maximum forward progress, we've finished the + * flush, or we are out of output space. 
+ */ + assert(!forceMaxProgress || flushMin == 0 || output->pos == output->size); + return flushMin; + } +#endif + FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , ""); + DEBUGLOG(5, "completed ZSTD_compressStream2"); + return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ +} +size_t ZSTD_compressStream2_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp) +{ + ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; + ZSTD_inBuffer input = { src, srcSize, *srcPos }; + /* ZSTD_compressStream2() will check validity of dstPos and srcPos */ + size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; +} -/*-**************************************** -* Error Strings -******************************************/ +size_t ZSTD_compress2(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize); + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + { size_t oPos = 0; + size_t iPos = 0; + size_t const result = ZSTD_compressStream2_simpleArgs(cctx, + dst, dstCapacity, &oPos, + src, srcSize, &iPos, + ZSTD_e_end); + FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed"); + if (result != 0) { /* compression not completed, due to lack of output space */ + assert(oPos == dstCapacity); + RETURN_ERROR(dstSize_tooSmall, ""); + } + assert(iPos == srcSize); /* all input is expected consumed */ + return oPos; + } +} -const char* ERR_getErrorString(ERR_enum code); /* error_private.c */ +/*====== Finalize ======*/ -ERR_STATIC const char* ERR_getErrorName(size_t code) +/*! 
ZSTD_flushStream() : + * @return : amount of data remaining to flush */ +size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) { - return ERR_getErrorString(ERR_getErrorCode(code)); + ZSTD_inBuffer input = { NULL, 0, 0 }; + return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush); } -} -#endif /* ERROR_H_MODULE */ - - -// LICENSE_CHANGE_END - /* error codes and messages */ - - -/*========================================= -* Target specific -=========================================*/ -#if defined(__BMI__) && defined(__GNUC__) -# include /* support for bextr (experimental) */ -#elif defined(__ICCARM__) -# include -#endif - -#define STREAM_ACCUMULATOR_MIN_32 25 -#define STREAM_ACCUMULATOR_MIN_64 57 -#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) - -namespace duckdb_zstd { - -/*-****************************************** -* bitStream encoding API (write forward) -********************************************/ -/* bitStream can mix input from multiple sources. - * A critical property of these streams is that they encode and decode in **reverse** direction. - * So the first bit sequence you add will be the last to be read, like a LIFO stack. - */ -typedef struct { - size_t bitContainer; - unsigned bitPos; - char* startPtr; - char* ptr; - char* endPtr; -} BIT_CStream_t; - -MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); -MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); -MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); -MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); - -/* Start with initCStream, providing the size of buffer to write into. -* bitStream will never write outside of this buffer. -* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. -* -* bits are first added to a local register. 
-* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. -* Writing data into memory is an explicit operation, performed by the flushBits function. -* Hence keep track how many bits are potentially stored into local register to avoid register overflow. -* After a flushBits, a maximum of 7 bits might still be stored into local register. -* -* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. -* -* Last operation is to close the bitStream. -* The function returns the final size of CStream in bytes. -* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) -*/ - - -/*-******************************************** -* bitStream decoding API (read backward) -**********************************************/ -typedef struct { - size_t bitContainer; - unsigned bitsConsumed; - const char* ptr; - const char* start; - const char* limitPtr; -} BIT_DStream_t; - -typedef enum { BIT_DStream_unfinished = 0, - BIT_DStream_endOfBuffer = 1, - BIT_DStream_completed = 2, - BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ - /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ - -MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); -MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); -MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); -MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); - - -/* Start by invoking BIT_initDStream(). -* A chunk of the bitStream is then stored into a local register. -* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). -* You can then retrieve bitFields stored into the local register, **in reverse order**. -* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. 
-* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. -* Otherwise, it can be less than that, so proceed accordingly. -* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). -*/ - - -/*-**************************************** -* unsafe API -******************************************/ -MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); -/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ +size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) +{ + ZSTD_inBuffer input = { NULL, 0, 0 }; + size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end); + FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed"); + if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ + /* single thread mode : attempt to calculate remaining to flush more precisely */ + { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; + size_t const checksumSize = (size_t)(zcs->frameEnded ? 
0 : zcs->appliedParams.fParams.checksumFlag * 4); + size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize; + DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush); + return toFlush; + } +} -MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); -/* unsafe version; does not check buffer overflow */ -MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); -/* faster, but works only if nbBits >= 1 */ +/*-===== Pre-defined compression levels =====-*/ +#define ZSTD_MAX_CLEVEL 22 +int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } +int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; } +static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { +{ /* "default" - for any srcSize > 256 KB */ + /* W, C, H, S, L, TL, strat */ + { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ + { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ + { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */ + { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */ + { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */ + { 21, 18, 19, 2, 5, 2, ZSTD_greedy }, /* level 5 */ + { 21, 19, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */ + { 21, 19, 19, 3, 5, 8, ZSTD_lazy }, /* level 7 */ + { 21, 19, 19, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ + { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ + { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ + { 22, 21, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ + { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ + { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 13 */ + { 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */ + { 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */ + { 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */ + { 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */ + { 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */ + { 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */ + { 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */ + { 26, 
26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */ + { 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */ +}, +{ /* for srcSize <= 256 KB */ + /* W, C, H, S, L, T, strat */ + { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */ + { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */ + { 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/ + { 18, 18, 18, 3, 5, 2, ZSTD_greedy }, /* level 5.*/ + { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ + { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */ + { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/ + { 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/ + { 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */ + { 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ + { 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/ + { 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/ + { 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/ + { 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/ + { 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +{ /* for srcSize <= 128 KB */ + /* W, C, H, S, L, T, strat */ + { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */ + { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */ + { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */ + { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ + { 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ + { 17, 17, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* 
level 8 */ + { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */ + { 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */ + { 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/ + { 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ + { 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/ + { 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/ + { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/ + { 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/ + { 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +{ /* for srcSize <= 16 KB */ + /* W, C, H, S, L, T, strat */ + { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */ + { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */ + { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */ + { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */ + { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/ + { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ + { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/ + { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/ + { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/ + { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/ + { 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/ + { 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/ + { 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/ + { 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/ + { 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/ + { 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 14, 15, 15, 8, 3,512, 
ZSTD_btultra2}, /* level 20.*/ + { 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +}; -/*-************************************************************** -* Internal functions -****************************************************************/ -MEM_STATIC unsigned BIT_highbit32 (U32 val) +/*! ZSTD_getCParams_internal() : + * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. + * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown. + * Use dictSize == 0 for unknown or unused. */ +static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { - assert(val != 0); - { -# if defined(_MSC_VER) /* Visual */ - unsigned long r=0; - return _BitScanReverse ( &r, val ) ? (unsigned)r : 0; -# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ - return __builtin_clz (val) ^ 31; -# elif defined(__ICCARM__) /* IAR Intrinsic */ - return 31 - __CLZ(val); -# else /* Software version */ - static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, - 11, 14, 16, 18, 22, 25, 3, 30, - 8, 12, 20, 28, 15, 17, 24, 7, - 19, 27, 23, 6, 26, 5, 4, 31 }; - U32 v = val; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; -# endif + int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN; + size_t const addedSize = unknown && dictSize > 0 ? 500 : 0; + U64 const rSize = unknown && dictSize == 0 ? 
ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize; + U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); + int row = compressionLevel; + DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel); + if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ + if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */ + if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; + { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; + if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */ + /* refine parameters based on srcSize & dictSize */ + return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); } } -/*===== Local Constants =====*/ -static const unsigned BIT_mask[] = { - 0, 1, 3, 7, 0xF, 0x1F, - 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, - 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, - 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, - 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, - 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */ -#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0])) - -/*-************************************************************** -* bitStream encoding -****************************************************************/ -/*! BIT_initCStream() : - * `dstCapacity` must be > sizeof(size_t) - * @return : 0 if success, - * otherwise an error code (can be tested using ERR_isError()) */ -MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, - void* startPtr, size_t dstCapacity) +/*! ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. 
+ * Size values are optional, provide 0 if not known or unused */ +ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { - bitC->bitContainer = 0; - bitC->bitPos = 0; - bitC->startPtr = (char*)startPtr; - bitC->ptr = bitC->startPtr; - bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); - if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); - return 0; + if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize); } -/*! BIT_addBits() : - * can add up to 31 bits into `bitC`. - * Note : does not check for register overflow ! */ -MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, - size_t value, unsigned nbBits) -{ - MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32); - assert(nbBits < BIT_MASK_SIZE); - assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); - bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; - bitC->bitPos += nbBits; +/*! ZSTD_getParams() : + * same idea as ZSTD_getCParams() + * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). + * Fields of `ZSTD_frameParameters` are set to default values */ +static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { + ZSTD_parameters params; + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize); + DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); + memset(¶ms, 0, sizeof(params)); + params.cParams = cParams; + params.fParams.contentSizeFlag = 1; + return params; } -/*! 
BIT_addBitsFast() : - * works only if `value` is _clean_, - * meaning all high bits above nbBits are 0 */ -MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, - size_t value, unsigned nbBits) -{ - assert((value>>nbBits) == 0); - assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); - bitC->bitContainer |= value << bitC->bitPos; - bitC->bitPos += nbBits; +/*! ZSTD_getParams() : + * same idea as ZSTD_getCParams() + * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). + * Fields of `ZSTD_frameParameters` are set to default values */ +ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { + if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize); } -/*! BIT_flushBitsFast() : - * assumption : bitContainer has not overflowed - * unsafe version; does not check buffer overflow */ -MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) -{ - size_t const nbBytes = bitC->bitPos >> 3; - assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); - assert(bitC->ptr <= bitC->endPtr); - MEM_writeLEST(bitC->ptr, bitC->bitContainer); - bitC->ptr += nbBytes; - bitC->bitPos &= 7; - bitC->bitContainer >>= nbBytes*8; } -/*! BIT_flushBits() : - * assumption : bitContainer has not overflowed - * safe version; check for buffer overflow, and prevents it. - * note : does not signal buffer overflow. - * overflow will be revealed later on using BIT_closeCStream() */ -MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) -{ - size_t const nbBytes = bitC->bitPos >> 3; - assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); - assert(bitC->ptr <= bitC->endPtr); - MEM_writeLEST(bitC->ptr, bitC->bitContainer); - bitC->ptr += nbBytes; - if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; - bitC->bitPos &= 7; - bitC->bitContainer >>= nbBytes*8; -} -/*! 
BIT_closeCStream() : - * @return : size of CStream, in bytes, - * or 0 if it could not fit into dstBuffer */ -MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) -{ - BIT_addBitsFast(bitC, 1, 1); /* endMark */ - BIT_flushBits(bitC); - if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ - return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); -} +// LICENSE_CHANGE_END -/*-******************************************************** -* bitStream decoding -**********************************************************/ -/*! BIT_initDStream() : - * Initialize a BIT_DStream_t. - * `bitD` : a pointer to an already allocated BIT_DStream_t structure. - * `srcSize` must be the *exact* size of the bitStream, in bytes. - * @return : size of stream (== srcSize), or an errorCode if a problem is detected +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list + +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
*/ -MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) -{ - if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } - bitD->start = (const char*)srcBuffer; - bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); + /*-************************************* + * Dependencies + ***************************************/ - if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ - bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); - bitD->bitContainer = MEM_readLEST(bitD->ptr); - { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; - bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ - if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } - } else { - bitD->ptr = bitD->start; - bitD->bitContainer = *(const BYTE*)(bitD->start); - switch(srcSize) - { - case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); - /* fall-through */ - case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); - /* fall-through */ +namespace duckdb_zstd { +size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); - case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); - /* fall-through */ + RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, ""); - case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; - /* fall-through */ + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); + break; + case 2: /* 2 - 2 - 12 */ + MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); + break; + case 3: /* 2 - 2 - 20 */ + MEM_writeLE32(ostart, 
(U32)((U32)set_basic + (3<<2) + (srcSize<<4))); + break; + default: /* not necessary : flSize is {1,2,3} */ + assert(0); + } - case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; - /* fall-through */ + memcpy(ostart + flSize, src, srcSize); + DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize)); + return srcSize + flSize; +} - case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; - /* fall-through */ +size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); - default: break; - } - { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; - bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; - if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ - } - bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; + (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ + + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); + break; + case 2: /* 2 - 2 - 12 */ + MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); + break; + case 3: /* 2 - 2 - 20 */ + MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); + break; + default: /* not necessary : flSize is {1,2,3} */ + assert(0); } - return srcSize; + ostart[flSize] = *(const BYTE*)src; + DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1); + return flSize+1; } -MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) +size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_strategy strategy, int disableLiteralCompression, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + void* entropyWorkspace, size_t entropyWorkspaceSize, + const int bmi2) { - return 
bitContainer >> start; -} + size_t const minGain = ZSTD_minGain(srcSize, strategy); + size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); + BYTE* const ostart = (BYTE*)dst; + U32 singleStream = srcSize < 256; + symbolEncodingType_e hType = set_compressed; + size_t cLitSize; -MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) -{ - U32 const regMask = sizeof(bitContainer)*8 - 1; - /* if start > regMask, bitstream is corrupted, and result is undefined */ - assert(nbBits < BIT_MASK_SIZE); - return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; -} + DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)", + disableLiteralCompression, (U32)srcSize); -MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) -{ - assert(nbBits < BIT_MASK_SIZE); - return bitContainer & BIT_mask[nbBits]; -} - -/*! BIT_lookBits() : - * Provides next n bits from local register. - * local register is not modified. - * On 32-bits, maxNbBits==24. - * On 64-bits, maxNbBits==56. - * @return : value extracted */ -MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) -{ - /* arbitrate between double-shift and shift+mask */ -#if 1 - /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8, - * bitstream is likely corrupted, and result is undefined */ - return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); -#else - /* this code path is slower on my os-x laptop */ - U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; - return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); -#endif -} - -/*! 
BIT_lookBitsFast() : - * unsafe version; only works if nbBits >= 1 */ -MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) -{ - U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; - assert(nbBits >= 1); - return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); -} - -MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) -{ - bitD->bitsConsumed += nbBits; -} - -/*! BIT_readBits() : - * Read (consume) next n bits from local register and update. - * Pay attention to not read more than nbBits contained into local register. - * @return : extracted value. */ -MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) -{ - size_t const value = BIT_lookBits(bitD, nbBits); - BIT_skipBits(bitD, nbBits); - return value; -} + /* Prepare nextEntropy assuming reusing the existing table */ + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); -/*! BIT_readBitsFast() : - * unsafe version; only works only if nbBits >= 1 */ -MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) -{ - size_t const value = BIT_lookBitsFast(bitD, nbBits); - assert(nbBits >= 1); - BIT_skipBits(bitD, nbBits); - return value; -} + if (disableLiteralCompression) + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); -/*! BIT_reloadDStreamFast() : - * Similar to BIT_reloadDStream(), but with two differences: - * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold! - * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this - * point you must use BIT_reloadDStream() to reload. - */ -MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) -{ - if (UNLIKELY(bitD->ptr < bitD->limitPtr)) - return BIT_DStream_overflow; - assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); - bitD->ptr -= bitD->bitsConsumed >> 3; - bitD->bitsConsumed &= 7; - bitD->bitContainer = MEM_readLEST(bitD->ptr); - return BIT_DStream_unfinished; -} + /* small ? 
don't even attempt compression (speed opt) */ +# define COMPRESS_LITERALS_SIZE_MIN 63 + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } -/*! BIT_reloadDStream() : - * Refill `bitD` from buffer previously set in BIT_initDStream() . - * This function is safe, it guarantees it will not read beyond src buffer. - * @return : status of `BIT_DStream_t` internal register. - * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ -MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) -{ - if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ - return BIT_DStream_overflow; + RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); + { HUF_repeat repeat = prevHuf->repeatMode; + int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; + if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; + cLitSize = singleStream ? 
+ HUF_compress1X_repeat( + ostart+lhSize, dstCapacity-lhSize, src, srcSize, + HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, + (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) : + HUF_compress4X_repeat( + ostart+lhSize, dstCapacity-lhSize, src, srcSize, + HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, + (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); + if (repeat != HUF_repeat_none) { + /* reused the existing table */ + DEBUGLOG(5, "Reusing previous huffman table"); + hType = set_repeat; + } + } - if (bitD->ptr >= bitD->limitPtr) { - return BIT_reloadDStreamFast(bitD); + if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); } - if (bitD->ptr == bitD->start) { - if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; - return BIT_DStream_completed; + if (cLitSize==1) { + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); } - /* start < ptr < limitPtr */ - { U32 nbBytes = bitD->bitsConsumed >> 3; - BIT_DStream_status result = BIT_DStream_unfinished; - if (bitD->ptr - nbBytes < bitD->start) { - nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ - result = BIT_DStream_endOfBuffer; - } - bitD->ptr -= nbBytes; - bitD->bitsConsumed -= nbBytes*8; - bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ - return result; + + if (hType == set_compressed) { + /* using a newly constructed table */ + nextHuf->repeatMode = HUF_repeat_check; } -} -/*! BIT_endOfDStream() : - * @return : 1 if DStream has _exactly_ reached its end (all bits consumed). 
- */ -MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) -{ - return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); + MEM_writeLE32(ostart, lhc); + break; + } + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + default: /* not possible : lhSize is {3,4,5} */ + assert(0); + } + DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize)); + return lhSize+cLitSize; } } -#endif /* BITSTREAM_H_MODULE */ - // LICENSE_CHANGE_END - // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 // See the end of this file for a list -/* ****************************************************************** - * FSE : Finite State Entropy codec - * Public Prototypes declaration - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses. 
-****************************************************************** */ - -#ifndef FSE_H -#define FSE_H - + */ -/*-***************************************** -* Dependencies -******************************************/ -#include /* size_t, ptrdiff_t */ + /*-************************************* + * Dependencies + ***************************************/ namespace duckdb_zstd { -/*-***************************************** -* FSE_PUBLIC_API : control library symbols visibility -******************************************/ -#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) -# define FSE_PUBLIC_API __attribute__ ((visibility ("default"))) -#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ -# define FSE_PUBLIC_API __declspec(dllexport) -#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) -# define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ -#else -# define FSE_PUBLIC_API -#endif +/** + * -log2(x / 256) lookup table for x in [0, 256). 
+ * If x == 0: Return 0 + * Else: Return floor(-log2(x / 256) * 256) + */ +static unsigned const kInverseProbabilityLog256[256] = { + 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162, + 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889, + 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734, + 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626, + 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542, + 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473, + 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415, + 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366, + 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322, + 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282, + 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247, + 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215, + 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185, + 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157, + 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132, + 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108, + 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85, + 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64, + 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44, + 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25, + 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7, + 5, 4, 2, 1, +}; -/*------ Version ------*/ -#define FSE_VERSION_MAJOR 0 -#define FSE_VERSION_MINOR 9 -#define FSE_VERSION_RELEASE 0 +static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { + void const* ptr = ctable; + U16 const* u16ptr = (U16 const*)ptr; + U32 const maxSymbolValue = MEM_read16(u16ptr + 1); + return maxSymbolValue; +} -#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE -#define FSE_QUOTE(str) #str -#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str) -#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION) +/** + * Returns the cost in 
bytes of encoding the normalized count header. + * Returns an error if any of the helper functions return an error. + */ +static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, + size_t const nbSeq, unsigned const FSELog) +{ + BYTE wksp[FSE_NCOUNTBOUND]; + S16 norm[MaxSeq + 1]; + const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max), ""); + return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); +} -#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE) -FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */ +/** + * Returns the cost in bits of encoding the distribution described by count + * using the entropy bound. + */ +static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total) +{ + unsigned cost = 0; + unsigned s; + for (s = 0; s <= max; ++s) { + unsigned norm = (unsigned)((256 * count[s]) / total); + if (count[s] != 0 && norm == 0) + norm = 1; + assert(count[s] < total); + cost += count[s] * kInverseProbabilityLog256[norm]; + } + return cost >> 8; +} +/** + * Returns the cost in bits of encoding the distribution in count using ctable. + * Returns an error if ctable cannot represent all the symbols in count. 
+ */ +size_t ZSTD_fseBitCost( + FSE_CTable const* ctable, + unsigned const* count, + unsigned const max) +{ + unsigned const kAccuracyLog = 8; + size_t cost = 0; + unsigned s; + FSE_CState_t cstate; + FSE_initCState(&cstate, ctable); + if (ZSTD_getFSEMaxSymbolValue(ctable) < max) { + DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u", + ZSTD_getFSEMaxSymbolValue(ctable), max); + return ERROR(GENERIC); + } + for (s = 0; s <= max; ++s) { + unsigned const tableLog = cstate.stateLog; + unsigned const badCost = (tableLog + 1) << kAccuracyLog; + unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); + if (count[s] == 0) + continue; + if (bitCost >= badCost) { + DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s); + return ERROR(GENERIC); + } + cost += (size_t)count[s] * bitCost; + } + return cost >> kAccuracyLog; +} -/*-**************************************** -* FSE simple functions -******************************************/ -/*! FSE_compress() : - Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. - 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize). - @return : size of compressed data (<= dstCapacity). - Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! - if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. - if FSE_isError(return), compression failed (more details using FSE_getErrorName()) -*/ -FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity, - const void* src, size_t srcSize); +/** + * Returns the cost in bits of encoding the distribution in count using the + * table described by norm. The max symbol support by norm is assumed >= max. + * norm must be valid for every symbol with non-zero probability in count. 
+ */ +size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max) +{ + unsigned const shift = 8 - accuracyLog; + size_t cost = 0; + unsigned s; + assert(accuracyLog <= 8); + for (s = 0; s <= max; ++s) { + unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1; + unsigned const norm256 = normAcc << shift; + assert(norm256 > 0); + assert(norm256 < 256); + cost += count[s] * kInverseProbabilityLog256[norm256]; + } + return cost >> 8; +} -/*! FSE_decompress(): - Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', - into already allocated destination buffer 'dst', of size 'dstCapacity'. - @return : size of regenerated data (<= maxDstSize), - or an error code, which can be tested using FSE_isError() . +symbolEncodingType_e +ZSTD_selectEncodingType( + FSE_repeat* repeatMode, unsigned const* count, unsigned const max, + size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, + FSE_CTable const* prevCTable, + short const* defaultNorm, U32 defaultNormLog, + ZSTD_defaultPolicy_e const isDefaultAllowed, + ZSTD_strategy const strategy) +{ + ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); + if (mostFrequent == nbSeq) { + *repeatMode = FSE_repeat_none; + if (isDefaultAllowed && nbSeq <= 2) { + /* Prefer set_basic over set_rle when there are 2 or less symbols, + * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. + * If basic encoding isn't possible, always choose RLE. 
+ */ + DEBUGLOG(5, "Selected set_basic"); + return set_basic; + } + DEBUGLOG(5, "Selected set_rle"); + return set_rle; + } + if (strategy < ZSTD_lazy) { + if (isDefaultAllowed) { + size_t const staticFse_nbSeq_max = 1000; + size_t const mult = 10 - strategy; + size_t const baseLog = 3; + size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */ + assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */ + assert(mult <= 9 && mult >= 7); + if ( (*repeatMode == FSE_repeat_valid) + && (nbSeq < staticFse_nbSeq_max) ) { + DEBUGLOG(5, "Selected set_repeat"); + return set_repeat; + } + if ( (nbSeq < dynamicFse_nbSeq_min) + || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) { + DEBUGLOG(5, "Selected set_basic"); + /* The format allows default tables to be repeated, but it isn't useful. + * When using simple heuristics to select encoding type, we don't want + * to confuse these tables with dictionaries. When running more careful + * analysis, we don't need to waste time checking both repeating tables + * and default tables. + */ + *repeatMode = FSE_repeat_none; + return set_basic; + } + } + } else { + size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC); + size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC); + size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog); + size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); - ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!! - Why ? : making this distinction requires a header. - Header management is intentionally delegated to the user layer, which can better manage special cases. 
-*/ -FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity, - const void* cSrc, size_t cSrcSize); + if (isDefaultAllowed) { + assert(!ZSTD_isError(basicCost)); + assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost))); + } + assert(!ZSTD_isError(NCountCost)); + assert(compressedCost < ERROR(maxCode)); + DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u", + (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost); + if (basicCost <= repeatCost && basicCost <= compressedCost) { + DEBUGLOG(5, "Selected set_basic"); + assert(isDefaultAllowed); + *repeatMode = FSE_repeat_none; + return set_basic; + } + if (repeatCost <= compressedCost) { + DEBUGLOG(5, "Selected set_repeat"); + assert(!ZSTD_isError(repeatCost)); + return set_repeat; + } + assert(compressedCost < basicCost && compressedCost < repeatCost); + } + DEBUGLOG(5, "Selected set_compressed"); + *repeatMode = FSE_repeat_check; + return set_compressed; +} +size_t +ZSTD_buildCTable(void* dst, size_t dstCapacity, + FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, + unsigned* count, U32 max, + const BYTE* codeTable, size_t nbSeq, + const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, + const FSE_CTable* prevCTable, size_t prevCTableSize, + void* entropyWorkspace, size_t entropyWorkspaceSize) +{ + BYTE* op = (BYTE*)dst; + const BYTE* const oend = op + dstCapacity; + DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity); -/*-***************************************** -* Tool functions -******************************************/ -FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */ + switch (type) { + case set_rle: + FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), ""); + RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space"); + *op = codeTable[0]; + return 1; + case set_repeat: + memcpy(nextCTable, prevCTable, prevCTableSize); + return 0; + case set_basic: + 
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */ + return 0; + case set_compressed: { + S16 norm[MaxSeq + 1]; + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); + if (count[codeTable[nbSeq-1]] > 1) { + count[codeTable[nbSeq-1]]--; + nbSeq_1--; + } + assert(nbSeq_1 > 1); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max), ""); + { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ + FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed"); + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), ""); + return NCountSize; + } + } + default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach"); + } +} -/* Error Management */ -FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ -FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ +FORCE_INLINE_TEMPLATE size_t +ZSTD_encodeSequences_body( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + BIT_CStream_t blockStream; + FSE_CState_t stateMatchLength; + FSE_CState_t stateOffsetBits; + FSE_CState_t stateLitLength; + RETURN_ERROR_IF( + ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)), + dstSize_tooSmall, "not enough space remaining"); + DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)", + (int)(blockStream.endPtr - blockStream.startPtr), + (unsigned)dstCapacity); -/*-***************************************** -* FSE advanced functions 
-******************************************/ -/*! FSE_compress2() : - Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' - Both parameters can be defined as '0' to mean : use default value - @return : size of compressed data - Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!! - if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. - if FSE_isError(return), it's an error code. -*/ -FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); - - -/*-***************************************** -* FSE detailed API -******************************************/ -/*! -FSE_compress() does the following: -1. count symbol occurrence from source[] into table count[] (see hist.h) -2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) -3. save normalized counters to memory buffer using writeNCount() -4. build encoding table 'CTable' from normalized counters -5. encode the data stream using encoding table 'CTable' - -FSE_decompress() does the following: -1. read normalized counters with readNCount() -2. build decoding table 'DTable' from normalized counters -3. decode the data stream using decoding table 'DTable' - -The following API allows targeting specific sub-functions for advanced tasks. -For example, it's possible to compress several blocks using the same 'CTable', -or to save and provide normalized distribution using external method. -*/ - -/* *** COMPRESSION *** */ - -/*! FSE_optimalTableLog(): - dynamically downsize 'tableLog' when conditions are met. - It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. - @return : recommended tableLog (necessarily <= 'maxTableLog') */ -FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); - -/*! 
FSE_normalizeCount(): - normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) - 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). - @return : tableLog, - or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, - const unsigned* count, size_t srcSize, unsigned maxSymbolValue); - -/*! FSE_NCountWriteBound(): - Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. - Typically useful for allocation purpose. */ -FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); - -/*! FSE_writeNCount(): - Compactly save 'normalizedCounter' into 'buffer'. - @return : size of the compressed table, - or an errorCode, which can be tested using FSE_isError(). */ -FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, - const short* normalizedCounter, - unsigned maxSymbolValue, unsigned tableLog); - -/*! Constructor and Destructor of FSE_CTable. - Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ -typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ -FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog); -FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct); - -/*! FSE_buildCTable(): - Builds `ct`, which must be already allocated, using FSE_createCTable(). - @return : 0, or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); - -/*! FSE_compress_usingCTable(): - Compress `src` using `ct` into `dst` which must be already allocated. 
- @return : size of compressed data (<= `dstCapacity`), - or 0 if compressed data could not fit into `dst`, - or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); - -/*! -Tutorial : ----------- -The first step is to count all symbols. FSE_count() does this job very fast. -Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. -'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] -maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) -FSE_count() will return the number of occurrence of the most frequent symbol. -This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. -If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). - -The next step is to normalize the frequencies. -FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. -It also guarantees a minimum of 1 to any Symbol with frequency >= 1. -You can use 'tableLog'==0 to mean "use default tableLog value". -If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), -which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). - -The result of FSE_normalizeCount() will be saved into a table, -called 'normalizedCounter', which is a table of signed short. -'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. -The return value is tableLog if everything proceeded as expected. -It is 0 if there is a single symbol within distribution. -If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). 
- -'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). -'buffer' must be already allocated. -For guaranteed success, buffer size must be at least FSE_headerBound(). -The result of the function is the number of bytes written into 'buffer'. -If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). - -'normalizedCounter' can then be used to create the compression table 'CTable'. -The space required by 'CTable' must be already allocated, using FSE_createCTable(). -You can then use FSE_buildCTable() to fill 'CTable'. -If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). - -'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). -Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' -The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. -If it returns '0', compressed data could not fit into 'dst'. -If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). 
-*/ - - -/* *** DECOMPRESSION *** */ + /* first symbols */ + FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); + FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); + FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); + BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, ZSTDInternalConstants::LL_bits[llCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ZSTDInternalConstants::ML_bits[mlCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + if (longOffsets) { + U32 const ofBits = ofCodeTable[nbSeq-1]; + unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + if (extraBits) { + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); + BIT_flushBits(&blockStream); + } + BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, + ofBits - extraBits); + } else { + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); + } + BIT_flushBits(&blockStream); -/*! FSE_readNCount(): - Read compactly saved 'normalizedCounter' from 'rBuffer'. - @return : size read from 'rBuffer', - or an errorCode, which can be tested using FSE_isError(). 
- maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ -FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, - unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, - const void* rBuffer, size_t rBuffSize); + { size_t n; + for (n=nbSeq-2 ; n= 64-7-(LLFSELog+MLFSELog+OffFSELog))) + BIT_flushBits(&blockStream); /* (7)*/ + BIT_addBits(&blockStream, sequences[n].litLength, llBits); + if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); + if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); + if (longOffsets) { + unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + if (extraBits) { + BIT_addBits(&blockStream, sequences[n].offset, extraBits); + BIT_flushBits(&blockStream); /* (7)*/ + } + BIT_addBits(&blockStream, sequences[n].offset >> extraBits, + ofBits - extraBits); /* 31 */ + } else { + BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ + } + BIT_flushBits(&blockStream); /* (7)*/ + DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr)); + } } -/*! Constructor and Destructor of FSE_DTable. - Note that its size depends on 'tableLog' */ -typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ -FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog); -FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt); + DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog); + FSE_flushCState(&blockStream, &stateMatchLength); + DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog); + FSE_flushCState(&blockStream, &stateOffsetBits); + DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog); + FSE_flushCState(&blockStream, &stateLitLength); -/*! 
FSE_buildDTable(): - Builds 'dt', which must be already allocated, using FSE_createDTable(). - return : 0, or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + { size_t const streamSize = BIT_closeCStream(&blockStream); + RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space"); + return streamSize; + } +} -/*! FSE_decompress_usingDTable(): - Decompress compressed source `cSrc` of size `cSrcSize` using `dt` - into `dst` which must be already allocated. - @return : size of regenerated data (necessarily <= `dstCapacity`), - or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); +static size_t +ZSTD_encodeSequences_default( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + return ZSTD_encodeSequences_body(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} -/*! -Tutorial : ----------- -(Note : these functions only decompress FSE-compressed blocks. - If block is uncompressed, use memcpy() instead - If block is a single repeated byte, use memset() instead ) -The first step is to obtain the normalized frequencies of symbols. -This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). -'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. 
-In practice, that means it's necessary to know 'maxSymbolValue' beforehand, -or size the table to handle worst case situations (typically 256). -FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. -The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. -Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. -If there is an error, the function will return an error code, which can be tested using FSE_isError(). +#if DYNAMIC_BMI2 -The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. -This is performed by the function FSE_buildDTable(). -The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). -If there is an error, the function will return an error code, which can be tested using FSE_isError(). +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_encodeSequences_bmi2( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + return ZSTD_encodeSequences_body(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} -`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). -`cSrcSize` must be strictly correct, otherwise decompression will fail. -FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). -If there is an error, the function will return an error code, which can be tested using FSE_isError(). 
(ex: dst buffer too small) -*/ +#endif +size_t ZSTD_encodeSequences( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) +{ + DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); +#if DYNAMIC_BMI2 + if (bmi2) { + return ZSTD_encodeSequences_bmi2(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); + } +#endif + (void)bmi2; + return ZSTD_encodeSequences_default(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); } -#endif /* FSE_H */ - +} // LICENSE_CHANGE_END - // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 // See the end of this file for a list -/* ****************************************************************** - * FSE : Finite State Entropy codec - * Public Prototypes declaration - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses. 
-****************************************************************** */ - -#ifndef FSE_H_FSE_STATIC_LINKING_ONLY -#define FSE_H_FSE_STATIC_LINKING_ONLY - -/* *** Dependency *** */ - - -namespace duckdb_zstd { - -/* ***************************************** -* Static allocation -*******************************************/ -/* FSE buffer bounds */ -#define FSE_NCOUNTBOUND 512 -#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) -#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ - -/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ -#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2)) -#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1< 12) ? (1 << (maxTableLog - 2)) : 1024) ) -size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); -size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); -/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */ - -size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); -/**< build a fake FSE_CTable, designed to compress always the same symbolValue */ + /*-************************************* + * Dependencies + ***************************************/ -/* FSE_buildCTable_wksp() : - * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). - * `wkspSize` must be >= `(1<repeatMode; -The first thing to do is to init bitStream and state. 
- size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize); - FSE_initCState(&state, ct); + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize); -Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError(); -You can then encode your input data, byte after byte. -FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time. -Remember decoding will be done in reverse direction. - FSE_encodeByte(&bitStream, &state, symbol); + /* Prepare nextEntropy assuming reusing the existing table */ + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); -At any time, you can also add any bit sequence. -Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders - BIT_addBits(&bitStream, bitField, nbBits); + if (disableLiteralsCompression) { + DEBUGLOG(5, "set_basic - disabled"); + hufMetadata->hType = set_basic; + return 0; + } -The above methods don't commit data to memory, they just store it into local register, for speed. -Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). -Writing data to memory is a manual operation, performed by the flushBits function. - BIT_flushBits(&bitStream); + /* small ? don't even attempt compression (speed opt) */ +# define COMPRESS_LITERALS_SIZE_MIN 63 + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) { + DEBUGLOG(5, "set_basic - too small"); + hufMetadata->hType = set_basic; + return 0; + } + } -Your last FSE encoding operation shall be to flush your last state value(s). 
- FSE_flushState(&bitStream, &state); + /* Scan input and build symbol stats */ + { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); + FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); + if (largest == srcSize) { + DEBUGLOG(5, "set_rle"); + hufMetadata->hType = set_rle; + return 0; + } + if (largest <= (srcSize >> 7)+4) { + DEBUGLOG(5, "set_basic - no gain"); + hufMetadata->hType = set_basic; + return 0; + } + } -Finally, you must close the bitStream. -The function returns the size of CStream in bytes. -If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible) -If there is an error, it returns an errorCode (which can be tested using FSE_isError()). - size_t size = BIT_closeCStream(&bitStream); -*/ + /* Validate the previous Huffman table */ + if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { + repeat = HUF_repeat_none; + } + /* Build Huffman Tree */ + memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, + maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); + huffLog = (U32)maxBits; + { /* Build and write the CTable */ + size_t const newCSize = HUF_estimateCompressedSize( + (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); + size_t const hSize = HUF_writeCTable( + hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), + (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog); + /* Check against repeating the previous CTable */ + if (repeat != HUF_repeat_none) { + size_t const oldCSize = HUF_estimateCompressedSize( + (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); + if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { + DEBUGLOG(5, "set_repeat - 
smaller"); + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_repeat; + return 0; + } + } + if (newCSize + hSize >= srcSize) { + DEBUGLOG(5, "set_basic - no gains"); + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_basic; + return 0; + } + DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); + hufMetadata->hType = set_compressed; + nextHuf->repeatMode = HUF_repeat_check; + return hSize; + } + } +} -/* ***************************************** -* FSE symbol decompression API -*******************************************/ -typedef struct { - size_t state; - const void* table; /* precise table may vary, depending on U16 */ -} FSE_DState_t; - - -static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt); - -static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); - -static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr); - -/**< -Let's now decompose FSE_decompress_usingDTable() into its unitary components. -You will decode FSE-encoded symbols from the bitStream, -and also any other bitFields you put in, **in reverse order**. - -You will need a few variables to track your bitStream. They are : - -BIT_DStream_t DStream; // Stream context -FSE_DState_t DState; // State context. Multiple ones are possible -FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable() - -The first thing to do is to init the bitStream. - errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize); - -You should then retrieve your initial state(s) -(in reverse flushing order if you have several ones) : - errorCode = FSE_initDState(&DState, &DStream, DTablePtr); - -You can then decode your data, symbol after symbol. -For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'. -Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out). 
- unsigned char symbol = FSE_decodeSymbol(&DState, &DStream); - -You can retrieve any bitfield you eventually stored into the bitStream (in reverse order) -Note : maximum allowed nbBits is 25, for 32-bits compatibility - size_t bitField = BIT_readBits(&DStream, nbBits); - -All above operations only read from local register (which size depends on size_t). -Refueling the register from memory is manually performed by the reload method. - endSignal = FSE_reloadDStream(&DStream); - -BIT_reloadDStream() result tells if there is still some more data to read from DStream. -BIT_DStream_unfinished : there is still some data left into the DStream. -BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled. -BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed. -BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted. - -When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop, -to properly detect the exact end of stream. -After each decoded symbol, check if DStream is fully consumed using this simple test : - BIT_reloadDStream(&DStream) >= BIT_DStream_completed - -When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. -Checking if DStream has reached its end is performed by : - BIT_endOfDStream(&DStream); -Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. 
- FSE_endOfDState(&DState); -*/ - - -/* ***************************************** -* FSE unsafe API -*******************************************/ -static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); -/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ - - -/* ***************************************** -* Implementation of inlined functions -*******************************************/ -typedef struct { - int deltaFindState; - U32 deltaNbBits; -} FSE_symbolCompressionTransform; /* total 8 bytes */ - -MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct) +/** ZSTD_buildSuperBlockEntropy_sequences() : + * Builds entropy for the super-block sequences. + * Stores symbol compression modes and fse table to fseMetadata. + * @return : size of fse tables or error code */ +static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, + const ZSTD_fseCTables_t* prevEntropy, + ZSTD_fseCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize) { - const void* ptr = ct; - const U16* u16ptr = (const U16*) ptr; - const U32 tableLog = MEM_read16(ptr); - statePtr->value = (ptrdiff_t)1<stateTable = u16ptr+2; - statePtr->symbolTT = ct + 1 + (tableLog ? 
(1<<(tableLog-1)) : 1); - statePtr->stateLog = tableLog; -} + BYTE* const wkspStart = (BYTE*)workspace; + BYTE* const wkspEnd = wkspStart + wkspSize; + BYTE* const countWkspStart = wkspStart; + unsigned* const countWksp = (unsigned*)workspace; + const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned); + BYTE* const cTableWksp = countWkspStart + countWkspSize; + const size_t cTableWkspSize = wkspEnd-cTableWksp; + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + BYTE* const ostart = fseMetadata->fseTablesBuffer; + BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); + BYTE* op = ostart; + assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE)); + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq); + memset(workspace, 0, wkspSize); -/*! 
FSE_initCState2() : -* Same as FSE_initCState(), but the first symbol to include (which will be the last to be read) -* uses the smallest state value possible, saving the cost of this symbol */ -MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol) -{ - FSE_initCState(statePtr, ct); - { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; - const U16* stateTable = (const U16*)(statePtr->stateTable); - U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16); - statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; - statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; - } + fseMetadata->lastCountSize = 0; + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + /* build CTable for Literal Lengths */ + { U32 LLtype; + unsigned max = MaxLL; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + DEBUGLOG(5, "Building LL table"); + nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; + LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, + countWksp, max, mostFrequent, nbSeq, + LLFSELog, prevEntropy->litlengthCTable, + ZSTDInternalConstants::LL_defaultNorm, ZSTDInternalConstants::LL_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(set_basic < set_compressed && set_rle < set_compressed); + assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, + countWksp, max, llCodeTable, nbSeq, ZSTDInternalConstants::LL_defaultNorm, ZSTDInternalConstants::LL_defaultNormLog, MaxLL, + prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, 
"ZSTD_buildCTable for LitLens failed"); + if (LLtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->llType = (symbolEncodingType_e) LLtype; + } } + /* build CTable for Offsets */ + { U32 Offtype; + unsigned max = MaxOff; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ + ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + DEBUGLOG(5, "Building OF table"); + nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; + Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, + countWksp, max, mostFrequent, nbSeq, + OffFSELog, prevEntropy->offcodeCTable, + ZSTDInternalConstants::OF_defaultNorm, ZSTDInternalConstants::OF_defaultNormLog, + defaultPolicy, strategy); + assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, + countWksp, max, ofCodeTable, nbSeq, ZSTDInternalConstants::OF_defaultNorm, ZSTDInternalConstants::OF_defaultNormLog, DefaultMaxOff, + prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); + if (Offtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->ofType = (symbolEncodingType_e) Offtype; + } } + /* build CTable for MatchLengths */ + { U32 MLtype; + unsigned max = MaxML; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); + nextEntropy->matchlength_repeatMode = 
prevEntropy->matchlength_repeatMode; + MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, + countWksp, max, mostFrequent, nbSeq, + MLFSELog, prevEntropy->matchlengthCTable, + ZSTDInternalConstants::ML_defaultNorm, ZSTDInternalConstants::ML_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, + countWksp, max, mlCodeTable, nbSeq, ZSTDInternalConstants::ML_defaultNorm, ZSTDInternalConstants::ML_defaultNormLog, MaxML, + prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); + if (MLtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->mlType = (symbolEncodingType_e) MLtype; + } } + assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer)); + return op-ostart; } -MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol) + +/** ZSTD_buildSuperBlockEntropy() : + * Builds entropy for the super-block. 
+ * @return : 0 on success or error code */ +static size_t +ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize) { - FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; - const U16* const stateTable = (const U16*)(statePtr->stateTable); - U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); - BIT_addBits(bitC, statePtr->value, nbBitsOut); - statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; + size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy"); + entropyMetadata->hufMetadata.hufDesSize = + ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize, + &prevEntropy->huf, &nextEntropy->huf, + &entropyMetadata->hufMetadata, + ZSTD_disableLiteralsCompression(cctxParams), + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed"); + entropyMetadata->fseMetadata.fseTablesSize = + ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr, + &prevEntropy->fse, &nextEntropy->fse, + cctxParams, + &entropyMetadata->fseMetadata, + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed"); + return 0; } -MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) +/** ZSTD_compressSubBlock_literal() : + * Compresses literals section for a sub-block. + * When we have to write the Huffman table we will sometimes choose a header + * size larger than necessary. This is because we have to pick the header size + * before we know the table size + compressed size, so we have a bound on the + * table size. 
If we guessed incorrectly, we fall back to uncompressed literals. + * + * We write the header when writeEntropy=1 and set entropyWrriten=1 when we succeeded + * in writing the header, otherwise it is set to 0. + * + * hufMetadata->hType has literals block type info. + * If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block. + * If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block. + * If it is set_compressed, first sub-block's literals section will be Compressed_Literals_Block + * If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block + * and the following sub-blocks' literals sections will be Treeless_Literals_Block. + * @return : compressed size of literals section of a sub-block + * Or 0 if it unable to compress. + * Or error code */ +static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + const BYTE* literals, size_t litSize, + void* dst, size_t dstSize, + const int bmi2, int writeEntropy, int* entropyWritten) { - BIT_addBits(bitC, statePtr->value, statePtr->stateLog); - BIT_flushBits(bitC); -} + size_t const header = writeEntropy ? 200 : 0; + size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header)); + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart + lhSize; + U32 const singleStream = lhSize == 3; + symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat; + size_t cLitSize = 0; + (void)bmi2; /* TODO bmi2... */ -/* FSE_getMaxNbBits() : - * Approximate maximum cost of a symbol, in bits. 
- * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) - * note 1 : assume symbolValue is valid (<= maxSymbolValue) - * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ -MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) -{ - const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; - return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16; -} + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy); -/* FSE_bitCost() : - * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits) - * note 1 : assume symbolValue is valid (<= maxSymbolValue) - * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ -MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog) -{ - const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; - U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16; - U32 const threshold = (minNbBits+1) << 16; - assert(tableLog < 16); - assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */ - { U32 const tableSize = 1 << tableLog; - U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize); - U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */ - U32 const bitMultiplier = 1 << accuracyLog; - assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold); - assert(normalizedDeltaFromThreshold <= bitMultiplier); - return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold; + *entropyWritten = 0; + if (litSize == 0 || hufMetadata->hType == set_basic) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw 
literal"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } else if (hufMetadata->hType == set_rle) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal"); + return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize); } -} + assert(litSize > 0); + assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat); -/* ====== Decompression ====== */ - -typedef struct { - U16 tableLog; - U16 fastMode; -} FSE_DTableHeader; /* sizeof U32 */ + if (writeEntropy && hufMetadata->hType == set_compressed) { + memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize); + op += hufMetadata->hufDesSize; + cLitSize += hufMetadata->hufDesSize; + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize); + } -typedef struct -{ - unsigned short newState; - unsigned char symbol; - unsigned char nbBits; -} FSE_decode_t; /* size == U32 */ + /* TODO bmi2 */ + { const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable) + : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable); + op += cSize; + cLitSize += cSize; + if (cSize == 0 || ERR_isError(cSize)) { + DEBUGLOG(5, "Failed to write entropy tables %s", ZSTD_getErrorName(cSize)); + return 0; + } + /* If we expand and we aren't writing a header then emit uncompressed */ + if (!writeEntropy && cLitSize >= litSize) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } + /* If we are writing headers then allow expansion that doesn't change our header size. 
*/ + if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) { + assert(cLitSize > litSize); + DEBUGLOG(5, "Literals expanded beyond allowed header size"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize); + } -MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt) -{ - const void* ptr = dt; - const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; - DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); - BIT_reloadDStream(bitD); - DStatePtr->table = dt + 1; + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18); + MEM_writeLE32(ostart, lhc); + break; + } + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + default: /* not possible : lhSize is {3,4,5} */ + assert(0); + } + *entropyWritten = 1; + DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart)); + return op-ostart; } -MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr) -{ - FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - return DInfo.symbol; +static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) { + const seqDef* const sstart = sequences; + const seqDef* const send = sequences + nbSeq; + const seqDef* sp = sstart; + size_t matchLengthSum = 0; + size_t litLengthSum = 0; + while (send-sp > 0) { + ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp); + litLengthSum += 
seqLen.litLength; + matchLengthSum += seqLen.matchLength; + sp++; + } + assert(litLengthSum <= litSize); + if (!lastSequence) { + assert(litLengthSum == litSize); + } + return matchLengthSum + litSize; } -MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +/** ZSTD_compressSubBlock_sequences() : + * Compresses sequences section for a sub-block. + * fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have + * symbol compression modes for the super-block. + * The first successfully compressed block will have these in its header. + * We set entropyWritten=1 when we succeed in compressing the sequences. + * The following sub-blocks will always have repeat mode. + * @return : compressed size of sequences section of a sub-block + * Or 0 if it is unable to compress + * Or error code. */ +static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + const seqDef* sequences, size_t nbSeq, + const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const int bmi2, int writeEntropy, int* entropyWritten) { - FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - U32 const nbBits = DInfo.nbBits; - size_t const lowBits = BIT_readBits(bitD, nbBits); - DStatePtr->state = DInfo.newState + lowBits; -} + const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + BYTE* seqHead; -MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) -{ - FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - U32 const nbBits = DInfo.nbBits; - BYTE const symbol = DInfo.symbol; - size_t const lowBits = BIT_readBits(bitD, nbBits); + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, 
longOffsets=%d)", nbSeq, writeEntropy, longOffsets); - DStatePtr->state = DInfo.newState + lowBits; - return symbol; -} + *entropyWritten = 0; + /* Sequences Header */ + RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, + dstSize_tooSmall, ""); + if (nbSeq < 0x7F) + *op++ = (BYTE)nbSeq; + else if (nbSeq < LONGNBSEQ) + op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; + else + op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + if (nbSeq==0) { + return op - ostart; + } -/*! FSE_decodeSymbolFast() : - unsafe, only works if no symbol has a probability > 50% */ -MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) -{ - FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - U32 const nbBits = DInfo.nbBits; - BYTE const symbol = DInfo.symbol; - size_t const lowBits = BIT_readBitsFast(bitD, nbBits); + /* seqHead : flags for FSE encoding type */ + seqHead = op++; - DStatePtr->state = DInfo.newState + lowBits; - return symbol; -} + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart)); -MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) -{ - return DStatePtr->state == 0; -} - - - -#ifndef FSE_COMMONDEFS_ONLY + if (writeEntropy) { + const U32 LLtype = fseMetadata->llType; + const U32 Offtype = fseMetadata->ofType; + const U32 MLtype = fseMetadata->mlType; + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize); + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize); + op += fseMetadata->fseTablesSize; + } else { + const U32 repeat = set_repeat; + *seqHead = (BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2)); + } -/* ************************************************************** -* Tuning parameters -****************************************************************/ -/*!MEMORY_USAGE : -* Memory usage 
formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) -* Increasing memory usage improves compression ratio -* Reduced memory usage can improve speed, due to cache effect -* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ -#ifndef FSE_MAX_MEMORY_USAGE -# define FSE_MAX_MEMORY_USAGE 14 -#endif -#ifndef FSE_DEFAULT_MEMORY_USAGE -# define FSE_DEFAULT_MEMORY_USAGE 13 + { size_t const bitstreamSize = ZSTD_encodeSequences( + op, oend - op, + fseTables->matchlengthCTable, mlCode, + fseTables->offcodeCTable, ofCode, + fseTables->litlengthCTable, llCode, + sequences, nbSeq, + longOffsets, bmi2); + FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); + op += bitstreamSize; + /* zstd versions <= 1.3.4 mistakenly report corruption when + * FSE_readNCount() receives a buffer < 4 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1146. + * This can happen when the last set_compressed table present is 2 + * bytes and the bitstream is only one byte. + * In this exceedingly rare case, we will simply emit an uncompressed + * block, since it isn't worth optimizing. + */ +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) { + /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(fseMetadata->lastCountSize + bitstreamSize == 3); + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " + "emitting an uncompressed block."); + return 0; + } #endif + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize); + } -/*!FSE_MAX_SYMBOL_VALUE : -* Maximum symbol value authorized. -* Required for proper stack allocation */ -#ifndef FSE_MAX_SYMBOL_VALUE -# define FSE_MAX_SYMBOL_VALUE 255 + /* zstd versions <= 1.4.0 mistakenly report error when + * sequences section body size is less than 3 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1664. 
+ * This can happen when the previous sequences section block is compressed + * with rle mode and the current block's sequences section is compressed + * with repeat mode where sequences section body size can be 1 byte. + */ +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (op-seqHead < 4) { + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting " + "an uncompressed block when sequences are < 4 bytes"); + return 0; + } #endif -/* ************************************************************** -* template functions type & suffix -****************************************************************/ -#define FSE_FUNCTION_TYPE BYTE -#define FSE_FUNCTION_EXTENSION -#define FSE_DECODE_TYPE FSE_decode_t - + *entropyWritten = 1; + return op - ostart; +} -#endif /* !FSE_COMMONDEFS_ONLY */ +/** ZSTD_compressSubBlock() : + * Compresses a single sub-block. + * @return : compressed size of the sub-block + * Or 0 if it failed to compress. */ +static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + const seqDef* sequences, size_t nbSeq, + const BYTE* literals, size_t litSize, + const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const int bmi2, + int writeLitEntropy, int writeSeqEntropy, + int* litEntropyWritten, int* seqEntropyWritten, + U32 lastBlock) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart + ZSTDInternalConstants::ZSTD_blockHeaderSize; + DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)", + litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock); + { size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable, + &entropyMetadata->hufMetadata, literals, litSize, + op, oend-op, bmi2, writeLitEntropy, litEntropyWritten); + FORWARD_IF_ERROR(cLitSize, 
"ZSTD_compressSubBlock_literal failed"); + if (cLitSize == 0) return 0; + op += cLitSize; + } + { size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse, + &entropyMetadata->fseMetadata, + sequences, nbSeq, + llCode, mlCode, ofCode, + cctxParams, + op, oend-op, + bmi2, writeSeqEntropy, seqEntropyWritten); + FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed"); + if (cSeqSize == 0) return 0; + op += cSeqSize; + } + /* Write block header */ + { size_t cSize = (op-ostart)-ZSTDInternalConstants::ZSTD_blockHeaderSize; + U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(ostart, cBlockHeader24); + } + return op-ostart; +} +static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize, + const ZSTD_hufCTables_t* huf, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + unsigned* const countWksp = (unsigned*)workspace; + unsigned maxSymbolValue = 255; + size_t literalSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ -/* *************************************************************** -* Constants -*****************************************************************/ -#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) -#define FSE_MAX_TABLESIZE (1U<hType == set_basic) return litSize; + else if (hufMetadata->hType == set_rle) return 1; + else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) { + size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); + if (ZSTD_isError(largest)) return litSize; + { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); + if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; + return cLitSizeEstimate + literalSectionHeaderSize; + } } + assert(0); /* impossible */ + return 0; +} -#define FSE_TABLELOG_ABSOLUTE_MAX 15 -#if 
FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX -# error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" -#endif +static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, + const BYTE* codeTable, unsigned maxCode, + size_t nbSeq, const FSE_CTable* fseCTable, + const U32* additionalBits, + short const* defaultNorm, U32 defaultNormLog, + void* workspace, size_t wkspSize) +{ + unsigned* const countWksp = (unsigned*)workspace; + const BYTE* ctp = codeTable; + const BYTE* const ctStart = ctp; + const BYTE* const ctEnd = ctStart + nbSeq; + size_t cSymbolTypeSizeEstimateInBits = 0; + unsigned max = maxCode; -#define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3) + HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ + if (type == set_basic) { + cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max); + } else if (type == set_rle) { + cSymbolTypeSizeEstimateInBits = 0; + } else if (type == set_compressed || type == set_repeat) { + cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); + } + if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10; + while (ctp < ctEnd) { + if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp]; + else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ + ctp++; + } + return cSymbolTypeSizeEstimateInBits / 8; +} +static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + size_t cSeqSizeEstimate = 0; + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff, + 
nbSeq, fseTables->offcodeCTable, NULL, + ZSTDInternalConstants::OF_defaultNorm, ZSTDInternalConstants::OF_defaultNormLog, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL, + nbSeq, fseTables->litlengthCTable, ZSTDInternalConstants::LL_bits, + ZSTDInternalConstants::LL_defaultNorm, ZSTDInternalConstants::LL_defaultNormLog, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML, + nbSeq, fseTables->matchlengthCTable, ZSTDInternalConstants::ML_bits, + ZSTDInternalConstants::ML_defaultNorm, ZSTDInternalConstants::ML_defaultNormLog, + workspace, wkspSize); + if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; + return cSeqSizeEstimate + sequencesSectionHeaderSize; } -#endif /* FSE_H_FSE_STATIC_LINKING_ONLY */ +static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, + const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize, + int writeLitEntropy, int writeSeqEntropy) { + size_t cSizeEstimate = 0; + cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize, + &entropy->huf, &entropyMetadata->hufMetadata, + workspace, wkspSize, writeLitEntropy); + cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, + nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, + workspace, wkspSize, writeSeqEntropy); + return cSizeEstimate + ZSTDInternalConstants::ZSTD_blockHeaderSize; +} +static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata) +{ + if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle) + return 1; + if (fseMetadata->mlType == set_compressed || fseMetadata->mlType == set_rle) + return 1; + if (fseMetadata->ofType == set_compressed || 
fseMetadata->ofType == set_rle) + return 1; + return 0; +} -// LICENSE_CHANGE_END +/** ZSTD_compressSubBlock_multi() : + * Breaks super-block into multiple sub-blocks and compresses them. + * Entropy will be written to the first block. + * The following blocks will use repeat mode to compress. + * All sub-blocks are compressed blocks (no raw or rle blocks). + * @return : compressed size of the super block (which is multiple ZSTD blocks) + * Or 0 if it failed to compress. */ +static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, + const ZSTD_compressedBlockState_t* prevCBlock, + ZSTD_compressedBlockState_t* nextCBlock, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const int bmi2, U32 lastBlock, + void* workspace, size_t wkspSize) +{ + const seqDef* const sstart = seqStorePtr->sequencesStart; + const seqDef* const send = seqStorePtr->sequences; + const seqDef* sp = sstart; + const BYTE* const lstart = seqStorePtr->litStart; + const BYTE* const lend = seqStorePtr->lit; + const BYTE* lp = lstart; + BYTE const* ip = (BYTE const*)src; + BYTE const* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + const BYTE* llCodePtr = seqStorePtr->llCode; + const BYTE* mlCodePtr = seqStorePtr->mlCode; + const BYTE* ofCodePtr = seqStorePtr->ofCode; + size_t targetCBlockSize = cctxParams->targetCBlockSize; + size_t litSize, seqCount; + int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed; + int writeSeqEntropy = 1; + int lastSequence = 0; + DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)", + (unsigned)(lend-lp), (unsigned)(send-sstart)); + litSize = 0; + seqCount = 0; + do { + size_t cBlockSizeEstimate = 0; + if (sstart == send) { + lastSequence = 1; + } else { + const seqDef* const sequence = sp + seqCount; + lastSequence = sequence == 
send - 1; + litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength; + seqCount++; + } + if (lastSequence) { + assert(lp <= lend); + assert(litSize <= (size_t)(lend - lp)); + litSize = (size_t)(lend - lp); + } + /* I think there is an optimization opportunity here. + * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful + * since it recalculates estimate from scratch. + * For example, it would recount literal distribution and symbol codes everytime. + */ + cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount, + &nextCBlock->entropy, entropyMetadata, + workspace, wkspSize, writeLitEntropy, writeSeqEntropy); + if (cBlockSizeEstimate > targetCBlockSize || lastSequence) { + int litEntropyWritten = 0; + int seqEntropyWritten = 0; + const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence); + const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, + sp, seqCount, + lp, litSize, + llCodePtr, mlCodePtr, ofCodePtr, + cctxParams, + op, oend-op, + bmi2, writeLitEntropy, writeSeqEntropy, + &litEntropyWritten, &seqEntropyWritten, + lastBlock && lastSequence); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); + if (cSize > 0 && cSize < decompressedSize) { + DEBUGLOG(5, "Committed the sub-block"); + assert(ip + decompressedSize <= iend); + ip += decompressedSize; + sp += seqCount; + lp += litSize; + op += cSize; + llCodePtr += seqCount; + mlCodePtr += seqCount; + ofCodePtr += seqCount; + litSize = 0; + seqCount = 0; + /* Entropy only needs to be written once */ + if (litEntropyWritten) { + writeLitEntropy = 0; + } + if (seqEntropyWritten) { + writeSeqEntropy = 0; + } + } + } + } while (!lastSequence); + if (writeLitEntropy) { + DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten"); + memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf)); + } + if 
(writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) { + /* If we haven't written our entropy tables, then we've violated our contract and + * must emit an uncompressed block. + */ + DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten"); + return 0; + } + if (ip < iend) { + size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock); + DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip)); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + assert(cSize != 0); + op += cSize; + /* We have to regenerate the repcodes because we've skipped some sequences */ + if (sp < send) { + seqDef const* seq; + repcodes_t rep; + memcpy(&rep, prevCBlock->rep, sizeof(rep)); + for (seq = sstart; seq < sp; ++seq) { + rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); + } + memcpy(nextCBlock->rep, &rep, sizeof(rep)); + } + } + DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed"); + return op-ostart; +} +size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + void const* src, size_t srcSize, + unsigned lastBlock) { + ZSTD_entropyCTablesMetadata_t entropyMetadata; -/* ************************************************************** -* Error Management -****************************************************************/ -// #define FSE_isError ERR_isError + FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, + &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + &entropyMetadata, + zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); + return ZSTD_compressSubBlock_multi(&zc->seqStore, + zc->blockState.prevCBlock, + zc->blockState.nextCBlock, + &entropyMetadata, + &zc->appliedParams, + dst, dstCapacity, + src, srcSize, + zc->bmi2, lastBlock, + zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* 
statically allocated in resetCCtx */); +} -/* ************************************************************** -* Templates -****************************************************************/ -/* - designed to be included - for type-specific functions (template emulation in C) - Objective is to write these functions only once, for improved maintenance -*/ +} -/* safety checks */ -#ifndef FSE_FUNCTION_EXTENSION -# error "FSE_FUNCTION_EXTENSION must be defined" -#endif -#ifndef FSE_FUNCTION_TYPE -# error "FSE_FUNCTION_TYPE must be defined" -#endif -/* Function names */ -#define FSE_CAT(X,Y) X##Y -#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) -#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) +// LICENSE_CHANGE_END -namespace duckdb_zstd { -/* Function templates */ +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list -/* FSE_buildCTable_wksp() : - * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). 
- * wkspSize should be sized to handle worst case situation, which is `1<>1 : 1) ; - FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); - U32 const step = FSE_TABLESTEP(tableSize); - U32 cumul[FSE_MAX_SYMBOL_VALUE+2]; - - FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace; - U32 highThreshold = tableSize-1; - - /* CTable header */ - if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge); - tableU16[-2] = (U16) tableLog; - tableU16[-1] = (U16) maxSymbolValue; - assert(tableLog < 16); /* required for threshold strategy to work */ - /* For explanations on how to distribute symbol values over the table : - * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ - #ifdef __clang_analyzer__ - memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ - #endif - /* symbol start positions */ - { U32 u; - cumul[0] = 0; - for (u=1; u <= maxSymbolValue+1; u++) { - if (normalizedCounter[u-1]==-1) { /* Low proba symbol */ - cumul[u] = cumul[u-1] + 1; - tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); - } else { - cumul[u] = cumul[u-1] + normalizedCounter[u-1]; - } } - cumul[maxSymbolValue+1] = tableSize+1; - } - /* Spread symbols */ - { U32 position = 0; - U32 symbol; - for (symbol=0; symbol<=maxSymbolValue; symbol++) { - int nbOccurrences; - int const freq = normalizedCounter[symbol]; - for (nbOccurrences=0; nbOccurrences highThreshold) - position = (position + step) & tableMask; /* Low proba area */ - } } +namespace duckdb_zstd { - assert(position==0); /* Must have initialized all positions */ - } +void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashLarge = ms->hashTable; + U32 const hBitsL = cParams->hashLog; + U32 const mls = 
cParams->minMatch; + U32* const hashSmall = ms->chainTable; + U32 const hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* ip = base + ms->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const U32 fastHashFillStep = 3; - /* Build table */ - { U32 u; for (u=0; ucParams; + U32* const hashLong = ms->hashTable; + const U32 hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + const U32 hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + /* presumes that, if there is a dictionary, it must be using Attach mode */ + const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); + const BYTE* const prefixLowest = base + prefixLowestIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dictCParams = + dictMode == ZSTD_dictMatchState ? + &dms->cParams : NULL; + const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ? + dms->hashTable : NULL; + const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ? + dms->chainTable : NULL; + const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ? + dms->window.dictLimit : 0; + const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? + dms->window.base : NULL; + const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ? + dictBase + dictStartIndex : NULL; + const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? + dms->window.nextSrc : NULL; + const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? 
+ prefixLowestIndex - (U32)(dictEnd - dictBase) : + 0; + const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ? + dictCParams->hashLog : hBitsL; + const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ? + dictCParams->chainLog : hBitsS; + const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic"); -#ifndef FSE_COMMONDEFS_ONLY + assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); + /* if a dictionary is attached, it must be within window range */ + if (dictMode == ZSTD_dictMatchState) { + assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); + } -/*-************************************************************** -* FSE NCount encoding -****************************************************************/ -size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) -{ - size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3; - return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ -} + /* init */ + ip += (dictAndPrefixLength == 0); + if (dictMode == ZSTD_noDict) { + U32 const current = (U32)(ip - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); + U32 const maxRep = current - windowLow; + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + if (dictMode == ZSTD_dictMatchState) { + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. 
*/ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + } -static size_t -FSE_writeNCount_generic (void* header, size_t headerBufferSize, - const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, - unsigned writeIsSafe) -{ - BYTE* const ostart = (BYTE*) header; - BYTE* out = ostart; - BYTE* const oend = ostart + headerBufferSize; - int nbBits; - const int tableSize = 1 << tableLog; - int remaining; - int threshold; - U32 bitStream = 0; - int bitCount = 0; - unsigned symbol = 0; - unsigned const alphabetSize = maxSymbolValue + 1; - int previousIs0 = 0; + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + U32 offset; + size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); + size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); + size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8); + size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls); + U32 const current = (U32)(ip-base); + U32 const matchIndexL = hashLong[h2]; + U32 matchIndexS = hashSmall[h]; + const BYTE* matchLong = base + matchIndexL; + const BYTE* match = base + matchIndexS; + const U32 repIndex = current + 1 - offset_1; + const BYTE* repMatch = (dictMode == ZSTD_dictMatchState + && repIndex < prefixLowestIndex) ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + hashLong[h2] = hashSmall[h] = current; /* update hash tables */ - /* Table Size */ - bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount; - bitCount += 4; + /* check dictMatchState repcode */ + if (dictMode == ZSTD_dictMatchState + && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + goto _match_stored; + } - /* Init */ - remaining = tableSize+1; /* +1 for extra accuracy */ - threshold = tableSize; - nbBits = tableLog+1; + /* check noDict repcode */ + if ( dictMode == ZSTD_noDict + && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { + mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + goto _match_stored; + } - while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */ - if (previousIs0) { - unsigned start = symbol; - while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++; - if (symbol == alphabetSize) break; /* incorrect distribution */ - while (symbol >= start+24) { - start+=24; - bitStream += 0xFFFFU << bitCount; - if ((!writeIsSafe) && (out > oend-2)) - return ERROR(dstSize_tooSmall); /* Buffer overflow */ - out[0] = (BYTE) bitStream; - out[1] = (BYTE)(bitStream>>8); - out+=2; - bitStream>>=16; + if (matchIndexL > prefixLowestIndex) { + /* check prefix long match */ + if (MEM_read64(matchLong) == MEM_read64(ip)) { + mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; + offset = (U32)(ip-matchLong); + while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + goto _match_found; } - while (symbol >= start+3) { - start+=3; - bitStream += 3 << bitCount; - bitCount += 2; + } else if (dictMode == ZSTD_dictMatchState) { + /* check dictMatchState long match */ + U32 const dictMatchIndexL = dictHashLong[dictHL]; + const BYTE* dictMatchL = dictBase + dictMatchIndexL; + assert(dictMatchL < dictEnd); + + if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) { + mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, 
prefixLowest) + 8; + offset = (U32)(current - dictMatchIndexL - dictIndexDelta); + while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ + goto _match_found; + } } + + if (matchIndexS > prefixLowestIndex) { + /* check prefix short match */ + if (MEM_read32(match) == MEM_read32(ip)) { + goto _search_next_long; } - bitStream += (symbol-start) << bitCount; - bitCount += 2; - if (bitCount>16) { - if ((!writeIsSafe) && (out > oend - 2)) - return ERROR(dstSize_tooSmall); /* Buffer overflow */ - out[0] = (BYTE)bitStream; - out[1] = (BYTE)(bitStream>>8); - out += 2; - bitStream >>= 16; - bitCount -= 16; + } else if (dictMode == ZSTD_dictMatchState) { + /* check dictMatchState short match */ + U32 const dictMatchIndexS = dictHashSmall[dictHS]; + match = dictBase + dictMatchIndexS; + matchIndexS = dictMatchIndexS + dictIndexDelta; + + if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) { + goto _search_next_long; } } - { int count = normalizedCounter[symbol++]; - int const max = (2*threshold-1) - remaining; - remaining -= count < 0 ? -count : count; - count++; /* +1 for extra accuracy */ - if (count>=threshold) - count += max; /* [0..max[ [max..threshold[ (...) 
[threshold+max 2*threshold[ */ - bitStream += count << bitCount; - bitCount += nbBits; - bitCount -= (count>=1; } - } - if (bitCount>16) { - if ((!writeIsSafe) && (out > oend - 2)) - return ERROR(dstSize_tooSmall); /* Buffer overflow */ - out[0] = (BYTE)bitStream; - out[1] = (BYTE)(bitStream>>8); - out += 2; - bitStream >>= 16; - bitCount -= 16; - } } - if (remaining != 1) - return ERROR(GENERIC); /* incorrect normalized distribution */ - assert(symbol <= alphabetSize); + ip += ((ip-anchor) >> kSearchStrength) + 1; +#if defined(__aarch64__) + PREFETCH_L1(ip+256); +#endif + continue; - /* flush remaining bitStream */ - if ((!writeIsSafe) && (out > oend - 2)) - return ERROR(dstSize_tooSmall); /* Buffer overflow */ - out[0] = (BYTE)bitStream; - out[1] = (BYTE)(bitStream>>8); - out+= (bitCount+7) /8; +_search_next_long: - return (out-ostart); -} + { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); + U32 const matchIndexL3 = hashLong[hl3]; + const BYTE* matchL3 = base + matchIndexL3; + hashLong[hl3] = current + 1; + /* check prefix long +1 match */ + if (matchIndexL3 > prefixLowestIndex) { + if (MEM_read64(matchL3) == MEM_read64(ip+1)) { + mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; + ip++; + offset = (U32)(ip-matchL3); + while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ + goto _match_found; + } + } else if (dictMode == ZSTD_dictMatchState) { + /* check dict long +1 match */ + U32 const dictMatchIndexL3 = dictHashLong[dictHLNext]; + const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; + assert(dictMatchL3 < dictEnd); + if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) { + mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8; + ip++; + offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta); + while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == 
dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ + goto _match_found; + } } } -size_t FSE_writeNCount (void* buffer, size_t bufferSize, - const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) -{ - if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */ - if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */ + /* if no long +1 match, explore the short match we found */ + if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { + mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4; + offset = (U32)(current - matchIndexS); + while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } else { + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + offset = (U32)(ip - match); + while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } - if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) - return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); + /* fall-through */ - return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */); -} +_match_found: + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); -/*-************************************************************** -* FSE Compression Code -****************************************************************/ +_match_stored: + /* match found */ + ip += mLength; + anchor = ip; -FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) -{ - size_t size; - if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; - size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); - return (FSE_CTable*)malloc(size); -} + if (ip <= 
ilimit) { + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = current+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } -void FSE_freeCTable (FSE_CTable* ct) { free(ct); } + /* check immediate repcode */ + if (dictMode == ZSTD_dictMatchState) { + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState + && repIndex2 < prefixLowestIndex ? + dictBase + repIndex2 - dictIndexDelta : + base + repIndex2; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } -/* provides the minimum logSize to safely represent a distribution */ -static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) -{ - U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1; - U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; - U32 minBits = minBitsSrc < minBitsSymbols ? 
minBitsSrc : minBitsSymbols; - assert(srcSize > 1); /* Not supported, RLE should be used instead */ - return minBits; + if (dictMode == ZSTD_noDict) { + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } + } /* while (ip < ilimit) */ + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? offset_2 : offsetSaved; + + /* Return the last literals size */ + return (size_t)(iend - anchor); } -unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) + +size_t ZSTD_compressBlock_doubleFast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) { - U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; - U32 tableLog = maxTableLog; - U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); - assert(srcSize > 1); /* Not supported, RLE should be used instead */ - if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; - if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */ - if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */ - if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG; - if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG; - return tableLog; + const U32 mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, 
srcSize, 4, ZSTD_noDict); + case 5 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict); + case 6 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict); + case 7 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict); + } } -unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) + +size_t ZSTD_compressBlock_doubleFast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) { - return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); + const U32 mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState); + case 5 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState); + case 6 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState); + case 7 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState); + } } -/* Secondary normalization method. - To be used when primary method fails. 
*/ - -static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue) +static size_t ZSTD_compressBlock_doubleFast_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls /* template */) { - short const NOT_YET_ASSIGNED = -2; - U32 s; - U32 distributed = 0; - U32 ToDistribute; + ZSTD_compressionParameters const* cParams = &ms->cParams; + U32* const hashLong = ms->hashTable; + U32 const hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + U32 const hBitsS = cParams->chainLog; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); + const U32 dictStartIndex = lowLimit; + const U32 dictLimit = ms->window.dictLimit; + const U32 prefixStartIndex = (dictLimit > lowLimit) ? 
dictLimit : lowLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const dictStart = dictBase + dictStartIndex; + const BYTE* const dictEnd = dictBase + prefixStartIndex; + U32 offset_1=rep[0], offset_2=rep[1]; - /* Init */ - U32 const lowThreshold = (U32)(total >> tableLog); - U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize); - for (s=0; s<=maxSymbolValue; s++) { - if (count[s] == 0) { - norm[s]=0; - continue; - } - if (count[s] <= lowThreshold) { - norm[s] = -1; - distributed++; - total -= count[s]; - continue; - } - if (count[s] <= lowOne) { - norm[s] = 1; - distributed++; - total -= count[s]; - continue; - } + /* if extDict is invalidated due to maxDistance, switch to "regular" variant */ + if (prefixStartIndex == dictStartIndex) + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict); - norm[s]=NOT_YET_ASSIGNED; - } - ToDistribute = (1 << tableLog) - distributed; + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); + const U32 matchIndex = hashSmall[hSmall]; + const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; + const BYTE* match = matchBase + matchIndex; - if (ToDistribute == 0) - return 0; + const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8); + const U32 matchLongIndex = hashLong[hLong]; + const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? 
dictBase : base; + const BYTE* matchLong = matchLongBase + matchLongIndex; - if ((total / ToDistribute) > lowOne) { - /* risk of rounding to zero */ - lowOne = (U32)((total * 3) / (ToDistribute * 2)); - for (s=0; s<=maxSymbolValue; s++) { - if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { - norm[s] = 1; - distributed++; - total -= count[s]; + const U32 current = (U32)(ip-base); + const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ + const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + size_t mLength; + hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */ + + if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ + & (repIndex > dictStartIndex)) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + } else { + if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { + const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? 
dictStart : prefixStart; + U32 offset; + mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8; + offset = current - matchLongIndex; + while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { + size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + U32 const matchIndex3 = hashLong[h3]; + const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base; + const BYTE* match3 = match3Base + matchIndex3; + U32 offset; + hashLong[h3] = current + 1; + if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { + const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart; + mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8; + ip++; + offset = current+1 - matchIndex3; + while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ + } else { + const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? 
dictStart : prefixStart; + mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; + offset = current - matchIndex; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else { + ip += ((ip-anchor) >> kSearchStrength) + 1; continue; } } - ToDistribute = (1 << tableLog) - distributed; - } - if (distributed == maxSymbolValue+1) { - /* all values are pretty poor; - probably incompressible data (should have already been detected); - find max, then give all remaining points to max */ - U32 maxV = 0, maxC = 0; - for (s=0; s<=maxSymbolValue; s++) - if (count[s] > maxC) { maxV=s; maxC=count[s]; } - norm[maxV] += (short)ToDistribute; - return 0; - } + /* move to next sequence start */ + ip += mLength; + anchor = ip; - if (total == 0) { - /* all of the symbols were low enough for the lowOne or lowThreshold */ - for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1)) - if (norm[s] > 0) { ToDistribute--; norm[s]++; } - return 0; - } + if (ip <= ilimit) { + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = current+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } - { U64 const vStepLog = 62 - tableLog; - U64 const mid = (1ULL << (vStepLog-1)) - 1; - U64 const rStep = ((((U64)1<> vStepLog); - U32 const sEnd = (U32)(end >> vStepLog); - U32 const weight = sEnd - sStart; - if (weight < 1) - return ERROR(GENERIC); - norm[s] = (short)weight; - tmpTotal = end; + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = 
(U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ + & (repIndex2 > dictStartIndex)) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; } } } - return 0; + /* save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); } -size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, - const unsigned* count, size_t total, - unsigned maxSymbolValue) +size_t ZSTD_compressBlock_doubleFast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) { - /* Sanity checks */ - if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; - if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */ - if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ - if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ - - { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; - U64 const scale = 62 - tableLog; - U64 const step = ((U64)1<<62) / total; /* <== here, one division ! 
*/ - U64 const vStep = 1ULL<<(scale-20); - int stillToDistribute = 1<> tableLog); - - for (s=0; s<=maxSymbolValue; s++) { - if (count[s] == total) return 0; /* rle special case */ - if (count[s] == 0) { normalizedCounter[s]=0; continue; } - if (count[s] <= lowThreshold) { - normalizedCounter[s] = -1; - stillToDistribute--; - } else { - short proba = (short)((count[s]*step) >> scale); - if (proba<8) { - U64 restToBeat = vStep * rtbTable[proba]; - proba += (count[s]*step) - ((U64)proba< restToBeat; - } - if (proba > largestP) { largestP=proba; largest=s; } - normalizedCounter[s] = proba; - stillToDistribute -= proba; - } } - if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { - /* corner case, need another normalization method */ - size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); - if (FSE_isError(errorCode)) return errorCode; - } - else normalizedCounter[largest] += (short)stillToDistribute; - } - -#if 0 - { /* Print Table (debug) */ - U32 s; - U32 nTotal = 0; - for (s=0; s<=maxSymbolValue; s++) - RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]); - for (s=0; s<=maxSymbolValue; s++) - nTotal += abs(normalizedCounter[s]); - if (nTotal != (1U<cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); } -#endif - - return tableLog; } +} -/* fake FSE_CTable, for raw (uncompressed) input */ -size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits) -{ - const unsigned tableSize = 1 << nbBits; - const unsigned tableMask = tableSize - 1; - const unsigned maxSymbolValue = tableMask; - void* 
const ptr = ct; - U16* const tableU16 = ( (U16*) ptr) + 2; - void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */ - FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); - unsigned s; - - /* Sanity checks */ - if (nbBits < 1) return ERROR(GENERIC); /* min size */ - /* header */ - tableU16[-2] = (U16) nbBits; - tableU16[-1] = (U16) maxSymbolValue; +// LICENSE_CHANGE_END - /* Build table */ - for (s=0; scParams; + U32* const hashTable = ms->hashTable; + U32 const hBits = cParams->hashLog; + U32 const mls = cParams->minMatch; + const BYTE* const base = ms->window.base; + const BYTE* ip = base + ms->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const U32 fastHashFillStep = 3; - return 0; + /* Always insert every fastHashFillStep position into the hash table. + * Insert the other positions if their hash entry is empty. + */ + for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) { + U32 const current = (U32)(ip - base); + size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls); + hashTable[hash0] = current; + if (dtlm == ZSTD_dtlm_fast) continue; + /* Only load extra positions for ZSTD_dtlm_full */ + { U32 p; + for (p = 1; p < fastHashFillStep; ++p) { + size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls); + if (hashTable[hash] == 0) { /* not yet filled */ + hashTable[hash] = current + p; + } } } } } -static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize, - const void* src, size_t srcSize, - const FSE_CTable* ct, const unsigned fast) +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_fast_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls) { - const BYTE* const istart = (const BYTE*) src; + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 
0 */ + size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */ + const BYTE* ip0 = istart; + const BYTE* ip1; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); + const BYTE* const prefixStart = base + prefixStartIndex; const BYTE* const iend = istart + srcSize; - const BYTE* ip=iend; - - BIT_CStream_t bitC; - FSE_CState_t CState1, CState2; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; /* init */ - if (srcSize <= 2) return 0; - { size_t const initError = BIT_initCStream(&bitC, dst, dstSize); - if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ } - -#define FSE_FLUSHBITS(s) (fast ? 
BIT_flushBitsFast(s) : BIT_flushBits(s)) - - if (srcSize & 1) { - FSE_initCState2(&CState1, ct, *--ip); - FSE_initCState2(&CState2, ct, *--ip); - FSE_encodeSymbol(&bitC, &CState1, *--ip); - FSE_FLUSHBITS(&bitC); - } else { - FSE_initCState2(&CState2, ct, *--ip); - FSE_initCState2(&CState1, ct, *--ip); - } - - /* join to mod 4 */ - srcSize -= 2; - if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */ - FSE_encodeSymbol(&bitC, &CState2, *--ip); - FSE_encodeSymbol(&bitC, &CState1, *--ip); - FSE_FLUSHBITS(&bitC); + DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); + ip0 += (ip0 == prefixStart); + ip1 = ip0 + 1; + { U32 const current = (U32)(ip0 - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); + U32 const maxRep = current - windowLow; + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; } - /* 2 or 4 encoding per loop */ - while ( ip>istart ) { + /* Main Search Loop */ +#ifdef __INTEL_COMPILER + /* From intel 'The vector pragma indicates that the loop should be + * vectorized if it is legal to do so'. 
Can be used together with + * #pragma ivdep (but have opted to exclude that because intel + * warns against using it).*/ + #pragma vector always +#endif + while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */ + size_t mLength; + BYTE const* ip2 = ip0 + 2; + size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls); + U32 const val0 = MEM_read32(ip0); + size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls); + U32 const val1 = MEM_read32(ip1); + U32 const current0 = (U32)(ip0-base); + U32 const current1 = (U32)(ip1-base); + U32 const matchIndex0 = hashTable[h0]; + U32 const matchIndex1 = hashTable[h1]; + BYTE const* repMatch = ip2 - offset_1; + const BYTE* match0 = base + matchIndex0; + const BYTE* match1 = base + matchIndex1; + U32 offcode; - FSE_encodeSymbol(&bitC, &CState2, *--ip); +#if defined(__aarch64__) + PREFETCH_L1(ip0+256); +#endif - if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */ - FSE_FLUSHBITS(&bitC); + hashTable[h0] = current0; /* update hash table */ + hashTable[h1] = current1; /* update hash table */ - FSE_encodeSymbol(&bitC, &CState1, *--ip); + assert(ip0 + 1 == ip1); - if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */ - FSE_encodeSymbol(&bitC, &CState2, *--ip); - FSE_encodeSymbol(&bitC, &CState1, *--ip); + if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) { + mLength = (ip2[-1] == repMatch[-1]) ? 
1 : 0; + ip0 = ip2 - mLength; + match0 = repMatch - mLength; + mLength += 4; + offcode = 0; + goto _match; + } + if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) { + /* found a regular match */ + goto _offset; + } + if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) { + /* found a regular match after one literal */ + ip0 = ip1; + match0 = match1; + goto _offset; + } + { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize; + assert(step >= 2); + ip0 += step; + ip1 += step; + continue; } +_offset: /* Requires: ip0, match0 */ + /* Compute the offset code */ + offset_2 = offset_1; + offset_1 = (U32)(ip0-match0); + offcode = offset_1 + ZSTD_REP_MOVE; + mLength = 4; + /* Count the backwards match length */ + while (((ip0>anchor) & (match0>prefixStart)) + && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */ - FSE_FLUSHBITS(&bitC); - } +_match: /* Requires: ip0, match0, offcode */ + /* Count the forward length */ + mLength += ZSTD_count(ip0+mLength, match0+mLength, iend); + ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH); + /* match found */ + ip0 += mLength; + anchor = ip0; - FSE_flushCState(&bitC, &CState2); - FSE_flushCState(&bitC, &CState1); - return BIT_closeCStream(&bitC); -} + if (ip0 <= ilimit) { + /* Fill Table */ + assert(base+current0+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); -size_t FSE_compress_usingCTable (void* dst, size_t dstSize, - const void* src, size_t srcSize, - const FSE_CTable* ct) -{ - unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); + if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */ + while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) { + /* store sequence */ + size_t const rLength = 
ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; + { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ + hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); + ip0 += rLength; + ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH); + anchor = ip0; + continue; /* faster when present (confirmed on gcc-8) ... (?) */ + } } } + ip1 = ip0 + 1; + } - if (fast) - return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); - else - return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); -} + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? offset_2 : offsetSaved; + /* Return the last literals size */ + return (size_t)(iend - anchor); +} -size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } -/* FSE_compress_wksp() : - * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). 
- * `wkspSize` size must be `(1<cParams.minMatch; + assert(ms->dictMatchState == NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7); + } +} - unsigned count[FSE_MAX_SYMBOL_VALUE+1]; - S16 norm[FSE_MAX_SYMBOL_VALUE+1]; - FSE_CTable* CTable = (FSE_CTable*)workSpace; - size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue); - void* scratchBuffer = (void*)(CTable + CTableSize); - size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable)); +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_fast_dictMatchState_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, U32 const mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + U32 const stepSize = cParams->targetLength + !(cParams->targetLength); + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 prefixStartIndex = ms->window.dictLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; - /* init conditions */ - if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge); - if (srcSize <= 1) return 0; /* Not compressible */ - if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; - if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG; + const 
ZSTD_matchState_t* const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; + const U32* const dictHashTable = dms->hashTable; + const U32 dictStartIndex = dms->window.dictLimit; + const BYTE* const dictBase = dms->window.base; + const BYTE* const dictStart = dictBase + dictStartIndex; + const BYTE* const dictEnd = dms->window.nextSrc; + const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase); + const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); + const U32 dictHLog = dictCParams->hashLog; - /* Scan input and build symbol stats */ - { CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) ); - if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */ - if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ - if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ - } + /* if a dictionary is still attached, it necessarily means that + * it is within window size. So we just check it. 
*/ + const U32 maxDistance = 1U << cParams->windowLog; + const U32 endIndex = (U32)((size_t)(ip - base) + srcSize); + assert(endIndex - prefixStartIndex <= maxDistance); + (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ - tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue); - CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) ); + /* ensure there will be no no underflow + * when translating a dict index into a local index */ + assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); - /* Write table description header */ - { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); - op += nc_err; - } + /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); + ip += (dictAndPrefixLength == 0); + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); - /* Compress */ - CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) ); - { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) ); - if (cSize == 0) return 0; /* not enough space for compressed data */ - op += cSize; + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + size_t const h = ZSTD_hashPtr(ip, hlog, mls); + U32 const current = (U32)(ip-base); + U32 const matchIndex = hashTable[h]; + const BYTE* match = base + matchIndex; + const U32 repIndex = current + 1 - offset_1; + const BYTE* repMatch = (repIndex < prefixStartIndex) ? 
+ dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + hashTable[h] = current; /* update hash table */ + + if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + } else if ( (matchIndex <= prefixStartIndex) ) { + size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); + U32 const dictMatchIndex = dictHashTable[dictHash]; + const BYTE* dictMatch = dictBase + dictMatchIndex; + if (dictMatchIndex <= dictStartIndex || + MEM_read32(dictMatch) != MEM_read32(ip)) { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } else { + /* found a dict match */ + U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta); + mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; + while (((ip>anchor) & (dictMatch>dictStart)) + && (ip[-1] == dictMatch[-1])) { + ip--; dictMatch--; mLength++; + } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + } else if (MEM_read32(match) != MEM_read32(ip)) { + /* it's not a match, and we're not going to check the dictionary */ + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } else { + /* found a regular match */ + U32 const offset = (U32)(ip-match); + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + while (((ip>anchor) & (match>prefixStart)) + && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + 
ZSTD_REP_MOVE, mLength-MINMATCH); + } + + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + assert(base+current+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); + + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? + dictBase - dictIndexDelta + repIndex2 : + base + repIndex2; + if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } + } } - /* check compressibility */ - if ( (size_t)(op-ostart) >= srcSize-1 ) return 0; + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? 
offset_2 : offsetSaved; - return op-ostart; + /* Return the last literals size */ + return (size_t)(iend - anchor); } -typedef struct { - FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; - BYTE scratchBuffer[1 << FSE_MAX_TABLELOG]; -} fseWkspMax_t; - -size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) +size_t ZSTD_compressBlock_fast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) { - fseWkspMax_t scratchBuffer; - DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ - if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); - return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer)); + U32 const mls = ms->cParams.minMatch; + assert(ms->dictMatchState != NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7); + } } -size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize) + +static size_t ZSTD_compressBlock_fast_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, U32 const mls) { - return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG); + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + 
U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + U32 const stepSize = cParams->targetLength + !(cParams->targetLength); + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); + const U32 dictStartIndex = lowLimit; + const BYTE* const dictStart = dictBase + dictStartIndex; + const U32 dictLimit = ms->window.dictLimit; + const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const dictEnd = dictBase + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + U32 offset_1=rep[0], offset_2=rep[1]; + + DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1); + + /* switch to "regular" variant if extDict is invalidated due to maxDistance */ + if (prefixStartIndex == dictStartIndex) + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t h = ZSTD_hashPtr(ip, hlog, mls); + const U32 matchIndex = hashTable[h]; + const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + const U32 current = (U32)(ip-base); + const U32 repIndex = current + 1 - offset_1; + const BYTE* const repBase = repIndex < prefixStartIndex ? 
dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + hashTable[h] = current; /* update hash table */ + DEBUGLOG(7, "offset_1 = %u , current = %u", offset_1, current); + assert(offset_1 <= current +1); /* check repIndex */ + + if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + } else { + if ( (matchIndex < dictStartIndex) || + (MEM_read32(match) != MEM_read32(ip)) ) { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } + { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; + U32 const offset = current - matchIndex; + size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset_2 = offset_1; offset_1 = offset; /* update offset history */ + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ip += mLength; + anchor = ip; + } } + + if (ip <= ilimit) { + /* Fill Table */ + hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; + hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? 
dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); } + +size_t ZSTD_compressBlock_fast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); + } } -#endif /* FSE_COMMONDEFS_ONLY */ +} // LICENSE_CHANGE_END // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 // See the end of this file for a list -/* ****************************************************************** - * hist : Histogram functions - * part of Finite State Entropy project - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. 
- * - * You can contact the author at : - * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - Public forum : https://groups.google.com/forum/#!forum/lz4c +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ + */ -/* --- dependencies --- */ - /* U32, BYTE, etc. */ - /* assert, DEBUGLOG */ - /* ERROR */ -namespace duckdb_zstd { -/* --- Error management --- */ -unsigned HIST_isError(size_t code) { return ERR_isError(code); } +/*-************************************* +* Binary Tree search +***************************************/ -/*-************************************************************** - * Histogram functions - ****************************************************************/ -unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize) +namespace duckdb_zstd { + +static void +ZSTD_updateDUBT(ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* iend, + U32 mls) { - const BYTE* ip = (const BYTE*)src; - const BYTE* const end = ip + srcSize; - unsigned maxSymbolValue = *maxSymbolValuePtr; - unsigned largestCount=0; + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; - memset(count, 0, (maxSymbolValue+1) * sizeof(*count)); - if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; - while (ipwindow.base; + U32 const target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; - while 
(!count[maxSymbolValue]) maxSymbolValue--; - *maxSymbolValuePtr = maxSymbolValue; + if (idx != target) + DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)", + idx, target, ms->window.dictLimit); + assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */ + (void)iend; - { U32 s; - for (s=0; s<=maxSymbolValue; s++) - if (count[s] > largestCount) largestCount = count[s]; - } + assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */ + for ( ; idx < target ; idx++) { + size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */ + U32 const matchIndex = hashTable[h]; - return largestCount; + U32* const nextCandidatePtr = bt + 2*(idx&btMask); + U32* const sortMarkPtr = nextCandidatePtr + 1; + + DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx); + hashTable[h] = idx; /* Update Hash Table */ + *nextCandidatePtr = matchIndex; /* update BT like a chain */ + *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK; + } + ms->nextToUpdate = target; } -typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e; -/* HIST_count_parallel_wksp() : - * store histogram into 4 intermediate tables, recombined at the end. - * this design makes better use of OoO cpus, - * and is noticeably faster when some values are heavily repeated. - * But it needs some additional workspace for intermediate tables. - * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32. - * @return : largest histogram frequency, - * or an error code (notably when histogram would be larger than *maxSymbolValuePtr). 
*/ -static size_t HIST_count_parallel_wksp( - unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize, - HIST_checkInput_e check, - U32* const workSpace) +/** ZSTD_insertDUBT1() : + * sort one already inserted but unsorted position + * assumption : current >= btlow == (current - btmask) + * doesn't fail */ +static void +ZSTD_insertDUBT1(ZSTD_matchState_t* ms, + U32 current, const BYTE* inputEnd, + U32 nbCompares, U32 btLow, + const ZSTD_dictMode_e dictMode) { - const BYTE* ip = (const BYTE*)source; - const BYTE* const iend = ip+sourceSize; - unsigned maxSymbolValue = *maxSymbolValuePtr; - unsigned max=0; - U32* const Counting1 = workSpace; - U32* const Counting2 = Counting1 + 256; - U32* const Counting3 = Counting2 + 256; - U32* const Counting4 = Counting3 + 256; - - memset(workSpace, 0, 4*256*sizeof(unsigned)); - - /* safety checks */ - if (!sourceSize) { - memset(count, 0, maxSymbolValue + 1); - *maxSymbolValuePtr = 0; - return 0; - } - if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current; + const BYTE* const iend = (current>=dictLimit) ? 
inputEnd : dictBase + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = smallerPtr + 1; + U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ + U32 dummy32; /* to be nullified at the end */ + U32 const windowValid = ms->window.lowLimit; + U32 const maxDistance = 1U << cParams->windowLog; + U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid; - /* by stripes of 16 bytes */ - { U32 cached = MEM_read32(ip); ip += 4; - while (ip < iend-15) { - U32 c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - } - ip-=4; - } - /* finish last symbols */ - while (ip= btLow); + assert(ip < iend); /* condition for ZSTD_count */ - if (check) { /* verify stats will fit into destination table */ - U32 s; for (s=255; s>maxSymbolValue; s--) { - Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; - if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); - } } + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb 
of common bytes */ + assert(matchIndex < current); + /* note : all candidates are now supposed sorted, + * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK + * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */ - { U32 s; - if (maxSymbolValue > 255) maxSymbolValue = 255; - for (s=0; s<=maxSymbolValue; s++) { - count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; - if (count[s] > max) max = count[s]; - } } + if ( (dictMode != ZSTD_extDict) + || (matchIndex+matchLength >= dictLimit) /* both in current segment*/ + || (current < dictLimit) /* both in extDict */) { + const BYTE* const mBase = ( (dictMode != ZSTD_extDict) + || (matchIndex+matchLength >= dictLimit)) ? + base : dictBase; + assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */ + || (current < dictLimit) ); + match = mBase + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* preparation for next read of match[matchLength] */ + } - while (!count[maxSymbolValue]) maxSymbolValue--; - *maxSymbolValuePtr = maxSymbolValue; - return (size_t)max; -} + DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ", + current, matchIndex, (U32)matchLength); -/* HIST_countFast_wksp() : - * Same as HIST_countFast(), but using an externally provided scratch buffer. 
- * `workSpace` is a writable buffer which must be 4-bytes aligned, - * `workSpaceSize` must be >= HIST_WKSP_SIZE - */ -size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize, - void* workSpace, size_t workSpaceSize) -{ - if (sourceSize < 1500) /* heuristic threshold */ - return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize); - if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ - if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); - return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace); -} + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } -/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ -size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize) -{ - unsigned tmpCounters[HIST_WKSP_SIZE_U32]; - return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters)); -} + if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u", + matchIndex, btLow, nextPtr[1]); + smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + 
commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u", + matchIndex, btLow, nextPtr[0]); + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } -/* HIST_count_wksp() : - * Same as HIST_count(), but using an externally provided scratch buffer. - * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ -size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize, - void* workSpace, size_t workSpaceSize) -{ - if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ - if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); - if (*maxSymbolValuePtr < 255) - return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace); - *maxSymbolValuePtr = 255; - return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize); + *smallerPtr = *largerPtr = 0; } -size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize) -{ - unsigned tmpCounters[HIST_WKSP_SIZE_U32]; - return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters)); -} -} +static size_t +ZSTD_DUBT_findBetterDictMatch ( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + size_t bestLength, + U32 nbCompares, + U32 const mls, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_matchState_t * const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dmsCParams = &dms->cParams; + const U32 * const dictHashTable = dms->hashTable; + U32 const hashLog = dmsCParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 dictMatchIndex = dictHashTable[h]; + const BYTE* const base = ms->window.base; + const BYTE* const prefixStart = base + 
ms->window.dictLimit; + U32 const current = (U32)(ip-base); + const BYTE* const dictBase = dms->window.base; + const BYTE* const dictEnd = dms->window.nextSrc; + U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base); + U32 const dictLowLimit = dms->window.lowLimit; + U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit; -// LICENSE_CHANGE_END + U32* const dictBt = dms->chainTable; + U32 const btLog = dmsCParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask; + size_t commonLengthSmaller=0, commonLengthLarger=0; -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list + (void)dictMode; + assert(dictMode == ZSTD_dictMatchState); -/* ****************************************************************** - * Huffman encoder, part of New Generation Entropy library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - Public forum : https://groups.google.com/forum/#!forum/lz4c - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
-****************************************************************** */ + while (nbCompares-- && (dictMatchIndex > dictLowLimit)) { + U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match = dictBase + dictMatchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (dictMatchIndex+matchLength >= dictHighLimit) + match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */ -/* ************************************************************** -* Compiler specifics -****************************************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -#endif + if (matchLength > bestLength) { + U32 matchIndex = dictMatchIndex + dictIndexDelta; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) { + DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)", + current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex); + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; + } + if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */ + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + if (match[matchLength] < ip[matchLength]) { + if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + dictMatchIndex = nextPtr[1]; /* new matchIndex 
larger than previous (closer to current) */ + } else { + /* match is larger than current */ + if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ + commonLengthLarger = matchLength; + dictMatchIndex = nextPtr[0]; + } + } -/* ************************************************************** -* Includes -****************************************************************/ -#include /* memcpy, memset */ -#include /* printf (debug) */ + if (bestLength >= MINMATCH) { + U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)", + current, (U32)bestLength, (U32)*offsetPtr, mIndex); + } + return bestLength; +} - /* header compression */ +static size_t +ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + U32 const mls, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 matchIndex = hashTable[h]; + const BYTE* const base = ms->window.base; + U32 const current = (U32)(ip-base); + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog); + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 const btLow = (btMask >= current) ? 
0 : current - btMask; + U32 const unsortLimit = MAX(btLow, windowLow); -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list + U32* nextCandidate = bt + 2*(matchIndex&btMask); + U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1; + U32 nbCompares = 1U << cParams->searchLog; + U32 nbCandidates = nbCompares; + U32 previousCandidate = 0; -/* ****************************************************************** - * huff0 huffman codec, - * part of Finite State Entropy library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
-****************************************************************** */ + DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current); + assert(ip <= iend-8); /* required for h calculation */ -#ifndef HUF_H_298734234 -#define HUF_H_298734234 + /* reach end of unsorted candidates list */ + while ( (matchIndex > unsortLimit) + && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK) + && (nbCandidates > 1) ) { + DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted", + matchIndex); + *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */ + previousCandidate = matchIndex; + matchIndex = *nextCandidate; + nextCandidate = bt + 2*(matchIndex&btMask); + unsortedMark = bt + 2*(matchIndex&btMask) + 1; + nbCandidates --; + } -/* *** Dependencies *** */ -#include /* size_t */ + /* nullify last candidate if it's still unsorted + * simplification, detrimental to compression ratio, beneficial for speed */ + if ( (matchIndex > unsortLimit) + && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) { + DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u", + matchIndex); + *nextCandidate = *unsortedMark = 0; + } + /* batch sort stacked candidates */ + matchIndex = previousCandidate; + while (matchIndex) { /* will end on matchIndex == 0 */ + U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1; + U32 const nextCandidateIdx = *nextCandidateIdxPtr; + ZSTD_insertDUBT1(ms, matchIndex, iend, + nbCandidates, unsortLimit, dictMode); + matchIndex = nextCandidateIdx; + nbCandidates++; + } -/* *** library symbols visibility *** */ -/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual, - * HUF symbols remain "private" (internal symbols for library only). 
- * Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */ -#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) -# define HUF_PUBLIC_API __attribute__ ((visibility ("default"))) -#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ -# define HUF_PUBLIC_API __declspec(dllexport) -#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) -# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */ -#else -# define HUF_PUBLIC_API -#endif + /* find longest match */ + { size_t commonLengthSmaller = 0, commonLengthLarger = 0; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = bt + 2*(current&btMask) + 1; + U32 matchEndIdx = current + 8 + 1; + U32 dummy32; /* to be nullified at the end */ + size_t bestLength = 0; -namespace duckdb_zstd { + matchIndex = hashTable[h]; + hashTable[h] = current; /* Update Hash Table */ -/* ========================== */ -/* *** simple functions *** */ -/* ========================== */ + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; -/** HUF_compress() : - * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. - * 'dst' buffer must be already allocated. - * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). - * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. - * @return : size of compressed data (<= `dstCapacity`). - * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! 
- * if HUF_isError(return), compression failed (more details using HUF_getErrorName()) - */ -HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity, - const void* src, size_t srcSize); + if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } -/** HUF_decompress() : - * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', - * into already allocated buffer 'dst', of minimum size 'dstSize'. - * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. - * Note : in contrast with FSE, HUF_decompress can regenerate - * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, - * because it knows size to regenerate (originalSize). - * @return : size of regenerated data (== originalSize), - * or an error code, which can be tested using HUF_isError() - */ -HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize, - const void* cSrc, size_t cSrcSize); + if (matchLength > bestLength) { + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + if (dictMode == ZSTD_dictMatchState) { + nbCompares = 0; /* in addition to avoiding checking any + * further in this loop, make sure we + * skip checking in the dictionary. 
*/ + } + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + if (match[matchLength] < ip[matchLength]) { + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } -/* *** Tool functions *** */ -#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ -HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ + *smallerPtr = *largerPtr = 0; -/* Error Management */ -HUF_PUBLIC_API unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ -HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ + if (dictMode == ZSTD_dictMatchState && nbCompares) { + bestLength = ZSTD_DUBT_findBetterDictMatch( + ms, ip, iend, + offsetPtr, bestLength, nbCompares, + mls, dictMode); + } + assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */ + ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ + if (bestLength >= MINMATCH) { + U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", + current, (U32)bestLength, (U32)*offsetPtr, mIndex); + } + return bestLength; + } +} -/* 
*** Advanced function *** */ -/** HUF_compress2() : - * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`. - * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX . - * `tableLog` must be `<= HUF_TABLELOG_MAX` . */ -HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned tableLog); +/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ +FORCE_INLINE_TEMPLATE size_t +ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls /* template */, + const ZSTD_dictMode_e dictMode) +{ + DEBUGLOG(7, "ZSTD_BtFindBestMatch"); + if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateDUBT(ms, ip, iLimit, mls); + return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode); +} -/** HUF_compress4X_wksp() : - * Same as HUF_compress2(), but uses externally allocated `workSpace`. 
- * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */ -#define HUF_WORKSPACE_SIZE ((6 << 10) + 256) -#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) -HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned tableLog, - void* workSpace, size_t wkspSize); -#endif /* HUF_H_298734234 */ +static size_t +ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); + } +} -// LICENSE_CHANGE_END +static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); + } +} +static size_t ZSTD_BtFindBestMatch_extDict_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); + } +} -// LICENSE_CHANGE_BEGIN -// The following code up to 
LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list -/* ****************************************************************** - * WARNING !! - * The following section contains advanced and experimental definitions - * which shall never be used in the context of a dynamic library, - * because they are not guaranteed to remain stable in the future. - * Only consider them in association with static linking. - * **************************************************************** */ -#ifndef HUF_H_HUF_STATIC_LINKING_ONLY -#define HUF_H_HUF_STATIC_LINKING_ONLY -/* *** Dependencies *** */ - /* U32 */ +/* ********************************* +* Hash Chain +***********************************/ +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] +/* Update chains up to ip (excluded) + Assumption : always within prefix (i.e. not within extDict) */ +static U32 ZSTD_insertAndFindFirstIndex_internal( + ZSTD_matchState_t* ms, + const ZSTD_compressionParameters* const cParams, + const BYTE* ip, U32 const mls) +{ + U32* const hashTable = ms->hashTable; + const U32 hashLog = cParams->hashLog; + U32* const chainTable = ms->chainTable; + const U32 chainMask = (1 << cParams->chainLog) - 1; + const BYTE* const base = ms->window.base; + const U32 target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; -/* *** Constants *** */ -#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ -#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */ -#define HUF_SYMBOLVALUE_MAX 255 + while(idx < target) { /* catch up */ + size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); + NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; + hashTable[h] = idx; + idx++; + } -#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. 
Beyond that value, code does not work */ -#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) -# error "HUF_TABLELOG_MAX is too large !" -#endif - - -/* **************************************** -* Static allocation -******************************************/ -/* HUF buffer bounds */ -#define HUF_CTABLEBOUND 129 -#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true when incompressible is pre-filtered with fast heuristic */ -#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ - -/* static allocation of HUF's Compression Table */ -#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */ -#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32)) -#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ - U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \ - void* name##hv = &(name##hb); \ - HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */ - -/* static allocation of HUF's DTable */ -typedef U32 HUF_DTable; -#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog))) -#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \ - HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) } -#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ - HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) } - - -/* **************************************** -* Advanced decompression functions -******************************************/ -size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ -#endif - -size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< 
decodes RLE and uncompressed */ -size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */ -size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */ -size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ -size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ -size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ -#endif - - -/* **************************************** - * HUF detailed API - * ****************************************/ - -/*! HUF_compress() does the following: - * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "zstd/common/fse.h") - * 2. (optional) refine tableLog using HUF_optimalTableLog() - * 3. build Huffman table from count using HUF_buildCTable() - * 4. save Huffman table to memory buffer using HUF_writeCTable() - * 5. encode the data stream using HUF_compress4X_usingCTable() - * - * The following API allows targeting specific sub-functions for advanced tasks. - * For example, it's possible to compress several blocks using the same 'CTable', - * or to save and regenerate 'CTable' using external methods. 
- */ -unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); -typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ -size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ -size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); -size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); -size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); -int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); - -typedef enum { - HUF_repeat_none, /**< Cannot use the previous table */ - HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ - HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */ - } HUF_repeat; -/** HUF_compress4X_repeat() : - * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. - * If it uses hufTable it does not modify hufTable or repeat. - * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. - * If preferRepeat then the old table will always be used if valid. */ -size_t HUF_compress4X_repeat(void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned tableLog, - void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); - -/** HUF_buildCTable_wksp() : - * Same as HUF_buildCTable(), but using externally allocated scratch buffer. 
- * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. - */ -#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) -#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) -size_t HUF_buildCTable_wksp (HUF_CElt* tree, - const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, - void* workSpace, size_t wkspSize); - -/*! HUF_readStats() : - * Read compact Huffman tree, saved by HUF_writeCTable(). - * `huffWeight` is destination buffer. - * @return : size read from `src` , or an error Code . - * Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ -size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, - U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, - const void* src, size_t srcSize); - -/** HUF_readCTable() : - * Loading a CTable saved with HUF_writeCTable() */ -size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); - -/** HUF_getNbBits() : - * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX - * Note 1 : is not inlined, as HUF_CElt definition is private - * Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */ -U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue); + ms->nextToUpdate = target; + return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; +} -/* - * HUF_decompress() does the following: - * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics - * 2. build Huffman table from save, using HUF_readDTableX?() - * 3. 
decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable() - */ +U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { + const ZSTD_compressionParameters* const cParams = &ms->cParams; + return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); +} -/** HUF_selectDecoder() : - * Tells which decoder is likely to decode faster, - * based on a set of pre-computed metrics. - * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . - * Assumption : 0 < dstSize <= 128 KB */ -U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); -/** - * The minimum workspace size for the `workSpace` used in - * HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp(). - * - * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when - * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15. - * Buffer overflow errors may potentially occur if code modifications result in - * a required workspace size greater than that specified in the following - * macro. 
- */ -#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10) -#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) +/* inlining is important to hardwire a hot branch (template emulation) */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_HcFindBestMatch_generic ( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls, const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const chainTable = ms->chainTable; + const U32 chainSize = (1 << cParams->chainLog); + const U32 chainMask = chainSize-1; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const U32 current = (U32)(ip-base); + const U32 maxDistance = 1U << cParams->windowLog; + const U32 lowestValid = ms->window.lowLimit; + const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid; + const U32 isDictionary = (ms->loadedDictEnd != 0); + const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; + const U32 minChain = current > chainSize ? 
current - chainSize : 0; + U32 nbAttempts = 1U << cParams->searchLog; + size_t ml=4-1; -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); -size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); -#endif -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); -size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); -#endif + /* HC4 match finder */ + U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); -size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -#endif -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -#endif + for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) { + size_t currentMl=0; + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + const BYTE* const match = base + matchIndex; + assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ + if (match[ml] == ip[ml]) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex; + assert(match+4 <= dictEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; + } + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, 
avoids read overflow on next attempt */ + } -/* ====================== */ -/* single stream variants */ -/* ====================== */ + if (matchIndex <= minChain) break; + matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); + } -size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); -size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ -size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); -/** HUF_compress1X_repeat() : - * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. - * If it uses hufTable it does not modify hufTable or repeat. - * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. - * If preferRepeat then the old table will always be used if valid. */ -size_t HUF_compress1X_repeat(void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned tableLog, - void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); + if (dictMode == ZSTD_dictMatchState) { + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32* const dmsChainTable = dms->chainTable; + const U32 dmsChainSize = (1 << dms->cParams.chainLog); + const U32 dmsChainMask = dmsChainSize - 1; + const U32 dmsLowestIndex = dms->window.dictLimit; + const BYTE* const dmsBase = dms->window.base; + const BYTE* const dmsEnd = dms->window.nextSrc; + const U32 dmsSize = (U32)(dmsEnd - dmsBase); + const U32 dmsIndexDelta = dictLimit - dmsSize; + const U32 dmsMinChain = dmsSize > dmsChainSize ? 
dmsSize - dmsChainSize : 0; -size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ -#endif + matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)]; -size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); -size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ -size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ -#endif -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ -size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ -#endif + for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) { + size_t currentMl=0; + const BYTE* const match = dmsBase + matchIndex; + assert(match+4 <= dmsEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; -size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t 
HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -#endif -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -#endif + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } -/* BMI2 variants. - * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. - */ -size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); -#endif -size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); -size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); + if (matchIndex <= dmsMinChain) break; + matchIndex = dmsChainTable[matchIndex & dmsChainMask]; + } + } + return ml; } -#endif /* HUF_STATIC_LINKING_ONLY */ - - - -// LICENSE_CHANGE_END - - - -/* ************************************************************** -* Error Management -****************************************************************/ -// #define HUF_isError ERR_isError -#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 
*/ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); + } +} -namespace duckdb_zstd { -/* ************************************************************** -* Utils -****************************************************************/ -unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) { - return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); + } } -/* ******************************************************* -* HUF : Huffman block compression -*********************************************************/ -/* HUF_compressWeights() : - * Same as FSE_compress(), but dedicated to huff0's weights compression. - * The use case needs much less stack memory. - * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. 
- */ -#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 -static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize) +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) { - BYTE* const ostart = (BYTE*) dst; - BYTE* op = ostart; - BYTE* const oend = ostart + dstSize; + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); + } +} - unsigned maxSymbolValue = HUF_TABLELOG_MAX; - U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; - FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; - BYTE scratchBuffer[1<window.base; + const U32 prefixLowestIndex = ms->window.dictLimit; + const BYTE* const prefixLowest = base + prefixLowestIndex; - /* init conditions */ - if (wtSize <= 1) return 0; /* Not compressible */ + typedef size_t (*searchMax_f)( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); + searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ? + (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS + : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) : + (searchMethod==search_binaryTree ? 
ZSTD_BtFindBestMatch_selectMLS + : ZSTD_HcFindBestMatch_selectMLS); + U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; - /* Scan input and build symbol stats */ - { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */ - if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */ - if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ - } + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ? + dms->window.dictLimit : 0; + const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? + dms->window.base : NULL; + const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ? + dictBase + dictLowestIndex : NULL; + const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? + dms->window.nextSrc : NULL; + const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? + prefixLowestIndex - (U32)(dictEnd - dictBase) : + 0; + const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); - tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); - CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) ); + DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode); - /* Write table description header */ - { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) ); - op += hSize; + /* init */ + ip += (dictAndPrefixLength == 0); + if (dictMode == ZSTD_noDict) { + U32 const current = (U32)(ip - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog); + U32 const maxRep = current - windowLow; + if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; + if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; } - - /* Compress */ - CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) ); - { 
CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) ); - if (cSize == 0) return 0; /* not enough space for compressed data */ - op += cSize; + if (dictMode == ZSTD_dictMatchState) { + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); } - return (size_t)(op-ostart); -} - - -struct HUF_CElt_s { - U16 val; - BYTE nbBits; -}; /* typedef'd to HUF_CElt within "zstd/common/huf.h" */ - -/*! HUF_writeCTable() : - `CTable` : Huffman tree to save, using huf representation. - @return : size of saved CTable */ -size_t HUF_writeCTable (void* dst, size_t maxDstSize, - const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog) -{ - BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ - BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; - BYTE* op = (BYTE*)dst; - U32 n; + /* Match Loop */ +#if defined(__GNUC__) && defined(__x86_64__) + /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the + * code alignment is perturbed. To fix the instability align the loop on 32-bytes. + */ + __asm__(".p2align 5"); +#endif + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; - /* check conditions */ - if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); + /* check repCode */ + if (dictMode == ZSTD_dictMatchState) { + const U32 repIndex = (U32)(ip - base) + 1 - offset_1; + const BYTE* repMatch = (dictMode == ZSTD_dictMatchState + && repIndex < prefixLowestIndex) ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + if (depth==0) goto _storeSequence; + } + } + if ( dictMode == ZSTD_noDict + && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { + matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + if (depth==0) goto _storeSequence; + } - /* convert to weight */ - bitsToWeight[0] = 0; - for (n=1; n matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } - /* attempt weights compression by FSE */ - { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) ); - if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */ - op[0] = (BYTE)hSize; - return hSize+1; - } } + if (matchLength < 4) { + ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } - /* write raw values as 4-bits (max : 15) */ - if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ - if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ - op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1)); - huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */ - for (n=0; n=1) + while (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(mlRep * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + if (dictMode == ZSTD_dictMatchState) { + const U32 repIndex = (U32)(ip - base) - offset_1; + const BYTE* repMatch = repIndex < prefixLowestIndex ? 
+ dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; + size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + int const gain2 = (int)(mlRep * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + } + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + /* let's find an even better one */ + if ((depth==2) && (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(mlRep * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + if (dictMode == ZSTD_dictMatchState) { + const U32 repIndex = (U32)(ip - base) - offset_1; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; + size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + int const gain2 = (int)(mlRep * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + } + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } -size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights) -{ - BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */ - U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ - U32 tableLog = 0; - U32 nbSymbols = 0; + /* NOTE: + * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. + * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which + * overflows the pointer, which is undefined behavior. + */ + /* catch up */ + if (offset) { + if (dictMode == ZSTD_noDict) { + while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest)) + && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */ + { start--; matchLength++; } + } + if (dictMode == ZSTD_dictMatchState) { + U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex; + const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? 
dictLowest : prefixLowest; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + } + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } - /* get symbol weights */ - CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize)); + /* check immediate repcode */ + if (dictMode == ZSTD_dictMatchState) { + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex = current2 - offset_2; + const BYTE* repMatch = dictMode == ZSTD_dictMatchState + && repIndex < prefixLowestIndex ? + dictBase - dictIndexDelta + repIndex : + base + repIndex; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? 
dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; + } + break; + } + } - /* check result */ - if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); - if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); + if (dictMode == ZSTD_noDict) { + while ( ((ip <= ilimit) & (offset_2>0)) + && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { + /* store sequence */ + matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } - /* Prepare base value per rank */ - { U32 n, nextRankStart = 0; - for (n=1; n<=tableLog; n++) { - U32 current = nextRankStart; - nextRankStart += (rankVal[n] << (n-1)); - rankVal[n] = current; - } } + /* Save reps for next block */ + rep[0] = offset_1 ? offset_1 : savedOffset; + rep[1] = offset_2 ? 
offset_2 : savedOffset; - /* fill nbBits */ - *hasZeroWeights = 0; - { U32 n; for (n=0; nn=tableLog+1 */ - U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; - { U32 n; for (n=0; n0; n--) { /* start at n=tablelog <-> w=1 */ - valPerRank[n] = min; /* get starting value within each rank */ - min += nbPerRank[n]; - min >>= 1; - } } - /* assign value within rank, symbol order */ - { U32 n; for (n=0; n maxNbBits */ - - /* there are several too large elements (at least >= 2) */ - { int totalCost = 0; - const U32 baseCost = 1 << (largestBits - maxNbBits); - int n = (int)lastNonNull; - - while (huffNode[n].nbBits > maxNbBits) { - totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); - huffNode[n].nbBits = (BYTE)maxNbBits; - n --; - } /* n stops at huffNode[n].nbBits <= maxNbBits */ - while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */ - - /* renorm totalCost */ - totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */ - - /* repay normalized cost */ - { U32 const noSymbol = 0xF0F0F0F0; - U32 rankLast[HUF_TABLELOG_MAX+2]; - - /* Get pos of last (smallest) symbol per rank */ - memset(rankLast, 0xF0, sizeof(rankLast)); - { U32 currentNbBits = maxNbBits; - int pos; - for (pos=n ; pos >= 0; pos--) { - if (huffNode[pos].nbBits >= currentNbBits) continue; - currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ - rankLast[maxNbBits-currentNbBits] = (U32)pos; - } } - - while (totalCost > 0) { - U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1; - for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { - U32 const highPos = rankLast[nBitsToDecrease]; - U32 const lowPos = rankLast[nBitsToDecrease-1]; - if (highPos == noSymbol) continue; - if (lowPos == noSymbol) break; - { U32 const highTotal = huffNode[highPos].count; - U32 const lowTotal = 2 * huffNode[lowPos].count; - if (highTotal <= lowTotal) break; - } } - /* only triggered when no more rank 1 symbol left => find closest one 
(note : there is necessarily at least one !) */ - /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ - while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) - nBitsToDecrease ++; - totalCost -= 1 << (nBitsToDecrease-1); - if (rankLast[nBitsToDecrease-1] == noSymbol) - rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */ - huffNode[rankLast[nBitsToDecrease]].nbBits ++; - if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ - rankLast[nBitsToDecrease] = noSymbol; - else { - rankLast[nBitsToDecrease]--; - if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) - rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ - } } /* while (totalCost > 0) */ - - while (totalCost < 0) { /* Sometimes, cost correction overshoot */ - if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ - while (huffNode[n].nbBits == maxNbBits) n--; - huffNode[n+1].nbBits--; - assert(n >= 0); - rankLast[1] = (U32)(n+1); - totalCost++; - continue; - } - huffNode[ rankLast[1] + 1 ].nbBits--; - rankLast[1]++; - totalCost ++; - } } } /* there are several too large elements (at least >= 2) */ - - return maxNbBits; + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); } -typedef struct { - U32 base; - U32 current; -} rankPos; - -typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; - -#define RANK_POSITION_TABLE_SIZE 32 - -typedef struct { - huffNodeTable huffNodeTbl; - rankPos rankPosition[RANK_POSITION_TABLE_SIZE]; -} HUF_buildCTable_wksp_tables; - -static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition) +size_t ZSTD_compressBlock_greedy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) 
{ - U32 n; - - memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE); - for (n=0; n<=maxSymbolValue; n++) { - U32 r = BIT_highbit32(count[n] + 1); - rankPosition[r].base ++; - } - for (n=30; n>0; n--) rankPosition[n-1].base += rankPosition[n].base; - for (n=0; n<32; n++) rankPosition[n].current = rankPosition[n].base; - for (n=0; n<=maxSymbolValue; n++) { - U32 const c = count[n]; - U32 const r = BIT_highbit32(c+1) + 1; - U32 pos = rankPosition[r].current++; - while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) { - huffNode[pos] = huffNode[pos-1]; - pos--; - } - huffNode[pos].count = c; - huffNode[pos].byte = (BYTE)n; - } + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); } - -/** HUF_buildCTable_wksp() : - * Same as HUF_buildCTable(), but using externally allocated scratch buffer. - * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables). 
- */ -#define STARTNODE (HUF_SYMBOLVALUE_MAX+1) - -size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) +size_t ZSTD_compressBlock_btlazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) { - HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace; - nodeElt* const huffNode0 = wksp_tables->huffNodeTbl; - nodeElt* const huffNode = huffNode0+1; - int nonNullRank; - int lowS, lowN; - int nodeNb = STARTNODE; - int n, nodeRoot; - - /* safety checks */ - if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ - if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) - return ERROR(workSpace_tooSmall); - if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; - if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) - return ERROR(maxSymbolValue_tooLarge); - memset(huffNode0, 0, sizeof(huffNodeTable)); - - /* sort, decreasing order */ - HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition); - - /* init for parents */ - nonNullRank = (int)maxSymbolValue; - while(huffNode[nonNullRank].count == 0) nonNullRank--; - lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; - huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; - huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb; - nodeNb++; lowS-=2; - for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); - huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */ - - /* create parents */ - while (nodeNb <= nodeRoot) { - int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; - int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? 
lowS-- : lowN++; - huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; - huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb; - nodeNb++; - } - - /* distribute weights (unlimited tree height) */ - huffNode[nodeRoot].nbBits = 0; - for (n=nodeRoot-1; n>=STARTNODE; n--) - huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; - for (n=0; n<=nonNullRank; n++) - huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; - - /* enforce maxTableLog */ - maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits); - - /* fill result into tree (val, nbBits) */ - { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; - U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; - int const alphabetSize = (int)(maxSymbolValue + 1); - if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ - for (n=0; n<=nonNullRank; n++) - nbPerRank[huffNode[n].nbBits]++; - /* determine stating value per rank */ - { U16 min = 0; - for (n=(int)maxNbBits; n>0; n--) { - valPerRank[n] = min; /* get starting value within each rank */ - min += nbPerRank[n]; - min >>= 1; - } } - for (n=0; n> 3; + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); } -int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { - int bad = 0; - int s; - for (s = 0; s <= (int)maxSymbolValue; ++s) { - bad |= (count[s] != 0) & (CTable[s].nbBits == 0); - } - return !bad; +size_t ZSTD_compressBlock_greedy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); } -size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } -FORCE_INLINE_TEMPLATE void -HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) +FORCE_INLINE_TEMPLATE +size_t 
ZSTD_compressBlock_lazy_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const searchMethod_e searchMethod, const U32 depth) { - BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); -} - -#define HUF_FLUSHBITS(s) BIT_flushBits(s) + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const dictStart = dictBase + ms->window.lowLimit; + const U32 windowLog = ms->cParams.windowLog; -#define HUF_FLUSHBITS_1(stream) \ - if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream) + typedef size_t (*searchMax_f)( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); + searchMax_f searchMax = searchMethod==search_binaryTree ? 
ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; -#define HUF_FLUSHBITS_2(stream) \ - if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) + U32 offset_1 = rep[0], offset_2 = rep[1]; -FORCE_INLINE_TEMPLATE size_t -HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, - const void* src, size_t srcSize, - const HUF_CElt* CTable) -{ - const BYTE* ip = (const BYTE*) src; - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstSize; - BYTE* op = ostart; - size_t n; - BIT_CStream_t bitC; + DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic"); /* init */ - if (dstSize < 8) return 0; /* not enough space to compress */ - { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op)); - if (HUF_isError(initErr)) return 0; } - - n = srcSize & ~3; /* join to mod 4 */ - switch (srcSize & 3) - { - case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); - HUF_FLUSHBITS_2(&bitC); - /* fall-through */ - case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); - HUF_FLUSHBITS_1(&bitC); - /* fall-through */ - case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); - HUF_FLUSHBITS(&bitC); - /* fall-through */ - case 0 : /* fall-through */ - default: break; - } - - for (; n>0; n-=4) { /* note : n&3==0 at this stage */ - HUF_encodeSymbol(&bitC, ip[n- 1], CTable); - HUF_FLUSHBITS_1(&bitC); - HUF_encodeSymbol(&bitC, ip[n- 2], CTable); - HUF_FLUSHBITS_2(&bitC); - HUF_encodeSymbol(&bitC, ip[n- 3], CTable); - HUF_FLUSHBITS_1(&bitC); - HUF_encodeSymbol(&bitC, ip[n- 4], CTable); - HUF_FLUSHBITS(&bitC); - } - - return BIT_closeCStream(&bitC); -} - -#if DYNAMIC_BMI2 - -static TARGET_ATTRIBUTE("bmi2") size_t -HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, - const void* src, size_t srcSize, - const HUF_CElt* CTable) -{ - return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); -} - -static size_t -HUF_compress1X_usingCTable_internal_default(void* dst, size_t 
dstSize, - const void* src, size_t srcSize, - const HUF_CElt* CTable) -{ - return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); -} - -static size_t -HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, - const void* src, size_t srcSize, - const HUF_CElt* CTable, const int bmi2) -{ - if (bmi2) { - return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); - } - return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); -} - -#else - -static size_t -HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, - const void* src, size_t srcSize, - const HUF_CElt* CTable, const int bmi2) -{ - (void)bmi2; - return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); -} + ip += (ip == prefixStart); + /* Match Loop */ +#if defined(__GNUC__) && defined(__x86_64__) + /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the + * code alignment is perturbed. To fix the instability align the loop on 32-bytes. 
+ */ + __asm__(".p2align 5"); #endif + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + U32 current = (U32)(ip-base); -size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) -{ - return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); -} - - -static size_t -HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, - const void* src, size_t srcSize, - const HUF_CElt* CTable, int bmi2) -{ - size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ - const BYTE* ip = (const BYTE*) src; - const BYTE* const iend = ip + srcSize; - BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - BYTE* op = ostart; - - if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ - if (srcSize < 12) return 0; /* no saving possible : too small input */ - op += 6; /* jumpTable */ - - assert(op <= oend); - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); - if (cSize==0) return 0; - assert(cSize <= 65535); - MEM_writeLE16(ostart, (U16)cSize); - op += cSize; - } - - ip += segmentSize; - assert(op <= oend); - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); - if (cSize==0) return 0; - assert(cSize <= 65535); - MEM_writeLE16(ostart+2, (U16)cSize); - op += cSize; - } - - ip += segmentSize; - assert(op <= oend); - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); - if (cSize==0) return 0; - assert(cSize <= 65535); - MEM_writeLE16(ostart+4, (U16)cSize); - op += cSize; - } - - ip += segmentSize; - assert(op <= oend); - assert(ip <= iend); - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) ); - if (cSize==0) return 0; - op += cSize; - } - - 
return (size_t)(op-ostart); -} - -size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) -{ - return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); -} - -typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; + /* check repCode */ + { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog); + const U32 repIndex = (U32)(current+1 - offset_1); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if (MEM_read32(ip+1) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4; + if (depth==0) goto _storeSequence; + } } -static size_t HUF_compressCTable_internal( - BYTE* const ostart, BYTE* op, BYTE* const oend, - const void* src, size_t srcSize, - HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) -{ - size_t const cSize = (nbStreams==HUF_singleStream) ? 
- HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) : - HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2); - if (HUF_isError(cSize)) { return cSize; } - if (cSize==0) { return 0; } /* uncompressible */ - op += cSize; - /* check compressibility */ - assert(op >= ostart); - if ((size_t)(op-ostart) >= srcSize-1) { return 0; } - return (size_t)(op-ostart); -} + /* first search (depth 0) */ + { size_t offsetFound = 999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } -typedef struct { - unsigned count[HUF_SYMBOLVALUE_MAX + 1]; - HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; - HUF_buildCTable_wksp_tables buildCTable_wksp; -} HUF_compress_tables_t; + if (matchLength < 4) { + ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } -/* HUF_compress_internal() : - * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ -static size_t -HUF_compress_internal (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog, - HUF_nbStreams_e nbStreams, - void* workSpace, size_t wkspSize, - HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, - const int bmi2) -{ - HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace; - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstSize; - BYTE* op = ostart; + /* let's try to find a better solution */ + if (depth>=1) + while (ip= 3) & (repIndex > windowLow)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } - HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE); + /* search match, depth 1 */ + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } - /* checks & inits */ - if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ - if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall); - if (!srcSize) return 0; /* Uncompressed */ - if (!dstSize) return 0; /* cannot fit anything within dst budget */ - if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ - if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); - if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); - if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; - if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; + /* let's find an even better one */ + if ((depth==2) && (ip= 3) & (repIndex > windowLow)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } - /* Heuristic : If old table is valid, use it for small inputs */ - if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { - return HUF_compressCTable_internal(ostart, op, oend, - src, srcSize, - nbStreams, oldHufTable, bmi2); - } + /* search match, depth 2 */ + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } - /* Scan input and build symbol stats */ - { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) ); - if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ - if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ - } + /* catch up */ + if (offset) { + U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; + const BYTE* const mStart = (matchIndex < dictLimit) ? 
dictStart : prefixStart; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } - /* Check validity of previous table */ - if ( repeat - && *repeat == HUF_repeat_check - && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { - *repeat = HUF_repeat_none; - } - /* Heuristic : use existing table for small inputs */ - if (preferRepeat && repeat && *repeat != HUF_repeat_none) { - return HUF_compressCTable_internal(ostart, op, oend, - src, srcSize, - nbStreams, oldHufTable, bmi2); - } + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } - /* Build Huffman Tree */ - huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); - { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, - maxSymbolValue, huffLog, - &table->buildCTable_wksp, sizeof(table->buildCTable_wksp)); - CHECK_F(maxBits); - huffLog = (U32)maxBits; - /* Zero unused symbols in CTable, so we can check it for validity */ - memset(table->CTable + (maxSymbolValue + 1), 0, - sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt))); - } + /* check immediate repcode */ + while (ip <= ilimit) { + const U32 repCurrent = (U32)(ip-base); + const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog); + const U32 repIndex = repCurrent - offset_2; + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } + break; + } } - /* Write table description header */ - { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) ); - /* Check if using previous huffman table is beneficial */ - if (repeat && *repeat != HUF_repeat_none) { - size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); - size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue); - if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { - return HUF_compressCTable_internal(ostart, op, oend, - src, srcSize, - nbStreams, oldHufTable, bmi2); - } } + /* Save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; - /* Use the new huffman table */ - if (hSize + 12ul >= srcSize) { return 0; } - op += hSize; - if (repeat) { *repeat = HUF_repeat_none; } - if (oldHufTable) - memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */ - } - return HUF_compressCTable_internal(ostart, op, oend, - src, srcSize, - nbStreams, table->CTable, bmi2); + /* Return the last literals size */ + return (size_t)(iend - anchor); } -size_t HUF_compress1X_wksp (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog, - void* workSpace, size_t wkspSize) +size_t ZSTD_compressBlock_greedy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) { - return HUF_compress_internal(dst, dstSize, src, srcSize, - maxSymbolValue, huffLog, HUF_singleStream, - workSpace, wkspSize, - NULL, NULL, 0, 0 /*bmi2*/); + return ZSTD_compressBlock_lazy_extDict_generic(ms, 
seqStore, rep, src, srcSize, search_hashChain, 0); } -size_t HUF_compress1X_repeat (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog, - void* workSpace, size_t wkspSize, - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) -{ - return HUF_compress_internal(dst, dstSize, src, srcSize, - maxSymbolValue, huffLog, HUF_singleStream, - workSpace, wkspSize, hufTable, - repeat, preferRepeat, bmi2); -} +size_t ZSTD_compressBlock_lazy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) -size_t HUF_compress1X (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog) { - unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; - return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); } -/* HUF_compress4X_repeat(): - * compress input using 4 streams. - * provide workspace to generate compression tables */ -size_t HUF_compress4X_wksp (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog, - void* workSpace, size_t wkspSize) -{ - return HUF_compress_internal(dst, dstSize, src, srcSize, - maxSymbolValue, huffLog, HUF_fourStreams, - workSpace, wkspSize, - NULL, NULL, 0, 0 /*bmi2*/); -} +size_t ZSTD_compressBlock_lazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) -/* HUF_compress4X_repeat(): - * compress input using 4 streams. 
- * re-use an existing huffman compression table */ -size_t HUF_compress4X_repeat (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog, - void* workSpace, size_t wkspSize, - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) { - return HUF_compress_internal(dst, dstSize, src, srcSize, - maxSymbolValue, huffLog, HUF_fourStreams, - workSpace, wkspSize, - hufTable, repeat, preferRepeat, bmi2); + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); } -size_t HUF_compress2 (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog) -{ - unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; - return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); -} +size_t ZSTD_compressBlock_btlazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) -size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) { - return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT); + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); } } @@ -18906,7 +18676,7 @@ size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSi // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 // See the end of this file for a list /* @@ -18919,3485 +18689,3584 @@ size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSi * You may select, at your option, one of the above-listed licenses. 
*/ -/*-************************************* -* Dependencies -***************************************/ -#include /* INT_MAX */ -#include /* memset */ - /* HIST_countFast_wksp */ + /* ZSTD_fillHashTable() */ + /* ZSTD_fillDoubleHashTable() */ +#define LDM_BUCKET_SIZE_LOG 3 +#define LDM_MIN_MATCH_LENGTH 64 +#define LDM_HASH_RLOG 7 +#define LDM_HASH_CHAR_OFFSET 10 +namespace duckdb_zstd { +void ZSTD_ldm_adjustParameters(ldmParams_t* params, + ZSTD_compressionParameters const* cParams) +{ + params->windowLog = cParams->windowLog; + ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); + DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); + if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; + if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; + if (cParams->strategy >= ZSTD_btopt) { + /* Get out of the way of the optimal parser */ + U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength); + assert(minMatch >= ZSTD_LDM_MINMATCH_MIN); + assert(minMatch <= ZSTD_LDM_MINMATCH_MAX); + params->minMatchLength = minMatch; + } + if (params->hashLog == 0) { + params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG); + assert(params->hashLog <= ZSTD_HASHLOG_MAX); + } + if (params->hashRateLog == 0) { + params->hashRateLog = params->windowLog < params->hashLog + ? 
0 + : params->windowLog - params->hashLog; + } + params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog); +} -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list +size_t ZSTD_ldm_getTableSize(ldmParams_t params) +{ + size_t const ldmHSize = ((size_t)1) << params.hashLog; + size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog); + size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog); + size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize) + + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t)); + return params.enableLdm ? totalSize : 0; +} -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ +size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize) +{ + return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0; +} -/* This header contains definitions - * that shall **only** be used by modules within lib/compress. - */ +/** ZSTD_ldm_getSmallHash() : + * numBits should be <= 32 + * If numBits==0, returns 0. + * @return : the most significant numBits of value. */ +static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits) +{ + assert(numBits <= 32); + return numBits == 0 ? 
0 : (U32)(value >> (64 - numBits)); +} -#ifndef ZSTD_COMPRESS_H -#define ZSTD_COMPRESS_H +/** ZSTD_ldm_getChecksum() : + * numBitsToDiscard should be <= 32 + * @return : the next most significant 32 bits after numBitsToDiscard */ +static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard) +{ + assert(numBitsToDiscard <= 32); + return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF; +} -/*-************************************* -* Dependencies -***************************************/ +/** ZSTD_ldm_getTag() ; + * Given the hash, returns the most significant numTagBits bits + * after (32 + hbits) bits. + * + * If there are not enough bits remaining, return the last + * numTagBits bits. */ +static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits) +{ + assert(numTagBits < 32 && hbits <= 32); + if (32 - hbits < numTagBits) { + return hash & (((U32)1 << numTagBits) - 1); + } else { + return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1); + } +} +/** ZSTD_ldm_getBucket() : + * Returns a pointer to the start of the bucket associated with hash. */ +static ldmEntry_t* ZSTD_ldm_getBucket( + ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams) +{ + return ldmState->hashTable + (hash << ldmParams.bucketSizeLog); +} -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list +/** ZSTD_ldm_insertEntry() : + * Insert the entry with corresponding hash into the hash table */ +static void ZSTD_ldm_insertEntry(ldmState_t* ldmState, + size_t const hash, const ldmEntry_t entry, + ldmParams_t const ldmParams) +{ + BYTE* const bucketOffsets = ldmState->bucketOffsets; + *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry; + bucketOffsets[hash]++; + bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1; +} -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. 
+/** ZSTD_ldm_makeEntryAndInsertByTag() : * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ + * Gets the small hash, checksum, and tag from the rollingHash. + * + * If the tag matches (1 << ldmParams.hashRateLog)-1, then + * creates an ldmEntry from the offset, and inserts it into the hash table. + * + * hBits is the length of the small hash, which is the most significant hBits + * of rollingHash. The checksum is the next 32 most significant bits, followed + * by ldmParams.hashRateLog bits that make up the tag. */ +static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState, + U64 const rollingHash, + U32 const hBits, + U32 const offset, + ldmParams_t const ldmParams) +{ + U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog); + U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1; + if (tag == tagMask) { + U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits); + U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); + ldmEntry_t entry; + entry.offset = offset; + entry.checksum = checksum; + ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams); + } +} -#ifndef ZSTD_CCOMMON_H_MODULE -#define ZSTD_CCOMMON_H_MODULE +/** ZSTD_ldm_countBackwardsMatch() : + * Returns the number of bytes that match backwards before pIn and pMatch. + * + * We count only bytes where pMatch >= pBase and pIn >= pAnchor. 
*/ +static size_t ZSTD_ldm_countBackwardsMatch( + const BYTE* pIn, const BYTE* pAnchor, + const BYTE* pMatch, const BYTE* pBase) +{ + size_t matchLength = 0; + while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) { + pIn--; + pMatch--; + matchLength++; + } + return matchLength; +} -/* this module contains definitions which must be identical - * across compression, decompression and dictBuilder. - * It also contains a few functions useful to at least 2 of them - * and which benefit from being inlined */ +/** ZSTD_ldm_fillFastTables() : + * + * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies. + * This is similar to ZSTD_loadDictionaryContent. + * + * The tables for the other strategies are filled within their + * block compressors. */ +static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, + void const* end) +{ + const BYTE* const iend = (const BYTE*)end; -/*-************************************* -* Dependencies -***************************************/ -#ifdef __aarch64__ -#include -#endif + switch(ms->cParams.strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast); + break; + case ZSTD_dfast: + ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast); + break; - /* assert, DEBUGLOG, RAWLOG, g_debuglevel */ + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + default: + assert(0); /* not possible : not a valid strategy id */ + } + return 0; +} +/** ZSTD_ldm_fillLdmHashTable() : + * + * Fills hashTable from (lastHashed + 1) to iend (non-inclusive). + * lastHash is the rolling hash that corresponds to lastHashed. + * + * Returns the rolling hash corresponding to position iend-1. 
*/ +static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state, + U64 lastHash, const BYTE* lastHashed, + const BYTE* iend, const BYTE* base, + U32 hBits, ldmParams_t const ldmParams) +{ + U64 rollingHash = lastHash; + const BYTE* cur = lastHashed + 1; + while (cur < iend) { + rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1], + cur[ldmParams.minMatchLength-1], + state->hashPower); + ZSTD_ldm_makeEntryAndInsertByTag(state, + rollingHash, hBits, + (U32)(cur - base), ldmParams); + ++cur; + } + return rollingHash; +} -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list +void ZSTD_ldm_fillHashTable( + ldmState_t* state, const BYTE* ip, + const BYTE* iend, ldmParams_t const* params) +{ + DEBUGLOG(5, "ZSTD_ldm_fillHashTable"); + if ((size_t)(iend - ip) >= params->minMatchLength) { + U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength); + ZSTD_ldm_fillLdmHashTable( + state, startingHash, ip, iend - params->minMatchLength, state->window.base, + params->hashLog - params->bucketSizeLog, + *params); + } +} -/* ************************************************************************************** - * ADVANCED AND EXPERIMENTAL FUNCTIONS - **************************************************************************************** - * The definitions in the following section are considered experimental. - * They are provided for advanced scenarios. - * They should never be used with a dynamic library, as prototypes may change in the future. - * Use them only in association with static linking. - * ***************************************************************************************/ +/** ZSTD_ldm_limitTableUpdate() : + * + * Sets cctx->nextToUpdate to a position corresponding closer to anchor + * if it is far way + * (after a long match, only update tables a limited amount). 
*/ +static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor) +{ + U32 const current = (U32)(anchor - ms->window.base); + if (current > ms->nextToUpdate + 1024) { + ms->nextToUpdate = + current - MIN(512, current - ms->nextToUpdate - 1024); + } +} -#ifndef ZSTD_H_ZSTD_STATIC_LINKING_ONLY -#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY +static size_t ZSTD_ldm_generateSequences_internal( + ldmState_t* ldmState, rawSeqStore_t* rawSeqStore, + ldmParams_t const* params, void const* src, size_t srcSize) +{ + /* LDM parameters */ + int const extDict = ZSTD_window_hasExtDict(ldmState->window); + U32 const minMatchLength = params->minMatchLength; + U64 const hashPower = ldmState->hashPower; + U32 const hBits = params->hashLog - params->bucketSizeLog; + U32 const ldmBucketSize = 1U << params->bucketSizeLog; + U32 const hashRateLog = params->hashRateLog; + U32 const ldmTagMask = (1U << params->hashRateLog) - 1; + /* Prefix and extDict parameters */ + U32 const dictLimit = ldmState->window.dictLimit; + U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit; + BYTE const* const base = ldmState->window.base; + BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL; + BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL; + BYTE const* const dictEnd = extDict ? 
dictBase + dictLimit : NULL; + BYTE const* const lowPrefixPtr = base + dictLimit; + /* Input bounds */ + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE); + /* Input positions */ + BYTE const* anchor = istart; + BYTE const* ip = istart; + /* Rolling hash */ + BYTE const* lastHashed = NULL; + U64 rollingHash = 0; -namespace duckdb_zstd { + while (ip <= ilimit) { + size_t mLength; + U32 const current = (U32)(ip - base); + size_t forwardMatchLength = 0, backwardMatchLength = 0; + ldmEntry_t* bestEntry = NULL; + if (ip != istart) { + rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0], + lastHashed[minMatchLength], + hashPower); + } else { + rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength); + } + lastHashed = ip; -/**************************************************************************************** - * experimental API (static linking only) - **************************************************************************************** - * The following symbols and constants - * are not planned to join "stable API" status in the near future. - * They can still change in future versions. - * Some of them are planned to remain in the static_only section indefinitely. - * Some of them might be removed in the future (especially when redundant with existing stable functions) - * ***************************************************************************************/ + /* Do not insert and do not look for a match */ + if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) { + ip++; + continue; + } -#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */ -#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 
6 : 2) -#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */ -#define ZSTD_SKIPPABLEHEADERSIZE 8 + /* Get the best entry and compute the match lengths */ + { + ldmEntry_t* const bucket = + ZSTD_ldm_getBucket(ldmState, + ZSTD_ldm_getSmallHash(rollingHash, hBits), + *params); + ldmEntry_t* cur; + size_t bestMatchLength = 0; + U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); -/* compression parameter bounds */ -#define ZSTD_WINDOWLOG_MAX_32 30 -#define ZSTD_WINDOWLOG_MAX_64 31 -#define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) -#define ZSTD_WINDOWLOG_MIN 10 -#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30) -#define ZSTD_HASHLOG_MIN 6 -#define ZSTD_CHAINLOG_MAX_32 29 -#define ZSTD_CHAINLOG_MAX_64 30 -#define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64)) -#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN -#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) -#define ZSTD_SEARCHLOG_MIN 1 -#define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ -#define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */ -#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX -#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */ -#define ZSTD_STRATEGY_MIN ZSTD_fast -#define ZSTD_STRATEGY_MAX ZSTD_btultra2 + for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) { + size_t curForwardMatchLength, curBackwardMatchLength, + curTotalMatchLength; + if (cur->checksum != checksum || cur->offset <= lowestIndex) { + continue; + } + if (extDict) { + BYTE const* const curMatchBase = + cur->offset < dictLimit ? dictBase : base; + BYTE const* const pMatch = curMatchBase + cur->offset; + BYTE const* const matchEnd = + cur->offset < dictLimit ? dictEnd : iend; + BYTE const* const lowMatchPtr = + cur->offset < dictLimit ? 
dictStart : lowPrefixPtr; + curForwardMatchLength = ZSTD_count_2segments( + ip, pMatch, iend, + matchEnd, lowPrefixPtr); + if (curForwardMatchLength < minMatchLength) { + continue; + } + curBackwardMatchLength = + ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, + lowMatchPtr); + curTotalMatchLength = curForwardMatchLength + + curBackwardMatchLength; + } else { /* !extDict */ + BYTE const* const pMatch = base + cur->offset; + curForwardMatchLength = ZSTD_count(ip, pMatch, iend); + if (curForwardMatchLength < minMatchLength) { + continue; + } + curBackwardMatchLength = + ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, + lowPrefixPtr); + curTotalMatchLength = curForwardMatchLength + + curBackwardMatchLength; + } -#define ZSTD_OVERLAPLOG_MIN 0 -#define ZSTD_OVERLAPLOG_MAX 9 + if (curTotalMatchLength > bestMatchLength) { + bestMatchLength = curTotalMatchLength; + forwardMatchLength = curForwardMatchLength; + backwardMatchLength = curBackwardMatchLength; + bestEntry = cur; + } + } + } -#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame - * requiring larger than (1<offset - backwardMatchLength) + */ + U32 const matchIndex = bestEntry->offset; + U32 const offset = current - matchIndex; + rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; -/* Advanced parameter bounds */ -#define ZSTD_TARGETCBLOCKSIZE_MIN 64 -#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX -#define ZSTD_SRCSIZEHINT_MIN 0 -#define ZSTD_SRCSIZEHINT_MAX INT_MAX + /* Out of sequence storage */ + if (rawSeqStore->size == rawSeqStore->capacity) + return ERROR(dstSize_tooSmall); + seq->litLength = (U32)(ip - anchor); + seq->matchLength = (U32)mLength; + seq->offset = offset; + rawSeqStore->size++; + } -/* internal */ -#define ZSTD_HASHLOG3_MAX 17 + /* Insert the current entry into the hash table */ + ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits, + (U32)(lastHashed - base), + *params); + assert(ip + backwardMatchLength == lastHashed); 
-/* --- Advanced types --- */ + /* Fill the hash table from lastHashed+1 to ip+mLength*/ + /* Heuristic: don't need to fill the entire table at end of block */ + if (ip + mLength <= ilimit) { + rollingHash = ZSTD_ldm_fillLdmHashTable( + ldmState, rollingHash, lastHashed, + ip + mLength, base, hBits, *params); + lastHashed = ip + mLength - 1; + } + ip += mLength; + anchor = ip; + } + return iend - anchor; +} -typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params; +/*! ZSTD_ldm_reduceTable() : + * reduce table indexes by `reducerValue` */ +static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size, + U32 const reducerValue) +{ + U32 u; + for (u = 0; u < size; u++) { + if (table[u].offset < reducerValue) table[u].offset = 0; + else table[u].offset -= reducerValue; + } +} -typedef struct { - unsigned int matchPos; /* Match pos in dst */ - /* If seqDef.offset > 3, then this is seqDef.offset - 3 - * If seqDef.offset < 3, then this is the corresponding repeat offset - * But if seqDef.offset < 3 and litLength == 0, this is the - * repeat offset before the corresponding repeat offset - * And if seqDef.offset == 3 and litLength == 0, this is the - * most recent repeat offset - 1 +size_t ZSTD_ldm_generateSequences( + ldmState_t* ldmState, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize) +{ + U32 const maxDist = 1U << params->windowLog; + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + size_t const kMaxChunkSize = 1 << 20; + size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0); + size_t chunk; + size_t leftoverSize = 0; + + assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize); + /* Check that ZSTD_window_update() has been called for this chunk prior + * to passing it to this function. 
*/ - unsigned int offset; - unsigned int litLength; /* Literal length */ - unsigned int matchLength; /* Match length */ - /* 0 when seq not rep and seqDef.offset otherwise - * when litLength == 0 this will be <= 4, otherwise <= 3 like normal + assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize); + /* The input could be very large (in zstdmt), so it must be broken up into + * chunks to enforce the maximum distance and handle overflow correction. */ - unsigned int rep; -} ZSTD_Sequence; + assert(sequences->pos <= sequences->size); + assert(sequences->size <= sequences->capacity); + for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) { + BYTE const* const chunkStart = istart + chunk * kMaxChunkSize; + size_t const remaining = (size_t)(iend - chunkStart); + BYTE const *const chunkEnd = + (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize; + size_t const chunkSize = chunkEnd - chunkStart; + size_t newLeftoverSize; + size_t const prevSize = sequences->size; -typedef struct { - unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ - unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ - unsigned hashLog; /**< dispatch table : larger == faster, more memory */ - unsigned searchLog; /**< nb of searches : larger == more compression, slower */ - unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */ - unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */ - ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */ -} ZSTD_compressionParameters; + assert(chunkStart < iend); + /* 1. Perform overflow correction if necessary. 
*/ + if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { + U32 const ldmHSize = 1U << params->hashLog; + U32 const correction = ZSTD_window_correctOverflow( + &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); + ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); + /* invalidate dictionaries on overflow correction */ + ldmState->loadedDictEnd = 0; + } + /* 2. We enforce the maximum offset allowed. + * + * kMaxChunkSize should be small enough that we don't lose too much of + * the window through early invalidation. + * TODO: * Test the chunk size. + * * Try invalidation after the sequence generation and test the + * the offset against maxDist directly. + * + * NOTE: Because of dictionaries + sequence splitting we MUST make sure + * that any offset used is valid at the END of the sequence, since it may + * be split into two sequences. This condition holds when using + * ZSTD_window_enforceMaxDist(), but if we move to checking offsets + * against maxDist directly, we'll have to carefully handle that case. + */ + ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL); + /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */ + newLeftoverSize = ZSTD_ldm_generateSequences_internal( + ldmState, sequences, params, chunkStart, chunkSize); + if (ZSTD_isError(newLeftoverSize)) + return newLeftoverSize; + /* 4. We add the leftover literals from previous iterations to the first + * newly generated sequence, or add the `newLeftoverSize` if none are + * generated. 
+ */ + /* Prepend the leftover literals from the last call */ + if (prevSize < sequences->size) { + sequences->seq[prevSize].litLength += (U32)leftoverSize; + leftoverSize = newLeftoverSize; + } else { + assert(newLeftoverSize == chunkSize); + leftoverSize += chunkSize; + } + } + return 0; +} -typedef struct { - int contentSizeFlag; /**< 1: content size will be in frame header (when known) */ - int checksumFlag; /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */ - int noDictIDFlag; /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */ -} ZSTD_frameParameters; +void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) { + while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) { + rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos; + if (srcSize <= seq->litLength) { + /* Skip past srcSize literals */ + seq->litLength -= (U32)srcSize; + return; + } + srcSize -= seq->litLength; + seq->litLength = 0; + if (srcSize < seq->matchLength) { + /* Skip past the first srcSize of the match */ + seq->matchLength -= (U32)srcSize; + if (seq->matchLength < minMatch) { + /* The match is too short, omit it */ + if (rawSeqStore->pos + 1 < rawSeqStore->size) { + seq[1].litLength += seq[0].matchLength; + } + rawSeqStore->pos++; + } + return; + } + srcSize -= seq->matchLength; + seq->matchLength = 0; + rawSeqStore->pos++; + } +} -typedef struct { - ZSTD_compressionParameters cParams; - ZSTD_frameParameters fParams; -} ZSTD_parameters; +/** + * If the sequence length is longer than remaining then the sequence is split + * between this block and the next. + * + * Returns the current sequence to handle, or if the rest of the block should + * be literals, it returns a sequence with offset == 0. 
+ */ +static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, + U32 const remaining, U32 const minMatch) +{ + rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos]; + assert(sequence.offset > 0); + /* Likely: No partial sequence */ + if (remaining >= sequence.litLength + sequence.matchLength) { + rawSeqStore->pos++; + return sequence; + } + /* Cut the sequence short (offset == 0 ==> rest is literals). */ + if (remaining <= sequence.litLength) { + sequence.offset = 0; + } else if (remaining < sequence.litLength + sequence.matchLength) { + sequence.matchLength = remaining - sequence.litLength; + if (sequence.matchLength < minMatch) { + sequence.offset = 0; + } + } + /* Skip past `remaining` bytes for the future sequences. */ + ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch); + return sequence; +} -typedef enum { - ZSTD_dct_auto = 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */ - ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */ - ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */ -} ZSTD_dictContentType_e; +size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + unsigned const minMatch = cParams->minMatch; + ZSTD_blockCompressor const blockCompressor = + ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms)); + /* Input bounds */ + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + /* Input positions */ + BYTE const* ip = istart; -typedef enum { - ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */ - ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive 
its users. */ -} ZSTD_dictLoadMethod_e; + DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize); + assert(rawSeqStore->pos <= rawSeqStore->size); + assert(rawSeqStore->size <= rawSeqStore->capacity); + /* Loop through each sequence and apply the block compressor to the lits */ + while (rawSeqStore->pos < rawSeqStore->size && ip < iend) { + /* maybeSplitSequence updates rawSeqStore->pos */ + rawSeq const sequence = maybeSplitSequence(rawSeqStore, + (U32)(iend - ip), minMatch); + int i; + /* End signal */ + if (sequence.offset == 0) + break; -typedef enum { - ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */ - ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number. - * Useful to save 4 bytes per generated frame. - * Decoder cannot recognise automatically this format, requiring this instruction. */ -} ZSTD_format_e; + assert(ip + sequence.litLength + sequence.matchLength <= iend); -typedef enum { - /* Note: this enum and the behavior it controls are effectively internal - * implementation details of the compressor. They are expected to continue - * to evolve and should be considered only in the context of extremely - * advanced performance tuning. - * - * Zstd currently supports the use of a CDict in three ways: - * - * - The contents of the CDict can be copied into the working context. This - * means that the compression can search both the dictionary and input - * while operating on a single set of internal tables. This makes - * the compression faster per-byte of input. However, the initial copy of - * the CDict's tables incurs a fixed cost at the beginning of the - * compression. For small compressions (< 8 KB), that copy can dominate - * the cost of the compression. - * - * - The CDict's tables can be used in-place. In this model, compression is - * slower per input byte, because the compressor has to search two sets of - * tables. 
However, this model incurs no start-up cost (as long as the - * working context's tables can be reused). For small inputs, this can be - * faster than copying the CDict's tables. - * - * - The CDict's tables are not used at all, and instead we use the working - * context alone to reload the dictionary and use params based on the source - * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). - * This method is effective when the dictionary sizes are very small relative - * to the input size, and the input size is fairly large to begin with. - * - * Zstd has a simple internal heuristic that selects which strategy to use - * at the beginning of a compression. However, if experimentation shows that - * Zstd is making poor choices, it is possible to override that choice with - * this enum. - */ - ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ - ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ - ZSTD_dictForceCopy = 2, /* Always copy the dictionary. 
*/ - ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ -} ZSTD_dictAttachPref_e; + /* Fill tables for block compressor */ + ZSTD_ldm_limitTableUpdate(ms, ip); + ZSTD_ldm_fillFastTables(ms, ip); + /* Run the block compressor */ + DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength); + { + size_t const newLitLength = + blockCompressor(ms, seqStore, rep, ip, sequence.litLength); + ip += sequence.litLength; + /* Update the repcodes */ + for (i = ZSTD_REP_NUM - 1; i > 0; i--) + rep[i] = rep[i-1]; + rep[0] = sequence.offset; + /* Store the sequence */ + ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, + sequence.offset + ZSTD_REP_MOVE, + sequence.matchLength - MINMATCH); + ip += sequence.matchLength; + } + } + /* Fill the tables for the block compressor */ + ZSTD_ldm_limitTableUpdate(ms, ip); + ZSTD_ldm_fillFastTables(ms, ip); + /* Compress the last literals */ + return blockCompressor(ms, seqStore, rep, ip, iend - ip); +} -typedef enum { - ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level. - * Negative compression levels will be uncompressed, and positive compression - * levels will be compressed. */ - ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be - * emitted if Huffman compression is not profitable. */ - ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */ -} ZSTD_literalCompressionMode_e; +} -/*************************************** -* Frame size functions -***************************************/ +// LICENSE_CHANGE_END -/*! ZSTD_findDecompressedSize() : - * `src` should point to the start of a series of ZSTD encoded and/or skippable frames - * `srcSize` must be the _exact_ size of this series - * (i.e. 
there should be a frame boundary at `src + srcSize`) - * @return : - decompressed size of all data in all successive frames - * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN - * - if an error occurred: ZSTD_CONTENTSIZE_ERROR - * - * note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. - * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. - * In which case, it's necessary to use streaming mode to decompress data. - * note 2 : decompressed size is always present when compression is done with ZSTD_compress() - * note 3 : decompressed size can be very large (64-bits value), - * potentially larger than what local system can handle as a single memory segment. - * In which case, it's necessary to use streaming mode to decompress data. - * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. - * Always ensure result fits within application's authorized limits. - * Each application can set its own limits. - * note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to - * read each contained frame header. This is fast as most of the data is skipped, - * however it does mean that all frame data must be present and valid. */ -ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); -/*! ZSTD_decompressBound() : - * `src` should point to the start of a series of ZSTD encoded and/or skippable frames - * `srcSize` must be the _exact_ size of this series - * (i.e. 
there should be a frame boundary at `src + srcSize`) - * @return : - upper-bound for the decompressed size of all data in all successive frames - * - if an error occured: ZSTD_CONTENTSIZE_ERROR +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list + +/* + * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * All rights reserved. * - * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. - * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`. - * in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value. - * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by: - * upper-bound = # blocks * min(128 KB, Window_Size) + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ -ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); -/*! ZSTD_frameHeaderSize() : - * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. - * @return : size of the Frame Header, - * or an error code (if srcSize is too small) */ -ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); -/*! ZSTD_getSequences() : - * Extract sequences from the sequence store - * zc can be used to insert custom compression params. 
- * This function invokes ZSTD_compress2 - * @return : number of sequences extracted - */ -ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, - size_t outSeqsSize, const void* src, size_t srcSize); -/*************************************** -* Memory management -***************************************/ -/*! ZSTD_estimate*() : - * These functions make it possible to estimate memory usage - * of a future {D,C}Ctx, before its creation. - * - * ZSTD_estimateCCtxSize() will provide a memory budget large enough - * for any compression level up to selected one. - * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate - * does not include space for a window buffer. - * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. - * The estimate will assume the input may be arbitrarily large, - * which is the worst case. - * - * When srcSize can be bound by a known and rather "small" value, - * this fact can be used to provide a tighter estimation - * because the CCtx compression context will need less memory. - * This tighter estimation can be provided by more advanced functions - * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), - * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). - * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. - * - * Note 2 : only single-threaded compression is supported. - * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. - */ -ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); -ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); -ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); -ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); -/*! 
ZSTD_estimateCStreamSize() : - * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. - * It will also consider src size to be arbitrarily "large", which is worst case. - * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. - * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. - * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. - * Note : CStream size estimation is only correct for single-threaded compression. - * ZSTD_DStream memory budget depends on window Size. - * This information can be passed manually, using ZSTD_estimateDStreamSize, - * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); - * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), - * an internal ?Dict will be created, which additional size is not estimated here. - * In this case, get total size by adding ZSTD_estimate?DictSize */ -ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel); -ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); -ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); -ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize); -ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); +#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ +#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */ +#define ZSTD_MAX_PRICE (1<<30) -/*! 
ZSTD_estimate?DictSize() : - * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict(). - * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced(). - * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller. - */ -ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); -ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); -ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); +#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ -/*! ZSTD_initStatic*() : - * Initialize an object using a pre-allocated fixed-size buffer. - * workspace: The memory area to emplace the object into. - * Provided pointer *must be 8-bytes aligned*. - * Buffer must outlive object. - * workspaceSize: Use ZSTD_estimate*Size() to determine - * how large workspace must be to support target scenario. - * @return : pointer to object (same address as workspace, just different type), - * or NULL if error (size too small, incorrect alignment, etc.) - * Note : zstd will never resize nor malloc() when using a static buffer. - * If the object requires more memory than available, - * zstd will just error out (typically ZSTD_error_memory_allocation). - * Note 2 : there is no corresponding "free" function. - * Since workspace is allocated externally, it must be freed externally too. - * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level - * into its associated cParams. - * Limitation 1 : currently not compatible with internal dictionary creation, triggered by - * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict(). 
- * Limitation 2 : static cctx currently not compatible with multi-threading. - * Limitation 3 : static dctx is incompatible with legacy support. - */ -ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); -ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */ -ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); -ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */ +/*-************************************* +* Price functions for optimal parser +***************************************/ -ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict( - void* workspace, size_t workspaceSize, - const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters cParams); +#if 0 /* approximation at bit level */ +# define BITCOST_ACCURACY 0 +# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) +# define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat)) +#elif 0 /* fractional bit accuracy */ +# define BITCOST_ACCURACY 8 +# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) +# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat)) +#else /* opt==approx, ultra==accurate */ +# define BITCOST_ACCURACY 8 +# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) +# define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat)) +#endif -ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict( - void* workspace, size_t workspaceSize, - const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType); +namespace duckdb_zstd { +MEM_STATIC U32 ZSTD_bitWeight(U32 stat) +{ + return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER); +} -/*! Custom memory allocation : - * These prototypes make it possible to pass your own allocation/free functions. 
- * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. - * All allocation/free operations will be completed using these custom variants instead of regular ones. - */ -typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); -typedef void (*ZSTD_freeFunction) (void* opaque, void* address); -typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; +MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat) +{ + U32 const stat = rawStat + 1; + U32 const hb = ZSTD_highbit32(stat); + U32 const BWeight = hb * BITCOST_MULTIPLIER; + U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb; + U32 const weight = BWeight + FWeight; + assert(hb + BITCOST_ACCURACY < 31); + return weight; +} -ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); +#if (DEBUGLEVEL>=2) +/* debugging function, + * @return price in bytes as fractional value + * for debug messages only */ +MEM_STATIC double ZSTD_fCost(U32 price) +{ + return (double)price / (BITCOST_MULTIPLIER*8); +} +#endif -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters cParams, - ZSTD_customMem customMem); +static int ZSTD_compressedLiterals(optState_t const* const optPtr) +{ + return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed; +} -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_customMem customMem); +static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) +{ + if (ZSTD_compressedLiterals(optPtr)) 
+ optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel); + optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel); + optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel); + optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel); +} +/* ZSTD_downscaleStat() : + * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus) + * return the resulting sum of elements */ +static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus) +{ + U32 s, sum=0; + DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1); + assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31); + for (s=0; s> (ZSTD_FREQ_DIV+malus)); + sum += table[s]; + } + return sum; +} -/*************************************** -* Advanced compression functions -***************************************/ +/* ZSTD_rescaleFreqs() : + * if first block (detected by optPtr->litLengthSum == 0) : init statistics + * take hints from dictionary if there is one + * or init from zero, using src for literals stats, or flat 1 for match symbols + * otherwise downscale existing stats, to be used as seed for next block. + */ +static void +ZSTD_rescaleFreqs(optState_t* const optPtr, + const BYTE* const src, size_t const srcSize, + int const optLevel) +{ + int const compressedLiterals = ZSTD_compressedLiterals(optPtr); + DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); + optPtr->priceType = zop_dynamic; -/*! ZSTD_createCDict_byReference() : - * Create a digested dictionary for compression - * Dictionary content is just referenced, not duplicated. - * As a consequence, `dictBuffer` **must** outlive CDict, - * and its content must remain unmodified throughout the lifetime of CDict. 
- * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); + if (optPtr->litLengthSum == 0) { /* first block : init */ + if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */ + DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef"); + optPtr->priceType = zop_predef; + } -/*! ZSTD_getCParams() : - * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. - * `estimatedSrcSize` value is optional, select 0 if not known */ -ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + assert(optPtr->symbolCosts != NULL); + if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { + /* huffman table presumed generated by dictionary */ + optPtr->priceType = zop_dynamic; -/*! ZSTD_getParams() : - * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. - * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ -ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + if (compressedLiterals) { + unsigned lit; + assert(optPtr->litFreq != NULL); + optPtr->litSum = 0; + for (lit=0; lit<=MaxLit; lit++) { + U32 const scaleLog = 11; /* scale to 2K */ + U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit); + assert(bitCost <= scaleLog); + optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->litSum += optPtr->litFreq[lit]; + } } -/*! ZSTD_checkCParams() : - * Ensure param values remain within authorized range. 
- * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ -ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); + { unsigned ll; + FSE_CState_t llstate; + FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable); + optPtr->litLengthSum = 0; + for (ll=0; ll<=MaxLL; ll++) { + U32 const scaleLog = 10; /* scale to 1K */ + U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll); + assert(bitCost < scaleLog); + optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->litLengthSum += optPtr->litLengthFreq[ll]; + } } -/*! ZSTD_adjustCParams() : - * optimize params for a given `srcSize` and `dictSize`. - * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. - * `dictSize` must be `0` when there is no dictionary. - * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. - * This function never fails (wide contract) */ -ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); + { unsigned ml; + FSE_CState_t mlstate; + FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable); + optPtr->matchLengthSum = 0; + for (ml=0; ml<=MaxML; ml++) { + U32 const scaleLog = 10; + U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml); + assert(bitCost < scaleLog); + optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->matchLengthSum += optPtr->matchLengthFreq[ml]; + } } -/*! ZSTD_compress_advanced() : - * Note : this function is now DEPRECATED. - * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. 
- * This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */ -ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - ZSTD_parameters params); + { unsigned of; + FSE_CState_t ofstate; + FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable); + optPtr->offCodeSum = 0; + for (of=0; of<=MaxOff; of++) { + U32 const scaleLog = 10; + U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of); + assert(bitCost < scaleLog); + optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->offCodeSum += optPtr->offCodeFreq[of]; + } } -/*! ZSTD_compress_usingCDict_advanced() : - * Note : this function is now REDUNDANT. - * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. - * This prototype will be marked as deprecated and generate compilation warning in some future version */ -ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict, - ZSTD_frameParameters fParams); + } else { /* not a dictionary */ + assert(optPtr->litFreq != NULL); + if (compressedLiterals) { + unsigned lit = MaxLit; + HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ + optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); + } -/*! ZSTD_CCtx_loadDictionary_byReference() : - * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. 
- * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */ -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + { unsigned ll; + for (ll=0; ll<=MaxLL; ll++) + optPtr->litLengthFreq[ll] = 1; + } + optPtr->litLengthSum = MaxLL+1; -/*! ZSTD_CCtx_loadDictionary_advanced() : - * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over - * how to load the dictionary (by copy ? by reference ?) - * and how to interpret it (automatic ? force raw mode ? full mode only ?) */ -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + { unsigned ml; + for (ml=0; ml<=MaxML; ml++) + optPtr->matchLengthFreq[ml] = 1; + } + optPtr->matchLengthSum = MaxML+1; -/*! ZSTD_CCtx_refPrefix_advanced() : - * Same as ZSTD_CCtx_refPrefix(), but gives finer control over - * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ -ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + { unsigned of; + for (of=0; of<=MaxOff; of++) + optPtr->offCodeFreq[of] = 1; + } + optPtr->offCodeSum = MaxOff+1; -/* === experimental parameters === */ -/* these parameters can be used with ZSTD_setParameter() - * they are not guaranteed to remain supported in the future */ + } - /* Enables rsyncable mode, - * which makes compressed files more rsync friendly - * by adding periodic synchronization points to the compressed data. - * The target average block size is ZSTD_c_jobSize / 2. - * It's possible to modify the job size to increase or decrease - * the granularity of the synchronization point. - * Once the jobSize is smaller than the window size, - * it will result in compression ratio degradation. 
- * NOTE 1: rsyncable mode only works when multithreading is enabled. - * NOTE 2: rsyncable performs poorly in combination with long range mode, - * since it will decrease the effectiveness of synchronization points, - * though mileage may vary. - * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s. - * If the selected compression level is already running significantly slower, - * the overall speed won't be significantly impacted. - */ - #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1 + } else { /* new block : re-use previous statistics, scaled down */ -/* Select a compression format. - * The value must be of type ZSTD_format_e. - * See ZSTD_format_e enum definition for details */ -#define ZSTD_c_format ZSTD_c_experimentalParam2 + if (compressedLiterals) + optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); + optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0); + optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0); + optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0); + } -/* Force back-reference distances to remain < windowSize, - * even when referencing into Dictionary content (default:0) */ -#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3 + ZSTD_setBasePrices(optPtr, optLevel); +} -/* Controls whether the contents of a CDict - * are used in place, or copied into the working context. - * Accepts values from the ZSTD_dictAttachPref_e enum. - * See the comments on that enum for an explanation of the feature. */ -#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 +/* ZSTD_rawLiteralsCost() : + * price of literals (only) in specified segment (which length can be 0). + * does not include price of literalLength symbol */ +static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, + const optState_t* const optPtr, + int optLevel) +{ + if (litLength == 0) return 0; -/* Controls how the literals are compressed (default is auto). 
- * The value must be of type ZSTD_literalCompressionMode_e. - * See ZSTD_literalCompressionMode_t enum definition for details. - */ -#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 + if (!ZSTD_compressedLiterals(optPtr)) + return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bytes per literal. */ -/* Tries to fit compressed block size to be around targetCBlockSize. - * No target when targetCBlockSize == 0. - * There is no guarantee on compressed block size (default:0) */ -#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 + if (optPtr->priceType == zop_predef) + return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */ -/* User's best guess of source size. - * Hint is not valid when srcSizeHint == 0. - * There is no guarantee that hint is close to actual source size, - * but compression ratio may regress significantly if guess considerably underestimates */ -#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 + /* dynamic statistics */ + { U32 price = litLength * optPtr->litSumBasePrice; + U32 u; + for (u=0; u < litLength; u++) { + assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */ + price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel); + } + return price; + } +} -/*! ZSTD_CCtx_getParameter() : - * Get the requested compression parameter value, selected by enum ZSTD_cParameter, - * and store it into int* value. - * @return : 0, or an error code (which can be tested with ZSTD_isError()). 
- */ -ZSTDLIB_API size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); +/* ZSTD_litLengthPrice() : + * cost of literalLength symbol */ +static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel) +{ + if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel); + /* dynamic statistics */ + { U32 const llCode = ZSTD_LLcode(litLength); + return (ZSTDInternalConstants::LL_bits[llCode] * BITCOST_MULTIPLIER) + + optPtr->litLengthSumBasePrice + - WEIGHT(optPtr->litLengthFreq[llCode], optLevel); + } +} -/*! ZSTD_CCtx_params : - * Quick howto : - * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure - * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into - * an existing ZSTD_CCtx_params structure. - * This is similar to - * ZSTD_CCtx_setParameter(). - * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to - * an existing CCtx. - * These parameters will be applied to - * all subsequent frames. - * - ZSTD_compressStream2() : Do compression using the CCtx. - * - ZSTD_freeCCtxParams() : Free the memory. - * - * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() - * for static allocation of CCtx for single-threaded compression. - */ -ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); -ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); +/* ZSTD_getMatchPrice() : + * Provides the cost of the match part (offset + matchLength) of a sequence + * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. + * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */ +FORCE_INLINE_TEMPLATE U32 +ZSTD_getMatchPrice(U32 const offset, + U32 const matchLength, + const optState_t* const optPtr, + int const optLevel) +{ + U32 price; + U32 const offCode = ZSTD_highbit32(offset+1); + U32 const mlBase = matchLength - MINMATCH; + assert(matchLength >= MINMATCH); -/*! 
ZSTD_CCtxParams_reset() : - * Reset params to default values. - */ -ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); + if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */ + return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER); -/*! ZSTD_CCtxParams_init() : - * Initializes the compression parameters of cctxParams according to - * compression level. All other parameters are reset to their default values. - */ -ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); + /* dynamic statistics */ + price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel)); + if ((optLevel<2) /*static*/ && offCode >= 20) + price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */ -/*! ZSTD_CCtxParams_init_advanced() : - * Initializes the compression and frame parameters of cctxParams according to - * params. All other parameters are reset to their default values. - */ -ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); + /* match Length */ + { U32 const mlCode = ZSTD_MLcode(mlBase); + price += (ZSTDInternalConstants::ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel)); + } -/*! ZSTD_CCtxParams_setParameter() : - * Similar to ZSTD_CCtx_setParameter. - * Set one compression parameter, selected by enum ZSTD_cParameter. - * Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams(). - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); + price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */ -/*! 
ZSTD_CCtxParams_getParameter() : - * Similar to ZSTD_CCtx_getParameter. - * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); + DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price); + return price; +} -/*! ZSTD_CCtx_setParametersUsingCCtxParams() : - * Apply a set of ZSTD_CCtx_params to the compression context. - * This can be done even after compression is started, - * if nbWorkers==0, this will have no impact until a new compression is started. - * if nbWorkers>=1, new parameters will be picked up at next job, - * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated). - */ -ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( - ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); +/* ZSTD_updateStats() : + * assumption : literals + litLengtn <= iend */ +static void ZSTD_updateStats(optState_t* const optPtr, + U32 litLength, const BYTE* literals, + U32 offsetCode, U32 matchLength) +{ + /* literals */ + if (ZSTD_compressedLiterals(optPtr)) { + U32 u; + for (u=0; u < litLength; u++) + optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; + optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; + } -/*! ZSTD_compressStream2_simpleArgs() : - * Same as ZSTD_compressStream2(), - * but using only integral types as arguments. - * This variant might be helpful for binders from dynamic languages - * which have troubles handling structures containing memory pointers. 
- */ -ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( - ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, size_t* dstPos, - const void* src, size_t srcSize, size_t* srcPos, - ZSTD_EndDirective endOp); + /* literal Length */ + { U32 const llCode = ZSTD_LLcode(litLength); + optPtr->litLengthFreq[llCode]++; + optPtr->litLengthSum++; + } + /* match offset code (0-2=>repCode; 3+=>offset+2) */ + { U32 const offCode = ZSTD_highbit32(offsetCode+1); + assert(offCode <= MaxOff); + optPtr->offCodeFreq[offCode]++; + optPtr->offCodeSum++; + } -/*************************************** -* Advanced decompression functions -***************************************/ + /* match Length */ + { U32 const mlBase = matchLength - MINMATCH; + U32 const mlCode = ZSTD_MLcode(mlBase); + optPtr->matchLengthFreq[mlCode]++; + optPtr->matchLengthSum++; + } +} -/*! ZSTD_isFrame() : - * Tells if the content of `buffer` starts with a valid Frame Identifier. - * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. - * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. - * Note 3 : Skippable Frame Identifiers are considered valid. */ -ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); -/*! ZSTD_createDDict_byReference() : - * Create a digested dictionary, ready to start decompression operation without startup delay. - * Dictionary content is referenced, and therefore stays in dictBuffer. 
- * It is important that dictBuffer outlives DDict, - * it must remain read accessible throughout the lifetime of DDict */ -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); +/* ZSTD_readMINMATCH() : + * function safe only for comparisons + * assumption : memPtr must be at least 4 bytes before end of buffer */ +MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) +{ + switch (length) + { + default : + case 4 : return MEM_read32(memPtr); + case 3 : if (MEM_isLittleEndian()) + return MEM_read32(memPtr)<<8; + else + return MEM_read32(memPtr)>>8; + } +} -/*! ZSTD_DCtx_loadDictionary_byReference() : - * Same as ZSTD_DCtx_loadDictionary(), - * but references `dict` content instead of copying it into `dctx`. - * This saves memory if `dict` remains around., - * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */ -ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -/*! ZSTD_DCtx_loadDictionary_advanced() : - * Same as ZSTD_DCtx_loadDictionary(), - * but gives direct control over - * how to load the dictionary (by copy ? by reference ?) - * and how to interpret it (automatic ? force raw mode ? full mode only ?). */ -ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); +/* Update hashTable3 up to ip (excluded) + Assumption : always within prefix (i.e. not within extDict) */ +static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* const ip) +{ + U32* const hashTable3 = ms->hashTable3; + U32 const hashLog3 = ms->hashLog3; + const BYTE* const base = ms->window.base; + U32 idx = *nextToUpdate3; + U32 const target = (U32)(ip - base); + size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3); + assert(hashLog3 > 0); -/*! 
ZSTD_DCtx_refPrefix_advanced() : - * Same as ZSTD_DCtx_refPrefix(), but gives finer control over - * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ -ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + while(idx < target) { + hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; + idx++; + } -/*! ZSTD_DCtx_setMaxWindowSize() : - * Refuses allocating internal buffers for frames requiring a window size larger than provided limit. - * This protects a decoder context from reserving too much memory for itself (potential attack scenario). - * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. - * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) - * @return : 0, or an error code (which can be tested using ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); + *nextToUpdate3 = target; + return hashTable3[hash3]; +} -/* ZSTD_d_format - * experimental parameter, - * allowing selection between ZSTD_format_e input compression formats - */ -#define ZSTD_d_format ZSTD_d_experimentalParam1 -/* ZSTD_d_stableOutBuffer - * Experimental parameter. - * Default is 0 == disabled. Set to 1 to enable. - * - * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same - * between calls, except for the modifications that zstd makes to pos (the - * caller must not modify pos). This is checked by the decompressor, and - * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer - * MUST be large enough to fit the entire decompressed frame. This will be - * checked when the frame content size is known. The data in the ZSTD_outBuffer - * in the range [dst, dst + pos) MUST not be modified during decompression - * or you will get data corruption. 
- * - * When this flags is enabled zstd won't allocate an output buffer, because - * it can write directly to the ZSTD_outBuffer, but it will still allocate - * an input buffer large enough to fit any compressed block. This will also - * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer. - * If you need to avoid the input buffer allocation use the buffer-less - * streaming API. - * - * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using - * this flag is ALWAYS memory safe, and will never access out-of-bounds - * memory. However, decompression WILL fail if you violate the preconditions. - * - * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST - * not be modified during decompression or you will get data corruption. This - * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate - * matches. Normally zstd maintains its own buffer for this purpose, but passing - * this flag tells zstd to use the user provided buffer. - */ -#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2 -/*! ZSTD_DCtx_setFormat() : - * Instruct the decoder context about what kind of data to decode next. - * This instruction is mandatory to decode data without a fully-formed header, - * such ZSTD_f_zstd1_magicless for example. - * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); +/*-************************************* +* Binary Tree search +***************************************/ +/** ZSTD_insertBt1() : add one or multiple positions to tree. + * ip : assumed <= iend-8 . 
+ * @return : nb of positions added */ +static U32 ZSTD_insertBt1( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + U32 const mls, const int extDict) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 matchIndex = hashTable[h]; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match; + const U32 current = (U32)(ip-base); + const U32 btLow = btMask >= current ? 0 : current - btMask; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = smallerPtr + 1; + U32 dummy32; /* to be nullified at the end */ + U32 const windowLow = ms->window.lowLimit; + U32 matchEndIdx = current+8+1; + size_t bestLength = 8; + U32 nbCompares = 1U << cParams->searchLog; +#ifdef ZSTD_C_PREDICT + U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); + U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1); + predictedSmall += (predictedSmall>0); + predictedLarge += (predictedLarge>0); +#endif /* ZSTD_C_PREDICT */ -/*! ZSTD_decompressStream_simpleArgs() : - * Same as ZSTD_decompressStream(), - * but using only integral types as arguments. - * This can be helpful for binders from dynamic languages - * which have troubles handling structures containing memory pointers. 
- */ -ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( - ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, size_t* dstPos, - const void* src, size_t srcSize, size_t* srcPos); + DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current); + assert(ip <= iend-8); /* required for h calculation */ + hashTable[h] = current; /* Update Hash Table */ -/******************************************************************** -* Advanced streaming functions -* Warning : most of these functions are now redundant with the Advanced API. -* Once Advanced API reaches "stable" status, -* redundant functions will be deprecated, and then at some point removed. -********************************************************************/ + assert(windowLow > 0); + while (nbCompares-- && (matchIndex >= windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(matchIndex < current); -/*===== Advanced Streaming compression functions =====*/ -/**! ZSTD_initCStream_srcSize() : - * This function is deprecated, and equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) - * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * - * pledgedSrcSize must be correct. If it is not known at init time, use - * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, - * "0" also disables frame content size field. It may be enabled in the future. 
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t -ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, - int compressionLevel, - unsigned long long pledgedSrcSize); +#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ + const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ + if (matchIndex == predictedSmall) { + /* no need to check length, result known */ + *smallerPtr = matchIndex; + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + predictedSmall = predictPtr[1] + (predictPtr[1]>0); + continue; + } + if (matchIndex == predictedLarge) { + *largerPtr = matchIndex; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + predictedLarge = predictPtr[0] + (predictPtr[0]>0); + continue; + } +#endif -/**! ZSTD_initCStream_usingDict() : - * This function is deprecated, and is equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); - * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); - * - * Creates of an internal CDict (incompatible with static CCtx), except if - * dict == NULL or dictSize < 8, in which case no dict is used. - * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if - * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. 
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t -ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - int compressionLevel); + if (!extDict || (matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */ + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } -/**! ZSTD_initCStream_advanced() : - * This function is deprecated, and is approximately equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * // Pseudocode: Set each zstd parameter and leave the rest as-is. - * for ((param, value) : params) { - * ZSTD_CCtx_setParameter(zcs, param, value); - * } - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); - * - * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. - * pledgedSrcSize must be correct. - * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t -ZSTD_initCStream_advanced(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - ZSTD_parameters params, - unsigned long long pledgedSrcSize); + if (matchLength > bestLength) { + bestLength = matchLength; + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + } -/**! 
ZSTD_initCStream_usingCDict() : - * This function is deprecated, and equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_refCDict(zcs, cdict); - * - * note : cdict will just be referenced, and must outlive compression session - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } -/**! ZSTD_initCStream_usingCDict_advanced() : - * This function is DEPRECATED, and is approximately equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. - * for ((fParam, value) : fParams) { - * ZSTD_CCtx_setParameter(zcs, fParam, value); - * } - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * ZSTD_CCtx_refCDict(zcs, cdict); - * - * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. - * pledgedSrcSize must be correct. If srcSize is not known at init time, use - * value ZSTD_CONTENTSIZE_UNKNOWN. 
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t -ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, - const ZSTD_CDict* cdict, - ZSTD_frameParameters fParams, - unsigned long long pledgedSrcSize); + if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } -/*! ZSTD_resetCStream() : - * This function is deprecated, and is equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * - * start a new frame, using same parameters from previous frame. - * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. - * Note that zcs must be init at least once before using ZSTD_resetCStream(). - * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. - * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. - * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, - * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. 
- * @return : 0, or an error code (which can be tested using ZSTD_isError()) - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); + *smallerPtr = *largerPtr = 0; + { U32 positions = 0; + if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */ + assert(matchEndIdx > current + 8); + return MAX(positions, matchEndIdx - (current + 8)); + } +} +FORCE_INLINE_TEMPLATE +void ZSTD_updateTree_internal( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + const U32 mls, const ZSTD_dictMode_e dictMode) +{ + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", + idx, target, dictMode); -typedef struct { - unsigned long long ingested; /* nb input bytes read and buffered */ - unsigned long long consumed; /* nb input bytes actually compressed */ - unsigned long long produced; /* nb of compressed bytes generated and buffered */ - unsigned long long flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */ - unsigned currentJobID; /* MT only : latest started job nb */ - unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */ -} ZSTD_frameProgression; + while(idx < target) { + U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict); + assert(idx < (U32)(idx + forward)); + idx += forward; + } + assert((size_t)(ip - base) <= (size_t)(U32)(-1)); + assert((size_t)(iend - base) <= (size_t)(U32)(-1)); + ms->nextToUpdate = target; +} -/* ZSTD_getFrameProgression() : - * tells how much data has been ingested (read from input) - * consumed (input actually compressed) and produced (output) for current frame. 
- * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed. - * Aggregates progression inside active worker threads. - */ -ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); +void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) { + ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict); +} -/*! ZSTD_toFlushNow() : - * Tell how many bytes are ready to be flushed immediately. - * Useful for multithreading scenarios (nbWorkers >= 1). - * Probe the oldest active job, defined as oldest job not yet entirely flushed, - * and check its output buffer. - * @return : amount of data stored in oldest job and ready to be flushed immediately. - * if @return == 0, it means either : - * + there is no active job (could be checked with ZSTD_frameProgression()), or - * + oldest job is still actively compressing data, - * but everything it has produced has also been flushed so far, - * therefore flush speed is limited by production speed of oldest job - * irrespective of the speed of concurrent (and newer) jobs. - */ -ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); +FORCE_INLINE_TEMPLATE +U32 ZSTD_insertBtAndGetAllMatches ( + ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ + ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, + const U32 rep[ZSTD_REP_NUM], + U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ + const U32 lengthToBeat, + U32 const mls /* template */) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); + const BYTE* const base = ms->window.base; + U32 const current = (U32)(ip-base); + U32 const hashLog = cParams->hashLog; + U32 const minMatch = (mls==3) ? 
3 : 4; + U32* const hashTable = ms->hashTable; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 matchIndex = hashTable[h]; + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask= (1U << btLog) - 1; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const dictBase = ms->window.dictBase; + U32 const dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + U32 const btLow = (btMask >= current) ? 0 : current - btMask; + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog); + U32 const matchLow = windowLow ? windowLow : 1; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = bt + 2*(current&btMask) + 1; + U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */ + U32 dummy32; /* to be nullified at the end */ + U32 mnum = 0; + U32 nbCompares = 1U << cParams->searchLog; + const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL; + const ZSTD_compressionParameters* const dmsCParams = + dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL; + const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL; + const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL; + U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0; + U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0; + U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0; + U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog; + U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog; + U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? 
(1U << dmsBtLog) - 1 : 0; + U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit; -/*===== Advanced Streaming decompression functions =====*/ -/** - * This function is deprecated, and is equivalent to: - * - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); - * - * note: no dictionary will be used if dict == NULL or dictSize < 8 - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); + size_t bestLength = lengthToBeat-1; + DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current); -/** - * This function is deprecated, and is equivalent to: - * - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * ZSTD_DCtx_refDDict(zds, ddict); - * - * note : ddict is referenced, it must outlive decompression session - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); + /* check repCode */ + assert(ll0 <= 1); /* necessarily 1 or 0 */ + { U32 const lastR = ZSTD_REP_NUM + ll0; + U32 repCode; + for (repCode = ll0; repCode < lastR; repCode++) { + U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + U32 const repIndex = current - repOffset; + U32 repLen = 0; + assert(current >= dictLimit); + if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */ + /* We must validate the repcode offset because when we're using a dictionary the + * valid offset range shrinks when the dictionary goes out of bounds. 
+ */ + if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) { + repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch; + } + } else { /* repIndex < dictLimit || repIndex >= current */ + const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ? + dmsBase + repIndex - dmsIndexDelta : + dictBase + repIndex; + assert(current >= windowLow); + if ( dictMode == ZSTD_extDict + && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */ + & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */) + && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { + repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch; + } + if (dictMode == ZSTD_dictMatchState + && ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `current > repIndex >= dmsLowLimit` */ + & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */ + && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { + repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch; + } } + /* save longer solution */ + if (repLen > bestLength) { + DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u", + repCode, ll0, repOffset, repLen); + bestLength = repLen; + matches[mnum].off = repCode - ll0; + matches[mnum].len = (U32)repLen; + mnum++; + if ( (repLen > sufficient_len) + | (ip+repLen == iLimit) ) { /* best possible */ + return mnum; + } } } } -/** - * This function is deprecated, and is equivalent to: - * - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * - * re-use decompression parameters from previous init; saves dictionary loading - * 
Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); + /* HC3 match finder */ + if ((mls == 3) /*static*/ && (bestLength < mls)) { + U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip); + if ((matchIndex3 >= matchLow) + & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { + size_t mlen; + if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) { + const BYTE* const match = base + matchIndex3; + mlen = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex3; + mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart); + } + /* save best solution */ + if (mlen >= mls /* == 3 > bestLength */) { + DEBUGLOG(8, "found small match with hlog3, of length %u", + (U32)mlen); + bestLength = mlen; + assert(current > matchIndex3); + assert(mnum==0); /* no prior solution */ + matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE; + matches[0].len = (U32)mlen; + mnum = 1; + if ( (mlen > sufficient_len) | + (ip+mlen == iLimit) ) { /* best possible length */ + ms->nextToUpdate = current+1; /* skip insertion */ + return 1; + } } } + /* no dictMatchState lookup: dicts don't have a populated HC3 table */ + } -/********************************************************************* -* Buffer-less and synchronous inner streaming functions -* -* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. -* But it's also a complex one, with several restrictions, documented below. -* Prefer normal streaming API for an easier experience. 
-********************************************************************* */ + hashTable[h] = current; /* Update Hash Table */ -/** - Buffer-less streaming compression (synchronous mode) + while (nbCompares-- && (matchIndex >= matchLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + const BYTE* match; + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(current > matchIndex); - A ZSTD_CCtx object is required to track streaming operations. - Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. - ZSTD_CCtx object can be re-used multiple times within successive compression operations. + if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */ + match = base + matchIndex; + if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit); + } else { + match = dictBase + matchIndex; + assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* prepare for match[matchLength] read */ + } - Start by initializing a context. - Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression, - or ZSTD_compressBegin_advanced(), for finer parameter control. 
- It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() + if (matchLength > bestLength) { + DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)", + (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE); + assert(matchEndIdx > matchIndex); + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + bestLength = matchLength; + matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE; + matches[mnum].len = (U32)matchLength; + mnum++; + if ( (matchLength > ZSTD_OPT_NUM) + | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { + if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */ + break; /* drop, to preserve bt consistency (miss a little bit of compression) */ + } + } - Then, consume your input using ZSTD_compressContinue(). - There are some important considerations to keep in mind when using this advanced function : - - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only. - - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks. - - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. - Worst case evaluation is provided by ZSTD_compressBound(). - ZSTD_compressContinue() doesn't guarantee recover after a failed compression. - - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog). - It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks) - - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps. - In which case, it will "discard" the relevant memory section from its history. 
+ if (match[matchLength] < ip[matchLength]) { + /* match smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */ + } else { + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } - Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. - It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. - Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. + *smallerPtr = *largerPtr = 0; - `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. 
-*/ + if (dictMode == ZSTD_dictMatchState && nbCompares) { + size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls); + U32 dictMatchIndex = dms->hashTable[dmsH]; + const U32* const dmsBt = dms->chainTable; + commonLengthSmaller = commonLengthLarger = 0; + while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) { + const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match = dmsBase + dictMatchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart); + if (dictMatchIndex+matchLength >= dmsHighLimit) + match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */ -/*===== Buffer-less streaming compression functions =====*/ -ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ -ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */ -ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. 
If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ -ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ + if (matchLength > bestLength) { + matchIndex = dictMatchIndex + dmsIndexDelta; + DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)", + (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE); + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + bestLength = matchLength; + matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE; + matches[mnum].len = (U32)matchLength; + mnum++; + if ( (matchLength > ZSTD_OPT_NUM) + | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } -ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */ + if (match[matchLength] < ip[matchLength]) { + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + commonLengthLarger = matchLength; + dictMatchIndex = nextPtr[0]; + } + } + } + assert(matchEndIdx > current+8); + ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ + return mnum; +} -/*- - Buffer-less streaming decompression (synchronous mode) - A ZSTD_DCtx object is required to track streaming operations. - Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. - A ZSTD_DCtx object can be re-used multiple times. 
+FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( + ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */ + ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode, + const U32 rep[ZSTD_REP_NUM], + U32 const ll0, + U32 const lengthToBeat) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32 const matchLengthSearch = cParams->minMatch; + DEBUGLOG(8, "ZSTD_BtGetAllMatches"); + if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode); + switch(matchLengthSearch) + { + case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3); + default : + case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4); + case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5); + case 7 : + case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6); + } +} - First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). - Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. - Data fragment must be large enough to ensure successful decoding. - `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. - @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. - >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. - errorCode, which can be tested using ZSTD_isError(). 
- It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, - such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). - Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. - As a consequence, check that values remain within valid application range. - For example, do not allocate memory blindly, check that `windowSize` is within expectation. - Each application can set its own limits, depending on local restrictions. - For extended interoperability, it is recommended to support `windowSize` of at least 8 MB. +/*-******************************* +* Optimal parser +*********************************/ - ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes. - ZSTD_decompressContinue() is very sensitive to contiguity, - if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, - or that previous contiguous segment is large enough to properly handle maximum back-reference distance. - There are multiple ways to guarantee this condition. - The most memory efficient way is to use a round buffer of sufficient size. - Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), - which can @return an error code if required value is too large for current system (in 32-bits mode). - In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, - up to the moment there is not enough room left in the buffer to guarantee decoding another full block, - which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. - At which point, decoding can resume from the beginning of the buffer. - Note that already decoded data stored in the buffer should be flushed before being overwritten. 
+static U32 ZSTD_totalLen(ZSTD_optimal_t sol) +{ + return sol.litlen + sol.mlen; +} - There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory. +#if 0 /* debug */ - Finally, if you control the compression process, you can also ignore all buffer size rules, - as long as the encoder and decoder progress in "lock-step", - aka use exactly the same buffer sizes, break contiguity at the same place, etc. +static void +listStats(const U32* table, int lastEltID) +{ + int const nbElts = lastEltID + 1; + int enb; + for (enb=0; enb < nbElts; enb++) { + (void)table; + /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */ + RAWLOG(2, "%4i,", table[enb]); + } + RAWLOG(2, " \n"); +} - Once buffers are setup, start decompression, with ZSTD_decompressBegin(). - If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict(). +#endif - Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. - ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). - ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, + seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const int optLevel, + const ZSTD_dictMode_e dictMode) +{ + optState_t* const optStatePtr = &ms->opt; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const BYTE* const prefixStart = base + ms->window.dictLimit; + const ZSTD_compressionParameters* const cParams = &ms->cParams; - @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). 
- It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. - It can also be an error code, which can be tested with ZSTD_isError(). + U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); + U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4; + U32 nextToUpdate3 = ms->nextToUpdate; - A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. - Context can then be reset to start a new decompression. + ZSTD_optimal_t* const opt = optStatePtr->priceTable; + ZSTD_match_t* const matches = optStatePtr->matchTable; + ZSTD_optimal_t lastSequence; - Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). - This information is not required to properly decode a frame. + /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u", + (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate); + assert(optLevel <= 2); + ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); + ip += (ip==prefixStart); - == Special case : skippable frames == + /* Match Loop */ + while (ip < ilimit) { + U32 cur, last_pos = 0; - Skippable frames allow integration of user-defined data into a flow of concatenated frames. - Skippable frames will be ignored (skipped) by decompressor. - The format of skippable frames is as follows : - a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F - b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits - c) Frame Content - any content (User Data) of length equal to Frame Size - For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame. - For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content. 
-*/ + /* find first match */ + { U32 const litlen = (U32)(ip - anchor); + U32 const ll0 = !litlen; + U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch); + if (!nbMatches) { ip++; continue; } -/*===== Buffer-less streaming decompression functions =====*/ -typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; -typedef struct { - unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ - unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ - unsigned blockSizeMax; - ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ - unsigned headerSize; - unsigned dictID; - unsigned checksumFlag; -} ZSTD_frameHeader; + /* initialize opt[0] */ + { U32 i ; for (i=0; i0, `srcSize` is too small, value is wanted `srcSize` amount, - * or an error code, which can be tested using ZSTD_isError() */ -ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */ -/*! 
ZSTD_getFrameHeader_advanced() : - * same as ZSTD_getFrameHeader(), - * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ -ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); -ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ + /* large match -> immediate encoding */ + { U32 const maxML = matches[nbMatches-1].len; + U32 const maxOffset = matches[nbMatches-1].off; + DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series", + nbMatches, maxML, maxOffset, (U32)(ip-prefixStart)); -ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + if (maxML > sufficient_len) { + lastSequence.litlen = litlen; + lastSequence.mlen = maxML; + lastSequence.off = maxOffset; + DEBUGLOG(6, "large match (%u>%u), immediate encoding", + maxML, sufficient_len); + cur = 0; + last_pos = ZSTD_totalLen(lastSequence); + goto _shortestPath; + } } -ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + /* set prices for first matches starting position == 0 */ + { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel); + U32 pos; + U32 matchNb; + for (pos = 1; pos < minMatch; pos++) { + opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */ + } + for (matchNb = 0; matchNb < nbMatches; matchNb++) { + U32 const offset = matches[matchNb].off; + U32 const end = matches[matchNb].len; + for ( ; 
pos <= end ; pos++ ) { + U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel); + U32 const sequencePrice = literalsPrice + matchPrice; + DEBUGLOG(7, "rPos:%u => set initial price : %.2f", + pos, ZSTD_fCost(sequencePrice)); + opt[pos].mlen = pos; + opt[pos].off = offset; + opt[pos].litlen = litlen; + opt[pos].price = sequencePrice; + } } + last_pos = pos-1; + } + } -/* misc */ -ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); -typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; -ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); + /* check further positions */ + for (cur = 1; cur <= last_pos; cur++) { + const BYTE* const inr = ip + cur; + assert(cur < ZSTD_OPT_NUM); + DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur) + /* Fix current position with one literal if cheaper */ + { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1; + int const price = opt[cur-1].price + + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel) + + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel) + - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel); + assert(price < 1000000000); /* overflow check */ + if (price <= opt[cur].price) { + DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", + inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, + opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); + opt[cur].mlen = 0; + opt[cur].off = 0; + opt[cur].litlen = litlen; + opt[cur].price = price; + } else { + DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)", + inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), + opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]); + } + } + /* Set the repcodes of the current position. 
We must do it here + * because we rely on the repcodes of the 2nd to last sequence being + * correct to set the next chunks repcodes during the backward + * traversal. + */ + ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t)); + assert(cur >= opt[cur].mlen); + if (opt[cur].mlen != 0) { + U32 const prev = cur - opt[cur].mlen; + repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0); + memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t)); + } else { + memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t)); + } + /* last match must start at a minimum distance of 8 from oend */ + if (inr > ilimit) continue; -/* ============================ */ -/** Block level API */ -/* ============================ */ + if (cur == last_pos) break; -/*! - Block functions produce and decode raw zstd blocks, without frame metadata. - Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). - But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. 
+ if ( (optLevel==0) /*static_test*/ + && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) { + DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1); + continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ + } - A few rules to respect : - - Compressing and decompressing require a context structure - + Use ZSTD_createCCtx() and ZSTD_createDCtx() - - It is necessary to init context before starting - + compression : any ZSTD_compressBegin*() variant, including with dictionary - + decompression : any ZSTD_decompressBegin*() variant, including with dictionary - + copyCCtx() and copyDCtx() can be used too - - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB - + If input is larger than a block size, it's necessary to split input data into multiple blocks - + For inputs larger than a single block, consider using regular ZSTD_compress() instead. - Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. - - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! - ===> In which case, nothing is produced into `dst` ! - + User __must__ test for such outcome and deal directly with uncompressed data - + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. - Doing so would mess up with statistics history, leading to potential data corruption. - + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! - + In case of multiple successive blocks, should some of them be uncompressed, - decoder must be informed of their existence in order to follow proper history. - Use ZSTD_insertBlock() for such a case. -*/ + { U32 const ll0 = (opt[cur].mlen != 0); + U32 const litlen = (opt[cur].mlen == 0) ? 
opt[cur].litlen : 0; + U32 const previousPrice = opt[cur].price; + U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); + U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch); + U32 matchNb; + if (!nbMatches) { + DEBUGLOG(7, "rPos:%u : no match found", cur); + continue; + } -/*===== Raw zstd block functions =====*/ -ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); -ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ + { U32 const maxML = matches[nbMatches-1].len; + DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u", + inr-istart, cur, nbMatches, maxML); -} + if ( (maxML > sufficient_len) + || (cur + maxML >= ZSTD_OPT_NUM) ) { + lastSequence.mlen = maxML; + lastSequence.off = matches[nbMatches-1].off; + lastSequence.litlen = litlen; + cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */ + last_pos = cur + ZSTD_totalLen(lastSequence); + if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */ + goto _shortestPath; + } } -#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ + /* set prices using matches found at position == cur */ + for (matchNb = 0; matchNb < nbMatches; matchNb++) { + U32 const offset = matches[matchNb].off; + U32 const lastML = matches[matchNb].len; + U32 const startML = (matchNb>0) ? 
matches[matchNb-1].len+1 : minMatch; + U32 mlen; + DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u", + matchNb, matches[matchNb].off, lastML, litlen); -// LICENSE_CHANGE_END + for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ + U32 const pos = cur + mlen; + int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); + if ((pos > last_pos) || (price < opt[pos].price)) { + DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", + pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); + while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */ + opt[pos].mlen = mlen; + opt[pos].off = offset; + opt[pos].litlen = litlen; + opt[pos].price = price; + } else { + DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)", + pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); + if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */ + } + } } } + } /* for (cur = 1; cur <= last_pos; cur++) */ + lastSequence = opt[last_pos]; + cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */ + assert(cur < ZSTD_OPT_NUM); /* control overflow*/ +_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ + assert(opt[0].mlen == 0); + /* Set the next chunk's repcodes based on the repcodes of the beginning + * of the last match, and the last sequence. This avoids us having to + * update them while traversing the sequences. 
+ */ + if (lastSequence.mlen != 0) { + repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0); + memcpy(rep, &reps, sizeof(reps)); + } else { + memcpy(rep, opt[cur].rep, sizeof(repcodes_t)); + } + { U32 const storeEnd = cur + 1; + U32 storeStart = storeEnd; + U32 seqPos = cur; + DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)", + last_pos, cur); (void)last_pos; + assert(storeEnd < ZSTD_OPT_NUM); + DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", + storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off); + opt[storeEnd] = lastSequence; + while (seqPos > 0) { + U32 const backDist = ZSTD_totalLen(opt[seqPos]); + storeStart--; + DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", + seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off); + opt[storeStart] = opt[seqPos]; + seqPos = (seqPos > backDist) ? seqPos - backDist : 0; + } -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list + /* save sequences */ + DEBUGLOG(6, "sending selected sequences into seqStore") + { U32 storePos; + for (storePos=storeStart; storePos <= storeEnd; storePos++) { + U32 const llen = opt[storePos].litlen; + U32 const mlen = opt[storePos].mlen; + U32 const offCode = opt[storePos].off; + U32 const advance = llen + mlen; + DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u", + anchor - istart, (unsigned)llen, (unsigned)mlen); -/* - * xxHash - Extremely Fast Hash algorithm - * Header File - * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc. 
- * - * You can contact the author at : - * - xxHash source repository : https://github.com/Cyan4973/xxHash - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -*/ + if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */ + assert(storePos == storeEnd); /* must be last sequence */ + ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */ + continue; /* will finish */ + } -/* Notice extracted from xxHash homepage : + assert(anchor + llen <= iend); + ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); + ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH); + anchor += advance; + ip = anchor; + } } + ZSTD_setBasePrices(optStatePtr, optLevel); + } + } /* while (ip < ilimit) */ -xxHash is an extremely fast Hash algorithm, running at RAM speed limits. -It also successfully passes all tests from the SMHasher suite. + /* Return the last literals size */ + return (size_t)(iend - anchor); +} -Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) -Name Speed Q.Score Author -xxHash 5.4 GB/s 10 -CrapWow 3.2 GB/s 2 Andrew -MumurHash 3a 2.7 GB/s 10 Austin Appleby -SpookyHash 2.0 GB/s 10 Bob Jenkins -SBox 1.4 GB/s 9 Bret Mulvey -Lookup3 1.2 GB/s 9 Bob Jenkins -SuperFastHash 1.2 GB/s 1 Paul Hsieh -CityHash64 1.05 GB/s 10 Pike & Alakuijala -FNV 0.55 GB/s 5 Fowler, Noll, Vo -CRC32 0.43 GB/s 9 -MD5-32 0.33 GB/s 10 Ronald L. 
Rivest -SHA1-32 0.28 GB/s 10 +size_t ZSTD_compressBlock_btopt( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock_btopt"); + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict); +} -Q.Score is a measure of quality of the hash function. -It depends on successfully passing SMHasher test set. -10 is a perfect score. -A 64-bits version, named XXH64, is available since r35. -It offers much better speed, but for 64-bits applications only. -Name Speed on 64 bits Speed on 32 bits -XXH64 13.8 GB/s 1.9 GB/s -XXH32 6.8 GB/s 6.0 GB/s -*/ +/* used in 2-pass strategy */ +static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus) +{ + U32 s, sum=0; + assert(ZSTD_FREQ_DIV+bonus >= 0); + for (s=0; slitSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); + optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0); + optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0); + optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0); +} +/* ZSTD_initStats_ultra(): + * make a first compression pass, just to seed stats with more accurate starting values. + * only works on first block, with no dictionary and no ldm. + * this function cannot error, hence its contract must be respected. 
+ */ +static void +ZSTD_initStats_ultra(ZSTD_matchState_t* ms, + seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */ + memcpy(tmpRep, rep, sizeof(tmpRep)); -/* **************************** -* Definitions -******************************/ -#include /* size_t */ -namespace duckdb_zstd { -typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; + DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize); + assert(ms->opt.litLengthSum == 0); /* first block */ + assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */ + assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */ + assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */ + ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/ -/* **************************** -* API modifier -******************************/ -/** XXH_PRIVATE_API -* This is useful if you want to include xxhash functions in `static` mode -* in order to inline them, and remove their symbol from the public list. -* Methodology : -* #define XXH_PRIVATE_API -* #include "zstd/common/xxhash.h" -* `xxhash.c` is automatically included. -* It's not useful to compile and link it as a separate module anymore. 
-*/ -#ifdef XXH_PRIVATE_API -# ifndef XXH_STATIC_LINKING_ONLY -# define XXH_STATIC_LINKING_ONLY -# endif -# if defined(__GNUC__) -# define XXH_PUBLIC_API static __inline __attribute__((unused)) -# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define XXH_PUBLIC_API static inline -# elif defined(_MSC_VER) -# define XXH_PUBLIC_API static __inline -# else -# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ -# endif -#else -# define XXH_PUBLIC_API /* do nothing */ -#endif /* XXH_PRIVATE_API */ + /* invalidate first scan from history */ + ZSTD_resetSeqStore(seqStore); + ms->window.base -= srcSize; + ms->window.dictLimit += (U32)srcSize; + ms->window.lowLimit = ms->window.dictLimit; + ms->nextToUpdate = ms->window.dictLimit; -/*!XXH_NAMESPACE, aka Namespace Emulation : + /* re-inforce weight of collected statistics */ + ZSTD_upscaleStats(&ms->opt); +} -If you want to include _and expose_ xxHash functions from within your own library, -but also want to avoid symbol collisions with another library which also includes xxHash, +size_t ZSTD_compressBlock_btultra( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); +} -you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library -with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). 
+size_t ZSTD_compressBlock_btultra2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + U32 const current = (U32)((const BYTE*)src - ms->window.base); + DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); -Note that no change is required within the calling program as long as it includes `xxhash.h` : -regular symbol name will be automatically translated by this header. -*/ -#ifdef XXH_NAMESPACE -# define XXH_CAT(A,B) A##B -# define XXH_NAME2(A,B) XXH_CAT(A,B) -# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) -# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) -# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) -# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) -# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) -# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) -# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) -# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) -# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) -# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) -# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) -# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) -# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) -# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) -# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) -# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) -# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) -# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) -# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) -#endif - - -/* ************************************* -* Version -***************************************/ -#define XXH_VERSION_MAJOR 0 -#define XXH_VERSION_MINOR 6 -#define 
XXH_VERSION_RELEASE 2 -#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) -XXH_PUBLIC_API unsigned XXH_versionNumber (void); - - -/* **************************** -* Simple Hash Functions -******************************/ -typedef unsigned int XXH32_hash_t; -typedef unsigned long long XXH64_hash_t; - -XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); -XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); + /* 2-pass strategy: + * this strategy makes a first pass over first block to collect statistics + * and seed next round's statistics with it. + * After 1st pass, function forgets everything, and starts a new block. + * Consequently, this can only work if no data has been previously loaded in tables, + * aka, no dictionary, no prefix, no ldm preprocessing. + * The compression ratio gain is generally small (~0.5% on first block), + * the cost is 2x cpu time on first block. */ + assert(srcSize <= ZSTD_BLOCKSIZE_MAX); + if ( (ms->opt.litLengthSum==0) /* first block */ + && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ + && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ + && (current == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ + && (srcSize > ZSTD_PREDEF_THRESHOLD) + ) { + ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); + } -/*! -XXH32() : - Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". - The memory between input & input+length must be valid (allocated and read-accessible). - "seed" can be used to alter the result predictably. - Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s -XXH64() : - Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". - "seed" can be used to alter the result predictably. 
- This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark). -*/ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); +} +size_t ZSTD_compressBlock_btopt_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState); +} -/* **************************** -* Streaming Hash Functions -******************************/ -typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ -typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ +size_t ZSTD_compressBlock_btultra_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState); +} -/*! State allocation, compatible with dynamic libraries */ +size_t ZSTD_compressBlock_btopt_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict); +} -XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); -XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); +size_t ZSTD_compressBlock_btultra_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict); +} -XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); -XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); +/* note : no btultra2 variant for extDict nor dictMatchState, + * because btultra2 is not meant to work with dictionaries + * and is only specific for the first block (no prefix) */ +} 
-/* hash streaming */ -XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); -XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); -XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); +// LICENSE_CHANGE_END -XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); -XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); -XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); -/* -These functions generate the xxHash of an input provided in multiple segments. -Note that, for small input, they are slower than single-call functions, due to state management. -For small input, prefer `XXH32()` and `XXH64()` . +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list -XXH state must first be allocated, using XXH*_createState() . +/* ****************************************************************** + * huff0 huffman decoder, + * part of Finite State Entropy library + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ -Start a new hash by initializing state with a seed, using XXH*_reset(). 
+/* ************************************************************** +* Dependencies +****************************************************************/ +#include /* memcpy, memset */ -Then, feed the hash state by calling XXH*_update() as many times as necessary. -Obviously, input must be allocated and read accessible. -The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. + /* BIT_* */ + /* to compress headers */ -Finally, a hash value can be produced anytime, by using XXH*_digest(). -This function returns the nn-bits hash as an int or long long. -It's still possible to continue inserting input into the hash state after a digest, -and generate some new hashes later on, by calling again XXH*_digest(). -When done, free XXH state space if it was allocated dynamically. -*/ +namespace duckdb_zstd { +/* ************************************************************** +* Macros +****************************************************************/ -/* ************************** -* Utils -****************************/ -#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */ -# define __restrict /* disable restrict */ +/* These two optional macros force the use one way or another of the two + * Huffman decompression implementations. You can't force in both directions + * at the same time. + */ +#if defined(HUF_FORCE_DECOMPRESS_X1) && \ + defined(HUF_FORCE_DECOMPRESS_X2) +#error "Cannot force the use of the X1 and X2 decoders at the same time!" #endif -XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* __restrict dst_state, const XXH32_state_t* __restrict src_state); -XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* __restrict dst_state, const XXH64_state_t* __restrict src_state); - - -/* ************************** -* Canonical representation -****************************/ -/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. 
-* The canonical representation uses human-readable write convention, aka big-endian (large digits first). -* These functions allow transformation of hash result into and from its canonical format. -* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. -*/ -typedef struct { unsigned char digest[4]; } XXH32_canonical_t; -typedef struct { unsigned char digest[8]; } XXH64_canonical_t; -XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); -XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); +/* ************************************************************** +* Error Management +****************************************************************/ +// #define HUF_isError ERR_isError -XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); -XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); -} +/* ************************************************************** +* Byte alignment for workSpace management +****************************************************************/ +#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) +#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) -#endif /* XXHASH_H_5627135585666179 */ +/* ************************************************************** +* BMI2 Variant Wrappers +****************************************************************/ +#if DYNAMIC_BMI2 +#define HUF_DGEN(fn) \ + \ + static size_t fn##_default( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static size_t fn(void* dst, size_t 
dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + if (bmi2) { \ + return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ + } -// LICENSE_CHANGE_END - /* XXH_reset, update, digest */ +#else +#define HUF_DGEN(fn) \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + (void)bmi2; \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list +#endif -/* ================================================================================================ - This section contains definitions which are not guaranteed to remain stable. - They may change in future versions, becoming incompatible with a different version of the library. - They shall only be used with static linking. - Never use these definitions in association with dynamic linking ! -=================================================================================================== */ -#ifndef XXH_STATIC_H_3543687687345 -#define XXH_STATIC_H_3543687687345 +/*-***************************/ +/* generic DTableDesc */ +/*-***************************/ +typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; -namespace duckdb_zstd { +static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) +{ + DTableDesc dtd; + memcpy(&dtd, table, sizeof(dtd)); + return dtd; +} -/* These definitions are only meant to allow allocation of XXH state - statically, on stack, or in a struct for example. - Do not use members directly. 
*/ - struct XXH32_state_s { - unsigned total_len_32; - unsigned large_len; - unsigned v1; - unsigned v2; - unsigned v3; - unsigned v4; - unsigned mem32[4]; /* buffer defined as U32 for alignment */ - unsigned memsize; - unsigned reserved; /* never read nor write, will be removed in a future version */ - }; /* typedef'd to XXH32_state_t */ +#ifndef HUF_FORCE_DECOMPRESS_X2 - struct XXH64_state_s { - unsigned long long total_len; - unsigned long long v1; - unsigned long long v2; - unsigned long long v3; - unsigned long long v4; - unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ - unsigned memsize; - unsigned reserved[2]; /* never read nor write, will be removed in a future version */ - }; /* typedef'd to XXH64_state_t */ +/*-***************************/ +/* single-symbol decoding */ +/*-***************************/ +typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ -} -// # ifdef XXH_PRIVATE_API -// # include "xxhash.cpp" /* include xxhash functions as `static`, for inlining */ -// # endif +size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) +{ + U32 tableLog = 0; + U32 nbSymbols = 0; + size_t iSize; + void* const dtPtr = DTable + 1; + HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; -#endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */ + U32* rankVal; + BYTE* huffWeight; + size_t spaceUsed32 = 0; + rankVal = (U32 *)workSpace + spaceUsed32; + spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1; + huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32); + spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; -// LICENSE_CHANGE_END - /* XXH_reset, update, digest */ + if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); -namespace duckdb_zstd { + DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); + /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... 
*/ -/* ---- static assert (debug) --- */ -#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) -#define ZSTD_isError ERR_isError /* for inlining */ -// #define FSE_isError ERR_isError -// #define HUF_isError ERR_isError + iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); + if (HUF_isError(iSize)) return iSize; + /* Table header */ + { DTableDesc dtd = HUF_getDTableDesc(DTable); + if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ + dtd.tableType = 0; + dtd.tableLog = (BYTE)tableLog; + memcpy(DTable, &dtd, sizeof(dtd)); + } -/*-************************************* -* shared macros -***************************************/ -#undef MIN -#undef MAX -#define MIN(a,b) ((a)<(b) ? (a) : (b)) -#define MAX(a,b) ((a)>(b) ? (a) : (b)) + /* Calculate starting value for each rank */ + { U32 n, nextRankStart = 0; + for (n=1; n> 1; + size_t const uStart = rankVal[w]; + size_t const uEnd = uStart + length; + size_t u; + HUF_DEltX1 D; + D.byte = (BYTE)n; + D.nbBits = (BYTE)(tableLog + 1 - w); + rankVal[w] = (U32)uEnd; + if (length < 4) { + /* Use length in the loop bound so the compiler knows it is short. */ + for (u = 0; u < length; ++u) + dt[uStart + u] = D; + } else { + /* Unroll the loop 4 times, we know it is a power of 2. */ + for (u = uStart; u < uEnd; u += 4) { + dt[u + 0] = D; + dt[u + 1] = D; + dt[u + 2] = D; + dt[u + 3] = D; + } } } } + return iSize; } -/** - * Ignore: this is an internal helper. - * - * We want to force this function invocation to be syntactically correct, but - * we don't want to force runtime evaluation of its arguments. - */ -#define _FORCE_HAS_FORMAT_STRING(...) 
\ - if (0) { \ - _force_has_format_string(__VA_ARGS__); \ - } +size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX1_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); +} -/** - * Return the specified error if the condition evaluates to true. - * - * In debug modes, prints additional information. - * In order to do that (particularly, printing the conditional that failed), - * this can't just wrap RETURN_ERROR(). - */ -#define RETURN_ERROR_IF(cond, err, ...) \ - if (cond) { \ - RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ - __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \ - _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ - RAWLOG(3, ": " __VA_ARGS__); \ - RAWLOG(3, "\n"); \ - return ERROR(err); \ - } +FORCE_INLINE_TEMPLATE BYTE +HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ + BYTE const c = dt[val].byte; + BIT_skipBits(Dstream, dt[val].nbBits); + return c; +} -/** - * Unconditionally return the specified error. - * - * In debug modes, prints additional information. - */ -#define RETURN_ERROR(err, ...) \ - do { \ - RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ - __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \ - _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ - RAWLOG(3, ": " __VA_ARGS__); \ - RAWLOG(3, "\n"); \ - return ERROR(err); \ - } while(0); +#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \ + *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog) -/** - * If the provided expression evaluates to an error code, returns that error code. - * - * In debug modes, prints additional information. - */ -#define FORWARD_IF_ERROR(err, ...) 
\ - do { \ - size_t const err_code = (err); \ - if (ERR_isError(err_code)) { \ - RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ - __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ - _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ - RAWLOG(3, ": " __VA_ARGS__); \ - RAWLOG(3, "\n"); \ - return err_code; \ - } \ - } while(0); +#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) +#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) -/*-************************************* -* Common constants -***************************************/ -#define ZSTD_OPT_NUM (1<<12) +HINT_INLINE size_t +HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; -#define ZSTD_REP_NUM 3 /* number of repcodes */ -#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) + /* up to 4 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_1(p, bitDPtr); + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + } -#define KB *(1 <<10) -#define MB *(1 <<20) -#define GB *(1U<<30) + /* [0-3] symbols remaining */ + if (MEM_32bits()) + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd)) + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); -#define BIT7 128 -#define BIT6 64 -#define BIT5 32 -#define BIT4 16 -#define BIT1 2 -#define BIT0 1 + /* no more data to retrieve from bitstream, no need to reload */ + while (p < pEnd) + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); -#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 + return pEnd-pStart; +} -#define ZSTD_FRAMEIDSIZE 4 /* magic number size */ +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X1_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* 
DTable) +{ + BYTE* op = (BYTE*)dst; + BYTE* const oend = op + dstSize; + const void* dtPtr = DTable + 1; + const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; + BIT_DStream_t bitD; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; -#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); -typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; + HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog); -#define ZSTD_FRAMECHECKSUMSIZE 4 + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); -#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ -#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ + return dstSize; +} -#define HufLog 12 -typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; - -#define LONGNBSEQ 0x7F00 - -#define MINMATCH 3 +FORCE_INLINE_TEMPLATE size_t +HUF_decompress4X1_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + /* Check */ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ -#define Litbits 8 -#define MaxLit ((1< cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); -#define ML_DEFAULTNORMLOG 6 /* for static allocation */ + /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ + for ( ; (endSignal) & (op4 < olimit) ; ) { + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + 
HUF_DECODE_SYMBOLX1_1(op1, &bitD1); + HUF_DECODE_SYMBOLX1_1(op2, &bitD2); + HUF_DECODE_SYMBOLX1_1(op3, &bitD3); + HUF_DECODE_SYMBOLX1_1(op4, &bitD4); + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_0(op1, &bitD1); + HUF_DECODE_SYMBOLX1_0(op2, &bitD2); + HUF_DECODE_SYMBOLX1_0(op3, &bitD3); + HUF_DECODE_SYMBOLX1_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; + } -#define OF_DEFAULTNORMLOG 5 /* for static allocation */ + /* check corruption */ + /* note : should not be necessary : op# advance in lock step, and we control op4. + * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + /* finish bitStreams one by one */ + HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog); -/*-******************************************* -* Shared functions to include for inlining -*********************************************/ -static void ZSTD_copy8(void* dst, const void* src) { -#ifdef __aarch64__ - vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src)); -#else - memcpy(dst, src, 8); -#endif -} + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return 
ERROR(corruption_detected); } -#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } -static void ZSTD_copy16(void* dst, const void* src) { -#ifdef __aarch64__ - vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src)); -#else - memcpy(dst, src, 16); -#endif + /* decoded size */ + return dstSize; + } } -#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } -#define WILDCOPY_OVERLENGTH 32 -#define WILDCOPY_VECLEN 16 -typedef enum { - ZSTD_no_overlap, - ZSTD_overlap_src_before_dst - /* ZSTD_overlap_dst_before_src, */ -} ZSTD_overlap_e; +typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, + const void *cSrc, + size_t cSrcSize, + const HUF_DTable *DTable); -/*! ZSTD_wildcopy() : - * Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0) - * @param ovtype controls the overlap detection - * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. - * - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart. - * The src buffer must be before the dst buffer. - */ -MEM_STATIC FORCE_INLINE_ATTR -void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype) -{ - ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; - const BYTE* ip = (const BYTE*)src; - BYTE* op = (BYTE*)dst; - BYTE* const oend = op + length; +HUF_DGEN(HUF_decompress1X1_usingDTable_internal) +HUF_DGEN(HUF_decompress4X1_usingDTable_internal) - assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); - if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { - /* Handle short offset copies. */ - do { - COPY8(op, ip) - } while (op < oend); - } else { - assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); - /* Separate out the first COPY16() call because the copy length is - * almost certain to be short, so the branches have different - * probabilities. 
Since it is almost certain to be short, only do - * one COPY16() in the first call. Then, do two calls per loop since - * at that point it is more likely to have a high trip count. - */ -#ifndef __aarch64__ - do { - COPY16(op, ip); - } - while (op < oend); -#else - COPY16(op, ip); - if (op >= oend) return; - do { - COPY16(op, ip); - COPY16(op, ip); - } - while (op < oend); -#endif - } -} -MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +size_t HUF_decompress1X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) { - size_t const length = MIN(dstCapacity, srcSize); - if (length > 0) { - memcpy(dst, src, length); - } - return length; + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); } -/* define "workspace is too large" as this number of times larger than needed */ -#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 - -/* when workspace is continuously too large - * during at least this number of times, - * context's memory usage is considered wasteful, - * because it's sized to handle a worst case scenario which rarely happens. 
- * In which case, resize it down to free some memory */ -#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; -/*-******************************************* -* Private declarations -*********************************************/ -typedef struct seqDef_s { - U32 offset; - U16 litLength; - U16 matchLength; -} seqDef; + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} -typedef struct { - seqDef* sequencesStart; - seqDef* sequences; - BYTE* litStart; - BYTE* lit; - BYTE* llCode; - BYTE* mlCode; - BYTE* ofCode; - size_t maxNbSeq; - size_t maxNbLit; - U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */ - U32 longLengthPos; -} seqStore_t; -typedef struct { - U32 litLength; - U32 matchLength; -} ZSTD_sequenceLength; +size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} -/** - * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences - * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength. 
- */ -MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { - ZSTD_sequenceLength seqLen; - seqLen.litLength = seq->litLength; - seqLen.matchLength = seq->matchLength + MINMATCH; - if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { - if (seqStore->longLengthID == 1) { - seqLen.litLength += 0xFFFF; - } - if (seqStore->longLengthID == 2) { - seqLen.matchLength += 0xFFFF; - } - } - return seqLen; + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); } -/** - * Contains the compressed frame size and an upper-bound for the decompressed frame size. - * Note: before using `compressedSize`, check for errors using ZSTD_isError(). - * similarly, before using `decompressedBound`, check for errors using: - * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` - */ -typedef struct { - size_t compressedSize; - unsigned long long decompressedBound; -} ZSTD_frameSizeInfo; /* decompress & legacy */ +size_t HUF_decompress4X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} -const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ -void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ +static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; -/* custom memory allocation functions */ -void* ZSTD_malloc(size_t size, ZSTD_customMem 
customMem); -void* ZSTD_calloc(size_t size, ZSTD_customMem customMem); -void ZSTD_free(void* ptr, ZSTD_customMem customMem); + size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} -MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) { - assert(val != 0); - { -# if defined(_MSC_VER) /* Visual */ - unsigned long r=0; - return _BitScanReverse(&r, val) ? (unsigned)r : 0; -# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ - return __builtin_clz (val) ^ 31; -# elif defined(__ICCARM__) /* IAR Intrinsic */ - return 31 - __CLZ(val); -# else /* Software version */ - static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; - U32 v = val; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; -# endif - } + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); } -/* ZSTD_invalidateRepCodes() : - * ensures next compression will not use repcodes from previous block. - * Note : only works with regular variant; - * do not use with extDict variant ! 
*/ -void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */ +size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif /* HUF_FORCE_DECOMPRESS_X2 */ -typedef struct { - blockType_e blockType; - U32 lastBlock; - U32 origSize; -} blockProperties_t; /* declared here for decompress and fullbench */ -/*! ZSTD_getcBlockSize() : - * Provides the size of compressed block from block header `src` */ -/* Used by: decompress, fullbench (does not get its definition from here) */ -size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, - blockProperties_t* bpPtr); +#ifndef HUF_FORCE_DECOMPRESS_X1 -/*! 
ZSTD_decodeSeqHeaders() : - * decode sequence header from src */ -/* Used by: decompress, fullbench (does not get its definition from here) */ -size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, - const void* src, size_t srcSize); +/* *************************/ +/* double-symbols decoding */ +/* *************************/ -} +typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ +typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; +typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; +typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; -#endif /* ZSTD_CCOMMON_H_MODULE */ +/* HUF_fillDTableX2Level2() : + * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ +static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed, + const U32* rankValOrigin, const int minWeight, + const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, + U32 nbBitsBaseline, U16 baseSeq) +{ + HUF_DEltX2 DElt; + U32 rankVal[HUF_TABLELOG_MAX + 1]; -// LICENSE_CHANGE_END + /* get pre-calculated rankVal */ + memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + /* fill skipped values */ + if (minWeight>1) { + U32 i, skipSize = rankVal[minWeight]; + MEM_writeLE16(&(DElt.sequence), baseSeq); + DElt.nbBits = (BYTE)(consumed); + DElt.length = 1; + for (i = 0; i < skipSize; i++) + DTable[i] = DElt; + } + /* fill DTable */ + { U32 s; for (s=0; s= 1 */ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
- */ + rankVal[weight] += length; + } } +} -#ifndef ZSTD_CWKSP_H -#define ZSTD_CWKSP_H -/*-************************************* -* Dependencies -***************************************/ +static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, + const sortedSymbol_t* sortedList, const U32 sortedListSize, + const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, + const U32 nbBitsBaseline) +{ + U32 rankVal[HUF_TABLELOG_MAX + 1]; + const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ + const U32 minBits = nbBitsBaseline - maxWeight; + U32 s; + memcpy(rankVal, rankValOrigin, sizeof(rankVal)); -/*-************************************* -* Constants -***************************************/ + /* fill DTable */ + for (s=0; s= minBits) { /* enough room for a second symbol */ + U32 sortedRank; + int minWeight = nbBits + scaleLog; + if (minWeight < 1) minWeight = 1; + sortedRank = rankStart[minWeight]; + HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits, + rankValOrigin[nbBits], minWeight, + sortedList+sortedRank, sortedListSize-sortedRank, + nbBitsBaseline, symbol); + } else { + HUF_DEltX2 DElt; + MEM_writeLE16(&(DElt.sequence), symbol); + DElt.nbBits = (BYTE)(nbBits); + DElt.length = 1; + { U32 const end = start + length; + U32 u; + for (u = start; u < end; u++) DTable[u] = DElt; + } } + rankVal[weight] += length; + } +} -namespace duckdb_zstd { +size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + U32 tableLog, maxW, sizeOfSort, nbSymbols; + DTableDesc dtd = HUF_getDTableDesc(DTable); + U32 const maxTableLog = dtd.maxTableLog; + size_t iSize; + void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */ + HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; + U32 *rankStart; -/*-************************************* -* Structures -***************************************/ -typedef enum { - ZSTD_cwksp_alloc_objects, 
- ZSTD_cwksp_alloc_buffers, - ZSTD_cwksp_alloc_aligned -} ZSTD_cwksp_alloc_phase_e; + rankValCol_t* rankVal; + U32* rankStats; + U32* rankStart0; + sortedSymbol_t* sortedSymbol; + BYTE* weightList; + size_t spaceUsed32 = 0; -/** - * Zstd fits all its internal datastructures into a single continuous buffer, - * so that it only needs to perform a single OS allocation (or so that a buffer - * can be provided to it and it can perform no allocations at all). This buffer - * is called the workspace. - * - * Several optimizations complicate that process of allocating memory ranges - * from this workspace for each internal datastructure: - * - * - These different internal datastructures have different setup requirements: - * - * - The static objects need to be cleared once and can then be trivially - * reused for each compression. - * - * - Various buffers don't need to be initialized at all--they are always - * written into before they're read. - * - * - The matchstate tables have a unique requirement that they don't need - * their memory to be totally cleared, but they do need the memory to have - * some bound, i.e., a guarantee that all values in the memory they've been - * allocated is less than some maximum value (which is the starting value - * for the indices that they will then use for compression). When this - * guarantee is provided to them, they can use the memory without any setup - * work. When it can't, they have to clear the area. - * - * - These buffers also have different alignment requirements. - * - * - We would like to reuse the objects in the workspace for multiple - * compressions without having to perform any expensive reallocation or - * reinitialization work. - * - * - We would like to be able to efficiently reuse the workspace across - * multiple compressions **even when the compression parameters change** and - * we need to resize some of the objects (where possible). 
- * - * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp - * abstraction was created. It works as follows: - * - * Workspace Layout: - * - * [ ... workspace ... ] - * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers] - * - * The various objects that live in the workspace are divided into the - * following categories, and are allocated separately: - * - * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict, - * so that literally everything fits in a single buffer. Note: if present, - * this must be the first object in the workspace, since ZSTD_free{CCtx, - * CDict}() rely on a pointer comparison to see whether one or two frees are - * required. - * - * - Fixed size objects: these are fixed-size, fixed-count objects that are - * nonetheless "dynamically" allocated in the workspace so that we can - * control how they're initialized separately from the broader ZSTD_CCtx. - * Examples: - * - Entropy Workspace - * - 2 x ZSTD_compressedBlockState_t - * - CDict dictionary contents - * - * - Tables: these are any of several different datastructures (hash tables, - * chain tables, binary trees) that all respect a common format: they are - * uint32_t arrays, all of whose values are between 0 and (nextSrc - base). - * Their sizes depend on the cparams. - * - * - Aligned: these buffers are used for various purposes that require 4 byte - * alignment, but don't require any initialization before they're used. - * - * - Buffers: these buffers are used for various purposes that don't require - * any alignment or initialization before they're used. This means they can - * be moved around at no cost for a new compression. - * - * Allocating Memory: - * - * The various types of objects must be allocated in order, so they can be - * correctly packed into the workspace buffer. That order is: - * - * 1. Objects - * 2. Buffers - * 3. Aligned - * 4. 
Tables - * - * Attempts to reserve objects of different types out of order will fail. - */ -typedef struct { - void* workspace; - void* workspaceEnd; - - void* objectEnd; - void* tableEnd; - void* tableValidEnd; - void* allocStart; + rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32); + spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2; + rankStats = (U32 *)workSpace + spaceUsed32; + spaceUsed32 += HUF_TABLELOG_MAX + 1; + rankStart0 = (U32 *)workSpace + spaceUsed32; + spaceUsed32 += HUF_TABLELOG_MAX + 2; + sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t); + spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2; + weightList = (BYTE *)((U32 *)workSpace + spaceUsed32); + spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; - int allocFailed; - int workspaceOversizedDuration; - ZSTD_cwksp_alloc_phase_e phase; -} ZSTD_cwksp; + if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); -/*-************************************* -* Functions -***************************************/ + rankStart = rankStart0 + 1; + memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); -MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws); + DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ + if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... 
*/ -MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) { - (void)ws; - assert(ws->workspace <= ws->objectEnd); - assert(ws->objectEnd <= ws->tableEnd); - assert(ws->objectEnd <= ws->tableValidEnd); - assert(ws->tableEnd <= ws->allocStart); - assert(ws->tableValidEnd <= ws->allocStart); - assert(ws->allocStart <= ws->workspaceEnd); -} + iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); + if (HUF_isError(iSize)) return iSize; -/** - * Align must be a power of 2. - */ -MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) { - size_t const mask = align - 1; - assert((align & mask) == 0); - return (size + mask) & ~mask; -} + /* check result */ + if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ -/** - * Use this to determine how much space in the workspace we will consume to - * allocate this object. (Normally it should be exactly the size of the object, - * but under special conditions, like ASAN, where we pad each object, it might - * be larger.) - * - * Since tables aren't currently redzoned, you don't need to call through this - * to figure out how much space you need for the matchState tables. Everything - * else is though. 
- */ -MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) { -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; -#else - return size; -#endif -} + /* find maxWeight */ + for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ -MEM_STATIC void ZSTD_cwksp_internal_advance_phase( - ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) { - assert(phase >= ws->phase); - if (phase > ws->phase) { - if (ws->phase < ZSTD_cwksp_alloc_buffers && - phase >= ZSTD_cwksp_alloc_buffers) { - ws->tableValidEnd = ws->objectEnd; - } - if (ws->phase < ZSTD_cwksp_alloc_aligned && - phase >= ZSTD_cwksp_alloc_aligned) { - /* If unaligned allocations down from a too-large top have left us - * unaligned, we need to realign our alloc ptr. Technically, this - * can consume space that is unaccounted for in the neededSpace - * calculation. However, I believe this can only happen when the - * workspace is too large, and specifically when it is too large - * by a larger margin than the space that will be consumed. */ - /* TODO: cleaner, compiler warning friendly way to do this??? */ - ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1)); - if (ws->allocStart < ws->tableValidEnd) { - ws->tableValidEnd = ws->allocStart; - } + /* Get start index of each weight */ + { U32 w, nextRankStart = 0; + for (w=1; wphase = phase; + rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/ + sizeOfSort = nextRankStart; } -} - -/** - * Returns whether this object/buffer/etc was allocated in this workspace. - */ -MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) { - return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd); -} - -/** - * Internal function. Do not use directly. 
- */ -MEM_STATIC void* ZSTD_cwksp_reserve_internal( - ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) { - void* alloc; - void* bottom = ws->tableEnd; - ZSTD_cwksp_internal_advance_phase(ws, phase); - alloc = (BYTE *)ws->allocStart - bytes; - -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - /* over-reserve space */ - alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; -#endif - DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining", - alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); - ZSTD_cwksp_assert_internal_consistency(ws); - assert(alloc >= bottom); - if (alloc < bottom) { - DEBUGLOG(4, "cwksp: alloc failed!"); - ws->allocFailed = 1; - return NULL; - } - if (alloc < ws->tableValidEnd) { - ws->tableValidEnd = alloc; + /* sort symbols by weight */ + { U32 s; + for (s=0; sallocStart = alloc; -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on - * either size. */ - alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; - __asan_unpoison_memory_region(alloc, bytes); -#endif + /* Build rankVal */ + { U32* const rankVal0 = rankVal[0]; + { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */ + U32 nextRankVal = 0; + U32 w; + for (w=1; w> consumed; + } } } } - return alloc; -} + HUF_fillDTableX2(dt, maxTableLog, + sortedSymbol, sizeOfSort, + rankStart0, rankVal, maxW, + tableLog+1); -/** - * Reserves and returns unaligned memory. - */ -MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) { - return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers); + dtd.tableLog = (BYTE)maxTableLog; + dtd.tableType = 1; + memcpy(DTable, &dtd, sizeof(dtd)); + return iSize; } -/** - * Reserves and returns memory sized on and aligned on sizeof(unsigned). 
- */ -MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) { - assert((bytes & (sizeof(U32)-1)) == 0); - return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned); +size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX2_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); } -/** - * Aligned on sizeof(unsigned). These buffers have the special property that - * their values remain constrained, allowing us to re-use them without - * memset()-ing them. - */ -MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) { - const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned; - void* alloc = ws->tableEnd; - void* end = (BYTE *)alloc + bytes; - void* top = ws->allocStart; - - DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining", - alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); - assert((bytes & (sizeof(U32)-1)) == 0); - ZSTD_cwksp_internal_advance_phase(ws, phase); - ZSTD_cwksp_assert_internal_consistency(ws); - assert(end <= top); - if (end > top) { - DEBUGLOG(4, "cwksp: table alloc failed!"); - ws->allocFailed = 1; - return NULL; - } - ws->tableEnd = end; - -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - __asan_unpoison_memory_region(alloc, bytes); -#endif - return alloc; +FORCE_INLINE_TEMPLATE U32 +HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + memcpy(op, dt+val, 2); + BIT_skipBits(DStream, dt[val].nbBits); + return dt[val].length; } -/** - * Aligned on sizeof(void*). 
- */ -MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) { - size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*)); - void* alloc = ws->objectEnd; - void* end = (BYTE*)alloc + roundedBytes; - -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - /* over-reserve space */ - end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; -#endif - - DEBUGLOG(5, - "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining", - alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes); - assert(((size_t)alloc & (sizeof(void*)-1)) == 0); - assert((bytes & (sizeof(void*)-1)) == 0); - ZSTD_cwksp_assert_internal_consistency(ws); - /* we must be in the first phase, no advance is possible */ - if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) { - DEBUGLOG(4, "cwksp: object alloc failed!"); - ws->allocFailed = 1; - return NULL; - } - ws->objectEnd = end; - ws->tableEnd = end; - ws->tableValidEnd = end; - -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on - * either size. */ - alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; - __asan_unpoison_memory_region(alloc, bytes); -#endif - - return alloc; +FORCE_INLINE_TEMPLATE U32 +HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + memcpy(op, dt+val, 1); + if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); + else { + if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { + BIT_skipBits(DStream, dt[val].nbBits); + if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) + /* ugly hack; works only because it's the last symbol. 
Note : can't easily extract nbBits from just this symbol */ + DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); + } } + return 1; } -MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) { - DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty"); +#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) -#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) - /* To validate that the table re-use logic is sound, and that we don't - * access table space that we haven't cleaned, we re-"poison" the table - * space every time we mark it dirty. */ - { - size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd; - assert(__msan_test_shadow(ws->objectEnd, size) == -1); - __msan_poison(ws->objectEnd, size); - } -#endif +#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) - assert(ws->tableValidEnd >= ws->objectEnd); - assert(ws->tableValidEnd <= ws->allocStart); - ws->tableValidEnd = ws->objectEnd; - ZSTD_cwksp_assert_internal_consistency(ws); -} +#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) -MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) { - DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean"); - assert(ws->tableValidEnd >= ws->objectEnd); - assert(ws->tableValidEnd <= ws->allocStart); - if (ws->tableValidEnd < ws->tableEnd) { - ws->tableValidEnd = ws->tableEnd; - } - ZSTD_cwksp_assert_internal_consistency(ws); -} +HINT_INLINE size_t +HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, + const HUF_DEltX2* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; -/** - * Zero the part of the allocated tables not already marked clean. 
- */ -MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) { - DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables"); - assert(ws->tableValidEnd >= ws->objectEnd); - assert(ws->tableValidEnd <= ws->allocStart); - if (ws->tableValidEnd < ws->tableEnd) { - memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd); + /* up to 8 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_1(p, bitDPtr); + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); } - ZSTD_cwksp_mark_tables_clean(ws); -} -/** - * Invalidates table allocations. - * All other allocations remain valid. - */ -MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) { - DEBUGLOG(4, "cwksp: clearing tables!"); + /* closer to end : up to 2 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - { - size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd; - __asan_poison_memory_region(ws->objectEnd, size); - } -#endif + while (p <= pEnd-2) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ - ws->tableEnd = ws->objectEnd; - ZSTD_cwksp_assert_internal_consistency(ws); + if (p < pEnd) + p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); + + return p-pStart; } -/** - * Invalidates all buffer, aligned, and table allocations. - * Object allocations remain valid. 
- */ -MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) { - DEBUGLOG(4, "cwksp: clearing!"); +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + BIT_DStream_t bitD; -#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) - /* To validate that the context re-use logic is sound, and that we don't - * access stuff that this compression hasn't initialized, we re-"poison" - * the workspace (or at least the non-static, non-table parts of it) - * every time we start a new compression. */ - { - size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd; - __msan_poison(ws->tableValidEnd, size); - } -#endif + /* Init */ + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - { - size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd; - __asan_poison_memory_region(ws->objectEnd, size); + /* decode */ + { BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog); } -#endif - ws->tableEnd = ws->objectEnd; - ws->allocStart = ws->workspaceEnd; - ws->allocFailed = 0; - if (ws->phase > ZSTD_cwksp_alloc_buffers) { - ws->phase = ZSTD_cwksp_alloc_buffers; - } - ZSTD_cwksp_assert_internal_consistency(ws); -} + /* check */ + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); -/** - * The provided workspace takes ownership of the buffer [start, start+size). - * Any existing values in the workspace are ignored (the previously managed - * buffer, if present, must be separately freed). 
- */ -MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) { - DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size); - assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */ - ws->workspace = start; - ws->workspaceEnd = (BYTE*)start + size; - ws->objectEnd = ws->workspace; - ws->tableValidEnd = ws->objectEnd; - ws->phase = ZSTD_cwksp_alloc_objects; - ZSTD_cwksp_clear(ws); - ws->workspaceOversizedDuration = 0; - ZSTD_cwksp_assert_internal_consistency(ws); + /* decoded size */ + return dstSize; } -MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) { - void* workspace = ZSTD_malloc(size, customMem); - DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size); - RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!"); - ZSTD_cwksp_init(ws, workspace, size); - return 0; -} +FORCE_INLINE_TEMPLATE size_t +HUF_decompress4X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ -MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) { - void *ptr = ws->workspace; - DEBUGLOG(4, "cwksp: freeing workspace"); - memset(ws, 0, sizeof(ZSTD_cwksp)); - ZSTD_free(ptr, customMem); -} + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - (sizeof(size_t)-1); + const void* const dtPtr = DTable+1; + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; -/** - * Moves the management of a workspace from one cwksp to another. The src cwksp - * is left in an invalid state (src must be re-init()'ed before its used again). 
- */ -MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) { - *dst = *src; - memset(src, 0, sizeof(ZSTD_cwksp)); -} + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + size_t const segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal = 1; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; -MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { - return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace); -} + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); -MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { - return ws->allocFailed; -} + /* 16-32 symbols per loop (4-8 symbols per stream) */ + for ( ; (endSignal) & (op4 < olimit); ) { +#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + 
HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; +#else + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal = (U32)LIKELY( + (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); +#endif + } -/*-************************************* -* Functions Checking Free Space -***************************************/ + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 already verified within main loop */ -MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) { - 
return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd); -} + /* finish bitStreams one by one */ + HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); -MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) { - return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace; -} + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } -MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) { - return ZSTD_cwksp_check_available( - ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR); + /* decoded size */ + return dstSize; + } } -MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) { - return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace) - && ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION; -} +HUF_DGEN(HUF_decompress1X2_usingDTable_internal) +HUF_DGEN(HUF_decompress4X2_usingDTable_internal) -MEM_STATIC void ZSTD_cwksp_bump_oversized_duration( - ZSTD_cwksp* ws, size_t additionalNeededSpace) { - if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) { - ws->workspaceOversizedDuration++; - } else { - ws->workspaceOversizedDuration = 0; - } +size_t HUF_decompress1X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); } -} +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + 
const BYTE* ip = (const BYTE*) cSrc; -#endif /* ZSTD_CWKSP_H */ + size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} -// LICENSE_CHANGE_END -// #ifdef ZSTD_MULTITHREAD -// # include "zstdmt_compress.h" -// #endif +size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} -/*-************************************* -* Constants -***************************************/ -#define kSearchStrength 8 -#define HASH_READ_SIZE 8 -#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted". - It could be confused for a real successor at index "1", if sorted as larger than its predecessor. - It's not a big deal though : candidate will just be sorted again. - Additionally, candidate position 1 will be lost. - But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. - The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. 
- This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +size_t HUF_decompress4X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} -namespace duckdb_zstd { -/*-************************************* -* Context memory management -***************************************/ -typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; -typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage; +static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; -typedef struct ZSTD_prefixDict_s { - const void* dict; - size_t dictSize; - ZSTD_dictContentType_e dictContentType; -} ZSTD_prefixDict; + size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; -typedef struct { - void* dictBuffer; - void const* dict; - size_t dictSize; - ZSTD_dictContentType_e dictContentType; - ZSTD_CDict* cdict; -} ZSTD_localDict; + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} -typedef struct { - U32 CTable[HUF_CTABLE_SIZE_U32(255)]; - HUF_repeat repeatMode; -} ZSTD_hufCTables_t; +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t 
cSrcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); +} -typedef struct { - FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; - FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; - FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; - FSE_repeat offcode_repeatMode; - FSE_repeat matchlength_repeatMode; - FSE_repeat litlength_repeatMode; -} ZSTD_fseCTables_t; -typedef struct { - ZSTD_hufCTables_t huf; - ZSTD_fseCTables_t fse; -} ZSTD_entropyCTables_t; +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} -typedef struct { - U32 off; - U32 len; -} ZSTD_match_t; +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} -typedef struct { - int price; - U32 off; - U32 mlen; - U32 litlen; - U32 rep[ZSTD_REP_NUM]; -} ZSTD_optimal_t; +#endif /* HUF_FORCE_DECOMPRESS_X1 */ -typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e; -typedef struct { - /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ - unsigned* litFreq; /* table of literals statistics, of size 256 */ - unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */ - unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ - unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ - ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */ - ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */ +/* 
***********************************/ +/* Universal decompression selectors */ +/* ***********************************/ - U32 litSum; /* nb of literals */ - U32 litLengthSum; /* nb of litLength codes */ - U32 matchLengthSum; /* nb of matchLength codes */ - U32 offCodeSum; /* nb of offset codes */ - U32 litSumBasePrice; /* to compare to log2(litfreq) */ - U32 litLengthSumBasePrice; /* to compare to log2(llfreq) */ - U32 matchLengthSumBasePrice;/* to compare to log2(mlfreq) */ - U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ - ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ - const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ - ZSTD_literalCompressionMode_e literalCompressionMode; -} optState_t; +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else + return dtd.tableType ? 
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif +} -typedef struct { - ZSTD_entropyCTables_t entropy; - U32 rep[ZSTD_REP_NUM]; -} ZSTD_compressedBlockState_t; +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else + return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif +} -typedef struct { - BYTE const* nextSrc; /* next block here to continue on current prefix */ - BYTE const* base; /* All regular indexes relative to this position */ - BYTE const* dictBase; /* extDict indexes relative to this position */ - U32 dictLimit; /* below that point, need extDict */ - U32 lowLimit; /* below that point, no more valid data */ -} ZSTD_window_t; -typedef struct ZSTD_matchState_t ZSTD_matchState_t; -struct ZSTD_matchState_t { - ZSTD_window_t window; /* State for window round buffer management */ - U32 loadedDictEnd; /* index of end of dictionary, within context's referential. - * When loadedDictEnd != 0, a dictionary is in use, and still valid. - * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance. - * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity(). 
- * When dict referential is copied into active context (i.e. not attached), - * loadedDictEnd == dictSize, since referential starts from zero. - */ - U32 nextToUpdate; /* index from which to continue table update */ - U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ - U32* hashTable; - U32* hashTable3; - U32* chainTable; - optState_t opt; /* optimal parser state */ - const ZSTD_matchState_t* dictMatchState; - ZSTD_compressionParameters cParams; +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) +typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; +static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = +{ + /* single, double, quad */ + {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ + {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ + {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ + {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ + {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ + {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ + {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ + {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ + {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ + {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ + {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ + {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ + {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ + {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ + {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ + {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ }; +#endif -typedef struct { - ZSTD_compressedBlockState_t* prevCBlock; - ZSTD_compressedBlockState_t* nextCBlock; - ZSTD_matchState_t matchState; -} ZSTD_blockState_t; - -typedef struct { - U32 offset; - U32 checksum; -} ldmEntry_t; +/** HUF_selectDecoder() : + * Tells which 
decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) +{ + assert(dstSize > 0); + assert(dstSize <= 128*1024); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dstSize; + (void)cSrcSize; + return 0; +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dstSize; + (void)cSrcSize; + return 1; +#else + /* decoder timing evaluation */ + { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ + U32 const D256 = (U32)(dstSize >> 8); + U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); + U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); + DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ + return DTime1 < DTime0; + } +#endif +} -typedef struct { - ZSTD_window_t window; /* State for the window round buffer management */ - ldmEntry_t* hashTable; - U32 loadedDictEnd; - BYTE* bucketOffsets; /* Next position in bucket to insert entry */ - U64 hashPower; /* Used to compute the rolling hash. 
- * Depends on ldmParams.minMatchLength */ -} ldmState_t; -typedef struct { - U32 enableLdm; /* 1 if enable long distance matching */ - U32 hashLog; /* Log size of hashTable */ - U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ - U32 minMatchLength; /* Minimum match length */ - U32 hashRateLog; /* Log number of entries to skip */ - U32 windowLog; /* Window log for the LDM */ -} ldmParams_t; +typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); -typedef struct { - U32 offset; - U32 litLength; - U32 matchLength; -} rawSeq; +size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) + static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; +#endif -typedef struct { - rawSeq* seq; /* The start of the sequences */ - size_t pos; /* The position where reading stopped. <= size. */ - size_t size; /* The number of sequences. <= capacity. 
*/ - size_t capacity; /* The capacity starting from `seq` pointer */ -} rawSeqStore_t; + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ -typedef struct { - int collectSequences; - ZSTD_Sequence* seqStart; - size_t seqIndex; - size_t maxSequences; -} SeqCollector; + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); +#else + return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); +#endif + } +} -struct ZSTD_CCtx_params_s { - ZSTD_format_e format; - ZSTD_compressionParameters cParams; - ZSTD_frameParameters fParams; +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ - int compressionLevel; - int forceWindow; /* force back-references to respect limit of - * 1< 63) ? 
ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; + } } -/* ZSTD_MLcode() : - * note : mlBase = matchLength - MINMATCH; - * because it's the format it's stored in seqStore->sequences */ -MEM_STATIC U32 ZSTD_MLcode(U32 mlBase) +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { - static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, - 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, - 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, - 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; - static const U32 ML_deltaCode = 36; - return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); } -typedef struct repcodes_s { - U32 rep[3]; -} repcodes_t; -MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0) +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, + size_t dstSize, const void* cSrc, + size_t cSrcSize, void* workSpace, + size_t wkspSize) { - repcodes_t newReps; - if (offset >= ZSTD_REP_NUM) { /* full offset */ - newReps.rep[2] = rep[1]; - newReps.rep[1] = rep[0]; - newReps.rep[0] = offset - ZSTD_REP_MOVE; - } else { /* repcode */ - U32 const repCode = offset + ll0; - if (repCode > 0) { /* note : if repCode==0, no change */ - U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; - newReps.rep[2] = (repCode >= 2) ? 
rep[1] : rep[2]; - newReps.rep[1] = rep[0]; - newReps.rep[0] = currentOffset; - } else { /* repCode == 0 */ - memcpy(&newReps, rep, sizeof(newReps)); - } + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#else + return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#endif } - return newReps; } -/* ZSTD_cParam_withinBounds: - * @return 1 if value is within cParam bounds, - * 0 otherwise */ -MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) +size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) { - ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); - if (ZSTD_isError(bounds.error)) return 0; - if (value < bounds.lowerBound) return 0; - if (value > bounds.upperBound) return 0; - return 1; -} + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ -/* ZSTD_noCompressBlock() : - * Writes uncompressed block to dst buffer from given src. 
- * Returns the size of the block */ -MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) -{ - U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); - RETURN_ERROR_IF(srcSize + ZSTDInternalConstants::ZSTD_blockHeaderSize > dstCapacity, - dstSize_tooSmall, "dst buf too small for uncompressed block"); - MEM_writeLE24(dst, cBlockHeader24); - memcpy((BYTE*)dst + ZSTDInternalConstants::ZSTD_blockHeaderSize, src, srcSize); - return ZSTDInternalConstants::ZSTD_blockHeaderSize + srcSize; + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#else + return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#endif + } } -MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) +size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) { - BYTE* const op = (BYTE*)dst; - U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3); - RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, ""); - MEM_writeLE24(op, cBlockHeader); - op[3] = src; - return 4; + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); } -/* ZSTD_minGain() : - * minimum compression required - * to generate a compress block or a compressed literals section. 
- * note : use same formula for both situations */ -MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) +size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) { - U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6; - ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); - assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); - return (srcSize >> minlog) + 2; + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else + return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#endif } -MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) { - switch (cctxParams->literalCompressionMode) { - case ZSTD_lcm_huffman: - return 0; - case ZSTD_lcm_uncompressed: - return 1; - default: - assert(0 /* impossible: pre-validated */); - /* fall-through */ - case ZSTD_lcm_auto: - return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); - } -} + const BYTE* ip = (const BYTE*) cSrc; -/*! ZSTD_safecopyLiterals() : - * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w. - * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single - * large copies. 
- */ -static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) { - assert(iend > ilimit_w); - if (ip <= ilimit_w) { - ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap); - op += ilimit_w - ip; - ip = ilimit_w; - } - while (ip < iend) *op++ = *ip++; + size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); } +#endif -/*! ZSTD_storeSeq() : - * Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t. - * `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes). - * `mlBase` : matchLength - MINMATCH - * Allowed to overread literals up to litLimit. -*/ -HINT_INLINE UNUSED_ATTR -void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase) +size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) { - BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; - BYTE const* const litEnd = literals + litLength; -#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6) - static const BYTE* g_start = NULL; - if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ - { U32 const pos = (U32)((const BYTE*)literals - g_start); - DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", - pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode); - } + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + 
(void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else + return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); #endif - assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); - /* copy Literals */ - assert(seqStorePtr->maxNbLit <= 128 KB); - assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); - assert(literals + litLength <= litLimit); - if (litEnd <= litLimit_w) { - /* Common case we can use wildcopy. - * First copy 16 bytes, because literals are likely short. - */ - assert(WILDCOPY_OVERLENGTH >= 16); - ZSTD_copy16(seqStorePtr->lit, literals); - if (litLength > 16) { - ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap); - } - } else { - ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w); - } - seqStorePtr->lit += litLength; - - /* literal Length */ - if (litLength>0xFFFF) { - assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ - seqStorePtr->longLengthID = 1; - seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - } - seqStorePtr->sequences[0].litLength = (U16)litLength; +} - /* match offset */ - seqStorePtr->sequences[0].offset = offCode + 1; +size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); - /* match Length */ - if (mlBase>0xFFFF) { - assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ - seqStorePtr->longLengthID = 2; - seqStorePtr->longLengthPos = 
(U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#else + return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : + HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#endif } - seqStorePtr->sequences[0].matchLength = (U16)mlBase; - - seqStorePtr->sequences++; } - -/*-************************************* -* Match length counter -***************************************/ -static unsigned ZSTD_NbCommonBytes (size_t val) -{ - if (MEM_isLittleEndian()) { - if (MEM_64bits()) { -# if defined(_MSC_VER) && defined(_WIN64) - unsigned long r = 0; - return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0; -# elif defined(__GNUC__) && (__GNUC__ >= 4) - return (__builtin_ctzll((U64)val) >> 3); -# else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, - 0, 3, 1, 3, 1, 4, 2, 7, - 0, 2, 3, 6, 1, 5, 3, 5, - 1, 3, 4, 4, 2, 5, 6, 7, - 7, 0, 1, 2, 3, 3, 4, 6, - 2, 6, 5, 5, 3, 4, 5, 6, - 7, 1, 2, 4, 6, 4, 4, 5, - 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; -# endif - } else { /* 32 bits */ -# if defined(_MSC_VER) - unsigned long r=0; - return _BitScanForward( &r, (U32)val ) ? 
(unsigned)(r >> 3) : 0; -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_ctz((U32)val) >> 3); -# else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, - 3, 2, 2, 1, 3, 2, 0, 1, - 3, 3, 1, 2, 2, 2, 2, 0, - 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif - } - } else { /* Big Endian CPU */ - if (MEM_64bits()) { -# if defined(_MSC_VER) && defined(_WIN64) - unsigned long r = 0; - return _BitScanReverse64( &r, val ) ? (unsigned)(r >> 3) : 0; -# elif defined(__GNUC__) && (__GNUC__ >= 4) - return (__builtin_clzll(val) >> 3); -# else - unsigned r; - const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ - if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; -# endif - } else { /* 32 bits */ -# if defined(_MSC_VER) - unsigned long r = 0; - return _BitScanReverse( &r, (unsigned long)val ) ? 
(unsigned)(r >> 3) : 0; -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_clz((U32)val) >> 3); -# else - unsigned r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; -# endif - } } } -MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) -{ - const BYTE* const pStart = pIn; - const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1); +// LICENSE_CHANGE_END - if (pIn < pInLoopLimit) { - { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); - if (diff) return ZSTD_NbCommonBytes(diff); } - pIn+=sizeof(size_t); pMatch+=sizeof(size_t); - while (pIn < pInLoopLimit) { - size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); - if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } - pIn += ZSTD_NbCommonBytes(diff); - return (size_t)(pIn - pStart); - } } - if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } - if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; } - if ((pIn> (32-h) ; } -MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ +/*-******************************************************* +* Dependencies +*********************************************************/ +#include /* memcpy, memmove, memset */ + /* low level memory routines */ -static const U32 prime4bytes = 2654435761U; -static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } -static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } -static const U64 prime5bytes = 889523592379ULL; -static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } -static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } -static const U64 prime6bytes = 227718039650203ULL; -static size_t ZSTD_hash6(U64 u, U32 h) { 
return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } -static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } -static const U64 prime7bytes = 58295818150454627ULL; -static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } -static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } -static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; -static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } -static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } -MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) -{ - switch(mls) - { - default: - case 4: return ZSTD_hash4Ptr(p, hBits); - case 5: return ZSTD_hash5Ptr(p, hBits); - case 6: return ZSTD_hash6Ptr(p, hBits); - case 7: return ZSTD_hash7Ptr(p, hBits); - case 8: return ZSTD_hash8Ptr(p, hBits); - } -} +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list -/** ZSTD_ipow() : - * Return base^exponent. +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
*/ -static U64 ZSTD_ipow(U64 base, U64 exponent) + + +/* zstd_decompress_internal: + * objects and definitions shared within lib/decompress modules */ + + #ifndef ZSTD_DECOMPRESS_INTERNAL_H + #define ZSTD_DECOMPRESS_INTERNAL_H + + +/*-******************************************************* + * Dependencies + *********************************************************/ + /* BYTE, U16, U32 */ + /* ZSTD_seqSymbol */ + +namespace duckdb_zstd { + +/*-******************************************************* + * Constants + *********************************************************/ +struct ZSTDConstants { + static const U32 LL_base[MaxLL+1]; + static const U32 OF_base[MaxOff+1]; + static const U32 OF_bits[MaxOff+1]; + static const U32 ML_base[MaxML+1]; +}; + + +/*-******************************************************* + * Decompression types + *********************************************************/ + typedef struct { + U32 fastMode; + U32 tableLog; + } ZSTD_seqSymbol_header; + + typedef struct { + U16 nextState; + BYTE nbAdditionalBits; + BYTE nbBits; + U32 baseValue; + } ZSTD_seqSymbol; + + #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) + +typedef struct { + ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ + ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ + ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ + HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ + U32 rep[ZSTD_REP_NUM]; +} ZSTD_entropyDTables_t; + +typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, + ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, + ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, + ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; + +typedef enum { zdss_init=0, zdss_loadHeader, + zdss_read, 
zdss_load, zdss_flush } ZSTD_dStreamStage; + +typedef enum { + ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */ + ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */ + ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ +} ZSTD_dictUses_e; + +typedef enum { + ZSTD_obm_buffered = 0, /* Buffer the output */ + ZSTD_obm_stable = 1 /* ZSTD_outBuffer is stable */ +} ZSTD_outBufferMode_e; + +struct ZSTD_DCtx_s { - U64 power = 1; - while (exponent) { - if (exponent & 1) power *= base; - exponent >>= 1; - base *= base; - } - return power; + const ZSTD_seqSymbol* LLTptr; + const ZSTD_seqSymbol* MLTptr; + const ZSTD_seqSymbol* OFTptr; + const HUF_DTable* HUFptr; + ZSTD_entropyDTables_t entropy; + U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */ + const void* previousDstEnd; /* detect continuity */ + const void* prefixStart; /* start of current segment */ + const void* virtualStart; /* virtual start of previous segment if it was just before current one */ + const void* dictEnd; /* end of previous segment */ + size_t expected; + ZSTD_frameHeader fParams; + U64 decodedSize; + blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ + ZSTD_dStage stage; + U32 litEntropy; + U32 fseEntropy; + XXH64_state_t xxhState; + size_t headerSize; + ZSTD_format_e format; + const BYTE* litPtr; + ZSTD_customMem customMem; + size_t litSize; + size_t rleSize; + size_t staticSize; + int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. 
*/ + + /* dictionary */ + ZSTD_DDict* ddictLocal; + const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ + U32 dictID; + int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ + ZSTD_dictUses_e dictUses; + + /* streaming */ + ZSTD_dStreamStage streamStage; + char* inBuff; + size_t inBuffSize; + size_t inPos; + size_t maxWindowSize; + char* outBuff; + size_t outBuffSize; + size_t outStart; + size_t outEnd; + size_t lhSize; + void* legacyContext; + U32 previousLegacyVersion; + U32 legacyVersion; + U32 hostageByte; + int noForwardProgress; + ZSTD_outBufferMode_e outBufferMode; + ZSTD_outBuffer expectedOutBuffer; + + /* workspace */ + BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; + BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; + + size_t oversizedDuration; + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + void const* dictContentBeginForFuzzing; + void const* dictContentEndForFuzzing; +#endif +}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ + + +/*-******************************************************* + * Shared internal functions + *********************************************************/ + +/*! ZSTD_loadDEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */ +size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, + const void* const dict, size_t const dictSize); + +/*! ZSTD_checkContinuity() : + * check if next `dst` follows previous position, where decompression ended. + * If yes, do nothing (continue on current segment). + * If not, classify previous segment as "external dictionary", and start a new segment. + * This function cannot fail. 
*/ +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst); + } -#define ZSTD_ROLL_HASH_CHAR_OFFSET 10 +#endif /* ZSTD_DECOMPRESS_INTERNAL_H */ -/** ZSTD_rollingHash_append() : - * Add the buffer to the hash value. + +// LICENSE_CHANGE_END + + + +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 +// See the end of this file for a list + +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ -static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size) -{ - BYTE const* istart = (BYTE const*)buf; - size_t pos; - for (pos = 0; pos < size; ++pos) { - hash *= prime8bytes; - hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET; - } - return hash; -} -/** ZSTD_rollingHash_compute() : - * Compute the rolling hash value of the buffer. 
+ +#ifndef ZSTD_DDICT_H +#define ZSTD_DDICT_H + +/*-******************************************************* + * Dependencies + *********************************************************/ +#include /* size_t */ + /* ZSTD_DDict, and several public functions */ + +namespace duckdb_zstd { +/*-******************************************************* + * Interface + *********************************************************/ + +/* note: several prototypes are already published in `zstd.h` : + * ZSTD_createDDict() + * ZSTD_createDDict_byReference() + * ZSTD_createDDict_advanced() + * ZSTD_freeDDict() + * ZSTD_initStaticDDict() + * ZSTD_sizeof_DDict() + * ZSTD_estimateDDictSize() + * ZSTD_getDictID_fromDict() */ -MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size) -{ - return ZSTD_rollingHash_append(0, buf, size); + +const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict); +size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict); + +void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + } -/** ZSTD_rollingHash_primePower() : - * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash - * over a window of length bytes. 
- */ -MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length) +#endif /* ZSTD_DDICT_H */ + + +// LICENSE_CHANGE_END + + +// #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) +// # include "../legacy/zstd_legacy.h" +// #endif + +namespace duckdb_zstd { + +/*-******************************************************* +* Types +*********************************************************/ +struct ZSTD_DDict_s { + void* dictBuffer; + const void* dictContent; + size_t dictSize; + ZSTD_entropyDTables_t entropy; + U32 dictID; + U32 entropyPresent; + ZSTD_customMem cMem; +}; /* typedef'd to ZSTD_DDict within "zstd.h" */ + +const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict) { - return ZSTD_ipow(prime8bytes, length - 1); + assert(ddict != NULL); + return ddict->dictContent; } -/** ZSTD_rollingHash_rotate() : - * Rotate the rolling hash by one byte. - */ -MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower) +size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict) { - hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower; - hash *= prime8bytes; - hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET; - return hash; + assert(ddict != NULL); + return ddict->dictSize; } -/*-************************************* -* Round buffer management -***************************************/ -#if (ZSTD_WINDOWLOG_MAX_64 > 31) -# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX" +void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + DEBUGLOG(4, "ZSTD_copyDDictParameters"); + assert(dctx != NULL); + assert(ddict != NULL); + dctx->dictID = ddict->dictID; + dctx->prefixStart = ddict->dictContent; + dctx->virtualStart = ddict->dictContent; + dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; + dctx->previousDstEnd = dctx->dictEnd; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; 
#endif -/* Max current allowed */ -#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)) -/* Maximum chunk size before overflow correction needs to be called again */ -#define ZSTD_CHUNKSIZE_MAX \ - ( ((U32)-1) /* Maximum ending current index */ \ - - ZSTD_CURRENT_MAX) /* Maximum beginning lowLimit */ + if (ddict->entropyPresent) { + dctx->litEntropy = 1; + dctx->fseEntropy = 1; + dctx->LLTptr = ddict->entropy.LLTable; + dctx->MLTptr = ddict->entropy.MLTable; + dctx->OFTptr = ddict->entropy.OFTable; + dctx->HUFptr = ddict->entropy.hufTable; + dctx->entropy.rep[0] = ddict->entropy.rep[0]; + dctx->entropy.rep[1] = ddict->entropy.rep[1]; + dctx->entropy.rep[2] = ddict->entropy.rep[2]; + } else { + dctx->litEntropy = 0; + dctx->fseEntropy = 0; + } +} -/** - * ZSTD_window_clear(): - * Clears the window containing the history by simply setting it to empty. - */ -MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window) + +static size_t +ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, + ZSTD_dictContentType_e dictContentType) { - size_t const endT = (size_t)(window->nextSrc - window->base); - U32 const end = (U32)endT; + ddict->dictID = 0; + ddict->entropyPresent = 0; + if (dictContentType == ZSTD_dct_rawContent) return 0; - window->lowLimit = end; - window->dictLimit = end; + if (ddict->dictSize < 8) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + { U32 const magic = MEM_readLE32(ddict->dictContent); + if (magic != ZSTD_MAGIC_DICTIONARY) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + } + ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); + + /* load entropy tables */ + RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( + &ddict->entropy, ddict->dictContent, ddict->dictSize)), + dictionary_corrupted, 
""); + ddict->entropyPresent = 1; + return 0; } -/** - * ZSTD_window_hasExtDict(): - * Returns non-zero if the window has a non-empty extDict. - */ -MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window) + +static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) { - return window.lowLimit < window.dictLimit; + if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { + ddict->dictBuffer = NULL; + ddict->dictContent = dict; + if (!dict) dictSize = 0; + } else { + void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem); + ddict->dictBuffer = internalBuffer; + ddict->dictContent = internalBuffer; + if (!internalBuffer) return ERROR(memory_allocation); + memcpy(internalBuffer, dict, dictSize); + } + ddict->dictSize = dictSize; + ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ + + /* parse dictionary content */ + FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); + + return 0; } -/** - * ZSTD_matchState_dictMode(): - * Inspects the provided matchState and figures out what dictMode should be - * passed to the compressor. - */ -MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) +ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem) { - return ZSTD_window_hasExtDict(ms->window) ? - ZSTD_extDict : - ms->dictMatchState != NULL ? 
- ZSTD_dictMatchState : - ZSTD_noDict; + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + + { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem); + if (ddict == NULL) return NULL; + ddict->cMem = customMem; + { size_t const initResult = ZSTD_initDDict_internal(ddict, + dict, dictSize, + dictLoadMethod, dictContentType); + if (ZSTD_isError(initResult)) { + ZSTD_freeDDict(ddict); + return NULL; + } } + return ddict; + } } -/** - * ZSTD_window_needOverflowCorrection(): - * Returns non-zero if the indices are getting too large and need overflow - * protection. - */ -MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, - void const* srcEnd) +/*! ZSTD_createDDict() : +* Create a digested dictionary, to start decompression without startup delay. +* `dict` content is copied inside DDict. +* Consequently, `dict` can be released after `ZSTD_DDict` creation */ +ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) { - U32 const current = (U32)((BYTE const*)srcEnd - window.base); - return current > ZSTD_CURRENT_MAX; + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator); } -/** - * ZSTD_window_correctOverflow(): - * Reduces the indices to protect from index overflow. - * Returns the correction made to the indices, which must be applied to every - * stored index. - * - * The least significant cycleLog bits of the indices must remain the same, - * which may be 0. Every index up to maxDist in the past must be valid. - * NOTE: (maxDist & cycleMask) must be zero. - */ -MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, - U32 maxDist, void const* src) +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, to start decompression without startup delay. + * Dictionary content is simply referenced, it will be accessed during decompression. 
+ * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ +ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) { - /* preemptive overflow correction: - * 1. correction is large enough: - * lowLimit > (3<<29) ==> current > 3<<29 + 1< (3<<29 + 1< (3<<29) - (1< (3<<29) - (1<<30) (NOTE: chainLog <= 30) - * > 1<<29 - * - * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow: - * After correction, current is less than (1<base < 1<<32. - * 3. (cctx->lowLimit + 1< 3<<29 + 1<base); - U32 const currentCycle0 = current & cycleMask; - /* Exclude zero so that newCurrent - maxDist >= 1. */ - U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0; - U32 const newCurrent = currentCycle1 + maxDist; - U32 const correction = current - newCurrent; - assert((maxDist & cycleMask) == 0); - assert(current > newCurrent); - /* Loose bound, should be around 1<<29 (see above) */ - assert(correction > 1<<28); + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator); +} - window->base += correction; - window->dictBase += correction; - if (window->lowLimit <= correction) window->lowLimit = 1; - else window->lowLimit -= correction; - if (window->dictLimit <= correction) window->dictLimit = 1; - else window->dictLimit -= correction; - /* Ensure we can still reference the full window. */ - assert(newCurrent >= maxDist); - assert(newCurrent - maxDist >= 1); - /* Ensure that lowLimit and dictLimit didn't underflow. */ - assert(window->lowLimit <= newCurrent); - assert(window->dictLimit <= newCurrent); - - DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction, - window->lowLimit); - return correction; -} - -/** - * ZSTD_window_enforceMaxDist(): - * Updates lowLimit so that: - * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd - * - * It ensures index is valid as long as index >= lowLimit. 
- * This must be called before a block compression call. - * - * loadedDictEnd is only defined if a dictionary is in use for current compression. - * As the name implies, loadedDictEnd represents the index at end of dictionary. - * The value lies within context's referential, it can be directly compared to blockEndIdx. - * - * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0. - * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit. - * This is because dictionaries are allowed to be referenced fully - * as long as the last byte of the dictionary is in the window. - * Once input has progressed beyond window size, dictionary cannot be referenced anymore. - * - * In normal dict mode, the dictionary lies between lowLimit and dictLimit. - * In dictMatchState mode, lowLimit and dictLimit are the same, - * and the dictionary is below them. - * forceWindow and dictMatchState are therefore incompatible. - */ -MEM_STATIC void -ZSTD_window_enforceMaxDist(ZSTD_window_t* window, - const void* blockEnd, - U32 maxDist, - U32* loadedDictEndPtr, - const ZSTD_matchState_t** dictMatchStatePtr) +const ZSTD_DDict* ZSTD_initStaticDDict( + void* sBuffer, size_t sBufferSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) { - U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); - U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; - DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", - (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); - - /* - When there is no dictionary : loadedDictEnd == 0. - In which case, the test (blockEndIdx > maxDist) is merely to avoid - overflowing next operation `newLowLimit = blockEndIdx - maxDist`. - - When there is a standard dictionary : - Index referential is copied from the dictionary, - which means it starts from 0. 
- In which case, loadedDictEnd == dictSize, - and it makes sense to compare `blockEndIdx > maxDist + dictSize` - since `blockEndIdx` also starts from zero. - - When there is an attached dictionary : - loadedDictEnd is expressed within the referential of the context, - so it can be directly compared against blockEndIdx. - */ - if (blockEndIdx > maxDist + loadedDictEnd) { - U32 const newLowLimit = blockEndIdx - maxDist; - if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit; - if (window->dictLimit < window->lowLimit) { - DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u", - (unsigned)window->dictLimit, (unsigned)window->lowLimit); - window->dictLimit = window->lowLimit; - } - /* On reaching window size, dictionaries are invalidated */ - if (loadedDictEndPtr) *loadedDictEndPtr = 0; - if (dictMatchStatePtr) *dictMatchStatePtr = NULL; + size_t const neededSpace = sizeof(ZSTD_DDict) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); + ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer; + assert(sBuffer != NULL); + assert(dict != NULL); + if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ + if (sBufferSize < neededSpace) return NULL; + if (dictLoadMethod == ZSTD_dlm_byCopy) { + memcpy(ddict+1, dict, dictSize); /* local copy */ + dict = ddict+1; } + if (ZSTD_isError( ZSTD_initDDict_internal(ddict, + dict, dictSize, + ZSTD_dlm_byRef, dictContentType) )) + return NULL; + return ddict; } -/* Similar to ZSTD_window_enforceMaxDist(), - * but only invalidates dictionary - * when input progresses beyond window size. 
- * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL) - * loadedDictEnd uses same referential as window->base - * maxDist is the window size */ -MEM_STATIC void -ZSTD_checkDictValidity(const ZSTD_window_t* window, - const void* blockEnd, - U32 maxDist, - U32* loadedDictEndPtr, - const ZSTD_matchState_t** dictMatchStatePtr) -{ - assert(loadedDictEndPtr != NULL); - assert(dictMatchStatePtr != NULL); - { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); - U32 const loadedDictEnd = *loadedDictEndPtr; - DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", - (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); - assert(blockEndIdx >= loadedDictEnd); - - if (blockEndIdx > loadedDictEnd + maxDist) { - /* On reaching window size, dictionaries are invalidated. - * For simplification, if window size is reached anywhere within next block, - * the dictionary is invalidated for the full block. - */ - DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)"); - *loadedDictEndPtr = 0; - *dictMatchStatePtr = NULL; - } else { - if (*loadedDictEndPtr != 0) { - DEBUGLOG(6, "dictionary considered valid for current block"); - } } } -} - -MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { - memset(window, 0, sizeof(*window)); - window->base = (BYTE const*)""; - window->dictBase = (BYTE const*)""; - window->dictLimit = 1; /* start from 1, so that 1st position is valid */ - window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ - window->nextSrc = window->base + 1; /* see issue #1241 */ -} -/** - * ZSTD_window_update(): - * Updates the window by appending [src, src + srcSize) to the window. - * If it is not contiguous, the current prefix becomes the extDict, and we - * forget about the extDict. Handles overlap of the prefix and extDict. - * Returns non-zero if the segment is contiguous. 
- */ -MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, - void const* src, size_t srcSize) +size_t ZSTD_freeDDict(ZSTD_DDict* ddict) { - BYTE const* const ip = (BYTE const*)src; - U32 contiguous = 1; - DEBUGLOG(5, "ZSTD_window_update"); - if (srcSize == 0) - return contiguous; - assert(window->base != NULL); - assert(window->dictBase != NULL); - /* Check if blocks follow each other */ - if (src != window->nextSrc) { - /* not contiguous */ - size_t const distanceFromBase = (size_t)(window->nextSrc - window->base); - DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit); - window->lowLimit = window->dictLimit; - assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */ - window->dictLimit = (U32)distanceFromBase; - window->dictBase = window->base; - window->base = ip - distanceFromBase; - /* ms->nextToUpdate = window->dictLimit; */ - if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */ - contiguous = 0; - } - window->nextSrc = ip + srcSize; - /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */ - if ( (ip+srcSize > window->dictBase + window->lowLimit) - & (ip < window->dictBase + window->dictLimit)) { - ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase; - U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx; - window->lowLimit = lowLimitMax; - DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit); + if (ddict==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = ddict->cMem; + ZSTD_free(ddict->dictBuffer, cMem); + ZSTD_free(ddict, cMem); + return 0; } - return contiguous; -} - -/** - * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix. 
- */ -MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog) -{ - U32 const maxDistance = 1U << windowLog; - U32 const lowestValid = ms->window.lowLimit; - U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid; - U32 const isDictionary = (ms->loadedDictEnd != 0); - U32 const matchLowest = isDictionary ? lowestValid : withinWindow; - return matchLowest; } -/** - * Returns the lowest allowed match index in the prefix. - */ -MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog) +/*! ZSTD_estimateDDictSize() : + * Estimate amount of memory that will be needed to create a dictionary for decompression. + * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */ +size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod) { - U32 const maxDistance = 1U << windowLog; - U32 const lowestValid = ms->window.dictLimit; - U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid; - U32 const isDictionary = (ms->loadedDictEnd != 0); - U32 const matchLowest = isDictionary ? lowestValid : withinWindow; - return matchLowest; + return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); } - - -/* debug functions */ -#if (DEBUGLEVEL>=2) - -MEM_STATIC double ZSTD_fWeight(U32 rawStat) +size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) { - U32 const fp_accuracy = 8; - U32 const fp_multiplier = (1 << fp_accuracy); - U32 const newStat = rawStat + 1; - U32 const hb = ZSTD_highbit32(newStat); - U32 const BWeight = hb * fp_multiplier; - U32 const FWeight = (newStat << fp_accuracy) >> hb; - U32 const weight = BWeight + FWeight; - assert(hb + fp_accuracy < 31); - return (double)weight / fp_multiplier; + if (ddict==NULL) return 0; /* support sizeof on NULL */ + return sizeof(*ddict) + (ddict->dictBuffer ? 
ddict->dictSize : 0) ; } -/* display a table content, - * listing each element, its frequency, and its predicted bit cost */ -MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) +/*! ZSTD_getDictID_fromDDict() : + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) { - unsigned u, sum; - for (u=0, sum=0; u<=max; u++) sum += table[u]; - DEBUGLOG(2, "total nb elts: %u", sum); - for (u=0; u<=max; u++) { - DEBUGLOG(2, "%2u: %5u (%.2f)", - u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) ); - } + if (ddict==NULL) return 0; + return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); } -#endif - -/* =============================================================== - * Shared internal declarations - * These prototypes may be called from sources not in lib/compress - * =============================================================== */ - -/* ZSTD_loadCEntropy() : - * dict : must point at beginning of a valid zstd dictionary. 
- * return : size of dictionary header (size of magic number + dict ID + entropy tables) - * assumptions : magic number supposed already checked - * and dictSize >= 8 */ -size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, - short* offcodeNCount, unsigned* offcodeMaxValue, - const void* const dict, size_t dictSize); - -void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs); - -/* ============================================================== - * Private declarations - * These prototypes shall only be called from within lib/compress - * ============================================================== */ - -/* ZSTD_getCParamsFromCCtxParams() : - * cParams are built depending on compressionLevel, src size hints, - * LDM and manually set compression parameters. - * Note: srcSizeHint == 0 means 0! - */ -ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( - const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize); - -/*! ZSTD_initCStream_internal() : - * Private use only. Init streaming operation. - * expects params to be valid. - * must receive dict, or cdict, or none, but not both. - * @return : 0, or an error code */ -size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - const ZSTD_CDict* cdict, - const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize); - -void ZSTD_resetSeqStore(seqStore_t* ssPtr); - -/*! ZSTD_getCParamsFromCDict() : - * as the name implies */ -ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); - -/* ZSTD_compressBegin_advanced_internal() : - * Private use only. To be called from zstdmt_compress.c. 
*/ -size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, - const void* dict, size_t dictSize, - ZSTD_dictContentType_e dictContentType, - ZSTD_dictTableLoadMethod_e dtlm, - const ZSTD_CDict* cdict, - const ZSTD_CCtx_params* params, - unsigned long long pledgedSrcSize); - -/* ZSTD_compress_advanced_internal() : - * Private use only. To be called from zstdmt_compress.c. */ -size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - const ZSTD_CCtx_params* params); - - -/* ZSTD_writeLastEmptyBlock() : - * output an empty Block with end-of-frame mark to complete a frame - * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) - * or an error code if `dstCapacity` is too small ( 1 */ -U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat); - } -#endif /* ZSTD_COMPRESS_H */ - // LICENSE_CHANGE_END - // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 // See the end of this file for a list /* @@ -22410,63 +22279,66 @@ U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat); * You may select, at your option, one of the above-listed licenses. 
*/ -#ifndef ZSTD_COMPRESS_SEQUENCES_H -#define ZSTD_COMPRESS_SEQUENCES_H - - /* FSE_repeat, FSE_CTable */ - /* symbolEncodingType_e, ZSTD_strategy */ - -namespace duckdb_zstd { - -typedef enum { - ZSTD_defaultDisallowed = 0, - ZSTD_defaultAllowed = 1 -} ZSTD_defaultPolicy_e; -symbolEncodingType_e -ZSTD_selectEncodingType( - FSE_repeat* repeatMode, unsigned const* count, unsigned const max, - size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, - FSE_CTable const* prevCTable, - short const* defaultNorm, U32 defaultNormLog, - ZSTD_defaultPolicy_e const isDefaultAllowed, - ZSTD_strategy const strategy); +/* *************************************************************** +* Tuning parameters +*****************************************************************/ +/*! + * HEAPMODE : + * Select how default decompression function ZSTD_decompress() allocates its context, + * on stack (0), or into heap (1, default; requires malloc()). + * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected. + */ +#ifndef ZSTD_HEAPMODE +# define ZSTD_HEAPMODE 1 +#endif -size_t -ZSTD_buildCTable(void* dst, size_t dstCapacity, - FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, - unsigned* count, U32 max, - const BYTE* codeTable, size_t nbSeq, - const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, - const FSE_CTable* prevCTable, size_t prevCTableSize, - void* entropyWorkspace, size_t entropyWorkspaceSize); +/*! +* LEGACY_SUPPORT : +* if set to 1+, ZSTD_decompress() can decode older formats (v0.1+) +*/ +#ifndef ZSTD_LEGACY_SUPPORT +# define ZSTD_LEGACY_SUPPORT 0 +#endif -size_t ZSTD_encodeSequences( - void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2); +/*! 
+ * MAXWINDOWSIZE_DEFAULT : + * maximum window size accepted by DStream __by default__. + * Frames requiring more memory will be rejected. + * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize(). + */ +#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT +# define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1) +#endif -size_t ZSTD_fseBitCost( - FSE_CTable const* ctable, - unsigned const* count, - unsigned const max); +/*! + * NO_FORWARD_PROGRESS_MAX : + * maximum allowed nb of calls to ZSTD_decompressStream() + * without any forward progress + * (defined as: no byte read from input, and no byte flushed to output) + * before triggering an error. + */ +#ifndef ZSTD_NO_FORWARD_PROGRESS_MAX +# define ZSTD_NO_FORWARD_PROGRESS_MAX 16 +#endif -size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, - unsigned const* count, unsigned const max); -} +/*-******************************************************* +* Dependencies +*********************************************************/ +#include /* memcpy, memmove, memset */ + /* low level memory routines */ -#endif /* ZSTD_COMPRESS_SEQUENCES_H */ -// LICENSE_CHANGE_END + /* blockProperties_t */ + /* ZSTD_DCtx */ + /* ZSTD_DDictDictContent */ // LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 // See the end of this file for a list /* @@ -22479,16922 +22351,18156 @@ size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, * You may select, at your option, one of the above-listed licenses. 
*/ -#ifndef ZSTD_COMPRESS_LITERALS_H -#define ZSTD_COMPRESS_LITERALS_H - - /* ZSTD_hufCTables_t, ZSTD_minGain() */ - -namespace duckdb_zstd { - -size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize); - -size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); -size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, - ZSTD_hufCTables_t* nextHuf, - ZSTD_strategy strategy, int disableLiteralCompression, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - void* entropyWorkspace, size_t entropyWorkspaceSize, - const int bmi2); +#ifndef ZSTD_DEC_BLOCK_H +#define ZSTD_DEC_BLOCK_H -} +/*-******************************************************* + * Dependencies + *********************************************************/ +#include /* size_t */ + /* DCtx, and some public functions */ + /* blockProperties_t, and some public functions */ + /* ZSTD_seqSymbol */ -#endif /* ZSTD_COMPRESS_LITERALS_H */ +namespace duckdb_zstd { +/* === Prototypes === */ -// LICENSE_CHANGE_END +/* note: prototypes already published within `zstd.h` : + * ZSTD_decompressBlock() + */ +/* note: prototypes already published within `zstd_internal.h` : + * ZSTD_getcBlockSize() + * ZSTD_decodeSeqHeaders() + */ -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list +/* ZSTD_decompressBlock_internal() : + * decompress block, starting at `src`, + * into destination buffer `dst`. + * @return : decompressed block size, + * or an error code (which can be tested using ZSTD_isError()) + */ +size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, const int frame); -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. +/* ZSTD_buildFSETable() : + * generate FSE decoding table for one symbol (ll, ml or off) + * this function must be called with valid parameters only + * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.) + * in which case it cannot fail. + * Internal use only. */ +void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog); -#ifndef ZSTD_FAST_H -#define ZSTD_FAST_H +} - /* U32 */ +#endif /* ZSTD_DEC_BLOCK_H */ +// LICENSE_CHANGE_END + /* ZSTD_decompressBlock_internal */ + +// #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) +// # include "../legacy/zstd_legacy.h" +// #endif namespace duckdb_zstd { +const U32 ZSTDConstants::LL_base[MaxLL+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 28, 32, 40, + 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, + 0x2000, 0x4000, 0x8000, 0x10000 }; -void ZSTD_fillHashTable(ZSTD_matchState_t* ms, - void const* end, ZSTD_dictTableLoadMethod_e dtlm); -size_t ZSTD_compressBlock_fast( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_fast_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_fast_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); +const U32 ZSTDConstants::OF_base[MaxOff+1] = { + 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, + 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 
0x7FFD, + 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, + 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; -} +const U32 ZSTDConstants::OF_bits[MaxOff+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 }; -#endif /* ZSTD_FAST_H */ +const U32 ZSTDConstants::ML_base[MaxML+1] = { + 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 37, 39, 41, 43, 47, 51, 59, + 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, + 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; +const size_t ZSTDInternalConstants::ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; +const U32 ZSTDInternalConstants::LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 2, 2, 3, 3, + 4, 6, 7, 8, 9,10,11,12, + 13,14,15,16 }; +const S16 ZSTDInternalConstants::LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 3, 2, 1, 1, 1, 1, 1, + -1,-1,-1,-1 }; +#define LL_DEFAULTNORMLOG 6 /* for static allocation */ +const U32 ZSTDInternalConstants::LL_defaultNormLog = LL_DEFAULTNORMLOG; +const U32 ZSTDInternalConstants::ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 7, 8, 9,10,11, + 12,13,14,15,16 }; +const S16 ZSTDInternalConstants::ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, + 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1,-1,-1, + -1,-1,-1,-1,-1 }; +#define ML_DEFAULTNORMLOG 6 /* for static allocation */ +const U32 ZSTDInternalConstants::ML_defaultNormLog = ML_DEFAULTNORMLOG; -// LICENSE_CHANGE_END +const S16 ZSTDInternalConstants::OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, + 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 
1, 1, 1, 1, 1, 1, + -1,-1,-1,-1,-1 }; +#define OF_DEFAULTNORMLOG 5 /* for static allocation */ +const U32 ZSTDInternalConstants::OF_defaultNormLog = OF_DEFAULTNORMLOG; +const U32 ZSTDInternalConstants::repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; +const ZSTD_customMem ZSTDInternalConstants::ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */ +/*-************************************************************* +* Context management +***************************************************************/ +size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx) +{ + if (dctx==NULL) return 0; /* support sizeof NULL */ + return sizeof(*dctx) + + ZSTD_sizeof_DDict(dctx->ddictLocal) + + dctx->inBuffSize + dctx->outBuffSize; +} -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list +size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); } -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
- */ -#ifndef ZSTD_DOUBLE_FAST_H -#define ZSTD_DOUBLE_FAST_H +static size_t ZSTD_startingInputLength(ZSTD_format_e format) +{ + size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format); + /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ + assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) ); + return startingInputLength; +} - /* U32 */ - /* ZSTD_CCtx, size_t */ +static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) +{ + dctx->format = ZSTD_f_zstd1; /* ZSTD_decompressBegin() invokes ZSTD_startingInputLength() with argument dctx->format */ + dctx->staticSize = 0; + dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; + dctx->ddict = NULL; + dctx->ddictLocal = NULL; + dctx->dictEnd = NULL; + dctx->ddictIsCold = 0; + dctx->dictUses = ZSTD_dont_use; + dctx->inBuff = NULL; + dctx->inBuffSize = 0; + dctx->outBuffSize = 0; + dctx->streamStage = zdss_init; + dctx->legacyContext = NULL; + dctx->previousLegacyVersion = 0; + dctx->noForwardProgress = 0; + dctx->oversizedDuration = 0; + dctx->bmi2 = 0; + dctx->outBufferMode = ZSTD_obm_buffered; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentEndForFuzzing = NULL; +#endif +} -namespace duckdb_zstd { +ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) +{ + ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace; -void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, - void const* end, ZSTD_dictTableLoadMethod_e dtlm); -size_t ZSTD_compressBlock_doubleFast( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_doubleFast_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_doubleFast_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + if (workspaceSize < 
sizeof(ZSTD_DCtx)) return NULL; /* minimum size */ + ZSTD_initDCtx_internal(dctx); + dctx->staticSize = workspaceSize; + dctx->inBuff = (char*)(dctx+1); + return dctx; } -#endif /* ZSTD_DOUBLE_FAST_H */ +ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) +{ + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(*dctx), customMem); + if (!dctx) return NULL; + dctx->customMem = customMem; + ZSTD_initDCtx_internal(dctx); + return dctx; + } +} -// LICENSE_CHANGE_END +ZSTD_DCtx* ZSTD_createDCtx(void) +{ + DEBUGLOG(3, "ZSTD_createDCtx"); + return ZSTD_createDCtx_advanced(ZSTDInternalConstants::ZSTD_defaultCMem); +} +static void ZSTD_clearDict(ZSTD_DCtx* dctx) +{ + ZSTD_freeDDict(dctx->ddictLocal); + dctx->ddictLocal = NULL; + dctx->ddict = NULL; + dctx->dictUses = ZSTD_dont_use; +} +size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) +{ + if (dctx==NULL) return 0; /* support free on NULL */ + RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx"); + { ZSTD_customMem const cMem = dctx->customMem; + ZSTD_clearDict(dctx); + ZSTD_free(dctx->inBuff, cMem); + dctx->inBuff = NULL; +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (dctx->legacyContext) + ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion); +#endif + ZSTD_free(dctx, cMem); + return 0; + } +} -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list +/* no longer useful */ +void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) +{ + size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx); + memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ +} -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ -#ifndef ZSTD_LAZY_H -#define ZSTD_LAZY_H +/*-************************************************************* + * Frame header decoding + ***************************************************************/ +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. */ +unsigned ZSTD_isFrame(const void* buffer, size_t size) +{ + if (size < ZSTD_FRAMEIDSIZE) return 0; + { U32 const magic = MEM_readLE32(buffer); + if (magic == ZSTD_MAGICNUMBER) return 1; + if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1; + } +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(buffer, size)) return 1; +#endif + return 0; +} +static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; +static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; +/** ZSTD_frameHeaderSize_internal() : + * srcSize must be large enough to reach header size fields. + * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless. 
+ * @return : size of the Frame Header + * or an error code, which can be tested with ZSTD_isError() */ +static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) +{ + size_t const minInputSize = ZSTD_startingInputLength(format); + RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, ""); -namespace duckdb_zstd { + { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; + U32 const dictID= fhd & 3; + U32 const singleSegment = (fhd >> 5) & 1; + U32 const fcsId = fhd >> 6; + return minInputSize + !singleSegment + + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] + + (singleSegment && !fcsId); + } +} -U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); +/** ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_frameHeaderSize_prefix. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ +size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) +{ + return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1); +} -void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ -size_t ZSTD_compressBlock_btlazy2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_lazy2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_lazy( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_greedy( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); +/** ZSTD_getFrameHeader_advanced() : + * decode Frame Header, or require larger `srcSize`. 
+ * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format) +{ + const BYTE* ip = (const BYTE*)src; + size_t const minInputSize = ZSTD_startingInputLength(format); -size_t ZSTD_compressBlock_btlazy2_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_lazy2_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_lazy_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_greedy_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); + memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ + if (srcSize < minInputSize) return minInputSize; + RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter"); -size_t ZSTD_compressBlock_greedy_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_lazy_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_lazy2_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_btlazy2_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t 
srcSize); + if ( (format != ZSTD_f_zstd1_magicless) + && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { + if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + /* skippable frame */ + if (srcSize < ZSTD_SKIPPABLEHEADERSIZE) + return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */ + memset(zfhPtr, 0, sizeof(*zfhPtr)); + zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE); + zfhPtr->frameType = ZSTD_skippableFrame; + return 0; + } + RETURN_ERROR(prefix_unknown, ""); + } -} + /* ensure there is enough `srcSize` to fully read/decode frame header */ + { size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format); + if (srcSize < fhsize) return fhsize; + zfhPtr->headerSize = (U32)fhsize; + } -#endif /* ZSTD_LAZY_H */ + { BYTE const fhdByte = ip[minInputSize-1]; + size_t pos = minInputSize; + U32 const dictIDSizeCode = fhdByte&3; + U32 const checksumFlag = (fhdByte>>2)&1; + U32 const singleSegment = (fhdByte>>5)&1; + U32 const fcsID = fhdByte>>6; + U64 windowSize = 0; + U32 dictID = 0; + U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN; + RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported, + "reserved bits, must be zero"); + if (!singleSegment) { + BYTE const wlByte = ip[pos++]; + U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; + RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, ""); + windowSize = (1ULL << windowLog); + windowSize += (windowSize >> 3) * (wlByte&7); + } + switch(dictIDSizeCode) + { + default: assert(0); /* impossible */ + case 0 : break; + case 1 : dictID = ip[pos]; pos++; break; + case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break; + case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break; + } + switch(fcsID) + { + default: assert(0); /* impossible */ + case 0 : if (singleSegment) frameContentSize = ip[pos]; break; + case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break; + case 2 : frameContentSize = 
MEM_readLE32(ip+pos); break; + case 3 : frameContentSize = MEM_readLE64(ip+pos); break; + } + if (singleSegment) windowSize = frameContentSize; -// LICENSE_CHANGE_END + zfhPtr->frameType = ZSTD_frame; + zfhPtr->frameContentSize = frameContentSize; + zfhPtr->windowSize = windowSize; + zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + zfhPtr->dictID = dictID; + zfhPtr->checksumFlag = checksumFlag; + } + return 0; +} +/** ZSTD_getFrameHeader() : + * decode Frame Header, or require larger `srcSize`. + * note : this function does not consume input, it only reads it. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize) +{ + return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1); +} -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list +/** ZSTD_getFrameContentSize() : + * compatible with legacy mode + * @return : decompressed size of the single frame pointed to be `src` if known, otherwise + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */ +unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize) +{ +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) { + unsigned long long const ret = ZSTD_getDecompressedSize_legacy(src, srcSize); + return ret == 0 ? 
ZSTD_CONTENTSIZE_UNKNOWN : ret; + } +#endif + { ZSTD_frameHeader zfh; + if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0) + return ZSTD_CONTENTSIZE_ERROR; + if (zfh.frameType == ZSTD_skippableFrame) { + return 0; + } else { + return zfh.frameContentSize; + } } +} -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ +static size_t readSkippableFrameSize(void const* src, size_t srcSize) +{ + size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE; + U32 sizeU32; -#ifndef ZSTD_OPT_H -#define ZSTD_OPT_H + RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, ""); + sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); + RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, + frameParameter_unsupported, ""); + { + size_t const skippableSize = skippableHeaderSize + sizeU32; + RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, ""); + return skippableSize; + } +} +/** ZSTD_findDecompressedSize() : + * compatible with legacy mode + * `srcSize` must be the exact length of some number of ZSTD compressed and/or + * skippable frames + * @return : decompressed size of the frames contained */ +unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) +{ + unsigned long long totalDstSize = 0; -namespace duckdb_zstd { + while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) { + U32 const magicNumber = MEM_readLE32(src); -/* used in ZSTD_loadDictionaryContent() */ -void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend); + if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + size_t const skippableSize = readSkippableFrameSize(src, 
srcSize); + if (ZSTD_isError(skippableSize)) { + return ZSTD_CONTENTSIZE_ERROR; + } + assert(skippableSize <= srcSize); -size_t ZSTD_compressBlock_btopt( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_btultra( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_btultra2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; + } + { unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); + if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret; -size_t ZSTD_compressBlock_btopt_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_btultra_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); + /* check for overflow */ + if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR; + totalDstSize += ret; + } + { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); + if (ZSTD_isError(frameSrcSize)) { + return ZSTD_CONTENTSIZE_ERROR; + } -size_t ZSTD_compressBlock_btopt_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_btultra_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); + src = (const BYTE *)src + frameSrcSize; + srcSize -= frameSrcSize; + } + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ - /* note : no btultra2 variant for extDict nor dictMatchState, - * because btultra2 is not meant to work with dictionaries - * and is only specific for the first block (no prefix) */ + if (srcSize) return 
ZSTD_CONTENTSIZE_ERROR; + return totalDstSize; } -#endif /* ZSTD_OPT_H */ - - -// LICENSE_CHANGE_END - +/** ZSTD_getDecompressedSize() : + * compatible with legacy mode + * @return : decompressed size if known, 0 otherwise + note : 0 can mean any of the following : + - frame content is empty + - decompressed size field is not present in frame header + - frame header unknown / not supported + - frame header not complete (`srcSize` too small) */ +unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) +{ + unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN); + return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 0 : ret; +} -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list +/** ZSTD_decodeFrameHeader() : + * `headerSize` must be the size provided by ZSTD_frameHeaderSize(). + * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ +static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) +{ + size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format); + if (ZSTD_isError(result)) return result; /* invalid header */ + RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small"); +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + /* Skip the dictID check in fuzzing mode, because it makes the search + * harder. + */ + RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), + dictionary_wrong, ""); +#endif + if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); + return 0; +} -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ +static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret) +{ + ZSTD_frameSizeInfo frameSizeInfo; + frameSizeInfo.compressedSize = ret; + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + return frameSizeInfo; +} -#ifndef ZSTD_LDM_H -#define ZSTD_LDM_H +static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo; + memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); - /* ldmParams_t, U32 */ - /* ZSTD_CCtx, size_t */ +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) + return ZSTD_findFrameSizeInfoLegacy(src, srcSize); +#endif -/*-************************************* -* Long distance matching -***************************************/ + if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE) + && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); + assert(ZSTD_isError(frameSizeInfo.compressedSize) || + frameSizeInfo.compressedSize <= srcSize); + return frameSizeInfo; + } else { + const BYTE* ip = (const BYTE*)src; + const BYTE* const ipstart = ip; + size_t remainingSize = srcSize; + size_t nbBlocks = 0; + ZSTD_frameHeader zfh; -#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT + /* Extract Frame Header */ + { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); + if (ZSTD_isError(ret)) + return ZSTD_errorFrameSizeInfo(ret); + if (ret > 0) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + } -namespace duckdb_zstd { + ip += zfh.headerSize; + remainingSize -= zfh.headerSize; -void ZSTD_ldm_fillHashTable( - ldmState_t* state, 
const BYTE* ip, - const BYTE* iend, ldmParams_t const* params); + /* Iterate over each block */ + while (1) { + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) + return ZSTD_errorFrameSizeInfo(cBlockSize); -/** - * ZSTD_ldm_generateSequences(): - * - * Generates the sequences using the long distance match finder. - * Generates long range matching sequences in `sequences`, which parse a prefix - * of the source. `sequences` must be large enough to store every sequence, - * which can be checked with `ZSTD_ldm_getMaxNbSeq()`. - * @returns 0 or an error code. - * - * NOTE: The user must have called ZSTD_window_update() for all of the input - * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks. - * NOTE: This function returns an error if it runs out of space to store - * sequences. - */ -size_t ZSTD_ldm_generateSequences( - ldmState_t* ldms, rawSeqStore_t* sequences, - ldmParams_t const* params, void const* src, size_t srcSize); + if (ZSTDInternalConstants::ZSTD_blockHeaderSize + cBlockSize > remainingSize) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); -/** - * ZSTD_ldm_blockCompress(): - * - * Compresses a block using the predefined sequences, along with a secondary - * block compressor. The literals section of every sequence is passed to the - * secondary block compressor, and those sequences are interspersed with the - * predefined sequences. Returns the length of the last literals. - * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed. - * `rawSeqStore.seq` may also be updated to split the last sequence between two - * blocks. - * @return The length of the last literals. - * - * NOTE: The source must be at most the maximum block size, but the predefined - * sequences can be any size, and may be longer than the block. 
In the case that - * they are longer than the block, the last sequences may need to be split into - * two. We handle that case correctly, and update `rawSeqStore` appropriately. - * NOTE: This function does not return any errors. - */ -size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); + ip += ZSTDInternalConstants::ZSTD_blockHeaderSize + cBlockSize; + remainingSize -= ZSTDInternalConstants::ZSTD_blockHeaderSize + cBlockSize; + nbBlocks++; -/** - * ZSTD_ldm_skipSequences(): - * - * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`. - * Avoids emitting matches less than `minMatch` bytes. - * Must be called for data with is not passed to ZSTD_ldm_blockCompress(). - */ -void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, - U32 const minMatch); + if (blockProperties.lastBlock) break; + } + /* Final frame content checksum */ + if (zfh.checksumFlag) { + if (remainingSize < 4) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + ip += 4; + } -/** ZSTD_ldm_getTableSize() : - * Estimate the space needed for long distance matching tables or 0 if LDM is - * disabled. - */ -size_t ZSTD_ldm_getTableSize(ldmParams_t params); + frameSizeInfo.compressedSize = ip - ipstart; + frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) + ? zfh.frameContentSize + : nbBlocks * zfh.blockSizeMax; + return frameSizeInfo; + } +} -/** ZSTD_ldm_getSeqSpace() : - * Return an upper bound on the number of sequences that can be produced by - * the long distance matcher, or 0 if LDM is disabled. 
- */ -size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize); +/** ZSTD_findFrameCompressedSize() : + * compatible with legacy mode + * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame + * `srcSize` must be at least as large as the frame contained + * @return : the compressed size of the frame starting at `src` */ +size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); + return frameSizeInfo.compressedSize; +} -/** ZSTD_ldm_adjustParameters() : - * If the params->hashRateLog is not set, set it to its default value based on - * windowLog and params->hashLog. - * - * Ensures that params->bucketSizeLog is <= params->hashLog (setting it to - * params->hashLog if it is not). - * - * Ensures that the minMatchLength >= targetLength during optimal parsing. +/** ZSTD_decompressBound() : + * compatible with legacy mode + * `src` must point to the start of a ZSTD frame or a skippeable frame + * `srcSize` must be at least as large as the frame contained + * @return : the maximum decompressed size of the compressed source */ -void ZSTD_ldm_adjustParameters(ldmParams_t* params, - ZSTD_compressionParameters const* cParams); +unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) +{ + unsigned long long bound = 0; + /* Iterate over each frame */ + while (srcSize > 0) { + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); + size_t const compressedSize = frameSizeInfo.compressedSize; + unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; + if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) + return ZSTD_CONTENTSIZE_ERROR; + assert(srcSize >= compressedSize); + src = (const BYTE*)src + compressedSize; + srcSize -= compressedSize; + bound += decompressedBound; + } + return bound; +} + + 
+/*-************************************************************* + * Frame decoding + ***************************************************************/ +/** ZSTD_insertBlock() : + * insert `src` block into `dctx` history. Useful to track uncompressed blocks. */ +size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) +{ + DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize); + ZSTD_checkContinuity(dctx, blockStart); + dctx->previousDstEnd = (const char*)blockStart + blockSize; + return blockSize; } -#endif /* ZSTD_FAST_H */ +static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_copyRawBlock"); + if (dst == NULL) { + if (srcSize == 0) return 0; + RETURN_ERROR(dstBuffer_null, ""); + } + RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); + memcpy(dst, src, srcSize); + return srcSize; +} -// LICENSE_CHANGE_END +static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, + BYTE b, + size_t regenSize) +{ + if (dst == NULL) { + if (regenSize == 0) return 0; + RETURN_ERROR(dstBuffer_null, ""); + } + RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, ""); + memset(dst, b, regenSize); + return regenSize; +} +/*! ZSTD_decompressFrame() : + * @dctx must be properly initialized + * will update *srcPtr and *srcSizePtr, + * to make *srcPtr progress by one frame. */ +static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void** srcPtr, size_t *srcSizePtr) +{ + const BYTE* ip = (const BYTE*)(*srcPtr); + BYTE* const ostart = (BYTE* const)dst; + BYTE* const oend = dstCapacity != 0 ? 
ostart + dstCapacity : ostart; + BYTE* op = ostart; + size_t remainingSrcSize = *srcSizePtr; -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list + DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr); -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ + /* check */ + RETURN_ERROR_IF( + remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTDInternalConstants::ZSTD_blockHeaderSize, + srcSize_wrong, ""); -#ifndef ZSTD_COMPRESS_ADVANCED_H -#define ZSTD_COMPRESS_ADVANCED_H + /* Frame Header */ + { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal( + ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format); + if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; + RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTDInternalConstants::ZSTD_blockHeaderSize, + srcSize_wrong, ""); + FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , ""); + ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; + } -/*-************************************* -* Dependencies -***************************************/ + /* Loop on each block */ + while (1) { + size_t decodedSize; + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) return cBlockSize; - /* ZSTD_CCtx */ + ip += ZSTDInternalConstants::ZSTD_blockHeaderSize; + remainingSrcSize -= ZSTDInternalConstants::ZSTD_blockHeaderSize; + RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, ""); -namespace duckdb_zstd { 
-/*-************************************* -* Target Compressed Block Size -***************************************/ + switch(blockProperties.blockType) + { + case bt_compressed: + decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize, /* frame */ 1); + break; + case bt_raw : + decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize); + break; + case bt_rle : + decodedSize = ZSTD_setRleBlock(op, oend-op, *ip, blockProperties.origSize); + break; + case bt_reserved : + default: + RETURN_ERROR(corruption_detected, "invalid block type"); + } -/* ZSTD_compressSuperBlock() : - * Used to compress a super block when targetCBlockSize is being used. - * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */ -size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - void const* src, size_t srcSize, - unsigned lastBlock); + if (ZSTD_isError(decodedSize)) return decodedSize; + if (dctx->fParams.checksumFlag) + XXH64_update(&dctx->xxhState, op, decodedSize); + if (decodedSize != 0) + op += decodedSize; + assert(ip != NULL); + ip += cBlockSize; + remainingSrcSize -= cBlockSize; + if (blockProperties.lastBlock) break; + } + + if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { + RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize, + corruption_detected, ""); + } + if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ + U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); + U32 checkRead; + RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, ""); + checkRead = MEM_readLE32(ip); + RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, ""); + ip += 4; + remainingSrcSize -= 4; + } + + /* Allow caller to get size read */ + *srcPtr = ip; + *srcSizePtr = remainingSrcSize; + return op-ostart; } +static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, 
size_t dictSize, + const ZSTD_DDict* ddict) +{ + void* const dststart = dst; + int moreThan1Frame = 0; -#endif /* ZSTD_COMPRESS_ADVANCED_H */ + DEBUGLOG(5, "ZSTD_decompressMultiFrame"); + assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */ + if (ddict) { + dict = ZSTD_DDict_dictContent(ddict); + dictSize = ZSTD_DDict_dictSize(ddict); + } -// LICENSE_CHANGE_END + while (srcSize >= ZSTD_startingInputLength(dctx->format)) { +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) { + size_t decodedSize; + size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize); + if (ZSTD_isError(frameSize)) return frameSize; + RETURN_ERROR_IF(dctx->staticSize, memory_allocation, + "legacy support is not compatible with static dctx"); + decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); + if (ZSTD_isError(decodedSize)) return decodedSize; -namespace duckdb_zstd { -/*-************************************* -* Helper functions -***************************************/ -/* ZSTD_compressBound() - * Note that the result from this function is only compatible with the "normal" - * full-block strategy. - * When there are a lot of small blocks due to frequent flush in streaming mode - * the overhead of headers can make the compressed data to be larger than the - * return value of ZSTD_compressBound(). 
- */ -size_t ZSTD_compressBound(size_t srcSize) { - return ZSTD_COMPRESSBOUND(srcSize); -} + assert(decodedSize <=- dstCapacity); + dst = (BYTE*)dst + decodedSize; + dstCapacity -= decodedSize; + src = (const BYTE*)src + frameSize; + srcSize -= frameSize; -/*-************************************* -* Context memory management -***************************************/ -struct ZSTD_CDict_s { - const void* dictContent; - size_t dictContentSize; - U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ - ZSTD_cwksp workspace; - ZSTD_matchState_t matchState; - ZSTD_compressedBlockState_t cBlockState; - ZSTD_customMem customMem; - U32 dictID; - int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ -}; /* typedef'd to ZSTD_CDict within "zstd.h" */ + continue; + } +#endif -ZSTD_CCtx* ZSTD_createCCtx(void) -{ - return ZSTD_createCCtx_advanced({NULL, NULL, NULL}); + { U32 const magicNumber = MEM_readLE32(src); + DEBUGLOG(4, "reading magic number %08X (expecting %08X)", + (unsigned)magicNumber, ZSTD_MAGICNUMBER); + if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + size_t const skippableSize = readSkippableFrameSize(src, srcSize); + FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed"); + assert(skippableSize <= srcSize); + + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; + } } + + if (ddict) { + /* we were called from ZSTD_decompress_usingDDict */ + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), ""); + } else { + /* this will initialize correctly with no dict if dict == NULL, so + * use this in all cases but ddict */ + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), ""); + } + ZSTD_checkContinuity(dctx, dst); + + { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, + &src, &srcSize); + RETURN_ERROR_IF( + (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) + && (moreThan1Frame==1), + 
srcSize_wrong, + "at least one frame successfully completed, but following " + "bytes are garbage: it's more likely to be a srcSize error, " + "specifying more bytes than compressed size of frame(s). This " + "error message replaces ERROR(prefix_unknown), which would be " + "confusing, as the first header is actually correct. Note that " + "one could be unlucky, it might be a corruption error instead, " + "happening right at the place where we expect zstd magic " + "bytes. But this is _much_ less likely than a srcSize field " + "error."); + if (ZSTD_isError(res)) return res; + assert(res <= dstCapacity); + if (res != 0) + dst = (BYTE*)dst + res; + dstCapacity -= res; + } + moreThan1Frame = 1; + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ + + RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed"); + + return (BYTE*)dst - (BYTE*)dststart; } -static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager) +size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize) { - assert(cctx != NULL); - memset(cctx, 0, sizeof(*cctx)); - cctx->customMem = memManager; - cctx->bmi2 = 0; - { size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters); - assert(!ZSTD_isError(err)); - (void)err; - } + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL); } -ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) + +static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx) { - ZSTD_STATIC_ASSERT(zcss_init==0); - ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1)); - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; - { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_malloc(sizeof(ZSTD_CCtx), customMem); - if (!cctx) return NULL; - ZSTD_initCCtx(cctx, customMem); - return cctx; + switch (dctx->dictUses) { + default: + assert(0 /* Impossible */); + /* fall-through */ + case ZSTD_dont_use: + 
ZSTD_clearDict(dctx); + return NULL; + case ZSTD_use_indefinitely: + return dctx->ddict; + case ZSTD_use_once: + dctx->dictUses = ZSTD_dont_use; + return dctx->ddict; } } -ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize) +size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - ZSTD_cwksp ws; - ZSTD_CCtx* cctx; - if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */ - if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */ - ZSTD_cwksp_init(&ws, workspace, workspaceSize); + return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx)); +} - cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx)); - if (cctx == NULL) return NULL; - memset(cctx, 0, sizeof(ZSTD_CCtx)); - ZSTD_cwksp_move(&cctx->workspace, &ws); - cctx->staticSize = workspaceSize; - - /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */ - if (!ZSTD_cwksp_check_available(&cctx->workspace, HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL; - cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); - cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); - cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, HUF_WORKSPACE_SIZE); - cctx->bmi2 = 0; - return cctx; +size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ +#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) + size_t regenSize; + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!"); + regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); + ZSTD_freeDCtx(dctx); + return regenSize; +#else /* stack mode */ + ZSTD_DCtx dctx; + 
ZSTD_initDCtx_internal(&dctx); + return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize); +#endif } + +/*-************************************** +* Advanced Streaming Decompression API +* Bufferless and synchronous +****************************************/ +size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; } + /** - * Clears and frees all of the dictionaries in the CCtx. + * Similar to ZSTD_nextSrcSizeToDecompress(), but when when a block input can be streamed, + * we allow taking a partial block as the input. Currently only raw uncompressed blocks can + * be streamed. + * + * For blocks that can be streamed, this allows us to reduce the latency until we produce + * output, and avoid copying the input. + * + * @param inputSize - The total amount of input that the caller currently has. */ -static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx) -{ - ZSTD_free(cctx->localDict.dictBuffer, cctx->customMem); - ZSTD_freeCDict(cctx->localDict.cdict); - memset(&cctx->localDict, 0, sizeof(cctx->localDict)); - memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); - cctx->cdict = NULL; +static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) { + if (!(dctx->stage == ZSTDds_decompressBlock || dctx->stage == ZSTDds_decompressLastBlock)) + return dctx->expected; + if (dctx->bType != bt_raw) + return dctx->expected; + return MIN(MAX(inputSize, 1), dctx->expected); } -static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict) -{ - size_t const bufferSize = dict.dictBuffer != NULL ? 
dict.dictSize : 0; - size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict); - return bufferSize + cdictSize; +ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) { + switch(dctx->stage) + { + default: /* should not happen */ + assert(0); + case ZSTDds_getFrameHeaderSize: + case ZSTDds_decodeFrameHeader: + return ZSTDnit_frameHeader; + case ZSTDds_decodeBlockHeader: + return ZSTDnit_blockHeader; + case ZSTDds_decompressBlock: + return ZSTDnit_block; + case ZSTDds_decompressLastBlock: + return ZSTDnit_lastBlock; + case ZSTDds_checkChecksum: + return ZSTDnit_checksum; + case ZSTDds_decodeSkippableHeader: + case ZSTDds_skipFrame: + return ZSTDnit_skippableFrame; + } } -static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) -{ - assert(cctx != NULL); - assert(cctx->staticSize == 0); - ZSTD_clearAllDicts(cctx); -#ifdef ZSTD_MULTITHREAD - ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; -#endif - ZSTD_cwksp_free(&cctx->workspace, cctx->customMem); -} +static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; } -size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) +/** ZSTD_decompressContinue() : + * srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress()) + * @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity) + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - if (cctx==NULL) return 0; /* support free on NULL */ - RETURN_ERROR_IF(cctx->staticSize, memory_allocation, - "not compatible with static CCtx"); + DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize); + /* Sanity check */ + RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed"); + if (dstCapacity) ZSTD_checkContinuity(dctx, dst); + + switch (dctx->stage) { - int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx); - 
ZSTD_freeCCtxContent(cctx); - if (!cctxInWorkspace) { - ZSTD_free(cctx, cctx->customMem); + case ZSTDds_getFrameHeaderSize : + assert(src != NULL); + if (dctx->format == ZSTD_f_zstd1) { /* allows header */ + assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */ + if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize; /* remaining to load to get full skippable frame header */ + dctx->stage = ZSTDds_decodeSkippableHeader; + return 0; + } } + dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format); + if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize; + memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = dctx->headerSize - srcSize; + dctx->stage = ZSTDds_decodeFrameHeader; + return 0; + + case ZSTDds_decodeFrameHeader: + assert(src != NULL); + memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), ""); + dctx->expected = ZSTDInternalConstants::ZSTD_blockHeaderSize; + dctx->stage = ZSTDds_decodeBlockHeader; + return 0; + + case ZSTDds_decodeBlockHeader: + { blockProperties_t bp; + size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTDInternalConstants::ZSTD_blockHeaderSize, &bp); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum"); + dctx->expected = cBlockSize; + dctx->bType = bp.blockType; + dctx->rleSize = bp.origSize; + if (cBlockSize) { + dctx->stage = bp.lastBlock ? 
ZSTDds_decompressLastBlock : ZSTDds_decompressBlock; + return 0; + } + /* empty block */ + if (bp.lastBlock) { + if (dctx->fParams.checksumFlag) { + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + dctx->expected = 0; /* end of frame */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->expected = ZSTDInternalConstants::ZSTD_blockHeaderSize; /* jump to next header */ + dctx->stage = ZSTDds_decodeBlockHeader; + } + return 0; + } + + case ZSTDds_decompressLastBlock: + case ZSTDds_decompressBlock: + DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock"); + { size_t rSize; + switch(dctx->bType) + { + case bt_compressed: + DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); + rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1); + dctx->expected = 0; /* Streaming not supported */ + break; + case bt_raw : + assert(srcSize <= dctx->expected); + rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); + FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed"); + assert(rSize == srcSize); + dctx->expected -= rSize; + break; + case bt_rle : + rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize); + dctx->expected = 0; /* Streaming not supported */ + break; + case bt_reserved : /* should never happen */ + default: + RETURN_ERROR(corruption_detected, "invalid block type"); + } + FORWARD_IF_ERROR(rSize, ""); + RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); + DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); + dctx->decodedSize += rSize; + if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize); + dctx->previousDstEnd = (char*)dst + rSize; + + /* Stay on the same stage until we are finished streaming the block. 
*/ + if (dctx->expected > 0) { + return rSize; + } + + if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ + DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize); + RETURN_ERROR_IF( + dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && dctx->decodedSize != dctx->fParams.frameContentSize, + corruption_detected, ""); + if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + dctx->expected = 0; /* ends here */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->stage = ZSTDds_decodeBlockHeader; + dctx->expected = ZSTDInternalConstants::ZSTD_blockHeaderSize; + } + return rSize; + } + + case ZSTDds_checkChecksum: + assert(srcSize == 4); /* guaranteed by dctx->expected */ + { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); + U32 const check32 = MEM_readLE32(src); + DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); + RETURN_ERROR_IF(check32 != h32, checksum_wrong, ""); + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; } + + case ZSTDds_decodeSkippableHeader: + assert(src != NULL); + assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); + memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ + dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ + dctx->stage = ZSTDds_skipFrame; + return 0; + + case ZSTDds_skipFrame: + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + + default: + assert(0); /* impossible */ + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ } - return 0; } -static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx) +static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const 
void* dict, size_t dictSize) { -#ifdef ZSTD_MULTITHREAD - return ZSTDMT_sizeof_CCtx(cctx->mtctx); -#else - (void)cctx; - return 0; + dctx->dictEnd = dctx->previousDstEnd; + dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dict; + dctx->previousDstEnd = (const char*)dict + dictSize; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; #endif + return 0; } - -size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) +/*! ZSTD_loadDEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * @return : size of entropy tables read */ +size_t +ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, + const void* const dict, size_t const dictSize) { - if (cctx==NULL) return 0; /* support sizeof on NULL */ - /* cctx may be in the workspace */ - return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx)) - + ZSTD_cwksp_sizeof(&cctx->workspace) - + ZSTD_sizeof_localDict(cctx->localDict) - + ZSTD_sizeof_mtctx(cctx); -} + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; -size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) -{ - return ZSTD_sizeof_CCtx(zcs); /* same object */ -} + RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small"); + assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */ + dictPtr += 8; /* skip header = magic + dictID */ -/* private API call, for dictBuilder only */ -const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } + ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable)); + ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable)); + ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + 
sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE); + { void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */ + size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable); +#ifdef HUF_FORCE_DECOMPRESS_X1 + /* in minimal huffman, we always use X1 variants */ + size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable, + dictPtr, dictEnd - dictPtr, + workspace, workspaceSize); +#else + size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, + dictPtr, dictEnd - dictPtr, + workspace, workspaceSize); +#endif + RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, ""); + dictPtr += hSize; + } -static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( - ZSTD_compressionParameters cParams) -{ - ZSTD_CCtx_params cctxParams; - memset(&cctxParams, 0, sizeof(cctxParams)); - cctxParams.cParams = cParams; - cctxParams.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ - assert(!ZSTD_checkCParams(cParams)); - cctxParams.fParams.contentSizeFlag = 1; - return cctxParams; -} + { short offcodeNCount[MaxOff+1]; + unsigned offcodeMaxValue = MaxOff, offcodeLog; + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->OFTable, + offcodeNCount, offcodeMaxValue, + ZSTDConstants::OF_base, ZSTDConstants::OF_bits, + offcodeLog); + dictPtr += offcodeHeaderSize; + } -static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced( - ZSTD_customMem customMem) -{ - ZSTD_CCtx_params* params; - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; - params = 
(ZSTD_CCtx_params*)ZSTD_calloc( - sizeof(ZSTD_CCtx_params), customMem); - if (!params) { return NULL; } - params->customMem = customMem; - params->compressionLevel = ZSTD_CLEVEL_DEFAULT; - params->fParams.contentSizeFlag = 1; - return params; + { short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->MLTable, + matchlengthNCount, matchlengthMaxValue, + ZSTDConstants::ML_base, ZSTDInternalConstants::ML_bits, + matchlengthLog); + dictPtr += matchlengthHeaderSize; + } + + { short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog; + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->LLTable, + litlengthNCount, litlengthMaxValue, + ZSTDConstants::LL_base, ZSTDInternalConstants::LL_bits, + litlengthLog); + dictPtr += litlengthHeaderSize; + } + + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); + { int i; + size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); + for (i=0; i<3; i++) { + U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; + RETURN_ERROR_IF(rep==0 || rep > dictContentSize, + dictionary_corrupted, ""); + entropy->rep[i] = rep; + } } + + return dictPtr - (const BYTE*)dict; } -ZSTD_CCtx_params* ZSTD_createCCtxParams(void) +static size_t 
ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { - return ZSTD_createCCtxParams_advanced(ZSTDInternalConstants::ZSTD_defaultCMem); + if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize); + { U32 const magic = MEM_readLE32(dict); + if (magic != ZSTD_MAGIC_DICTIONARY) { + return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ + } } + dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); + + /* load entropy tables */ + { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize); + RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, ""); + dict = (const char*)dict + eSize; + dictSize -= eSize; + } + dctx->litEntropy = dctx->fseEntropy = 1; + + /* reference dictionary content */ + return ZSTD_refDictContent(dctx, dict, dictSize); } -size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params) +static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; + +size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) { - if (params == NULL) { return 0; } - ZSTD_free(params, params->customMem); + assert(dctx != NULL); + dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */ + dctx->stage = ZSTDds_getFrameHeaderSize; + dctx->decodedSize = 0; + dctx->previousDstEnd = NULL; + dctx->prefixStart = NULL; + dctx->virtualStart = NULL; + dctx->dictEnd = NULL; + dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ + dctx->litEntropy = dctx->fseEntropy = 0; + dctx->dictID = 0; + dctx->bType = bt_reserved; + ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); + memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ + dctx->LLTptr = dctx->entropy.LLTable; + dctx->MLTptr = dctx->entropy.MLTable; + dctx->OFTptr = dctx->entropy.OFTable; + dctx->HUFptr = dctx->entropy.hufTable; return 0; } -size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params) +size_t 
ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { - return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT); -} - -size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) { - RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); - memset(cctxParams, 0, sizeof(*cctxParams)); - cctxParams->compressionLevel = compressionLevel; - cctxParams->fParams.contentSizeFlag = 1; + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); + if (dict && dictSize) + RETURN_ERROR_IF( + ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)), + dictionary_corrupted, ""); return 0; } -size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) + +/* ====== ZSTD_DDict ====== */ + +size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) { - RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); - FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); - memset(cctxParams, 0, sizeof(*cctxParams)); - assert(!ZSTD_checkCParams(params.cParams)); - cctxParams->cParams = params.cParams; - cctxParams->fParams = params.fParams; - cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ + DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict"); + assert(dctx != NULL); + if (ddict) { + const char* const dictStart = (const char*)ZSTD_DDict_dictContent(ddict); + size_t const dictSize = ZSTD_DDict_dictSize(ddict); + const void* const dictEnd = dictStart + dictSize; + dctx->ddictIsCold = (dctx->dictEnd != dictEnd); + DEBUGLOG(4, "DDict is %s", + dctx->ddictIsCold ? 
"~cold~" : "hot!"); + } + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); + if (ddict) { /* NULL ddict is equivalent to no dictionary */ + ZSTD_copyDDictParameters(dctx, ddict); + } return 0; } -/* ZSTD_assignParamsToCCtxParams() : - * params is presumed valid at this stage */ -static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams( - const ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params) +/*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. */ +unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize) { - ZSTD_CCtx_params ret = *cctxParams; - assert(!ZSTD_checkCParams(params->cParams)); - ret.cParams = params->cParams; - ret.fParams = params->fParams; - ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ - return ret; + if (dictSize < 8) return 0; + if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0; + return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); } -ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) +/*! ZSTD_getDictID_fromFrame() : + * Provides the dictID required to decompress frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This could for one of the following reasons : + * - The frame does not require a dictionary (most common case). + * - The frame was built with dictID intentionally removed. + * Needed dictionary is a hidden information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, frame header could not be decoded. + * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`. + * - This is not a Zstandard frame. + * When identifying the exact failure cause, it's possible to use + * ZSTD_getFrameHeader(), which will provide a more precise error code. 
*/ +unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize) { - ZSTD_bounds bounds = { 0, 0, 0 }; + ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 }; + size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize); + if (ZSTD_isError(hError)) return 0; + return zfp.dictID; +} - switch(param) - { - case ZSTD_c_compressionLevel: - bounds.lowerBound = ZSTD_minCLevel(); - bounds.upperBound = ZSTD_maxCLevel(); - return bounds; - case ZSTD_c_windowLog: - bounds.lowerBound = ZSTD_WINDOWLOG_MIN; - bounds.upperBound = ZSTD_WINDOWLOG_MAX; - return bounds; - - case ZSTD_c_hashLog: - bounds.lowerBound = ZSTD_HASHLOG_MIN; - bounds.upperBound = ZSTD_HASHLOG_MAX; - return bounds; - - case ZSTD_c_chainLog: - bounds.lowerBound = ZSTD_CHAINLOG_MIN; - bounds.upperBound = ZSTD_CHAINLOG_MAX; - return bounds; - - case ZSTD_c_searchLog: - bounds.lowerBound = ZSTD_SEARCHLOG_MIN; - bounds.upperBound = ZSTD_SEARCHLOG_MAX; - return bounds; - - case ZSTD_c_minMatch: - bounds.lowerBound = ZSTD_MINMATCH_MIN; - bounds.upperBound = ZSTD_MINMATCH_MAX; - return bounds; +/*! ZSTD_decompress_usingDDict() : +* Decompression using a pre-digested Dictionary +* Use dictionary without significant overhead. 
*/ +size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict) +{ + /* pass content and size in case legacy frames are encountered */ + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, + NULL, 0, + ddict); +} - case ZSTD_c_targetLength: - bounds.lowerBound = ZSTD_TARGETLENGTH_MIN; - bounds.upperBound = ZSTD_TARGETLENGTH_MAX; - return bounds; - case ZSTD_c_strategy: - bounds.lowerBound = ZSTD_STRATEGY_MIN; - bounds.upperBound = ZSTD_STRATEGY_MAX; - return bounds; +/*===================================== +* Streaming decompression +*====================================*/ - case ZSTD_c_contentSizeFlag: - bounds.lowerBound = 0; - bounds.upperBound = 1; - return bounds; +ZSTD_DStream* ZSTD_createDStream(void) +{ + DEBUGLOG(3, "ZSTD_createDStream"); + return ZSTD_createDStream_advanced(ZSTDInternalConstants::ZSTD_defaultCMem); +} - case ZSTD_c_checksumFlag: - bounds.lowerBound = 0; - bounds.upperBound = 1; - return bounds; +ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize) +{ + return ZSTD_initStaticDCtx(workspace, workspaceSize); +} - case ZSTD_c_dictIDFlag: - bounds.lowerBound = 0; - bounds.upperBound = 1; - return bounds; +ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem) +{ + return ZSTD_createDCtx_advanced(customMem); +} - case ZSTD_c_nbWorkers: - bounds.lowerBound = 0; -#ifdef ZSTD_MULTITHREAD - bounds.upperBound = ZSTDMT_NBWORKERS_MAX; -#else - bounds.upperBound = 0; -#endif - return bounds; +size_t ZSTD_freeDStream(ZSTD_DStream* zds) +{ + return ZSTD_freeDCtx(zds); +} - case ZSTD_c_jobSize: - bounds.lowerBound = 0; -#ifdef ZSTD_MULTITHREAD - bounds.upperBound = ZSTDMT_JOBSIZE_MAX; -#else - bounds.upperBound = 0; -#endif - return bounds; - case ZSTD_c_overlapLog: -#ifdef ZSTD_MULTITHREAD - bounds.lowerBound = ZSTD_OVERLAPLOG_MIN; - bounds.upperBound = ZSTD_OVERLAPLOG_MAX; -#else - bounds.lowerBound = 0; - 
bounds.upperBound = 0; -#endif - return bounds; +/* *** Initialization *** */ - case ZSTD_c_enableLongDistanceMatching: - bounds.lowerBound = 0; - bounds.upperBound = 1; - return bounds; +size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTDInternalConstants::ZSTD_blockHeaderSize; } +size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; } - case ZSTD_c_ldmHashLog: - bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN; - bounds.upperBound = ZSTD_LDM_HASHLOG_MAX; - return bounds; +size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + if (dict && dictSize != 0) { + dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); + RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!"); + dctx->ddict = dctx->ddictLocal; + dctx->dictUses = ZSTD_use_indefinitely; + } + return 0; +} - case ZSTD_c_ldmMinMatch: - bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN; - bounds.upperBound = ZSTD_LDM_MINMATCH_MAX; - return bounds; +size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); +} - case ZSTD_c_ldmBucketSizeLog: - bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN; - bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX; - return bounds; +size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); +} - case ZSTD_c_ldmHashRateLog: - bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN; - bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX; - return bounds; +size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, 
ZSTD_dictContentType_e dictContentType) +{ + FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), ""); + dctx->dictUses = ZSTD_use_once; + return 0; +} - /* experimental parameters */ - case ZSTD_c_rsyncable: - bounds.lowerBound = 0; - bounds.upperBound = 1; - return bounds; +size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize) +{ + return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent); +} - case ZSTD_c_forceMaxWindow : - bounds.lowerBound = 0; - bounds.upperBound = 1; - return bounds; - case ZSTD_c_format: - ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); - bounds.lowerBound = ZSTD_f_zstd1; - bounds.upperBound = ZSTD_f_zstd1_magicless; /* note : how to ensure at compile time that this is the highest value enum ? */ - return bounds; +/* ZSTD_initDStream_usingDict() : + * return : expected size, aka ZSTD_startingInputLength(). + * this function cannot fail */ +size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) +{ + DEBUGLOG(4, "ZSTD_initDStream_usingDict"); + FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , ""); + return ZSTD_startingInputLength(zds->format); +} - case ZSTD_c_forceAttachDict: - ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy); - bounds.lowerBound = ZSTD_dictDefaultAttach; - bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? 
*/ - return bounds; +/* note : this variant can't fail */ +size_t ZSTD_initDStream(ZSTD_DStream* zds) +{ + DEBUGLOG(4, "ZSTD_initDStream"); + return ZSTD_initDStream_usingDDict(zds, NULL); +} - case ZSTD_c_literalCompressionMode: - ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed); - bounds.lowerBound = ZSTD_lcm_auto; - bounds.upperBound = ZSTD_lcm_uncompressed; - return bounds; +/* ZSTD_initDStream_usingDDict() : + * ddict will just be referenced, and must outlive decompression session + * this function cannot fail */ +size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) +{ + FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , ""); + return ZSTD_startingInputLength(dctx->format); +} - case ZSTD_c_targetCBlockSize: - bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN; - bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX; - return bounds; +/* ZSTD_resetDStream() : + * return : expected size, aka ZSTD_startingInputLength(). + * this function cannot fail */ +size_t ZSTD_resetDStream(ZSTD_DStream* dctx) +{ + FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), ""); + return ZSTD_startingInputLength(dctx->format); +} - case ZSTD_c_srcSizeHint: - bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN; - bounds.upperBound = ZSTD_SRCSIZEHINT_MAX; - return bounds; - default: - bounds.error = ERROR(parameter_unsupported); - return bounds; +size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + if (ddict) { + dctx->ddict = ddict; + dctx->dictUses = ZSTD_use_indefinitely; } + return 0; } -/* ZSTD_cParam_clampBounds: - * Clamps the value into the bounded range. 
- */ -static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value) +/* ZSTD_DCtx_setMaxWindowSize() : + * note : no direct equivalence in ZSTD_DCtx_setParameter, + * since this version sets windowSize, and the other sets windowLog */ +size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) { - ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); - if (ZSTD_isError(bounds.error)) return bounds.error; - if (*value < bounds.lowerBound) *value = bounds.lowerBound; - if (*value > bounds.upperBound) *value = bounds.upperBound; + ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax); + size_t const min = (size_t)1 << bounds.lowerBound; + size_t const max = (size_t)1 << bounds.upperBound; + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, ""); + RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, ""); + dctx->maxWindowSize = maxWindowSize; return 0; } -#define BOUNDCHECK(cParam, val) { \ - RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ - parameter_outOfBound, "Param out of bounds"); \ +size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) +{ + return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, format); } - -static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) +ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) { - switch(param) - { - case ZSTD_c_compressionLevel: - case ZSTD_c_hashLog: - case ZSTD_c_chainLog: - case ZSTD_c_searchLog: - case ZSTD_c_minMatch: - case ZSTD_c_targetLength: - case ZSTD_c_strategy: - return 1; - - case ZSTD_c_format: - case ZSTD_c_windowLog: - case ZSTD_c_contentSizeFlag: - case ZSTD_c_checksumFlag: - case ZSTD_c_dictIDFlag: - case ZSTD_c_forceMaxWindow : - case ZSTD_c_nbWorkers: - case ZSTD_c_jobSize: - case ZSTD_c_overlapLog: - case ZSTD_c_rsyncable: - case ZSTD_c_enableLongDistanceMatching: - case ZSTD_c_ldmHashLog: - case ZSTD_c_ldmMinMatch: - case ZSTD_c_ldmBucketSizeLog: - 
case ZSTD_c_ldmHashRateLog: - case ZSTD_c_forceAttachDict: - case ZSTD_c_literalCompressionMode: - case ZSTD_c_targetCBlockSize: - case ZSTD_c_srcSizeHint: - default: - return 0; + ZSTD_bounds bounds = { 0, 0, 0 }; + switch(dParam) { + case ZSTD_d_windowLogMax: + bounds.lowerBound = ZSTD_WINDOWLOG_ABSOLUTEMIN; + bounds.upperBound = ZSTD_WINDOWLOG_MAX; + return bounds; + case ZSTD_d_format: + bounds.lowerBound = (int)ZSTD_f_zstd1; + bounds.upperBound = (int)ZSTD_f_zstd1_magicless; + ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); + return bounds; + case ZSTD_d_stableOutBuffer: + bounds.lowerBound = (int)ZSTD_obm_buffered; + bounds.upperBound = (int)ZSTD_obm_stable; + return bounds; + default:; } + bounds.error = ERROR(parameter_unsupported); + return bounds; } -size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) +/* ZSTD_dParam_withinBounds: + * @return 1 if value is within dParam bounds, + * 0 otherwise */ +static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value) { - DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value); - if (cctx->streamStage != zcss_init) { - if (ZSTD_isUpdateAuthorized(param)) { - cctx->cParamsChanged = 1; - } else { - RETURN_ERROR(stage_wrong, "can only set params in ctx init stage"); - } } - - switch(param) - { - case ZSTD_c_nbWorkers: - RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported, - "MT not compatible with static alloc"); - break; + ZSTD_bounds const bounds = ZSTD_dParam_getBounds(dParam); + if (ZSTD_isError(bounds.error)) return 0; + if (value < bounds.lowerBound) return 0; + if (value > bounds.upperBound) return 0; + return 1; +} - case ZSTD_c_compressionLevel: - case ZSTD_c_windowLog: - case ZSTD_c_hashLog: - case ZSTD_c_chainLog: - case ZSTD_c_searchLog: - case ZSTD_c_minMatch: - case ZSTD_c_targetLength: - case ZSTD_c_strategy: - case ZSTD_c_ldmHashRateLog: - case ZSTD_c_format: - case ZSTD_c_contentSizeFlag: - case ZSTD_c_checksumFlag: - case 
ZSTD_c_dictIDFlag: - case ZSTD_c_forceMaxWindow: - case ZSTD_c_forceAttachDict: - case ZSTD_c_literalCompressionMode: - case ZSTD_c_jobSize: - case ZSTD_c_overlapLog: - case ZSTD_c_rsyncable: - case ZSTD_c_enableLongDistanceMatching: - case ZSTD_c_ldmHashLog: - case ZSTD_c_ldmMinMatch: - case ZSTD_c_ldmBucketSizeLog: - case ZSTD_c_targetCBlockSize: - case ZSTD_c_srcSizeHint: - break; +#define CHECK_DBOUNDS(p,v) { \ + RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \ +} - default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); +size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + switch(dParam) { + case ZSTD_d_windowLogMax: + if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT; + CHECK_DBOUNDS(ZSTD_d_windowLogMax, value); + dctx->maxWindowSize = ((size_t)1) << value; + return 0; + case ZSTD_d_format: + CHECK_DBOUNDS(ZSTD_d_format, value); + dctx->format = (ZSTD_format_e)value; + return 0; + case ZSTD_d_stableOutBuffer: + CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value); + dctx->outBufferMode = (ZSTD_outBufferMode_e)value; + return 0; + default:; } - return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value); + RETURN_ERROR(parameter_unsupported, ""); } -size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, - ZSTD_cParameter param, int value) +size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) { - DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value); - switch(param) - { - case ZSTD_c_format : - BOUNDCHECK(ZSTD_c_format, value); - CCtxParams->format = (ZSTD_format_e)value; - return (size_t)CCtxParams->format; - - case ZSTD_c_compressionLevel : { - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); - if (value) { /* 0 : does not change current level */ - CCtxParams->compressionLevel = value; - } - if (CCtxParams->compressionLevel >= 0) return 
(size_t)CCtxParams->compressionLevel; - return 0; /* return type (size_t) cannot represent negative values */ + if ( (reset == ZSTD_reset_session_only) + || (reset == ZSTD_reset_session_and_parameters) ) { + dctx->streamStage = zdss_init; + dctx->noForwardProgress = 0; + } + if ( (reset == ZSTD_reset_parameters) + || (reset == ZSTD_reset_session_and_parameters) ) { + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + dctx->format = ZSTD_f_zstd1; + dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; } + return 0; +} - case ZSTD_c_windowLog : - if (value!=0) /* 0 => use default */ - BOUNDCHECK(ZSTD_c_windowLog, value); - CCtxParams->cParams.windowLog = (U32)value; - return CCtxParams->cParams.windowLog; - case ZSTD_c_hashLog : - if (value!=0) /* 0 => use default */ - BOUNDCHECK(ZSTD_c_hashLog, value); - CCtxParams->cParams.hashLog = (U32)value; - return CCtxParams->cParams.hashLog; +size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx) +{ + return ZSTD_sizeof_DCtx(dctx); +} - case ZSTD_c_chainLog : - if (value!=0) /* 0 => use default */ - BOUNDCHECK(ZSTD_c_chainLog, value); - CCtxParams->cParams.chainLog = (U32)value; - return CCtxParams->cParams.chainLog; +size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) +{ + size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2); + unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); + size_t const minRBSize = (size_t) neededSize; + RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, + frameParameter_windowTooLarge, ""); + return minRBSize; +} - case ZSTD_c_searchLog : - if (value!=0) /* 0 => use default */ - BOUNDCHECK(ZSTD_c_searchLog, value); - CCtxParams->cParams.searchLog = (U32)value; - return (size_t)value; +size_t ZSTD_estimateDStreamSize(size_t windowSize) +{ + size_t const blockSize = 
MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + size_t const inBuffSize = blockSize; /* no block can be larger */ + size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN); + return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize; +} - case ZSTD_c_minMatch : - if (value!=0) /* 0 => use default */ - BOUNDCHECK(ZSTD_c_minMatch, value); - CCtxParams->cParams.minMatch = value; - return CCtxParams->cParams.minMatch; +size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) +{ + U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable, but requires an additional parameter (or a dctx) */ + ZSTD_frameHeader zfh; + size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); + if (ZSTD_isError(err)) return err; + RETURN_ERROR_IF(err>0, srcSize_wrong, ""); + RETURN_ERROR_IF(zfh.windowSize > windowSizeMax, + frameParameter_windowTooLarge, ""); + return ZSTD_estimateDStreamSize((size_t)zfh.windowSize); +} - case ZSTD_c_targetLength : - BOUNDCHECK(ZSTD_c_targetLength, value); - CCtxParams->cParams.targetLength = value; - return CCtxParams->cParams.targetLength; - case ZSTD_c_strategy : - if (value!=0) /* 0 => use default */ - BOUNDCHECK(ZSTD_c_strategy, value); - CCtxParams->cParams.strategy = (ZSTD_strategy)value; - return (size_t)CCtxParams->cParams.strategy; +/* ***** Decompression ***** */ - case ZSTD_c_contentSizeFlag : - /* Content size written in frame header _when known_ (default:1) */ - DEBUGLOG(4, "set content size flag = %u", (value!=0)); - CCtxParams->fParams.contentSizeFlag = value != 0; - return CCtxParams->fParams.contentSizeFlag; +static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) +{ + return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR; +} - case ZSTD_c_checksumFlag : - /* A 32-bits content checksum will be calculated and written at end of frame (default:0) 
*/ - CCtxParams->fParams.checksumFlag = value != 0; - return CCtxParams->fParams.checksumFlag; +static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) +{ + if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize)) + zds->oversizedDuration++; + else + zds->oversizedDuration = 0; +} - case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */ - DEBUGLOG(4, "set dictIDFlag = %u", (value!=0)); - CCtxParams->fParams.noDictIDFlag = !value; - return !CCtxParams->fParams.noDictIDFlag; +static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds) +{ + return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION; +} - case ZSTD_c_forceMaxWindow : - CCtxParams->forceWindow = (value != 0); - return CCtxParams->forceWindow; +/* Checks that the output buffer hasn't changed if ZSTD_obm_stable is used. */ +static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output) +{ + ZSTD_outBuffer const expect = zds->expectedOutBuffer; + /* No requirement when ZSTD_obm_stable is not enabled. */ + if (zds->outBufferMode != ZSTD_obm_stable) + return 0; + /* Any buffer is allowed in zdss_init, this must be the same for every other call until + * the context is reset. + */ + if (zds->streamStage == zdss_init) + return 0; + /* The buffer must match our expectation exactly. */ + if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size) + return 0; + RETURN_ERROR(dstBuffer_wrong, "ZSTD_obm_stable enabled but output differs!"); +} + +/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream() + * and updates the stage and the output buffer state. This call is extracted so it can be + * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode. + * NOTE: You must break after calling this function since the streamStage is modified. 
+ */ +static size_t ZSTD_decompressContinueStream( + ZSTD_DStream* zds, char** op, char* oend, + void const* src, size_t srcSize) { + int const isSkipFrame = ZSTD_isSkipFrame(zds); + if (zds->outBufferMode == ZSTD_obm_buffered) { + size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart; + size_t const decodedSize = ZSTD_decompressContinue(zds, + zds->outBuff + zds->outStart, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + if (!decodedSize && !isSkipFrame) { + zds->streamStage = zdss_read; + } else { + zds->outEnd = zds->outStart + decodedSize; + zds->streamStage = zdss_flush; + } + } else { + /* Write directly into the output buffer */ + size_t const dstSize = isSkipFrame ? 0 : oend - *op; + size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + *op += decodedSize; + /* Flushing is not needed. */ + zds->streamStage = zdss_read; + assert(*op <= oend); + assert(zds->outBufferMode == ZSTD_obm_stable); + } + return 0; +} + +size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + const char* const src = (const char*)input->src; + const char* const istart = input->pos != 0 ? src + input->pos : src; + const char* const iend = input->size != 0 ? src + input->size : src; + const char* ip = istart; + char* const dst = (char*)output->dst; + char* const ostart = output->pos != 0 ? dst + output->pos : dst; + char* const oend = output->size != 0 ? dst + output->size : dst; + char* op = ostart; + U32 someMoreWork = 1; + + DEBUGLOG(5, "ZSTD_decompressStream"); + RETURN_ERROR_IF( + input->pos > input->size, + srcSize_wrong, + "forbidden. in: pos: %u vs size: %u", + (U32)input->pos, (U32)input->size); + RETURN_ERROR_IF( + output->pos > output->size, + dstSize_tooSmall, + "forbidden. 
out: pos: %u vs size: %u", + (U32)output->pos, (U32)output->size); + DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); + FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), ""); + + while (someMoreWork) { + switch(zds->streamStage) + { + case zdss_init : + DEBUGLOG(5, "stage zdss_init => transparent reset "); + zds->streamStage = zdss_loadHeader; + zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; + zds->legacyVersion = 0; + zds->hostageByte = 0; + zds->expectedOutBuffer = *output; + /* fall-through */ + + case zdss_loadHeader : + DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip)); +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + if (zds->legacyVersion) { + RETURN_ERROR_IF(zds->staticSize, memory_allocation, + "legacy support is incompatible with static dctx"); + { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); + if (hint==0) zds->streamStage = zdss_init; + return hint; + } } +#endif + { size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format); + DEBUGLOG(5, "header size : %u", (U32)hSize); + if (ZSTD_isError(hSize)) { +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart); + if (legacyVersion) { + ZSTD_DDict const* const ddict = ZSTD_getDDict(zds); + const void* const dict = ddict ? ZSTD_DDict_dictContent(ddict) : NULL; + size_t const dictSize = ddict ? 
ZSTD_DDict_dictSize(ddict) : 0; + DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion); + RETURN_ERROR_IF(zds->staticSize, memory_allocation, + "legacy support is incompatible with static dctx"); + FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext, + zds->previousLegacyVersion, legacyVersion, + dict, dictSize), ""); + zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; + { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input); + if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */ + return hint; + } } +#endif + return hSize; /* error */ + } + if (hSize != 0) { /* need more input */ + size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ + size_t const remainingInput = (size_t)(iend-ip); + assert(iend >= ip); + if (toLoad > remainingInput) { /* not enough input to load full header */ + if (remainingInput > 0) { + memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); + zds->lhSize += remainingInput; + } + input->pos = input->size; + return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTDInternalConstants::ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ + } + assert(ip != NULL); + memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; + break; + } } + + /* check for single-pass mode opportunity */ + if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && zds->fParams.frameType != ZSTD_skippableFrame + && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { + size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart); + if (cSize <= (size_t)(iend-istart)) { + /* shortcut : using single-pass mode */ + size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, ZSTD_getDDict(zds)); + if (ZSTD_isError(decompressedSize)) return decompressedSize; + DEBUGLOG(4, "shortcut 
to single-pass ZSTD_decompress_usingDDict()") + ip = istart + cSize; + op += decompressedSize; + zds->expected = 0; + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } } + + /* Check output buffer is large enough for ZSTD_odm_stable. */ + if (zds->outBufferMode == ZSTD_obm_stable + && zds->fParams.frameType != ZSTD_skippableFrame + && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) { + RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small"); + } + + /* Consume header (see ZSTDds_decodeFrameHeader) */ + DEBUGLOG(4, "Consume header"); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); + + if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); + zds->stage = ZSTDds_skipFrame; + } else { + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), ""); + zds->expected = ZSTDInternalConstants::ZSTD_blockHeaderSize; + zds->stage = ZSTDds_decodeBlockHeader; + } + + /* control buffer memory usage */ + DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)", + (U32)(zds->fParams.windowSize >>10), + (U32)(zds->maxWindowSize >> 10) ); + zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); + RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, + frameParameter_windowTooLarge, ""); + + /* Adapt buffer sizes to frame header instructions */ + { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); + size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_obm_buffered + ? 
ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize) + : 0; + + ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize); + + { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize); + int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds); + + if (tooSmall || tooLarge) { + size_t const bufferSize = neededInBuffSize + neededOutBuffSize; + DEBUGLOG(4, "inBuff : from %u to %u", + (U32)zds->inBuffSize, (U32)neededInBuffSize); + DEBUGLOG(4, "outBuff : from %u to %u", + (U32)zds->outBuffSize, (U32)neededOutBuffSize); + if (zds->staticSize) { /* static DCtx */ + DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); + assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ + RETURN_ERROR_IF( + bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), + memory_allocation, ""); + } else { + ZSTD_free(zds->inBuff, zds->customMem); + zds->inBuffSize = 0; + zds->outBuffSize = 0; + zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem); + RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, ""); + } + zds->inBuffSize = neededInBuffSize; + zds->outBuff = zds->inBuff + zds->inBuffSize; + zds->outBuffSize = neededOutBuffSize; + } } } + zds->streamStage = zdss_read; + /* fall-through */ + + case zdss_read: + DEBUGLOG(5, "stage zdss_read"); + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip); + DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize); + if (neededInSize==0) { /* end of frame */ + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } + if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), ""); + ip += neededInSize; + /* Function modifies the stage so we must break */ + break; + } } + if (ip==iend) { someMoreWork = 0; break; } /* no more input */ + zds->streamStage = zdss_load; + /* fall-through */ + + case 
zdss_load: + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds); + size_t const toLoad = neededInSize - zds->inPos; + int const isSkipFrame = ZSTD_isSkipFrame(zds); + size_t loadedSize; + /* At this point we shouldn't be decompressing a block that we can stream. */ + assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip)); + if (isSkipFrame) { + loadedSize = MIN(toLoad, (size_t)(iend-ip)); + } else { + RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos, + corruption_detected, + "should never happen"); + loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip); + } + ip += loadedSize; + zds->inPos += loadedSize; + if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ + + /* decode loaded input */ + zds->inPos = 0; /* input is consumed */ + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), ""); + /* Function modifies the stage so we must break */ + break; + } + case zdss_flush: + { size_t const toFlushSize = zds->outEnd - zds->outStart; + size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize); + op += flushedSize; + zds->outStart += flushedSize; + if (flushedSize == toFlushSize) { /* flush completed */ + zds->streamStage = zdss_read; + if ( (zds->outBuffSize < zds->fParams.frameContentSize) + && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { + DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)", + (int)(zds->outBuffSize - zds->outStart), + (U32)zds->fParams.blockSizeMax); + zds->outStart = zds->outEnd = 0; + } + break; + } } + /* cannot complete flush */ + someMoreWork = 0; + break; + + default: + assert(0); /* impossible */ + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ + } } + + /* result */ + input->pos = (size_t)(ip - (const char*)(input->src)); + output->pos = (size_t)(op - 
(char*)(output->dst)); + + /* Update the expected output buffer for ZSTD_obm_stable. */ + zds->expectedOutBuffer = *output; + + if ((ip==istart) && (op==ostart)) { /* no forward progress */ + zds->noForwardProgress ++; + if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { + RETURN_ERROR_IF(op==oend, dstSize_tooSmall, ""); + RETURN_ERROR_IF(ip==iend, srcSize_wrong, ""); + assert(0); + } + } else { + zds->noForwardProgress = 0; + } + { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds); + if (!nextSrcSizeHint) { /* frame fully decoded */ + if (zds->outEnd == zds->outStart) { /* output fully flushed */ + if (zds->hostageByte) { + if (input->pos >= input->size) { + /* can't release hostage (not present) */ + zds->streamStage = zdss_read; + return 1; + } + input->pos++; /* release hostage */ + } /* zds->hostageByte */ + return 0; + } /* zds->outEnd == zds->outStart */ + if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */ + input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */ + zds->hostageByte=1; + } + return 1; + } /* nextSrcSizeHint==0 */ + nextSrcSizeHint += ZSTDInternalConstants::ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */ + assert(zds->inPos <= nextSrcSizeHint); + nextSrcSizeHint -= zds->inPos; /* part already loaded*/ + return nextSrcSizeHint; + } +} + +size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos) +{ + ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; + ZSTD_inBuffer input = { src, srcSize, *srcPos }; + /* ZSTD_compress_generic() will check validity of dstPos and srcPos */ + size_t const cErr = ZSTD_decompressStream(dctx, &output, &input); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; +} + +} + + +// LICENSE_CHANGE_END + + +// LICENSE_CHANGE_BEGIN 
/* ZSTD_copy4() :
 * Copies exactly 4 bytes from `src` to `dst` using memcpy(). */
static void ZSTD_copy4(void* dst, const void* src)
{
    size_t const nbBytes = 4;
    memcpy(dst, src, nbBytes);
}
ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr) +{ + RETURN_ERROR_IF(srcSize < ZSTDInternalConstants::ZSTD_blockHeaderSize, srcSize_wrong, ""); + + { U32 const cBlockHeader = MEM_readLE24(src); + U32 const cSize = cBlockHeader >> 3; + bpPtr->lastBlock = cBlockHeader & 1; + bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); + bpPtr->origSize = cSize; /* only useful for RLE */ + if (bpPtr->blockType == bt_rle) return 1; + RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, ""); + return cSize; + } +} + + +/* Hidden declaration for fullbench */ +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize); +/*! ZSTD_decodeLiteralsBlock() : + * @return : nb of bytes read from src (< srcSize ) + * note : symbol not declared but exposed for fullbench */ +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ +{ + DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); + RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); + + { const BYTE* const istart = (const BYTE*) src; + symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); + + switch(litEncType) + { + case set_repeat: + DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block"); + RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, ""); + /* fall-through */ + + case set_compressed: + RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); + { size_t lhSize, litSize, litCSize; + U32 singleStream=0; + U32 const lhlCode = (istart[0] >> 2) & 3; + U32 const lhc = MEM_readLE32(istart); + size_t hufSuccess; + switch(lhlCode) + { + case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ + /* 2 - 2 - 10 - 10 */ + singleStream = 
/* Hidden declaration for fullbench */
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                          const void* src, size_t srcSize);
/*! ZSTD_decodeLiteralsBlock() :
 *  Decodes the literals section at the start of a compressed block and records
 *  where the literals live in `dctx` (dctx->litPtr / dctx->litSize).
 *  The 2 low bits of the first byte select the encoding :
 *    - set_basic      : raw literals; copied into dctx->litBuffer when too close to
 *                       the end of `src`, otherwise referenced in place
 *    - set_rle        : one byte value, replicated into dctx->litBuffer
 *    - set_compressed : Huffman-coded literals, decoded through dctx->entropy.hufTable,
 *                       which then becomes dctx->HUFptr
 *    - set_repeat     : Huffman-coded literals, decoded with the table designated by
 *                       dctx->HUFptr; rejected when dctx->litEntropy == 0
 *  Whenever dctx->litBuffer is filled, WILDCOPY_OVERLENGTH extra bytes are also
 *  written after the literals (zeroes, or the repeated byte for set_rle).
 * @return : nb of bytes read from src (< srcSize ), or an error code
 *  note : symbol not declared but exposed for fullbench */
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                          const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
{
    DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
    RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");

    {   const BYTE* const istart = (const BYTE*) src;
        symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);

        switch(litEncType)
        {
        case set_repeat:
            DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
            /* cannot repeat a table if no literals entropy has been set up yet */
            RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
            /* fall-through */

        case set_compressed:
            RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
            {   size_t lhSize, litSize, litCSize;
                U32 singleStream=0;
                U32 const lhlCode = (istart[0] >> 2) & 3;   /* bits 2-3 : header size format */
                U32 const lhc = MEM_readLE32(istart);
                size_t hufSuccess;
                /* header layout, in bits : type - format - litSize - litCSize */
                switch(lhlCode)
                {
                case 0: case 1: default:   /* note : default is impossible, since lhlCode into [0..3] */
                    /* 2 - 2 - 10 - 10 */
                    singleStream = !lhlCode;   /* format 0 uses the single-stream decoder */
                    lhSize = 3;
                    litSize  = (lhc >> 4) & 0x3FF;
                    litCSize = (lhc >> 14) & 0x3FF;
                    break;
                case 2:
                    /* 2 - 2 - 14 - 14 */
                    lhSize = 4;
                    litSize  = (lhc >> 4) & 0x3FFF;
                    litCSize = lhc >> 18;
                    break;
                case 3:
                    /* 2 - 2 - 18 - 18 */
                    lhSize = 5;
                    litSize  = (lhc >> 4) & 0x3FFFF;
                    /* the upper 8 bits of litCSize come from the 5th header byte */
                    litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
                    break;
                }
                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
                RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");

                /* prefetch huffman table if cold */
                if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
                    PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
                }

                if (litEncType==set_repeat) {
                    /* decode with the already-designated table (dctx->HUFptr) */
                    if (singleStream) {
                        hufSuccess = HUF_decompress1X_usingDTable_bmi2(
                            dctx->litBuffer, litSize, istart+lhSize, litCSize,
                            dctx->HUFptr, dctx->bmi2);
                    } else {
                        hufSuccess = HUF_decompress4X_usingDTable_bmi2(
                            dctx->litBuffer, litSize, istart+lhSize, litCSize,
                            dctx->HUFptr, dctx->bmi2);
                    }
                } else {
                    /* set_compressed : decode through dctx->entropy.hufTable, using dctx->workspace */
                    if (singleStream) {
#if defined(HUF_FORCE_DECOMPRESS_X2)
                        hufSuccess = HUF_decompress1X_DCtx_wksp(
                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
                            istart+lhSize, litCSize, dctx->workspace,
                            sizeof(dctx->workspace));
#else
                        hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
                            istart+lhSize, litCSize, dctx->workspace,
                            sizeof(dctx->workspace), dctx->bmi2);
#endif
                    } else {
                        hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
                            istart+lhSize, litCSize, dctx->workspace,
                            sizeof(dctx->workspace), dctx->bmi2);
                    }
                }

                RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");

                dctx->litPtr = dctx->litBuffer;
                dctx->litSize = litSize;
                dctx->litEntropy = 1;
                if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
                /* zero the WILDCOPY_OVERLENGTH bytes following the literals */
                memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
                return litCSize + lhSize;
            }

        case set_basic:
            {   size_t litSize, lhSize;
                U32 const lhlCode = ((istart[0]) >> 2) & 3;
                /* header is 1, 2 or 3 bytes; litSize is stored in its upper bits */
                switch(lhlCode)
                {
                case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
                    lhSize = 1;
                    litSize = istart[0] >> 3;
                    break;
                case 1:
                    lhSize = 2;
                    litSize = MEM_readLE16(istart) >> 4;
                    break;
                case 3:
                    lhSize = 3;
                    litSize = MEM_readLE24(istart) >> 4;
                    break;
                }

                if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
                    RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
                    memcpy(dctx->litBuffer, istart+lhSize, litSize);
                    dctx->litPtr = dctx->litBuffer;
                    dctx->litSize = litSize;
                    memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
                    return lhSize+litSize;
                }
                /* direct reference into compressed stream */
                dctx->litPtr = istart+lhSize;
                dctx->litSize = litSize;
                return lhSize+litSize;
            }

        case set_rle:
            {   U32 const lhlCode = ((istart[0]) >> 2) & 3;
                size_t litSize, lhSize;
                /* same header layout as set_basic */
                switch(lhlCode)
                {
                case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
                    lhSize = 1;
                    litSize = istart[0] >> 3;
                    break;
                case 1:
                    lhSize = 2;
                    litSize = MEM_readLE16(istart) >> 4;
                    break;
                case 3:
                    lhSize = 3;
                    litSize = MEM_readLE24(istart) >> 4;
                    RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
                    break;
                }
                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
                /* replicate the byte that follows the header, over-length bytes included */
                memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
                dctx->litPtr = dctx->litBuffer;
                dctx->litSize = litSize;
                return lhSize+1;   /* header + the single repeated byte */
            }
        default:
            RETURN_ERROR(corruption_detected, "impossible");
        }
    }
}
+ * These are pre-calculated FSE decoding tables using default distributions as defined in specification : + * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions + * They were generated programmatically with following method : + * - start from default distributions, present in /lib/common/zstd_internal.h + * - generate tables normally, using ZSTD_buildFSETable() + * - printout the content of tables + * - pretify output, report below, test with fuzzer to ensure it's correct */ + +/* Default FSE distribution table for Literal Lengths */ +static const ZSTD_seqSymbol LL_defaultDTable[(1<tableLog = 0; + DTableH->fastMode = 0; + + cell->nbBits = 0; + cell->nextState = 0; + assert(nbAddBits < 255); + cell->nbAdditionalBits = (BYTE)nbAddBits; + cell->baseValue = baseValue; +} + + +/* ZSTD_buildFSETable() : + * generate FSE decoding table for one symbol (ll, ml or off) + * cannot fail if input is valid => + * all inputs are presumed validated at this stage */ +void +ZSTD_buildFSETable(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog) +{ + ZSTD_seqSymbol* const tableDecode = dt+1; + U16 symbolNext[MaxSeq+1]; + + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + U32 highThreshold = tableSize-1; + + /* Sanity Checks */ + assert(maxSymbolValue <= MaxSeq); + assert(tableLog <= MaxFSELog); + + /* Init, lay down lowprob symbols */ + { ZSTD_seqSymbol_header DTableH; + DTableH.tableLog = tableLog; + DTableH.fastMode = 1; + { S16 const largeLimit= (S16)(1 << (tableLog-1)); + U32 s; + for (s=0; s= largeLimit) DTableH.fastMode=0; + assert(normalizedCounter[s]>=0); + symbolNext[s] = (U16)normalizedCounter[s]; + } } } + memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + { U32 const tableMask = tableSize-1; + U32 const step = FSE_TABLESTEP(tableSize); + U32 s, position = 0; + for (s=0; s 
highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } } + assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } + + /* Build Decoding table */ + { U32 u; + for (u=0; u max, corruption_detected, ""); + { U32 const symbol = *(const BYTE*)src; + U32 const baseline = baseValue[symbol]; + U32 const nbBits = nbAdditionalBits[symbol]; + ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits); + } + *DTablePtr = DTableSpace; + return 1; + case set_basic : + *DTablePtr = defaultTable; + return 0; + case set_repeat: + RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, ""); + /* prefetch FSE table if used */ + if (ddictIsCold && (nbSeq > 24 /* heuristic */)) { + const void* const pStart = *DTablePtr; + size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog)); + PREFETCH_AREA(pStart, pSize); + } + return 0; + case set_compressed : + { unsigned tableLog; + S16 norm[MaxSeq+1]; + size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); + RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, ""); + RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, ""); + ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog); + *DTablePtr = DTableSpace; + return headerSize; + } + default : + assert(0); + RETURN_ERROR(GENERIC, "impossible"); + } +} + +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize) +{ + const BYTE* const istart = (const BYTE* const)src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip = istart; + int nbSeq; + DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); + + /* check */ + RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, ""); + + /* SeqHead */ + nbSeq = *ip++; + if (!nbSeq) { + *nbSeqPtr=0; + RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, ""); + return 1; + } + if (nbSeq > 0x7F) { + if (nbSeq == 0xFF) { + RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); + 
/*! ZSTD_decodeSeqHeaders() :
 *  Parses the header of a sequences section : the number of sequences, then the
 *  byte giving the encoding type of the three symbol tables, then each table
 *  description through ZSTD_buildSeqTable() (literal lengths, offsets, match lengths,
 *  in that order). The selected tables are published in dctx->LLTptr, dctx->OFTptr
 *  and dctx->MLTptr.
 *  Number of sequences encoding, from the first byte b0 :
 *    - b0 == 0          : no sequence; the section must then be exactly 1 byte
 *    - b0 <  0x80       : nbSeq = b0
 *    - b0 == 0xFF       : nbSeq = next 2 bytes (little-endian) + LONGNBSEQ
 *    - otherwise        : nbSeq = ((b0 - 0x80) << 8) + next byte
 * @param nbSeqPtr : receives the number of sequences
 * @return : nb of bytes consumed from `src`, or an error code */
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                             const void* src, size_t srcSize)
{
    const BYTE* const istart = (const BYTE* const)src;
    const BYTE* const iend = istart + srcSize;
    const BYTE* ip = istart;
    int nbSeq;
    DEBUGLOG(5, "ZSTD_decodeSeqHeaders");

    /* check */
    RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");

    /* SeqHead */
    nbSeq = *ip++;
    if (!nbSeq) {
        *nbSeqPtr=0;
        RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
        return 1;
    }
    if (nbSeq > 0x7F) {
        if (nbSeq == 0xFF) {
            /* 3-byte form */
            RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
            nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
        } else {
            /* 2-byte form */
            RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
            nbSeq = ((nbSeq-0x80)<<8) + *ip++;
        }
    }
    *nbSeqPtr = nbSeq;

    /* FSE table descriptors */
    RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
    {   /* encoding-type byte : bits 7-6 = literal lengths, 5-4 = offsets, 3-2 = match lengths */
        symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
        symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
        symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
        ip++;

        /* Build DTables */
        {   size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
                                                      LLtype, MaxLL, LLFSELog,
                                                      ip, iend-ip,
                                                      ZSTDConstants::LL_base, ZSTDInternalConstants::LL_bits,
                                                      LL_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq);
            RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
            ip += llhSize;
        }

        {   size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
                                                      OFtype, MaxOff, OffFSELog,
                                                      ip, iend-ip,
                                                      ZSTDConstants::OF_base, ZSTDConstants::OF_bits,
                                                      OF_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq);
            RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
            ip += ofhSize;
        }

        {   size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
                                                      MLtype, MaxML, MLFSELog,
                                                      ip, iend-ip,
                                                      ZSTDConstants::ML_base, ZSTDInternalConstants::ML_bits,
                                                      ML_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq);
            RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
            ip += mlhSize;
        }
    }

    return ip-istart;
}
/* One decoded sequence : copy `litLength` literals, then `matchLength` bytes
 * located `offset` bytes before the end of those literals. */
typedef struct {
    size_t litLength;
    size_t matchLength;
    size_t offset;
    const BYTE* match;   /* prefetch address; only set by ZSTD_decodeSequence() when prefetch is requested */
} seq_t;

/* Decoding state of one FSE stream. */
typedef struct {
    size_t state;                  /* index of the current cell within `table` */
    const ZSTD_seqSymbol* table;   /* decoding cells (header cell skipped, see ZSTD_initFseState()) */
} ZSTD_fseState;

/* Everything ZSTD_decodeSequence() needs to produce the next sequence. */
typedef struct {
    BIT_DStream_t DStream;             /* bitstream shared by the three FSE states */
    ZSTD_fseState stateLL;             /* literal lengths */
    ZSTD_fseState stateOffb;           /* offsets */
    ZSTD_fseState stateML;             /* match lengths */
    size_t prevOffset[ZSTD_REP_NUM];   /* most recent offsets, [0] being the newest */
    const BYTE* prefixStart;           /* prefixStart, dictEnd and pos are only read to compute seq_t.match */
    const BYTE* dictEnd;
    size_t pos;
} seqState_t;

/*! ZSTD_overlapCopy8() :
 *  Copies 8 bytes from ip to op and updates op and ip where ip <= op.
 *  If the offset is < 8 then the offset is spread to at least 8 bytes.
 *
 *  Precondition: *ip <= *op
 *  Postcondition: *op - *ip >= 8
 */
HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
    assert(*ip <= *op);
    if (offset < 8) {
        /* close range match, overlap */
        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
        int const sub2 = dec64table[offset];
        /* first 4 bytes are copied one at a time, so that a source byte
         * written just before can be read back when offset < 4 */
        (*op)[0] = (*ip)[0];
        (*op)[1] = (*ip)[1];
        (*op)[2] = (*ip)[2];
        (*op)[3] = (*ip)[3];
        *ip += dec32table[offset];
        ZSTD_copy4(*op+4, *ip);
        *ip -= sub2;   /* combined with the += 8 below, leaves *op - *ip >= 8 (asserted at exit) */
    } else {
        ZSTD_copy8(*op, *ip);
    }
    *ip += 8;
    *op += 8;
    assert(*op - *ip >= 8);
}
/*! ZSTD_safecopy() :
 *  Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
 *  and write up to 16 bytes past oend_w (op >= oend_w is allowed).
 *  This function is only called in the uncommon case where the sequence is near the end of the block. It
 *  should be fast for a single long sequence, but can be slow for several short sequences.
 *
 *  @param op     destination of the copy
 *  @param oend_w limit up to which ZSTD_wildcopy() is used; beyond it, bytes are copied one at a time
 *  @param ip     source of the copy
 *  @param length number of bytes to copy
 *  @param ovtype controls the overlap detection
 *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
 *         - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
 *           The src buffer must be before the dst buffer.
 */
static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
    ptrdiff_t const diff = op - ip;
    BYTE* const oend = op + length;

    assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
           (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));

    if (length < 8) {
        /* Handle short lengths. */
        while (op < oend) *op++ = *ip++;
        return;
    }
    if (ovtype == ZSTD_overlap_src_before_dst) {
        /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
        assert(length >= 8);
        ZSTD_overlapCopy8(&op, &ip, diff);
        assert(op - ip >= 8);
        assert(op <= oend);
    }

    if (oend <= oend_w) {
        /* No risk of overwrite. */
        /* NOTE(review): `length` is passed unchanged even when the 8 bytes above were
         * already copied; presumably covered by the slack beyond oend_w - confirm. */
        ZSTD_wildcopy(op, ip, length, ovtype);
        return;
    }
    if (op <= oend_w) {
        /* Wildcopy until we get close to the end. */
        assert(oend > oend_w);
        ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
        ip += oend_w - op;
        op = oend_w;
    }
    /* Handle the leftovers. */
    while (op < oend) *op++ = *ip++;
}
/* ZSTD_execSequenceEnd():
 * This version handles cases that are near the end of the output buffer. It requires
 * more careful checks to make sure there is no overflow. By separating out these hard
 * and unlikely cases, we can speed up the common cases.
 *
 * NOTE: This function needs to be fast for a single long sequence, but doesn't need
 * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
 *
 * @param op           where the sequence gets written
 * @param oend         end of the output buffer
 * @param sequence     literal length, match length and offset to execute
 * @param litPtr       in/out : current position in the literals; advanced by sequence.litLength
 * @param litLimit     end of the literals
 * @param prefixStart  offsets reaching before this address are served from the area ending at dictEnd
 * @param virtualStart an offset reaching before this address is rejected as corruption
 * @return : sequence.litLength + sequence.matchLength, or an error code
 */
FORCE_NOINLINE
size_t ZSTD_execSequenceEnd(BYTE* op,
                            BYTE* const oend, seq_t sequence,
                            const BYTE** litPtr, const BYTE* const litLimit,
                            const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;
    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;

    /* bounds checks : careful of address space overflow in 32-bit mode */
    RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
    RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
    assert(op < op + sequenceLength);
    assert(oLitEnd < op + sequenceLength);

    /* copy literals */
    ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
    op = oLitEnd;
    *litPtr = iLitEnd;

    /* copy Match */
    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix */
        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
        /* translate the match position into the area ending at dictEnd */
        match = dictEnd - (prefixStart-match);
        if (match + sequence.matchLength <= dictEnd) {
            /* match entirely located before dictEnd */
            memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
        }
        /* span extDict & currentPrefixSegment */
        {   size_t const length1 = dictEnd - match;
            memmove(oLitEnd, match, length1);
            op = oLitEnd + length1;
            sequence.matchLength -= length1;
            match = prefixStart;   /* remainder continues from the start of the prefix */
    }   }
    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
    return sequenceLength;
}
/* ZSTD_execSequence():
 * Executes one sequence : copies sequence.litLength literals from *litPtr to `op`,
 * then sequence.matchLength bytes found sequence.offset bytes before the end of
 * those literals. This is the fast path; any sequence ending beyond
 * `oend - WILDCOPY_OVERLENGTH`, or reading past `litLimit`, is handed over to
 * ZSTD_execSequenceEnd().
 * Parameters have the same meaning as in ZSTD_execSequenceEnd().
 * @return : sequence.litLength + sequence.matchLength, or an error code
 */
HINT_INLINE
size_t ZSTD_execSequence(BYTE* op,
                         BYTE* const oend, seq_t sequence,
                         const BYTE** litPtr, const BYTE* const litLimit,
                         const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;   /* risk : address space underflow on oend=NULL */
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;

    assert(op != NULL /* Precondition */);
    assert(oend_w < oend /* No underflow */);
    /* Handle edge cases in a slow path:
     *   - Read beyond end of literals
     *   - Match end is within WILDCOPY_OVERLENGTH of oend
     *   - 32-bit mode and the match length overflows
     */
    if (UNLIKELY(
            iLitEnd > litLimit ||
            oMatchEnd > oend_w ||
            (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
        return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);

    /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
    assert(op <= oLitEnd /* No overflow */);
    assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
    assert(oMatchEnd <= oend /* No underflow */);
    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);

    /* Copy Literals:
     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
     * We likely don't need the full 32-byte wildcopy.
     */
    assert(WILDCOPY_OVERLENGTH >= 16);
    ZSTD_copy16(op, (*litPtr));   /* always copies 16 bytes, whatever litLength is */
    if (UNLIKELY(sequence.litLength > 16)) {
        ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
    }
    op = oLitEnd;
    *litPtr = iLitEnd;   /* update for next sequence */

    /* Copy Match */
    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix -> go into extDict */
        RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
        match = dictEnd + (match - prefixStart);
        if (match + sequence.matchLength <= dictEnd) {
            /* match entirely located before dictEnd */
            memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
        }
        /* span extDict & currentPrefixSegment */
        {   size_t const length1 = dictEnd - match;
            memmove(oLitEnd, match, length1);
            op = oLitEnd + length1;
            sequence.matchLength -= length1;
            match = prefixStart;
    }   }
    /* Match within prefix of 1 or more bytes */
    assert(op <= oMatchEnd);
    assert(oMatchEnd <= oend_w);
    assert(match >= prefixStart);
    assert(sequence.matchLength >= 1);

    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
     * without overlap checking.
     */
    if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
        /* We bet on a full wildcopy for matches, since we expect matches to be
         * longer than literals (in general). In silesia, ~10% of matches are longer
         * than 16 bytes.
         */
        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
        return sequenceLength;
    }
    assert(sequence.offset < WILDCOPY_VECLEN);

    /* Copy 8 bytes and spread the offset to be >= 8. */
    ZSTD_overlapCopy8(&op, &match, sequence.offset);

    /* If the match length is > 8 bytes, then continue with the wildcopy. */
    if (sequence.matchLength > 8) {
        assert(op < oMatchEnd);
        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
    }
    return sequenceLength;
}
*/ + if (sequence.matchLength > 8) { + assert(op < oMatchEnd); + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); + } + return sequenceLength; +} + +static void +ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt) +{ + const void* ptr = dt; + const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits", + (U32)DStatePtr->state, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) +{ + ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.nextState + lowBits; +} + +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo) +{ + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.nextState + lowBits; +} + +/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum + * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) + * bits before reloading. This value is the maximum number of bytes we read + * after reloading when we are decoding long offsets. + */ +#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ + (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ + ? 
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
 * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
 * bits before reloading. This value is the maximum number of bits we read
 * after reloading when we are decoding long offsets.
 */
#define LONG_OFFSETS_MAX_EXTRA_BITS_32                       \
    (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32       \
        ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32  \
        : 0)

/* Whether offsets may need the split read used in 32-bit mode (see ZSTD_decodeSequence()). */
typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
/* Whether ZSTD_decodeSequence() must also compute seq_t.match for prefetching. */
typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;

/* ZSTD_decodeSequence() :
 * Decodes the next sequence from `seqState` : the current cell of each of the three
 * FSE states provides a base value and a number of additional bits, which are read
 * from the bitstream in the order offset, match length, literal length. Repeat-offset
 * codes are resolved against seqState->prevOffset, which is updated accordingly.
 * The three FSE states are then advanced (literal length, match length, offset).
 * @param longOffsets : in 32-bit mode, enables reading the offset bits in two parts
 *                      around a bitstream reload
 * @param prefetch    : when ZSTD_p_prefetch, also fills seq.match and advances seqState->pos
 * @return : the decoded sequence (seq.match is left unset without prefetch)
 */
FORCE_INLINE_TEMPLATE seq_t
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
{
    seq_t seq;
    ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
    ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state];
    ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state];
    U32 const llBase = llDInfo.baseValue;
    U32 const mlBase = mlDInfo.baseValue;
    U32 const ofBase = ofDInfo.baseValue;
    BYTE const llBits = llDInfo.nbAdditionalBits;
    BYTE const mlBits = mlDInfo.nbAdditionalBits;
    BYTE const ofBits = ofDInfo.nbAdditionalBits;
    BYTE const totalBits = llBits+mlBits+ofBits;

    /* sequence */
    {   size_t offset;
        if (ofBits > 1) {
            /* explicit offset : becomes the newest entry of the offset history */
            ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
            ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
            assert(ofBits <= MaxOff);
            if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
                /* read the high bits, reload, then read the `extraBits` low bits */
                U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
                offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
                BIT_reloadDStream(&seqState->DStream);
                if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
                assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32);   /* to avoid another reload */
            } else {
                offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
                if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
            }
            seqState->prevOffset[2] = seqState->prevOffset[1];
            seqState->prevOffset[1] = seqState->prevOffset[0];
            seqState->prevOffset[0] = offset;
        } else {
            /* repeat-offset code; its meaning shifts by one when litLength base is 0 */
            U32 const ll0 = (llBase == 0);
            if (LIKELY((ofBits == 0))) {
                if (LIKELY(!ll0))
                    offset = seqState->prevOffset[0];   /* history left untouched */
                else {
                    /* use the second most recent offset, and swap the first two entries */
                    offset = seqState->prevOffset[1];
                    seqState->prevOffset[1] = seqState->prevOffset[0];
                    seqState->prevOffset[0] = offset;
                }
            } else {
                offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
                {   /* here `offset` is a repeat code : 3 selects prevOffset[0] - 1, otherwise it indexes prevOffset[] */
                    size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
                    temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
                    if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
                    seqState->prevOffset[1] = seqState->prevOffset[0];
                    seqState->prevOffset[0] = offset = temp;
            }   }   }
        seq.offset = offset;
    }

    seq.matchLength = mlBase;
    if (mlBits > 0)
        seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);

    /* reload only when the bits still to be read might not be available */
    if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
        BIT_reloadDStream(&seqState->DStream);
    if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
        BIT_reloadDStream(&seqState->DStream);
    /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
    ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);

    seq.litLength = llBase;
    if (llBits > 0)
        seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);

    if (MEM_32bits())
        BIT_reloadDStream(&seqState->DStream);

    DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
                (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);

    if (prefetch == ZSTD_p_prefetch) {
        size_t const pos = seqState->pos + seq.litLength;
        const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
        seq.match = matchBase + pos - seq.offset;  /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
                                                    * No consequence though : no memory access will occur, offset is only used for prefetching */
        seqState->pos = pos + seq.matchLength;
    }

    /* ANS state update
     * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
     * clang-9.2.0 does 7% worse with ZSTD_updateFseState().
     * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
     * better option, so it is the default for other compilers. But, if you
     * measure that it is worse, please put up a pull request.
     */
    {
#if defined(__GNUC__) && !defined(__clang__)
        const int kUseUpdateFseState = 1;
#else
        const int kUseUpdateFseState = 0;
#endif
        if (kUseUpdateFseState) {
            ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream);    /* <=  9 bits */
            ZSTD_updateFseState(&seqState->stateML, &seqState->DStream);    /* <=  9 bits */
            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
            ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream);  /* <=  8 bits */
        } else {
            ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo);    /* <=  9 bits */
            ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo);    /* <=  9 bits */
            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
            ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo);  /* <=  8 bits */
        }
    }

    return seq;
}
+ * No consequence though : no memory access will occur, offset is only used for prefetching */ + seqState->pos = pos + seq.matchLength; + } + + /* ANS state update + * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo(). + * clang-9.2.0 does 7% worse with ZSTD_updateFseState(). + * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the + * better option, so it is the default for other compilers. But, if you + * measure that it is worse, please put up a pull request. + */ + { +#if defined(__GNUC__) && !defined(__clang__) + const int kUseUpdateFseState = 1; +#else + const int kUseUpdateFseState = 0; +#endif + if (kUseUpdateFseState) { + ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ + ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + } else { + ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */ + } + } + + return seq; +} + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) +{ + size_t const windowSize = dctx->fParams.windowSize; + /* No dictionary used. */ + if (dctx->dictContentEndForFuzzing == NULL) return 0; + /* Dictionary is our prefix. */ + if (prefixStart == dctx->dictContentBeginForFuzzing) return 1; + /* Dictionary is not our ext-dict. */ + if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0; + /* Dictionary is not within our window size. 
*/ + if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0; + /* Dictionary is active. */ + return 1; +} + +MEM_STATIC void ZSTD_assertValidSequence( + ZSTD_DCtx const* dctx, + BYTE const* op, BYTE const* oend, + seq_t const seq, + BYTE const* prefixStart, BYTE const* virtualStart) +{ + size_t const windowSize = dctx->fParams.windowSize; + size_t const sequenceSize = seq.litLength + seq.matchLength; + BYTE const* const oLitEnd = op + seq.litLength; + DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + assert(op <= oend); + assert((size_t)(oend - op) >= sequenceSize); + assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX); + if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { + size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); + /* Offset must be within the dictionary. */ + assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); + assert(seq.offset <= windowSize + dictSize); + } else { + /* Offset must be within our window. 
*/ + assert(seq.offset <= windowSize); + } +} +#endif + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +FORCE_INLINE_TEMPLATE size_t +DONT_VECTORIZE +ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE* const)dst; + BYTE* const oend = ostart + maxDstSize; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); + const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + DEBUGLOG(5, "ZSTD_decompressSequences_body"); + (void)frame; + + /* Regen sequences */ + if (nbSeq) { + seqState_t seqState; + size_t error = 0; + dctx->fseEntropy = 1; + { U32 i; for (i=0; ientropy.rep[i]; } + RETURN_ERROR_IF( + ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), + corruption_detected, ""); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + assert(dst != NULL); + + ZSTD_STATIC_ASSERT( + BIT_DStream_unfinished < BIT_DStream_completed && + BIT_DStream_endOfBuffer < BIT_DStream_completed && + BIT_DStream_completed < BIT_DStream_overflow); + +#if defined(__GNUC__) && defined(__x86_64__) + /* Align the decompression loop to 32 + 16 bytes. + * + * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression + * speed swings based on the alignment of the decompression loop. This + * performance swing is caused by parts of the decompression loop falling + * out of the DSB. 
The entire decompression loop should fit in the DSB, + * when it can't we get much worse performance. You can measure if you've + * hit the good case or the bad case with this perf command for some + * compressed file test.zst: + * + * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \ + * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst + * + * If you see most cycles served out of the MITE you've hit the bad case. + * If you see most cycles served out of the DSB you've hit the good case. + * If it is pretty even then you may be in an okay case. + * + * I've been able to reproduce this issue on the following CPUs: + * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9 + * Use Instruments->Counters to get DSB/MITE cycles. + * I never got performance swings, but I was able to + * go from the good case of mostly DSB to half of the + * cycles served from MITE. + * - Coffeelake: Intel i9-9900k + * + * I haven't been able to reproduce the instability or DSB misses on any + * of the following CPUS: + * - Haswell + * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH + * - Skylake + * + * If you are seeing performance stability this script can help test. + * It tests on 4 commits in zstd where I saw performance change. 
+ * + * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4 + */ + __asm__(".p2align 5"); + __asm__("nop"); + __asm__(".p2align 4"); +#endif + for ( ; ; ) { + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch); + size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +#endif + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + BIT_reloadDStream(&(seqState.DStream)); + /* gcc and clang both don't like early returns in this loop. + * gcc doesn't like early breaks either. + * Instead save an error and report it at the end. + * When there is an error, don't increment op, so we don't + * overwrite. + */ + if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize; + else op += oneSeqSize; + if (UNLIKELY(!--nbSeq)) break; + } + + /* check if reached exact end */ + DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); + if (ZSTD_isError(error)) return error; + RETURN_ERROR_IF(nbSeq, corruption_detected, ""); + RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); + /* save reps for next block */ + { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + { size_t const lastLLSize = litEnd - litPtr; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + } + + return op-ostart; +} + +static size_t +ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return 
ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +FORCE_INLINE_TEMPLATE size_t +ZSTD_decompressSequencesLong_body( + ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE* const)dst; + BYTE* const oend = ostart + maxDstSize; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); + const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + (void)frame; + + /* Regen sequences */ + if (nbSeq) { +#define STORED_SEQS 4 +#define STORED_SEQS_MASK (STORED_SEQS-1) +#define ADVANCED_SEQS 4 + seq_t sequences[STORED_SEQS]; + int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); + seqState_t seqState; + int seqNb; + dctx->fseEntropy = 1; + { int i; for (i=0; ientropy.rep[i]; } + seqState.prefixStart = prefixStart; + seqState.pos = (size_t)(op-prefixStart); + seqState.dictEnd = dictEnd; + assert(dst != NULL); + assert(iend >= ip); + RETURN_ERROR_IF( + ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), + corruption_detected, ""); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + + /* prepare in advance */ + for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNbentropy.rep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + { size_t const lastLLSize = litEnd - litPtr; + 
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + } + + return op-ostart; +} + +static size_t +ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + + + +#if DYNAMIC_BMI2 + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + +#endif /* DYNAMIC_BMI2 */ + +typedef size_t (*ZSTD_decompressSequences_t)( + ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame); + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +static size_t +ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + DEBUGLOG(5, "ZSTD_decompressSequences"); + return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +/* ZSTD_decompressSequencesLong() : + * decompression function triggered when a minimum share of offsets 
is considered "long", + * aka out of cache. + * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance". + * This function will try to mitigate main memory latency through the use of prefetching */ +static size_t +ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + DEBUGLOG(5, "ZSTD_decompressSequencesLong"); +#if DYNAMIC_BMI2 + if (dctx->bmi2) { + return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + } +#endif + return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + + + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) +/* ZSTD_getLongOffsetsShare() : + * condition : offTable must be valid + * @return : "share" of long offsets (arbitrarily defined as > (1<<23)) + * compared to maximum possible of (1< 22) total += 1; + } + + assert(tableLog <= OffFSELog); + total <<= (OffFSELog - tableLog); /* scale to OffFSELog */ + + return total; +} +#endif + +size_t +ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, const int frame) +{ /* blockType == blockCompressed */ + const BYTE* ip = (const BYTE*)src; + /* isLongOffset must be true if there are long offsets. + * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN. + * We don't expect that to be the case in 64-bit mode. + * In block mode, window size is not known, so we have to be conservative. 
+ * (note: but it could be evaluated from current-lowLimit) + */ + ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); + DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); + + RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); + + /* Decode literals section */ + { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); + DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize); + if (ZSTD_isError(litCSize)) return litCSize; + ip += litCSize; + srcSize -= litCSize; + } + + /* Build Decoding Tables */ + { + /* These macros control at build-time which decompressor implementation + * we use. If neither is defined, we do some inspection and dispatch at + * runtime. + */ +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + int usePrefetchDecoder = dctx->ddictIsCold; +#endif + int nbSeq; + size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); + if (ZSTD_isError(seqHSize)) return seqHSize; + ip += seqHSize; + srcSize -= seqHSize; + + RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + if ( !usePrefetchDecoder + && (!frame || (dctx->fParams.windowSize > (1<<24))) + && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */ + U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr); + U32 const minShare = MEM_64bits() ? 
7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ + usePrefetchDecoder = (shareLongOffsets >= minShare); + } +#endif + + dctx->ddictIsCold = 0; + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + if (usePrefetchDecoder) +#endif +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); +#endif + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG + /* else */ + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); +#endif + } +} + + +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) +{ + if (dst != dctx->previousDstEnd) { /* not contiguous */ + dctx->dictEnd = dctx->previousDstEnd; + dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dst; + dctx->previousDstEnd = dst; + } +} + + +size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t dSize; + ZSTD_checkContinuity(dctx, dst); + dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0); + dctx->previousDstEnd = (char*)dst + dSize; + return dSize; +} + +} + +// LICENSE_CHANGE_END + +//===----------------------------------------------------------------------===// +// DuckDB +// +// parquet_timestamp.hpp +// +// +//===----------------------------------------------------------------------===// + + + +#include "duckdb.hpp" + +namespace duckdb { + +struct Int96 { + uint32_t value[3]; +}; + +int64_t ImpalaTimestampToNanoseconds(const Int96 &impala_timestamp); +timestamp_t ImpalaTimestampToTimestamp(const Int96 &raw_ts); +Int96 TimestampToImpalaTimestamp(timestamp_t &ts); +timestamp_t ParquetTimestampMicrosToTimestamp(const int64_t &raw_ts); +timestamp_t ParquetTimestampMsToTimestamp(const int64_t &raw_ts); +date_t 
ParquetIntToDate(const int32_t &raw_date); + +} // namespace duckdb + + + +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 +// See the end of this file for a list + + + +#include +#include +#include + +namespace duckdb { + +enum class UnicodeType { INVALID, ASCII, UNICODE }; +enum class UnicodeInvalidReason { BYTE_MISMATCH, NULL_BYTE }; + +class Utf8Proc { +public: + //! Distinguishes ASCII, Valid UTF8 and Invalid UTF8 strings + static UnicodeType Analyze(const char *s, size_t len, UnicodeInvalidReason *invalid_reason = nullptr, size_t *invalid_pos = nullptr); + //! Performs UTF NFC normalization of string, return value needs to be free'd + static char* Normalize(const char* s, size_t len); + //! Returns whether or not the UTF8 string is valid + static bool IsValid(const char *s, size_t len); + //! Returns the position (in bytes) of the next grapheme cluster + static size_t NextGraphemeCluster(const char *s, size_t len, size_t pos); + //! Returns the position (in bytes) of the previous grapheme cluster + static size_t PreviousGraphemeCluster(const char *s, size_t len, size_t pos); + + //! Transform a codepoint to utf8 and writes it to "c", sets "sz" to the size of the codepoint + static bool CodepointToUtf8(int cp, int &sz, char *c); + //! Returns the codepoint length in bytes when encoded in UTF8 + static int CodepointLength(int cp); + //! 
Transform a UTF8 string to a codepoint; returns the codepoint and writes the length of the codepoint (in UTF8) to sz + static int32_t UTF8ToCodepoint(const char *c, int &sz); + static size_t RenderWidth(const char *s, size_t len, size_t pos); + +}; + +} + + +// LICENSE_CHANGE_END + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// boolean_column_reader.hpp +// +// +//===----------------------------------------------------------------------===// + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// templated__column_reader.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { + +template +struct TemplatedParquetValueConversion { + static VALUE_TYPE DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { + D_ASSERT(offset < dict.len / sizeof(VALUE_TYPE)); + return ((VALUE_TYPE *)dict.ptr)[offset]; + } + + static VALUE_TYPE PlainRead(ByteBuffer &plain_data, ColumnReader &reader) { + return plain_data.read(); + } + + static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { + plain_data.inc(sizeof(VALUE_TYPE)); + } +}; + +template +class TemplatedColumnReader : public ColumnReader { +public: + TemplatedColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p, + idx_t max_define_p, idx_t max_repeat_p) + : ColumnReader(reader, move(type_p), schema_p, schema_idx_p, max_define_p, max_repeat_p) {}; + + shared_ptr dict; + +public: + void Dictionary(shared_ptr data, idx_t num_entries) override { + dict = move(data); + } + + void Offsets(uint32_t *offsets, uint8_t *defines, uint64_t num_values, parquet_filter_t &filter, + idx_t result_offset, Vector &result) override { + auto result_ptr = FlatVector::GetData(result); + auto &result_mask = FlatVector::Validity(result); + + idx_t offset_idx = 0; + for (idx_t row_idx = 0; row_idx 
< num_values; row_idx++) { + if (HasDefines() && defines[row_idx + result_offset] != max_define) { + result_mask.SetInvalid(row_idx + result_offset); + continue; + } + if (filter[row_idx + result_offset]) { + VALUE_TYPE val = VALUE_CONVERSION::DictRead(*dict, offsets[offset_idx++], *this); + if (!Value::IsValid(val)) { + result_mask.SetInvalid(row_idx + result_offset); + continue; + } + result_ptr[row_idx + result_offset] = val; + } else { + offset_idx++; + } + } + } + + void Plain(shared_ptr plain_data, uint8_t *defines, uint64_t num_values, parquet_filter_t &filter, + idx_t result_offset, Vector &result) override { + auto result_ptr = FlatVector::GetData(result); + auto &result_mask = FlatVector::Validity(result); + for (idx_t row_idx = 0; row_idx < num_values; row_idx++) { + if (HasDefines() && defines[row_idx + result_offset] != max_define) { + result_mask.SetInvalid(row_idx + result_offset); + continue; + } + if (filter[row_idx + result_offset]) { + VALUE_TYPE val = VALUE_CONVERSION::PlainRead(*plain_data, *this); + if (!Value::IsValid(val)) { + result_mask.SetInvalid(row_idx + result_offset); + continue; + } + result_ptr[row_idx + result_offset] = val; + } else { // there is still some data there that we have to skip over + VALUE_CONVERSION::PlainSkip(*plain_data, *this); + } + } + } +}; + +template +struct CallbackParquetValueConversion { + static DUCKDB_PHYSICAL_TYPE DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { + return TemplatedParquetValueConversion::DictRead(dict, offset, reader); + } + + static DUCKDB_PHYSICAL_TYPE PlainRead(ByteBuffer &plain_data, ColumnReader &reader) { + return FUNC(plain_data.read()); + } + + static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { + plain_data.inc(sizeof(PARQUET_PHYSICAL_TYPE)); + } +}; + +} // namespace duckdb + +namespace duckdb { + +struct BooleanParquetValueConversion; + +class BooleanColumnReader : public TemplatedColumnReader { +public: + BooleanColumnReader(ParquetReader 
&reader, LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p, + idx_t max_define_p, idx_t max_repeat_p) + : TemplatedColumnReader(reader, move(type_p), schema_p, schema_idx_p, + max_define_p, max_repeat_p), + byte_pos(0) {}; + + uint8_t byte_pos; + + void InitializeRead(const std::vector &columns, TProtocol &protocol_p) override { + byte_pos = 0; + TemplatedColumnReader::InitializeRead(columns, protocol_p); + } +}; + +struct BooleanParquetValueConversion { + static bool DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { + throw std::runtime_error("Dicts for booleans make no sense"); + } + + static bool PlainRead(ByteBuffer &plain_data, ColumnReader &reader) { + plain_data.available(1); + auto &byte_pos = ((BooleanColumnReader &)reader).byte_pos; + bool ret = (*plain_data.ptr >> byte_pos) & 1; + byte_pos++; + if (byte_pos == 8) { + byte_pos = 0; + plain_data.inc(1); + } + return ret; + } + + static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { + PlainRead(plain_data, reader); + } +}; + +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// callback_column_reader.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + +namespace duckdb { + +template +class CallbackColumnReader + : public TemplatedColumnReader> { + +public: + CallbackColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t file_idx_p, + idx_t max_define_p, idx_t max_repeat_p) + : TemplatedColumnReader>( + reader, move(type_p), schema_p, file_idx_p, max_define_p, max_repeat_p) { + } + +protected: + void Dictionary(shared_ptr dictionary_data, idx_t num_entries) { + this->dict = make_shared(this->reader.allocator, num_entries * sizeof(DUCKDB_PHYSICAL_TYPE)); + auto dict_ptr = (DUCKDB_PHYSICAL_TYPE *)this->dict->ptr; + for (idx_t i = 0; i < num_entries; i++) { + dict_ptr[i] = 
FUNC(dictionary_data->read()); + } + } +}; + +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// decimal_column_reader.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + +namespace duckdb { + +template +struct DecimalParquetValueConversion { + static DUCKDB_PHYSICAL_TYPE DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { + auto dict_ptr = (DUCKDB_PHYSICAL_TYPE *)dict.ptr; + return dict_ptr[offset]; + } + + static DUCKDB_PHYSICAL_TYPE PlainRead(ByteBuffer &plain_data, ColumnReader &reader) { + DUCKDB_PHYSICAL_TYPE res = 0; + auto byte_len = (idx_t)reader.Schema().type_length; /* sure, type length needs to be a signed int */ + D_ASSERT(byte_len <= sizeof(DUCKDB_PHYSICAL_TYPE)); + plain_data.available(byte_len); + auto res_ptr = (uint8_t *)&res; + + // numbers are stored as two's complement so some muckery is required + bool positive = (*plain_data.ptr & 0x80) == 0; + + for (idx_t i = 0; i < byte_len; i++) { + auto byte = *(plain_data.ptr + (byte_len - i - 1)); + res_ptr[i] = positive ? 
byte : byte ^ 0xFF; + } + plain_data.inc(byte_len); + if (!positive) { + res += 1; + return -res; + } + return res; + } + + static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { + plain_data.inc(reader.Schema().type_length); + } +}; + +template +class DecimalColumnReader + : public TemplatedColumnReader> { + +public: + DecimalColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t file_idx_p, + idx_t max_define_p, idx_t max_repeat_p) + : TemplatedColumnReader>( + reader, move(type_p), schema_p, file_idx_p, max_define_p, max_repeat_p) {}; + +protected: + void Dictionary(shared_ptr dictionary_data, idx_t num_entries) { + this->dict = make_shared(this->reader.allocator, num_entries * sizeof(DUCKDB_PHYSICAL_TYPE)); + auto dict_ptr = (DUCKDB_PHYSICAL_TYPE *)this->dict->ptr; + for (idx_t i = 0; i < num_entries; i++) { + dict_ptr[i] = DecimalParquetValueConversion::PlainRead(*dictionary_data, *this); + } + } +}; + +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// list_column_reader.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + +namespace duckdb { + +class ListColumnReader : public ColumnReader { +public: + ListColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p, + idx_t max_define_p, idx_t max_repeat_p, unique_ptr child_column_reader_p); + + idx_t Read(uint64_t num_values, parquet_filter_t &filter, uint8_t *define_out, uint8_t *repeat_out, + Vector &result_out) override; + + virtual void Skip(idx_t num_values) override { + D_ASSERT(0); + } + + void InitializeRead(const std::vector &columns, TProtocol &protocol_p) override { + child_column_reader->InitializeRead(columns, protocol_p); + } + + idx_t GroupRowsAvailable() override { + return child_column_reader->GroupRowsAvailable() + overflow_child_count; + } + +private: + unique_ptr 
child_column_reader; + ResizeableBuffer child_defines; + ResizeableBuffer child_repeats; + uint8_t *child_defines_ptr; + uint8_t *child_repeats_ptr; + + VectorCache read_cache; + Vector read_vector; + + parquet_filter_t child_filter; + + idx_t overflow_child_count; +}; + +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// string_column_reader.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { + +struct StringParquetValueConversion { + static string_t DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader); + + static string_t PlainRead(ByteBuffer &plain_data, ColumnReader &reader); + + static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader); +}; + +class StringColumnReader : public TemplatedColumnReader { +public: + StringColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p, + idx_t max_define_p, idx_t max_repeat_p) + : TemplatedColumnReader(reader, move(type_p), schema_p, schema_idx_p, + max_define_p, max_repeat_p) { + fixed_width_string_length = 0; + if (schema_p.type == Type::FIXED_LEN_BYTE_ARRAY) { + D_ASSERT(schema_p.__isset.type_length); + fixed_width_string_length = schema_p.type_length; + } + }; + + void Dictionary(shared_ptr dictionary_data, idx_t num_entries) override; + + unique_ptr dict_strings; + uint32_t VerifyString(const char *str_data, uint32_t str_len); + idx_t fixed_width_string_length; + +protected: + void DictReference(Vector &result) override; + void PlainReference(shared_ptr plain_data, Vector &result) override; +}; + +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// struct_column_reader.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + +namespace duckdb { + +class StructColumnReader : public 
ColumnReader { +public: + StructColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p, + idx_t max_define_p, idx_t max_repeat_p, vector> child_readers_p) + : ColumnReader(reader, move(type_p), schema_p, schema_idx_p, max_define_p, max_repeat_p), + child_readers(move(child_readers_p)) { + D_ASSERT(type.id() == LogicalTypeId::STRUCT); + D_ASSERT(!StructType::GetChildTypes(type).empty()); + }; + + ColumnReader *GetChildReader(idx_t child_idx) { + return child_readers[child_idx].get(); + } + + void InitializeRead(const std::vector &columns, TProtocol &protocol_p) override { + for (auto &child : child_readers) { + child->InitializeRead(columns, protocol_p); + } + } + + idx_t Read(uint64_t num_values, parquet_filter_t &filter, uint8_t *define_out, uint8_t *repeat_out, + Vector &result) override { + auto &struct_entries = StructVector::GetEntries(result); + D_ASSERT(StructType::GetChildTypes(Type()).size() == struct_entries.size()); + for (idx_t i = 0; i < struct_entries.size(); i++) { + auto child_num_values = + child_readers[i]->Read(num_values, filter, define_out, repeat_out, *struct_entries[i]); + if (child_num_values != num_values) { + throw std::runtime_error("Struct child row count mismatch"); + } + } + + return num_values; + } + + virtual void Skip(idx_t num_values) override { + D_ASSERT(0); + } + + idx_t GroupRowsAvailable() override { + return child_readers[0]->GroupRowsAvailable(); + } + + vector> child_readers; +}; + +} // namespace duckdb + + + + + +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #5 +// See the end of this file for a list + +// Copyright 2005 and onwards Google Inc. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// A light-weight compression algorithm. It is designed for speed of +// compression and decompression, rather than for the utmost in space +// savings. +// +// For getting better compression ratios when you are compressing data +// with long repeated sequences or compressing data that is similar to +// other data, while still compressing fast, you might look at first +// using BMDiff and then compressing the output of BMDiff with +// Snappy. 
+ +#ifndef THIRD_PARTY_SNAPPY_SNAPPY_H__ +#define THIRD_PARTY_SNAPPY_SNAPPY_H__ + +#include +#include + + + +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #5 +// See the end of this file for a list + +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Various type stubs for the open-source version of Snappy. +// +// This file cannot include config.h, as it is included from snappy.h, +// which is a public header. 
Instead, snappy-stubs-public.h is generated by +// from snappy-stubs-public.h.in at configure time. + +#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ +#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ + +#include +#include +#include + +#ifndef _WIN32 // HAVE_SYS_UIO_H +#include +#endif // HAVE_SYS_UIO_H + +#define SNAPPY_MAJOR 1 +#define SNAPPY_MINOR 1 +#define SNAPPY_PATCHLEVEL 7 +#define SNAPPY_VERSION \ + ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL) + +namespace snappy { + +using int8 = std::int8_t; +using uint8 = std::uint8_t; +using int16 = std::int16_t; +using uint16 = std::uint16_t; +using int32 = std::int32_t; +using uint32 = std::uint32_t; +using int64 = std::int64_t; +using uint64 = std::uint64_t; + +using string = std::string; + +#ifdef _WIN32 // !HAVE_SYS_UIO_H +// Windows does not have an iovec type, yet the concept is universally useful. +// It is simple to define it ourselves, so we put it inside our own namespace. +struct iovec { + void* iov_base; + size_t iov_len; +}; +#endif // !HAVE_SYS_UIO_H + +} // namespace snappy + +#endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ + + +// LICENSE_CHANGE_END + + +namespace snappy { + class Source; + class Sink; + + // ------------------------------------------------------------------------ + // Generic compression/decompression routines. + // ------------------------------------------------------------------------ + + // Compress the bytes read from "*source" and append to "*sink". Return the + // number of bytes written. + size_t Compress(Source* source, Sink* sink); + + // Find the uncompressed length of the given stream, as given by the header. + // Note that the true length could deviate from this; the stream could e.g. + // be truncated. + // + // Also note that this leaves "*source" in a state that is unsuitable for + // further operations, such as RawUncompress(). 
You will need to rewind + // or recreate the source yourself before attempting any further calls. + bool GetUncompressedLength(Source* source, uint32* result); + + // ------------------------------------------------------------------------ + // Higher-level string based routines (should be sufficient for most users) + // ------------------------------------------------------------------------ + + // Sets "*output" to the compressed version of "input[0,input_length-1]". + // Original contents of *output are lost. + // + // REQUIRES: "input[]" is not an alias of "*output". + size_t Compress(const char* input, size_t input_length, string* output); + + // Decompresses "compressed[0,compressed_length-1]" to "*uncompressed". + // Original contents of "*uncompressed" are lost. + // + // REQUIRES: "compressed[]" is not an alias of "*uncompressed". + // + // returns false if the message is corrupted and could not be decompressed + bool Uncompress(const char* compressed, size_t compressed_length, + string* uncompressed); + + // Decompresses "compressed" to "*uncompressed". + // + // returns false if the message is corrupted and could not be decompressed + bool Uncompress(Source* compressed, Sink* uncompressed); + + // This routine uncompresses as much of the "compressed" as possible + // into sink. It returns the number of valid bytes added to sink + // (extra invalid bytes may have been added due to errors; the caller + // should ignore those). The emitted data typically has length + // GetUncompressedLength(), but may be shorter if an error is + // encountered. + size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed); + + // ------------------------------------------------------------------------ + // Lower-level character array based routines. May be useful for + // efficiency reasons in certain circumstances. 
+ // ------------------------------------------------------------------------ + + // REQUIRES: "compressed" must point to an area of memory that is at + // least "MaxCompressedLength(input_length)" bytes in length. + // + // Takes the data stored in "input[0..input_length]" and stores + // it in the array pointed to by "compressed". + // + // "*compressed_length" is set to the length of the compressed output. + // + // Example: + // char* output = new char[snappy::MaxCompressedLength(input_length)]; + // size_t output_length; + // RawCompress(input, input_length, output, &output_length); + // ... Process(output, output_length) ... + // delete [] output; + void RawCompress(const char* input, + size_t input_length, + char* compressed, + size_t* compressed_length); + + // Given data in "compressed[0..compressed_length-1]" generated by + // calling the Snappy::Compress routine, this routine + // stores the uncompressed data to + // uncompressed[0..GetUncompressedLength(compressed)-1] + // returns false if the message is corrupted and could not be decrypted + bool RawUncompress(const char* compressed, size_t compressed_length, + char* uncompressed); + + // Given data from the byte source 'compressed' generated by calling + // the Snappy::Compress routine, this routine stores the uncompressed + // data to + // uncompressed[0..GetUncompressedLength(compressed,compressed_length)-1] + // returns false if the message is corrupted and could not be decrypted + bool RawUncompress(Source* compressed, char* uncompressed); + + // Given data in "compressed[0..compressed_length-1]" generated by + // calling the Snappy::Compress routine, this routine + // stores the uncompressed data to the iovec "iov". The number of physical + // buffers in "iov" is given by iov_cnt and their cumulative size + // must be at least GetUncompressedLength(compressed). The individual buffers + // in "iov" must not overlap with each other. 
+ // + // returns false if the message is corrupted and could not be decrypted + bool RawUncompressToIOVec(const char* compressed, size_t compressed_length, + const struct iovec* iov, size_t iov_cnt); - case ZSTD_c_forceAttachDict : { - const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value; - BOUNDCHECK(ZSTD_c_forceAttachDict, pref); - CCtxParams->attachDictPref = pref; - return CCtxParams->attachDictPref; - } + // Given data from the byte source 'compressed' generated by calling + // the Snappy::Compress routine, this routine stores the uncompressed + // data to the iovec "iov". The number of physical + // buffers in "iov" is given by iov_cnt and their cumulative size + // must be at least GetUncompressedLength(compressed). The individual buffers + // in "iov" must not overlap with each other. + // + // returns false if the message is corrupted and could not be decrypted + bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov, + size_t iov_cnt); - case ZSTD_c_literalCompressionMode : { - const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value; - BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm); - CCtxParams->literalCompressionMode = lcm; - return CCtxParams->literalCompressionMode; - } + // Returns the maximal size of the compressed representation of + // input data that is "source_bytes" bytes in length; + size_t MaxCompressedLength(size_t source_bytes); - case ZSTD_c_nbWorkers : -#ifndef ZSTD_MULTITHREAD - RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); - return 0; + // REQUIRES: "compressed[]" was produced by RawCompress() or Compress() + // Returns true and stores the length of the uncompressed data in + // *result normally. Returns false on parsing error. + // This operation takes O(1) time. 
+ bool GetUncompressedLength(const char* compressed, size_t compressed_length, + size_t* result); + + // Returns true iff the contents of "compressed[]" can be uncompressed + // successfully. Does not return the uncompressed data. Takes + // time proportional to compressed_length, but is usually at least + // a factor of four faster than actual decompression. + bool IsValidCompressedBuffer(const char* compressed, + size_t compressed_length); + + // Returns true iff the contents of "compressed" can be uncompressed + // successfully. Does not return the uncompressed data. Takes + // time proportional to *compressed length, but is usually at least + // a factor of four faster than actual decompression. + // On success, consumes all of *compressed. On failure, consumes an + // unspecified prefix of *compressed. + bool IsValidCompressed(Source* compressed); + +} // end namespace snappy + +#endif // THIRD_PARTY_SNAPPY_SNAPPY_H__ + + +// LICENSE_CHANGE_END + + + +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 +// See the end of this file for a list + +//===----------------------------------------------------------------------===// +// DuckDB +// +// miniz_wrapper.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 +// See the end of this file for a list + +/* miniz.c 2.0.8 - public domain deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing + See "unlicense" statement at the end of this file. + Rich Geldreich , last updated Oct. 13, 2013 + Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt + + Most API's defined in miniz.c are optional. 
For example, to disable the archive related functions just define + MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO (see the list below for more macros). + + * Low-level Deflate/Inflate implementation notes: + + Compression: Use the "tdefl" API's. The compressor supports raw, static, and dynamic blocks, lazy or + greedy parsing, match length filtering, RLE-only, and Huffman-only streams. It performs and compresses + approximately as well as zlib. + + Decompression: Use the "tinfl" API's. The entire decompressor is implemented as a single function + coroutine: see tinfl_decompress(). It supports decompression into a 32KB (or larger power of 2) wrapping buffer, or into a memory + block large enough to hold the entire file. + + The low-level tdefl/tinfl API's do not make any use of dynamic memory allocation. + + * zlib-style API notes: + + miniz.c implements a fairly large subset of zlib. There's enough functionality present for it to be a drop-in + zlib replacement in many apps: + The z_stream struct, optional memory allocation callbacks + deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound + inflateInit/inflateInit2/inflate/inflateEnd + compress, compress2, compressBound, uncompress + CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly routines. + Supports raw deflate streams or standard zlib streams with adler-32 checking. + + Limitations: + The callback API's are not implemented yet. No support for gzip headers or zlib static dictionaries. + I've tried to closely emulate zlib's various flavors of stream flushing and return status codes, but + there are no guarantees that miniz.c pulls this off perfectly. + + * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, originally written by + Alex Evans. Supports 1-4 bytes/pixel images. 
+ + * ZIP archive API notes: + + The ZIP archive API's where designed with simplicity and efficiency in mind, with just enough abstraction to + get the job done with minimal fuss. There are simple API's to retrieve file information, read files from + existing archives, create new archives, append new files to existing archives, or clone archive data from + one archive to another. It supports archives located in memory or the heap, on disk (using stdio.h), + or you can specify custom file read/write callbacks. + + - Archive reading: Just call this function to read a single file from a disk archive: + + void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, + size_t *pSize, mz_uint zip_flags); + + For more complex cases, use the "mz_zip_reader" functions. Upon opening an archive, the entire central + directory is located and read as-is into memory, and subsequent file access only occurs when reading individual files. + + - Archives file scanning: The simple way is to use this function to scan a loaded archive for a specific file: + + int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags); + + The locate operation can optionally check file comments too, which (as one example) can be used to identify + multiple versions of the same file in an archive. This function uses a simple linear search through the central + directory, so it's not very fast. + + Alternately, you can iterate through all the files in an archive (using mz_zip_reader_get_num_files()) and + retrieve detailed info on each file by calling mz_zip_reader_file_stat(). + + - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer immediately writes compressed file data + to disk and builds an exact image of the central directory in memory. The central directory image is written + all at once at the end of the archive file when the archive is finalized. 
+ + The archive writer can optionally align each file's local header and file data to any power of 2 alignment, + which can be useful when the archive will be read from optical media. Also, the writer supports placing + arbitrary data blobs at the very beginning of ZIP archives. Archives written using either feature are still + readable by any ZIP tool. + + - Archive appending: The simple way to add a single file to an archive is to call this function: + + mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, + const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); + + The archive will be created if it doesn't already exist, otherwise it'll be appended to. + Note the appending is done in-place and is not an atomic operation, so if something goes wrong + during the operation it's possible the archive could be left without a central directory (although the local + file headers and file data will be fine, so the archive will be recoverable). + + For more complex archive modification scenarios: + 1. The safest way is to use a mz_zip_reader to read the existing archive, cloning only those bits you want to + preserve into a new archive using using the mz_zip_writer_add_from_zip_reader() function (which compiles the + compressed file data as-is). When you're done, delete the old archive and rename the newly written archive, and + you're done. This is safe but requires a bunch of temporary disk space or heap memory. + + 2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using mz_zip_writer_init_from_reader(), + append new files as needed, then finalize the archive which will write an updated central directory to the + original archive. (This is basically what mz_zip_add_mem_to_archive_file_in_place() does.) There's a + possibility that the archive's central directory could be lost with this method if anything goes wrong, though. 
+ + - ZIP archive support limitations: + No zip64 or spanning support. Extraction functions can only handle unencrypted, stored or deflated files. + Requires streams capable of seeking. + + * This is a header file library, like stb_image.c. To get only a header file, either cut and paste the + below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then include miniz.c from it. + + * Important: For best perf. be sure to customize the below macros for your target platform: + #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 + #define MINIZ_LITTLE_ENDIAN 1 + #define MINIZ_HAS_64BIT_REGISTERS 1 + + * On platforms using glibc, Be sure to "#define _LARGEFILE64_SOURCE 1" before including miniz.c to ensure miniz + uses the 64-bit variants: fopen64(), stat64(), etc. Otherwise you won't be able to process large files + (i.e. 32-bit stat() fails for me on files > 0x7FFFFFFF bytes). +*/ + + + + + +/* Defines to completely disable specific portions of miniz.c: + If all macros here are defined the only functionality remaining will be CRC-32, adler-32, tinfl, and tdefl. */ + +/* Define MINIZ_NO_STDIO to disable all usage and any functions which rely on stdio for file I/O. */ +#define MINIZ_NO_STDIO + +/* If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able to get the current time, or */ +/* get/set file times, and the C run-time funcs that get/set times won't be called. */ +/* The current downside is the times written to your archives will be from 1979. */ +#define MINIZ_NO_TIME + +/* Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. */ +/* #define MINIZ_NO_ARCHIVE_APIS */ + +/* Define MINIZ_NO_ARCHIVE_WRITING_APIS to disable all writing related ZIP archive API's. */ +/* #define MINIZ_NO_ARCHIVE_WRITING_APIS */ + +/* Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression API's. 
*/ +/*#define MINIZ_NO_ZLIB_APIS */ + +/* Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent conflicts against stock zlib. */ +#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES + +/* Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. + Note if MINIZ_NO_MALLOC is defined then the user must always provide custom user alloc/free/realloc + callbacks to the zlib and archive API's, and a few stand-alone helper API's which don't provide custom user + functions (such as tdefl_compress_mem_to_heap() and tinfl_decompress_mem_to_heap()) won't work. */ +/*#define MINIZ_NO_MALLOC */ + +#if defined(__TINYC__) && (defined(__linux) || defined(__linux__)) +/* TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc on Linux */ +#define MINIZ_NO_TIME +#endif + +#include + + + +#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) +#include +#endif + +#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__) +/* MINIZ_X86_OR_X64_CPU is only used to help set the below macros. */ +#define MINIZ_X86_OR_X64_CPU 1 #else - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); - CCtxParams->nbWorkers = value; - return CCtxParams->nbWorkers; +#define MINIZ_X86_OR_X64_CPU 0 #endif - case ZSTD_c_jobSize : -#ifndef ZSTD_MULTITHREAD - RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); - return 0; +#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU +/* Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. */ +#define MINIZ_LITTLE_ENDIAN 1 #else - /* Adjust to the minimum non-default value. 
*/ - if (value != 0 && value < ZSTDMT_JOBSIZE_MIN) - value = ZSTDMT_JOBSIZE_MIN; - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); - assert(value >= 0); - CCtxParams->jobSize = value; - return CCtxParams->jobSize; +#define MINIZ_LITTLE_ENDIAN 0 #endif - case ZSTD_c_overlapLog : -#ifndef ZSTD_MULTITHREAD - RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); - return 0; +#if MINIZ_X86_OR_X64_CPU +/* Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses. */ +#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 0 // always 0 because alignment #else - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); - CCtxParams->overlapLog = value; - return CCtxParams->overlapLog; +#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 0 #endif - case ZSTD_c_rsyncable : -#ifndef ZSTD_MULTITHREAD - RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); - return 0; +#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__) +/* Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions). */ +#define MINIZ_HAS_64BIT_REGISTERS 1 #else - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); - CCtxParams->rsyncable = value; - return CCtxParams->rsyncable; +#define MINIZ_HAS_64BIT_REGISTERS 0 #endif - case ZSTD_c_enableLongDistanceMatching : - CCtxParams->ldmParams.enableLdm = (value!=0); - return CCtxParams->ldmParams.enableLdm; +namespace duckdb_miniz { - case ZSTD_c_ldmHashLog : - if (value!=0) /* 0 ==> auto */ - BOUNDCHECK(ZSTD_c_ldmHashLog, value); - CCtxParams->ldmParams.hashLog = value; - return CCtxParams->ldmParams.hashLog; +/* ------------------- zlib-style API Definitions. 
*/ + +/* For more compatibility with zlib, miniz.c uses unsigned long for some parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits! */ +typedef unsigned long mz_ulong; + +/* mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap. */ +void mz_free(void *p); + +#define MZ_ADLER32_INIT (1) +/* mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL. */ +mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len); + +#define MZ_CRC32_INIT (0) +/* mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL. */ +mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len); + +/* Compression strategies. */ +enum { MZ_DEFAULT_STRATEGY = 0, MZ_FILTERED = 1, MZ_HUFFMAN_ONLY = 2, MZ_RLE = 3, MZ_FIXED = 4 }; + +/* Method */ +#define MZ_DEFLATED 8 + +/* Heap allocation callbacks. +Note that mz_alloc_func parameter types purpsosely differ from zlib's: items/size is size_t, not unsigned long. */ +typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size); +typedef void (*mz_free_func)(void *opaque, void *address); +typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, size_t size); + +/* Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL. */ +enum { + MZ_NO_COMPRESSION = 0, + MZ_BEST_SPEED = 1, + MZ_BEST_COMPRESSION = 9, + MZ_UBER_COMPRESSION = 10, + MZ_DEFAULT_LEVEL = 6, + MZ_DEFAULT_COMPRESSION = -1 +}; + +#define MZ_VERSION "10.0.3" +#define MZ_VERNUM 0xA030 +#define MZ_VER_MAJOR 10 +#define MZ_VER_MINOR 0 +#define MZ_VER_REVISION 3 +#define MZ_VER_SUBREVISION 0 + +#ifndef MINIZ_NO_ZLIB_APIS + +/* Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. 
The other values are for advanced use (refer to the zlib docs). */ +enum { MZ_NO_FLUSH = 0, MZ_PARTIAL_FLUSH = 1, MZ_SYNC_FLUSH = 2, MZ_FULL_FLUSH = 3, MZ_FINISH = 4, MZ_BLOCK = 5 }; + +/* Return status codes. MZ_PARAM_ERROR is non-standard. */ +enum { + MZ_OK = 0, + MZ_STREAM_END = 1, + MZ_NEED_DICT = 2, + MZ_ERRNO = -1, + MZ_STREAM_ERROR = -2, + MZ_DATA_ERROR = -3, + MZ_MEM_ERROR = -4, + MZ_BUF_ERROR = -5, + MZ_VERSION_ERROR = -6, + MZ_PARAM_ERROR = -10000 +}; + +/* Window bits */ +#define MZ_DEFAULT_WINDOW_BITS 15 + +struct mz_internal_state; + +/* Compression/decompression stream struct. */ +typedef struct mz_stream_s { + const unsigned char *next_in; /* pointer to next byte to read */ + unsigned int avail_in; /* number of bytes available at next_in */ + mz_ulong total_in; /* total number of bytes consumed so far */ + + unsigned char *next_out; /* pointer to next byte to write */ + unsigned int avail_out; /* number of bytes that can be written to next_out */ + mz_ulong total_out; /* total number of bytes produced so far */ + + char *msg; /* error msg (unused) */ + struct mz_internal_state *state; /* internal state, allocated by zalloc/zfree */ + + mz_alloc_func zalloc; /* optional heap allocation function (defaults to malloc) */ + mz_free_func zfree; /* optional heap free function (defaults to free) */ + void *opaque; /* heap alloc function user pointer */ + + int data_type; /* data_type (unused) */ + mz_ulong adler; /* adler32 of the source or uncompressed data */ + mz_ulong reserved; /* not used */ +} mz_stream; + +typedef mz_stream *mz_streamp; + +/* Returns the version string of miniz.c. */ +const char *mz_version(void); + +/* mz_deflateInit() initializes a compressor with default options: */ +/* Parameters: */ +/* pStream must point to an initialized mz_stream struct. */ +/* level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION]. 
*/ +/* level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio. + */ +/* (This special func. is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.) */ +/* Return values: */ +/* MZ_OK on success. */ +/* MZ_STREAM_ERROR if the stream is bogus. */ +/* MZ_PARAM_ERROR if the input parameters are bogus. */ +/* MZ_MEM_ERROR on out of memory. */ +int mz_deflateInit(mz_streamp pStream, int level); + +/* mz_deflateInit2() is like mz_deflate(), except with more control: */ +/* Additional parameters: */ +/* method must be MZ_DEFLATED */ +/* window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer) */ +/* mem_level must be between [1, 9] (it's checked but ignored by miniz.c) */ +int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy); + +/* Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2(). */ +int mz_deflateReset(mz_streamp pStream); + +/* mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible. + */ +/* Parameters: */ +/* pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. */ +/* flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH. */ +/* Return values: */ +/* MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full). */ +/* MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore. */ +/* MZ_STREAM_ERROR if the stream is bogus. 
*/ +/* MZ_PARAM_ERROR if one of the parameters is invalid. */ +/* MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.) */ +int mz_deflate(mz_streamp pStream, int flush); + +/* mz_deflateEnd() deinitializes a compressor: */ +/* Return values: */ +/* MZ_OK on success. */ +/* MZ_STREAM_ERROR if the stream is bogus. */ +int mz_deflateEnd(mz_streamp pStream); + +/* mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH. */ +mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len); + +/* Single-call compression functions mz_compress() and mz_compress2(): */ +/* Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure. */ +int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); +int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, + int level); + +/* mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress(). */ +mz_ulong mz_compressBound(mz_ulong source_len); + +/* Initializes a decompressor. */ +int mz_inflateInit(mz_streamp pStream); + +/* mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer: */ +/* window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate). */ +int mz_inflateInit2(mz_streamp pStream, int window_bits); + +/* Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible. */ +/* Parameters: */ +/* pStream is the stream to read from and write to. 
You must initialize/update the next_in, avail_in, next_out, and avail_out members. */ +/* flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH. */ +/* On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster). */ +/* MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data. */ +/* Return values: */ +/* MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full. */ +/* MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified. */ +/* MZ_STREAM_ERROR if the stream is bogus. */ +/* MZ_DATA_ERROR if the deflate stream is invalid. */ +/* MZ_PARAM_ERROR if one of the parameters is invalid. */ +/* MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again */ +/* with more input data, or with more room in the output buffer (except when using single call decompression, described above). */ +int mz_inflate(mz_streamp pStream, int flush); + +/* Deinitializes a decompressor. */ +int mz_inflateEnd(mz_streamp pStream); + +/* Single-call decompression. */ +/* Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure. */ +int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); + +/* Returns a string description of the specified error code, or NULL if the error code is invalid. 
*/ +const char *mz_error(int err); + +/* Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports. */ +/* Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project. */ +#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES +typedef unsigned char Byte; +typedef unsigned int uInt; +typedef mz_ulong uLong; +typedef Byte Bytef; +typedef uInt uIntf; +typedef char charf; +typedef int intf; +typedef void *voidpf; +typedef uLong uLongf; +typedef void *voidp; +typedef void *const voidpc; +#define Z_NULL 0 +#define Z_NO_FLUSH MZ_NO_FLUSH +#define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH +#define Z_SYNC_FLUSH MZ_SYNC_FLUSH +#define Z_FULL_FLUSH MZ_FULL_FLUSH +#define Z_FINISH MZ_FINISH +#define Z_BLOCK MZ_BLOCK +#define Z_OK MZ_OK +#define Z_STREAM_END MZ_STREAM_END +#define Z_NEED_DICT MZ_NEED_DICT +#define Z_ERRNO MZ_ERRNO +#define Z_STREAM_ERROR MZ_STREAM_ERROR +#define Z_DATA_ERROR MZ_DATA_ERROR +#define Z_MEM_ERROR MZ_MEM_ERROR +#define Z_BUF_ERROR MZ_BUF_ERROR +#define Z_VERSION_ERROR MZ_VERSION_ERROR +#define Z_PARAM_ERROR MZ_PARAM_ERROR +#define Z_NO_COMPRESSION MZ_NO_COMPRESSION +#define Z_BEST_SPEED MZ_BEST_SPEED +#define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION +#define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION +#define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY +#define Z_FILTERED MZ_FILTERED +#define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY +#define Z_RLE MZ_RLE +#define Z_FIXED MZ_FIXED +#define Z_DEFLATED MZ_DEFLATED +#define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS +#define alloc_func mz_alloc_func +#define free_func mz_free_func +#define internal_state mz_internal_state +#define z_stream mz_stream +#define deflateInit mz_deflateInit +#define deflateInit2 mz_deflateInit2 +#define deflateReset mz_deflateReset +#define deflate mz_deflate +#define deflateEnd mz_deflateEnd +#define deflateBound mz_deflateBound +#define compress mz_compress +#define compress2 
mz_compress2 +#define compressBound mz_compressBound +#define inflateInit mz_inflateInit +#define inflateInit2 mz_inflateInit2 +#define inflate mz_inflate +#define inflateEnd mz_inflateEnd +#define uncompress mz_uncompress +#define crc32 mz_crc32 +#define adler32 mz_adler32 +#define MAX_WBITS 15 +#define MAX_MEM_LEVEL 9 +#define zError mz_error +#define ZLIB_VERSION MZ_VERSION +#define ZLIB_VERNUM MZ_VERNUM +#define ZLIB_VER_MAJOR MZ_VER_MAJOR +#define ZLIB_VER_MINOR MZ_VER_MINOR +#define ZLIB_VER_REVISION MZ_VER_REVISION +#define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION +#define zlibVersion mz_version +#define zlib_version mz_version() +#endif /* #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES */ - case ZSTD_c_ldmMinMatch : - if (value!=0) /* 0 ==> default */ - BOUNDCHECK(ZSTD_c_ldmMinMatch, value); - CCtxParams->ldmParams.minMatchLength = value; - return CCtxParams->ldmParams.minMatchLength; +#endif /* MINIZ_NO_ZLIB_APIS */ - case ZSTD_c_ldmBucketSizeLog : - if (value!=0) /* 0 ==> default */ - BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value); - CCtxParams->ldmParams.bucketSizeLog = value; - return CCtxParams->ldmParams.bucketSizeLog; +} - case ZSTD_c_ldmHashRateLog : - RETURN_ERROR_IF(value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN, - parameter_outOfBound, "Param out of bounds!"); - CCtxParams->ldmParams.hashRateLog = value; - return CCtxParams->ldmParams.hashRateLog; - case ZSTD_c_targetCBlockSize : - if (value!=0) /* 0 ==> default */ - BOUNDCHECK(ZSTD_c_targetCBlockSize, value); - CCtxParams->targetCBlockSize = value; - return CCtxParams->targetCBlockSize; +#include +#include +#include +#include - case ZSTD_c_srcSizeHint : - if (value!=0) /* 0 ==> default */ - BOUNDCHECK(ZSTD_c_srcSizeHint, value); - CCtxParams->srcSizeHint = value; - return CCtxParams->srcSizeHint; +namespace duckdb_miniz { - default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); - } -} +/* ------------------- Types and macros */ +typedef unsigned char mz_uint8; +typedef signed short mz_int16; 
+typedef unsigned short mz_uint16; +typedef unsigned int mz_uint32; +typedef unsigned int mz_uint; +typedef int64_t mz_int64; +typedef uint64_t mz_uint64; +typedef int mz_bool; -size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value) -{ - return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value); -} +#define MZ_FALSE (0) +#define MZ_TRUE (1) -size_t ZSTD_CCtxParams_getParameter( - ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, int* value) -{ - switch(param) - { - case ZSTD_c_format : - *value = CCtxParams->format; - break; - case ZSTD_c_compressionLevel : - *value = CCtxParams->compressionLevel; - break; - case ZSTD_c_windowLog : - *value = (int)CCtxParams->cParams.windowLog; - break; - case ZSTD_c_hashLog : - *value = (int)CCtxParams->cParams.hashLog; - break; - case ZSTD_c_chainLog : - *value = (int)CCtxParams->cParams.chainLog; - break; - case ZSTD_c_searchLog : - *value = CCtxParams->cParams.searchLog; - break; - case ZSTD_c_minMatch : - *value = CCtxParams->cParams.minMatch; - break; - case ZSTD_c_targetLength : - *value = CCtxParams->cParams.targetLength; - break; - case ZSTD_c_strategy : - *value = (unsigned)CCtxParams->cParams.strategy; - break; - case ZSTD_c_contentSizeFlag : - *value = CCtxParams->fParams.contentSizeFlag; - break; - case ZSTD_c_checksumFlag : - *value = CCtxParams->fParams.checksumFlag; - break; - case ZSTD_c_dictIDFlag : - *value = !CCtxParams->fParams.noDictIDFlag; - break; - case ZSTD_c_forceMaxWindow : - *value = CCtxParams->forceWindow; - break; - case ZSTD_c_forceAttachDict : - *value = CCtxParams->attachDictPref; - break; - case ZSTD_c_literalCompressionMode : - *value = CCtxParams->literalCompressionMode; - break; - case ZSTD_c_nbWorkers : -#ifndef ZSTD_MULTITHREAD - assert(CCtxParams->nbWorkers == 0); -#endif - *value = CCtxParams->nbWorkers; - break; - case ZSTD_c_jobSize : -#ifndef ZSTD_MULTITHREAD - RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); -#else - 
assert(CCtxParams->jobSize <= INT_MAX); - *value = (int)CCtxParams->jobSize; - break; -#endif - case ZSTD_c_overlapLog : -#ifndef ZSTD_MULTITHREAD - RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); +/* Works around MSVC's spammy "warning C4127: conditional expression is constant" message. */ +#ifdef _MSC_VER +#define MZ_MACRO_END while (0, 0) #else - *value = CCtxParams->overlapLog; - break; +#define MZ_MACRO_END while (0) #endif - case ZSTD_c_rsyncable : -#ifndef ZSTD_MULTITHREAD - RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); + +#ifdef MINIZ_NO_STDIO +#define MZ_FILE void * #else - *value = CCtxParams->rsyncable; - break; -#endif - case ZSTD_c_enableLongDistanceMatching : - *value = CCtxParams->ldmParams.enableLdm; - break; - case ZSTD_c_ldmHashLog : - *value = CCtxParams->ldmParams.hashLog; - break; - case ZSTD_c_ldmMinMatch : - *value = CCtxParams->ldmParams.minMatchLength; - break; - case ZSTD_c_ldmBucketSizeLog : - *value = CCtxParams->ldmParams.bucketSizeLog; - break; - case ZSTD_c_ldmHashRateLog : - *value = CCtxParams->ldmParams.hashRateLog; - break; - case ZSTD_c_targetCBlockSize : - *value = (int)CCtxParams->targetCBlockSize; - break; - case ZSTD_c_srcSizeHint : - *value = (int)CCtxParams->srcSizeHint; - break; - default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); - } - return 0; -} +#include +#define MZ_FILE FILE +#endif /* #ifdef MINIZ_NO_STDIO */ -/** ZSTD_CCtx_setParametersUsingCCtxParams() : - * just applies `params` into `cctx` - * no action is performed, parameters are merely stored. - * If ZSTDMT is enabled, parameters are pushed to cctx->mtctx. - * This is possible even if a compression is ongoing. - * In which case, new parameters will be applied on the fly, starting with next compression job. 
- */ -size_t ZSTD_CCtx_setParametersUsingCCtxParams( - ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) +#ifdef MINIZ_NO_TIME +typedef struct mz_dummy_time_t_tag { - DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams"); - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, - "The context is in the wrong stage!"); - RETURN_ERROR_IF(cctx->cdict, stage_wrong, - "Can't override parameters with cdict attached (some must " - "be inherited from the cdict)."); - - cctx->requestedParams = *params; - return 0; -} + int m_dummy; +} mz_dummy_time_t; +#define MZ_TIME_T mz_dummy_time_t +#else +#define MZ_TIME_T time_t +#endif -ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) -{ - DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, - "Can't set pledgedSrcSize when not in init stage."); - cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; - return 0; -} +#define MZ_ASSERT(x) assert(x) -/** - * Initializes the local dict using the requested parameters. - * NOTE: This does not use the pledged src size, because it may be used for more - * than one compression. - */ -static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) -{ - ZSTD_localDict* const dl = &cctx->localDict; - ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams( - &cctx->requestedParams, ZSTD_CONTENTSIZE_UNKNOWN, dl->dictSize); - if (dl->dict == NULL) { - /* No local dictionary. */ - assert(dl->dictBuffer == NULL); - assert(dl->cdict == NULL); - assert(dl->dictSize == 0); - return 0; - } - if (dl->cdict != NULL) { - assert(cctx->cdict == dl->cdict); - /* Local dictionary already initialized. 
*/ - return 0; - } - assert(dl->dictSize > 0); - assert(cctx->cdict == NULL); - assert(cctx->prefixDict.dict == NULL); +#ifdef MINIZ_NO_MALLOC +#define MZ_MALLOC(x) NULL +#define MZ_FREE(x) (void)x, ((void)0) +#define MZ_REALLOC(p, x) NULL +#else +#define MZ_MALLOC(x) malloc(x) +#define MZ_FREE(x) free(x) +#define MZ_REALLOC(p, x) realloc(p, x) +#endif - dl->cdict = ZSTD_createCDict_advanced( - dl->dict, - dl->dictSize, - ZSTD_dlm_byRef, - dl->dictContentType, - cParams, - cctx->customMem); - RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed"); - cctx->cdict = dl->cdict; - return 0; -} +#define MZ_MAX(a, b) (((a) > (b)) ? (a) : (b)) +#define MZ_MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj)) -size_t ZSTD_CCtx_loadDictionary_advanced( - ZSTD_CCtx* cctx, const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) -{ - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, - "Can't load a dictionary when ctx is not in init stage."); - RETURN_ERROR_IF(cctx->staticSize, memory_allocation, - "no malloc for static CCtx"); - DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); - ZSTD_clearAllDicts(cctx); /* in case one already exists */ - if (dict == NULL || dictSize == 0) /* no dictionary mode */ - return 0; - if (dictLoadMethod == ZSTD_dlm_byRef) { - cctx->localDict.dict = dict; - } else { - void* dictBuffer = ZSTD_malloc(dictSize, cctx->customMem); - RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!"); - memcpy(dictBuffer, dict, dictSize); - cctx->localDict.dictBuffer = dictBuffer; - cctx->localDict.dict = dictBuffer; - } - cctx->localDict.dictSize = dictSize; - cctx->localDict.dictContentType = dictContentType; - return 0; -} +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN +#define MZ_READ_LE16(p) *((const mz_uint16 *)(p)) +#define MZ_READ_LE32(p) *((const mz_uint32 *)(p)) +#else 
+#define MZ_READ_LE16(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U)) +#define MZ_READ_LE32(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U)) +#endif -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference( - ZSTD_CCtx* cctx, const void* dict, size_t dictSize) -{ - return ZSTD_CCtx_loadDictionary_advanced( - cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); -} +#define MZ_READ_LE64(p) (((mz_uint64)MZ_READ_LE32(p)) | (((mz_uint64)MZ_READ_LE32((const mz_uint8 *)(p) + sizeof(mz_uint32))) << 32U)) -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) -{ - return ZSTD_CCtx_loadDictionary_advanced( - cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); -} +#ifdef _MSC_VER +#define MZ_FORCEINLINE __forceinline +#elif defined(__GNUC__) +#define MZ_FORCEINLINE __inline__ __attribute__((__always_inline__)) +#else +#define MZ_FORCEINLINE inline +#endif +extern void *miniz_def_alloc_func(void *opaque, size_t items, size_t size); +extern void miniz_def_free_func(void *opaque, void *address); +extern void *miniz_def_realloc_func(void *opaque, void *address, size_t items, size_t size); -size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) -{ - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, - "Can't ref a dict when ctx not in init stage."); - /* Free the existing local cdict (if any) to save memory. 
*/ - ZSTD_clearAllDicts(cctx); - cctx->cdict = cdict; - return 0; -} +#define MZ_UINT16_MAX (0xFFFFU) +#define MZ_UINT32_MAX (0xFFFFFFFFU) -size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize) -{ - return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent); -} -size_t ZSTD_CCtx_refPrefix_advanced( - ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) -{ - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, - "Can't ref a prefix when ctx not in init stage."); - ZSTD_clearAllDicts(cctx); - if (prefix != NULL && prefixSize > 0) { - cctx->prefixDict.dict = prefix; - cctx->prefixDict.dictSize = prefixSize; - cctx->prefixDict.dictContentType = dictContentType; - } - return 0; -} -/*! ZSTD_CCtx_reset() : - * Also dumps dictionary */ -size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) -{ - if ( (reset == ZSTD_reset_session_only) - || (reset == ZSTD_reset_session_and_parameters) ) { - cctx->streamStage = zcss_init; - cctx->pledgedSrcSizePlusOne = 0; - } - if ( (reset == ZSTD_reset_parameters) - || (reset == ZSTD_reset_session_and_parameters) ) { - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, - "Can't reset parameters only when not in init stage."); - ZSTD_clearAllDicts(cctx); - return ZSTD_CCtxParams_reset(&cctx->requestedParams); - } - return 0; -} -/** ZSTD_checkCParams() : - control CParam values remain within authorized range. 
- @return : 0, or an error code if one value is beyond authorized range */ -size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) -{ - BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog); - BOUNDCHECK(ZSTD_c_chainLog, (int)cParams.chainLog); - BOUNDCHECK(ZSTD_c_hashLog, (int)cParams.hashLog); - BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog); - BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch); - BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength); - BOUNDCHECK(ZSTD_c_strategy, cParams.strategy); - return 0; -} +/* ------------------- Low-level Compression API Definitions */ -/** ZSTD_clampCParams() : - * make CParam values within valid range. - * @return : valid CParams */ -static ZSTD_compressionParameters -ZSTD_clampCParams(ZSTD_compressionParameters cParams) -{ -# define CLAMP_TYPE(cParam, val, type) { \ - ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \ - if ((int)valbounds.upperBound) val=(type)bounds.upperBound; \ - } -# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned) - CLAMP(ZSTD_c_windowLog, cParams.windowLog); - CLAMP(ZSTD_c_chainLog, cParams.chainLog); - CLAMP(ZSTD_c_hashLog, cParams.hashLog); - CLAMP(ZSTD_c_searchLog, cParams.searchLog); - CLAMP(ZSTD_c_minMatch, cParams.minMatch); - CLAMP(ZSTD_c_targetLength,cParams.targetLength); - CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy); - return cParams; -} +/* Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently). */ +#define TDEFL_LESS_MEMORY 0 -/** ZSTD_cycleLog() : - * condition for correct operation : hashLog > 1 */ -U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) +/* tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search): */ +/* TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 
0=Huffman only, 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression). */ +enum { - U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); - return hashLog - btScale; -} + TDEFL_HUFFMAN_ONLY = 0, + TDEFL_DEFAULT_MAX_PROBES = 128, + TDEFL_MAX_PROBES_MASK = 0xFFF +}; -/** ZSTD_adjustCParams_internal() : - * optimize `cPar` for a specified input (`srcSize` and `dictSize`). - * mostly downsize to reduce memory consumption and initialization latency. - * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known. - * note : `srcSize==0` means 0! - * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */ -static ZSTD_compressionParameters -ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, - unsigned long long srcSize, - size_t dictSize) +/* TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data. */ +/* TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers). */ +/* TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing. */ +/* TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory). */ +/* TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1) */ +/* TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled. */ +/* TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables. */ +/* TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks. */ +/* The low 12 bits are reserved to control the max # of hash probes per dictionary lookup (see TDEFL_MAX_PROBES_MASK). 
*/ +enum { - static const U64 minSrcSize = 513; /* (1<<9) + 1 */ - static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); - assert(ZSTD_checkCParams(cPar)==0); - - if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN) - srcSize = minSrcSize; + TDEFL_WRITE_ZLIB_HEADER = 0x01000, + TDEFL_COMPUTE_ADLER32 = 0x02000, + TDEFL_GREEDY_PARSING_FLAG = 0x04000, + TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000, + TDEFL_RLE_MATCHES = 0x10000, + TDEFL_FILTER_MATCHES = 0x20000, + TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000, + TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000 +}; - /* resize windowLog if input is small enough, to use less memory */ - if ( (srcSize < maxWindowResize) - && (dictSize < maxWindowResize) ) { - U32 const tSize = (U32)(srcSize + dictSize); - static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN; - U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN : - ZSTD_highbit32(tSize-1) + 1; - if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; - } - if (cPar.hashLog > cPar.windowLog+1) cPar.hashLog = cPar.windowLog+1; - { U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); - if (cycleLog > cPar.windowLog) - cPar.chainLog -= (cycleLog - cPar.windowLog); - } +/* High level compression functions: */ +/* tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc(). */ +/* On entry: */ +/* pSrc_buf, src_buf_len: Pointer and size of source block to compress. */ +/* flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression. */ +/* On return: */ +/* Function returns a pointer to the compressed data, or NULL on failure. */ +/* *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data. */ +/* The caller must free() the returned block when it's no longer needed. 
*/ +void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); - if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) - cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */ +/* tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory. */ +/* Returns 0 on failure. */ +size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); - return cPar; -} +/* Compresses an image to a compressed PNG file in memory. */ +/* On entry: */ +/* pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4. */ +/* The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory. */ +/* level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL */ +/* If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps). */ +/* On return: */ +/* Function returns a pointer to the compressed data, or NULL on failure. */ +/* *pLen_out will be set to the size of the PNG image file. */ +/* The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed. 
*/ +void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip); +void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out); -ZSTD_compressionParameters -ZSTD_adjustCParams(ZSTD_compressionParameters cPar, - unsigned long long srcSize, - size_t dictSize) -{ - cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ - if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN; - return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize); -} +/* Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time. */ +typedef mz_bool (*tdefl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser); -static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize); -static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize); +/* tdefl_compress_mem_to_output() compresses a block to an output stream. The above helpers use this function internally. 
*/ +mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); -ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( - const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize) +enum { - ZSTD_compressionParameters cParams; - if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) { - srcSizeHint = CCtxParams->srcSizeHint; - } - cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize); - if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; - if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog; - if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog; - if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog; - if (CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog; - if (CCtxParams->cParams.minMatch) cParams.minMatch = CCtxParams->cParams.minMatch; - if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength; - if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy; - assert(!ZSTD_checkCParams(cParams)); - /* srcSizeHint == 0 means 0 */ - return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize); -} + TDEFL_MAX_HUFF_TABLES = 3, + TDEFL_MAX_HUFF_SYMBOLS_0 = 288, + TDEFL_MAX_HUFF_SYMBOLS_1 = 32, + TDEFL_MAX_HUFF_SYMBOLS_2 = 19, + TDEFL_LZ_DICT_SIZE = 32768, + TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, + TDEFL_MIN_MATCH_LEN = 3, + TDEFL_MAX_MATCH_LEN = 258 +}; -static size_t -ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, - const U32 forCCtx) +/* TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes). */ +#if TDEFL_LESS_MEMORY +enum { - size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 
0 : ((size_t)1 << cParams->chainLog); - size_t const hSize = ((size_t)1) << cParams->hashLog; - U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; - size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; - /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't - * surrounded by redzones in ASAN. */ - size_t const tableSpace = chainSize * sizeof(U32) - + hSize * sizeof(U32) - + h3Size * sizeof(U32); - size_t const optPotentialSpace = - ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32)) - + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32)) - + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32)) - + ZSTD_cwksp_alloc_size((1<strategy >= ZSTD_btopt)) - ? optPotentialSpace - : 0; - DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u", - (U32)chainSize, (U32)hSize, (U32)h3Size); - return tableSpace + optSpace; -} - -size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) + TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, + TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, + TDEFL_MAX_HUFF_SYMBOLS = 288, + TDEFL_LZ_HASH_BITS = 12, + TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, + TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, + TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS +}; +#else +enum { - RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); - { ZSTD_compressionParameters const cParams = - ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0); - size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); - U32 const divider = (cParams.minMatch==3) ? 
3 : 4; - size_t const maxNbSeq = blockSize / divider; - size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) - + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef)) - + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); - size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE); - size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); - size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1); + TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, + TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, + TDEFL_MAX_HUFF_SYMBOLS = 288, + TDEFL_LZ_HASH_BITS = 15, + TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, + TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, + TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS +}; +#endif - size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams); - size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq)); +/* The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions. */ +typedef enum { + TDEFL_STATUS_BAD_PARAM = -2, + TDEFL_STATUS_PUT_BUF_FAILED = -1, + TDEFL_STATUS_OKAY = 0, + TDEFL_STATUS_DONE = 1 +} tdefl_status; - /* estimateCCtxSize is for one-shot compression. So no buffers should - * be needed. However, we still allocate two 0-sized buffers, which can - * take space under ASAN. */ - size_t const bufferSpace = ZSTD_cwksp_alloc_size(0) - + ZSTD_cwksp_alloc_size(0); +/* Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums */ +typedef enum { + TDEFL_NO_FLUSH = 0, + TDEFL_SYNC_FLUSH = 2, + TDEFL_FULL_FLUSH = 3, + TDEFL_FINISH = 4 +} tdefl_flush; - size_t const cctxSpace = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)); +/* tdefl's compression state structure. 
*/ +typedef struct +{ + tdefl_put_buf_func_ptr m_pPut_buf_func; + void *m_pPut_buf_user; + mz_uint m_flags, m_max_probes[2]; + int m_greedy_parsing; + mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size; + mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end; + mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer; + mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish; + tdefl_status m_prev_return_status; + const void *m_pIn_buf; + void *m_pOut_buf; + size_t *m_pIn_buf_size, *m_pOut_buf_size; + tdefl_flush m_flush; + const mz_uint8 *m_pSrc; + size_t m_src_buf_left, m_out_buf_ofs; + mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1]; + mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE]; + mz_uint16 m_next[TDEFL_LZ_DICT_SIZE]; + mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE]; + mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE]; +} tdefl_compressor; - size_t const neededSpace = - cctxSpace + - entropySpace + - blockStateSpace + - ldmSpace + - ldmSeqSpace + - matchStateSize + - tokenSpace + - bufferSpace; +/* Initializes the compressor. */ +/* There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory. */ +/* pPut_buf_func: If non-NULL, output data will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression. */ +/* If pPut_buf_func is NULL the user should always call the tdefl_compress() API. */ +/* flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.) 
*/ +tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); - DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace); - return neededSpace; - } -} +/* Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible. */ +tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush); -size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) -{ - ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); - return ZSTD_estimateCCtxSize_usingCCtxParams(¶ms); -} +/* tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr. */ +/* tdefl_compress_buffer() always consumes the entire input buffer. */ +tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush); -static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel) -{ - ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0); - return ZSTD_estimateCCtxSize_usingCParams(cParams); -} +tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d); +mz_uint32 tdefl_get_adler32(tdefl_compressor *d); -size_t ZSTD_estimateCCtxSize(int compressionLevel) -{ - int level; - size_t memBudget = 0; - for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) { - size_t const newMB = ZSTD_estimateCCtxSize_internal(level); - if (newMB > memBudget) memBudget = newMB; - } - return memBudget; -} +/* Create tdefl_compress() flags given zlib-style compression parameters. 
*/ +/* level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files) */ +/* window_bits may be -15 (raw deflate) or 15 (zlib) */ +/* strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED */ +mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy); -size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) -{ - RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); - { ZSTD_compressionParameters const cParams = - ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0); - size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params); - size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); - size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize; - size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1; - size_t const streamingSize = ZSTD_cwksp_alloc_size(inBuffSize) - + ZSTD_cwksp_alloc_size(outBuffSize); +/* Allocate the tdefl_compressor structure in C so that */ +/* non-C language bindings to tdefl_ API don't need to worry about */ +/* structure size and allocation mechanism. 
*/ +tdefl_compressor *tdefl_compressor_alloc(); +void tdefl_compressor_free(tdefl_compressor *pComp); - return CCtxSize + streamingSize; - } -} -size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) -{ - ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); - return ZSTD_estimateCStreamSize_usingCCtxParams(¶ms); -} -static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) -{ - ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0); - return ZSTD_estimateCStreamSize_usingCParams(cParams); -} -size_t ZSTD_estimateCStreamSize(int compressionLevel) -{ - int level; - size_t memBudget = 0; - for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) { - size_t const newMB = ZSTD_estimateCStreamSize_internal(level); - if (newMB > memBudget) memBudget = newMB; - } - return memBudget; -} +/* ------------------- Low-level Decompression API Definitions */ -/* ZSTD_getFrameProgression(): - * tells how much data has been consumed (input) and produced (output) for current frame. - * able to count progression inside worker threads (non-blocking mode). - */ -ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx) -{ -#ifdef ZSTD_MULTITHREAD - if (cctx->appliedParams.nbWorkers > 0) { - return ZSTDMT_getFrameProgression(cctx->mtctx); - } -#endif - { ZSTD_frameProgression fp; - size_t const buffered = (cctx->inBuff == NULL) ? 0 : - cctx->inBuffPos - cctx->inToCompress; - if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress); - assert(buffered <= ZSTD_BLOCKSIZE_MAX); - fp.ingested = cctx->consumedSrcSize + buffered; - fp.consumed = cctx->consumedSrcSize; - fp.produced = cctx->producedCSize; - fp.flushed = cctx->producedCSize; /* simplified; some data might still be left within streaming output buffer */ - fp.currentJobID = 0; - fp.nbActiveWorkers = 0; - return fp; -} } -/*! 
ZSTD_toFlushNow() - * Only useful for multithreading scenarios currently (nbWorkers >= 1). - */ -size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx) +/* Decompression flags used by tinfl_decompress(). */ +/* TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the input is a raw deflate stream. */ +/* TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input. */ +/* TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB). */ +/* TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes. */ +enum { -#ifdef ZSTD_MULTITHREAD - if (cctx->appliedParams.nbWorkers > 0) { - return ZSTDMT_toFlushNow(cctx->mtctx); - } -#endif - (void)cctx; - return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */ -} + TINFL_FLAG_PARSE_ZLIB_HEADER = 1, + TINFL_FLAG_HAS_MORE_INPUT = 2, + TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4, + TINFL_FLAG_COMPUTE_ADLER32 = 8 +}; -static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, - ZSTD_compressionParameters cParams2) -{ - (void)cParams1; - (void)cParams2; - assert(cParams1.windowLog == cParams2.windowLog); - assert(cParams1.chainLog == cParams2.chainLog); - assert(cParams1.hashLog == cParams2.hashLog); - assert(cParams1.searchLog == cParams2.searchLog); - assert(cParams1.minMatch == cParams2.minMatch); - assert(cParams1.targetLength == cParams2.targetLength); - assert(cParams1.strategy == cParams2.strategy); -} +/* High level decompression functions: */ +/* tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap 
block allocated via malloc(). */ +/* On entry: */ +/* pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress. */ +/* On return: */ +/* Function returns a pointer to the decompressed data, or NULL on failure. */ +/* *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data. */ +/* The caller must call mz_free() on the returned block when it's no longer needed. */ +void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); -void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) -{ - int i; - for (i = 0; i < ZSTD_REP_NUM; ++i) - bs->rep[i] = ZSTDInternalConstants::repStartValue[i]; - bs->entropy.huf.repeatMode = HUF_repeat_none; - bs->entropy.fse.offcode_repeatMode = FSE_repeat_none; - bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none; - bs->entropy.fse.litlength_repeatMode = FSE_repeat_none; -} +/* tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory. */ +/* Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success. */ +#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1)) +size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); -/*! ZSTD_invalidateMatchState() - * Invalidate all the matches in the match finder tables. - * Requires nextSrc and base to be set (can be NULL). - */ -static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) -{ - ZSTD_window_clear(&ms->window); +/* tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer. */ +/* Returns 1 on success or 0 on failure. 
*/ +typedef int (*tinfl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser); +int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); - ms->nextToUpdate = ms->window.dictLimit; - ms->loadedDictEnd = 0; - ms->opt.litLengthSum = 0; /* force reset of btopt stats */ - ms->dictMatchState = NULL; -} +struct tinfl_decompressor_tag; +typedef struct tinfl_decompressor_tag tinfl_decompressor; -/** - * Indicates whether this compression proceeds directly from user-provided - * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or - * whether the context needs to buffer the input/output (ZSTDb_buffered). - */ -typedef enum { - ZSTDb_not_buffered, - ZSTDb_buffered -} ZSTD_buffered_policy_e; +/* Allocate the tinfl_decompressor structure in C so that */ +/* non-C language bindings to tinfl_ API don't need to worry about */ +/* structure size and allocation mechanism. */ -/** - * Controls, for this matchState reset, whether the tables need to be cleared / - * prepared for the coming compression (ZSTDcrp_makeClean), or whether the - * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a - * subsequent operation will overwrite the table space anyways (e.g., copying - * the matchState contents in from a CDict). - */ -typedef enum { - ZSTDcrp_makeClean, - ZSTDcrp_leaveDirty -} ZSTD_compResetPolicy_e; +tinfl_decompressor *tinfl_decompressor_alloc(); +void tinfl_decompressor_free(tinfl_decompressor *pDecomp); -/** - * Controls, for this matchState reset, whether indexing can continue where it - * left off (ZSTDirp_continue), or whether it needs to be restarted from zero - * (ZSTDirp_reset). - */ -typedef enum { - ZSTDirp_continue, - ZSTDirp_reset -} ZSTD_indexResetPolicy_e; +/* Max size of LZ dictionary. */ +#define TINFL_LZ_DICT_SIZE 32768 +/* Return status. 
*/ typedef enum { - ZSTD_resetTarget_CDict, - ZSTD_resetTarget_CCtx -} ZSTD_resetTarget_e; + /* This flags indicates the inflator needs 1 or more input bytes to make forward progress, but the caller is indicating that no more are available. The compressed data */ + /* is probably corrupted. If you call the inflator again with more bytes it'll try to continue processing the input but this is a BAD sign (either the data is corrupted or you called it incorrectly). */ + /* If you call it again with no input you'll just get TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS again. */ + TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS = -4, -static size_t -ZSTD_reset_matchState(ZSTD_matchState_t* ms, - ZSTD_cwksp* ws, - const ZSTD_compressionParameters* cParams, - const ZSTD_compResetPolicy_e crp, - const ZSTD_indexResetPolicy_e forceResetIndex, - const ZSTD_resetTarget_e forWho) -{ - size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); - size_t const hSize = ((size_t)1) << cParams->hashLog; - U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; - size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; + /* This flag indicates that one or more of the input parameters was obviously bogus. (You can try calling it again, but if you get this error the calling code is wrong.) */ + TINFL_STATUS_BAD_PARAM = -3, - DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset); - if (forceResetIndex == ZSTDirp_reset) { - ZSTD_window_init(&ms->window); - ZSTD_cwksp_mark_tables_dirty(ws); - } + /* This flags indicate the inflator is finished but the adler32 check of the uncompressed data didn't match. If you call it again it'll return TINFL_STATUS_DONE. */ + TINFL_STATUS_ADLER32_MISMATCH = -2, - ms->hashLog3 = hashLog3; + /* This flags indicate the inflator has somehow failed (bad code, corrupted input, etc.). 
If you call it again without resetting via tinfl_init() it it'll just keep on returning the same status failure code. */ + TINFL_STATUS_FAILED = -1, - ZSTD_invalidateMatchState(ms); + /* Any status code less than TINFL_STATUS_DONE must indicate a failure. */ - assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */ + /* This flag indicates the inflator has returned every byte of uncompressed data that it can, has consumed every byte that it needed, has successfully reached the end of the deflate stream, and */ + /* if zlib headers and adler32 checking enabled that it has successfully checked the uncompressed data's adler32. If you call it again you'll just get TINFL_STATUS_DONE over and over again. */ + TINFL_STATUS_DONE = 0, - ZSTD_cwksp_clear_tables(ws); + /* This flag indicates the inflator MUST have more input data (even 1 byte) before it can make any more forward progress, or you need to clear the TINFL_FLAG_HAS_MORE_INPUT */ + /* flag on the next call if you don't have any more source data. If the source data was somehow corrupted it's also possible (but unlikely) for the inflator to keep on demanding input to */ + /* proceed, so be sure to properly set the TINFL_FLAG_HAS_MORE_INPUT flag. */ + TINFL_STATUS_NEEDS_MORE_INPUT = 1, - DEBUGLOG(5, "reserving table space"); - /* table Space */ - ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32)); - ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32)); - ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32)); - RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, - "failed a workspace allocation in ZSTD_reset_matchState"); + /* This flag indicates the inflator definitely has 1 or more bytes of uncompressed data available, but it cannot write this data into the output buffer. 
*/ + /* Note if the source compressed data was corrupted it's possible for the inflator to return a lot of uncompressed data to the caller. I've been assuming you know how much uncompressed data to expect */ + /* (either exact or worst case) and will stop calling the inflator and fail after receiving too much. In pure streaming scenarios where you have no idea how many bytes to expect this may not be possible */ + /* so I may need to add some code to address this. */ + TINFL_STATUS_HAS_MORE_OUTPUT = 2 +} tinfl_status; - DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty); - if (crp!=ZSTDcrp_leaveDirty) { - /* reset tables only */ - ZSTD_cwksp_clean_tables(ws); - } +/* Initializes the decompressor to its initial state. */ +#define tinfl_init(r) \ + do \ + { \ + (r)->m_state = 0; \ + } \ + MZ_MACRO_END +#define tinfl_get_adler32(r) (r)->m_check_adler32 - /* opt parser space */ - if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { - DEBUGLOG(4, "reserving optimal parser space"); - ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned)); - ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned)); - ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned)); - ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)); - ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); - } +/* Main low-level decompressor coroutine function. This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability. */ +/* This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output. 
*/ +tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags); - ms->cParams = *cParams; +/* Internal/private bits follow. */ +enum +{ + TINFL_MAX_HUFF_TABLES = 3, + TINFL_MAX_HUFF_SYMBOLS_0 = 288, + TINFL_MAX_HUFF_SYMBOLS_1 = 32, + TINFL_MAX_HUFF_SYMBOLS_2 = 19, + TINFL_FAST_LOOKUP_BITS = 10, + TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS +}; - RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, - "failed a workspace allocation in ZSTD_reset_matchState"); +typedef struct +{ + mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0]; + mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2]; +} tinfl_huff_table; - return 0; -} +#if MINIZ_HAS_64BIT_REGISTERS +#define TINFL_USE_64BIT_BITBUF 1 +#else +#define TINFL_USE_64BIT_BITBUF 0 +#endif -/* ZSTD_indexTooCloseToMax() : - * minor optimization : prefer memset() rather than reduceIndex() - * which is measurably slow in some circumstances (reported for Visual Studio). - * Works when re-using a context for a lot of smallish inputs : - * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN, - * memset() will be triggered before reduceIndex(). - */ -#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB) -static int ZSTD_indexTooCloseToMax(ZSTD_window_t w) -{ - return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN); -} +#if TINFL_USE_64BIT_BITBUF +typedef mz_uint64 tinfl_bit_buf_t; +#define TINFL_BITBUF_SIZE (64) +#else +typedef mz_uint32 tinfl_bit_buf_t; +#define TINFL_BITBUF_SIZE (32) +#endif -/*! 
ZSTD_resetCCtx_internal() : - note : `params` are assumed fully validated at this stage */ -static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, - ZSTD_CCtx_params params, - U64 const pledgedSrcSize, - ZSTD_compResetPolicy_e const crp, - ZSTD_buffered_policy_e const zbuff) +struct tinfl_decompressor_tag { - ZSTD_cwksp* const ws = &zc->workspace; - DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u", - (U32)pledgedSrcSize, params.cParams.windowLog); - assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES]; + tinfl_bit_buf_t m_bit_buf; + size_t m_dist_from_out_buf_start; + tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES]; + mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137]; +}; - zc->isFirstBlock = 1; - if (params.ldmParams.enableLdm) { - /* Adjust long distance matching parameters */ - ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams); - assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); - assert(params.ldmParams.hashRateLog < 32); - zc->ldmState.hashPower = ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength); - } - { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); - size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); - U32 const divider = (params.cParams.minMatch==3) ? 3 : 4; - size_t const maxNbSeq = blockSize / divider; - size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) - + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef)) - + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); - size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0; - size_t const buffInSize = (zbuff==ZSTDb_buffered) ? 
windowSize + blockSize : 0; - size_t const matchStateSize = ZSTD_sizeof_matchState(¶ms.cParams, /* forCCtx */ 1); - size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize); - ZSTD_indexResetPolicy_e needsIndexReset = zc->initialized ? ZSTDirp_continue : ZSTDirp_reset; - if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) { - needsIndexReset = ZSTDirp_reset; - } - if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0); +/* ------------------- ZIP archive reading/writing */ - /* Check if workspace is large enough, alloc a new one if needed */ - { size_t const cctxSpace = zc->staticSize ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; - size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE); - size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); - size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) + ZSTD_cwksp_alloc_size(buffOutSize); - size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams); - size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)); +#ifndef MINIZ_NO_ARCHIVE_APIS - size_t const neededSpace = - cctxSpace + - entropySpace + - blockStateSpace + - ldmSpace + - ldmSeqSpace + - matchStateSize + - tokenSpace + - bufferSpace; - int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace; - int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace); +enum +{ + /* Note: These enums can be reduced as needed to save memory or stack space - they are pretty conservative. */ + MZ_ZIP_MAX_IO_BUF_SIZE = 64 * 1024, + MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 512, + MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 512 +}; - DEBUGLOG(4, "Need %zuKB workspace, including %zuKB for match state, and %zuKB for buffers", - neededSpace>>10, matchStateSize>>10, bufferSpace>>10); - DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); +typedef struct +{ + /* Central directory file index. 
*/ + mz_uint32 m_file_index; - if (workspaceTooSmall || workspaceWasteful) { - DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB", - ZSTD_cwksp_sizeof(ws) >> 10, - neededSpace >> 10); + /* Byte offset of this entry in the archive's central directory. Note we currently only support up to UINT_MAX or less bytes in the central dir. */ + mz_uint64 m_central_dir_ofs; - RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize"); + /* These fields are copied directly from the zip's central dir. */ + mz_uint16 m_version_made_by; + mz_uint16 m_version_needed; + mz_uint16 m_bit_flag; + mz_uint16 m_method; - needsIndexReset = ZSTDirp_reset; +#ifndef MINIZ_NO_TIME + MZ_TIME_T m_time; +#endif - ZSTD_cwksp_free(ws, zc->customMem); - FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), ""); + /* CRC-32 of uncompressed data. */ + mz_uint32 m_crc32; - DEBUGLOG(5, "reserving object space"); - /* Statically sized space. - * entropyWorkspace never moves, - * though prev/next block swap places */ - assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t))); - zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); - RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock"); - zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); - RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock"); - zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, HUF_WORKSPACE_SIZE); - RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace"); - } } + /* File's compressed size. */ + mz_uint64 m_comp_size; - ZSTD_cwksp_clear(ws); + /* File's uncompressed size. 
Note, I've seen some old archives where directory entries had 512 bytes for their uncompressed sizes, but when you try to unpack them you actually get 0 bytes. */ + mz_uint64 m_uncomp_size; - /* init params */ - zc->appliedParams = params; - zc->blockState.matchState.cParams = params.cParams; - zc->pledgedSrcSizePlusOne = pledgedSrcSize+1; - zc->consumedSrcSize = 0; - zc->producedCSize = 0; - if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) - zc->appliedParams.fParams.contentSizeFlag = 0; - DEBUGLOG(4, "pledged content size : %u ; flag : %u", - (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag); - zc->blockSize = blockSize; + /* Zip internal and external file attributes. */ + mz_uint16 m_internal_attr; + mz_uint32 m_external_attr; - XXH64_reset(&zc->xxhState, 0); - zc->stage = ZSTDcs_init; - zc->dictID = 0; + /* Entry's local header file offset in bytes. */ + mz_uint64 m_local_header_ofs; - ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); + /* Size of comment in bytes. */ + mz_uint32 m_comment_size; - /* ZSTD_wildcopy() is used to copy into the literals buffer, - * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes. - */ - zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH); - zc->seqStore.maxNbLit = blockSize; + /* MZ_TRUE if the entry appears to be a directory. */ + mz_bool m_is_directory; - /* buffers */ - zc->inBuffSize = buffInSize; - zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize); - zc->outBuffSize = buffOutSize; - zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize); + /* MZ_TRUE if the entry uses encryption/strong encryption (which miniz_zip doesn't support) */ + mz_bool m_is_encrypted; - /* ldm bucketOffsets table */ - if (params.ldmParams.enableLdm) { - /* TODO: avoid memset? 
*/ - size_t const ldmBucketSize = - ((size_t)1) << (params.ldmParams.hashLog - - params.ldmParams.bucketSizeLog); - zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize); - memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize); - } + /* MZ_TRUE if the file is not encrypted, a patch file, and if it uses a compression method we support. */ + mz_bool m_is_supported; - /* sequences storage */ - ZSTD_referenceExternalSequences(zc, NULL, 0); - zc->seqStore.maxNbSeq = maxNbSeq; - zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); - zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); - zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); - zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef)); + /* Filename. If string ends in '/' it's a subdirectory entry. */ + /* Guaranteed to be zero terminated, may be truncated to fit. */ + char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE]; - FORWARD_IF_ERROR(ZSTD_reset_matchState( - &zc->blockState.matchState, - ws, - ¶ms.cParams, - crp, - needsIndexReset, - ZSTD_resetTarget_CCtx), ""); + /* Comment field. */ + /* Guaranteed to be zero terminated, may be truncated to fit. */ + char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE]; - /* ldm hash table */ - if (params.ldmParams.enableLdm) { - /* TODO: avoid memset? 
*/ - size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog; - zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t)); - memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); - zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq)); - zc->maxNbLdmSequences = maxNbLdmSeq; +} mz_zip_archive_file_stat; - ZSTD_window_init(&zc->ldmState.window); - ZSTD_window_clear(&zc->ldmState.window); - zc->ldmState.loadedDictEnd = 0; - } +typedef size_t (*mz_file_read_func)(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n); +typedef size_t (*mz_file_write_func)(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n); +typedef mz_bool (*mz_file_needs_keepalive)(void *pOpaque); - DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); - zc->initialized = 1; +struct mz_zip_internal_state_tag; +typedef struct mz_zip_internal_state_tag mz_zip_internal_state; - return 0; - } -} +typedef enum { + MZ_ZIP_MODE_INVALID = 0, + MZ_ZIP_MODE_READING = 1, + MZ_ZIP_MODE_WRITING = 2, + MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3 +} mz_zip_mode; -/* ZSTD_invalidateRepCodes() : - * ensures next compression will not use repcodes from previous block. - * Note : only works with regular variant; - * do not use with extDict variant ! 
*/ -void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) { - int i; - for (i=0; iblockState.prevCBlock->rep[i] = 0; - assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window)); -} +typedef enum { + MZ_ZIP_FLAG_CASE_SENSITIVE = 0x0100, + MZ_ZIP_FLAG_IGNORE_PATH = 0x0200, + MZ_ZIP_FLAG_COMPRESSED_DATA = 0x0400, + MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800, + MZ_ZIP_FLAG_VALIDATE_LOCATE_FILE_FLAG = 0x1000, /* if enabled, mz_zip_reader_locate_file() will be called on each file as its validated to ensure the func finds the file in the central dir (intended for testing) */ + MZ_ZIP_FLAG_VALIDATE_HEADERS_ONLY = 0x2000, /* validate the local headers, but don't decompress the entire file and check the crc32 */ + MZ_ZIP_FLAG_WRITE_ZIP64 = 0x4000, /* always use the zip64 file format, instead of the original zip file format with automatic switch to zip64. Use as flags parameter with mz_zip_writer_init*_v2 */ + MZ_ZIP_FLAG_WRITE_ALLOW_READING = 0x8000, + MZ_ZIP_FLAG_ASCII_FILENAME = 0x10000 +} mz_zip_flags; -/* These are the approximate sizes for each strategy past which copying the - * dictionary tables into the working context is faster than using them - * in-place. 
- */ -static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = { - 8 KB, /* unused */ - 8 KB, /* ZSTD_fast */ - 16 KB, /* ZSTD_dfast */ - 32 KB, /* ZSTD_greedy */ - 32 KB, /* ZSTD_lazy */ - 32 KB, /* ZSTD_lazy2 */ - 32 KB, /* ZSTD_btlazy2 */ - 32 KB, /* ZSTD_btopt */ - 8 KB, /* ZSTD_btultra */ - 8 KB /* ZSTD_btultra2 */ -}; +typedef enum { + MZ_ZIP_TYPE_INVALID = 0, + MZ_ZIP_TYPE_USER, + MZ_ZIP_TYPE_MEMORY, + MZ_ZIP_TYPE_HEAP, + MZ_ZIP_TYPE_FILE, + MZ_ZIP_TYPE_CFILE, + MZ_ZIP_TOTAL_TYPES +} mz_zip_type; -static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, - const ZSTD_CCtx_params* params, - U64 pledgedSrcSize) -{ - size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy]; - return ( pledgedSrcSize <= cutoff - || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN - || params->attachDictPref == ZSTD_dictForceAttach ) - && params->attachDictPref != ZSTD_dictForceCopy - && !params->forceWindow; /* dictMatchState isn't correctly - * handled in _enforceMaxDist */ -} +/* miniz error codes. Be sure to update mz_zip_get_error_string() if you add or modify this enum. 
*/ +typedef enum { + MZ_ZIP_NO_ERROR = 0, + MZ_ZIP_UNDEFINED_ERROR, + MZ_ZIP_TOO_MANY_FILES, + MZ_ZIP_FILE_TOO_LARGE, + MZ_ZIP_UNSUPPORTED_METHOD, + MZ_ZIP_UNSUPPORTED_ENCRYPTION, + MZ_ZIP_UNSUPPORTED_FEATURE, + MZ_ZIP_FAILED_FINDING_CENTRAL_DIR, + MZ_ZIP_NOT_AN_ARCHIVE, + MZ_ZIP_INVALID_HEADER_OR_CORRUPTED, + MZ_ZIP_UNSUPPORTED_MULTIDISK, + MZ_ZIP_DECOMPRESSION_FAILED, + MZ_ZIP_COMPRESSION_FAILED, + MZ_ZIP_UNEXPECTED_DECOMPRESSED_SIZE, + MZ_ZIP_CRC_CHECK_FAILED, + MZ_ZIP_UNSUPPORTED_CDIR_SIZE, + MZ_ZIP_ALLOC_FAILED, + MZ_ZIP_FILE_OPEN_FAILED, + MZ_ZIP_FILE_CREATE_FAILED, + MZ_ZIP_FILE_WRITE_FAILED, + MZ_ZIP_FILE_READ_FAILED, + MZ_ZIP_FILE_CLOSE_FAILED, + MZ_ZIP_FILE_SEEK_FAILED, + MZ_ZIP_FILE_STAT_FAILED, + MZ_ZIP_INVALID_PARAMETER, + MZ_ZIP_INVALID_FILENAME, + MZ_ZIP_BUF_TOO_SMALL, + MZ_ZIP_INTERNAL_ERROR, + MZ_ZIP_FILE_NOT_FOUND, + MZ_ZIP_ARCHIVE_TOO_LARGE, + MZ_ZIP_VALIDATION_FAILED, + MZ_ZIP_WRITE_CALLBACK_FAILED, + MZ_ZIP_TOTAL_ERRORS +} mz_zip_error; -static size_t -ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, - const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, - U64 pledgedSrcSize, - ZSTD_buffered_policy_e zbuff) +typedef struct { - { const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams; - unsigned const windowLog = params.cParams.windowLog; - assert(windowLog != 0); - /* Resize working context table params for input only, since the dict - * has its own tables. */ - /* pledgeSrcSize == 0 means 0! 
*/ - params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0); - params.cParams.windowLog = windowLog; - FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, - ZSTDcrp_makeClean, zbuff), ""); - assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); - } - - { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc - - cdict->matchState.window.base); - const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit; - if (cdictLen == 0) { - /* don't even attach dictionaries with no contents */ - DEBUGLOG(4, "skipping attaching empty dictionary"); - } else { - DEBUGLOG(4, "attaching dictionary into context"); - cctx->blockState.matchState.dictMatchState = &cdict->matchState; - - /* prep working match state so dict matches never have negative indices - * when they are translated to the working context's index space. */ - if (cctx->blockState.matchState.window.dictLimit < cdictEnd) { - cctx->blockState.matchState.window.nextSrc = - cctx->blockState.matchState.window.base + cdictEnd; - ZSTD_window_clear(&cctx->blockState.matchState.window); - } - /* loadedDictEnd is expressed within the referential of the active context */ - cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; - } } + mz_uint64 m_archive_size; + mz_uint64 m_central_directory_file_ofs; - cctx->dictID = cdict->dictID; + /* We only support up to UINT32_MAX files in zip64 mode. 
*/ + mz_uint32 m_total_files; + mz_zip_mode m_zip_mode; + mz_zip_type m_zip_type; + mz_zip_error m_last_error; - /* copy block state */ - memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); + mz_uint64 m_file_offset_alignment; - return 0; -} + mz_alloc_func m_pAlloc; + mz_free_func m_pFree; + mz_realloc_func m_pRealloc; + void *m_pAlloc_opaque; -static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, - const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, - U64 pledgedSrcSize, - ZSTD_buffered_policy_e zbuff) -{ - const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; + mz_file_read_func m_pRead; + mz_file_write_func m_pWrite; + mz_file_needs_keepalive m_pNeeds_keepalive; + void *m_pIO_opaque; - DEBUGLOG(4, "copying dictionary into context"); + mz_zip_internal_state *m_pState; - { unsigned const windowLog = params.cParams.windowLog; - assert(windowLog != 0); - /* Copy only compression parameters related to tables. */ - params.cParams = *cdict_cParams; - params.cParams.windowLog = windowLog; - FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, - ZSTDcrp_leaveDirty, zbuff), ""); - assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); - assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); - assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog); - } +} mz_zip_archive; - ZSTD_cwksp_mark_tables_dirty(&cctx->workspace); +typedef struct +{ + mz_zip_archive *pZip; + mz_uint flags; - /* copy tables */ - { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 
0 : ((size_t)1 << cdict_cParams->chainLog); - size_t const hSize = (size_t)1 << cdict_cParams->hashLog; + int status; +#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS + mz_uint file_crc32; +#endif + mz_uint64 read_buf_size, read_buf_ofs, read_buf_avail, comp_remaining, out_buf_ofs, cur_file_ofs; + mz_zip_archive_file_stat file_stat; + void *pRead_buf; + void *pWrite_buf; - memcpy(cctx->blockState.matchState.hashTable, - cdict->matchState.hashTable, - hSize * sizeof(U32)); - memcpy(cctx->blockState.matchState.chainTable, - cdict->matchState.chainTable, - chainSize * sizeof(U32)); - } + size_t out_blk_remain; - /* Zero the hashTable3, since the cdict never fills it */ - { int const h3log = cctx->blockState.matchState.hashLog3; - size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; - assert(cdict->matchState.hashLog3 == 0); - memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); - } + tinfl_decompressor inflator; - ZSTD_cwksp_mark_tables_clean(&cctx->workspace); +} mz_zip_reader_extract_iter_state; - /* copy dictionary offsets */ - { ZSTD_matchState_t const* srcMatchState = &cdict->matchState; - ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; - dstMatchState->window = srcMatchState->window; - dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; - dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; - } +/* -------- ZIP reading */ - cctx->dictID = cdict->dictID; +/* Inits a ZIP archive reader. */ +/* These functions read and validate the archive's central directory. */ +mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, mz_uint flags); - /* copy block state */ - memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); +mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint flags); - return 0; -} +#ifndef MINIZ_NO_STDIO +/* Read a archive from a disk file. */ +/* file_start_ofs is the file offset where the archive actually begins, or 0. 
*/ +/* actual_archive_size is the true total size of the archive, which may be smaller than the file's actual size on disk. If zero the entire file is treated as the archive. */ +mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags); +mz_bool mz_zip_reader_init_file_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint flags, mz_uint64 file_start_ofs, mz_uint64 archive_size); -/* We have a choice between copying the dictionary context into the working - * context, or referencing the dictionary context from the working context - * in-place. We decide here which strategy to use. */ -static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, - const ZSTD_CDict* cdict, - const ZSTD_CCtx_params* params, - U64 pledgedSrcSize, - ZSTD_buffered_policy_e zbuff) -{ +/* Read an archive from an already opened FILE, beginning at the current file position. */ +/* The archive is assumed to be archive_size bytes long. If archive_size is < 0, then the entire rest of the file is assumed to contain the archive. */ +/* The FILE will NOT be closed when mz_zip_reader_end() is called. */ +mz_bool mz_zip_reader_init_cfile(mz_zip_archive *pZip, MZ_FILE *pFile, mz_uint64 archive_size, mz_uint flags); +#endif - DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)", - (unsigned)pledgedSrcSize); +/* Ends archive reading, freeing all allocations, and closing the input archive file if mz_zip_reader_init_file() was used. */ +mz_bool mz_zip_reader_end(mz_zip_archive *pZip); - if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) { - return ZSTD_resetCCtx_byAttachingCDict( - cctx, cdict, *params, pledgedSrcSize, zbuff); - } else { - return ZSTD_resetCCtx_byCopyingCDict( - cctx, cdict, *params, pledgedSrcSize, zbuff); - } -} +/* -------- ZIP reading or writing */ -/*! ZSTD_copyCCtx_internal() : - * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. - * Only works during stage ZSTDcs_init (i.e. 
after creation, but before first call to ZSTD_compressContinue()). - * The "context", in this case, refers to the hash and chain tables, - * entropy tables, and dictionary references. - * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx. - * @return : 0, or an error code */ -static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, - const ZSTD_CCtx* srcCCtx, - ZSTD_frameParameters fParams, - U64 pledgedSrcSize, - ZSTD_buffered_policy_e zbuff) -{ - DEBUGLOG(5, "ZSTD_copyCCtx_internal"); - RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong, - "Can't copy a ctx that's not in init stage."); +/* Clears a mz_zip_archive struct to all zeros. */ +/* Important: This must be done before passing the struct to any mz_zip functions. */ +void mz_zip_zero_struct(mz_zip_archive *pZip); - memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); - { ZSTD_CCtx_params params = dstCCtx->requestedParams; - /* Copy only compression parameters related to tables. */ - params.cParams = srcCCtx->appliedParams.cParams; - params.fParams = fParams; - ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, - ZSTDcrp_leaveDirty, zbuff); - assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); - assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); - assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog); - assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog); - assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3); - } +mz_zip_mode mz_zip_get_mode(mz_zip_archive *pZip); +mz_zip_type mz_zip_get_type(mz_zip_archive *pZip); - ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace); +/* Returns the total number of files in the archive. 
*/ +mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip); - /* copy tables */ - { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog); - size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; - int const h3log = srcCCtx->blockState.matchState.hashLog3; - size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; +mz_uint64 mz_zip_get_archive_size(mz_zip_archive *pZip); +mz_uint64 mz_zip_get_archive_file_start_offset(mz_zip_archive *pZip); +MZ_FILE *mz_zip_get_cfile(mz_zip_archive *pZip); - memcpy(dstCCtx->blockState.matchState.hashTable, - srcCCtx->blockState.matchState.hashTable, - hSize * sizeof(U32)); - memcpy(dstCCtx->blockState.matchState.chainTable, - srcCCtx->blockState.matchState.chainTable, - chainSize * sizeof(U32)); - memcpy(dstCCtx->blockState.matchState.hashTable3, - srcCCtx->blockState.matchState.hashTable3, - h3Size * sizeof(U32)); - } +/* Reads n bytes of raw archive data, starting at file offset file_ofs, to pBuf. */ +size_t mz_zip_read_archive_data(mz_zip_archive *pZip, mz_uint64 file_ofs, void *pBuf, size_t n); - ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace); +/* All mz_zip funcs set the m_last_error field in the mz_zip_archive struct. These functions retrieve/manipulate this field. */ +/* Note that the m_last_error functionality is not thread safe. 
*/ +mz_zip_error mz_zip_set_last_error(mz_zip_archive *pZip, mz_zip_error err_num); +mz_zip_error mz_zip_peek_last_error(mz_zip_archive *pZip); +mz_zip_error mz_zip_clear_last_error(mz_zip_archive *pZip); +mz_zip_error mz_zip_get_last_error(mz_zip_archive *pZip); +const char *mz_zip_get_error_string(mz_zip_error mz_err); - /* copy dictionary offsets */ - { - const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState; - ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; - dstMatchState->window = srcMatchState->window; - dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; - dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; - } - dstCCtx->dictID = srcCCtx->dictID; +/* MZ_TRUE if the archive file entry is a directory entry. */ +mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index); - /* copy block state */ - memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock)); +/* MZ_TRUE if the file is encrypted/strong encrypted. */ +mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index); - return 0; -} +/* MZ_TRUE if the compression method is supported, and the file is not encrypted, and the file is not a compressed patch file. */ +mz_bool mz_zip_reader_is_file_supported(mz_zip_archive *pZip, mz_uint file_index); -/*! ZSTD_copyCCtx() : - * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. - * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). - * pledgedSrcSize==0 means "unknown". 
-* @return : 0, or an error code */ -size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize) -{ - ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; - ZSTD_buffered_policy_e const zbuff = (ZSTD_buffered_policy_e)(srcCCtx->inBuffSize>0); - ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1); - if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; - fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN); +/* Retrieves the filename of an archive file entry. */ +/* Returns the number of bytes written to pFilename, or if filename_buf_size is 0 this function returns the number of bytes needed to fully store the filename. */ +mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size); - return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, - fParams, pledgedSrcSize, - zbuff); -} +/* Attempts to locates a file in the archive's central directory. */ +/* Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH */ +/* Returns -1 if the file cannot be found. */ +int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags); +int mz_zip_reader_locate_file_v2(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags, mz_uint32 *file_index); +/* Returns detailed information about an archive file entry. */ +mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat); -#define ZSTD_ROWSIZE 16 -/*! ZSTD_reduceTable() : - * reduce table indexes by `reducerValue`, or squash to zero. - * PreserveMark preserves "unsorted mark" for btlazy2 strategy. - * It must be set to a clear 0/1 value, to remove branch during inlining. 
- * Presume table size is a multiple of ZSTD_ROWSIZE - * to help auto-vectorization */ -FORCE_INLINE_TEMPLATE void -ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark) -{ - int const nbRows = (int)size / ZSTD_ROWSIZE; - int cellNb = 0; - int rowNb; - assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ - assert(size < (1U<<31)); /* can be casted to int */ +/* MZ_TRUE if the file is in zip64 format. */ +/* A file is considered zip64 if it contained a zip64 end of central directory marker, or if it contained any zip64 extended file information fields in the central directory. */ +mz_bool mz_zip_is_zip64(mz_zip_archive *pZip); -#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) - /* To validate that the table re-use logic is sound, and that we don't - * access table space that we haven't cleaned, we re-"poison" the table - * space every time we mark it dirty. - * - * This function however is intended to operate on those dirty tables and - * re-clean them. So when this function is used correctly, we can unpoison - * the memory it operated on. This introduces a blind spot though, since - * if we now try to operate on __actually__ poisoned memory, we will not - * detect that. */ - __msan_unpoison(table, size * sizeof(U32)); -#endif +/* Returns the total central directory size in bytes. */ +/* The current max supported size is <= MZ_UINT32_MAX. */ +size_t mz_zip_get_central_dir_size(mz_zip_archive *pZip); - for (rowNb=0 ; rowNb < nbRows ; rowNb++) { - int column; - for (column=0; columncParams.hashLog; - ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); - } +/* Extracts a archive file using a callback function to output the file's data. 
*/ +mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags); +mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags); - if (params->cParams.strategy != ZSTD_fast) { - U32 const chainSize = (U32)1 << params->cParams.chainLog; - if (params->cParams.strategy == ZSTD_btlazy2) - ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); - else - ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); - } +/* Extract a file iteratively */ +mz_zip_reader_extract_iter_state* mz_zip_reader_extract_iter_new(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags); +mz_zip_reader_extract_iter_state* mz_zip_reader_extract_file_iter_new(mz_zip_archive *pZip, const char *pFilename, mz_uint flags); +size_t mz_zip_reader_extract_iter_read(mz_zip_reader_extract_iter_state* pState, void* pvBuf, size_t buf_size); +mz_bool mz_zip_reader_extract_iter_free(mz_zip_reader_extract_iter_state* pState); - if (ms->hashLog3) { - U32 const h3Size = (U32)1 << ms->hashLog3; - ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue); - } -} +#ifndef MINIZ_NO_STDIO +/* Extracts a archive file to a disk file and sets its last accessed and modified times. */ +/* This function only extracts files, not archive directory records. */ +mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags); +mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags); +/* Extracts a archive file starting at the current position in the destination FILE stream. 
*/ +mz_bool mz_zip_reader_extract_to_cfile(mz_zip_archive *pZip, mz_uint file_index, MZ_FILE *File, mz_uint flags); +mz_bool mz_zip_reader_extract_file_to_cfile(mz_zip_archive *pZip, const char *pArchive_filename, MZ_FILE *pFile, mz_uint flags); +#endif -/*-******************************************************* -* Block entropic compression -*********************************************************/ +#if 0 +/* TODO */ + typedef void *mz_zip_streaming_extract_state_ptr; + mz_zip_streaming_extract_state_ptr mz_zip_streaming_extract_begin(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags); + uint64_t mz_zip_streaming_extract_get_size(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState); + uint64_t mz_zip_streaming_extract_get_cur_ofs(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState); + mz_bool mz_zip_streaming_extract_seek(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState, uint64_t new_ofs); + size_t mz_zip_streaming_extract_read(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState, void *pBuf, size_t buf_size); + mz_bool mz_zip_streaming_extract_end(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState); +#endif -/* See doc/zstd_compression_format.md for detailed format description */ +/* This function compares the archive's local headers, the optional local zip64 extended information block, and the optional descriptor following the compressed data vs. the data in the central directory. */ +/* It also validates that each file can be successfully uncompressed unless the MZ_ZIP_FLAG_VALIDATE_HEADERS_ONLY is specified. 
*/ +mz_bool mz_zip_validate_file(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags); -void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) -{ - const seqDef* const sequences = seqStorePtr->sequencesStart; - BYTE* const llCodeTable = seqStorePtr->llCode; - BYTE* const ofCodeTable = seqStorePtr->ofCode; - BYTE* const mlCodeTable = seqStorePtr->mlCode; - U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - U32 u; - assert(nbSeq <= seqStorePtr->maxNbSeq); - for (u=0; ulongLengthID==1) - llCodeTable[seqStorePtr->longLengthPos] = MaxLL; - if (seqStorePtr->longLengthID==2) - mlCodeTable[seqStorePtr->longLengthPos] = MaxML; -} +/* Validates an entire archive by calling mz_zip_validate_file() on each file. */ +mz_bool mz_zip_validate_archive(mz_zip_archive *pZip, mz_uint flags); -/* ZSTD_useTargetCBlockSize(): - * Returns if target compressed block size param is being used. - * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize. - * Returns 1 if true, 0 otherwise. 
*/ -static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) -{ - DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize); - return (cctxParams->targetCBlockSize != 0); -} +/* Misc utils/helpers, valid for ZIP reading or writing */ +mz_bool mz_zip_validate_mem_archive(const void *pMem, size_t size, mz_uint flags, mz_zip_error *pErr); +mz_bool mz_zip_validate_file_archive(const char *pFilename, mz_uint flags, mz_zip_error *pErr); -/* ZSTD_compressSequences_internal(): - * actually compresses both literals and sequences */ -MEM_STATIC size_t -ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, - const ZSTD_entropyCTables_t* prevEntropy, - ZSTD_entropyCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - void* dst, size_t dstCapacity, - void* entropyWorkspace, size_t entropyWkspSize, - const int bmi2) -{ - const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; - ZSTD_strategy const strategy = cctxParams->cParams.strategy; - unsigned count[MaxSeq+1]; - FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; - FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; - FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; - U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ - const seqDef* const sequences = seqStorePtr->sequencesStart; - const BYTE* const ofCodeTable = seqStorePtr->ofCode; - const BYTE* const llCodeTable = seqStorePtr->llCode; - const BYTE* const mlCodeTable = seqStorePtr->mlCode; - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstCapacity; - BYTE* op = ostart; - size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - BYTE* seqHead; - BYTE* lastNCount = NULL; +/* Universal end function - calls either mz_zip_reader_end() or mz_zip_writer_end(). 
*/ +mz_bool mz_zip_end(mz_zip_archive *pZip); - DEBUGLOG(5, "ZSTD_compressSequences_internal (nbSeq=%zu)", nbSeq); - ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<litStart; - size_t const litSize = (size_t)(seqStorePtr->lit - literals); - size_t const cSize = ZSTD_compressLiterals( - &prevEntropy->huf, &nextEntropy->huf, - cctxParams->cParams.strategy, - ZSTD_disableLiteralsCompression(cctxParams), - op, dstCapacity, - literals, litSize, - entropyWorkspace, entropyWkspSize, - bmi2); - FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); - assert(cSize <= dstCapacity); - op += cSize; - } +#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS - /* Sequences Header */ - RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, - dstSize_tooSmall, "Can't fit seq hdr in output buf!"); - if (nbSeq < 128) { - *op++ = (BYTE)nbSeq; - } else if (nbSeq < LONGNBSEQ) { - op[0] = (BYTE)((nbSeq>>8) + 0x80); - op[1] = (BYTE)nbSeq; - op+=2; - } else { - op[0]=0xFF; - MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)); - op+=3; - } - assert(op <= oend); - if (nbSeq==0) { - /* Copy the old tables over as if we repeated them */ - memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); - return (size_t)(op - ostart); - } +/* Inits a ZIP archive writer. */ +/*Set pZip->m_pWrite (and pZip->m_pIO_opaque) before calling mz_zip_writer_init or mz_zip_writer_init_v2*/ +/*The output is streamable, i.e. 
file_ofs in mz_file_write_func always increases only by n*/ +mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size); +mz_bool mz_zip_writer_init_v2(mz_zip_archive *pZip, mz_uint64 existing_size, mz_uint flags); - /* seqHead : flags for FSE encoding type */ - seqHead = op++; - assert(op <= oend); +mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size); +mz_bool mz_zip_writer_init_heap_v2(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size, mz_uint flags); - /* convert length/distances into codes */ - ZSTD_seqToCodes(seqStorePtr); - /* build CTable for Literal Lengths */ - { unsigned max = MaxLL; - size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ - DEBUGLOG(5, "Building LL table"); - nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode; - LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, - count, max, mostFrequent, nbSeq, - LLFSELog, prevEntropy->fse.litlengthCTable, - ZSTDInternalConstants::LL_defaultNorm, ZSTDInternalConstants::LL_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(set_basic < set_compressed && set_rle < set_compressed); - assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable( - op, (size_t)(oend - op), - CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, - count, max, llCodeTable, nbSeq, - ZSTDInternalConstants::LL_defaultNorm, ZSTDInternalConstants::LL_defaultNormLog, MaxLL, - prevEntropy->fse.litlengthCTable, - sizeof(prevEntropy->fse.litlengthCTable), - entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); - if (LLtype == set_compressed) - lastNCount = op; - op += countSize; - assert(op <= oend); - } } - /* build 
CTable for Offsets */ - { unsigned max = MaxOff; - size_t const mostFrequent = HIST_countFast_wksp( - count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ - /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ - ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; - DEBUGLOG(5, "Building OF table"); - nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode; - Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, - count, max, mostFrequent, nbSeq, - OffFSELog, prevEntropy->fse.offcodeCTable, - ZSTDInternalConstants::OF_defaultNorm, ZSTDInternalConstants::OF_defaultNormLog, - defaultPolicy, strategy); - assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable( - op, (size_t)(oend - op), - CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, - count, max, ofCodeTable, nbSeq, - ZSTDInternalConstants::OF_defaultNorm, ZSTDInternalConstants::OF_defaultNormLog, DefaultMaxOff, - prevEntropy->fse.offcodeCTable, - sizeof(prevEntropy->fse.offcodeCTable), - entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); - if (Offtype == set_compressed) - lastNCount = op; - op += countSize; - assert(op <= oend); - } } - /* build CTable for MatchLengths */ - { unsigned max = MaxML; - size_t const mostFrequent = HIST_countFast_wksp( - count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ - DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); - nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode; - MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, - count, max, mostFrequent, nbSeq, - MLFSELog, prevEntropy->fse.matchlengthCTable, - 
ZSTDInternalConstants::ML_defaultNorm, ZSTDInternalConstants::ML_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable( - op, (size_t)(oend - op), - CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, - count, max, mlCodeTable, nbSeq, - ZSTDInternalConstants::ML_defaultNorm, ZSTDInternalConstants::ML_defaultNormLog, MaxML, - prevEntropy->fse.matchlengthCTable, - sizeof(prevEntropy->fse.matchlengthCTable), - entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); - if (MLtype == set_compressed) - lastNCount = op; - op += countSize; - assert(op <= oend); - } } +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning); +mz_bool mz_zip_writer_init_file_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning, mz_uint flags); +mz_bool mz_zip_writer_init_cfile(mz_zip_archive *pZip, MZ_FILE *pFile, mz_uint flags); +#endif - *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); +/* Converts a ZIP archive reader object into a writer object, to allow efficient in-place file appends to occur on an existing archive. */ +/* For archives opened using mz_zip_reader_init_file, pFilename must be the archive's filename so it can be reopened for writing. If the file can't be reopened, mz_zip_reader_end() will be called. */ +/* For archives opened using mz_zip_reader_init_mem, the memory block must be growable using the realloc callback (which defaults to realloc unless you've overridden it). */ +/* Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's user provided m_pWrite function cannot be NULL. 
*/ +/* Note: In-place archive modification is not recommended unless you know what you're doing, because if execution stops or something goes wrong before */ +/* the archive is finalized the file's central directory will be hosed. */ +mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename); +mz_bool mz_zip_writer_init_from_reader_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint flags); - { size_t const bitstreamSize = ZSTD_encodeSequences( - op, (size_t)(oend - op), - CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, - longOffsets, bmi2); - FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); - op += bitstreamSize; - assert(op <= oend); - /* zstd versions <= 1.3.4 mistakenly report corruption when - * FSE_readNCount() receives a buffer < 4 bytes. - * Fixed by https://github.com/facebook/zstd/pull/1146. - * This can happen when the last set_compressed table present is 2 - * bytes and the bitstream is only one byte. - * In this exceedingly rare case, we will simply emit an uncompressed - * block, since it isn't worth optimizing. - */ - if (lastNCount && (op - lastNCount) < 4) { - /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ - assert(op - lastNCount == 3); - DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " - "emitting an uncompressed block."); - return 0; - } - } +/* Adds the contents of a memory buffer to an archive. These functions record the current local time into the archive. */ +/* To add a directory entry, call this method with an archive name ending in a forwardslash with an empty buffer. */ +/* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. 
*/ +mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags); - DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart)); - return (size_t)(op - ostart); -} +/* Like mz_zip_writer_add_mem(), except you can specify a file comment field, and optionally supply the function with already compressed data. */ +/* uncomp_size/uncomp_crc32 are only used if the MZ_ZIP_FLAG_COMPRESSED_DATA flag is specified. */ +mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, + mz_uint64 uncomp_size, mz_uint32 uncomp_crc32); -MEM_STATIC size_t -ZSTD_compressSequences(seqStore_t* seqStorePtr, - const ZSTD_entropyCTables_t* prevEntropy, - ZSTD_entropyCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - void* dst, size_t dstCapacity, - size_t srcSize, - void* entropyWorkspace, size_t entropyWkspSize, - int bmi2) -{ - size_t const cSize = ZSTD_compressSequences_internal( - seqStorePtr, prevEntropy, nextEntropy, cctxParams, - dst, dstCapacity, - entropyWorkspace, entropyWkspSize, bmi2); - if (cSize == 0) return 0; - /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. - * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. 
- */ - if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) - return 0; /* block not compressed */ - FORWARD_IF_ERROR(cSize, "ZSTD_compressSequences_internal failed"); +mz_bool mz_zip_writer_add_mem_ex_v2(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, + mz_uint64 uncomp_size, mz_uint32 uncomp_crc32, MZ_TIME_T *last_modified, const char *user_extra_data_local, mz_uint user_extra_data_local_len, + const char *user_extra_data_central, mz_uint user_extra_data_central_len); - /* Check compressibility */ - { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); - if (cSize >= maxCSize) return 0; /* block not compressed */ - } +#ifndef MINIZ_NO_STDIO +/* Adds the contents of a disk file to an archive. This function also records the disk file's modified time into the archive. */ +/* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. */ +mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); - return cSize; -} +/* Like mz_zip_writer_add_file(), except the file data is read from the specified FILE stream. 
*/ +mz_bool mz_zip_writer_add_cfile(mz_zip_archive *pZip, const char *pArchive_name, MZ_FILE *pSrc_file, mz_uint64 size_to_add, + const MZ_TIME_T *pFile_time, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, const char *user_extra_data_local, mz_uint user_extra_data_local_len, + const char *user_extra_data_central, mz_uint user_extra_data_central_len); +#endif -/* ZSTD_selectBlockCompressor() : - * Not static, but internal use only (used by long distance matcher) - * assumption : strat is a valid strategy */ -ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode) -{ - static const ZSTD_blockCompressor blockCompressor[3][ZSTD_STRATEGY_MAX+1] = { - { ZSTD_compressBlock_fast /* default for 0 */, - ZSTD_compressBlock_fast, - ZSTD_compressBlock_doubleFast, - ZSTD_compressBlock_greedy, - ZSTD_compressBlock_lazy, - ZSTD_compressBlock_lazy2, - ZSTD_compressBlock_btlazy2, - ZSTD_compressBlock_btopt, - ZSTD_compressBlock_btultra, - ZSTD_compressBlock_btultra2 }, - { ZSTD_compressBlock_fast_extDict /* default for 0 */, - ZSTD_compressBlock_fast_extDict, - ZSTD_compressBlock_doubleFast_extDict, - ZSTD_compressBlock_greedy_extDict, - ZSTD_compressBlock_lazy_extDict, - ZSTD_compressBlock_lazy2_extDict, - ZSTD_compressBlock_btlazy2_extDict, - ZSTD_compressBlock_btopt_extDict, - ZSTD_compressBlock_btultra_extDict, - ZSTD_compressBlock_btultra_extDict }, - { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, - ZSTD_compressBlock_fast_dictMatchState, - ZSTD_compressBlock_doubleFast_dictMatchState, - ZSTD_compressBlock_greedy_dictMatchState, - ZSTD_compressBlock_lazy_dictMatchState, - ZSTD_compressBlock_lazy2_dictMatchState, - ZSTD_compressBlock_btlazy2_dictMatchState, - ZSTD_compressBlock_btopt_dictMatchState, - ZSTD_compressBlock_btultra_dictMatchState, - ZSTD_compressBlock_btultra_dictMatchState } - }; - ZSTD_blockCompressor selectedCompressor; - ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); +/* Adds a file to 
an archive by fully cloning the data from another archive. */ +/* This function fully clones the source file's compressed data (no recompression), along with its full filename, extra data (it may add or modify the zip64 local header extra data field), and the optional descriptor following the compressed data. */ +mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint src_file_index); - assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); - selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; - assert(selectedCompressor != NULL); - return selectedCompressor; -} +/* Finalizes the archive by writing the central directory records followed by the end of central directory record. */ +/* After an archive is finalized, the only valid call on the mz_zip_archive struct is mz_zip_writer_end(). */ +/* An archive must be manually finalized by calling this function for it to be valid. */ +mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip); -static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, - const BYTE* anchor, size_t lastLLSize) -{ - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; -} +/* Finalizes a heap archive, returning a poiner to the heap block and its size. */ +/* The heap block will be allocated using the mz_zip_archive's alloc/realloc callbacks. */ +mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **ppBuf, size_t *pSize); -void ZSTD_resetSeqStore(seqStore_t* ssPtr) -{ - ssPtr->lit = ssPtr->litStart; - ssPtr->sequences = ssPtr->sequencesStart; - ssPtr->longLengthID = 0; -} +/* Ends archive writing, freeing all allocations, and closing the output file if mz_zip_writer_init_file() was used. */ +/* Note for the archive to be valid, it *must* have been finalized before ending (this function will not do it for you). 
*/ +mz_bool mz_zip_writer_end(mz_zip_archive *pZip); -typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; +/* -------- Misc. high-level helper functions: */ -static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) -{ - ZSTD_matchState_t* const ms = &zc->blockState.matchState; - DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize); - assert(srcSize <= ZSTD_BLOCKSIZE_MAX); - /* Assert that we have correctly flushed the ctx params into the ms's copy */ - ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); - if (srcSize < MIN_CBLOCK_SIZE+ZSTDInternalConstants::ZSTD_blockHeaderSize+1) { - ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); - return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */ - } - ZSTD_resetSeqStore(&(zc->seqStore)); - /* required for optimal parser to read stats from dictionary */ - ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; - /* tell the optimal parser how we expect to compress literals */ - ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode; - /* a gap between an attached dict and the current window is not safe, - * they must remain adjacent, - * and when that stops being the case, the dict must be unset */ - assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit); +/* mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) appends a memory blob to a ZIP archive. */ +/* Note this is NOT a fully safe operation. If it crashes or dies in some way your archive can be left in a screwed up state (without a central directory). */ +/* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. */ +/* TODO: Perhaps add an option to leave the existing central dir in place in case the add dies? 
We could then truncate the file (so the old central dir would be at the end) if something goes wrong. */ +mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); +mz_bool mz_zip_add_mem_to_archive_file_in_place_v2(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_zip_error *pErr); - /* limited update after a very long match */ - { const BYTE* const base = ms->window.base; - const BYTE* const istart = (const BYTE*)src; - const U32 current = (U32)(istart-base); - if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */ - if (current > ms->nextToUpdate + 384) - ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384)); - } +/* Reads a single file from an archive into a heap block. */ +/* If pComment is not NULL, only the file with the specified comment will be extracted. */ +/* Returns NULL on failure. 
*/ +void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint flags); +void *mz_zip_extract_archive_file_to_heap_v2(const char *pZip_filename, const char *pArchive_name, const char *pComment, size_t *pSize, mz_uint flags, mz_zip_error *pErr); - /* select and store sequences */ - { ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms); - size_t lastLLSize; - { int i; - for (i = 0; i < ZSTD_REP_NUM; ++i) - zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; - } - if (zc->externSeqStore.pos < zc->externSeqStore.size) { - assert(!zc->appliedParams.ldmParams.enableLdm); - /* Updates ldmSeqStore.pos */ - lastLLSize = - ZSTD_ldm_blockCompress(&zc->externSeqStore, - ms, &zc->seqStore, - zc->blockState.nextCBlock->rep, - src, srcSize); - assert(zc->externSeqStore.pos <= zc->externSeqStore.size); - } else if (zc->appliedParams.ldmParams.enableLdm) { - rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0}; +#endif /* #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS */ - ldmSeqStore.seq = zc->ldmSequences; - ldmSeqStore.capacity = zc->maxNbLdmSequences; - /* Updates ldmSeqStore.size */ - FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, - &zc->appliedParams.ldmParams, - src, srcSize), ""); - /* Updates ldmSeqStore.pos */ - lastLLSize = - ZSTD_ldm_blockCompress(&ldmSeqStore, - ms, &zc->seqStore, - zc->blockState.nextCBlock->rep, - src, srcSize); - assert(ldmSeqStore.pos == ldmSeqStore.size); - } else { /* not long range mode */ - ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode); - lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); - } - { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; - ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); - } } - return ZSTDbss_compress; -} -static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) -{ - const 
seqStore_t* seqStore = ZSTD_getSeqStore(zc); - const seqDef* seqs = seqStore->sequencesStart; - size_t seqsSize = seqStore->sequences - seqs; - ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; - size_t i; size_t position; int repIdx; +#endif /* MINIZ_NO_ARCHIVE_APIS */ - assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); - for (i = 0, position = 0; i < seqsSize; ++i) { - outSeqs[i].offset = seqs[i].offset; - outSeqs[i].litLength = seqs[i].litLength; - outSeqs[i].matchLength = seqs[i].matchLength + MINMATCH; +} // namespace duckdb_miniz - if (i == seqStore->longLengthPos) { - if (seqStore->longLengthID == 1) { - outSeqs[i].litLength += 0x10000; - } else if (seqStore->longLengthID == 2) { - outSeqs[i].matchLength += 0x10000; - } - } - if (outSeqs[i].offset <= ZSTD_REP_NUM) { - outSeqs[i].rep = outSeqs[i].offset; - repIdx = (unsigned int)i - outSeqs[i].offset; +// LICENSE_CHANGE_END - if (outSeqs[i].litLength == 0) { - if (outSeqs[i].offset < 3) { - --repIdx; - } else { - repIdx = (unsigned int)i - 1; - } - ++outSeqs[i].rep; - } - assert(repIdx >= -3); - outSeqs[i].offset = repIdx >= 0 ? 
outSeqs[repIdx].offset : ZSTDInternalConstants::repStartValue[-repIdx - 1]; - if (outSeqs[i].rep == 4) { - --outSeqs[i].offset; - } - } else { - outSeqs[i].offset -= ZSTD_REP_NUM; - } +#include +#include - position += outSeqs[i].litLength; - outSeqs[i].matchPos = (unsigned int)position; - position += outSeqs[i].matchLength; - } - zc->seqCollector.seqIndex += seqsSize; -} +namespace duckdb { -size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, - size_t outSeqsSize, const void* src, size_t srcSize) -{ - const size_t dstCapacity = ZSTD_compressBound(srcSize); - void* dst = ZSTD_malloc(dstCapacity, ZSTDInternalConstants::ZSTD_defaultCMem); - SeqCollector seqCollector; +enum class MiniZStreamType { + MINIZ_TYPE_NONE, + MINIZ_TYPE_INFLATE, + MINIZ_TYPE_DEFLATE +}; - RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!"); +struct MiniZStream { + MiniZStream() : type(MiniZStreamType::MINIZ_TYPE_NONE) { + memset(&stream, 0, sizeof(duckdb_miniz::mz_stream)); + } + ~MiniZStream() { + switch(type) { + case MiniZStreamType::MINIZ_TYPE_INFLATE: + duckdb_miniz::mz_inflateEnd(&stream); + break; + case MiniZStreamType::MINIZ_TYPE_DEFLATE: + duckdb_miniz::mz_deflateEnd(&stream); + break; + default: + break; + } + } + void FormatException(std::string error_msg) { + throw std::runtime_error(error_msg); + } + void FormatException(const char *error_msg, int mz_ret) { + auto err = duckdb_miniz::mz_error(mz_ret); + FormatException(error_msg + std::string(": ") + (err ? 
err : "Unknown error code")); + } + void Decompress(const char *compressed_data, size_t compressed_size, char *out_data, size_t out_size) { + auto mz_ret = mz_inflateInit2(&stream, -MZ_DEFAULT_WINDOW_BITS); + if (mz_ret != duckdb_miniz::MZ_OK) { + FormatException("Failed to initialize miniz", mz_ret); + } + type = MiniZStreamType::MINIZ_TYPE_INFLATE; - seqCollector.collectSequences = 1; - seqCollector.seqStart = outSeqs; - seqCollector.seqIndex = 0; - seqCollector.maxSequences = outSeqsSize; - zc->seqCollector = seqCollector; + if (compressed_size < GZIP_HEADER_MINSIZE) { + FormatException("Failed to decompress GZIP block: compressed size is less than gzip header size"); + } + auto gzip_hdr = (const unsigned char *)compressed_data; + if (gzip_hdr[0] != 0x1F || gzip_hdr[1] != 0x8B || gzip_hdr[2] != GZIP_COMPRESSION_DEFLATE || + gzip_hdr[3] & GZIP_FLAG_UNSUPPORTED) { + FormatException("Input is invalid/unsupported GZIP stream"); + } - ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); - ZSTD_free(dst, ZSTDInternalConstants::ZSTD_defaultCMem); - return zc->seqCollector.seqIndex; -} + stream.next_in = (const unsigned char *)compressed_data + GZIP_HEADER_MINSIZE; + stream.avail_in = compressed_size - GZIP_HEADER_MINSIZE; + stream.next_out = (unsigned char *)out_data; + stream.avail_out = out_size; -/* Returns true if the given block is a RLE block */ -static int ZSTD_isRLE(const BYTE *ip, size_t length) { - size_t i; - if (length < 2) return 1; - for (i = 1; i < length; ++i) { - if (ip[0] != ip[i]) return 0; - } - return 1; -} + mz_ret = mz_inflate(&stream, duckdb_miniz::MZ_FINISH); + if (mz_ret != duckdb_miniz::MZ_OK && mz_ret != duckdb_miniz::MZ_STREAM_END) { + FormatException("Failed to decompress GZIP block", mz_ret); + } + } + size_t MaxCompressedLength(size_t input_size) { + return duckdb_miniz::mz_compressBound(input_size) + GZIP_HEADER_MINSIZE + GZIP_FOOTER_SIZE; + } + void Compress(const char *uncompressed_data, size_t uncompressed_size, char *out_data, size_t 
*out_size) { + auto mz_ret = mz_deflateInit2(&stream, duckdb_miniz::MZ_DEFAULT_LEVEL, MZ_DEFLATED, -MZ_DEFAULT_WINDOW_BITS, 1, 0); + if (mz_ret != duckdb_miniz::MZ_OK) { + FormatException("Failed to initialize miniz", mz_ret); + } + type = MiniZStreamType::MINIZ_TYPE_DEFLATE; -/* Returns true if the given block may be RLE. - * This is just a heuristic based on the compressibility. - * It may return both false positives and false negatives. - */ -static int ZSTD_maybeRLE(seqStore_t const* seqStore) -{ - size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); - size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); + auto gzip_header = (unsigned char*) out_data; + memset(gzip_header, 0, GZIP_HEADER_MINSIZE); + gzip_header[0] = 0x1F; + gzip_header[1] = 0x8B; + gzip_header[2] = GZIP_COMPRESSION_DEFLATE; + gzip_header[3] = 0; + gzip_header[4] = 0; + gzip_header[5] = 0; + gzip_header[6] = 0; + gzip_header[7] = 0; + gzip_header[8] = 0; + gzip_header[9] = 0xFF; - return nbSeqs < 4 && nbLits < 10; -} + auto gzip_body = gzip_header + GZIP_HEADER_MINSIZE; -static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc) -{ - ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; - zc->blockState.prevCBlock = zc->blockState.nextCBlock; - zc->blockState.nextCBlock = tmp; -} + stream.next_in = (const unsigned char*) uncompressed_data; + stream.avail_in = uncompressed_size; + stream.next_out = gzip_body; + stream.avail_out = *out_size - GZIP_HEADER_MINSIZE; -static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, U32 frame) -{ - /* This the upper bound for the length of an rle block. - * This isn't the actual upper bound. Finding the real threshold - * needs further investigation. 
- */ - const U32 rleMaxLength = 25; - size_t cSize; - const BYTE* ip = (const BYTE*)src; - BYTE* op = (BYTE*)dst; - DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", - (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, - (unsigned)zc->blockState.matchState.nextToUpdate); + mz_ret = mz_deflate(&stream, duckdb_miniz::MZ_FINISH); + if (mz_ret != duckdb_miniz::MZ_OK && mz_ret != duckdb_miniz::MZ_STREAM_END) { + FormatException("Failed to compress GZIP block", mz_ret); + } + auto gzip_footer = gzip_body + stream.total_out; + auto crc = duckdb_miniz::mz_crc32(MZ_CRC32_INIT, (const unsigned char*) uncompressed_data, uncompressed_size); + gzip_footer[0] = crc & 0xFF; + gzip_footer[1] = (crc >> 8) & 0xFF; + gzip_footer[2] = (crc >> 16) & 0xFF; + gzip_footer[3] = (crc >> 24) & 0xFF; + gzip_footer[4] = uncompressed_size & 0xFF; + gzip_footer[5] = (uncompressed_size >> 8) & 0xFF; + gzip_footer[6] = (uncompressed_size >> 16) & 0xFF; + gzip_footer[7] = (uncompressed_size >> 24) & 0xFF; - { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); - FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); - if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } - } + *out_size = stream.total_out + GZIP_HEADER_MINSIZE + GZIP_FOOTER_SIZE; + } - if (zc->seqCollector.collectSequences) { - ZSTD_copyBlockSequences(zc); - return 0; - } +private: + static constexpr uint8_t GZIP_HEADER_MINSIZE = 10; + static constexpr uint8_t GZIP_FOOTER_SIZE = 8; + static constexpr uint8_t GZIP_COMPRESSION_DEFLATE = 0x08; + static constexpr unsigned char GZIP_FLAG_UNSUPPORTED = 0x1 | 0x2 | 0x4 | 0x10 | 0x20; + duckdb_miniz::mz_stream stream; + MiniZStreamType type; +}; - /* encode sequences and literals */ - cSize = ZSTD_compressSequences(&zc->seqStore, - &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, - &zc->appliedParams, - dst, dstCapacity, - srcSize, - zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically 
allocated in resetCCtx */, - zc->bmi2); +} - if (frame && - /* We don't want to emit our first block as a RLE even if it qualifies because - * doing so will cause the decoder (cli only) to throw a "should consume all input error." - * This is only an issue for zstd <= v1.4.3 - */ - !zc->isFirstBlock && - cSize < rleMaxLength && - ZSTD_isRLE(ip, srcSize)) - { - cSize = 1; - op[0] = ip[0]; - } -out: - if (!ZSTD_isError(cSize) && cSize > 1) { - ZSTD_confirmRepcodesAndEntropyTables(zc); - } - /* We check that dictionaries have offset codes available for the first - * block. After the first block, the offcode table might not have large - * enough codes to represent the offsets in the data. - */ - if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) - zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; +// LICENSE_CHANGE_END - return cSize; -} -static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const size_t bss, U32 lastBlock) -{ - DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()"); - if (bss == ZSTDbss_compress) { - if (/* We don't want to emit our first block as a RLE even if it qualifies because - * doing so will cause the decoder (cli only) to throw a "should consume all input error." - * This is only an issue for zstd <= v1.4.3 - */ - !zc->isFirstBlock && - ZSTD_maybeRLE(&zc->seqStore) && - ZSTD_isRLE((BYTE const*)src, srcSize)) - { - return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock); - } - /* Attempt superblock compression. - * - * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the - * standard ZSTD_compressBound(). This is a problem, because even if we have - * space now, taking an extra byte now could cause us to run out of space later - * and violate ZSTD_compressBound(). - * - * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize. 
- * - * In order to respect ZSTD_compressBound() we must attempt to emit a raw - * uncompressed block in these cases: - * * cSize == 0: Return code for an uncompressed block. - * * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize). - * ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of - * output space. - * * cSize >= blockBound(srcSize): We have expanded the block too much so - * emit an uncompressed block. - */ - { - size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); - if (cSize != ERROR(dstSize_tooSmall)) { - size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); - FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed"); - if (cSize != 0 && cSize < maxCSize + ZSTDInternalConstants::ZSTD_blockHeaderSize) { - ZSTD_confirmRepcodesAndEntropyTables(zc); - return cSize; - } - } - } - } +#include - DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()"); - /* Superblock compression failed, attempt to emit a single no compress block. - * The decoder will be able to stream this block since it is uncompressed. 
- */ - return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock); -} +#include "duckdb.hpp" +#ifndef DUCKDB_AMALGAMATION +#include "duckdb/common/types/blob.hpp" +#include "duckdb/common/types/chunk_collection.hpp" +#endif -static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - U32 lastBlock) -{ - size_t cSize = 0; - const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); - DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)", - (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize); - FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); +namespace duckdb { - cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock); - FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed"); +using duckdb_parquet::format::CompressionCodec; +using duckdb_parquet::format::ConvertedType; +using duckdb_parquet::format::Encoding; +using duckdb_parquet::format::PageType; +using duckdb_parquet::format::Type; - if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) - zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; +const uint32_t RleBpDecoder::BITPACK_MASKS[] = { + 0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, + 2047, 4095, 8191, 16383, 32767, 65535, 131071, 262143, 524287, 1048575, 2097151, + 4194303, 8388607, 16777215, 33554431, 67108863, 134217727, 268435455, 536870911, 1073741823, 2147483647}; - return cSize; +const uint8_t RleBpDecoder::BITPACK_DLEN = 8; + +ColumnReader::ColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t file_idx_p, + idx_t max_define_p, idx_t max_repeat_p) + : schema(schema_p), file_idx(file_idx_p), max_define(max_define_p), max_repeat(max_repeat_p), reader(reader), + 
type(move(type_p)), page_rows_available(0), dummy_result(type, nullptr) { + + // dummies for Skip() + none_filter.none(); + dummy_define.resize(reader.allocator, STANDARD_VECTOR_SIZE); + dummy_repeat.resize(reader.allocator, STANDARD_VECTOR_SIZE); } -static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, - ZSTD_cwksp* ws, - ZSTD_CCtx_params const* params, - void const* ip, - void const* iend) -{ - if (ZSTD_window_needOverflowCorrection(ms->window, iend)) { - U32 const maxDist = (U32)1 << params->cParams.windowLog; - U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy); - U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); - ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); - ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); - ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); - ZSTD_cwksp_mark_tables_dirty(ws); - ZSTD_reduceIndex(ms, params, correction); - ZSTD_cwksp_mark_tables_clean(ws); - if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; - else ms->nextToUpdate -= correction; - /* invalidate dictionaries on overflow correction */ - ms->loadedDictEnd = 0; - ms->dictMatchState = NULL; - } +ColumnReader::~ColumnReader() { +} + +unique_ptr ColumnReader::CreateReader(ParquetReader &reader, const LogicalType &type_p, + const SchemaElement &schema_p, idx_t file_idx_p, idx_t max_define, + idx_t max_repeat) { + switch (type_p.id()) { + case LogicalTypeId::BOOLEAN: + return make_unique(reader, type_p, schema_p, file_idx_p, max_define, max_repeat); + case LogicalTypeId::UTINYINT: + return make_unique>>( + reader, type_p, schema_p, file_idx_p, max_define, max_repeat); + case LogicalTypeId::USMALLINT: + return make_unique>>( + reader, type_p, schema_p, file_idx_p, max_define, max_repeat); + case LogicalTypeId::UINTEGER: + return make_unique>>( + reader, type_p, schema_p, file_idx_p, max_define, max_repeat); + case LogicalTypeId::UBIGINT: + return make_unique>>( + reader, type_p, schema_p, file_idx_p, 
max_define, max_repeat); + case LogicalTypeId::INTEGER: + return make_unique>>( + reader, type_p, schema_p, file_idx_p, max_define, max_repeat); + case LogicalTypeId::BIGINT: + return make_unique>>( + reader, type_p, schema_p, file_idx_p, max_define, max_repeat); + case LogicalTypeId::FLOAT: + return make_unique>>( + reader, type_p, schema_p, file_idx_p, max_define, max_repeat); + case LogicalTypeId::DOUBLE: + return make_unique>>( + reader, type_p, schema_p, file_idx_p, max_define, max_repeat); + case LogicalTypeId::TIMESTAMP: + switch (schema_p.type) { + case Type::INT96: + return make_unique>( + reader, type_p, schema_p, file_idx_p, max_define, max_repeat); + case Type::INT64: + switch (schema_p.converted_type) { + case ConvertedType::TIMESTAMP_MICROS: + return make_unique>( + reader, type_p, schema_p, file_idx_p, max_define, max_repeat); + case ConvertedType::TIMESTAMP_MILLIS: + return make_unique>( + reader, type_p, schema_p, file_idx_p, max_define, max_repeat); + default: + break; + } + default: + break; + } + break; + case LogicalTypeId::DATE: + return make_unique>(reader, type_p, schema_p, + file_idx_p, max_define, max_repeat); + case LogicalTypeId::BLOB: + case LogicalTypeId::VARCHAR: + return make_unique(reader, type_p, schema_p, file_idx_p, max_define, max_repeat); + case LogicalTypeId::DECIMAL: + // we have to figure out what kind of int we need + switch (type_p.InternalType()) { + case PhysicalType::INT16: + return make_unique>(reader, type_p, schema_p, file_idx_p, max_define, + max_repeat); + case PhysicalType::INT32: + return make_unique>(reader, type_p, schema_p, file_idx_p, max_define, + max_repeat); + case PhysicalType::INT64: + return make_unique>(reader, type_p, schema_p, file_idx_p, max_define, + max_repeat); + case PhysicalType::INT128: + return make_unique>(reader, type_p, schema_p, file_idx_p, max_define, + max_repeat); + + default: + break; + } + break; + default: + break; + } + throw NotImplementedException(type_p.ToString()); } -/*! 
ZSTD_compress_frameChunk() : -* Compress a chunk of data into one or multiple blocks. -* All blocks will be terminated, all input will be consumed. -* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. -* Frame is supposed already started (header already produced) -* @return : compressed size, or an error code -*/ -static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - U32 lastFrameChunk) -{ - size_t blockSize = cctx->blockSize; - size_t remaining = srcSize; - const BYTE* ip = (const BYTE*)src; - BYTE* const ostart = (BYTE*)dst; - BYTE* op = ostart; - U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; +void ColumnReader::PrepareRead(parquet_filter_t &filter) { + dict_decoder.reset(); + defined_decoder.reset(); + block.reset(); - assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX); + PageHeader page_hdr; + page_hdr.read(protocol); - DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); - if (cctx->appliedParams.fParams.checksumFlag && srcSize) - XXH64_update(&cctx->xxhState, src, srcSize); + // page_hdr.printTo(std::cout); + // std::cout << '\n'; - while (remaining) { - ZSTD_matchState_t* const ms = &cctx->blockState.matchState; - U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); + PreparePage(page_hdr.compressed_page_size, page_hdr.uncompressed_page_size); - RETURN_ERROR_IF(dstCapacity < ZSTDInternalConstants::ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE, - dstSize_tooSmall, - "not enough space to store compressed block"); - if (remaining < blockSize) blockSize = remaining; + switch (page_hdr.type) { + case PageType::DATA_PAGE_V2: + case PageType::DATA_PAGE: + PrepareDataPage(page_hdr); + break; + case PageType::DICTIONARY_PAGE: + Dictionary(move(block), page_hdr.dictionary_page_header.num_values); + break; + default: + break; // ignore INDEX page type and any other custom extensions 
+ } +} - ZSTD_overflowCorrectIfNeeded( - ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize); - ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); +void ColumnReader::PreparePage(idx_t compressed_page_size, idx_t uncompressed_page_size) { + auto &trans = (ThriftFileTransport &)*protocol->getTransport(); - /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ - if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; + block = make_shared(reader.allocator, compressed_page_size + 1); + trans.read((uint8_t *)block->ptr, compressed_page_size); - { size_t cSize; - if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) { - cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock); - FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed"); - assert(cSize > 0); - assert(cSize <= blockSize + ZSTDInternalConstants::ZSTD_blockHeaderSize); - } else { - cSize = ZSTD_compressBlock_internal(cctx, - op+ZSTDInternalConstants::ZSTD_blockHeaderSize, dstCapacity-ZSTDInternalConstants::ZSTD_blockHeaderSize, - ip, blockSize, 1 /* frame */); - FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed"); + shared_ptr unpacked_block; + if (chunk->meta_data.codec != CompressionCodec::UNCOMPRESSED) { + unpacked_block = make_shared(reader.allocator, uncompressed_page_size + 1); + } - if (cSize == 0) { /* block is not compressible */ - cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); - FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); - } else { - U32 const cBlockHeader = cSize == 1 ? 
- lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : - lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); - MEM_writeLE24(op, cBlockHeader); - cSize += ZSTDInternalConstants::ZSTD_blockHeaderSize; - } - } + switch (chunk->meta_data.codec) { + case CompressionCodec::UNCOMPRESSED: + break; + case CompressionCodec::GZIP: { + MiniZStream s; + s.Decompress((const char *)block->ptr, compressed_page_size, (char *)unpacked_block->ptr, + uncompressed_page_size); + block = move(unpacked_block); - ip += blockSize; - assert(remaining >= blockSize); - remaining -= blockSize; - op += cSize; - assert(dstCapacity >= cSize); - dstCapacity -= cSize; - cctx->isFirstBlock = 0; - DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", - (unsigned)cSize); - } } + break; + } + case CompressionCodec::SNAPPY: { + auto res = snappy::RawUncompress((const char *)block->ptr, compressed_page_size, (char *)unpacked_block->ptr); + if (!res) { + throw std::runtime_error("Decompression failure"); + } + block = move(unpacked_block); + break; + } + case CompressionCodec::ZSTD: { + auto res = duckdb_zstd::ZSTD_decompress((char *)unpacked_block->ptr, uncompressed_page_size, + (const char *)block->ptr, compressed_page_size); + if (duckdb_zstd::ZSTD_isError(res) || res != (size_t)uncompressed_page_size) { + throw std::runtime_error("ZSTD Decompression failure"); + } + block = move(unpacked_block); + break; + } - if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; - return (size_t)(op-ostart); + default: { + std::stringstream codec_name; + codec_name << chunk->meta_data.codec; + throw std::runtime_error("Unsupported compression codec \"" + codec_name.str() + + "\". 
Supported options are uncompressed, gzip or snappy"); + break; + } + } } +static uint8_t ComputeBitWidth(idx_t val) { + if (val == 0) { + return 0; + } + uint8_t ret = 1; + while (((idx_t)(1 << ret) - 1) < val) { + ret++; + } + return ret; +} -static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, - const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID) -{ BYTE* const op = (BYTE*)dst; - U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ - U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */ - U32 const checksumFlag = params->fParams.checksumFlag>0; - U32 const windowSize = (U32)1 << params->cParams.windowLog; - U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); - BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); - U32 const fcsCode = params->fParams.contentSizeFlag ? - (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ - BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); - size_t pos=0; +void ColumnReader::PrepareDataPage(PageHeader &page_hdr) { + if (page_hdr.type == PageType::DATA_PAGE && !page_hdr.__isset.data_page_header) { + throw std::runtime_error("Missing data page header from data page"); + } + if (page_hdr.type == PageType::DATA_PAGE_V2 && !page_hdr.__isset.data_page_header_v2) { + throw std::runtime_error("Missing data page header from data page v2"); + } - assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); - RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall, - "dst buf is too small to fit worst-case frame header size."); - DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", - !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); + 
page_rows_available = page_hdr.type == PageType::DATA_PAGE ? page_hdr.data_page_header.num_values + : page_hdr.data_page_header_v2.num_values; + auto page_encoding = page_hdr.type == PageType::DATA_PAGE ? page_hdr.data_page_header.encoding + : page_hdr.data_page_header_v2.encoding; - if (params->format == ZSTD_f_zstd1) { - MEM_writeLE32(dst, ZSTD_MAGICNUMBER); - pos = 4; - } - op[pos++] = frameHeaderDescriptionByte; - if (!singleSegment) op[pos++] = windowLogByte; - switch(dictIDSizeCode) - { - default: assert(0); /* impossible */ - case 0 : break; - case 1 : op[pos] = (BYTE)(dictID); pos++; break; - case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break; - case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break; - } - switch(fcsCode) - { - default: assert(0); /* impossible */ - case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break; - case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break; - case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break; - case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break; - } - return pos; -} + if (HasRepeats()) { + uint32_t rep_length = page_hdr.type == PageType::DATA_PAGE + ? 
block->read() + : page_hdr.data_page_header_v2.repetition_levels_byte_length; + block->available(rep_length); + repeated_decoder = + make_unique((const uint8_t *)block->ptr, rep_length, ComputeBitWidth(max_repeat)); + block->inc(rep_length); + } -/* ZSTD_writeLastEmptyBlock() : - * output an empty Block with end-of-frame mark to complete a frame - * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) - * or an error code if `dstCapacity` is too small (read() + : page_hdr.data_page_header_v2.definition_levels_byte_length; + block->available(def_length); + defined_decoder = + make_unique((const uint8_t *)block->ptr, def_length, ComputeBitWidth(max_define)); + block->inc(def_length); + } -size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq) -{ - RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong, - "wrong cctx stage"); - RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm, - parameter_unsupported, - "incompatible with ldm"); - cctx->externSeqStore.seq = seq; - cctx->externSeqStore.size = nbSeq; - cctx->externSeqStore.capacity = nbSeq; - cctx->externSeqStore.pos = 0; - return 0; + switch (page_encoding) { + case Encoding::RLE_DICTIONARY: + case Encoding::PLAIN_DICTIONARY: { + // TODO there seems to be some confusion whether this is in the bytes for v2 + // where is it otherwise?? + auto dict_width = block->read(); + // TODO somehow dict_width can be 0 ? 
+ dict_decoder = make_unique((const uint8_t *)block->ptr, block->len, dict_width); + block->inc(block->len); + break; + } + case Encoding::PLAIN: + // nothing to do here, will be read directly below + break; + + default: + throw std::runtime_error("Unsupported page encoding"); + } } +idx_t ColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, uint8_t *define_out, uint8_t *repeat_out, + Vector &result) { + // we need to reset the location because multiple column readers share the same protocol + auto &trans = (ThriftFileTransport &)*protocol->getTransport(); + trans.SetLocation(chunk_read_offset); -static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - U32 frame, U32 lastFrameChunk) -{ - ZSTD_matchState_t* const ms = &cctx->blockState.matchState; - size_t fhSize = 0; + idx_t result_offset = 0; + auto to_read = num_values; - DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u", - cctx->stage, (unsigned)srcSize); - RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong, - "missing init (ZSTD_compressBegin)"); + while (to_read > 0) { + while (page_rows_available == 0) { + PrepareRead(filter); + } - if (frame && (cctx->stage==ZSTDcs_init)) { - fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, - cctx->pledgedSrcSizePlusOne-1, cctx->dictID); - FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); - assert(fhSize <= dstCapacity); - dstCapacity -= fhSize; - dst = (char*)dst + fhSize; - cctx->stage = ZSTDcs_ongoing; - } + D_ASSERT(block); + auto read_now = MinValue(to_read, page_rows_available); - if (!srcSize) return fhSize; /* do not generate an empty block if no input */ + D_ASSERT(read_now <= STANDARD_VECTOR_SIZE); - if (!ZSTD_window_update(&ms->window, src, srcSize)) { - ms->nextToUpdate = ms->window.dictLimit; - } - if (cctx->appliedParams.ldmParams.enableLdm) { - ZSTD_window_update(&cctx->ldmState.window, src, srcSize); - } + if 
(HasRepeats()) { + D_ASSERT(repeated_decoder); + repeated_decoder->GetBatch((char *)repeat_out + result_offset, read_now); + } - if (!frame) { - /* overflow check and correction for block mode */ - ZSTD_overflowCorrectIfNeeded( - ms, &cctx->workspace, &cctx->appliedParams, - src, (BYTE const*)src + srcSize); - } + if (HasDefines()) { + D_ASSERT(defined_decoder); + defined_decoder->GetBatch((char *)define_out + result_offset, read_now); + } - DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); - { size_t const cSize = frame ? - ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : - ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */); - FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed"); - cctx->consumedSrcSize += srcSize; - cctx->producedCSize += (cSize + fhSize); - assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); - if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ - ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); - RETURN_ERROR_IF( - cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne, - srcSize_wrong, - "error : pledgedSrcSize = %u, while realSrcSize >= %u", - (unsigned)cctx->pledgedSrcSizePlusOne-1, - (unsigned)cctx->consumedSrcSize); - } - return cSize + fhSize; - } -} + if (dict_decoder) { + // we need the null count because the offsets and plain values have no entries for nulls + idx_t null_count = 0; + if (HasDefines()) { + for (idx_t i = 0; i < read_now; i++) { + if (define_out[i + result_offset] != max_define) { + null_count++; + } + } + } -size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize); - return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 
0 /* last chunk */); -} + offset_buffer.resize(reader.allocator, sizeof(uint32_t) * (read_now - null_count)); + dict_decoder->GetBatch(offset_buffer.ptr, read_now - null_count); + DictReference(result); + Offsets((uint32_t *)offset_buffer.ptr, define_out, read_now, filter, result_offset, result); + } else { + PlainReference(block, result); + Plain(block, define_out, read_now, filter, result_offset, result); + } + result_offset += read_now; + page_rows_available -= read_now; + to_read -= read_now; + } + group_rows_available -= num_values; + chunk_read_offset = trans.GetLocation(); -size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) -{ - ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams; - assert(!ZSTD_checkCParams(cParams)); - return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog); + return num_values; } -size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize); - { size_t const blockSizeMax = ZSTD_getBlockSize(cctx); - RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); } +void ColumnReader::Skip(idx_t num_values) { + dummy_define.zero(); + dummy_repeat.zero(); - return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); + // TODO this can be optimized, for example we dont actually have to bitunpack offsets + auto values_read = + Read(num_values, none_filter, (uint8_t *)dummy_define.ptr, (uint8_t *)dummy_repeat.ptr, dummy_result); + if (values_read != num_values) { + throw std::runtime_error("Row count mismatch when skipping rows"); + } } -/*! 
ZSTD_loadDictionaryContent() : - * @return : 0, or an error code - */ -static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, - ldmState_t* ls, - ZSTD_cwksp* ws, - ZSTD_CCtx_params const* params, - const void* src, size_t srcSize, - ZSTD_dictTableLoadMethod_e dtlm) -{ - const BYTE* ip = (const BYTE*) src; - const BYTE* const iend = ip + srcSize; - - ZSTD_window_update(&ms->window, src, srcSize); - ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); +uint32_t StringColumnReader::VerifyString(const char *str_data, uint32_t str_len) { + if (Type() != LogicalTypeId::VARCHAR) { + return str_len; + } + // verify if a string is actually UTF8, and if there are no null bytes in the middle of the string + // technically Parquet should guarantee this, but reality is often disappointing + UnicodeInvalidReason reason; + size_t pos; + auto utf_type = Utf8Proc::Analyze(str_data, str_len, &reason, &pos); + if (utf_type == UnicodeType::INVALID) { + if (reason == UnicodeInvalidReason::NULL_BYTE) { + // for null bytes we just truncate the string + return pos; + } + throw InvalidInputException("Invalid string encoding found in Parquet file: value \"" + + Blob::ToString(string_t(str_data, str_len)) + "\" is not valid UTF8!"); + } + return str_len; +} - if (params->ldmParams.enableLdm && ls != NULL) { - ZSTD_window_update(&ls->window, src, srcSize); - ls->loadedDictEnd = params->forceWindow ? 
0 : (U32)(iend - ls->window.base); - } +void StringColumnReader::Dictionary(shared_ptr data, idx_t num_entries) { + dict = move(data); + dict_strings = unique_ptr(new string_t[num_entries]); + for (idx_t dict_idx = 0; dict_idx < num_entries; dict_idx++) { + uint32_t str_len = dict->read(); + dict->available(str_len); - /* Assert that we the ms params match the params we're being given */ - ZSTD_assertEqualCParams(params->cParams, ms->cParams); + auto actual_str_len = VerifyString(dict->ptr, str_len); + dict_strings[dict_idx] = string_t(dict->ptr, actual_str_len); + dict->inc(str_len); + } +} - if (srcSize <= HASH_READ_SIZE) return 0; +class ParquetStringVectorBuffer : public VectorBuffer { +public: + explicit ParquetStringVectorBuffer(shared_ptr buffer_p) + : VectorBuffer(VectorBufferType::OPAQUE_BUFFER), buffer(move(buffer_p)) { + } - while (iend - ip > HASH_READ_SIZE) { - size_t const remaining = (size_t)(iend - ip); - size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX); - const BYTE* const ichunk = ip + chunk; +private: + shared_ptr buffer; +}; - ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk); +void StringColumnReader::DictReference(Vector &result) { + StringVector::AddBuffer(result, make_buffer(dict)); +} +void StringColumnReader::PlainReference(shared_ptr plain_data, Vector &result) { + StringVector::AddBuffer(result, make_buffer(move(plain_data))); +} - if (params->ldmParams.enableLdm && ls != NULL) - ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, ¶ms->ldmParams); +string_t StringParquetValueConversion::DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { + auto &dict_strings = ((StringColumnReader &)reader).dict_strings; + return dict_strings[offset]; +} - switch(params->cParams.strategy) - { - case ZSTD_fast: - ZSTD_fillHashTable(ms, ichunk, dtlm); - break; - case ZSTD_dfast: - ZSTD_fillDoubleHashTable(ms, ichunk, dtlm); - break; +string_t StringParquetValueConversion::PlainRead(ByteBuffer &plain_data, 
ColumnReader &reader) { + auto &scr = ((StringColumnReader &)reader); + uint32_t str_len = scr.fixed_width_string_length == 0 ? plain_data.read() : scr.fixed_width_string_length; + plain_data.available(str_len); + auto actual_str_len = ((StringColumnReader &)reader).VerifyString(plain_data.ptr, str_len); + auto ret_str = string_t(plain_data.ptr, actual_str_len); + plain_data.inc(str_len); + return ret_str; +} - case ZSTD_greedy: - case ZSTD_lazy: - case ZSTD_lazy2: - if (chunk >= HASH_READ_SIZE) - ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE); - break; +void StringParquetValueConversion::PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { + auto &scr = ((StringColumnReader &)reader); + uint32_t str_len = scr.fixed_width_string_length == 0 ? plain_data.read() : scr.fixed_width_string_length; + plain_data.available(str_len); + plain_data.inc(str_len); +} - case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ - case ZSTD_btopt: - case ZSTD_btultra: - case ZSTD_btultra2: - if (chunk >= HASH_READ_SIZE) - ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk); - break; +idx_t ListColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, uint8_t *define_out, uint8_t *repeat_out, + Vector &result_out) { + idx_t result_offset = 0; + auto result_ptr = FlatVector::GetData(result_out); + auto &result_mask = FlatVector::Validity(result_out); - default: - assert(0); /* not possible : not a valid strategy id */ - } + D_ASSERT(ListVector::GetListSize(result_out) == 0); + // if an individual list is longer than STANDARD_VECTOR_SIZE we actually have to loop the child read to fill it + bool finished = false; + while (!finished) { + idx_t child_actual_num_values = 0; - ip = ichunk; - } + // check if we have any overflow from a previous read + if (overflow_child_count == 0) { + // we don't: read elements from the child reader + child_defines.zero(); + child_repeats.zero(); + // we don't know in advance how many values to read because of the beautiful 
repetition/definition setup + // we just read (up to) a vector from the child column, and see if we have read enough + // if we have not read enough, we read another vector + // if we have read enough, we leave any unhandled elements in the overflow vector for a subsequent read + auto child_req_num_values = + MinValue(STANDARD_VECTOR_SIZE, child_column_reader->GroupRowsAvailable()); + read_vector.ResetFromCache(read_cache); + child_actual_num_values = child_column_reader->Read(child_req_num_values, child_filter, child_defines_ptr, + child_repeats_ptr, read_vector); + } else { + // we do: use the overflow values + child_actual_num_values = overflow_child_count; + overflow_child_count = 0; + } - ms->nextToUpdate = (U32)(iend - ms->window.base); - return 0; -} + if (child_actual_num_values == 0) { + // no more elements available: we are done + break; + } + read_vector.Verify(child_actual_num_values); + idx_t current_chunk_offset = ListVector::GetListSize(result_out); + // hard-won piece of code this, modify at your own risk + // the intuition is that we have to only collapse values into lists that are repeated *on this level* + // the rest is pretty much handed up as-is as a single-valued list or NULL + idx_t child_idx; + for (child_idx = 0; child_idx < child_actual_num_values; child_idx++) { + if (child_repeats_ptr[child_idx] == max_repeat) { + // value repeats on this level, append + D_ASSERT(result_offset > 0); + result_ptr[result_offset - 1].length++; + continue; + } -/* Dictionaries that assign zero probability to symbols that show up causes problems - when FSE encoding. Refuse dictionaries that assign zero probability to symbols - that we may encounter during compression. - NOTE: This behavior is not standard and could be improved in the future. 
*/ -static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) { - U32 s; - RETURN_ERROR_IF(dictMaxSymbolValue < maxSymbolValue, dictionary_corrupted, "dict fse tables don't have all symbols"); - for (s = 0; s <= maxSymbolValue; ++s) { - RETURN_ERROR_IF(normalizedCounter[s] == 0, dictionary_corrupted, "dict fse tables don't have all symbols"); - } - return 0; -} + if (result_offset >= num_values) { + // we ran out of output space + finished = true; + break; + } + if (child_defines_ptr[child_idx] >= max_define) { + // value has been defined down the stack, hence its NOT NULL + result_ptr[result_offset].offset = child_idx + current_chunk_offset; + result_ptr[result_offset].length = 1; + } else { + // value is NULL somewhere up the stack + result_mask.SetInvalid(result_offset); + result_ptr[result_offset].offset = 0; + result_ptr[result_offset].length = 0; + } -size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, - short* offcodeNCount, unsigned* offcodeMaxValue, - const void* const dict, size_t dictSize) -{ - const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */ - const BYTE* const dictEnd = dictPtr + dictSize; - dictPtr += 8; - bs->entropy.huf.repeatMode = HUF_repeat_check; + repeat_out[result_offset] = child_repeats_ptr[child_idx]; + define_out[result_offset] = child_defines_ptr[child_idx]; - { unsigned maxSymbolValue = 255; - unsigned hasZeroWeights = 1; - size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, - dictEnd-dictPtr, &hasZeroWeights); + result_offset++; + } + // actually append the required elements to the child list + ListVector::Append(result_out, read_vector, child_idx); - /* We only set the loaded table as valid if it contains all non-zero - * weights. 
Otherwise, we set it to check */ - if (!hasZeroWeights) - bs->entropy.huf.repeatMode = HUF_repeat_valid; + // we have read more values from the child reader than we can fit into the result for this read + // we have to pass everything from child_idx to child_actual_num_values into the next call + if (child_idx < child_actual_num_values && result_offset == num_values) { + read_vector.Slice(read_vector, child_idx); + overflow_child_count = child_actual_num_values - child_idx; + read_vector.Verify(overflow_child_count); - RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, ""); - dictPtr += hufHeaderSize; - } + // move values in the child repeats and defines *backward* by child_idx + for (idx_t repdef_idx = 0; repdef_idx < overflow_child_count; repdef_idx++) { + child_defines_ptr[repdef_idx] = child_defines_ptr[child_idx + repdef_idx]; + child_repeats_ptr[repdef_idx] = child_repeats_ptr[child_idx + repdef_idx]; + } + } + } + result_out.Verify(result_offset); + return result_offset; +} - { unsigned offcodeLog; - size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); - /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ - /* fill all offset symbols to avoid garbage at end of table */ - RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( - bs->entropy.fse.offcodeCTable, - offcodeNCount, MaxOff, offcodeLog, - workspace, HUF_WORKSPACE_SIZE)), - dictionary_corrupted, ""); - dictPtr += offcodeHeaderSize; - } +ListColumnReader::ListColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, + idx_t schema_idx_p, idx_t max_define_p, idx_t max_repeat_p, + unique_ptr child_column_reader_p) + : ColumnReader(reader, 
move(type_p), schema_p, schema_idx_p, max_define_p, max_repeat_p), + child_column_reader(move(child_column_reader_p)), read_cache(ListType::GetChildType(Type())), + read_vector(read_cache), overflow_child_count(0) { - { short matchlengthNCount[MaxML+1]; - unsigned matchlengthMaxValue = MaxML, matchlengthLog; - size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); - /* Every match length code must have non-zero probability */ - FORWARD_IF_ERROR( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML), ""); - RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( - bs->entropy.fse.matchlengthCTable, - matchlengthNCount, matchlengthMaxValue, matchlengthLog, - workspace, HUF_WORKSPACE_SIZE)), - dictionary_corrupted, ""); - dictPtr += matchlengthHeaderSize; - } + child_defines.resize(reader.allocator, STANDARD_VECTOR_SIZE); + child_repeats.resize(reader.allocator, STANDARD_VECTOR_SIZE); + child_defines_ptr = (uint8_t *)child_defines.ptr; + child_repeats_ptr = (uint8_t *)child_repeats.ptr; - { short litlengthNCount[MaxLL+1]; - unsigned litlengthMaxValue = MaxLL, litlengthLog; - size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); - /* Every literal length code must have non-zero probability */ - FORWARD_IF_ERROR( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL), ""); - RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( - bs->entropy.fse.litlengthCTable, - litlengthNCount, litlengthMaxValue, litlengthLog, - workspace, HUF_WORKSPACE_SIZE)), - dictionary_corrupted, ""); - dictPtr += 
litlengthHeaderSize; - } + child_filter.set(); +} - RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); - bs->rep[0] = MEM_readLE32(dictPtr+0); - bs->rep[1] = MEM_readLE32(dictPtr+4); - bs->rep[2] = MEM_readLE32(dictPtr+8); - dictPtr += 12; +} // namespace duckdb +#include +#include +#include +#include - return dictPtr - (const BYTE*)dict; -} -/* Dictionary format : - * See : - * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format - */ -/*! ZSTD_loadZstdDictionary() : - * @return : dictID, or an error code - * assumptions : magic number supposed already checked - * dictSize supposed >= 8 - */ -static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, - ZSTD_matchState_t* ms, - ZSTD_cwksp* ws, - ZSTD_CCtx_params const* params, - const void* dict, size_t dictSize, - ZSTD_dictTableLoadMethod_e dtlm, - void* workspace) -{ - const BYTE* dictPtr = (const BYTE*)dict; - const BYTE* const dictEnd = dictPtr + dictSize; - short offcodeNCount[MaxOff+1]; - unsigned offcodeMaxValue = MaxOff; - size_t dictID; - size_t eSize; - ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= 8); - assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); +#include "duckdb.hpp" - dictID = params->fParams.noDictIDFlag ? 
0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ ); - eSize = ZSTD_loadCEntropy(bs, workspace, offcodeNCount, &offcodeMaxValue, dict, dictSize); - FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed"); - dictPtr += eSize; +namespace duckdb { - { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); - U32 offcodeMax = MaxOff; - if (dictContentSize <= ((U32)-1) - 128 KB) { - U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */ - offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ - } - /* All offset values <= dictContentSize + 128 KB must be representable */ - FORWARD_IF_ERROR(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)), ""); - /* All repCodes must be <= dictContentSize and != 0*/ - { U32 u; - for (u=0; u<3; u++) { - RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, ""); - RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, ""); - } } +class ParquetExtension : public Extension { +public: + void Load(DuckDB &db) override; +}; - bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid; - bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid; - bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid; - FORWARD_IF_ERROR(ZSTD_loadDictionaryContent( - ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), ""); - return dictID; - } -} +} // namespace duckdb -/** ZSTD_compress_insertDictionary() : -* @return : dictID, or an error code */ -static size_t -ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, - ZSTD_matchState_t* ms, - ldmState_t* ls, - ZSTD_cwksp* ws, - const ZSTD_CCtx_params* params, - const void* dict, size_t dictSize, - ZSTD_dictContentType_e dictContentType, - ZSTD_dictTableLoadMethod_e dtlm, - void* workspace) -{ - DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); - if ((dict==NULL) || (dictSize<8)) { - RETURN_ERROR_IF(dictContentType == 
ZSTD_dct_fullDict, dictionary_wrong, ""); - return 0; - } - ZSTD_reset_compressedBlockState(bs); - /* dict restricted modes */ - if (dictContentType == ZSTD_dct_rawContent) - return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm); +//===----------------------------------------------------------------------===// +// DuckDB +// +// parquet_metadata.hpp +// +// +//===----------------------------------------------------------------------===// - if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { - if (dictContentType == ZSTD_dct_auto) { - DEBUGLOG(4, "raw content dictionary detected"); - return ZSTD_loadDictionaryContent( - ms, ls, ws, params, dict, dictSize, dtlm); - } - RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); - assert(0); /* impossible */ - } - /* dict as full zstd dictionary */ - return ZSTD_loadZstdDictionary( - bs, ms, ws, params, dict, dictSize, dtlm, workspace); -} -#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB) -#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6) -/*! 
ZSTD_compressBegin_internal() : - * @return : 0, or an error code */ -static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, - const void* dict, size_t dictSize, - ZSTD_dictContentType_e dictContentType, - ZSTD_dictTableLoadMethod_e dtlm, - const ZSTD_CDict* cdict, - const ZSTD_CCtx_params* params, U64 pledgedSrcSize, - ZSTD_buffered_policy_e zbuff) -{ - DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog); - /* params are supposed to be fully validated at this point */ - assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); - assert(!((dict) && (cdict))); /* either dict or cdict, not both */ - if ( (cdict) - && (cdict->dictContentSize > 0) - && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF - || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER - || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN - || cdict->compressionLevel == 0) - && (params->attachDictPref != ZSTD_dictForceLoad) ) { - return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); - } - FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize, - ZSTDcrp_makeClean, zbuff) , ""); - { size_t const dictID = cdict ? 
- ZSTD_compress_insertDictionary( - cctx->blockState.prevCBlock, &cctx->blockState.matchState, - &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent, - cdict->dictContentSize, dictContentType, dtlm, - cctx->entropyWorkspace) - : ZSTD_compress_insertDictionary( - cctx->blockState.prevCBlock, &cctx->blockState.matchState, - &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize, - dictContentType, dtlm, cctx->entropyWorkspace); - FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); - assert(dictID <= UINT_MAX); - cctx->dictID = (U32)dictID; - } - return 0; -} +namespace duckdb { -size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, - const void* dict, size_t dictSize, - ZSTD_dictContentType_e dictContentType, - ZSTD_dictTableLoadMethod_e dtlm, - const ZSTD_CDict* cdict, - const ZSTD_CCtx_params* params, - unsigned long long pledgedSrcSize) -{ - DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog); - /* compression parameters verification and optimization */ - FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , ""); - return ZSTD_compressBegin_internal(cctx, - dict, dictSize, dictContentType, dtlm, - cdict, - params, pledgedSrcSize, - ZSTDb_not_buffered); -} +class ParquetMetaDataFunction : public TableFunction { +public: + ParquetMetaDataFunction(); +}; -/*! 
ZSTD_compressBegin_advanced() : -* @return : 0, or an error code */ -size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, - const void* dict, size_t dictSize, - ZSTD_parameters params, unsigned long long pledgedSrcSize) -{ - ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); - return ZSTD_compressBegin_advanced_internal(cctx, - dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, - NULL /*cdict*/, - &cctxParams, pledgedSrcSize); -} +class ParquetSchemaFunction : public TableFunction { +public: + ParquetSchemaFunction(); +}; -size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) -{ - ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); - ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); - DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); - return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, - &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); -} +} // namespace duckdb -size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) -{ - return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); -} +#include "duckdb.hpp" +#ifndef DUCKDB_AMALGAMATION +#include "duckdb.hpp" +#include "duckdb/common/types/chunk_collection.hpp" +#include "duckdb/function/copy_function.hpp" +#include "duckdb/function/table_function.hpp" +#include "duckdb/common/file_system.hpp" +#include "duckdb/parallel/parallel_state.hpp" +#include "duckdb/parser/parsed_data/create_copy_function_info.hpp" +#include "duckdb/parser/parsed_data/create_table_function_info.hpp" -/*! ZSTD_writeEpilogue() : -* Ends a frame. 
-* @return : nb of bytes written into dst (or an error code) */ -static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) -{ - BYTE* const ostart = (BYTE*)dst; - BYTE* op = ostart; - size_t fhSize = 0; +#include "duckdb/main/config.hpp" +#include "duckdb/parser/expression/constant_expression.hpp" +#include "duckdb/parser/expression/function_expression.hpp" +#include "duckdb/parser/tableref/table_function_ref.hpp" - DEBUGLOG(4, "ZSTD_writeEpilogue"); - RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing"); +#include "duckdb/storage/statistics/base_statistics.hpp" - /* special case : empty frame */ - if (cctx->stage == ZSTDcs_init) { - fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0); - FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); - dstCapacity -= fhSize; - op += fhSize; - cctx->stage = ZSTDcs_ongoing; - } +#include "duckdb/main/client_context.hpp" +#include "duckdb/catalog/catalog.hpp" +#endif - if (cctx->stage != ZSTDcs_ending) { - /* write one last empty block, make it the "last" block */ - U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; - RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue"); - MEM_writeLE32(op, cBlockHeader24); - op += ZSTDInternalConstants::ZSTD_blockHeaderSize; - dstCapacity -= ZSTDInternalConstants::ZSTD_blockHeaderSize; - } +namespace duckdb { - if (cctx->appliedParams.fParams.checksumFlag) { - U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); - RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum"); - DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum); - MEM_writeLE32(op, checksum); - op += 4; - } +struct ParquetReadBindData : public FunctionData { + shared_ptr initial_reader; + vector files; + vector column_ids; + atomic chunk_count; + atomic cur_file; +}; - cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ - return op-ostart; -} 
+struct ParquetReadOperatorData : public FunctionOperatorData { + shared_ptr reader; + ParquetReaderScanState scan_state; + bool is_parallel; + idx_t file_index; + vector column_ids; + TableFilterSet *table_filters; +}; -size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - size_t endResult; - size_t const cSize = ZSTD_compressContinue_internal(cctx, - dst, dstCapacity, src, srcSize, - 1 /* frame mode */, 1 /* last chunk */); - FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed"); - endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); - FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed"); - assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); - if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ - ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); - DEBUGLOG(4, "end of frame : controlling src size"); - RETURN_ERROR_IF( - cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1, - srcSize_wrong, - "error : pledgedSrcSize = %u, while realSrcSize = %u", - (unsigned)cctx->pledgedSrcSizePlusOne-1, - (unsigned)cctx->consumedSrcSize); - } - return cSize + endResult; -} +struct ParquetReadParallelState : public ParallelState { + mutex lock; + shared_ptr current_reader; + idx_t file_index; + idx_t row_group_index; +}; +class ParquetScanFunction { +public: + static TableFunctionSet GetFunctionSet() { + TableFunctionSet set("parquet_scan"); + set.AddFunction(TableFunction({LogicalType::VARCHAR}, ParquetScanImplementation, ParquetScanBind, + ParquetScanInit, /* statistics */ ParquetScanStats, /* cleanup */ nullptr, + /* dependency */ nullptr, ParquetCardinality, + /* pushdown_complex_filter */ nullptr, /* to_string */ nullptr, + ParquetScanMaxThreads, ParquetInitParallelState, ParquetScanFuncParallel, + ParquetScanParallelInit, ParquetParallelStateNext, true, true, ParquetProgress)); + 
set.AddFunction(TableFunction({LogicalType::LIST(LogicalType::VARCHAR)}, ParquetScanImplementation, + ParquetScanBindList, ParquetScanInit, /* statistics */ ParquetScanStats, + /* cleanup */ nullptr, + /* dependency */ nullptr, ParquetCardinality, + /* pushdown_complex_filter */ nullptr, /* to_string */ nullptr, + ParquetScanMaxThreads, ParquetInitParallelState, ParquetScanFuncParallel, + ParquetScanParallelInit, ParquetParallelStateNext, true, true, ParquetProgress)); + return set; + } -static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - const ZSTD_parameters* params) -{ - ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); - DEBUGLOG(4, "ZSTD_compress_internal"); - return ZSTD_compress_advanced_internal(cctx, - dst, dstCapacity, - src, srcSize, - dict, dictSize, - &cctxParams); -} + static unique_ptr ParquetReadBind(ClientContext &context, CopyInfo &info, + vector &expected_names, + vector &expected_types) { + for (auto &option : info.options) { + throw NotImplementedException("Unsupported option for COPY FROM parquet: %s", option.first); + } + auto result = make_unique(); -size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - ZSTD_parameters params) -{ - DEBUGLOG(4, "ZSTD_compress_advanced"); - FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), ""); - return ZSTD_compress_internal(cctx, - dst, dstCapacity, - src, srcSize, - dict, dictSize, - ¶ms); -} + FileSystem &fs = FileSystem::GetFileSystem(context); + result->files = fs.Glob(info.file_path); + if (result->files.empty()) { + throw IOException("No files found that match the pattern \"%s\"", info.file_path); + } + result->initial_reader = make_shared(context, result->files[0], expected_types); + return move(result); + } -/* Internal */ -size_t 
ZSTD_compress_advanced_internal( - ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - const ZSTD_CCtx_params* params) -{ - DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize); - FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, - dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, - params, srcSize, ZSTDb_not_buffered) , ""); - return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); -} + static unique_ptr ParquetScanStats(ClientContext &context, const FunctionData *bind_data_p, + column_t column_index) { + auto &bind_data = (ParquetReadBindData &)*bind_data_p; -size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict, size_t dictSize, - int compressionLevel) -{ - ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0); - ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); - DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize); - assert(params.fParams.contentSizeFlag == 1); - return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams); -} + if (column_index == COLUMN_IDENTIFIER_ROW_ID) { + return nullptr; + } -size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - int compressionLevel) -{ - DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize); - assert(cctx != NULL); - return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); -} + // we do not want to parse the Parquet metadata for the sole purpose of getting column statistics -size_t ZSTD_compress(void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - int compressionLevel) -{ - size_t result; - ZSTD_CCtx ctxBody; - ZSTD_initCCtx(&ctxBody, 
ZSTDInternalConstants::ZSTD_defaultCMem); - result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel); - ZSTD_freeCCtxContent(&ctxBody); /* can't free ctxBody itself, as it's on stack; free only heap content */ - return result; -} + // We already parsed the metadata for the first file in a glob because we need some type info. + auto overall_stats = ParquetReader::ReadStatistics( + *bind_data.initial_reader, bind_data.initial_reader->return_types[column_index], column_index, + bind_data.initial_reader->metadata->metadata.get()); + if (!overall_stats) { + return nullptr; + } -/* ===== Dictionary API ===== */ + // if there is only one file in the glob (quite common case), we are done + auto &config = DBConfig::GetConfig(context); + if (bind_data.files.size() < 2) { + return overall_stats; + } else if (config.object_cache_enable) { + auto &cache = ObjectCache::GetObjectCache(context); + // for more than one file, we could be lucky and metadata for *every* file is in the object cache (if + // enabled at all) + FileSystem &fs = FileSystem::GetFileSystem(context); + for (idx_t file_idx = 1; file_idx < bind_data.files.size(); file_idx++) { + auto &file_name = bind_data.files[file_idx]; + auto metadata = std::dynamic_pointer_cast(cache.Get(file_name)); + auto handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ); + // but we need to check if the metadata cache entries are current + if (!metadata || (fs.GetLastModifiedTime(*handle) >= metadata->read_time)) { + // missing or invalid metadata entry in cache, no usable stats overall + return nullptr; + } + // get and merge stats for file + auto file_stats = ParquetReader::ReadStatistics(*bind_data.initial_reader, + bind_data.initial_reader->return_types[column_index], + column_index, metadata->metadata.get()); + if (!file_stats) { + return nullptr; + } + overall_stats->Merge(*file_stats); + } + // success! 
+ return overall_stats; + } + // we have more than one file and no object cache so no statistics overall + return nullptr; + } -/*! ZSTD_estimateCDictSize_advanced() : - * Estimate amount of memory that will be needed to create a dictionary with following arguments */ -size_t ZSTD_estimateCDictSize_advanced( - size_t dictSize, ZSTD_compressionParameters cParams, - ZSTD_dictLoadMethod_e dictLoadMethod) -{ - DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict)); - return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) - + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) - + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) - + (dictLoadMethod == ZSTD_dlm_byRef ? 0 - : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *)))); -} + static void ParquetScanFuncParallel(ClientContext &context, const FunctionData *bind_data, + FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output, + ParallelState *parallel_state_p) { + //! FIXME: Have specialized parallel function from pandas scan here + ParquetScanImplementation(context, bind_data, operator_state, input, output); + } -size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) -{ - ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); - return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); -} + static unique_ptr ParquetScanBindInternal(ClientContext &context, vector files, + vector &return_types, vector &names) { + auto result = make_unique(); + result->files = move(files); -size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) -{ - if (cdict==NULL) return 0; /* support sizeof on NULL */ - DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict)); - /* cdict may be in the workspace */ - return (cdict->workspace.workspace == cdict ? 
0 : sizeof(*cdict)) - + ZSTD_cwksp_sizeof(&cdict->workspace); -} + result->initial_reader = make_shared(context, result->files[0]); + return_types = result->initial_reader->return_types; -static size_t ZSTD_initCDict_internal( - ZSTD_CDict* cdict, - const void* dictBuffer, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters cParams) -{ - DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType); - assert(!ZSTD_checkCParams(cParams)); - cdict->matchState.cParams = cParams; - if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { - cdict->dictContent = dictBuffer; - } else { - void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*))); - RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!"); - cdict->dictContent = internalBuffer; - memcpy(internalBuffer, dictBuffer, dictSize); - } - cdict->dictContentSize = dictSize; + names = result->initial_reader->names; + return move(result); + } - cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE); + static vector ParquetGlob(FileSystem &fs, const string &glob) { + auto files = fs.Glob(glob); + if (files.empty()) { + throw IOException("No files found that match the pattern \"%s\"", glob); + } + return files; + } + static unique_ptr ParquetScanBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, + vector &return_types, vector &names) { + auto file_name = inputs[0].GetValue(); - /* Reset the state to no dictionary */ - ZSTD_reset_compressedBlockState(&cdict->cBlockState); - FORWARD_IF_ERROR(ZSTD_reset_matchState( - &cdict->matchState, - &cdict->workspace, - &cParams, - ZSTDcrp_makeClean, - ZSTDirp_reset, - ZSTD_resetTarget_CDict), ""); - /* (Maybe) load the dictionary - * Skips loading the dictionary if it is < 8 
bytes. - */ - { ZSTD_CCtx_params params; - memset(¶ms, 0, sizeof(params)); - params.compressionLevel = ZSTD_CLEVEL_DEFAULT; - params.fParams.contentSizeFlag = 1; - params.cParams = cParams; - { size_t const dictID = ZSTD_compress_insertDictionary( - &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace, - ¶ms, cdict->dictContent, cdict->dictContentSize, - dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace); - FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); - assert(dictID <= (size_t)(U32)-1); - cdict->dictID = (U32)dictID; - } - } + FileSystem &fs = FileSystem::GetFileSystem(context); + auto files = ParquetGlob(fs, file_name); + return ParquetScanBindInternal(context, move(files), return_types, names); + } - return 0; -} + static unique_ptr ParquetScanBindList(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, + vector &return_types, vector &names) { + FileSystem &fs = FileSystem::GetFileSystem(context); + vector files; + for (auto &val : inputs[0].list_value) { + auto glob_files = ParquetGlob(fs, val.ToString()); + files.insert(files.end(), glob_files.begin(), glob_files.end()); + } + if (files.empty()) { + throw IOException("Parquet reader needs at least one file to read"); + } + return ParquetScanBindInternal(context, move(files), return_types, names); + } -ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters cParams, ZSTD_customMem customMem) -{ - DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (unsigned)dictContentType); - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + static unique_ptr ParquetScanInit(ClientContext &context, const FunctionData *bind_data_p, + const vector &column_ids, + TableFilterCollection *filters) { + auto &bind_data = (ParquetReadBindData &)*bind_data_p; + 
bind_data.chunk_count = 0; + bind_data.cur_file = 0; + auto result = make_unique(); + result->column_ids = column_ids; - { size_t const workspaceSize = - ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + - ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + - ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + - (dictLoadMethod == ZSTD_dlm_byRef ? 0 - : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))); - void* const workspace = ZSTD_malloc(workspaceSize, customMem); - ZSTD_cwksp ws; - ZSTD_CDict* cdict; + result->is_parallel = false; + result->file_index = 0; + result->table_filters = filters->table_filters; + // single-threaded: one thread has to read all groups + vector group_ids; + for (idx_t i = 0; i < bind_data.initial_reader->NumRowGroups(); i++) { + group_ids.push_back(i); + } + result->reader = bind_data.initial_reader; + result->reader->InitializeScan(result->scan_state, column_ids, move(group_ids), filters->table_filters); + return move(result); + } - if (!workspace) { - ZSTD_free(workspace, customMem); - return NULL; - } + static int ParquetProgress(ClientContext &context, const FunctionData *bind_data_p) { + auto &bind_data = (ParquetReadBindData &)*bind_data_p; + if (bind_data.initial_reader->NumRows() == 0) { + return (100 * (bind_data.cur_file + 1)) / bind_data.files.size(); + } + auto percentage = (bind_data.chunk_count * STANDARD_VECTOR_SIZE * 100 / bind_data.initial_reader->NumRows()) / + bind_data.files.size(); + percentage += 100 * bind_data.cur_file / bind_data.files.size(); + return percentage; + } - ZSTD_cwksp_init(&ws, workspace, workspaceSize); + static unique_ptr + ParquetScanParallelInit(ClientContext &context, const FunctionData *bind_data_p, ParallelState *parallel_state_p, + const vector &column_ids, TableFilterCollection *filters) { + auto result = make_unique(); + result->column_ids = column_ids; + result->is_parallel = true; + result->table_filters = filters->table_filters; + if (!ParquetParallelStateNext(context, bind_data_p, 
result.get(), parallel_state_p)) { + return nullptr; + } + return move(result); + } - cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); - assert(cdict != NULL); - ZSTD_cwksp_move(&cdict->workspace, &ws); - cdict->customMem = customMem; - cdict->compressionLevel = 0; /* signals advanced API usage */ + static void ParquetScanImplementation(ClientContext &context, const FunctionData *bind_data_p, + FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { + auto &data = (ParquetReadOperatorData &)*operator_state; + auto &bind_data = (ParquetReadBindData &)*bind_data_p; - if (ZSTD_isError( ZSTD_initCDict_internal(cdict, - dictBuffer, dictSize, - dictLoadMethod, dictContentType, - cParams) )) { - ZSTD_freeCDict(cdict); - return NULL; - } + do { + data.reader->Scan(data.scan_state, output); + bind_data.chunk_count++; + if (output.size() == 0 && !data.is_parallel) { + auto &bind_data = (ParquetReadBindData &)*bind_data_p; + // check if there is another file + if (data.file_index + 1 < bind_data.files.size()) { + data.file_index++; + bind_data.cur_file++; + bind_data.chunk_count = 0; + string file = bind_data.files[data.file_index]; + // move to the next file + data.reader = + make_shared(context, file, data.reader->return_types, bind_data.files[0]); + vector group_ids; + for (idx_t i = 0; i < data.reader->NumRowGroups(); i++) { + group_ids.push_back(i); + } + data.reader->InitializeScan(data.scan_state, data.column_ids, move(group_ids), data.table_filters); + } else { + // exhausted all the files: done + break; + } + } else { + break; + } + } while (true); + } - return cdict; - } -} + static unique_ptr ParquetCardinality(ClientContext &context, const FunctionData *bind_data) { + auto &data = (ParquetReadBindData &)*bind_data; + return make_unique(data.initial_reader->NumRows() * data.files.size()); + } -ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) -{ - ZSTD_compressionParameters cParams = 
ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); - ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize, - ZSTD_dlm_byCopy, ZSTD_dct_auto, - cParams, ZSTDInternalConstants::ZSTD_defaultCMem); - if (cdict) - cdict->compressionLevel = compressionLevel == 0 ? ZSTD_CLEVEL_DEFAULT : compressionLevel; - return cdict; -} + static idx_t ParquetScanMaxThreads(ClientContext &context, const FunctionData *bind_data) { + auto &data = (ParquetReadBindData &)*bind_data; + return data.initial_reader->NumRowGroups() * data.files.size(); + } -ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) -{ - ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); - return ZSTD_createCDict_advanced(dict, dictSize, - ZSTD_dlm_byRef, ZSTD_dct_auto, - cParams, ZSTDInternalConstants::ZSTD_defaultCMem); -} + static unique_ptr ParquetInitParallelState(ClientContext &context, const FunctionData *bind_data_p) { + auto &bind_data = (ParquetReadBindData &)*bind_data_p; + auto result = make_unique(); + result->current_reader = bind_data.initial_reader; + result->row_group_index = 0; + result->file_index = 0; + return move(result); + } -size_t ZSTD_freeCDict(ZSTD_CDict* cdict) -{ - if (cdict==NULL) return 0; /* support free on NULL */ - { ZSTD_customMem const cMem = cdict->customMem; - int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict); - ZSTD_cwksp_free(&cdict->workspace, cMem); - if (!cdictInWorkspace) { - ZSTD_free(cdict, cMem); - } - return 0; - } -} + static bool ParquetParallelStateNext(ClientContext &context, const FunctionData *bind_data_p, + FunctionOperatorData *state_p, ParallelState *parallel_state_p) { + auto &bind_data = (ParquetReadBindData &)*bind_data_p; + auto ¶llel_state = (ParquetReadParallelState &)*parallel_state_p; + auto &scan_data = (ParquetReadOperatorData &)*state_p; -/*! 
ZSTD_initStaticCDict_advanced() : - * Generate a digested dictionary in provided memory area. - * workspace: The memory area to emplace the dictionary into. - * Provided pointer must 8-bytes aligned. - * It must outlive dictionary usage. - * workspaceSize: Use ZSTD_estimateCDictSize() - * to determine how large workspace must be. - * cParams : use ZSTD_getCParams() to transform a compression level - * into its relevants cParams. - * @return : pointer to ZSTD_CDict*, or NULL if error (size too small) - * Note : there is no corresponding "free" function. - * Since workspace was allocated externally, it must be freed externally. - */ -const ZSTD_CDict* ZSTD_initStaticCDict( - void* workspace, size_t workspaceSize, - const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters cParams) -{ - size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); - size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) - + (dictLoadMethod == ZSTD_dlm_byRef ? 
0 - : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))) - + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) - + matchStateSize; - ZSTD_CDict* cdict; + lock_guard parallel_lock(parallel_state.lock); + if (parallel_state.row_group_index < parallel_state.current_reader->NumRowGroups()) { + // groups remain in the current parquet file: read the next group + scan_data.reader = parallel_state.current_reader; + vector group_indexes {parallel_state.row_group_index}; + scan_data.reader->InitializeScan(scan_data.scan_state, scan_data.column_ids, group_indexes, + scan_data.table_filters); + parallel_state.row_group_index++; + return true; + } else { + // no groups remain in the current parquet file: check if there are more files to read + while (parallel_state.file_index + 1 < bind_data.files.size()) { + // read the next file + string file = bind_data.files[++parallel_state.file_index]; + parallel_state.current_reader = + make_shared(context, file, parallel_state.current_reader->return_types); + if (parallel_state.current_reader->NumRowGroups() == 0) { + // empty parquet file, move to next file + continue; + } + // set up the scan state to read the first group + scan_data.reader = parallel_state.current_reader; + vector group_indexes {0}; + scan_data.reader->InitializeScan(scan_data.scan_state, scan_data.column_ids, group_indexes, + scan_data.table_filters); + parallel_state.row_group_index = 1; + return true; + } + } + return false; + } +}; - if ((size_t)workspace & 7) return NULL; /* 8-aligned */ +struct ParquetWriteBindData : public FunctionData { + vector sql_types; + string file_name; + vector column_names; + duckdb_parquet::format::CompressionCodec::type codec = duckdb_parquet::format::CompressionCodec::SNAPPY; +}; - { - ZSTD_cwksp ws; - ZSTD_cwksp_init(&ws, workspace, workspaceSize); - cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); - if (cdict == NULL) return NULL; - ZSTD_cwksp_move(&cdict->workspace, &ws); - } +struct 
ParquetWriteGlobalState : public GlobalFunctionData { + unique_ptr writer; +}; - DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u", - (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize)); - if (workspaceSize < neededSize) return NULL; +struct ParquetWriteLocalState : public LocalFunctionData { + ParquetWriteLocalState() { + buffer = make_unique(); + } - if (ZSTD_isError( ZSTD_initCDict_internal(cdict, - dict, dictSize, - dictLoadMethod, dictContentType, - cParams) )) - return NULL; + unique_ptr buffer; +}; - return cdict; +unique_ptr ParquetWriteBind(ClientContext &context, CopyInfo &info, vector &names, + vector &sql_types) { + auto bind_data = make_unique(); + for (auto &option : info.options) { + auto loption = StringUtil::Lower(option.first); + if (loption == "compression" || loption == "codec") { + if (!option.second.empty()) { + auto roption = StringUtil::Lower(option.second[0].ToString()); + if (roption == "uncompressed") { + bind_data->codec = duckdb_parquet::format::CompressionCodec::UNCOMPRESSED; + continue; + } else if (roption == "snappy") { + bind_data->codec = duckdb_parquet::format::CompressionCodec::SNAPPY; + continue; + } else if (roption == "gzip") { + bind_data->codec = duckdb_parquet::format::CompressionCodec::GZIP; + continue; + } else if (roption == "zstd") { + bind_data->codec = duckdb_parquet::format::CompressionCodec::ZSTD; + continue; + } + } + throw ParserException("Expected %s argument to be either [uncompressed, snappy, gzip or zstd]", loption); + } else { + throw NotImplementedException("Unrecognized option for PARQUET: %s", option.first.c_str()); + } + } + bind_data->sql_types = sql_types; + bind_data->column_names = names; + bind_data->file_name = info.file_path; + return move(bind_data); } -ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict) -{ - assert(cdict != NULL); - return cdict->matchState.cParams; +unique_ptr ParquetWriteInitializeGlobal(ClientContext 
&context, FunctionData &bind_data) { + auto global_state = make_unique(); + auto &parquet_bind = (ParquetWriteBindData &)bind_data; + + auto &fs = FileSystem::GetFileSystem(context); + global_state->writer = make_unique(fs, parquet_bind.file_name, parquet_bind.sql_types, + parquet_bind.column_names, parquet_bind.codec); + return move(global_state); } -/* ZSTD_compressBegin_usingCDict_advanced() : - * cdict must be != NULL */ -size_t ZSTD_compressBegin_usingCDict_advanced( - ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, - ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) -{ - DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); - RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!"); - { ZSTD_CCtx_params params = cctx->requestedParams; - params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF - || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER - || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN - || cdict->compressionLevel == 0 ) - && (params.attachDictPref != ZSTD_dictForceLoad) ? - ZSTD_getCParamsFromCDict(cdict) - : ZSTD_getCParams(cdict->compressionLevel, - pledgedSrcSize, - cdict->dictContentSize); - /* Increase window log to fit the entire dictionary and source if the - * source size is known. Limit the increase to 19, which is the - * window log for compression level 1 with the largest source size. - */ - if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) { - U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19); - U32 const limitedSrcLog = limitedSrcSize > 1 ? 
ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1; - params.cParams.windowLog = MAX(params.cParams.windowLog, limitedSrcLog); - } - params.fParams = fParams; - return ZSTD_compressBegin_internal(cctx, - NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, - cdict, - ¶ms, pledgedSrcSize, - ZSTDb_not_buffered); - } +void ParquetWriteSink(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate, + LocalFunctionData &lstate, DataChunk &input) { + auto &global_state = (ParquetWriteGlobalState &)gstate; + auto &local_state = (ParquetWriteLocalState &)lstate; + + // append data to the local (buffered) chunk collection + local_state.buffer->Append(input); + if (local_state.buffer->Count() > 100000) { + // if the chunk collection exceeds a certain size we flush it to the parquet file + global_state.writer->Flush(*local_state.buffer); + // and reset the buffer + local_state.buffer = make_unique(); + } } -/* ZSTD_compressBegin_usingCDict() : - * pledgedSrcSize=0 means "unknown" - * if pledgedSrcSize>0, it will enable contentSizeFlag */ -size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) -{ - ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; - DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag); - return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); +void ParquetWriteCombine(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate, + LocalFunctionData &lstate) { + auto &global_state = (ParquetWriteGlobalState &)gstate; + auto &local_state = (ParquetWriteLocalState &)lstate; + // flush any data left in the local state to the file + global_state.writer->Flush(*local_state.buffer); } -size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) -{ - 
FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */ - return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); +void ParquetWriteFinalize(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate) { + auto &global_state = (ParquetWriteGlobalState &)gstate; + // finalize: write any additional metadata to the file here + global_state.writer->Finalize(); } -/*! ZSTD_compress_usingCDict() : - * Compression using a digested Dictionary. - * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. - * Note that compression parameters are decided at CDict creation time - * while frame parameters are hardcoded */ -size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict) -{ - ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; - return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); +unique_ptr ParquetWriteInitializeLocal(ClientContext &context, FunctionData &bind_data) { + return make_unique(); +} + +unique_ptr ParquetScanReplacement(const string &table_name, void *data) { + if (!StringUtil::EndsWith(table_name, ".parquet")) { + return nullptr; + } + auto table_function = make_unique(); + vector> children; + children.push_back(make_unique(Value(table_name))); + table_function->function = make_unique("parquet_scan", move(children)); + return table_function; } +void ParquetExtension::Load(DuckDB &db) { + auto scan_fun = ParquetScanFunction::GetFunctionSet(); + CreateTableFunctionInfo cinfo(scan_fun); + cinfo.name = "read_parquet"; + CreateTableFunctionInfo pq_scan = cinfo; + pq_scan.name = "parquet_scan"; + ParquetMetaDataFunction meta_fun; + CreateTableFunctionInfo meta_cinfo(meta_fun); -/* ****************************************************************** -* Streaming 
-********************************************************************/ + ParquetSchemaFunction schema_fun; + CreateTableFunctionInfo schema_cinfo(schema_fun); -ZSTD_CStream* ZSTD_createCStream(void) -{ - DEBUGLOG(3, "ZSTD_createCStream"); - return ZSTD_createCStream_advanced(ZSTDInternalConstants::ZSTD_defaultCMem); -} + CopyFunction function("parquet"); + function.copy_to_bind = ParquetWriteBind; + function.copy_to_initialize_global = ParquetWriteInitializeGlobal; + function.copy_to_initialize_local = ParquetWriteInitializeLocal; + function.copy_to_sink = ParquetWriteSink; + function.copy_to_combine = ParquetWriteCombine; + function.copy_to_finalize = ParquetWriteFinalize; + function.copy_from_bind = ParquetScanFunction::ParquetReadBind; + function.copy_from_function = scan_fun.functions[0]; -ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize) -{ - return ZSTD_initStaticCCtx(workspace, workspaceSize); -} + function.extension = "parquet"; + CreateCopyFunctionInfo info(function); -ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem) -{ /* CStream and CCtx are now same object */ - return ZSTD_createCCtx_advanced(customMem); -} + Connection con(db); + con.BeginTransaction(); + auto &context = *con.context; + auto &catalog = Catalog::GetCatalog(context); + catalog.CreateCopyFunction(context, &info); + catalog.CreateTableFunction(context, &cinfo); + catalog.CreateTableFunction(context, &pq_scan); + catalog.CreateTableFunction(context, &meta_cinfo); + catalog.CreateTableFunction(context, &schema_cinfo); + con.Commit(); -size_t ZSTD_freeCStream(ZSTD_CStream* zcs) -{ - return ZSTD_freeCCtx(zcs); /* same object */ + auto &config = DBConfig::GetConfig(*db.instance); + config.replacement_scans.emplace_back(ParquetScanReplacement); } +} // namespace duckdb +#include -/*====== Initialization ======*/ +#ifndef DUCKDB_AMALGAMATION +#include "duckdb/common/types/blob.hpp" +#endif -size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; } 
+namespace duckdb { -size_t ZSTD_CStreamOutSize(void) -{ - return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTDInternalConstants::ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; -} +struct ParquetMetaDataBindData : public FunctionData { + vector return_types; + vector files; +}; -static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx, - const void* const dict, size_t const dictSize, ZSTD_dictContentType_e const dictContentType, - const ZSTD_CDict* const cdict, - ZSTD_CCtx_params params, unsigned long long const pledgedSrcSize) -{ - DEBUGLOG(4, "ZSTD_resetCStream_internal"); - /* Finalize the compression parameters */ - params.cParams = ZSTD_getCParamsFromCCtxParams(¶ms, pledgedSrcSize, dictSize); - /* params are supposed to be fully validated at this point */ - assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); - assert(!((dict) && (cdict))); /* either dict or cdict, not both */ +struct ParquetMetaDataOperatorData : public FunctionOperatorData { + idx_t file_index; + ChunkCollection collection; - FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, - dict, dictSize, dictContentType, ZSTD_dtlm_fast, - cdict, - ¶ms, pledgedSrcSize, - ZSTDb_buffered) , ""); + static void BindMetaData(vector &return_types, vector &names); + static void BindSchema(vector &return_types, vector &names); - cctx->inToCompress = 0; - cctx->inBuffPos = 0; - cctx->inBuffTarget = cctx->blockSize - + (cctx->blockSize == pledgedSrcSize); /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */ - cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0; - cctx->streamStage = zcss_load; - cctx->frameEnded = 0; - return 0; /* ready to go */ -} + void LoadFileMetaData(ClientContext &context, const vector &return_types, const string &file_path); + void LoadSchemaData(ClientContext &context, const vector &return_types, const string &file_path); +}; -/* ZSTD_resetCStream(): - * pledgedSrcSize == 0 means "unknown" */ 
-size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss) -{ - /* temporary : 0 interpreted as "unknown" during transition period. - * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. - * 0 will be interpreted as "empty" in the future. - */ - U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; - DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); - return 0; +template +string ConvertParquetElementToString(T &&entry) { + std::stringstream ss; + ss << entry; + return ss.str(); } -/*! ZSTD_initCStream_internal() : - * Note : for lib/compress only. Used by zstdmt_compress.c. - * Assumption 1 : params are valid - * Assumption 2 : either dict, or cdict, is defined, not both */ -size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, const ZSTD_CDict* cdict, - const ZSTD_CCtx_params* params, - unsigned long long pledgedSrcSize) -{ - DEBUGLOG(4, "ZSTD_initCStream_internal"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); - assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); - zcs->requestedParams = *params; - assert(!((dict) && (cdict))); /* either dict or cdict, not both */ - if (dict) { - FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); - } else { - /* Dictionary is cleared if !cdict */ - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); - } - return 0; +template +string PrintParquetElementToString(T &&entry) { + std::stringstream ss; + entry.printTo(ss); + return ss.str(); } -/* ZSTD_initCStream_usingCDict_advanced() : - * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */ -size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, - const 
ZSTD_CDict* cdict, - ZSTD_frameParameters fParams, - unsigned long long pledgedSrcSize) -{ - DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); - zcs->requestedParams.fParams = fParams; - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); - return 0; -} +void ParquetMetaDataOperatorData::BindMetaData(vector &return_types, vector &names) { + names.emplace_back("file_name"); + return_types.push_back(LogicalType::VARCHAR); -/* note : cdict must outlive compression session */ -size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) -{ - DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); - return 0; -} + names.emplace_back("row_group_id"); + return_types.push_back(LogicalType::BIGINT); + names.emplace_back("row_group_num_rows"); + return_types.push_back(LogicalType::BIGINT); -/* ZSTD_initCStream_advanced() : - * pledgedSrcSize must be exact. - * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. - * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */ -size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - ZSTD_parameters params, unsigned long long pss) -{ - /* for compatibility with older programs relying on this behavior. - * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. - * This line will be removed in the future. - */ - U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? 
ZSTD_CONTENTSIZE_UNKNOWN : pss; - DEBUGLOG(4, "ZSTD_initCStream_advanced"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); - FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); - zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, ¶ms); - FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); - return 0; -} + names.emplace_back("row_group_num_columns"); + return_types.push_back(LogicalType::BIGINT); -size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) -{ - DEBUGLOG(4, "ZSTD_initCStream_usingDict"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); - return 0; -} + names.emplace_back("row_group_bytes"); + return_types.push_back(LogicalType::BIGINT); -size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss) -{ - /* temporary : 0 interpreted as "unknown" during transition period. - * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. - * 0 will be interpreted as "empty" in the future. - */ - U64 const pledgedSrcSize = (pss==0) ? 
ZSTD_CONTENTSIZE_UNKNOWN : pss; - DEBUGLOG(4, "ZSTD_initCStream_srcSize"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); - return 0; -} + names.emplace_back("column_id"); + return_types.push_back(LogicalType::BIGINT); -size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) -{ - DEBUGLOG(4, "ZSTD_initCStream"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); - return 0; -} + names.emplace_back("file_offset"); + return_types.push_back(LogicalType::BIGINT); -/*====== Compression ======*/ + names.emplace_back("num_values"); + return_types.push_back(LogicalType::BIGINT); -static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx) -{ - size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos; - if (hintInSize==0) hintInSize = cctx->blockSize; - return hintInSize; -} + names.emplace_back("path_in_schema"); + return_types.push_back(LogicalType::VARCHAR); -/** ZSTD_compressStream_generic(): - * internal function for all *compressStream*() variants - * non-static, because can be called from zstdmt_compress.c - * @return : hint size for next input */ -static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, - ZSTD_outBuffer* output, - ZSTD_inBuffer* input, - ZSTD_EndDirective const flushMode) -{ - const char* const istart = (const char*)input->src; - const char* const iend = input->size != 0 ? istart + input->size : istart; - const char* ip = input->pos != 0 ? istart + input->pos : istart; - char* const ostart = (char*)output->dst; - char* const oend = output->size != 0 ? 
ostart + output->size : ostart; - char* op = output->pos != 0 ? ostart + output->pos : ostart; - U32 someMoreWork = 1; + names.emplace_back("type"); + return_types.push_back(LogicalType::VARCHAR); - /* check expectations */ - DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode); - assert(zcs->inBuff != NULL); - assert(zcs->inBuffSize > 0); - assert(zcs->outBuff != NULL); - assert(zcs->outBuffSize > 0); - assert(output->pos <= output->size); - assert(input->pos <= input->size); + names.emplace_back("stats_min"); + return_types.push_back(LogicalType::VARCHAR); - while (someMoreWork) { - switch(zcs->streamStage) - { - case zcss_init: - RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!"); + names.emplace_back("stats_max"); + return_types.push_back(LogicalType::VARCHAR); - case zcss_load: - if ( (flushMode == ZSTD_e_end) - && ((size_t)(oend-op) >= ZSTD_compressBound(iend-ip)) /* enough dstCapacity */ - && (zcs->inBuffPos == 0) ) { - /* shortcut to compression pass directly into output buffer */ - size_t const cSize = ZSTD_compressEnd(zcs, - op, oend-op, ip, iend-ip); - DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); - FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed"); - ip = iend; - op += cSize; - zcs->frameEnded = 1; - ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - someMoreWork = 0; break; - } - /* complete loading into inBuffer */ - { size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; - size_t const loaded = ZSTD_limitCopy( - zcs->inBuff + zcs->inBuffPos, toLoad, - ip, iend-ip); - zcs->inBuffPos += loaded; - if (loaded != 0) - ip += loaded; - if ( (flushMode == ZSTD_e_continue) - && (zcs->inBuffPos < zcs->inBuffTarget) ) { - /* not enough input to fill full block : stop here */ - someMoreWork = 0; break; - } - if ( (flushMode == ZSTD_e_flush) - && (zcs->inBuffPos == zcs->inToCompress) ) { - /* empty */ - someMoreWork = 0; break; - } - } - /* compress current block (note : this stage cannot be stopped in the middle) 
*/ - DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode); - { void* cDst; - size_t cSize; - size_t const iSize = zcs->inBuffPos - zcs->inToCompress; - size_t oSize = oend-op; - unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); - if (oSize >= ZSTD_compressBound(iSize)) - cDst = op; /* compress into output buffer, to skip flush stage */ - else - cDst = zcs->outBuff, oSize = zcs->outBuffSize; - cSize = lastBlock ? - ZSTD_compressEnd(zcs, cDst, oSize, - zcs->inBuff + zcs->inToCompress, iSize) : - ZSTD_compressContinue(zcs, cDst, oSize, - zcs->inBuff + zcs->inToCompress, iSize); - FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); - zcs->frameEnded = lastBlock; - /* prepare next block */ - zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; - if (zcs->inBuffTarget > zcs->inBuffSize) - zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; - DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", - (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize); - if (!lastBlock) - assert(zcs->inBuffTarget <= zcs->inBuffSize); - zcs->inToCompress = zcs->inBuffPos; - if (cDst == op) { /* no need to flush */ - op += cSize; - if (zcs->frameEnded) { - DEBUGLOG(5, "Frame completed directly in outBuffer"); - someMoreWork = 0; - ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - } - break; - } - zcs->outBuffContentSize = cSize; - zcs->outBuffFlushedSize = 0; - zcs->streamStage = zcss_flush; /* pass-through to flush stage */ - } - /* fall-through */ - case zcss_flush: - DEBUGLOG(5, "flush stage"); - { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; - size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op), - zcs->outBuff + zcs->outBuffFlushedSize, toFlush); - DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", - (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); - if (flushed) - op += flushed; - zcs->outBuffFlushedSize += flushed; - if (toFlush!=flushed) { - /* flush not 
fully completed, presumably because dst is too small */ - assert(op==oend); - someMoreWork = 0; - break; - } - zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; - if (zcs->frameEnded) { - DEBUGLOG(5, "Frame completed on flush"); - someMoreWork = 0; - ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - break; - } - zcs->streamStage = zcss_load; - break; - } + names.emplace_back("stats_null_count"); + return_types.push_back(LogicalType::BIGINT); - default: /* impossible */ - assert(0); - } - } + names.emplace_back("stats_distinct_count"); + return_types.push_back(LogicalType::BIGINT); - input->pos = ip - istart; - output->pos = op - ostart; - if (zcs->frameEnded) return 0; - return ZSTD_nextInputSizeHint(zcs); -} + names.emplace_back("stats_min_value"); + return_types.push_back(LogicalType::VARCHAR); -static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx) -{ -#ifdef ZSTD_MULTITHREAD - if (cctx->appliedParams.nbWorkers >= 1) { - assert(cctx->mtctx != NULL); - return ZSTDMT_nextInputSizeHint(cctx->mtctx); - } -#endif - return ZSTD_nextInputSizeHint(cctx); + names.emplace_back("stats_max_value"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("compression"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("encodings"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("index_page_offset"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("dictionary_page_offset"); + return_types.push_back(LogicalType::BIGINT); + names.emplace_back("data_page_offset"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("total_compressed_size"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("total_uncompressed_size"); + return_types.push_back(LogicalType::BIGINT); } -size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) -{ - FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , ""); - 
return ZSTD_nextInputSizeHint_MTorST(zcs); +Value ConvertParquetStats(duckdb_parquet::format::Type::type type, bool stats_is_set, const std::string &stats) { + if (!stats_is_set) { + return Value(LogicalType::VARCHAR); + } + switch (type) { + case Type::BOOLEAN: + if (stats.size() == sizeof(bool)) { + return Value(Value::BOOLEAN(Load((data_ptr_t)stats.c_str())).ToString()); + } + break; + case Type::INT32: + if (stats.size() == sizeof(int32_t)) { + return Value(Value::INTEGER(Load((data_ptr_t)stats.c_str())).ToString()); + } + break; + case Type::INT64: + if (stats.size() == sizeof(int64_t)) { + return Value(Value::BIGINT(Load((data_ptr_t)stats.c_str())).ToString()); + } + break; + case Type::FLOAT: + if (stats.size() == sizeof(float)) { + float val = Load((data_ptr_t)stats.c_str()); + if (Value::FloatIsValid(val)) { + return Value(Value::FLOAT(val).ToString()); + } + } + break; + case Type::DOUBLE: + if (stats.size() == sizeof(double)) { + double val = Load((data_ptr_t)stats.c_str()); + if (Value::DoubleIsValid(val)) { + return Value(Value::DOUBLE(val).ToString()); + } + } + break; + case Type::BYTE_ARRAY: + case Type::INT96: + case Type::FIXED_LEN_BYTE_ARRAY: + default: + break; + } + if (Value::StringIsValid(stats)) { + return Value(stats); + } else { + return Value(Blob::ToString(string_t(stats))); + } } +void ParquetMetaDataOperatorData::LoadFileMetaData(ClientContext &context, const vector &return_types, + const string &file_path) { + collection.Reset(); -size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, - ZSTD_outBuffer* output, - ZSTD_inBuffer* input, - ZSTD_EndDirective endOp) -{ - DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp); - /* check conditions */ - RETURN_ERROR_IF(output->pos > output->size, GENERIC, "invalid buffer"); - RETURN_ERROR_IF(input->pos > input->size, GENERIC, "invalid buffer"); - assert(cctx!=NULL); + auto reader = make_unique(context, file_path); + idx_t count = 0; + DataChunk current_chunk; + 
current_chunk.Initialize(return_types); + auto meta_data = reader->GetFileMetadata(); + for (idx_t row_group_idx = 0; row_group_idx < meta_data->row_groups.size(); row_group_idx++) { + auto &row_group = meta_data->row_groups[row_group_idx]; - /* transparent initialization stage */ - if (cctx->streamStage == zcss_init) { - ZSTD_CCtx_params params = cctx->requestedParams; - ZSTD_prefixDict const prefixDict = cctx->prefixDict; - FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */ - memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ - assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ - DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); - if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1; /* auto-fix pledgedSrcSize */ - params.cParams = ZSTD_getCParamsFromCCtxParams( - &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/); + for (idx_t col_idx = 0; col_idx < row_group.columns.size(); col_idx++) { + auto &column = row_group.columns[col_idx]; + auto &col_meta = column.meta_data; + auto &stats = col_meta.statistics; + // file_name, LogicalType::VARCHAR + current_chunk.SetValue(0, count, file_path); -#ifdef ZSTD_MULTITHREAD - if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { - params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */ - } - if (params.nbWorkers > 0) { - /* mt context creation */ - if (cctx->mtctx == NULL) { - DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", - params.nbWorkers); - cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem); - RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!"); - } - /* mt compression */ - DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers); - FORWARD_IF_ERROR( ZSTDMT_initCStream_internal( - cctx->mtctx, - prefixDict.dict, prefixDict.dictSize, 
prefixDict.dictContentType, - cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , ""); - cctx->streamStage = zcss_load; - cctx->appliedParams.nbWorkers = params.nbWorkers; - } else -#endif - { FORWARD_IF_ERROR( ZSTD_resetCStream_internal(cctx, - prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, - cctx->cdict, - params, cctx->pledgedSrcSizePlusOne-1) , ""); - assert(cctx->streamStage == zcss_load); - assert(cctx->appliedParams.nbWorkers == 0); - } } - /* end of transparent initialization stage */ + // row_group_id, LogicalType::BIGINT + current_chunk.SetValue(1, count, Value::BIGINT(row_group_idx)); - /* compression stage */ -#ifdef ZSTD_MULTITHREAD - if (cctx->appliedParams.nbWorkers > 0) { - int const forceMaxProgress = (endOp == ZSTD_e_flush || endOp == ZSTD_e_end); - size_t flushMin; - assert(forceMaxProgress || endOp == ZSTD_e_continue /* Protection for a new flush type */); - if (cctx->cParamsChanged) { - ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams); - cctx->cParamsChanged = 0; - } - do { - flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); - if ( ZSTD_isError(flushMin) - || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ - ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); - } - FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed"); - } while (forceMaxProgress && flushMin != 0 && output->pos < output->size); - DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic"); - /* Either we don't require maximum forward progress, we've finished the - * flush, or we are out of output space. 
- */ - assert(!forceMaxProgress || flushMin == 0 || output->pos == output->size); - return flushMin; - } -#endif - FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , ""); - DEBUGLOG(5, "completed ZSTD_compressStream2"); - return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ -} + // row_group_num_rows, LogicalType::BIGINT + current_chunk.SetValue(2, count, Value::BIGINT(row_group.num_rows)); -size_t ZSTD_compressStream2_simpleArgs ( - ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, size_t* dstPos, - const void* src, size_t srcSize, size_t* srcPos, - ZSTD_EndDirective endOp) -{ - ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; - ZSTD_inBuffer input = { src, srcSize, *srcPos }; - /* ZSTD_compressStream2() will check validity of dstPos and srcPos */ - size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp); - *dstPos = output.pos; - *srcPos = input.pos; - return cErr; -} + // row_group_num_columns, LogicalType::BIGINT + current_chunk.SetValue(3, count, Value::BIGINT(row_group.columns.size())); -size_t ZSTD_compress2(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize); - ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); - { size_t oPos = 0; - size_t iPos = 0; - size_t const result = ZSTD_compressStream2_simpleArgs(cctx, - dst, dstCapacity, &oPos, - src, srcSize, &iPos, - ZSTD_e_end); - FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed"); - if (result != 0) { /* compression not completed, due to lack of output space */ - assert(oPos == dstCapacity); - RETURN_ERROR(dstSize_tooSmall, ""); - } - assert(iPos == srcSize); /* all input is expected consumed */ - return oPos; - } + // row_group_bytes, LogicalType::BIGINT + current_chunk.SetValue(4, count, Value::BIGINT(row_group.total_byte_size)); + + // column_id, LogicalType::BIGINT + current_chunk.SetValue(5, count, 
Value::BIGINT(col_idx)); + + // file_offset, LogicalType::BIGINT + current_chunk.SetValue(6, count, Value::BIGINT(column.file_offset)); + + // num_values, LogicalType::BIGINT + current_chunk.SetValue(7, count, Value::BIGINT(col_meta.num_values)); + + // path_in_schema, LogicalType::VARCHAR + current_chunk.SetValue(8, count, StringUtil::Join(col_meta.path_in_schema, ", ")); + + // type, LogicalType::VARCHAR + current_chunk.SetValue(9, count, ConvertParquetElementToString(col_meta.type)); + + // stats_min, LogicalType::VARCHAR + current_chunk.SetValue(10, count, ConvertParquetStats(col_meta.type, stats.__isset.min, stats.min)); + + // stats_max, LogicalType::VARCHAR + current_chunk.SetValue(11, count, ConvertParquetStats(col_meta.type, stats.__isset.max, stats.max)); + + // stats_null_count, LogicalType::BIGINT + current_chunk.SetValue( + 12, count, stats.__isset.null_count ? Value::BIGINT(stats.null_count) : Value(LogicalType::BIGINT)); + + // stats_distinct_count, LogicalType::BIGINT + current_chunk.SetValue(13, count, + stats.__isset.distinct_count ? 
Value::BIGINT(stats.distinct_count) + : Value(LogicalType::BIGINT)); + + // stats_min_value, LogicalType::VARCHAR + current_chunk.SetValue(14, count, + ConvertParquetStats(col_meta.type, stats.__isset.min_value, stats.min_value)); + + // stats_max_value, LogicalType::VARCHAR + current_chunk.SetValue(15, count, + ConvertParquetStats(col_meta.type, stats.__isset.max_value, stats.max_value)); + + // compression, LogicalType::VARCHAR + current_chunk.SetValue(16, count, ConvertParquetElementToString(col_meta.codec)); + + // encodings, LogicalType::VARCHAR + vector encoding_string; + for (auto &encoding : col_meta.encodings) { + encoding_string.push_back(ConvertParquetElementToString(encoding)); + } + current_chunk.SetValue(17, count, Value(StringUtil::Join(encoding_string, ", "))); + + // index_page_offset, LogicalType::BIGINT + current_chunk.SetValue(18, count, Value::BIGINT(col_meta.index_page_offset)); + + // dictionary_page_offset, LogicalType::BIGINT + current_chunk.SetValue(19, count, Value::BIGINT(col_meta.dictionary_page_offset)); + + // data_page_offset, LogicalType::BIGINT + current_chunk.SetValue(20, count, Value::BIGINT(col_meta.data_page_offset)); + + // total_compressed_size, LogicalType::BIGINT + current_chunk.SetValue(21, count, Value::BIGINT(col_meta.total_compressed_size)); + + // total_uncompressed_size, LogicalType::BIGINT + current_chunk.SetValue(22, count, Value::BIGINT(col_meta.total_uncompressed_size)); + + count++; + if (count >= STANDARD_VECTOR_SIZE) { + current_chunk.SetCardinality(count); + collection.Append(current_chunk); + + count = 0; + current_chunk.Reset(); + } + } + } + current_chunk.SetCardinality(count); + collection.Append(current_chunk); } -/*====== Finalize ======*/ +void ParquetMetaDataOperatorData::BindSchema(vector &return_types, vector &names) { + names.emplace_back("file_name"); + return_types.push_back(LogicalType::VARCHAR); -/*! 
ZSTD_flushStream() : - * @return : amount of data remaining to flush */ -size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) -{ - ZSTD_inBuffer input = { NULL, 0, 0 }; - return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush); + names.emplace_back("name"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("type"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("type_length"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("repetition_type"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("num_children"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("converted_type"); + return_types.push_back(LogicalType::VARCHAR); + + names.emplace_back("scale"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("precision"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("field_id"); + return_types.push_back(LogicalType::BIGINT); + + names.emplace_back("logical_type"); + return_types.push_back(LogicalType::VARCHAR); } +Value ParquetLogicalTypeToString(const duckdb_parquet::format::LogicalType &type) { -size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) -{ - ZSTD_inBuffer input = { NULL, 0, 0 }; - size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end); - FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed"); - if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ - /* single thread mode : attempt to calculate remaining to flush more precisely */ - { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; - size_t const checksumSize = (size_t)(zcs->frameEnded ? 
0 : zcs->appliedParams.fParams.checksumFlag * 4); - size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize; - DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush); - return toFlush; - } + if (type.__isset.STRING) { + return Value(PrintParquetElementToString(type.STRING)); + } + if (type.__isset.MAP) { + return Value(PrintParquetElementToString(type.MAP)); + } + if (type.__isset.LIST) { + return Value(PrintParquetElementToString(type.LIST)); + } + if (type.__isset.ENUM) { + return Value(PrintParquetElementToString(type.ENUM)); + } + if (type.__isset.DECIMAL) { + return Value(PrintParquetElementToString(type.DECIMAL)); + } + if (type.__isset.DATE) { + return Value(PrintParquetElementToString(type.DATE)); + } + if (type.__isset.TIME) { + return Value(PrintParquetElementToString(type.TIME)); + } + if (type.__isset.TIMESTAMP) { + return Value(PrintParquetElementToString(type.TIMESTAMP)); + } + if (type.__isset.INTEGER) { + return Value(PrintParquetElementToString(type.INTEGER)); + } + if (type.__isset.UNKNOWN) { + return Value(PrintParquetElementToString(type.UNKNOWN)); + } + if (type.__isset.JSON) { + return Value(PrintParquetElementToString(type.JSON)); + } + if (type.__isset.BSON) { + return Value(PrintParquetElementToString(type.BSON)); + } + if (type.__isset.UUID) { + return Value(PrintParquetElementToString(type.UUID)); + } + return Value(); } +void ParquetMetaDataOperatorData::LoadSchemaData(ClientContext &context, const vector &return_types, + const string &file_path) { + collection.Reset(); -/*-===== Pre-defined compression levels =====-*/ + auto reader = make_unique(context, file_path); + idx_t count = 0; + DataChunk current_chunk; + current_chunk.Initialize(return_types); + auto meta_data = reader->GetFileMetadata(); + for (idx_t col_idx = 0; col_idx < meta_data->schema.size(); col_idx++) { + auto &column = meta_data->schema[col_idx]; -#define ZSTD_MAX_CLEVEL 22 -int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } -int 
ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; } + // file_name, LogicalType::VARCHAR + current_chunk.SetValue(0, count, file_path); -static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { -{ /* "default" - for any srcSize > 256 KB */ - /* W, C, H, S, L, TL, strat */ - { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ - { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ - { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */ - { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */ - { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */ - { 21, 18, 19, 2, 5, 2, ZSTD_greedy }, /* level 5 */ - { 21, 19, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */ - { 21, 19, 19, 3, 5, 8, ZSTD_lazy }, /* level 7 */ - { 21, 19, 19, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ - { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ - { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ - { 22, 21, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ - { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ - { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 13 */ - { 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */ - { 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */ - { 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */ - { 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */ - { 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */ - { 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */ - { 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */ - { 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */ - { 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */ -}, -{ /* for srcSize <= 256 KB */ - /* W, C, H, S, L, T, strat */ - { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ - { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */ - { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */ - { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */ - { 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/ - { 18, 18, 18, 3, 5, 2, ZSTD_greedy }, /* level 5.*/ - { 
18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ - { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */ - { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ - { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ - { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ - { 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/ - { 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/ - { 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */ - { 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ - { 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/ - { 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/ - { 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/ - { 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/ - { 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ - { 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/ - { 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/ - { 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/ -}, -{ /* for srcSize <= 128 KB */ - /* W, C, H, S, L, T, strat */ - { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ - { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */ - { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */ - { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */ - { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */ - { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ - { 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ - { 17, 17, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ - { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ - { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ - { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ - { 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */ - { 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */ - { 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/ - { 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ - { 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/ - { 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/ - { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/ - 
{ 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/ - { 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/ - { 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/ - { 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ - { 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/ -}, -{ /* for srcSize <= 16 KB */ - /* W, C, H, S, L, T, strat */ - { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ - { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */ - { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */ - { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */ - { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */ - { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/ - { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ - { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ - { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/ - { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/ - { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/ - { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/ - { 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/ - { 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/ - { 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/ - { 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/ - { 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/ - { 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/ - { 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/ - { 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ - { 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/ - { 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ - { 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/ -}, -}; + // name, LogicalType::VARCHAR + current_chunk.SetValue(1, count, column.name); -/*! ZSTD_getCParams_internal() : - * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. - * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown. - * Use dictSize == 0 for unknown or unused. 
*/ -static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) -{ - int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN; - size_t const addedSize = unknown && dictSize > 0 ? 500 : 0; - U64 const rSize = unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize; - U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); - int row = compressionLevel; - DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel); - if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ - if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */ - if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; - { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; - if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */ - /* refine parameters based on srcSize & dictSize */ - return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); - } + // type, LogicalType::VARCHAR + current_chunk.SetValue(2, count, ConvertParquetElementToString(column.type)); + + // type_length, LogicalType::VARCHAR + current_chunk.SetValue(3, count, Value::INTEGER(column.type_length)); + + // repetition_type, LogicalType::VARCHAR + current_chunk.SetValue(4, count, ConvertParquetElementToString(column.repetition_type)); + + // num_children, LogicalType::BIGINT + current_chunk.SetValue(5, count, Value::BIGINT(column.num_children)); + + // converted_type, LogicalType::VARCHAR + current_chunk.SetValue(6, count, ConvertParquetElementToString(column.converted_type)); + + // scale, LogicalType::BIGINT + current_chunk.SetValue(7, count, Value::BIGINT(column.scale)); + + // precision, LogicalType::BIGINT + current_chunk.SetValue(8, count, Value::BIGINT(column.precision)); + + // field_id, LogicalType::BIGINT + current_chunk.SetValue(9, count, Value::BIGINT(column.field_id)); + + // 
logical_type, LogicalType::VARCHAR + current_chunk.SetValue(10, count, ParquetLogicalTypeToString(column.logicalType)); + + count++; + if (count >= STANDARD_VECTOR_SIZE) { + current_chunk.SetCardinality(count); + collection.Append(current_chunk); + + count = 0; + current_chunk.Reset(); + } + } + current_chunk.SetCardinality(count); + collection.Append(current_chunk); } -/*! ZSTD_getCParams() : - * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. - * Size values are optional, provide 0 if not known or unused */ -ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) -{ - if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; - return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize); +template +unique_ptr ParquetMetaDataBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, vector &input_table_names, + vector &return_types, vector &names) { + if (SCHEMA) { + ParquetMetaDataOperatorData::BindSchema(return_types, names); + } else { + ParquetMetaDataOperatorData::BindMetaData(return_types, names); + } + + auto file_name = inputs[0].GetValue(); + auto result = make_unique(); + + FileSystem &fs = FileSystem::GetFileSystem(context); + result->return_types = return_types; + result->files = fs.Glob(file_name); + if (result->files.empty()) { + throw IOException("No files found that match the pattern \"%s\"", file_name); + } + return move(result); } -/*! ZSTD_getParams() : - * same idea as ZSTD_getCParams() - * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). 
- * Fields of `ZSTD_frameParameters` are set to default values */ -static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { - ZSTD_parameters params; - ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize); - DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); - memset(¶ms, 0, sizeof(params)); - params.cParams = cParams; - params.fParams.contentSizeFlag = 1; - return params; +template +unique_ptr ParquetMetaDataInit(ClientContext &context, const FunctionData *bind_data_p, + const vector &column_ids, + TableFilterCollection *filters) { + auto &bind_data = (ParquetMetaDataBindData &)*bind_data_p; + D_ASSERT(!bind_data.files.empty()); + + auto result = make_unique(); + if (SCHEMA) { + result->LoadSchemaData(context, bind_data.return_types, bind_data.files[0]); + } else { + result->LoadFileMetaData(context, bind_data.return_types, bind_data.files[0]); + } + result->file_index = 0; + return move(result); +} + +template +void ParquetMetaDataImplementation(ClientContext &context, const FunctionData *bind_data_p, + FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { + auto &data = (ParquetMetaDataOperatorData &)*operator_state; + auto &bind_data = (ParquetMetaDataBindData &)*bind_data_p; + while (true) { + auto chunk = data.collection.Fetch(); + if (!chunk) { + if (data.file_index + 1 < bind_data.files.size()) { + // load the metadata for the next file + data.file_index++; + if (SCHEMA) { + data.LoadSchemaData(context, bind_data.return_types, bind_data.files[data.file_index]); + } else { + data.LoadFileMetaData(context, bind_data.return_types, bind_data.files[data.file_index]); + } + continue; + } else { + // no files remaining: done + return; + } + } + output.Move(*chunk); + if (output.size() != 0) { + return; + } + } } -/*! 
ZSTD_getParams() : - * same idea as ZSTD_getCParams() - * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). - * Fields of `ZSTD_frameParameters` are set to default values */ -ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { - if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; - return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize); +ParquetMetaDataFunction::ParquetMetaDataFunction() + : TableFunction("parquet_metadata", {LogicalType::VARCHAR}, ParquetMetaDataImplementation, + ParquetMetaDataBind, ParquetMetaDataInit, /* statistics */ nullptr, + /* cleanup */ nullptr, + /* dependency */ nullptr, nullptr, + /* pushdown_complex_filter */ nullptr, /* to_string */ nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, false, false, nullptr) { } +ParquetSchemaFunction::ParquetSchemaFunction() + : TableFunction("parquet_schema", {LogicalType::VARCHAR}, ParquetMetaDataImplementation, + ParquetMetaDataBind, ParquetMetaDataInit, /* statistics */ nullptr, + /* cleanup */ nullptr, + /* dependency */ nullptr, nullptr, + /* pushdown_complex_filter */ nullptr, /* to_string */ nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, false, false, nullptr) { } +} // namespace duckdb -// LICENSE_CHANGE_END -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
- */ - /*-************************************* - * Dependencies - ***************************************/ -namespace duckdb_zstd { -size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - BYTE* const ostart = (BYTE* const)dst; - U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); - RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, ""); - switch(flSize) - { - case 1: /* 2 - 1 - 5 */ - ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); - break; - case 2: /* 2 - 2 - 12 */ - MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); - break; - case 3: /* 2 - 2 - 20 */ - MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); - break; - default: /* not necessary : flSize is {1,2,3} */ - assert(0); - } - memcpy(ostart + flSize, src, srcSize); - DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize)); - return srcSize + flSize; + + + + + + + +#include "duckdb.hpp" +#ifndef DUCKDB_AMALGAMATION +#include "duckdb/planner/table_filter.hpp" +#include "duckdb/planner/filter/constant_filter.hpp" +#include "duckdb/planner/filter/null_filter.hpp" +#include "duckdb/planner/filter/conjunction_filter.hpp" +#include "duckdb/common/file_system.hpp" +#include "duckdb/common/string_util.hpp" +#include "duckdb/common/types/date.hpp" +#include "duckdb/common/pair.hpp" + +#include "duckdb/storage/object_cache.hpp" +#endif + +#include +#include +#include +#include +#include + +namespace duckdb { + +using duckdb_parquet::format::ColumnChunk; +using duckdb_parquet::format::ConvertedType; +using duckdb_parquet::format::FieldRepetitionType; +using duckdb_parquet::format::FileMetaData; +using ParquetRowGroup = duckdb_parquet::format::RowGroup; +using duckdb_parquet::format::SchemaElement; +using duckdb_parquet::format::Statistics; +using duckdb_parquet::format::Type; + +static unique_ptr CreateThriftProtocol(FileHandle &file_handle) { + shared_ptr trans(new 
ThriftFileTransport(file_handle)); + return make_unique>(trans); } -size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - BYTE* const ostart = (BYTE* const)dst; - U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); +static shared_ptr LoadMetadata(Allocator &allocator, FileHandle &file_handle) { + auto current_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); - (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ + auto proto = CreateThriftProtocol(file_handle); + auto &transport = ((ThriftFileTransport &)*proto->getTransport()); + auto file_size = transport.GetSize(); + if (file_size < 12) { + throw InvalidInputException("File '%s' too small to be a Parquet file", file_handle.path); + } - switch(flSize) - { - case 1: /* 2 - 1 - 5 */ - ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); - break; - case 2: /* 2 - 2 - 12 */ - MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); - break; - case 3: /* 2 - 2 - 20 */ - MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); - break; - default: /* not necessary : flSize is {1,2,3} */ - assert(0); - } + ResizeableBuffer buf; + buf.resize(allocator, 8); + buf.zero(); - ostart[flSize] = *(const BYTE*)src; - DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1); - return flSize+1; + transport.SetLocation(file_size - 8); + transport.read((uint8_t *)buf.ptr, 8); + + if (strncmp(buf.ptr + 4, "PAR1", 4) != 0) { + throw InvalidInputException("No magic bytes found at end of file '%s'", file_handle.path); + } + // read four-byte footer length from just before the end magic bytes + auto footer_len = *(uint32_t *)buf.ptr; + if (footer_len <= 0 || file_size < 12 + footer_len) { + throw InvalidInputException("Footer length error in file '%s'", file_handle.path); + } + auto metadata_pos = file_size - (footer_len + 8); + transport.SetLocation(metadata_pos); + + auto metadata = 
make_unique(); + metadata->read(proto.get()); + return make_shared(move(metadata), current_time); } -size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, - ZSTD_hufCTables_t* nextHuf, - ZSTD_strategy strategy, int disableLiteralCompression, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - void* entropyWorkspace, size_t entropyWorkspaceSize, - const int bmi2) -{ - size_t const minGain = ZSTD_minGain(srcSize, strategy); - size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); - BYTE* const ostart = (BYTE*)dst; - U32 singleStream = srcSize < 256; - symbolEncodingType_e hType = set_compressed; - size_t cLitSize; +static LogicalType DeriveLogicalType(const SchemaElement &s_ele) { + // inner node + D_ASSERT(s_ele.__isset.type && s_ele.num_children == 0); + switch (s_ele.type) { + case Type::BOOLEAN: + return LogicalType::BOOLEAN; + case Type::INT32: + if (s_ele.__isset.converted_type) { + switch (s_ele.converted_type) { + case ConvertedType::DATE: + return LogicalType::DATE; + case ConvertedType::UINT_8: + return LogicalType::UTINYINT; + case ConvertedType::UINT_16: + return LogicalType::USMALLINT; + default: + return LogicalType::INTEGER; + } + } + return LogicalType::INTEGER; + case Type::INT64: + if (s_ele.__isset.converted_type) { + switch (s_ele.converted_type) { + case ConvertedType::TIMESTAMP_MICROS: + case ConvertedType::TIMESTAMP_MILLIS: + return LogicalType::TIMESTAMP; + case ConvertedType::UINT_32: + return LogicalType::UINTEGER; + case ConvertedType::UINT_64: + return LogicalType::UBIGINT; + default: + return LogicalType::BIGINT; + } + } + return LogicalType::BIGINT; - DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)", - disableLiteralCompression, (U32)srcSize); + case Type::INT96: // always a timestamp it would seem + return LogicalType::TIMESTAMP; + case Type::FLOAT: + return LogicalType::FLOAT; + case Type::DOUBLE: + return LogicalType::DOUBLE; + case Type::BYTE_ARRAY: + case 
Type::FIXED_LEN_BYTE_ARRAY: + if (s_ele.type == Type::FIXED_LEN_BYTE_ARRAY && !s_ele.__isset.type_length) { + return LogicalType::INVALID; + } + if (s_ele.__isset.converted_type) { + switch (s_ele.converted_type) { + case ConvertedType::DECIMAL: + if (s_ele.type == Type::FIXED_LEN_BYTE_ARRAY && s_ele.__isset.scale && s_ele.__isset.type_length) { + return LogicalType::DECIMAL(s_ele.precision, s_ele.scale); + } + return LogicalType::INVALID; - /* Prepare nextEntropy assuming reusing the existing table */ - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + case ConvertedType::UTF8: + return LogicalType::VARCHAR; + default: + return LogicalType::BLOB; + } + } + return LogicalType::BLOB; + default: + return LogicalType::INVALID; + } +} - if (disableLiteralCompression) - return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); +static unique_ptr CreateReaderRecursive(ParquetReader &reader, const FileMetaData *file_meta_data, + idx_t depth, idx_t max_define, idx_t max_repeat, + idx_t &next_schema_idx, idx_t &next_file_idx) { + D_ASSERT(file_meta_data); + D_ASSERT(next_schema_idx < file_meta_data->schema.size()); + auto &s_ele = file_meta_data->schema[next_schema_idx]; + auto this_idx = next_schema_idx; - /* small ? don't even attempt compression (speed opt) */ -# define COMPRESS_LITERALS_SIZE_MIN 63 - { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; - if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - } + if (s_ele.__isset.repetition_type) { + if (s_ele.repetition_type != FieldRepetitionType::REQUIRED) { + max_define++; + } + if (s_ele.repetition_type == FieldRepetitionType::REPEATED) { + max_repeat++; + } + } - RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); - { HUF_repeat repeat = prevHuf->repeatMode; - int const preferRepeat = strategy < ZSTD_lazy ? 
srcSize <= 1024 : 0; - if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; - cLitSize = singleStream ? - HUF_compress1X_repeat( - ostart+lhSize, dstCapacity-lhSize, src, srcSize, - HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, - (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) : - HUF_compress4X_repeat( - ostart+lhSize, dstCapacity-lhSize, src, srcSize, - HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, - (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); - if (repeat != HUF_repeat_none) { - /* reused the existing table */ - DEBUGLOG(5, "Reusing previous huffman table"); - hType = set_repeat; - } - } + if (!s_ele.__isset.type) { // inner node + if (s_ele.num_children == 0) { + throw std::runtime_error("Node has no children but should"); + } + child_list_t child_types; + vector> child_readers; - if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - } - if (cLitSize==1) { - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); - } + idx_t c_idx = 0; + while (c_idx < (idx_t)s_ele.num_children) { + next_schema_idx++; - if (hType == set_compressed) { - /* using a newly constructed table */ - nextHuf->repeatMode = HUF_repeat_check; - } + auto &child_ele = file_meta_data->schema[next_schema_idx]; + + auto child_reader = CreateReaderRecursive(reader, file_meta_data, depth + 1, max_define, max_repeat, + next_schema_idx, next_file_idx); + child_types.push_back(make_pair(child_ele.name, child_reader->Type())); + child_readers.push_back(move(child_reader)); - /* Build header */ - switch(lhSize) - { - case 3: /* 2 - 2 - 10 - 10 */ - { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); - MEM_writeLE24(ostart, lhc); - break; - } - case 4: /* 2 - 2 
- 14 - 14 */ - { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); - MEM_writeLE32(ostart, lhc); - break; - } - case 5: /* 2 - 2 - 18 - 18 */ - { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); - MEM_writeLE32(ostart, lhc); - ostart[4] = (BYTE)(cLitSize >> 10); - break; - } - default: /* not possible : lhSize is {3,4,5} */ - assert(0); - } - DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize)); - return lhSize+cLitSize; + c_idx++; + } + D_ASSERT(!child_types.empty()); + unique_ptr result; + LogicalType result_type; + // if we only have a single child no reason to create a struct ay + if (child_types.size() > 1 || depth == 0) { + result_type = LogicalType::STRUCT(move(child_types)); + result = make_unique(reader, result_type, s_ele, this_idx, max_define, max_repeat, + move(child_readers)); + } else { + // if we have a struct with only a single type, pull up + result_type = child_types[0].second; + result = move(child_readers[0]); + } + if (s_ele.repetition_type == FieldRepetitionType::REPEATED) { + result_type = LogicalType::LIST(result_type); + return make_unique(reader, result_type, s_ele, this_idx, max_define, max_repeat, + move(result)); + } + return result; + } else { // leaf node + // TODO check return value of derive type or should we only do this on read() + return ColumnReader::CreateReader(reader, DeriveLogicalType(s_ele), s_ele, next_file_idx++, max_define, + max_repeat); + } } -} +// TODO we don't need readers for columns we are not going to read ay +static unique_ptr CreateReader(ParquetReader &reader, const FileMetaData *file_meta_data) { + idx_t next_schema_idx = 0; + idx_t next_file_idx = 0; + auto ret = CreateReaderRecursive(reader, file_meta_data, 0, 0, 0, next_schema_idx, next_file_idx); + D_ASSERT(next_schema_idx == file_meta_data->schema.size() - 1); + D_ASSERT(file_meta_data->row_groups.empty() || next_file_idx == file_meta_data->row_groups[0].columns.size()); + 
return ret; +} -// LICENSE_CHANGE_END +void ParquetReader::InitializeSchema(const vector &expected_types_p, const string &initial_filename_p) { + auto file_meta_data = GetFileMetadata(); + if (file_meta_data->__isset.encryption_algorithm) { + throw FormatException("Encrypted Parquet files are not supported"); + } + // check if we like this schema + if (file_meta_data->schema.size() < 2) { + throw FormatException("Need at least one non-root column in the file"); + } -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list + bool has_expected_types = !expected_types_p.empty(); + auto root_reader = CreateReader(*this, file_meta_data); -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
- */ + auto &root_type = root_reader->Type(); + auto &child_types = StructType::GetChildTypes(root_type); + D_ASSERT(root_type.id() == LogicalTypeId::STRUCT); + if (has_expected_types && child_types.size() != expected_types_p.size()) { + throw FormatException("column count mismatch"); + } + idx_t col_idx = 0; + for (auto &type_pair : child_types) { + if (has_expected_types && expected_types_p[col_idx] != type_pair.second) { + if (initial_filename_p.empty()) { + throw FormatException("column \"%d\" in parquet file is of type %s, could not auto cast to " + "expected type %s for this column", + col_idx, type_pair.second, expected_types_p[col_idx].ToString()); + } else { + throw FormatException("schema mismatch in Parquet glob: column \"%d\" in parquet file is of type " + "%s, but in the original file \"%s\" this column is of type \"%s\"", + col_idx, type_pair.second, initial_filename_p, + expected_types_p[col_idx].ToString()); + } + } else { + names.push_back(type_pair.first); + return_types.push_back(type_pair.second); + } + col_idx++; + } + D_ASSERT(!names.empty()); + D_ASSERT(!return_types.empty()); +} - /*-************************************* - * Dependencies - ***************************************/ +ParquetReader::ParquetReader(Allocator &allocator_p, unique_ptr file_handle_p, + const vector &expected_types_p, const string &initial_filename_p) + : allocator(allocator_p) { + file_name = file_handle_p->path; + file_handle = move(file_handle_p); + metadata = LoadMetadata(allocator, *file_handle); + InitializeSchema(expected_types_p, initial_filename_p); +} +ParquetReader::ParquetReader(ClientContext &context_p, string file_name_p, const vector &expected_types_p, + const string &initial_filename_p) + : allocator(Allocator::Get(context_p)) { + auto &fs = FileSystem::GetFileSystem(context_p); + file_name = move(file_name_p); + file_handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ); + // If object cached is disabled + // or if this file has cached metadata 
+ // or if the cached version already expired -namespace duckdb_zstd { -/** - * -log2(x / 256) lookup table for x in [0, 256). - * If x == 0: Return 0 - * Else: Return floor(-log2(x / 256) * 256) - */ -static unsigned const kInverseProbabilityLog256[256] = { - 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162, - 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889, - 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734, - 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626, - 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542, - 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473, - 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415, - 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366, - 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322, - 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282, - 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247, - 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215, - 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185, - 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157, - 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132, - 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108, - 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85, - 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64, - 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44, - 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25, - 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7, - 5, 4, 2, 1, -}; + auto last_modify_time = fs.GetLastModifiedTime(*file_handle); + if (!ObjectCache::ObjectCacheEnabled(context_p)) { + metadata = LoadMetadata(allocator, *file_handle); + } else { + metadata = + std::dynamic_pointer_cast(ObjectCache::GetObjectCache(context_p).Get(file_name)); + if (!metadata || (last_modify_time + 10 >= metadata->read_time)) { + metadata = LoadMetadata(allocator, *file_handle); + ObjectCache::GetObjectCache(context_p).Put(file_name, 
metadata); + } + } -static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { - void const* ptr = ctable; - U16 const* u16ptr = (U16 const*)ptr; - U32 const maxSymbolValue = MEM_read16(u16ptr + 1); - return maxSymbolValue; + InitializeSchema(expected_types_p, initial_filename_p); } -/** - * Returns the cost in bytes of encoding the normalized count header. - * Returns an error if any of the helper functions return an error. - */ -static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, - size_t const nbSeq, unsigned const FSELog) -{ - BYTE wksp[FSE_NCOUNTBOUND]; - S16 norm[MaxSeq + 1]; - const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); - FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max), ""); - return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); +ParquetReader::~ParquetReader() { } -/** - * Returns the cost in bits of encoding the distribution described by count - * using the entropy bound. - */ -static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total) -{ - unsigned cost = 0; - unsigned s; - for (s = 0; s <= max; ++s) { - unsigned norm = (unsigned)((256 * count[s]) / total); - if (count[s] != 0 && norm == 0) - norm = 1; - assert(count[s] < total); - cost += count[s] * kInverseProbabilityLog256[norm]; - } - return cost >> 8; +const FileMetaData *ParquetReader::GetFileMetadata() { + D_ASSERT(metadata); + D_ASSERT(metadata->metadata); + return metadata->metadata.get(); } -/** - * Returns the cost in bits of encoding the distribution in count using ctable. - * Returns an error if ctable cannot represent all the symbols in count. 
- */ -size_t ZSTD_fseBitCost( - FSE_CTable const* ctable, - unsigned const* count, - unsigned const max) -{ - unsigned const kAccuracyLog = 8; - size_t cost = 0; - unsigned s; - FSE_CState_t cstate; - FSE_initCState(&cstate, ctable); - if (ZSTD_getFSEMaxSymbolValue(ctable) < max) { - DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u", - ZSTD_getFSEMaxSymbolValue(ctable), max); - return ERROR(GENERIC); - } - for (s = 0; s <= max; ++s) { - unsigned const tableLog = cstate.stateLog; - unsigned const badCost = (tableLog + 1) << kAccuracyLog; - unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); - if (count[s] == 0) - continue; - if (bitCost >= badCost) { - DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s); - return ERROR(GENERIC); - } - cost += (size_t)count[s] * bitCost; - } - return cost >> kAccuracyLog; +// TODO also somewhat ugly, perhaps this can be moved to the column reader too +unique_ptr ParquetReader::ReadStatistics(ParquetReader &reader, LogicalType &type, + column_t file_col_idx, const FileMetaData *file_meta_data) { + unique_ptr column_stats; + auto root_reader = CreateReader(reader, file_meta_data); + auto column_reader = ((StructColumnReader *)root_reader.get())->GetChildReader(file_col_idx); + + for (auto &row_group : file_meta_data->row_groups) { + auto chunk_stats = column_reader->Stats(row_group.columns); + if (!chunk_stats) { + return nullptr; + } + if (!column_stats) { + column_stats = move(chunk_stats); + } else { + column_stats->Merge(*chunk_stats); + } + } + return column_stats; } -/** - * Returns the cost in bits of encoding the distribution in count using the - * table described by norm. The max symbol support by norm is assumed >= max. - * norm must be valid for every symbol with non-zero probability in count. 
- */ -size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, - unsigned const* count, unsigned const max) -{ - unsigned const shift = 8 - accuracyLog; - size_t cost = 0; - unsigned s; - assert(accuracyLog <= 8); - for (s = 0; s <= max; ++s) { - unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1; - unsigned const norm256 = normAcc << shift; - assert(norm256 > 0); - assert(norm256 < 256); - cost += count[s] * kInverseProbabilityLog256[norm256]; - } - return cost >> 8; +const ParquetRowGroup &ParquetReader::GetGroup(ParquetReaderScanState &state) { + auto file_meta_data = GetFileMetadata(); + D_ASSERT(state.current_group >= 0 && (idx_t)state.current_group < state.group_idx_list.size()); + D_ASSERT(state.group_idx_list[state.current_group] >= 0 && + state.group_idx_list[state.current_group] < file_meta_data->row_groups.size()); + return file_meta_data->row_groups[state.group_idx_list[state.current_group]]; } -symbolEncodingType_e -ZSTD_selectEncodingType( - FSE_repeat* repeatMode, unsigned const* count, unsigned const max, - size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, - FSE_CTable const* prevCTable, - short const* defaultNorm, U32 defaultNormLog, - ZSTD_defaultPolicy_e const isDefaultAllowed, - ZSTD_strategy const strategy) -{ - ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); - if (mostFrequent == nbSeq) { - *repeatMode = FSE_repeat_none; - if (isDefaultAllowed && nbSeq <= 2) { - /* Prefer set_basic over set_rle when there are 2 or less symbols, - * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. - * If basic encoding isn't possible, always choose RLE. 
- */ - DEBUGLOG(5, "Selected set_basic"); - return set_basic; - } - DEBUGLOG(5, "Selected set_rle"); - return set_rle; - } - if (strategy < ZSTD_lazy) { - if (isDefaultAllowed) { - size_t const staticFse_nbSeq_max = 1000; - size_t const mult = 10 - strategy; - size_t const baseLog = 3; - size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */ - assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */ - assert(mult <= 9 && mult >= 7); - if ( (*repeatMode == FSE_repeat_valid) - && (nbSeq < staticFse_nbSeq_max) ) { - DEBUGLOG(5, "Selected set_repeat"); - return set_repeat; - } - if ( (nbSeq < dynamicFse_nbSeq_min) - || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) { - DEBUGLOG(5, "Selected set_basic"); - /* The format allows default tables to be repeated, but it isn't useful. - * When using simple heuristics to select encoding type, we don't want - * to confuse these tables with dictionaries. When running more careful - * analysis, we don't need to waste time checking both repeating tables - * and default tables. - */ - *repeatMode = FSE_repeat_none; - return set_basic; - } - } - } else { - size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC); - size_t const repeatCost = *repeatMode != FSE_repeat_none ? 
ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC); - size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog); - size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); +void ParquetReader::PrepareRowGroupBuffer(ParquetReaderScanState &state, idx_t out_col_idx) { + auto &group = GetGroup(state); - if (isDefaultAllowed) { - assert(!ZSTD_isError(basicCost)); - assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost))); - } - assert(!ZSTD_isError(NCountCost)); - assert(compressedCost < ERROR(maxCode)); - DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u", - (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost); - if (basicCost <= repeatCost && basicCost <= compressedCost) { - DEBUGLOG(5, "Selected set_basic"); - assert(isDefaultAllowed); - *repeatMode = FSE_repeat_none; - return set_basic; - } - if (repeatCost <= compressedCost) { - DEBUGLOG(5, "Selected set_repeat"); - assert(!ZSTD_isError(repeatCost)); - return set_repeat; - } - assert(compressedCost < basicCost && compressedCost < repeatCost); - } - DEBUGLOG(5, "Selected set_compressed"); - *repeatMode = FSE_repeat_check; - return set_compressed; -} + auto column_reader = ((StructColumnReader *)state.root_reader.get())->GetChildReader(state.column_ids[out_col_idx]); -size_t -ZSTD_buildCTable(void* dst, size_t dstCapacity, - FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, - unsigned* count, U32 max, - const BYTE* codeTable, size_t nbSeq, - const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, - const FSE_CTable* prevCTable, size_t prevCTableSize, - void* entropyWorkspace, size_t entropyWorkspaceSize) -{ - BYTE* op = (BYTE*)dst; - const BYTE* const oend = op + dstCapacity; - DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity); + // TODO move this to columnreader too + if (state.filters) { + auto stats = column_reader->Stats(group.columns); + // filters contain output chunk index, 
not file col idx! + auto filter_entry = state.filters->filters.find(out_col_idx); + if (stats && filter_entry != state.filters->filters.end()) { + bool skip_chunk = false; + auto &filter = *filter_entry->second; + auto prune_result = filter.CheckStatistics(*stats); + if (prune_result == FilterPropagateResult::FILTER_ALWAYS_FALSE) { + skip_chunk = true; + } + if (skip_chunk) { + state.group_offset = group.num_rows; + return; + // this effectively will skip this chunk + } + } + } - switch (type) { - case set_rle: - FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), ""); - RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space"); - *op = codeTable[0]; - return 1; - case set_repeat: - memcpy(nextCTable, prevCTable, prevCTableSize); - return 0; - case set_basic: - FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */ - return 0; - case set_compressed: { - S16 norm[MaxSeq + 1]; - size_t nbSeq_1 = nbSeq; - const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); - if (count[codeTable[nbSeq-1]] > 1) { - count[codeTable[nbSeq-1]]--; - nbSeq_1--; - } - assert(nbSeq_1 > 1); - FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max), ""); - { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ - FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed"); - FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), ""); - return NCountSize; - } - } - default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach"); - } + state.root_reader->InitializeRead(group.columns, *state.thrift_file_proto); } -FORCE_INLINE_TEMPLATE size_t -ZSTD_encodeSequences_body( - void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - 
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets) -{ - BIT_CStream_t blockStream; - FSE_CState_t stateMatchLength; - FSE_CState_t stateOffsetBits; - FSE_CState_t stateLitLength; - - RETURN_ERROR_IF( - ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)), - dstSize_tooSmall, "not enough space remaining"); - DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)", - (int)(blockStream.endPtr - blockStream.startPtr), - (unsigned)dstCapacity); +idx_t ParquetReader::NumRows() { + return GetFileMetadata()->num_rows; +} - /* first symbols */ - FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); - FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); - FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); - BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, ZSTDInternalConstants::LL_bits[llCodeTable[nbSeq-1]]); - if (MEM_32bits()) BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ZSTDInternalConstants::ML_bits[mlCodeTable[nbSeq-1]]); - if (MEM_32bits()) BIT_flushBits(&blockStream); - if (longOffsets) { - U32 const ofBits = ofCodeTable[nbSeq-1]; - unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); - if (extraBits) { - BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); - BIT_flushBits(&blockStream); - } - BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, - ofBits - extraBits); - } else { - BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); - } - BIT_flushBits(&blockStream); +idx_t ParquetReader::NumRowGroups() { + return GetFileMetadata()->row_groups.size(); +} - { size_t n; - for (n=nbSeq-2 ; n= 64-7-(LLFSELog+MLFSELog+OffFSELog))) - BIT_flushBits(&blockStream); /* (7)*/ - BIT_addBits(&blockStream, sequences[n].litLength, llBits); - if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); 
- BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); - if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); - if (longOffsets) { - unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); - if (extraBits) { - BIT_addBits(&blockStream, sequences[n].offset, extraBits); - BIT_flushBits(&blockStream); /* (7)*/ - } - BIT_addBits(&blockStream, sequences[n].offset >> extraBits, - ofBits - extraBits); /* 31 */ - } else { - BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ - } - BIT_flushBits(&blockStream); /* (7)*/ - DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr)); - } } +void ParquetReader::InitializeScan(ParquetReaderScanState &state, vector column_ids, + vector groups_to_read, TableFilterSet *filters) { + state.current_group = -1; + state.finished = false; + state.column_ids = move(column_ids); + state.group_offset = 0; + state.group_idx_list = move(groups_to_read); + state.filters = filters; + state.sel.Initialize(STANDARD_VECTOR_SIZE); + state.file_handle = file_handle->file_system.OpenFile(file_handle->path, FileFlags::FILE_FLAGS_READ); + state.thrift_file_proto = CreateThriftProtocol(*state.file_handle); + state.root_reader = CreateReader(*this, GetFileMetadata()); - DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog); - FSE_flushCState(&blockStream, &stateMatchLength); - DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog); - FSE_flushCState(&blockStream, &stateOffsetBits); - DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog); - FSE_flushCState(&blockStream, &stateLitLength); + state.define_buf.resize(allocator, STANDARD_VECTOR_SIZE); + state.repeat_buf.resize(allocator, STANDARD_VECTOR_SIZE); +} - { size_t const streamSize = BIT_closeCStream(&blockStream); - RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space"); - return 
streamSize; - } +void FilterIsNull(Vector &v, parquet_filter_t &filter_mask, idx_t count) { + auto &mask = FlatVector::Validity(v); + if (mask.AllValid()) { + filter_mask.reset(); + } else { + for (idx_t i = 0; i < count; i++) { + filter_mask[i] = filter_mask[i] && !mask.RowIsValid(i); + } + } } -static size_t -ZSTD_encodeSequences_default( - void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets) -{ - return ZSTD_encodeSequences_body(dst, dstCapacity, - CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, longOffsets); +void FilterIsNotNull(Vector &v, parquet_filter_t &filter_mask, idx_t count) { + auto &mask = FlatVector::Validity(v); + if (!mask.AllValid()) { + for (idx_t i = 0; i < count; i++) { + filter_mask[i] = filter_mask[i] && mask.RowIsValid(i); + } + } } +template +void TemplatedFilterOperation(Vector &v, T constant, parquet_filter_t &filter_mask, idx_t count) { + D_ASSERT(v.GetVectorType() == VectorType::FLAT_VECTOR); // we just created the damn thing it better be -#if DYNAMIC_BMI2 + auto v_ptr = FlatVector::GetData(v); + auto &mask = FlatVector::Validity(v); -static TARGET_ATTRIBUTE("bmi2") size_t -ZSTD_encodeSequences_bmi2( - void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets) -{ - return ZSTD_encodeSequences_body(dst, dstCapacity, - CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, longOffsets); + if (!mask.AllValid()) { + for (idx_t i = 0; i < count; i++) { + if 
(mask.RowIsValid(i)) { + filter_mask[i] = filter_mask[i] && OP::Operation(v_ptr[i], constant); + } + } + } else { + for (idx_t i = 0; i < count; i++) { + filter_mask[i] = filter_mask[i] && OP::Operation(v_ptr[i], constant); + } + } } -#endif +template +static void FilterOperationSwitch(Vector &v, Value &constant, parquet_filter_t &filter_mask, idx_t count) { + if (filter_mask.none() || count == 0) { + return; + } + switch (v.GetType().id()) { + case LogicalTypeId::BOOLEAN: + TemplatedFilterOperation(v, constant.value_.boolean, filter_mask, count); + break; + case LogicalTypeId::UTINYINT: + TemplatedFilterOperation(v, constant.value_.utinyint, filter_mask, count); + break; + case LogicalTypeId::USMALLINT: + TemplatedFilterOperation(v, constant.value_.usmallint, filter_mask, count); + break; + case LogicalTypeId::UINTEGER: + TemplatedFilterOperation(v, constant.value_.uinteger, filter_mask, count); + break; + case LogicalTypeId::UBIGINT: + TemplatedFilterOperation(v, constant.value_.ubigint, filter_mask, count); + break; + case LogicalTypeId::INTEGER: + TemplatedFilterOperation(v, constant.value_.integer, filter_mask, count); + break; + case LogicalTypeId::BIGINT: + TemplatedFilterOperation(v, constant.value_.bigint, filter_mask, count); + break; + case LogicalTypeId::FLOAT: + TemplatedFilterOperation(v, constant.value_.float_, filter_mask, count); + break; + case LogicalTypeId::DOUBLE: + TemplatedFilterOperation(v, constant.value_.double_, filter_mask, count); + break; + case LogicalTypeId::DATE: + TemplatedFilterOperation(v, constant.value_.date, filter_mask, count); + break; + case LogicalTypeId::TIMESTAMP: + TemplatedFilterOperation(v, constant.value_.timestamp, filter_mask, count); + break; + case LogicalTypeId::BLOB: + case LogicalTypeId::VARCHAR: + TemplatedFilterOperation(v, string_t(constant.str_value), filter_mask, count); + break; + default: + throw NotImplementedException("Unsupported type for filter %s", v.ToString()); + } +} -size_t 
ZSTD_encodeSequences( - void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) -{ - DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); -#if DYNAMIC_BMI2 - if (bmi2) { - return ZSTD_encodeSequences_bmi2(dst, dstCapacity, - CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, longOffsets); - } -#endif - (void)bmi2; - return ZSTD_encodeSequences_default(dst, dstCapacity, - CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, longOffsets); +static void ApplyFilter(Vector &v, TableFilter &filter, parquet_filter_t &filter_mask, idx_t count) { + switch (filter.filter_type) { + case TableFilterType::CONJUNCTION_AND: { + auto &conjunction = (ConjunctionAndFilter &)filter; + for (auto &child_filter : conjunction.child_filters) { + ApplyFilter(v, *child_filter, filter_mask, count); + } + break; + } + case TableFilterType::CONJUNCTION_OR: { + auto &conjunction = (ConjunctionOrFilter &)filter; + for (auto &child_filter : conjunction.child_filters) { + parquet_filter_t child_mask = filter_mask; + ApplyFilter(v, *child_filter, child_mask, count); + filter_mask |= child_mask; + } + break; + } + case TableFilterType::CONSTANT_COMPARISON: { + auto &constant_filter = (ConstantFilter &)filter; + switch (constant_filter.comparison_type) { + case ExpressionType::COMPARE_EQUAL: + FilterOperationSwitch(v, constant_filter.constant, filter_mask, count); + break; + case ExpressionType::COMPARE_LESSTHAN: + FilterOperationSwitch(v, constant_filter.constant, filter_mask, count); + break; + case ExpressionType::COMPARE_LESSTHANOREQUALTO: + FilterOperationSwitch(v, constant_filter.constant, filter_mask, 
count); + break; + case ExpressionType::COMPARE_GREATERTHAN: + FilterOperationSwitch(v, constant_filter.constant, filter_mask, count); + break; + case ExpressionType::COMPARE_GREATERTHANOREQUALTO: + FilterOperationSwitch(v, constant_filter.constant, filter_mask, count); + break; + default: + D_ASSERT(0); + } + break; + } + case TableFilterType::IS_NOT_NULL: + FilterIsNotNull(v, filter_mask, count); + break; + case TableFilterType::IS_NULL: + FilterIsNull(v, filter_mask, count); + break; + default: + D_ASSERT(0); + break; + } } +void ParquetReader::Scan(ParquetReaderScanState &state, DataChunk &result) { + while (ScanInternal(state, result)) { + if (result.size() > 0) { + break; + } + result.Reset(); + } } +bool ParquetReader::ScanInternal(ParquetReaderScanState &state, DataChunk &result) { + if (state.finished) { + return false; + } -// LICENSE_CHANGE_END + // see if we have to switch to the next row group in the parquet file + if (state.current_group < 0 || (int64_t)state.group_offset >= GetGroup(state).num_rows) { + state.current_group++; + state.group_offset = 0; + if ((idx_t)state.current_group == state.group_idx_list.size()) { + state.finished = true; + return false; + } -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list + for (idx_t out_col_idx = 0; out_col_idx < result.ColumnCount(); out_col_idx++) { + // this is a special case where we are not interested in the actual contents of the file + if (state.column_ids[out_col_idx] == COLUMN_IDENTIFIER_ROW_ID) { + continue; + } -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
- */ + PrepareRowGroupBuffer(state, out_col_idx); + } + return true; + } - /*-************************************* - * Dependencies - ***************************************/ + auto this_output_chunk_rows = MinValue(STANDARD_VECTOR_SIZE, GetGroup(state).num_rows - state.group_offset); + result.SetCardinality(this_output_chunk_rows); + if (this_output_chunk_rows == 0) { + state.finished = true; + return false; // end of last group, we are done + } - /* ZSTD_getSequenceLength */ - /* HIST_countFast_wksp */ + // we evaluate simple table filters directly in this scan so we can skip decoding column data that's never going to + // be relevant + parquet_filter_t filter_mask; + filter_mask.set(); + state.define_buf.zero(); + state.repeat_buf.zero(); + auto define_ptr = (uint8_t *)state.define_buf.ptr; + auto repeat_ptr = (uint8_t *)state.repeat_buf.ptr; + auto root_reader = ((StructColumnReader *)state.root_reader.get()); -namespace duckdb_zstd { -/*-************************************* -* Superblock entropy buffer structs -***************************************/ -/** ZSTD_hufCTablesMetadata_t : - * Stores Literals Block Type for a super-block in hType, and - * huffman tree description in hufDesBuffer. - * hufDesSize refers to the size of huffman tree description in bytes. - * This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */ -typedef struct { - symbolEncodingType_e hType; - BYTE hufDesBuffer[500]; /* TODO give name to this value */ - size_t hufDesSize; -} ZSTD_hufCTablesMetadata_t; + if (state.filters) { + vector need_to_read(result.ColumnCount(), true); -/** ZSTD_fseCTablesMetadata_t : - * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and - * fse tables in fseTablesBuffer. - * fseTablesSize refers to the size of fse tables in bytes. 
- * This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */ -typedef struct { - symbolEncodingType_e llType; - symbolEncodingType_e ofType; - symbolEncodingType_e mlType; - BYTE fseTablesBuffer[500]; /* TODO give name to this value */ - size_t fseTablesSize; - size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */ -} ZSTD_fseCTablesMetadata_t; + // first load the columns that are used in filters + for (auto &filter_col : state.filters->filters) { + auto file_col_idx = state.column_ids[filter_col.first]; -typedef struct { - ZSTD_hufCTablesMetadata_t hufMetadata; - ZSTD_fseCTablesMetadata_t fseMetadata; -} ZSTD_entropyCTablesMetadata_t; + if (filter_mask.none()) { // if no rows are left we can stop checking filters + break; + } + root_reader->GetChildReader(file_col_idx) + ->Read(result.size(), filter_mask, define_ptr, repeat_ptr, result.data[filter_col.first]); -/** ZSTD_buildSuperBlockEntropy_literal() : - * Builds entropy for the super-block literals. - * Stores literals block type (raw, rle, compressed, repeat) and - * huffman description table to hufMetadata. 
- * @return : size of huffman description table or error code */ -static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize, - const ZSTD_hufCTables_t* prevHuf, - ZSTD_hufCTables_t* nextHuf, - ZSTD_hufCTablesMetadata_t* hufMetadata, - const int disableLiteralsCompression, - void* workspace, size_t wkspSize) -{ - BYTE* const wkspStart = (BYTE*)workspace; - BYTE* const wkspEnd = wkspStart + wkspSize; - BYTE* const countWkspStart = wkspStart; - unsigned* const countWksp = (unsigned*)workspace; - const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); - BYTE* const nodeWksp = countWkspStart + countWkspSize; - const size_t nodeWkspSize = wkspEnd-nodeWksp; - unsigned maxSymbolValue = 255; - unsigned huffLog = HUF_TABLELOG_DEFAULT; - HUF_repeat repeat = prevHuf->repeatMode; + need_to_read[filter_col.first] = false; - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize); + ApplyFilter(result.data[filter_col.first], *filter_col.second, filter_mask, this_output_chunk_rows); + } - /* Prepare nextEntropy assuming reusing the existing table */ - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + // we still may have to read some cols + for (idx_t out_col_idx = 0; out_col_idx < result.ColumnCount(); out_col_idx++) { + if (!need_to_read[out_col_idx]) { + continue; + } + auto file_col_idx = state.column_ids[out_col_idx]; - if (disableLiteralsCompression) { - DEBUGLOG(5, "set_basic - disabled"); - hufMetadata->hType = set_basic; - return 0; - } + if (filter_mask.none()) { + root_reader->GetChildReader(file_col_idx)->Skip(result.size()); + continue; + } + // TODO handle ROWID here, too + root_reader->GetChildReader(file_col_idx) + ->Read(result.size(), filter_mask, define_ptr, repeat_ptr, result.data[out_col_idx]); + } - /* small ? don't even attempt compression (speed opt) */ -# define COMPRESS_LITERALS_SIZE_MIN 63 - { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 
6 : COMPRESS_LITERALS_SIZE_MIN; - if (srcSize <= minLitSize) { - DEBUGLOG(5, "set_basic - too small"); - hufMetadata->hType = set_basic; - return 0; - } - } + idx_t sel_size = 0; + for (idx_t i = 0; i < this_output_chunk_rows; i++) { + if (filter_mask[i]) { + state.sel.set_index(sel_size++, i); + } + } - /* Scan input and build symbol stats */ - { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); - FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); - if (largest == srcSize) { - DEBUGLOG(5, "set_rle"); - hufMetadata->hType = set_rle; - return 0; - } - if (largest <= (srcSize >> 7)+4) { - DEBUGLOG(5, "set_basic - no gain"); - hufMetadata->hType = set_basic; - return 0; - } - } + result.Slice(state.sel, sel_size); + result.Verify(); - /* Validate the previous Huffman table */ - if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { - repeat = HUF_repeat_none; - } + } else { // #nofilter, just fricking load the data + for (idx_t out_col_idx = 0; out_col_idx < result.ColumnCount(); out_col_idx++) { + auto file_col_idx = state.column_ids[out_col_idx]; - /* Build Huffman Tree */ - memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); - huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); - { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, - maxSymbolValue, huffLog, - nodeWksp, nodeWkspSize); - FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); - huffLog = (U32)maxBits; - { /* Build and write the CTable */ - size_t const newCSize = HUF_estimateCompressedSize( - (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); - size_t const hSize = HUF_writeCTable( - hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), - (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog); - /* Check against repeating the previous CTable */ - if (repeat != HUF_repeat_none) { - size_t const oldCSize = 
HUF_estimateCompressedSize( - (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); - if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { - DEBUGLOG(5, "set_repeat - smaller"); - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - hufMetadata->hType = set_repeat; - return 0; - } - } - if (newCSize + hSize >= srcSize) { - DEBUGLOG(5, "set_basic - no gains"); - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - hufMetadata->hType = set_basic; - return 0; - } - DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); - hufMetadata->hType = set_compressed; - nextHuf->repeatMode = HUF_repeat_check; - return hSize; - } - } + if (file_col_idx == COLUMN_IDENTIFIER_ROW_ID) { + Value constant_42 = Value::BIGINT(42); + result.data[out_col_idx].Reference(constant_42); + continue; + } + + root_reader->GetChildReader(file_col_idx) + ->Read(result.size(), filter_mask, define_ptr, repeat_ptr, result.data[out_col_idx]); + } + } + + state.group_offset += this_output_chunk_rows; + return true; } -/** ZSTD_buildSuperBlockEntropy_sequences() : - * Builds entropy for the super-block sequences. - * Stores symbol compression modes and fse table to fseMetadata. 
- * @return : size of fse tables or error code */ -static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, - const ZSTD_fseCTables_t* prevEntropy, - ZSTD_fseCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - ZSTD_fseCTablesMetadata_t* fseMetadata, - void* workspace, size_t wkspSize) -{ - BYTE* const wkspStart = (BYTE*)workspace; - BYTE* const wkspEnd = wkspStart + wkspSize; - BYTE* const countWkspStart = wkspStart; - unsigned* const countWksp = (unsigned*)workspace; - const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned); - BYTE* const cTableWksp = countWkspStart + countWkspSize; - const size_t cTableWkspSize = wkspEnd-cTableWksp; - ZSTD_strategy const strategy = cctxParams->cParams.strategy; - FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; - FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; - FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; - const BYTE* const ofCodeTable = seqStorePtr->ofCode; - const BYTE* const llCodeTable = seqStorePtr->llCode; - const BYTE* const mlCodeTable = seqStorePtr->mlCode; - size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; - BYTE* const ostart = fseMetadata->fseTablesBuffer; - BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); - BYTE* op = ostart; +} // namespace duckdb - assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE)); - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq); - memset(workspace, 0, wkspSize); - fseMetadata->lastCountSize = 0; - /* convert length/distances into codes */ - ZSTD_seqToCodes(seqStorePtr); - /* build CTable for Literal Lengths */ - { U32 LLtype; - unsigned max = MaxLL; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - DEBUGLOG(5, "Building LL table"); - nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; - LLtype = 
ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, - countWksp, max, mostFrequent, nbSeq, - LLFSELog, prevEntropy->litlengthCTable, - ZSTDInternalConstants::LL_defaultNorm, ZSTDInternalConstants::LL_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(set_basic < set_compressed && set_rle < set_compressed); - assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, - countWksp, max, llCodeTable, nbSeq, ZSTDInternalConstants::LL_defaultNorm, ZSTDInternalConstants::LL_defaultNormLog, MaxLL, - prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable), - cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); - if (LLtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->llType = (symbolEncodingType_e) LLtype; - } } - /* build CTable for Offsets */ - { U32 Offtype; - unsigned max = MaxOff; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ - ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? 
ZSTD_defaultAllowed : ZSTD_defaultDisallowed; - DEBUGLOG(5, "Building OF table"); - nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; - Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, - countWksp, max, mostFrequent, nbSeq, - OffFSELog, prevEntropy->offcodeCTable, - ZSTDInternalConstants::OF_defaultNorm, ZSTDInternalConstants::OF_defaultNormLog, - defaultPolicy, strategy); - assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, - countWksp, max, ofCodeTable, nbSeq, ZSTDInternalConstants::OF_defaultNorm, ZSTDInternalConstants::OF_defaultNormLog, DefaultMaxOff, - prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable), - cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); - if (Offtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->ofType = (symbolEncodingType_e) Offtype; - } } - /* build CTable for MatchLengths */ - { U32 MLtype; - unsigned max = MaxML; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); - nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; - MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, - countWksp, max, mostFrequent, nbSeq, - MLFSELog, prevEntropy->matchlengthCTable, - ZSTDInternalConstants::ML_defaultNorm, ZSTDInternalConstants::ML_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, - countWksp, 
max, mlCodeTable, nbSeq, ZSTDInternalConstants::ML_defaultNorm, ZSTDInternalConstants::ML_defaultNormLog, MaxML, - prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable), - cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); - if (MLtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->mlType = (symbolEncodingType_e) MLtype; - } } - assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer)); - return op-ostart; + +#include "duckdb.hpp" +#ifndef DUCKDB_AMALGAMATION +#include "duckdb/common/types/value.hpp" +#include "duckdb/storage/statistics/string_statistics.hpp" +#include "duckdb/storage/statistics/numeric_statistics.hpp" +#endif + +namespace duckdb { + +using duckdb_parquet::format::ConvertedType; +using duckdb_parquet::format::Type; + +template +static unique_ptr TemplatedGetNumericStats(const LogicalType &type, + const duckdb_parquet::format::Statistics &parquet_stats) { + auto stats = make_unique(type); + + // for reasons unknown to science, Parquet defines *both* `min` and `min_value` as well as `max` and + // `max_value`. All are optional. such elegance. + if (parquet_stats.__isset.min) { + stats->min = FUNC((const_data_ptr_t)parquet_stats.min.data()); + } else if (parquet_stats.__isset.min_value) { + stats->min = FUNC((const_data_ptr_t)parquet_stats.min_value.data()); + } else { + stats->min.is_null = true; + } + if (parquet_stats.__isset.max) { + stats->max = FUNC((const_data_ptr_t)parquet_stats.max.data()); + } else if (parquet_stats.__isset.max_value) { + stats->max = FUNC((const_data_ptr_t)parquet_stats.max_value.data()); + } else { + stats->max.is_null = true; + } + // GCC 4.x insists on a move() here + return move(stats); } +template +static Value TransformStatisticsPlain(const_data_ptr_t input) { + return Value::CreateValue(Load(input)); +} -/** ZSTD_buildSuperBlockEntropy() : - * Builds entropy for the super-block. 
- * @return : 0 on success or error code */ -static size_t -ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, - const ZSTD_entropyCTables_t* prevEntropy, - ZSTD_entropyCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - ZSTD_entropyCTablesMetadata_t* entropyMetadata, - void* workspace, size_t wkspSize) -{ - size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy"); - entropyMetadata->hufMetadata.hufDesSize = - ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize, - &prevEntropy->huf, &nextEntropy->huf, - &entropyMetadata->hufMetadata, - ZSTD_disableLiteralsCompression(cctxParams), - workspace, wkspSize); - FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed"); - entropyMetadata->fseMetadata.fseTablesSize = - ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr, - &prevEntropy->fse, &nextEntropy->fse, - cctxParams, - &entropyMetadata->fseMetadata, - workspace, wkspSize); - FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed"); - return 0; +static Value TransformStatisticsFloat(const_data_ptr_t input) { + auto val = Load(input); + if (!Value::FloatIsValid(val)) { + return Value(LogicalType::FLOAT); + } + return Value::CreateValue(val); } -/** ZSTD_compressSubBlock_literal() : - * Compresses literals section for a sub-block. - * When we have to write the Huffman table we will sometimes choose a header - * size larger than necessary. This is because we have to pick the header size - * before we know the table size + compressed size, so we have a bound on the - * table size. If we guessed incorrectly, we fall back to uncompressed literals. - * - * We write the header when writeEntropy=1 and set entropyWrriten=1 when we succeeded - * in writing the header, otherwise it is set to 0. - * - * hufMetadata->hType has literals block type info. 
- * If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block. - * If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block. - * If it is set_compressed, first sub-block's literals section will be Compressed_Literals_Block - * If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block - * and the following sub-blocks' literals sections will be Treeless_Literals_Block. - * @return : compressed size of literals section of a sub-block - * Or 0 if it unable to compress. - * Or error code */ -static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, - const ZSTD_hufCTablesMetadata_t* hufMetadata, - const BYTE* literals, size_t litSize, - void* dst, size_t dstSize, - const int bmi2, int writeEntropy, int* entropyWritten) -{ - size_t const header = writeEntropy ? 200 : 0; - size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header)); - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstSize; - BYTE* op = ostart + lhSize; - U32 const singleStream = lhSize == 3; - symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat; - size_t cLitSize = 0; +static Value TransformStatisticsDouble(const_data_ptr_t input) { + auto val = Load(input); + if (!Value::DoubleIsValid(val)) { + return Value(LogicalType::DOUBLE); + } + return Value::CreateValue(val); +} - (void)bmi2; /* TODO bmi2... 
*/ +static Value TransformStatisticsDate(const_data_ptr_t input) { + return Value::DATE(ParquetIntToDate(Load(input))); +} - DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy); +static Value TransformStatisticsTimestampMs(const_data_ptr_t input) { + return Value::TIMESTAMP(ParquetTimestampMsToTimestamp(Load(input))); +} - *entropyWritten = 0; - if (litSize == 0 || hufMetadata->hType == set_basic) { - DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal"); - return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); - } else if (hufMetadata->hType == set_rle) { - DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal"); - return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize); - } +static Value TransformStatisticsTimestampMicros(const_data_ptr_t input) { + return Value::TIMESTAMP(ParquetTimestampMicrosToTimestamp(Load(input))); +} - assert(litSize > 0); - assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat); +static Value TransformStatisticsTimestampImpala(const_data_ptr_t input) { + return Value::TIMESTAMP(ImpalaTimestampToTimestamp(Load(input))); +} - if (writeEntropy && hufMetadata->hType == set_compressed) { - memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize); - op += hufMetadata->hufDesSize; - cLitSize += hufMetadata->hufDesSize; - DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize); - } +unique_ptr ParquetTransformColumnStatistics(const SchemaElement &s_ele, const LogicalType &type, + const ColumnChunk &column_chunk) { + if (!column_chunk.__isset.meta_data || !column_chunk.meta_data.__isset.statistics) { + // no stats present for row group + return nullptr; + } + auto &parquet_stats = column_chunk.meta_data.statistics; + unique_ptr row_group_stats; - /* TODO bmi2 */ - { const size_t cSize = singleStream ? 
HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable) - : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable); - op += cSize; - cLitSize += cSize; - if (cSize == 0 || ERR_isError(cSize)) { - DEBUGLOG(5, "Failed to write entropy tables %s", ZSTD_getErrorName(cSize)); - return 0; - } - /* If we expand and we aren't writing a header then emit uncompressed */ - if (!writeEntropy && cLitSize >= litSize) { - DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible"); - return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); - } - /* If we are writing headers then allow expansion that doesn't change our header size. */ - if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) { - assert(cLitSize > litSize); - DEBUGLOG(5, "Literals expanded beyond allowed header size"); - return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); - } - DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize); - } + switch (type.id()) { - /* Build header */ - switch(lhSize) - { - case 3: /* 2 - 2 - 10 - 10 */ - { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14); - MEM_writeLE24(ostart, lhc); - break; - } - case 4: /* 2 - 2 - 14 - 14 */ - { U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18); - MEM_writeLE32(ostart, lhc); - break; - } - case 5: /* 2 - 2 - 18 - 18 */ - { U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22); - MEM_writeLE32(ostart, lhc); - ostart[4] = (BYTE)(cLitSize >> 10); - break; - } - default: /* not possible : lhSize is {3,4,5} */ - assert(0); - } - *entropyWritten = 1; - DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart)); - return op-ostart; -} + case LogicalTypeId::UTINYINT: + row_group_stats = TemplatedGetNumericStats>(type, parquet_stats); + break; -static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t 
litSize, int lastSequence) { - const seqDef* const sstart = sequences; - const seqDef* const send = sequences + nbSeq; - const seqDef* sp = sstart; - size_t matchLengthSum = 0; - size_t litLengthSum = 0; - while (send-sp > 0) { - ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp); - litLengthSum += seqLen.litLength; - matchLengthSum += seqLen.matchLength; - sp++; - } - assert(litLengthSum <= litSize); - if (!lastSequence) { - assert(litLengthSum == litSize); - } - return matchLengthSum + litSize; -} + case LogicalTypeId::USMALLINT: + row_group_stats = TemplatedGetNumericStats>(type, parquet_stats); + break; -/** ZSTD_compressSubBlock_sequences() : - * Compresses sequences section for a sub-block. - * fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have - * symbol compression modes for the super-block. - * The first successfully compressed block will have these in its header. - * We set entropyWritten=1 when we succeed in compressing the sequences. - * The following sub-blocks will always have repeat mode. - * @return : compressed size of sequences section of a sub-block - * Or 0 if it is unable to compress - * Or error code. 
*/ -static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables, - const ZSTD_fseCTablesMetadata_t* fseMetadata, - const seqDef* sequences, size_t nbSeq, - const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, - const ZSTD_CCtx_params* cctxParams, - void* dst, size_t dstCapacity, - const int bmi2, int writeEntropy, int* entropyWritten) -{ - const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstCapacity; - BYTE* op = ostart; - BYTE* seqHead; + case LogicalTypeId::UINTEGER: + row_group_stats = TemplatedGetNumericStats>(type, parquet_stats); + break; - DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets); + case LogicalTypeId::UBIGINT: + row_group_stats = TemplatedGetNumericStats>(type, parquet_stats); + break; + case LogicalTypeId::INTEGER: + row_group_stats = TemplatedGetNumericStats>(type, parquet_stats); + break; - *entropyWritten = 0; - /* Sequences Header */ - RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, - dstSize_tooSmall, ""); - if (nbSeq < 0x7F) - *op++ = (BYTE)nbSeq; - else if (nbSeq < LONGNBSEQ) - op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; - else - op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; - if (nbSeq==0) { - return op - ostart; - } + case LogicalTypeId::BIGINT: + row_group_stats = TemplatedGetNumericStats>(type, parquet_stats); + break; - /* seqHead : flags for FSE encoding type */ - seqHead = op++; + case LogicalTypeId::FLOAT: + row_group_stats = TemplatedGetNumericStats(type, parquet_stats); + break; - DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart)); + case LogicalTypeId::DOUBLE: + row_group_stats = TemplatedGetNumericStats(type, parquet_stats); + break; - if (writeEntropy) { - const U32 LLtype = fseMetadata->llType; - const U32 Offtype = fseMetadata->ofType; - 
const U32 MLtype = fseMetadata->mlType; - DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize); - *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); - memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize); - op += fseMetadata->fseTablesSize; - } else { - const U32 repeat = set_repeat; - *seqHead = (BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2)); - } + case LogicalTypeId::DATE: + row_group_stats = TemplatedGetNumericStats(type, parquet_stats); + break; - { size_t const bitstreamSize = ZSTD_encodeSequences( - op, oend - op, - fseTables->matchlengthCTable, mlCode, - fseTables->offcodeCTable, ofCode, - fseTables->litlengthCTable, llCode, - sequences, nbSeq, - longOffsets, bmi2); - FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); - op += bitstreamSize; - /* zstd versions <= 1.3.4 mistakenly report corruption when - * FSE_readNCount() receives a buffer < 4 bytes. - * Fixed by https://github.com/facebook/zstd/pull/1146. - * This can happen when the last set_compressed table present is 2 - * bytes and the bitstream is only one byte. - * In this exceedingly rare case, we will simply emit an uncompressed - * block, since it isn't worth optimizing. 
- */ -#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) { - /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ - assert(fseMetadata->lastCountSize + bitstreamSize == 3); - DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " - "emitting an uncompressed block."); - return 0; - } -#endif - DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize); - } + // here we go, our favorite type + case LogicalTypeId::TIMESTAMP: { + switch (s_ele.type) { + case Type::INT64: + // arrow timestamp + switch (s_ele.converted_type) { + case ConvertedType::TIMESTAMP_MICROS: + row_group_stats = TemplatedGetNumericStats(type, parquet_stats); + break; + case ConvertedType::TIMESTAMP_MILLIS: + row_group_stats = TemplatedGetNumericStats(type, parquet_stats); + break; + default: + return nullptr; + } + break; + case Type::INT96: + // impala timestamp + row_group_stats = TemplatedGetNumericStats(type, parquet_stats); + break; + default: + return nullptr; + } + break; + } + case LogicalTypeId::VARCHAR: { + auto string_stats = make_unique(type); + if (parquet_stats.__isset.min) { + string_stats->Update(parquet_stats.min); + } else if (parquet_stats.__isset.min_value) { + string_stats->Update(parquet_stats.min_value); + } else { + return nullptr; + } + if (parquet_stats.__isset.max) { + string_stats->Update(parquet_stats.max); + } else if (parquet_stats.__isset.max_value) { + string_stats->Update(parquet_stats.max_value); + } else { + return nullptr; + } - /* zstd versions <= 1.4.0 mistakenly report error when - * sequences section body size is less than 3 bytes. - * Fixed by https://github.com/facebook/zstd/pull/1664. - * This can happen when the previous sequences section block is compressed - * with rle mode and the current block's sequences section is compressed - * with repeat mode where sequences section body size can be 1 byte. 
- */ -#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (op-seqHead < 4) { - DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting " - "an uncompressed block when sequences are < 4 bytes"); - return 0; - } -#endif + string_stats->has_unicode = true; // we dont know better + row_group_stats = move(string_stats); + break; + } + default: + // no stats for you + break; + } // end of type switch - *entropyWritten = 1; - return op - ostart; -} + // null count is generic + if (row_group_stats) { + if (parquet_stats.__isset.null_count) { + row_group_stats->validity_stats = make_unique(parquet_stats.null_count != 0); + } else { + row_group_stats->validity_stats = make_unique(true); + } + } else { + // if stats are missing from any row group we know squat + return nullptr; + } -/** ZSTD_compressSubBlock() : - * Compresses a single sub-block. - * @return : compressed size of the sub-block - * Or 0 if it failed to compress. */ -static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy, - const ZSTD_entropyCTablesMetadata_t* entropyMetadata, - const seqDef* sequences, size_t nbSeq, - const BYTE* literals, size_t litSize, - const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, - const ZSTD_CCtx_params* cctxParams, - void* dst, size_t dstCapacity, - const int bmi2, - int writeLitEntropy, int writeSeqEntropy, - int* litEntropyWritten, int* seqEntropyWritten, - U32 lastBlock) -{ - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstCapacity; - BYTE* op = ostart + ZSTDInternalConstants::ZSTD_blockHeaderSize; - DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)", - litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock); - { size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable, - &entropyMetadata->hufMetadata, literals, litSize, - op, oend-op, bmi2, writeLitEntropy, litEntropyWritten); - FORWARD_IF_ERROR(cLitSize, 
"ZSTD_compressSubBlock_literal failed"); - if (cLitSize == 0) return 0; - op += cLitSize; - } - { size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse, - &entropyMetadata->fseMetadata, - sequences, nbSeq, - llCode, mlCode, ofCode, - cctxParams, - op, oend-op, - bmi2, writeSeqEntropy, seqEntropyWritten); - FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed"); - if (cSeqSize == 0) return 0; - op += cSeqSize; - } - /* Write block header */ - { size_t cSize = (op-ostart)-ZSTDInternalConstants::ZSTD_blockHeaderSize; - U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); - MEM_writeLE24(ostart, cBlockHeader24); - } - return op-ostart; + return row_group_stats; } -static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize, - const ZSTD_hufCTables_t* huf, - const ZSTD_hufCTablesMetadata_t* hufMetadata, - void* workspace, size_t wkspSize, - int writeEntropy) -{ - unsigned* const countWksp = (unsigned*)workspace; - unsigned maxSymbolValue = 255; - size_t literalSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ +} // namespace duckdb - if (hufMetadata->hType == set_basic) return litSize; - else if (hufMetadata->hType == set_rle) return 1; - else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) { - size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); - if (ZSTD_isError(largest)) return litSize; - { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); - if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; - return cLitSizeEstimate + literalSectionHeaderSize; - } } - assert(0); /* impossible */ - return 0; -} -static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, - const BYTE* codeTable, unsigned maxCode, - size_t nbSeq, const FSE_CTable* fseCTable, - const U32* additionalBits, - short const* 
defaultNorm, U32 defaultNormLog, - void* workspace, size_t wkspSize) -{ - unsigned* const countWksp = (unsigned*)workspace; - const BYTE* ctp = codeTable; - const BYTE* const ctStart = ctp; - const BYTE* const ctEnd = ctStart + nbSeq; - size_t cSymbolTypeSizeEstimateInBits = 0; - unsigned max = maxCode; +#include "duckdb.hpp" +#ifndef DUCKDB_AMALGAMATION +#include "duckdb/common/types/date.hpp" +#include "duckdb/common/types/time.hpp" +#include "duckdb/common/types/timestamp.hpp" +#endif - HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ - if (type == set_basic) { - cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max); - } else if (type == set_rle) { - cSymbolTypeSizeEstimateInBits = 0; - } else if (type == set_compressed || type == set_repeat) { - cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); - } - if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10; - while (ctp < ctEnd) { - if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp]; - else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ - ctp++; - } - return cSymbolTypeSizeEstimateInBits / 8; +namespace duckdb { + +// surely they are joking +static constexpr int64_t JULIAN_TO_UNIX_EPOCH_DAYS = 2440588LL; +static constexpr int64_t MILLISECONDS_PER_DAY = 86400000LL; +static constexpr int64_t NANOSECONDS_PER_DAY = MILLISECONDS_PER_DAY * 1000LL * 1000LL; + +int64_t ImpalaTimestampToNanoseconds(const Int96 &impala_timestamp) { + int64_t days_since_epoch = impala_timestamp.value[2] - JULIAN_TO_UNIX_EPOCH_DAYS; + auto nanoseconds = Load((data_ptr_t)impala_timestamp.value); + return days_since_epoch * NANOSECONDS_PER_DAY + nanoseconds; } -static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, - const BYTE* llCodeTable, - const BYTE* mlCodeTable, - size_t nbSeq, - const ZSTD_fseCTables_t* 
fseTables, - const ZSTD_fseCTablesMetadata_t* fseMetadata, - void* workspace, size_t wkspSize, - int writeEntropy) -{ - size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ - size_t cSeqSizeEstimate = 0; - cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff, - nbSeq, fseTables->offcodeCTable, NULL, - ZSTDInternalConstants::OF_defaultNorm, ZSTDInternalConstants::OF_defaultNormLog, - workspace, wkspSize); - cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL, - nbSeq, fseTables->litlengthCTable, ZSTDInternalConstants::LL_bits, - ZSTDInternalConstants::LL_defaultNorm, ZSTDInternalConstants::LL_defaultNormLog, - workspace, wkspSize); - cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML, - nbSeq, fseTables->matchlengthCTable, ZSTDInternalConstants::ML_bits, - ZSTDInternalConstants::ML_defaultNorm, ZSTDInternalConstants::ML_defaultNormLog, - workspace, wkspSize); - if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; - return cSeqSizeEstimate + sequencesSectionHeaderSize; +timestamp_t ImpalaTimestampToTimestamp(const Int96 &raw_ts) { + auto impala_ns = ImpalaTimestampToNanoseconds(raw_ts); + return Timestamp::FromEpochMs(impala_ns / 1000000); +} + +Int96 TimestampToImpalaTimestamp(timestamp_t &ts) { + int32_t hour, min, sec, msec; + Time::Convert(Timestamp::GetTime(ts), hour, min, sec, msec); + uint64_t ms_since_midnight = hour * 60 * 60 * 1000 + min * 60 * 1000 + sec * 1000 + msec; + auto days_since_epoch = Date::Epoch(Timestamp::GetDate(ts)) / (24 * 60 * 60); + // first two uint32 in Int96 are nanoseconds since midnights + // last uint32 is number of days since year 4713 BC ("Julian date") + Int96 impala_ts; + Store(ms_since_midnight * 1000000, (data_ptr_t)impala_ts.value); + impala_ts.value[2] = days_since_epoch + JULIAN_TO_UNIX_EPOCH_DAYS; + return impala_ts; } -static size_t 
ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, - const BYTE* ofCodeTable, - const BYTE* llCodeTable, - const BYTE* mlCodeTable, - size_t nbSeq, - const ZSTD_entropyCTables_t* entropy, - const ZSTD_entropyCTablesMetadata_t* entropyMetadata, - void* workspace, size_t wkspSize, - int writeLitEntropy, int writeSeqEntropy) { - size_t cSizeEstimate = 0; - cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize, - &entropy->huf, &entropyMetadata->hufMetadata, - workspace, wkspSize, writeLitEntropy); - cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, - nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, - workspace, wkspSize, writeSeqEntropy); - return cSizeEstimate + ZSTDInternalConstants::ZSTD_blockHeaderSize; +timestamp_t ParquetTimestampMicrosToTimestamp(const int64_t &raw_ts) { + return Timestamp::FromEpochMicroSeconds(raw_ts); +} +timestamp_t ParquetTimestampMsToTimestamp(const int64_t &raw_ts) { + return Timestamp::FromEpochMs(raw_ts); } -static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata) -{ - if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle) - return 1; - if (fseMetadata->mlType == set_compressed || fseMetadata->mlType == set_rle) - return 1; - if (fseMetadata->ofType == set_compressed || fseMetadata->ofType == set_rle) - return 1; - return 0; +date_t ParquetIntToDate(const int32_t &raw_date) { + return date_t(raw_date); } -/** ZSTD_compressSubBlock_multi() : - * Breaks super-block into multiple sub-blocks and compresses them. - * Entropy will be written to the first block. - * The following blocks will use repeat mode to compress. - * All sub-blocks are compressed blocks (no raw or rle blocks). - * @return : compressed size of the super block (which is multiple ZSTD blocks) - * Or 0 if it failed to compress. 
*/ -static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, - const ZSTD_compressedBlockState_t* prevCBlock, - ZSTD_compressedBlockState_t* nextCBlock, - const ZSTD_entropyCTablesMetadata_t* entropyMetadata, - const ZSTD_CCtx_params* cctxParams, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const int bmi2, U32 lastBlock, - void* workspace, size_t wkspSize) -{ - const seqDef* const sstart = seqStorePtr->sequencesStart; - const seqDef* const send = seqStorePtr->sequences; - const seqDef* sp = sstart; - const BYTE* const lstart = seqStorePtr->litStart; - const BYTE* const lend = seqStorePtr->lit; - const BYTE* lp = lstart; - BYTE const* ip = (BYTE const*)src; - BYTE const* const iend = ip + srcSize; - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstCapacity; - BYTE* op = ostart; - const BYTE* llCodePtr = seqStorePtr->llCode; - const BYTE* mlCodePtr = seqStorePtr->mlCode; - const BYTE* ofCodePtr = seqStorePtr->ofCode; - size_t targetCBlockSize = cctxParams->targetCBlockSize; - size_t litSize, seqCount; - int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed; - int writeSeqEntropy = 1; - int lastSequence = 0; +} // namespace duckdb - DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)", - (unsigned)(lend-lp), (unsigned)(send-sstart)); - litSize = 0; - seqCount = 0; - do { - size_t cBlockSizeEstimate = 0; - if (sstart == send) { - lastSequence = 1; - } else { - const seqDef* const sequence = sp + seqCount; - lastSequence = sequence == send - 1; - litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength; - seqCount++; - } - if (lastSequence) { - assert(lp <= lend); - assert(litSize <= (size_t)(lend - lp)); - litSize = (size_t)(lend - lp); - } - /* I think there is an optimization opportunity here. - * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful - * since it recalculates estimate from scratch. 
- * For example, it would recount literal distribution and symbol codes everytime. - */ - cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount, - &nextCBlock->entropy, entropyMetadata, - workspace, wkspSize, writeLitEntropy, writeSeqEntropy); - if (cBlockSizeEstimate > targetCBlockSize || lastSequence) { - int litEntropyWritten = 0; - int seqEntropyWritten = 0; - const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence); - const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, - sp, seqCount, - lp, litSize, - llCodePtr, mlCodePtr, ofCodePtr, - cctxParams, - op, oend-op, - bmi2, writeLitEntropy, writeSeqEntropy, - &litEntropyWritten, &seqEntropyWritten, - lastBlock && lastSequence); - FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); - if (cSize > 0 && cSize < decompressedSize) { - DEBUGLOG(5, "Committed the sub-block"); - assert(ip + decompressedSize <= iend); - ip += decompressedSize; - sp += seqCount; - lp += litSize; - op += cSize; - llCodePtr += seqCount; - mlCodePtr += seqCount; - ofCodePtr += seqCount; - litSize = 0; - seqCount = 0; - /* Entropy only needs to be written once */ - if (litEntropyWritten) { - writeLitEntropy = 0; - } - if (seqEntropyWritten) { - writeSeqEntropy = 0; - } - } - } - } while (!lastSequence); - if (writeLitEntropy) { - DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten"); - memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf)); - } - if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) { - /* If we haven't written our entropy tables, then we've violated our contract and - * must emit an uncompressed block. 
- */ - DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten"); - return 0; - } - if (ip < iend) { - size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock); - DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip)); - FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); - assert(cSize != 0); - op += cSize; - /* We have to regenerate the repcodes because we've skipped some sequences */ - if (sp < send) { - seqDef const* seq; - repcodes_t rep; - memcpy(&rep, prevCBlock->rep, sizeof(rep)); - for (seq = sstart; seq < sp; ++seq) { - rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); - } - memcpy(nextCBlock->rep, &rep, sizeof(rep)); - } - } - DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed"); - return op-ostart; -} -size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - void const* src, size_t srcSize, - unsigned lastBlock) { - ZSTD_entropyCTablesMetadata_t entropyMetadata; +#include "duckdb.hpp" +#ifndef DUCKDB_AMALGAMATION +#include "duckdb/function/table_function.hpp" +#include "duckdb/parser/parsed_data/create_table_function_info.hpp" +#include "duckdb/parser/parsed_data/create_copy_function_info.hpp" +#include "duckdb/main/client_context.hpp" +#include "duckdb/main/connection.hpp" +#include "duckdb/common/file_system.hpp" +#include "duckdb/common/string_util.hpp" +#include "duckdb/common/types/date.hpp" +#include "duckdb/common/types/time.hpp" +#include "duckdb/common/types/timestamp.hpp" +#include "duckdb/common/serializer/buffered_file_writer.hpp" +#include "duckdb/common/serializer/buffered_serializer.hpp" +#endif - FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore, - &zc->blockState.prevCBlock->entropy, - &zc->blockState.nextCBlock->entropy, - &zc->appliedParams, - &entropyMetadata, - zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */), 
""); - return ZSTD_compressSubBlock_multi(&zc->seqStore, - zc->blockState.prevCBlock, - zc->blockState.nextCBlock, - &entropyMetadata, - &zc->appliedParams, - dst, dstCapacity, - src, srcSize, - zc->bmi2, lastBlock, - zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */); + + + +namespace duckdb { + +using namespace duckdb_parquet; // NOLINT +using namespace duckdb_apache::thrift; // NOLINT +using namespace duckdb_apache::thrift::protocol; // NOLINT +using namespace duckdb_apache::thrift::transport; // NOLINT +using namespace duckdb_miniz; // NOLINT + +using duckdb_parquet::format::CompressionCodec; +using duckdb_parquet::format::ConvertedType; +using duckdb_parquet::format::Encoding; +using duckdb_parquet::format::FieldRepetitionType; +using duckdb_parquet::format::FileMetaData; +using duckdb_parquet::format::PageHeader; +using duckdb_parquet::format::PageType; +using ParquetRowGroup = duckdb_parquet::format::RowGroup; +using duckdb_parquet::format::Type; + +class MyTransport : public TTransport { +public: + explicit MyTransport(Serializer &serializer) : serializer(serializer) { + } + + bool isOpen() const override { + return true; + } + + void open() override { + } + + void close() override { + } + + void write_virt(const uint8_t *buf, uint32_t len) override { + serializer.WriteData((const_data_ptr_t)buf, len); + } + +private: + Serializer &serializer; +}; + +static Type::type DuckDBTypeToParquetType(const LogicalType &duckdb_type) { + switch (duckdb_type.id()) { + case LogicalTypeId::BOOLEAN: + return Type::BOOLEAN; + case LogicalTypeId::TINYINT: + case LogicalTypeId::SMALLINT: + case LogicalTypeId::INTEGER: + return Type::INT32; + case LogicalTypeId::BIGINT: + return Type::INT64; + case LogicalTypeId::FLOAT: + return Type::FLOAT; + case LogicalTypeId::DECIMAL: // for now... 
+ case LogicalTypeId::DOUBLE: + return Type::DOUBLE; + case LogicalTypeId::VARCHAR: + case LogicalTypeId::BLOB: + return Type::BYTE_ARRAY; + case LogicalTypeId::DATE: + case LogicalTypeId::TIMESTAMP: + return Type::INT96; + default: + throw NotImplementedException(duckdb_type.ToString()); + } } +static bool DuckDBTypeToConvertedType(const LogicalType &duckdb_type, ConvertedType::type &result) { + switch (duckdb_type.id()) { + case LogicalTypeId::VARCHAR: + result = ConvertedType::UTF8; + return true; + default: + return false; + } } +static void VarintEncode(uint32_t val, Serializer &ser) { + do { + uint8_t byte = val & 127; + val >>= 7; + if (val != 0) { + byte |= 128; + } + ser.Write(byte); + } while (val != 0); +} -// LICENSE_CHANGE_END +static uint8_t GetVarintSize(uint32_t val) { + uint8_t res = 0; + do { + uint8_t byte = val & 127; + val >>= 7; + if (val != 0) { + byte |= 128; + } + res++; + } while (val != 0); + return res; +} +template +static void TemplatedWritePlain(Vector &col, idx_t length, ValidityMask &mask, Serializer &ser) { + auto *ptr = FlatVector::GetData(col); + for (idx_t r = 0; r < length; r++) { + if (mask.RowIsValid(r)) { + ser.Write((TGT)ptr[r]); + } + } +} -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list +ParquetWriter::ParquetWriter(FileSystem &fs, string file_name_p, vector types_p, vector names_p, + CompressionCodec::type codec) + : file_name(move(file_name_p)), sql_types(move(types_p)), column_names(move(names_p)), codec(codec) { +#if STANDARD_VECTOR_SIZE < 64 + throw NotImplementedException("Parquet writer is not supported for vector sizes < 64"); +#endif -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ + // initialize the file writer + writer = make_unique(fs, file_name.c_str(), + FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW); + // parquet files start with the string "PAR1" + writer->WriteData((const_data_ptr_t) "PAR1", 4); + TCompactProtocolFactoryT tproto_factory; + protocol = tproto_factory.getProtocol(make_shared(*writer)); + file_meta_data.num_rows = 0; + file_meta_data.schema.resize(sql_types.size() + 1); + file_meta_data.schema[0].num_children = sql_types.size(); + file_meta_data.schema[0].__isset.num_children = true; + file_meta_data.version = 1; + for (idx_t i = 0; i < sql_types.size(); i++) { + auto &schema_element = file_meta_data.schema[i + 1]; + schema_element.type = DuckDBTypeToParquetType(sql_types[i]); + schema_element.repetition_type = FieldRepetitionType::OPTIONAL; + schema_element.num_children = 0; + schema_element.__isset.num_children = true; + schema_element.__isset.type = true; + schema_element.__isset.repetition_type = true; + schema_element.name = column_names[i]; + schema_element.__isset.converted_type = DuckDBTypeToConvertedType(sql_types[i], schema_element.converted_type); + } +} -namespace duckdb_zstd { +void ParquetWriter::Flush(ChunkCollection &buffer) { + if (buffer.Count() == 0) { + return; + } + lock_guard glock(lock); -void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, - void const* end, ZSTD_dictTableLoadMethod_e dtlm) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashLarge = ms->hashTable; - U32 const hBitsL = cParams->hashLog; - U32 const mls = cParams->minMatch; - U32* const hashSmall = ms->chainTable; - U32 const hBitsS = cParams->chainLog; - const BYTE* const 
base = ms->window.base; - const BYTE* ip = base + ms->nextToUpdate; - const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; - const U32 fastHashFillStep = 3; + // set up a new row group for this chunk collection + ParquetRowGroup row_group; + row_group.num_rows = 0; + row_group.file_offset = writer->GetTotalWritten(); + row_group.__isset.file_offset = true; + row_group.columns.resize(buffer.ColumnCount()); - /* Always insert every fastHashFillStep position into the hash tables. - * Insert the other positions into the large hash table if their entry - * is empty. - */ - for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { - U32 const current = (U32)(ip - base); - U32 i; - for (i = 0; i < fastHashFillStep; ++i) { - size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls); - size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8); - if (i == 0) - hashSmall[smHash] = current + i; - if (i == 0 || hashLarge[lgHash] == 0) - hashLarge[lgHash] = current + i; - /* Only load extra positions for ZSTD_dtlm_full */ - if (dtlm == ZSTD_dtlm_fast) - break; - } } -} + // iterate over each of the columns of the chunk collection and write them + for (idx_t i = 0; i < buffer.ColumnCount(); i++) { + // we start off by writing everything into a temporary buffer + // this is necessary to (1) know the total written size, and (2) to compress it afterwards + BufferedSerializer temp_writer; + // set up some metadata + PageHeader hdr; + hdr.compressed_page_size = 0; + hdr.uncompressed_page_size = 0; + hdr.type = PageType::DATA_PAGE; + hdr.__isset.data_page_header = true; -FORCE_INLINE_TEMPLATE -size_t ZSTD_compressBlock_doubleFast_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, - U32 const mls /* template */, ZSTD_dictMode_e const dictMode) -{ - ZSTD_compressionParameters const* cParams = &ms->cParams; - U32* const hashLong = ms->hashTable; - const U32 hBitsL = cParams->hashLog; - U32* const hashSmall = 
ms->chainTable; - const U32 hBitsS = cParams->chainLog; - const BYTE* const base = ms->window.base; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - /* presumes that, if there is a dictionary, it must be using Attach mode */ - const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); - const BYTE* const prefixLowest = base + prefixLowestIndex; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - HASH_READ_SIZE; - U32 offset_1=rep[0], offset_2=rep[1]; - U32 offsetSaved = 0; + hdr.data_page_header.num_values = buffer.Count(); + hdr.data_page_header.encoding = Encoding::PLAIN; + hdr.data_page_header.definition_level_encoding = Encoding::RLE; + hdr.data_page_header.repetition_level_encoding = Encoding::BIT_PACKED; - const ZSTD_matchState_t* const dms = ms->dictMatchState; - const ZSTD_compressionParameters* const dictCParams = - dictMode == ZSTD_dictMatchState ? - &dms->cParams : NULL; - const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ? - dms->hashTable : NULL; - const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ? - dms->chainTable : NULL; - const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ? - dms->window.dictLimit : 0; - const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? - dms->window.base : NULL; - const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ? - dictBase + dictStartIndex : NULL; - const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? - dms->window.nextSrc : NULL; - const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? - prefixLowestIndex - (U32)(dictEnd - dictBase) : - 0; - const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ? - dictCParams->hashLog : hBitsL; - const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ? 
- dictCParams->chainLog : hBitsS; - const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); + // record the current offset of the writer into the file + // this is the starting position of the current page + auto start_offset = writer->GetTotalWritten(); - DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic"); + // write the definition levels (i.e. the inverse of the nullmask) + // we always bit pack everything - assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); + // first figure out how many bytes we need (1 byte per 8 rows, rounded up) + auto define_byte_count = (buffer.Count() + 7) / 8; + // we need to set up the count as a varint, plus an added marker for the RLE scheme + // for this marker we shift the count left 1 and set low bit to 1 to indicate bit packed literals + uint32_t define_header = (define_byte_count << 1) | 1; + uint32_t define_size = GetVarintSize(define_header) + define_byte_count; - /* if a dictionary is attached, it must be within window range */ - if (dictMode == ZSTD_dictMatchState) { - assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); - } + // we write the actual definitions into the temp_writer for now + temp_writer.Write(define_size); + VarintEncode(define_header, temp_writer); - /* init */ - ip += (dictAndPrefixLength == 0); - if (dictMode == ZSTD_noDict) { - U32 const current = (U32)(ip - base); - U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); - U32 const maxRep = current - windowLow; - if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; - if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; - } - if (dictMode == ZSTD_dictMatchState) { - /* dictMatchState repCode checks don't currently handle repCode == 0 - * disabling. 
*/ - assert(offset_1 <= dictAndPrefixLength); - assert(offset_2 <= dictAndPrefixLength); - } + for (auto &chunk : buffer.Chunks()) { + auto &validity = FlatVector::Validity(chunk->data[i]); + auto validity_data = validity.GetData(); + auto chunk_define_byte_count = (chunk->size() + 7) / 8; + if (!validity_data) { + ValidityMask nop_mask(chunk->size()); + temp_writer.WriteData((const_data_ptr_t)nop_mask.GetData(), chunk_define_byte_count); + } else { + // write the bits of the nullmask + temp_writer.WriteData((const_data_ptr_t)validity_data, chunk_define_byte_count); + } + } + + // now write the actual payload: we write this as PLAIN values (for now? possibly for ever?) + for (auto &chunk : buffer.Chunks()) { + auto &input = *chunk; + auto &input_column = input.data[i]; + auto &mask = FlatVector::Validity(input_column); - /* Main Search Loop */ - while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ - size_t mLength; - U32 offset; - size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); - size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); - size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8); - size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls); - U32 const current = (U32)(ip-base); - U32 const matchIndexL = hashLong[h2]; - U32 matchIndexS = hashSmall[h]; - const BYTE* matchLong = base + matchIndexL; - const BYTE* match = base + matchIndexS; - const U32 repIndex = current + 1 - offset_1; - const BYTE* repMatch = (dictMode == ZSTD_dictMatchState - && repIndex < prefixLowestIndex) ? 
- dictBase + (repIndex - dictIndexDelta) : - base + repIndex; - hashLong[h2] = hashSmall[h] = current; /* update hash tables */ + // write actual payload data + switch (sql_types[i].id()) { + case LogicalTypeId::BOOLEAN: { + auto *ptr = FlatVector::GetData(input_column); + uint8_t byte = 0; + uint8_t byte_pos = 0; + for (idx_t r = 0; r < input.size(); r++) { + if (mask.RowIsValid(r)) { // only encode if non-null + byte |= (ptr[r] & 1) << byte_pos; + byte_pos++; - /* check dictMatchState repcode */ - if (dictMode == ZSTD_dictMatchState - && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; - ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); - goto _match_stored; - } + temp_writer.Write(byte); + if (byte_pos == 8) { + temp_writer.Write(byte); + byte = 0; + byte_pos = 0; + } + } + } + // flush last byte if req + if (byte_pos > 0) { + temp_writer.Write(byte); + } + break; + } + case LogicalTypeId::TINYINT: + TemplatedWritePlain(input_column, input.size(), mask, temp_writer); + break; + case LogicalTypeId::SMALLINT: + TemplatedWritePlain(input_column, input.size(), mask, temp_writer); + break; + case LogicalTypeId::INTEGER: + TemplatedWritePlain(input_column, input.size(), mask, temp_writer); + break; + case LogicalTypeId::BIGINT: + TemplatedWritePlain(input_column, input.size(), mask, temp_writer); + break; + case LogicalTypeId::FLOAT: + TemplatedWritePlain(input_column, input.size(), mask, temp_writer); + break; + case LogicalTypeId::DECIMAL: { + // FIXME: fixed length byte array... 
+ Vector double_vec(LogicalType::DOUBLE); + VectorOperations::Cast(input_column, double_vec, input.size()); + TemplatedWritePlain(double_vec, input.size(), mask, temp_writer); + break; + } + case LogicalTypeId::DOUBLE: + TemplatedWritePlain(input_column, input.size(), mask, temp_writer); + break; + case LogicalTypeId::DATE: { + auto *ptr = FlatVector::GetData(input_column); + for (idx_t r = 0; r < input.size(); r++) { + if (mask.RowIsValid(r)) { + auto ts = Timestamp::FromDatetime(ptr[r], dtime_t(0)); + temp_writer.Write(TimestampToImpalaTimestamp(ts)); + } + } + break; + } + case LogicalTypeId::TIMESTAMP: { + auto *ptr = FlatVector::GetData(input_column); + for (idx_t r = 0; r < input.size(); r++) { + if (mask.RowIsValid(r)) { + temp_writer.Write(TimestampToImpalaTimestamp(ptr[r])); + } + } + break; + } + case LogicalTypeId::BLOB: + case LogicalTypeId::VARCHAR: { + auto *ptr = FlatVector::GetData(input_column); + for (idx_t r = 0; r < input.size(); r++) { + if (mask.RowIsValid(r)) { + temp_writer.Write(ptr[r].GetSize()); + temp_writer.WriteData((const_data_ptr_t)ptr[r].GetDataUnsafe(), ptr[r].GetSize()); + } + } + break; + } + default: + throw NotImplementedException((sql_types[i].ToString())); + } + } - /* check noDict repcode */ - if ( dictMode == ZSTD_noDict - && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { - mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; - ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); - goto _match_stored; - } + // now that we have finished writing the data we know the uncompressed size + hdr.uncompressed_page_size = temp_writer.blob.size; - if (matchIndexL > prefixLowestIndex) { - /* check prefix long match */ - if (MEM_read64(matchLong) == MEM_read64(ip)) { - mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; - offset = (U32)(ip-matchLong); - while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up 
*/ - goto _match_found; - } - } else if (dictMode == ZSTD_dictMatchState) { - /* check dictMatchState long match */ - U32 const dictMatchIndexL = dictHashLong[dictHL]; - const BYTE* dictMatchL = dictBase + dictMatchIndexL; - assert(dictMatchL < dictEnd); + // compress the data based + size_t compressed_size; + data_ptr_t compressed_data; + unique_ptr compressed_buf; + switch (codec) { + case CompressionCodec::UNCOMPRESSED: + compressed_size = temp_writer.blob.size; + compressed_data = temp_writer.blob.data.get(); + break; + case CompressionCodec::SNAPPY: { + compressed_size = snappy::MaxCompressedLength(temp_writer.blob.size); + compressed_buf = unique_ptr(new data_t[compressed_size]); + snappy::RawCompress((const char *)temp_writer.blob.data.get(), temp_writer.blob.size, + (char *)compressed_buf.get(), &compressed_size); + compressed_data = compressed_buf.get(); + break; + } + case CompressionCodec::GZIP: { + MiniZStream s; + compressed_size = s.MaxCompressedLength(temp_writer.blob.size); + compressed_buf = unique_ptr(new data_t[compressed_size]); + s.Compress((const char *)temp_writer.blob.data.get(), temp_writer.blob.size, (char *)compressed_buf.get(), + &compressed_size); + compressed_data = compressed_buf.get(); + break; + } + case CompressionCodec::ZSTD: { + compressed_size = duckdb_zstd::ZSTD_compressBound(temp_writer.blob.size); + compressed_buf = unique_ptr(new data_t[compressed_size]); + compressed_size = duckdb_zstd::ZSTD_compress((void *)compressed_buf.get(), compressed_size, + (const void *)temp_writer.blob.data.get(), + temp_writer.blob.size, ZSTD_CLEVEL_DEFAULT); + compressed_data = compressed_buf.get(); + break; + } + default: + throw InternalException("Unsupported codec for Parquet Writer"); + } - if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) { - mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8; - offset = (U32)(current - dictMatchIndexL - dictIndexDelta); - while (((ip>anchor) & 
(dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ - goto _match_found; - } } + hdr.compressed_page_size = compressed_size; + // now finally write the data to the actual file + hdr.write(protocol.get()); + writer->WriteData(compressed_data, compressed_size); - if (matchIndexS > prefixLowestIndex) { - /* check prefix short match */ - if (MEM_read32(match) == MEM_read32(ip)) { - goto _search_next_long; - } - } else if (dictMode == ZSTD_dictMatchState) { - /* check dictMatchState short match */ - U32 const dictMatchIndexS = dictHashSmall[dictHS]; - match = dictBase + dictMatchIndexS; - matchIndexS = dictMatchIndexS + dictIndexDelta; + auto &column_chunk = row_group.columns[i]; + column_chunk.__isset.meta_data = true; + column_chunk.meta_data.data_page_offset = start_offset; + column_chunk.meta_data.total_compressed_size = writer->GetTotalWritten() - start_offset; + column_chunk.meta_data.codec = codec; + column_chunk.meta_data.path_in_schema.push_back(file_meta_data.schema[i + 1].name); + column_chunk.meta_data.num_values = buffer.Count(); + column_chunk.meta_data.type = file_meta_data.schema[i + 1].type; + } + row_group.num_rows += buffer.Count(); - if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) { - goto _search_next_long; - } } + // append the row group to the file meta data + file_meta_data.row_groups.push_back(row_group); + file_meta_data.num_rows += buffer.Count(); +} - ip += ((ip-anchor) >> kSearchStrength) + 1; -#if defined(__aarch64__) - PREFETCH_L1(ip+256); -#endif - continue; +void ParquetWriter::Finalize() { + auto start_offset = writer->GetTotalWritten(); + file_meta_data.write(protocol.get()); -_search_next_long: + writer->Write(writer->GetTotalWritten() - start_offset); - { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); - size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); - U32 const matchIndexL3 = hashLong[hl3]; - const BYTE* matchL3 = base + matchIndexL3; - hashLong[hl3] 
= current + 1; + // parquet files also end with the string "PAR1" + writer->WriteData((const_data_ptr_t) "PAR1", 4); - /* check prefix long +1 match */ - if (matchIndexL3 > prefixLowestIndex) { - if (MEM_read64(matchL3) == MEM_read64(ip+1)) { - mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; - ip++; - offset = (U32)(ip-matchL3); - while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ - goto _match_found; - } - } else if (dictMode == ZSTD_dictMatchState) { - /* check dict long +1 match */ - U32 const dictMatchIndexL3 = dictHashLong[dictHLNext]; - const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; - assert(dictMatchL3 < dictEnd); - if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) { - mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8; - ip++; - offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta); - while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ - goto _match_found; - } } } + // flush to disk + writer->Sync(); + writer.reset(); +} - /* if no long +1 match, explore the short match we found */ - if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { - mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4; - offset = (U32)(current - matchIndexS); - while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - } else { - mLength = ZSTD_count(ip+4, match+4, iend) + 4; - offset = (U32)(ip - match); - while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - } +} // namespace duckdb - /* fall-through */ -_match_found: - offset_2 = offset_1; - offset_1 = offset; +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #5 +// See the end of this file 
for a list - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-_match_stored: - /* match found */ - ip += mLength; - anchor = ip; +#include - if (ip <= ilimit) { - /* Complementary insertion */ - /* done after iLimit test, as candidates could be > iend-8 */ - { U32 const indexToInsert = current+2; - hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; - hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); - hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; - hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); - } - /* check immediate repcode */ - if (dictMode == ZSTD_dictMatchState) { - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState - && repIndex2 < prefixLowestIndex ? - dictBase + repIndex2 - dictIndexDelta : - base + repIndex2; - if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? 
dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; - U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } } - if (dictMode == ZSTD_noDict) { - while ( (ip <= ilimit) - && ( (offset_2>0) - & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; - U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH); - ip += rLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } } } - } /* while (ip < ilimit) */ +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #5 +// See the end of this file for a list - /* save reps for next block */ - rep[0] = offset_1 ? offset_1 : offsetSaved; - rep[1] = offset_2 ? offset_2 : offsetSaved; +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - /* Return the last literals size */ - return (size_t)(iend - anchor); -} +#ifndef THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_ +#define THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_ +#include -size_t ZSTD_compressBlock_doubleFast( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - const U32 mls = ms->cParams.minMatch; - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict); - case 5 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict); - case 6 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict); - case 7 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict); - } -} +namespace snappy { +// A Sink is an interface that consumes a sequence of bytes. 
+class Sink { + public: + Sink() { } + virtual ~Sink(); -size_t ZSTD_compressBlock_doubleFast_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - const U32 mls = ms->cParams.minMatch; - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState); - case 5 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState); - case 6 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState); - case 7 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState); - } -} + // Append "bytes[0,n-1]" to this. + virtual void Append(const char* bytes, size_t n) = 0; + + // Returns a writable buffer of the specified length for appending. + // May return a pointer to the caller-owned scratch buffer which + // must have at least the indicated length. The returned buffer is + // only valid until the next operation on this Sink. + // + // After writing at most "length" bytes, call Append() with the + // pointer returned from this function and the number of bytes + // written. Many Append() implementations will avoid copying + // bytes if this function returned an internal buffer. + // + // If a non-scratch buffer is returned, the caller may only pass a + // prefix of it to Append(). That is, it is not correct to pass an + // interior pointer of the returned array to Append(). + // + // The default implementation always returns the scratch buffer. + virtual char* GetAppendBuffer(size_t length, char* scratch); + // For higher performance, Sink implementations can provide custom + // AppendAndTakeOwnership() and GetAppendBufferVariable() methods. + // These methods can reduce the number of copies done during + // compression/decompression. 
-static size_t ZSTD_compressBlock_doubleFast_extDict_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, - U32 const mls /* template */) -{ - ZSTD_compressionParameters const* cParams = &ms->cParams; - U32* const hashLong = ms->hashTable; - U32 const hBitsL = cParams->hashLog; - U32* const hashSmall = ms->chainTable; - U32 const hBitsS = cParams->chainLog; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ms->window.base; - const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); - const U32 dictStartIndex = lowLimit; - const U32 dictLimit = ms->window.dictLimit; - const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit; - const BYTE* const prefixStart = base + prefixStartIndex; - const BYTE* const dictBase = ms->window.dictBase; - const BYTE* const dictStart = dictBase + dictStartIndex; - const BYTE* const dictEnd = dictBase + prefixStartIndex; - U32 offset_1=rep[0], offset_2=rep[1]; + // Append "bytes[0,n-1] to the sink. Takes ownership of "bytes" + // and calls the deleter function as (*deleter)(deleter_arg, bytes, n) + // to free the buffer. deleter function must be non NULL. + // + // The default implementation just calls Append and frees "bytes". + // Other implementations may avoid a copy while appending the buffer. + virtual void AppendAndTakeOwnership( + char* bytes, size_t n, void (*deleter)(void*, const char*, size_t), + void *deleter_arg); + + // Returns a writable buffer for appending and writes the buffer's capacity to + // *allocated_size. Guarantees *allocated_size >= min_size. + // May return a pointer to the caller-owned scratch buffer which must have + // scratch_size >= min_size. 
+ // + // The returned buffer is only valid until the next operation + // on this ByteSink. + // + // After writing at most *allocated_size bytes, call Append() with the + // pointer returned from this function and the number of bytes written. + // Many Append() implementations will avoid copying bytes if this function + // returned an internal buffer. + // + // If the sink implementation allocates or reallocates an internal buffer, + // it should use the desired_size_hint if appropriate. If a caller cannot + // provide a reasonable guess at the desired capacity, it should set + // desired_size_hint = 0. + // + // If a non-scratch buffer is returned, the caller may only pass + // a prefix to it to Append(). That is, it is not correct to pass an + // interior pointer to Append(). + // + // The default implementation always returns the scratch buffer. + virtual char* GetAppendBufferVariable( + size_t min_size, size_t desired_size_hint, char* scratch, + size_t scratch_size, size_t* allocated_size); - DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize); + private: + // No copying + Sink(const Sink&); + void operator=(const Sink&); +}; - /* if extDict is invalidated due to maxDistance, switch to "regular" variant */ - if (prefixStartIndex == dictStartIndex) - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict); +// A Source is an interface that yields a sequence of bytes +class Source { + public: + Source() { } + virtual ~Source(); - /* Search Loop */ - while (ip < ilimit) { /* < instead of <=, because (ip+1) */ - const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); - const U32 matchIndex = hashSmall[hSmall]; - const BYTE* const matchBase = matchIndex < prefixStartIndex ? 
dictBase : base; - const BYTE* match = matchBase + matchIndex; + // Return the number of bytes left to read from the source + virtual size_t Available() const = 0; - const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8); - const U32 matchLongIndex = hashLong[hLong]; - const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base; - const BYTE* matchLong = matchLongBase + matchLongIndex; + // Peek at the next flat region of the source. Does not reposition + // the source. The returned region is empty iff Available()==0. + // + // Returns a pointer to the beginning of the region and store its + // length in *len. + // + // The returned region is valid until the next call to Skip() or + // until this object is destroyed, whichever occurs first. + // + // The returned region may be larger than Available() (for example + // if this ByteSource is a view on a substring of a larger source). + // The caller is responsible for ensuring that it only reads the + // Available() bytes. + virtual const char* Peek(size_t* len) = 0; - const U32 current = (U32)(ip-base); - const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ - const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - size_t mLength; - hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */ + // Skip the next n bytes. Invalidates any buffer returned by + // a previous call to Peek(). + // REQUIRES: Available() >= n + virtual void Skip(size_t n) = 0; - if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ - & (repIndex > dictStartIndex)) - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < prefixStartIndex ? 
dictEnd : iend; - mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; - ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); - } else { - if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { - const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; - const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart; - U32 offset; - mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8; - offset = current - matchLongIndex; - while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + private: + // No copying + Source(const Source&); + void operator=(const Source&); +}; - } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { - size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); - U32 const matchIndex3 = hashLong[h3]; - const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base; - const BYTE* match3 = match3Base + matchIndex3; - U32 offset; - hashLong[h3] = current + 1; - if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { - const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend; - const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart; - mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8; - ip++; - offset = current+1 - matchIndex3; - while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ - } else { - const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; - const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? 
dictStart : prefixStart; - mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; - offset = current - matchIndex; - while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - } - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); +// A Source implementation that yields the contents of a flat array +class ByteArraySource : public Source { + public: + ByteArraySource(const char* p, size_t n) : ptr_(p), left_(n) { } + virtual ~ByteArraySource(); + virtual size_t Available() const; + virtual const char* Peek(size_t* len); + virtual void Skip(size_t n); + private: + const char* ptr_; + size_t left_; +}; - } else { - ip += ((ip-anchor) >> kSearchStrength) + 1; - continue; - } } +// A Sink implementation that writes to a flat array without any bound checks. +class UncheckedByteArraySink : public Sink { + public: + explicit UncheckedByteArraySink(char* dest) : dest_(dest) { } + virtual ~UncheckedByteArraySink(); + virtual void Append(const char* data, size_t n); + virtual char* GetAppendBuffer(size_t len, char* scratch); + virtual char* GetAppendBufferVariable( + size_t min_size, size_t desired_size_hint, char* scratch, + size_t scratch_size, size_t* allocated_size); + virtual void AppendAndTakeOwnership( + char* bytes, size_t n, void (*deleter)(void*, const char*, size_t), + void *deleter_arg); - /* move to next sequence start */ - ip += mLength; - anchor = ip; + // Return the current output pointer so that a caller can see how + // many bytes were produced. + // Note: this is not a Sink method. 
+ char* CurrentDestination() const { return dest_; } + private: + char* dest_; +}; - if (ip <= ilimit) { - /* Complementary insertion */ - /* done after iLimit test, as candidates could be > iend-8 */ - { U32 const indexToInsert = current+2; - hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; - hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); - hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; - hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); - } +} // namespace snappy - /* check immediate repcode */ - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; - if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ - & (repIndex2 > dictStartIndex)) - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; - U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } } } +#endif // THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_ - /* save reps for next block */ - rep[0] = offset_1; - rep[1] = offset_2; - /* Return the last literals size */ - return (size_t)(iend - anchor); -} +// LICENSE_CHANGE_END -size_t ZSTD_compressBlock_doubleFast_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - U32 const mls = ms->cParams.minMatch; - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); - case 5 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); - case 6 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); - case 7 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); - } -} +namespace snappy { -} +Source::~Source() { } +Sink::~Sink() { } -// LICENSE_CHANGE_END +char* Sink::GetAppendBuffer(size_t length, char* scratch) { + return scratch; +} +char* Sink::GetAppendBufferVariable( + size_t min_size, size_t desired_size_hint, char* scratch, + size_t scratch_size, size_t* allocated_size) { + *allocated_size = scratch_size; + return scratch; +} -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list +void Sink::AppendAndTakeOwnership( + char* bytes, size_t n, + void (*deleter)(void*, const char*, size_t), + void 
*deleter_arg) { + Append(bytes, n); + (*deleter)(deleter_arg, bytes, n); +} -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ +ByteArraySource::~ByteArraySource() { } - /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */ +size_t ByteArraySource::Available() const { return left_; } +const char* ByteArraySource::Peek(size_t* len) { + *len = left_; + return ptr_; +} -namespace duckdb_zstd { +void ByteArraySource::Skip(size_t n) { + left_ -= n; + ptr_ += n; +} -void ZSTD_fillHashTable(ZSTD_matchState_t* ms, - const void* const end, - ZSTD_dictTableLoadMethod_e dtlm) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hBits = cParams->hashLog; - U32 const mls = cParams->minMatch; - const BYTE* const base = ms->window.base; - const BYTE* ip = base + ms->nextToUpdate; - const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; - const U32 fastHashFillStep = 3; +UncheckedByteArraySink::~UncheckedByteArraySink() { } - /* Always insert every fastHashFillStep position into the hash table. - * Insert the other positions if their hash entry is empty. 
- */ - for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) { - U32 const current = (U32)(ip - base); - size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls); - hashTable[hash0] = current; - if (dtlm == ZSTD_dtlm_fast) continue; - /* Only load extra positions for ZSTD_dtlm_full */ - { U32 p; - for (p = 1; p < fastHashFillStep; ++p) { - size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls); - if (hashTable[hash] == 0) { /* not yet filled */ - hashTable[hash] = current + p; - } } } } +void UncheckedByteArraySink::Append(const char* data, size_t n) { + // Do no copying if the caller filled in the result of GetAppendBuffer() + if (data != dest_) { + memcpy(dest_, data, n); + } + dest_ += n; } +char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) { + return dest_; +} -FORCE_INLINE_TEMPLATE size_t -ZSTD_compressBlock_fast_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, - U32 const mls) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hlog = cParams->hashLog; - /* support stepSize of 0 */ - size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; - const BYTE* const base = ms->window.base; - const BYTE* const istart = (const BYTE*)src; - /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */ - const BYTE* ip0 = istart; - const BYTE* ip1; - const BYTE* anchor = istart; - const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); - const BYTE* const prefixStart = base + prefixStartIndex; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - HASH_READ_SIZE; - U32 offset_1=rep[0], offset_2=rep[1]; - U32 offsetSaved = 0; +void UncheckedByteArraySink::AppendAndTakeOwnership( + char* data, size_t n, + void (*deleter)(void*, const char*, size_t), + void *deleter_arg) { + if 
(data != dest_) { + memcpy(dest_, data, n); + (*deleter)(deleter_arg, data, n); + } + dest_ += n; +} - /* init */ - DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); - ip0 += (ip0 == prefixStart); - ip1 = ip0 + 1; - { U32 const current = (U32)(ip0 - base); - U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); - U32 const maxRep = current - windowLow; - if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; - if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; - } +char* UncheckedByteArraySink::GetAppendBufferVariable( + size_t min_size, size_t desired_size_hint, char* scratch, + size_t scratch_size, size_t* allocated_size) { + *allocated_size = desired_size_hint; + return dest_; +} - /* Main Search Loop */ -#ifdef __INTEL_COMPILER - /* From intel 'The vector pragma indicates that the loop should be - * vectorized if it is legal to do so'. Can be used together with - * #pragma ivdep (but have opted to exclude that because intel - * warns against using it).*/ - #pragma vector always -#endif - while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */ - size_t mLength; - BYTE const* ip2 = ip0 + 2; - size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls); - U32 const val0 = MEM_read32(ip0); - size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls); - U32 const val1 = MEM_read32(ip1); - U32 const current0 = (U32)(ip0-base); - U32 const current1 = (U32)(ip1-base); - U32 const matchIndex0 = hashTable[h0]; - U32 const matchIndex1 = hashTable[h1]; - BYTE const* repMatch = ip2 - offset_1; - const BYTE* match0 = base + matchIndex0; - const BYTE* match1 = base + matchIndex1; - U32 offcode; +} // namespace snappy -#if defined(__aarch64__) - PREFETCH_L1(ip0+256); -#endif - hashTable[h0] = current0; /* update hash table */ - hashTable[h1] = current1; /* update hash table */ +// LICENSE_CHANGE_END - assert(ip0 + 1 == ip1); - if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) { - mLength = (ip2[-1] == repMatch[-1]) ? 
1 : 0; - ip0 = ip2 - mLength; - match0 = repMatch - mLength; - mLength += 4; - offcode = 0; - goto _match; - } - if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) { - /* found a regular match */ - goto _offset; - } - if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) { - /* found a regular match after one literal */ - ip0 = ip1; - match0 = match1; - goto _offset; - } - { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize; - assert(step >= 2); - ip0 += step; - ip1 += step; - continue; - } -_offset: /* Requires: ip0, match0 */ - /* Compute the offset code */ - offset_2 = offset_1; - offset_1 = (U32)(ip0-match0); - offcode = offset_1 + ZSTD_REP_MOVE; - mLength = 4; - /* Count the backwards match length */ - while (((ip0>anchor) & (match0>prefixStart)) - && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */ +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #5 +// See the end of this file for a list -_match: /* Requires: ip0, match0, offcode */ - /* Count the forward length */ - mLength += ZSTD_count(ip0+mLength, match0+mLength, iend); - ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH); - /* match found */ - ip0 += mLength; - anchor = ip0; +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - if (ip0 <= ilimit) { - /* Fill Table */ - assert(base+current0+2 > istart); /* check base overflow */ - hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ - hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); +#include +#include - if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */ - while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; - { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ - hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); - ip0 += rLength; - ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH); - anchor = ip0; - continue; /* faster when present (confirmed on gcc-8) ... (?) */ - } } } - ip1 = ip0 + 1; - } - /* save reps for next block */ - rep[0] = offset_1 ? offset_1 : offsetSaved; - rep[1] = offset_2 ? 
offset_2 : offsetSaved; - /* Return the last literals size */ - return (size_t)(iend - anchor); -} +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #5 +// See the end of this file for a list + +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Various stubs for the open-source version of Snappy. 
+#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ +#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ -size_t ZSTD_compressBlock_fast( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - U32 const mls = ms->cParams.minMatch; - assert(ms->dictMatchState == NULL); - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4); - case 5 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5); - case 6 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6); - case 7 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7); - } -} +// #ifdef HAVE_CONFIG_H +// #include "config.h" +// #endif -FORCE_INLINE_TEMPLATE -size_t ZSTD_compressBlock_fast_dictMatchState_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, U32 const mls) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hlog = cParams->hashLog; - /* support stepSize of 0 */ - U32 const stepSize = cParams->targetLength + !(cParams->targetLength); - const BYTE* const base = ms->window.base; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const U32 prefixStartIndex = ms->window.dictLimit; - const BYTE* const prefixStart = base + prefixStartIndex; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - HASH_READ_SIZE; - U32 offset_1=rep[0], offset_2=rep[1]; - U32 offsetSaved = 0; +#include - const ZSTD_matchState_t* const dms = ms->dictMatchState; - const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; - const U32* const dictHashTable = dms->hashTable; - const U32 dictStartIndex = dms->window.dictLimit; - const BYTE* const dictBase = dms->window.base; - const BYTE* 
const dictStart = dictBase + dictStartIndex; - const BYTE* const dictEnd = dms->window.nextSrc; - const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase); - const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); - const U32 dictHLog = dictCParams->hashLog; +#include +#include +#include - /* if a dictionary is still attached, it necessarily means that - * it is within window size. So we just check it. */ - const U32 maxDistance = 1U << cParams->windowLog; - const U32 endIndex = (U32)((size_t)(ip - base) + srcSize); - assert(endIndex - prefixStartIndex <= maxDistance); - (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ +#ifdef HAVE_SYS_MMAN_H +#include +#endif - /* ensure there will be no no underflow - * when translating a dict index into a local index */ - assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); +#ifdef HAVE_UNISTD_H +#include +#endif - /* init */ - DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); - ip += (dictAndPrefixLength == 0); - /* dictMatchState repCode checks don't currently handle repCode == 0 - * disabling. */ - assert(offset_1 <= dictAndPrefixLength); - assert(offset_2 <= dictAndPrefixLength); +#if defined(_MSC_VER) +#include +#endif // defined(_MSC_VER) - /* Main Search Loop */ - while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ - size_t mLength; - size_t const h = ZSTD_hashPtr(ip, hlog, mls); - U32 const current = (U32)(ip-base); - U32 const matchIndex = hashTable[h]; - const BYTE* match = base + matchIndex; - const U32 repIndex = current + 1 - offset_1; - const BYTE* repMatch = (repIndex < prefixStartIndex) ? 
- dictBase + (repIndex - dictIndexDelta) : - base + repIndex; - hashTable[h] = current; /* update hash table */ +#ifndef __has_feature +#define __has_feature(x) 0 +#endif - if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; - ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); - } else if ( (matchIndex <= prefixStartIndex) ) { - size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); - U32 const dictMatchIndex = dictHashTable[dictHash]; - const BYTE* dictMatch = dictBase + dictMatchIndex; - if (dictMatchIndex <= dictStartIndex || - MEM_read32(dictMatch) != MEM_read32(ip)) { - assert(stepSize >= 1); - ip += ((ip-anchor) >> kSearchStrength) + stepSize; - continue; - } else { - /* found a dict match */ - U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta); - mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; - while (((ip>anchor) & (dictMatch>dictStart)) - && (ip[-1] == dictMatch[-1])) { - ip--; dictMatch--; mLength++; - } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - } - } else if (MEM_read32(match) != MEM_read32(ip)) { - /* it's not a match, and we're not going to check the dictionary */ - assert(stepSize >= 1); - ip += ((ip-anchor) >> kSearchStrength) + stepSize; - continue; - } else { - /* found a regular match */ - U32 const offset = (U32)(ip-match); - mLength = ZSTD_count(ip+4, match+4, iend) + 4; - while (((ip>anchor) & (match>prefixStart)) - && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; - 
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - } +#if __has_feature(memory_sanitizer) +#include +#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) \ + __msan_unpoison((address), (size)) +#else +#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) /* empty */ +#endif // __has_feature(memory_sanitizer) - /* match found */ - ip += mLength; - anchor = ip; - if (ip <= ilimit) { - /* Fill Table */ - assert(base+current+2 > istart); /* check base overflow */ - hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */ - hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); - /* check immediate repcode */ - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? - dictBase - dictIndexDelta + repIndex2 : - base + repIndex2; - if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; - U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); - hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } - } - } +#if defined(__x86_64__) - /* save reps for next block */ - rep[0] = offset_1 ? offset_1 : offsetSaved; - rep[1] = offset_2 ? offset_2 : offsetSaved; +// Enable 64-bit optimized versions of some routines. 
+#define ARCH_K8 1 - /* Return the last literals size */ - return (size_t)(iend - anchor); -} +#elif defined(__ppc64__) -size_t ZSTD_compressBlock_fast_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - U32 const mls = ms->cParams.minMatch; - assert(ms->dictMatchState != NULL); - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4); - case 5 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5); - case 6 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6); - case 7 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7); - } -} +#define ARCH_PPC 1 +#elif defined(__aarch64__) -static size_t ZSTD_compressBlock_fast_extDict_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, U32 const mls) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hlog = cParams->hashLog; - /* support stepSize of 0 */ - U32 const stepSize = cParams->targetLength + !(cParams->targetLength); - const BYTE* const base = ms->window.base; - const BYTE* const dictBase = ms->window.dictBase; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); - const U32 dictStartIndex = lowLimit; - const BYTE* const dictStart = dictBase + dictStartIndex; - const U32 dictLimit = ms->window.dictLimit; - const U32 prefixStartIndex = dictLimit < lowLimit ? 
lowLimit : dictLimit; - const BYTE* const prefixStart = base + prefixStartIndex; - const BYTE* const dictEnd = dictBase + prefixStartIndex; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - U32 offset_1=rep[0], offset_2=rep[1]; +#define ARCH_ARM 1 - DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1); +#endif - /* switch to "regular" variant if extDict is invalidated due to maxDistance */ - if (prefixStartIndex == dictStartIndex) - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); +// Needed by OS X, among others. +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif - /* Search Loop */ - while (ip < ilimit) { /* < instead of <=, because (ip+1) */ - const size_t h = ZSTD_hashPtr(ip, hlog, mls); - const U32 matchIndex = hashTable[h]; - const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; - const BYTE* match = matchBase + matchIndex; - const U32 current = (U32)(ip-base); - const U32 repIndex = current + 1 - offset_1; - const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - hashTable[h] = current; /* update hash table */ - DEBUGLOG(7, "offset_1 = %u , current = %u", offset_1, current); - assert(offset_1 <= current +1); /* check repIndex */ +// The size of an array, if known at compile-time. +// Will give unexpected results if used on a pointer. +// We undefine it first, since some compilers already have a definition. +#ifdef ARRAYSIZE +#undef ARRAYSIZE +#endif +#define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a))) - if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? 
dictEnd : iend; - size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; - ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH); - ip += rLength; - anchor = ip; - } else { - if ( (matchIndex < dictStartIndex) || - (MEM_read32(match) != MEM_read32(ip)) ) { - assert(stepSize >= 1); - ip += ((ip-anchor) >> kSearchStrength) + stepSize; - continue; - } - { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; - const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; - U32 const offset = current - matchIndex; - size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; - while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - offset_2 = offset_1; offset_1 = offset; /* update offset history */ - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - ip += mLength; - anchor = ip; - } } +// Static prediction hints. +#ifdef HAVE_BUILTIN_EXPECT +#define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(x, 0)) +#define SNAPPY_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) +#else +#define SNAPPY_PREDICT_FALSE(x) x +#define SNAPPY_PREDICT_TRUE(x) x +#endif - if (ip <= ilimit) { - /* Fill Table */ - hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; - hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); - /* check immediate repcode */ - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex2 = current2 - offset_2; - const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; - if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */ - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; - { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH); - hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } } } +// This is only used for recomputing the tag byte table used during +// decompression; for simplicity we just remove it from the open-source +// version (anyone who wants to regenerate it can just do the call +// themselves within main()). +#define DEFINE_bool(flag_name, default_value, description) \ + bool FLAGS_ ## flag_name = default_value +#define DECLARE_bool(flag_name) \ + extern bool FLAGS_ ## flag_name - /* save reps for next block */ - rep[0] = offset_1; - rep[1] = offset_2; +namespace snappy { - /* Return the last literals size */ - return (size_t)(iend - anchor); +//static const uint32 kuint32max = static_cast(0xFFFFFFFF); +//static const int64 kint64max = static_cast(0x7FFFFFFFFFFFFFFFLL); + + +// HM: Always use aligned load to keep ourselves out of trouble. Sorry. 
+ +inline uint16 UNALIGNED_LOAD16(const void *p) { + uint16 t; + memcpy(&t, p, sizeof t); + return t; +} + +inline uint32 UNALIGNED_LOAD32(const void *p) { + uint32 t; + memcpy(&t, p, sizeof t); + return t; } +inline uint64 UNALIGNED_LOAD64(const void *p) { + uint64 t; + memcpy(&t, p, sizeof t); + return t; +} -size_t ZSTD_compressBlock_fast_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - U32 const mls = ms->cParams.minMatch; - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); - case 5 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); - case 6 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); - case 7 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); - } +inline void UNALIGNED_STORE16(void *p, uint16 v) { + memcpy(p, &v, sizeof v); } +inline void UNALIGNED_STORE32(void *p, uint32 v) { + memcpy(p, &v, sizeof v); } +inline void UNALIGNED_STORE64(void *p, uint64 v) { + memcpy(p, &v, sizeof v); +} -// LICENSE_CHANGE_END +// The following guarantees declaration of the byte swap functions. +#if defined(SNAPPY_IS_BIG_ENDIAN) -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list +#ifdef HAVE_SYS_BYTEORDER_H +#include +#endif -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
- */ +#ifdef HAVE_SYS_ENDIAN_H +#include +#endif +#ifdef _MSC_VER +#include +#define bswap_16(x) _byteswap_ushort(x) +#define bswap_32(x) _byteswap_ulong(x) +#define bswap_64(x) _byteswap_uint64(x) +#elif defined(__APPLE__) +// Mac OS X / Darwin features +#include +#define bswap_16(x) OSSwapInt16(x) +#define bswap_32(x) OSSwapInt32(x) +#define bswap_64(x) OSSwapInt64(x) +#elif defined(HAVE_BYTESWAP_H) +#include +#elif defined(bswap32) +// FreeBSD defines bswap{16,32,64} in (already #included). +#define bswap_16(x) bswap16(x) +#define bswap_32(x) bswap32(x) +#define bswap_64(x) bswap64(x) -/*-************************************* -* Binary Tree search -***************************************/ +#elif defined(BSWAP_64) +// Solaris 10 defines BSWAP_{16,32,64} in (already #included). +#define bswap_16(x) BSWAP_16(x) +#define bswap_32(x) BSWAP_32(x) +#define bswap_64(x) BSWAP_64(x) -namespace duckdb_zstd { +#else -static void -ZSTD_updateDUBT(ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* iend, - U32 mls) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hashLog = cParams->hashLog; +inline uint16 bswap_16(uint16 x) { + return (x << 8) | (x >> 8); +} - U32* const bt = ms->chainTable; - U32 const btLog = cParams->chainLog - 1; - U32 const btMask = (1 << btLog) - 1; +inline uint32 bswap_32(uint32 x) { + x = ((x & 0xff00ff00UL) >> 8) | ((x & 0x00ff00ffUL) << 8); + return (x >> 16) | (x << 16); +} - const BYTE* const base = ms->window.base; - U32 const target = (U32)(ip - base); - U32 idx = ms->nextToUpdate; +inline uint64 bswap_64(uint64 x) { + x = ((x & 0xff00ff00ff00ff00ULL) >> 8) | ((x & 0x00ff00ff00ff00ffULL) << 8); + x = ((x & 0xffff0000ffff0000ULL) >> 16) | ((x & 0x0000ffff0000ffffULL) << 16); + return (x >> 32) | (x << 32); +} - if (idx != target) - DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)", - idx, target, ms->window.dictLimit); - assert(ip + 8 <= iend); /* condition for 
ZSTD_hashPtr */ - (void)iend; +#endif - assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */ - for ( ; idx < target ; idx++) { - size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */ - U32 const matchIndex = hashTable[h]; +#endif // defined(SNAPPY_IS_BIG_ENDIAN) - U32* const nextCandidatePtr = bt + 2*(idx&btMask); - U32* const sortMarkPtr = nextCandidatePtr + 1; +// Convert to little-endian storage, opposite of network format. +// Convert x from host to little endian: x = LittleEndian.FromHost(x); +// convert x from little endian to host: x = LittleEndian.ToHost(x); +// +// Store values into unaligned memory converting to little endian order: +// LittleEndian.Store16(p, x); +// +// Load unaligned values stored in little endian converting to host order: +// x = LittleEndian.Load16(p); +class LittleEndian { + public: + // Conversion functions. +#if defined(SNAPPY_IS_BIG_ENDIAN) - DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx); - hashTable[h] = idx; /* Update Hash Table */ - *nextCandidatePtr = matchIndex; /* update BT like a chain */ - *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK; - } - ms->nextToUpdate = target; -} + static uint16 FromHost16(uint16 x) { return bswap_16(x); } + static uint16 ToHost16(uint16 x) { return bswap_16(x); } + static uint32 FromHost32(uint32 x) { return bswap_32(x); } + static uint32 ToHost32(uint32 x) { return bswap_32(x); } -/** ZSTD_insertDUBT1() : - * sort one already inserted but unsorted position - * assumption : current >= btlow == (current - btmask) - * doesn't fail */ -static void -ZSTD_insertDUBT1(ZSTD_matchState_t* ms, - U32 current, const BYTE* inputEnd, - U32 nbCompares, U32 btLow, - const ZSTD_dictMode_e dictMode) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const bt = ms->chainTable; - U32 const btLog = cParams->chainLog - 1; - U32 const btMask = (1 << btLog) - 1; - size_t commonLengthSmaller=0, commonLengthLarger=0; - const BYTE* const base = 
ms->window.base; - const BYTE* const dictBase = ms->window.dictBase; - const U32 dictLimit = ms->window.dictLimit; - const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current; - const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* match; - U32* smallerPtr = bt + 2*(current&btMask); - U32* largerPtr = smallerPtr + 1; - U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ - U32 dummy32; /* to be nullified at the end */ - U32 const windowValid = ms->window.lowLimit; - U32 const maxDistance = 1U << cParams->windowLog; - U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid; + static bool IsLittleEndian() { return false; } +#else // !defined(SNAPPY_IS_BIG_ENDIAN) - DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)", - current, dictLimit, windowLow); - assert(current >= btLow); - assert(ip < iend); /* condition for ZSTD_count */ + static uint16 FromHost16(uint16 x) { return x; } + static uint16 ToHost16(uint16 x) { return x; } - while (nbCompares-- && (matchIndex > windowLow)) { - U32* const nextPtr = bt + 2*(matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - assert(matchIndex < current); - /* note : all candidates are now supposed sorted, - * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK - * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */ + static uint32 FromHost32(uint32 x) { return x; } + static uint32 ToHost32(uint32 x) { return x; } - if ( (dictMode != ZSTD_extDict) - || (matchIndex+matchLength >= dictLimit) /* both in current segment*/ - || (current < 
dictLimit) /* both in extDict */) { - const BYTE* const mBase = ( (dictMode != ZSTD_extDict) - || (matchIndex+matchLength >= dictLimit)) ? - base : dictBase; - assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */ - || (current < dictLimit) ); - match = mBase + matchIndex; - matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); - } else { - match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); - if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* preparation for next read of match[matchLength] */ - } + static bool IsLittleEndian() { return true; } - DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ", - current, matchIndex, (U32)matchLength); +#endif // !defined(SNAPPY_IS_BIG_ENDIAN) - if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ - break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ - } + // Functions to do unaligned loads and stores in little-endian order. 
+ static uint16 Load16(const void *p) { + return ToHost16(UNALIGNED_LOAD16(p)); + } - if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ - /* match is smaller than current */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ - DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u", - matchIndex, btLow, nextPtr[1]); - smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ - matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ - } else { - /* match is larger than current */ - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ - DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u", - matchIndex, btLow, nextPtr[0]); - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } } + static void Store16(void *p, uint16 v) { + UNALIGNED_STORE16(p, FromHost16(v)); + } - *smallerPtr = *largerPtr = 0; -} + static uint32 Load32(const void *p) { + return ToHost32(UNALIGNED_LOAD32(p)); + } + static void Store32(void *p, uint32 v) { + UNALIGNED_STORE32(p, FromHost32(v)); + } +}; -static size_t -ZSTD_DUBT_findBetterDictMatch ( - ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iend, - size_t* offsetPtr, - size_t bestLength, - U32 nbCompares, - U32 const mls, - const ZSTD_dictMode_e dictMode) -{ - const ZSTD_matchState_t * const dms = ms->dictMatchState; - const ZSTD_compressionParameters* const dmsCParams = &dms->cParams; - const U32 * const dictHashTable = dms->hashTable; - U32 const hashLog = dmsCParams->hashLog; - size_t const h = ZSTD_hashPtr(ip, hashLog, mls); - U32 dictMatchIndex = dictHashTable[h]; +// Some bit-manipulation 
functions. +class Bits { + public: + // Return floor(log2(n)) for positive integer n. + static int Log2FloorNonZero(uint32 n); - const BYTE* const base = ms->window.base; - const BYTE* const prefixStart = base + ms->window.dictLimit; - U32 const current = (U32)(ip-base); - const BYTE* const dictBase = dms->window.base; - const BYTE* const dictEnd = dms->window.nextSrc; - U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base); - U32 const dictLowLimit = dms->window.lowLimit; - U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit; + // Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. + static int Log2Floor(uint32 n); - U32* const dictBt = dms->chainTable; - U32 const btLog = dmsCParams->chainLog - 1; - U32 const btMask = (1 << btLog) - 1; - U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask; + // Return the first set least / most significant bit, 0-indexed. Returns an + // undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except + // that it's 0-indexed. 
+ static int FindLSBSetNonZero(uint32 n); - size_t commonLengthSmaller=0, commonLengthLarger=0; +#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) + static int FindLSBSetNonZero64(uint64 n); +#endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) - (void)dictMode; - assert(dictMode == ZSTD_dictMatchState); + private: + // No copying + Bits(const Bits&); + void operator=(const Bits&); +}; - while (nbCompares-- && (dictMatchIndex > dictLowLimit)) { - U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - const BYTE* match = dictBase + dictMatchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); - if (dictMatchIndex+matchLength >= dictHighLimit) - match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */ +#ifdef HAVE_BUILTIN_CTZ - if (matchLength > bestLength) { - U32 matchIndex = dictMatchIndex + dictIndexDelta; - if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) { - DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)", - current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex); - bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; - } - if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */ - break; /* drop, to guarantee consistency (miss a little bit of compression) */ - } - } +inline int Bits::Log2FloorNonZero(uint32 n) { + assert(n != 0); + // (31 ^ x) is equivalent to (31 - x) for x in [0, 31]. 
An easy proof + // represents subtraction in base 2 and observes that there's no carry. + // + // GCC and Clang represent __builtin_clz on x86 as 31 ^ _bit_scan_reverse(x). + // Using "31 ^" here instead of "31 -" allows the optimizer to strip the + // function body down to _bit_scan_reverse(x). + return 31 ^ __builtin_clz(n); +} - if (match[matchLength] < ip[matchLength]) { - if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - } else { - /* match is larger than current */ - if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ - commonLengthLarger = matchLength; - dictMatchIndex = nextPtr[0]; - } - } +inline int Bits::Log2Floor(uint32 n) { + return (n == 0) ? -1 : Bits::Log2FloorNonZero(n); +} - if (bestLength >= MINMATCH) { - U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; - DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)", - current, (U32)bestLength, (U32)*offsetPtr, mIndex); - } - return bestLength; +inline int Bits::FindLSBSetNonZero(uint32 n) { + assert(n != 0); + return __builtin_ctz(n); +} +#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) +inline int Bits::FindLSBSetNonZero64(uint64 n) { + assert(n != 0); + return __builtin_ctzll(n); } +#endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) +#elif defined(_MSC_VER) -static size_t -ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iend, - size_t* offsetPtr, - U32 const mls, - const ZSTD_dictMode_e dictMode) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hashLog = cParams->hashLog; - size_t const h = ZSTD_hashPtr(ip, hashLog, mls); - U32 
matchIndex = hashTable[h]; +inline int Bits::Log2FloorNonZero(uint32 n) { + assert(n != 0); + unsigned long where; + _BitScanReverse(&where, n); + return static_cast(where); +} - const BYTE* const base = ms->window.base; - U32 const current = (U32)(ip-base); - U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog); +inline int Bits::Log2Floor(uint32 n) { + unsigned long where; + if (_BitScanReverse(&where, n)) + return static_cast(where); + return -1; +} - U32* const bt = ms->chainTable; - U32 const btLog = cParams->chainLog - 1; - U32 const btMask = (1 << btLog) - 1; - U32 const btLow = (btMask >= current) ? 0 : current - btMask; - U32 const unsortLimit = MAX(btLow, windowLow); +inline int Bits::FindLSBSetNonZero(uint32 n) { + assert(n != 0); + unsigned long where; + if (_BitScanForward(&where, n)) + return static_cast(where); + return 32; +} - U32* nextCandidate = bt + 2*(matchIndex&btMask); - U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1; - U32 nbCompares = 1U << cParams->searchLog; - U32 nbCandidates = nbCompares; - U32 previousCandidate = 0; +#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) +inline int Bits::FindLSBSetNonZero64(uint64 n) { + assert(n != 0); + unsigned long where; + if (_BitScanForward64(&where, n)) + return static_cast(where); + return 64; +} +#endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) - DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current); - assert(ip <= iend-8); /* required for h calculation */ +#else // Portable versions. 
- /* reach end of unsorted candidates list */ - while ( (matchIndex > unsortLimit) - && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK) - && (nbCandidates > 1) ) { - DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted", - matchIndex); - *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */ - previousCandidate = matchIndex; - matchIndex = *nextCandidate; - nextCandidate = bt + 2*(matchIndex&btMask); - unsortedMark = bt + 2*(matchIndex&btMask) + 1; - nbCandidates --; - } +inline int Bits::Log2FloorNonZero(uint32 n) { + assert(n != 0); - /* nullify last candidate if it's still unsorted - * simplification, detrimental to compression ratio, beneficial for speed */ - if ( (matchIndex > unsortLimit) - && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) { - DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u", - matchIndex); - *nextCandidate = *unsortedMark = 0; + int log = 0; + uint32 value = n; + for (int i = 4; i >= 0; --i) { + int shift = (1 << i); + uint32 x = value >> shift; + if (x != 0) { + value = x; + log += shift; } + } + assert(value == 1); + return log; +} - /* batch sort stacked candidates */ - matchIndex = previousCandidate; - while (matchIndex) { /* will end on matchIndex == 0 */ - U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1; - U32 const nextCandidateIdx = *nextCandidateIdxPtr; - ZSTD_insertDUBT1(ms, matchIndex, iend, - nbCandidates, unsortLimit, dictMode); - matchIndex = nextCandidateIdx; - nbCandidates++; +inline int Bits::Log2Floor(uint32 n) { + return (n == 0) ? 
-1 : Bits::Log2FloorNonZero(n); +} + +inline int Bits::FindLSBSetNonZero(uint32 n) { + assert(n != 0); + + int rc = 31; + for (int i = 4, shift = 1 << 4; i >= 0; --i) { + const uint32 x = n << shift; + if (x != 0) { + n = x; + rc -= shift; } + shift >>= 1; + } + return rc; +} - /* find longest match */ - { size_t commonLengthSmaller = 0, commonLengthLarger = 0; - const BYTE* const dictBase = ms->window.dictBase; - const U32 dictLimit = ms->window.dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const prefixStart = base + dictLimit; - U32* smallerPtr = bt + 2*(current&btMask); - U32* largerPtr = bt + 2*(current&btMask) + 1; - U32 matchEndIdx = current + 8 + 1; - U32 dummy32; /* to be nullified at the end */ - size_t bestLength = 0; +#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) +// FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero(). +inline int Bits::FindLSBSetNonZero64(uint64 n) { + assert(n != 0); - matchIndex = hashTable[h]; - hashTable[h] = current; /* Update Hash Table */ + const uint32 bottombits = static_cast(n); + if (bottombits == 0) { + // Bottom bits are zero, so scan in top bits + return 32 + FindLSBSetNonZero(static_cast(n >> 32)); + } else { + return FindLSBSetNonZero(bottombits); + } +} +#endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) - while (nbCompares-- && (matchIndex > windowLow)) { - U32* const nextPtr = bt + 2*(matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - const BYTE* match; +#endif // End portable versions. 
- if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) { - match = base + matchIndex; - matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); - } else { - match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); - if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ - } +// Variable-length integer encoding. +class Varint { + public: + // Maximum lengths of varint encoding of uint32. + static const int kMax32 = 5; - if (matchLength > bestLength) { - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) - bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; - if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ - if (dictMode == ZSTD_dictMatchState) { - nbCompares = 0; /* in addition to avoiding checking any - * further in this loop, make sure we - * skip checking in the dictionary. */ - } - break; /* drop, to guarantee consistency (miss a little bit of compression) */ - } - } + // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1]. + // Never reads a character at or beyond limit. If a valid/terminated varint32 + // was found in the range, stores it in *OUTPUT and returns a pointer just + // past the last byte of the varint32. Else returns NULL. On success, + // "result <= limit". 
+ static const char* Parse32WithLimit(const char* ptr, const char* limit, + uint32* OUTPUT); - if (match[matchLength] < ip[matchLength]) { - /* match is smaller than current */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - } else { - /* match is larger than current */ - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } } + // REQUIRES "ptr" points to a buffer of length sufficient to hold "v". + // EFFECTS Encodes "v" into "ptr" and returns a pointer to the + // byte just past the last encoded byte. + static char* Encode32(char* ptr, uint32 v); - *smallerPtr = *largerPtr = 0; + // EFFECTS Appends the varint representation of "value" to "*s". 
+ static void Append32(string* s, uint32 value); +}; - if (dictMode == ZSTD_dictMatchState && nbCompares) { - bestLength = ZSTD_DUBT_findBetterDictMatch( - ms, ip, iend, - offsetPtr, bestLength, nbCompares, - mls, dictMode); - } +inline const char* Varint::Parse32WithLimit(const char* p, + const char* l, + uint32* OUTPUT) { + const unsigned char* ptr = reinterpret_cast(p); + const unsigned char* limit = reinterpret_cast(l); + uint32 b, result; + if (ptr >= limit) return NULL; + b = *(ptr++); result = b & 127; if (b < 128) goto done; + if (ptr >= limit) return NULL; + b = *(ptr++); result |= (b & 127) << 7; if (b < 128) goto done; + if (ptr >= limit) return NULL; + b = *(ptr++); result |= (b & 127) << 14; if (b < 128) goto done; + if (ptr >= limit) return NULL; + b = *(ptr++); result |= (b & 127) << 21; if (b < 128) goto done; + if (ptr >= limit) return NULL; + b = *(ptr++); result |= (b & 127) << 28; if (b < 16) goto done; + return NULL; // Value is too long to be a varint32 + done: + *OUTPUT = result; + return reinterpret_cast(ptr); +} - assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */ - ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ - if (bestLength >= MINMATCH) { - U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; - DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", - current, (U32)bestLength, (U32)*offsetPtr, mIndex); - } - return bestLength; - } +inline char* Varint::Encode32(char* sptr, uint32 v) { + // Operate on characters as unsigneds + unsigned char* ptr = reinterpret_cast(sptr); + static const int B = 128; + if (v < (1<<7)) { + *(ptr++) = v; + } else if (v < (1<<14)) { + *(ptr++) = v | B; + *(ptr++) = v>>7; + } else if (v < (1<<21)) { + *(ptr++) = v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = v>>14; + } else if (v < (1<<28)) { + *(ptr++) = v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = (v>>14) | B; + *(ptr++) = v>>21; + } else { + *(ptr++) = 
v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = (v>>14) | B; + *(ptr++) = (v>>21) | B; + *(ptr++) = v>>28; + } + return reinterpret_cast(ptr); } +// If you know the internal layout of the std::string in use, you can +// replace this function with one that resizes the string without +// filling the new space with zeros (if applicable) -- +// it will be non-portable but faster. +inline void STLStringResizeUninitialized(string* s, size_t new_size) { + s->resize(new_size); +} -/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ -FORCE_INLINE_TEMPLATE size_t -ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 mls /* template */, - const ZSTD_dictMode_e dictMode) -{ - DEBUGLOG(7, "ZSTD_BtFindBestMatch"); - if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateDUBT(ms, ip, iLimit, mls); - return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode); +// Return a mutable char* pointing to a string's internal buffer, +// which may not be null-terminated. Writing through this pointer will +// modify the string. +// +// string_as_array(&str)[i] is valid for 0 <= i < str.size() until the +// next call to a string method that invalidates iterators. +// +// As of 2006-04, there is no standard-blessed way of getting a +// mutable reference to a string's internal buffer. However, issue 530 +// (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530) +// proposes this as the method. It will officially be part of the standard +// for C++0x. This should already work on all current implementations. +inline char* string_as_array(string* str) { + return str->empty() ? 
NULL : &*str->begin(); } +} // namespace snappy -static size_t -ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); - case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); - case 7 : - case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); - } -} +#endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ -static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); - case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); - case 7 : - case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); - } -} +// LICENSE_CHANGE_END -static size_t ZSTD_BtFindBestMatch_extDict_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); - case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); - case 7 : - case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); - } +namespace snappy { + +void Varint::Append32(string* s, uint32 value) { + char buf[Varint::kMax32]; + const char* p = Varint::Encode32(buf, value); + s->append(buf, p - buf); } +} // namespace snappy -/* ********************************* -* Hash Chain -***********************************/ -#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] +// LICENSE_CHANGE_END -/* Update 
chains up to ip (excluded) - Assumption : always within prefix (i.e. not within extDict) */ -static U32 ZSTD_insertAndFindFirstIndex_internal( - ZSTD_matchState_t* ms, - const ZSTD_compressionParameters* const cParams, - const BYTE* ip, U32 const mls) -{ - U32* const hashTable = ms->hashTable; - const U32 hashLog = cParams->hashLog; - U32* const chainTable = ms->chainTable; - const U32 chainMask = (1 << cParams->chainLog) - 1; - const BYTE* const base = ms->window.base; - const U32 target = (U32)(ip - base); - U32 idx = ms->nextToUpdate; - while(idx < target) { /* catch up */ - size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); - NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; - hashTable[h] = idx; - idx++; - } +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #5 +// See the end of this file for a list - ms->nextToUpdate = target; - return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; -} +// Copyright 2005 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; - return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); -} -/* inlining is important to hardwire a hot branch (template emulation) */ -FORCE_INLINE_TEMPLATE -size_t ZSTD_HcFindBestMatch_generic ( - ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 mls, const ZSTD_dictMode_e dictMode) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const chainTable = ms->chainTable; - const U32 chainSize = (1 << cParams->chainLog); - const U32 chainMask = chainSize-1; - const BYTE* const base = ms->window.base; - const BYTE* const dictBase = ms->window.dictBase; - const U32 dictLimit = ms->window.dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const U32 current = (U32)(ip-base); - const U32 maxDistance = 1U << cParams->windowLog; - const U32 lowestValid = ms->window.lowLimit; - const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid; - const U32 isDictionary = (ms->loadedDictEnd != 0); - const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; - const U32 minChain = current > chainSize ? 
current - chainSize : 0; - U32 nbAttempts = 1U << cParams->searchLog; - size_t ml=4-1; - /* HC4 match finder */ - U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #5 +// See the end of this file for a list - for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) { - size_t currentMl=0; - if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { - const BYTE* const match = base + matchIndex; - assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ - if (match[ml] == ip[ml]) /* potentially better */ - currentMl = ZSTD_count(ip, match, iLimit); - } else { - const BYTE* const match = dictBase + matchIndex; - assert(match+4 <= dictEnd); - if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; - } +// Copyright 2008 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Internals shared between the Snappy implementation and its unittest. - /* save best solution */ - if (currentMl > ml) { - ml = currentMl; - *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; - if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ - } +#ifndef THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ +#define THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ - if (matchIndex <= minChain) break; - matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); - } - if (dictMode == ZSTD_dictMatchState) { - const ZSTD_matchState_t* const dms = ms->dictMatchState; - const U32* const dmsChainTable = dms->chainTable; - const U32 dmsChainSize = (1 << dms->cParams.chainLog); - const U32 dmsChainMask = dmsChainSize - 1; - const U32 dmsLowestIndex = dms->window.dictLimit; - const BYTE* const dmsBase = dms->window.base; - const BYTE* const dmsEnd = dms->window.nextSrc; - const U32 dmsSize = (U32)(dmsEnd - dmsBase); - const U32 dmsIndexDelta = dictLimit - dmsSize; - const U32 dmsMinChain = dmsSize > dmsChainSize ? 
dmsSize - dmsChainSize : 0; - matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)]; +namespace snappy { +namespace internal { - for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) { - size_t currentMl=0; - const BYTE* const match = dmsBase + matchIndex; - assert(match+4 <= dmsEnd); - if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; +// Working memory performs a single allocation to hold all scratch space +// required for compression. +class WorkingMemory { + public: + explicit WorkingMemory(size_t input_size); + ~WorkingMemory(); - /* save best solution */ - if (currentMl > ml) { - ml = currentMl; - *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; - if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ - } + // Allocates and clears a hash table using memory in "*this", + // stores the number of buckets in "*table_size" and returns a pointer to + // the base of the hash table. + uint16* GetHashTable(size_t fragment_size, int* table_size) const; + char* GetScratchInput() const { return input_; } + char* GetScratchOutput() const { return output_; } - if (matchIndex <= dmsMinChain) break; - matchIndex = dmsChainTable[matchIndex & dmsChainMask]; - } - } + private: + char* mem_; // the allocated memory, never nullptr + size_t size_; // the size of the allocated memory, never 0 + uint16* table_; // the pointer to the hashtable + char* input_; // the pointer to the input scratch buffer + char* output_; // the pointer to the output scratch buffer - return ml; -} + // No copying + WorkingMemory(const WorkingMemory&); + void operator=(const WorkingMemory&); +}; +// Flat array compression that does not emit the "uncompressed length" +// prefix. Compresses "input" string to the "*op" buffer. 
+// +// REQUIRES: "input_length <= kBlockSize" +// REQUIRES: "op" points to an array of memory that is at least +// "MaxCompressedLength(input_length)" in size. +// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. +// REQUIRES: "table_size" is a power of two +// +// Returns an "end" pointer into "op" buffer. +// "end - op" is the compressed size of "input". +char* CompressFragment(const char* input, + size_t input_length, + char* op, + uint16* table, + const int table_size); -FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); - case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); - case 7 : - case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); - } -} +// Find the largest n such that +// +// s1[0,n-1] == s2[0,n-1] +// and n <= (s2_limit - s2). +// +// Return make_pair(n, n < 8). +// Does not read *s2_limit or beyond. +// Does not read *(s1 + (s2_limit - s2)) or beyond. +// Requires that s2_limit >= s2. +// +// Separate implementation for 64-bit, little-endian cpus. +#if !defined(SNAPPY_IS_BIG_ENDIAN) && \ + (defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)) +static inline std::pair FindMatchLength(const char* s1, + const char* s2, + const char* s2_limit) { + assert(s2_limit >= s2); + size_t matched = 0; + // This block isn't necessary for correctness; we could just start looping + // immediately. As an optimization though, it is useful. It creates some not + // uncommon code paths that determine, without extra effort, whether the match + // length is less than 8. In short, we are hoping to avoid a conditional + // branch, and perhaps get better code layout from the C++ compiler. 
+ if (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) { + uint64 a1 = UNALIGNED_LOAD64(s1); + uint64 a2 = UNALIGNED_LOAD64(s2); + if (a1 != a2) { + return std::pair(Bits::FindLSBSetNonZero64(a1 ^ a2) >> 3, + true); + } else { + matched = 8; + s2 += 8; + } + } -static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); - case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); - case 7 : - case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); + // Find out how long the match is. We loop over the data 64 bits at a + // time until we find a 64-bit block that doesn't match; then we find + // the first non-matching bit and use that to calculate the total + // length of the match. + while (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) { + if (UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) { + s2 += 8; + matched += 8; + } else { + uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched); + int matching_bits = Bits::FindLSBSetNonZero64(x); + matched += matching_bits >> 3; + assert(matched >= 8); + return std::pair(matched, false); + } + } + while (SNAPPY_PREDICT_TRUE(s2 < s2_limit)) { + if (s1[matched] == *s2) { + ++s2; + ++matched; + } else { + return std::pair(matched, matched < 8); } + } + return std::pair(matched, matched < 8); } +#else +static inline std::pair FindMatchLength(const char* s1, + const char* s2, + const char* s2_limit) { + // Implementation based on the x86-64 version, above. 
+ assert(s2_limit >= s2); + int matched = 0; - -FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); - case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); - case 7 : - case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); + while (s2 <= s2_limit - 4 && + UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) { + s2 += 4; + matched += 4; + } + if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 4) { + uint32 x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched); + int matching_bits = Bits::FindLSBSetNonZero(x); + matched += matching_bits >> 3; + } else { + while ((s2 < s2_limit) && (s1[matched] == *s2)) { + ++s2; + ++matched; } + } + return std::pair(matched, matched < 8); } +#endif +// Lookup tables for decompression code. Give --snappy_dump_decompression_table +// to the unit test to recompute char_table. -/* ******************************* -* Common parser - lazy strategy -*********************************/ -typedef enum { search_hashChain, search_binaryTree } searchMethod_e; +enum { + LITERAL = 0, + COPY_1_BYTE_OFFSET = 1, // 3 bit length + 3 bits of offset in opcode + COPY_2_BYTE_OFFSET = 2, + COPY_4_BYTE_OFFSET = 3 +}; +static const int kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the actual offset. 
-FORCE_INLINE_TEMPLATE size_t -ZSTD_compressBlock_lazy_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, - U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize, - const searchMethod_e searchMethod, const U32 depth, - ZSTD_dictMode_e const dictMode) -{ - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ms->window.base; - const U32 prefixLowestIndex = ms->window.dictLimit; - const BYTE* const prefixLowest = base + prefixLowestIndex; +// Data stored per entry in lookup table: +// Range Bits-used Description +// ------------------------------------ +// 1..64 0..7 Literal/copy length encoded in opcode byte +// 0..7 8..10 Copy offset encoded in opcode byte / 256 +// 0..4 11..13 Extra bytes after opcode +// +// We use eight bits for the length even though 7 would have sufficed +// because of efficiency reasons: +// (1) Extracting a byte is faster than a bit-field +// (2) It properly aligns copy offset so we do not need a <<8 +static const uint16 char_table[256] = { + 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002, + 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004, + 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006, + 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008, + 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a, + 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c, + 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e, + 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010, + 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012, + 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014, + 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016, + 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018, + 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 
0x201a, + 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c, + 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e, + 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020, + 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022, + 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024, + 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026, + 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028, + 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a, + 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c, + 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e, + 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030, + 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032, + 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034, + 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036, + 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038, + 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a, + 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c, + 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e, + 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040 +}; - typedef size_t (*searchMax_f)( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); - searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ? - (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS - : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) : - (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS - : ZSTD_HcFindBestMatch_selectMLS); - U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; +} // end namespace internal - const ZSTD_matchState_t* const dms = ms->dictMatchState; - const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ? - dms->window.dictLimit : 0; - const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? 
- dms->window.base : NULL; - const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ? - dictBase + dictLowestIndex : NULL; - const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? - dms->window.nextSrc : NULL; - const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? - prefixLowestIndex - (U32)(dictEnd - dictBase) : - 0; - const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); - DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode); +// The size of a compression block. Note that many parts of the compression +// code assumes that kBlockSize <= 65536; in particular, the hash table +// can only store 16-bit offsets, and EmitCopy() also assumes the offset +// is 65535 bytes or less. Note also that if you change this, it will +// affect the framing format (see framing_format.txt). +// +// Note that there might be older data around that is compressed with larger +// block sizes, so the decompression code should not rely on the +// non-existence of long backreferences. +static const int kBlockLog = 16; +static const size_t kBlockSize = 1 << kBlockLog; - /* init */ - ip += (dictAndPrefixLength == 0); - if (dictMode == ZSTD_noDict) { - U32 const current = (U32)(ip - base); - U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog); - U32 const maxRep = current - windowLow; - if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; - if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; - } - if (dictMode == ZSTD_dictMatchState) { - /* dictMatchState repCode checks don't currently handle repCode == 0 - * disabling. 
*/ - assert(offset_1 <= dictAndPrefixLength); - assert(offset_2 <= dictAndPrefixLength); - } +static const int kMaxHashTableBits = 14; +static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits; - /* Match Loop */ -#if defined(__GNUC__) && defined(__x86_64__) - /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the - * code alignment is perturbed. To fix the instability align the loop on 32-bytes. - */ - __asm__(".p2align 5"); -#endif - while (ip < ilimit) { - size_t matchLength=0; - size_t offset=0; - const BYTE* start=ip+1; - /* check repCode */ - if (dictMode == ZSTD_dictMatchState) { - const U32 repIndex = (U32)(ip - base) + 1 - offset_1; - const BYTE* repMatch = (dictMode == ZSTD_dictMatchState - && repIndex < prefixLowestIndex) ? - dictBase + (repIndex - dictIndexDelta) : - base + repIndex; - if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; - if (depth==0) goto _storeSequence; - } - } - if ( dictMode == ZSTD_noDict - && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { - matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; - if (depth==0) goto _storeSequence; - } +} // end namespace snappy - /* first search (depth 0) */ - { size_t offsetFound = 999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); - if (ml2 > matchLength) - matchLength = ml2, start = ip, offset=offsetFound; - } +#endif // THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ - if (matchLength < 4) { - ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ - continue; - } - /* let's try to find a better solution */ - if (depth>=1) - while (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { - size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; - int const gain2 = (int)(mlRep * 3); - int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); - if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; - } - if (dictMode == ZSTD_dictMatchState) { - const U32 repIndex = (U32)(ip - base) - offset_1; - const BYTE* repMatch = repIndex < prefixLowestIndex ? - dictBase + (repIndex - dictIndexDelta) : - base + repIndex; - if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) - && (MEM_read32(repMatch) == MEM_read32(ip)) ) { - const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; - size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; - int const gain2 = (int)(mlRep * 3); - int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); - if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; - } - } - { size_t offset2=999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offset2); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); - if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; /* search a better one */ - } } +// LICENSE_CHANGE_END + - /* let's find an even better one */ - if ((depth==2) && (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { - size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; - int const gain2 = (int)(mlRep * 4); - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); - if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; - } - if (dictMode == ZSTD_dictMatchState) { - const U32 repIndex = (U32)(ip - base) - offset_1; - const BYTE* repMatch = repIndex < prefixLowestIndex ? - dictBase + (repIndex - dictIndexDelta) : - base + repIndex; - if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) - && (MEM_read32(repMatch) == MEM_read32(ip)) ) { - const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; - size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; - int const gain2 = (int)(mlRep * 4); - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); - if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; - } - } - { size_t offset2=999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offset2); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); - if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; - } } } - break; /* nothing found : store previous solution */ - } - /* NOTE: - * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. - * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which - * overflows the pointer, which is undefined behavior. - */ - /* catch up */ - if (offset) { - if (dictMode == ZSTD_noDict) { - while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest)) - && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */ - { start--; matchLength++; } - } - if (dictMode == ZSTD_dictMatchState) { - U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); - const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex; - const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? 
dictLowest : prefixLowest; - while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ - } - offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); - } - /* store sequence */ -_storeSequence: - { size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); - anchor = ip = start + matchLength; - } +#if !defined(SNAPPY_HAVE_SSSE3) +// __SSSE3__ is defined by GCC and Clang. Visual Studio doesn't target SIMD +// support between SSE2 and AVX (so SSSE3 instructions require AVX support), and +// defines __AVX__ when AVX support is available. +#if defined(__SSSE3__) || defined(__AVX__) +#define SNAPPY_HAVE_SSSE3 1 +#else +#define SNAPPY_HAVE_SSSE3 0 +#endif +#endif // !defined(SNAPPY_HAVE_SSSE3) - /* check immediate repcode */ - if (dictMode == ZSTD_dictMatchState) { - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex = current2 - offset_2; - const BYTE* repMatch = dictMode == ZSTD_dictMatchState - && repIndex < prefixLowestIndex ? - dictBase - dictIndexDelta + repIndex : - base + repIndex; - if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */) - && (MEM_read32(repMatch) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; - offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); - ip += matchLength; - anchor = ip; - continue; - } - break; - } - } +#if !defined(SNAPPY_HAVE_BMI2) +// __BMI2__ is defined by GCC and Clang. Visual Studio doesn't target BMI2 +// specifically, but it does define __AVX2__ when AVX2 support is available. +// Fortunately, AVX2 was introduced in Haswell, just like BMI2. 
+// +// BMI2 is not defined as a subset of AVX2 (unlike SSSE3 and AVX above). So, +// GCC and Clang can build code with AVX2 enabled but BMI2 disabled, in which +// case issuing BMI2 instructions results in a compiler error. +#if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__)) +#define SNAPPY_HAVE_BMI2 1 +#else +#define SNAPPY_HAVE_BMI2 0 +#endif +#endif // !defined(SNAPPY_HAVE_BMI2) - if (dictMode == ZSTD_noDict) { - while ( ((ip <= ilimit) & (offset_2>0)) - && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { - /* store sequence */ - matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; - offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); - ip += matchLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } } } +#if SNAPPY_HAVE_SSSE3 +// Please do not replace with . or with headers that assume more +// advanced SSE versions without checking with all the OWNERS. +#include +#endif - /* Save reps for next block */ - rep[0] = offset_1 ? offset_1 : savedOffset; - rep[1] = offset_2 ? offset_2 : savedOffset; +#if SNAPPY_HAVE_BMI2 +// Please do not replace with . or with headers that assume more +// advanced SSE versions without checking with all the OWNERS. 
+#include +#endif - /* Return the last literals size */ - return (size_t)(iend - anchor); -} +#include +#include +#include +#include -size_t ZSTD_compressBlock_btlazy2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); -} +namespace snappy { -size_t ZSTD_compressBlock_lazy2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); -} +using internal::COPY_1_BYTE_OFFSET; +using internal::COPY_2_BYTE_OFFSET; +using internal::LITERAL; +using internal::char_table; +using internal::kMaximumTagLength; -size_t ZSTD_compressBlock_lazy( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); +// Any hash function will produce a valid compressed bitstream, but a good +// hash function reduces the number of collisions and thus yields better +// compression for compressible input, and more speed for incompressible +// input. Of course, it doesn't hurt if the hash function is reasonably fast +// either, as it gets called a lot. 
+static inline uint32 HashBytes(uint32 bytes, int shift) { + uint32 kMul = 0x1e35a7bd; + return (bytes * kMul) >> shift; } - -size_t ZSTD_compressBlock_greedy( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); +static inline uint32 Hash(const char* p, int shift) { + return HashBytes(UNALIGNED_LOAD32(p), shift); } -size_t ZSTD_compressBlock_btlazy2_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); +size_t MaxCompressedLength(size_t source_len) { + // Compressed data can be defined as: + // compressed := item* literal* + // item := literal* copy + // + // The trailing literal sequence has a space blowup of at most 62/60 + // since a literal of length 60 needs one tag byte + one extra byte + // for length information. + // + // Item blowup is trickier to measure. Suppose the "copy" op copies + // 4 bytes of data. Because of a special check in the encoding code, + // we produce a 4-byte copy only if the offset is < 65536. Therefore + // the copy op takes 3 bytes to encode, and this type of item leads + // to at most the 62/60 blowup for representing literals. + // + // Suppose the "copy" op copies 5 bytes of data. If the offset is big + // enough, it will take 5 bytes to encode the copy op. Therefore the + // worst case here is a one-byte literal followed by a five-byte copy. + // I.e., 6 bytes of input turn into 7 bytes of "compressed" data. 
+ // + // This last factor dominates the blowup, so the final estimate is: + return 32 + source_len + source_len/6; } -size_t ZSTD_compressBlock_lazy2_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); -} +namespace { -size_t ZSTD_compressBlock_lazy_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); +void UnalignedCopy64(const void* src, void* dst) { + char tmp[8]; + memcpy(tmp, src, 8); + memcpy(dst, tmp, 8); } -size_t ZSTD_compressBlock_greedy_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); +void UnalignedCopy128(const void* src, void* dst) { + // memcpy gets vectorized when the appropriate compiler options are used. + // For example, x86 compilers targeting SSE2+ will optimize to an SSE2 load + // and store. + char tmp[16]; + memcpy(tmp, src, 16); + memcpy(dst, tmp, 16); } +// Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) a byte at a time. Used +// for handling COPY operations where the input and output regions may overlap. +// For example, suppose: +// src == "ab" +// op == src + 2 +// op_limit == op + 20 +// After IncrementalCopySlow(src, op, op_limit), the result will have eleven +// copies of "ab" +// ababababababababababab +// Note that this does not match the semantics of either memcpy() or memmove(). 
+inline char* IncrementalCopySlow(const char* src, char* op, + char* const op_limit) { + // TODO: Remove pragma when LLVM is aware this + // function is only called in cold regions and when cold regions don't get + // vectorized or unrolled. +#ifdef __clang__ +#pragma clang loop unroll(disable) +#endif + while (op < op_limit) { + *op++ = *src++; + } + return op_limit; +} -FORCE_INLINE_TEMPLATE -size_t ZSTD_compressBlock_lazy_extDict_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, - U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize, - const searchMethod_e searchMethod, const U32 depth) -{ - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ms->window.base; - const U32 dictLimit = ms->window.dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* const dictBase = ms->window.dictBase; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const dictStart = dictBase + ms->window.lowLimit; - const U32 windowLog = ms->cParams.windowLog; - - typedef size_t (*searchMax_f)( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); - searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; - - U32 offset_1 = rep[0], offset_2 = rep[1]; +#if SNAPPY_HAVE_SSSE3 - DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic"); +// This is a table of shuffle control masks that can be used as the source +// operand for PSHUFB to permute the contents of the destination XMM register +// into a repeating byte pattern. 
+alignas(16) const char pshufb_fill_patterns[7][16] = { + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}, + {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0}, + {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}, + {0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0}, + {0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3}, + {0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1}, +}; - /* init */ - ip += (ip == prefixStart); +#endif // SNAPPY_HAVE_SSSE3 - /* Match Loop */ -#if defined(__GNUC__) && defined(__x86_64__) - /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the - * code alignment is perturbed. To fix the instability align the loop on 32-bytes. - */ - __asm__(".p2align 5"); -#endif - while (ip < ilimit) { - size_t matchLength=0; - size_t offset=0; - const BYTE* start=ip+1; - U32 current = (U32)(ip-base); +// Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) but faster than +// IncrementalCopySlow. buf_limit is the address past the end of the writable +// region of the buffer. +inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, + char* const buf_limit) { + // Terminology: + // + // slop = buf_limit - op + // pat = op - src + // len = limit - op + assert(src < op); + assert(op <= op_limit); + assert(op_limit <= buf_limit); + // NOTE: The compressor always emits 4 <= len <= 64. It is ok to assume that + // to optimize this function but we have to also handle other cases in case + // the input does not satisfy these conditions. - /* check repCode */ - { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog); - const U32 repIndex = (U32)(current+1 - offset_1); - const BYTE* const repBase = repIndex < dictLimit ? 
dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ - if (MEM_read32(ip+1) == MEM_read32(repMatch)) { - /* repcode detected we should take it */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4; - if (depth==0) goto _storeSequence; - } } + size_t pattern_size = op - src; + // The cases are split into different branches to allow the branch predictor, + // FDO, and static prediction hints to work better. For each input we list the + // ratio of invocations that match each condition. + // + // input slop < 16 pat < 8 len > 16 + // ------------------------------------------ + // html|html4|cp 0% 1.01% 27.73% + // urls 0% 0.88% 14.79% + // jpg 0% 64.29% 7.14% + // pdf 0% 2.56% 58.06% + // txt[1-4] 0% 0.23% 0.97% + // pb 0% 0.96% 13.88% + // bin 0.01% 22.27% 41.17% + // + // It is very rare that we don't have enough slop for doing block copies. It + // is also rare that we need to expand a pattern. Small patterns are common + // for incompressible formats and for those we are plenty fast already. + // Lengths are normally not greater than 16 but they vary depending on the + // input. In general if we always predict len <= 16 it would be an ok + // prediction. + // + // In order to be fast we want a pattern >= 8 bytes and an unrolled loop + // copying 2x 8 bytes at a time. - /* first search (depth 0) */ - { size_t offsetFound = 999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); - if (ml2 > matchLength) - matchLength = ml2, start = ip, offset=offsetFound; - } + // Handle the uncommon case where pattern is less than 8 bytes. 
+ if (SNAPPY_PREDICT_FALSE(pattern_size < 8)) { +#if SNAPPY_HAVE_SSSE3 + // Load the first eight bytes into an 128-bit XMM register, then use PSHUFB + // to permute the register's contents in-place into a repeating sequence of + // the first "pattern_size" bytes. + // For example, suppose: + // src == "abc" + // op == op + 3 + // After _mm_shuffle_epi8(), "pattern" will have five copies of "abc" + // followed by one byte of slop: abcabcabcabcabca. + // + // The non-SSE fallback implementation suffers from store-forwarding stalls + // because its loads and stores partly overlap. By expanding the pattern + // in-place, we avoid the penalty. + if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 16)) { + const __m128i shuffle_mask = _mm_load_si128( + reinterpret_cast(pshufb_fill_patterns) + + pattern_size - 1); + const __m128i pattern = _mm_shuffle_epi8( + _mm_loadl_epi64(reinterpret_cast(src)), shuffle_mask); + // Uninitialized bytes are masked out by the shuffle mask. + // TODO: remove annotation and macro defs once MSan is fixed. + SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(&pattern, sizeof(pattern)); + pattern_size *= 16 / pattern_size; + char* op_end = std::min(op_limit, buf_limit - 15); + while (op < op_end) { + _mm_storeu_si128(reinterpret_cast<__m128i*>(op), pattern); + op += pattern_size; + } + if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit; + } + return IncrementalCopySlow(src, op, op_limit); +#else // !SNAPPY_HAVE_SSSE3 + // If plenty of buffer space remains, expand the pattern to at least 8 + // bytes. The way the following loop is written, we need 8 bytes of buffer + // space if pattern_size >= 4, 11 bytes if pattern_size is 1 or 3, and 10 + // bytes if pattern_size is 2. Precisely encoding that is probably not + // worthwhile; instead, invoke the slow path if we cannot write 11 bytes + // (because 11 are required in the worst case). 
+ if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 11)) { + while (pattern_size < 8) { + UnalignedCopy64(src, op); + op += pattern_size; + pattern_size *= 2; + } + if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit; + } else { + return IncrementalCopySlow(src, op, op_limit); + } +#endif // SNAPPY_HAVE_SSSE3 + } + assert(pattern_size >= 8); - if (matchLength < 4) { - ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ - continue; - } + // Copy 2x 8 bytes at a time. Because op - src can be < 16, a single + // UnalignedCopy128 might overwrite data in op. UnalignedCopy64 is safe + // because expanding the pattern to at least 8 bytes guarantees that + // op - src >= 8. + // + // Typically, the op_limit is the gating factor so try to simplify the loop + // based on that. + if (SNAPPY_PREDICT_TRUE(op_limit <= buf_limit - 16)) { + // Factor the displacement from op to the source into a variable. This helps + // simplify the loop below by only varying the op pointer which we need to + // test for the end. Note that this was done after carefully examining the + // generated code to allow the addressing modes in the loop below to + // maximize micro-op fusion where possible on modern Intel processors. The + // generated code should be checked carefully for new processors or with + // major changes to the compiler. + // TODO: Simplify this code when the compiler reliably produces + // the correct x86 instruction sequence. + ptrdiff_t op_to_src = src - op; - /* let's try to find a better solution */ - if (depth>=1) - while (ip= 3) & (repIndex > windowLow)) /* intentional overflow */ - if (MEM_read32(ip) == MEM_read32(repMatch)) { - /* repcode detected */ - const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; - size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; - int const gain2 = (int)(repLength * 3); - int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); - if ((repLength >= 4) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; - } } + // The trip count of this loop is not large and so unrolling will only hurt + // code size without helping performance. + // + // TODO: Replace with loop trip count hint. +#ifdef __clang__ +#pragma clang loop unroll(disable) +#endif + do { + UnalignedCopy64(op + op_to_src, op); + UnalignedCopy64(op + op_to_src + 8, op + 8); + op += 16; + } while (op < op_limit); + return op_limit; + } - /* search match, depth 1 */ - { size_t offset2=999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offset2); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); - if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; /* search a better one */ - } } + // Fall back to doing as much as we can with the available slop in the + // buffer. This code path is relatively cold however so we save code size by + // avoiding unrolling and vectorizing. + // + // TODO: Remove pragma when when cold regions don't get vectorized + // or unrolled. +#ifdef __clang__ +#pragma clang loop unroll(disable) +#endif + for (char *op_end = buf_limit - 16; op < op_end; op += 16, src += 16) { + UnalignedCopy64(src, op); + UnalignedCopy64(src + 8, op + 8); + } + if (op >= op_limit) + return op_limit; - /* let's find an even better one */ - if ((depth==2) && (ip= 3) & (repIndex > windowLow)) /* intentional overflow */ - if (MEM_read32(ip) == MEM_read32(repMatch)) { - /* repcode detected */ - const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; - size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; - int const gain2 = (int)(repLength * 4); - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); - if ((repLength >= 4) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; - } } + // We only take this branch if we didn't have enough slop and we can do a + // single 8 byte copy. + if (SNAPPY_PREDICT_FALSE(op <= buf_limit - 8)) { + UnalignedCopy64(src, op); + src += 8; + op += 8; + } + return IncrementalCopySlow(src, op, op_limit); +} - /* search match, depth 2 */ - { size_t offset2=999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offset2); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); - if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; - } } } - break; /* nothing found : store previous solution */ - } +} // namespace - /* catch up */ - if (offset) { - U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); - const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; - const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; - while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ - offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); - } +template +static inline char* EmitLiteral(char* op, + const char* literal, + int len) { + // The vast majority of copies are below 16 bytes, for which a + // call to memcpy is overkill. 
This fast path can sometimes + // copy up to 15 bytes too much, but that is okay in the + // main loop, since we have a bit to go on for both sides: + // + // - The input will always have kInputMarginBytes = 15 extra + // available bytes, as long as we're in the main loop, and + // if not, allow_fast_path = false. + // - The output will always have 32 spare bytes (see + // MaxCompressedLength). + assert(len > 0); // Zero-length literals are disallowed + int n = len - 1; + if (allow_fast_path && len <= 16) { + // Fits in tag byte + *op++ = LITERAL | (n << 2); - /* store sequence */ -_storeSequence: - { size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); - anchor = ip = start + matchLength; - } + UnalignedCopy128(literal, op); + return op + len; + } - /* check immediate repcode */ - while (ip <= ilimit) { - const U32 repCurrent = (U32)(ip-base); - const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog); - const U32 repIndex = repCurrent - offset_2; - const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ - if (MEM_read32(ip) == MEM_read32(repMatch)) { - /* repcode detected we should take it */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; - offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); - ip += matchLength; - anchor = ip; - continue; /* faster when present ... (?) 
*/ - } - break; - } } + if (n < 60) { + // Fits in tag byte + *op++ = LITERAL | (n << 2); + } else { + int count = (Bits::Log2Floor(n) >> 3) + 1; + assert(count >= 1); + assert(count <= 4); + *op++ = LITERAL | ((59 + count) << 2); + // Encode in upcoming bytes. + // Write 4 bytes, though we may care about only 1 of them. The output buffer + // is guaranteed to have at least 3 more spaces left as 'len >= 61' holds + // here and there is a memcpy of size 'len' below. + LittleEndian::Store32(op, n); + op += count; + } + memcpy(op, literal, len); + return op + len; +} - /* Save reps for next block */ - rep[0] = offset_1; - rep[1] = offset_2; +template +static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len) { + assert(len <= 64); + assert(len >= 4); + assert(offset < 65536); + assert(len_less_than_12 == (len < 12)); - /* Return the last literals size */ - return (size_t)(iend - anchor); + if (len_less_than_12 && SNAPPY_PREDICT_TRUE(offset < 2048)) { + // offset fits in 11 bits. The 3 highest go in the top of the first byte, + // and the rest go in the second byte. + *op++ = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0); + *op++ = offset & 0xff; + } else { + // Write 4 bytes, though we only care about 3 of them. The output buffer + // is required to have some slack, so the extra byte won't overrun it. + uint32 u = COPY_2_BYTE_OFFSET + ((len - 1) << 2) + (offset << 8); + LittleEndian::Store32(op, u); + op += 3; + } + return op; } +template +static inline char* EmitCopy(char* op, size_t offset, size_t len) { + assert(len_less_than_12 == (len < 12)); + if (len_less_than_12) { + return EmitCopyAtMost64(op, offset, len); + } else { + // A special case for len <= 64 might help, but so far measurements suggest + // it's in the noise. 
-size_t ZSTD_compressBlock_greedy_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0); -} + // Emit 64 byte copies but make sure to keep at least four bytes reserved. + while (SNAPPY_PREDICT_FALSE(len >= 68)) { + op = EmitCopyAtMost64(op, offset, 64); + len -= 64; + } -size_t ZSTD_compressBlock_lazy_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) + // One or two copies will now finish the job. + if (len > 64) { + op = EmitCopyAtMost64(op, offset, 60); + len -= 60; + } -{ - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); + // Emit remainder. + if (len < 12) { + op = EmitCopyAtMost64(op, offset, len); + } else { + op = EmitCopyAtMost64(op, offset, len); + } + return op; + } } -size_t ZSTD_compressBlock_lazy2_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) +bool GetUncompressedLength(const char* start, size_t n, size_t* result) { + uint32 v = 0; + const char* limit = start + n; + if (Varint::Parse32WithLimit(start, limit, &v) != NULL) { + *result = v; + return true; + } else { + return false; + } +} -{ - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); +namespace { +uint32 CalculateTableSize(uint32 input_size) { + assert(kMaxHashTableSize >= 256); + if (input_size > kMaxHashTableSize) { + return kMaxHashTableSize; + } + if (input_size < 256) { + return 256; + } + // This is equivalent to Log2Ceiling(input_size), assuming input_size > 1. + // 2 << Log2Floor(x - 1) is equivalent to 1 << (1 + Log2Floor(x - 1)). 
+ return 2u << Bits::Log2Floor(input_size - 1); } +} // namespace -size_t ZSTD_compressBlock_btlazy2_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) +namespace internal { +WorkingMemory::WorkingMemory(size_t input_size) { + const size_t max_fragment_size = std::min(input_size, kBlockSize); + const size_t table_size = CalculateTableSize(max_fragment_size); + size_ = table_size * sizeof(*table_) + max_fragment_size + + MaxCompressedLength(max_fragment_size); + mem_ = std::allocator().allocate(size_); + table_ = reinterpret_cast(mem_); + input_ = mem_ + table_size * sizeof(*table_); + output_ = input_ + max_fragment_size; +} -{ - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); +WorkingMemory::~WorkingMemory() { + std::allocator().deallocate(mem_, size_); } +uint16* WorkingMemory::GetHashTable(size_t fragment_size, + int* table_size) const { + const size_t htsize = CalculateTableSize(fragment_size); + memset(table_, 0, htsize * sizeof(*table_)); + *table_size = htsize; + return table_; } +} // end namespace internal + +// For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will +// equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have +// empirically found that overlapping loads such as +// UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2) +// are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32. +// +// We have different versions for 64- and 32-bit; ideally we would avoid the +// two functions and just inline the UNALIGNED_LOAD64 call into +// GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever +// enough to avoid loading the value multiple times then. For 64-bit, the load +// is done when GetEightBytesAt() is called, whereas for 32-bit, the load is +// done at GetUint32AtOffset() time. 
+#ifdef ARCH_K8 -// LICENSE_CHANGE_END +typedef uint64 EightBytesReference; +static inline EightBytesReference GetEightBytesAt(const char* ptr) { + return UNALIGNED_LOAD64(ptr); +} -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list +static inline uint32 GetUint32AtOffset(uint64 v, int offset) { + assert(offset >= 0); + assert(offset <= 4); + return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset); +} -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ +#else +typedef const char* EightBytesReference; +static inline EightBytesReference GetEightBytesAt(const char* ptr) { + return ptr; +} +static inline uint32 GetUint32AtOffset(const char* v, int offset) { + assert(offset >= 0); + assert(offset <= 4); + return UNALIGNED_LOAD32(v + offset); +} - /* ZSTD_fillHashTable() */ - /* ZSTD_fillDoubleHashTable() */ +#endif -#define LDM_BUCKET_SIZE_LOG 3 -#define LDM_MIN_MATCH_LENGTH 64 -#define LDM_HASH_RLOG 7 -#define LDM_HASH_CHAR_OFFSET 10 +// Flat array compression that does not emit the "uncompressed length" +// prefix. Compresses "input" string to the "*op" buffer. +// +// REQUIRES: "input" is at most "kBlockSize" bytes long. +// REQUIRES: "op" points to an array of memory that is at least +// "MaxCompressedLength(input.size())" in size. +// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. +// REQUIRES: "table_size" is a power of two +// +// Returns an "end" pointer into "op" buffer. +// "end - op" is the compressed size of "input". 
+namespace internal { +char* CompressFragment(const char* input, + size_t input_size, + char* op, + uint16* table, + const int table_size) { + // "ip" is the input pointer, and "op" is the output pointer. + const char* ip = input; + assert(input_size <= kBlockSize); + assert((table_size & (table_size - 1)) == 0); // table must be power of two + const int shift = 32 - Bits::Log2Floor(table_size); + // assert(static_cast(kuint32max >> shift) == table_size - 1); + const char* ip_end = input + input_size; + const char* base_ip = ip; + // Bytes in [next_emit, ip) will be emitted as literal bytes. Or + // [next_emit, ip_end) after the main loop. + const char* next_emit = ip; -namespace duckdb_zstd { + const size_t kInputMarginBytes = 15; + if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) { + const char* ip_limit = input + input_size - kInputMarginBytes; -void ZSTD_ldm_adjustParameters(ldmParams_t* params, - ZSTD_compressionParameters const* cParams) -{ - params->windowLog = cParams->windowLog; - ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); - DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); - if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; - if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; - if (cParams->strategy >= ZSTD_btopt) { - /* Get out of the way of the optimal parser */ - U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength); - assert(minMatch >= ZSTD_LDM_MINMATCH_MIN); - assert(minMatch <= ZSTD_LDM_MINMATCH_MAX); - params->minMatchLength = minMatch; - } - if (params->hashLog == 0) { - params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG); - assert(params->hashLog <= ZSTD_HASHLOG_MAX); - } - if (params->hashRateLog == 0) { - params->hashRateLog = params->windowLog < params->hashLog - ? 
0 - : params->windowLog - params->hashLog; - } - params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog); -} + for (uint32 next_hash = Hash(++ip, shift); ; ) { + assert(next_emit < ip); + // The body of this loop calls EmitLiteral once and then EmitCopy one or + // more times. (The exception is that when we're close to exhausting + // the input we goto emit_remainder.) + // + // In the first iteration of this loop we're just starting, so + // there's nothing to copy, so calling EmitLiteral once is + // necessary. And we only start a new iteration when the + // current iteration has determined that a call to EmitLiteral will + // precede the next call to EmitCopy (if any). + // + // Step 1: Scan forward in the input looking for a 4-byte-long match. + // If we get close to exhausting the input then goto emit_remainder. + // + // Heuristic match skipping: If 32 bytes are scanned with no matches + // found, start looking only at every other byte. If 32 more bytes are + // scanned (or skipped), look at every third byte, etc.. When a match is + // found, immediately go back to looking at every byte. This is a small + // loss (~5% performance, ~0.1% density) for compressible data due to more + // bookkeeping, but for non-compressible data (such as JPEG) it's a huge + // win since the compressor quickly "realizes" the data is incompressible + // and doesn't bother looking for matches everywhere. + // + // The "skip" variable keeps track of how many bytes there are since the + // last match; dividing it by 32 (ie. right-shifting by five) gives the + // number of bytes to move ahead for each iteration. 
+ uint32 skip = 32; -size_t ZSTD_ldm_getTableSize(ldmParams_t params) -{ - size_t const ldmHSize = ((size_t)1) << params.hashLog; - size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog); - size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog); - size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize) - + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t)); - return params.enableLdm ? totalSize : 0; -} + const char* next_ip = ip; + const char* candidate; + do { + ip = next_ip; + uint32 hash = next_hash; + assert(hash == Hash(ip, shift)); + uint32 bytes_between_hash_lookups = skip >> 5; + skip += bytes_between_hash_lookups; + next_ip = ip + bytes_between_hash_lookups; + if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) { + goto emit_remainder; + } + next_hash = Hash(next_ip, shift); + candidate = base_ip + table[hash]; + assert(candidate >= base_ip); + assert(candidate < ip); -size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize) -{ - return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0; -} + table[hash] = ip - base_ip; + } while (SNAPPY_PREDICT_TRUE(UNALIGNED_LOAD32(ip) != + UNALIGNED_LOAD32(candidate))); -/** ZSTD_ldm_getSmallHash() : - * numBits should be <= 32 - * If numBits==0, returns 0. - * @return : the most significant numBits of value. */ -static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits) -{ - assert(numBits <= 32); - return numBits == 0 ? 0 : (U32)(value >> (64 - numBits)); -} + // Step 2: A 4-byte match has been found. We'll later see if more + // than 4 bytes match. But, prior to the match, input + // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes." 
+ assert(next_emit + 16 <= ip_end); + op = EmitLiteral(op, next_emit, ip - next_emit); -/** ZSTD_ldm_getChecksum() : - * numBitsToDiscard should be <= 32 - * @return : the next most significant 32 bits after numBitsToDiscard */ -static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard) -{ - assert(numBitsToDiscard <= 32); - return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF; -} + // Step 3: Call EmitCopy, and then see if another EmitCopy could + // be our next move. Repeat until we find no match for the + // input immediately after what was consumed by the last EmitCopy call. + // + // If we exit this loop normally then we need to call EmitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can exit + // this loop via goto if we get close to exhausting the input. + EightBytesReference input_bytes; + uint32 candidate_bytes = 0; -/** ZSTD_ldm_getTag() ; - * Given the hash, returns the most significant numTagBits bits - * after (32 + hbits) bits. - * - * If there are not enough bits remaining, return the last - * numTagBits bits. */ -static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits) -{ - assert(numTagBits < 32 && hbits <= 32); - if (32 - hbits < numTagBits) { - return hash & (((U32)1 << numTagBits) - 1); - } else { - return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1); + do { + // We have a 4-byte match at ip, and no need to emit any + // "literal bytes" prior to ip. 
+ const char* base = ip; + std::pair p = + FindMatchLength(candidate + 4, ip + 4, ip_end); + size_t matched = 4 + p.first; + ip += matched; + size_t offset = base - candidate; + assert(0 == memcmp(base, candidate, matched)); + if (p.second) { + op = EmitCopy(op, offset, matched); + } else { + op = EmitCopy(op, offset, matched); + } + next_emit = ip; + if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) { + goto emit_remainder; + } + // We are now looking for a 4-byte match again. We read + // table[Hash(ip, shift)] for that. To improve compression, + // we also update table[Hash(ip - 1, shift)] and table[Hash(ip, shift)]. + input_bytes = GetEightBytesAt(ip - 1); + uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift); + table[prev_hash] = ip - base_ip - 1; + uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift); + candidate = base_ip + table[cur_hash]; + candidate_bytes = UNALIGNED_LOAD32(candidate); + table[cur_hash] = ip - base_ip; + } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes); + + next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift); + ++ip; } -} + } + + emit_remainder: + // Emit the remaining bytes as a literal + if (next_emit < ip_end) { + op = EmitLiteral(op, next_emit, + ip_end - next_emit); + } -/** ZSTD_ldm_getBucket() : - * Returns a pointer to the start of the bucket associated with hash. 
*/ -static ldmEntry_t* ZSTD_ldm_getBucket( - ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams) -{ - return ldmState->hashTable + (hash << ldmParams.bucketSizeLog); + return op; } +} // end namespace internal -/** ZSTD_ldm_insertEntry() : - * Insert the entry with corresponding hash into the hash table */ -static void ZSTD_ldm_insertEntry(ldmState_t* ldmState, - size_t const hash, const ldmEntry_t entry, - ldmParams_t const ldmParams) -{ - BYTE* const bucketOffsets = ldmState->bucketOffsets; - *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry; - bucketOffsets[hash]++; - bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1; -} +// Called back at avery compression call to trace parameters and sizes. +static inline void Report(const char *algorithm, size_t compressed_size, + size_t uncompressed_size) {} -/** ZSTD_ldm_makeEntryAndInsertByTag() : - * - * Gets the small hash, checksum, and tag from the rollingHash. - * - * If the tag matches (1 << ldmParams.hashRateLog)-1, then - * creates an ldmEntry from the offset, and inserts it into the hash table. - * - * hBits is the length of the small hash, which is the most significant hBits - * of rollingHash. The checksum is the next 32 most significant bits, followed - * by ldmParams.hashRateLog bits that make up the tag. */ -static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState, - U64 const rollingHash, - U32 const hBits, - U32 const offset, - ldmParams_t const ldmParams) -{ - U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog); - U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1; - if (tag == tagMask) { - U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits); - U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); - ldmEntry_t entry; - entry.offset = offset; - entry.checksum = checksum; - ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams); - } -} +// Signature of output types needed by decompression code. 
+// The decompression code is templatized on a type that obeys this +// signature so that we do not pay virtual function call overhead in +// the middle of a tight decompression loop. +// +// class DecompressionWriter { +// public: +// // Called before decompression +// void SetExpectedLength(size_t length); +// +// // Called after decompression +// bool CheckLength() const; +// +// // Called repeatedly during decompression +// bool Append(const char* ip, size_t length); +// bool AppendFromSelf(uint32 offset, size_t length); +// +// // The rules for how TryFastAppend differs from Append are somewhat +// // convoluted: +// // +// // - TryFastAppend is allowed to decline (return false) at any +// // time, for any reason -- just "return false" would be +// // a perfectly legal implementation of TryFastAppend. +// // The intention is for TryFastAppend to allow a fast path +// // in the common case of a small append. +// // - TryFastAppend is allowed to read up to bytes +// // from the input buffer, whereas Append is allowed to read +// // . However, if it returns true, it must leave +// // at least five (kMaximumTagLength) bytes in the input buffer +// // afterwards, so that there is always enough space to read the +// // next tag without checking for a refill. +// // - TryFastAppend must always return decline (return false) +// // if is 61 or more, as in this case the literal length is not +// // decoded fully. In practice, this should not be a big problem, +// // as it is unlikely that one would implement a fast path accepting +// // this much data. +// // +// bool TryFastAppend(const char* ip, size_t available, size_t length); +// }; -/** ZSTD_ldm_countBackwardsMatch() : - * Returns the number of bytes that match backwards before pIn and pMatch. - * - * We count only bytes where pMatch >= pBase and pIn >= pAnchor. 
*/ -static size_t ZSTD_ldm_countBackwardsMatch( - const BYTE* pIn, const BYTE* pAnchor, - const BYTE* pMatch, const BYTE* pBase) -{ - size_t matchLength = 0; - while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) { - pIn--; - pMatch--; - matchLength++; - } - return matchLength; +static inline uint32 ExtractLowBytes(uint32 v, int n) { + assert(n >= 0); + assert(n <= 4); +#if SNAPPY_HAVE_BMI2 + return _bzhi_u32(v, 8 * n); +#else + // This needs to be wider than uint32 otherwise `mask << 32` will be + // undefined. + uint64 mask = 0xffffffff; + return v & ~(mask << (8 * n)); +#endif } -/** ZSTD_ldm_fillFastTables() : - * - * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies. - * This is similar to ZSTD_loadDictionaryContent. - * - * The tables for the other strategies are filled within their - * block compressors. */ -static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, - void const* end) -{ - const BYTE* const iend = (const BYTE*)end; +static inline bool LeftShiftOverflows(uint8 value, uint32 shift) { + assert(shift < 32); + static const uint8 masks[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // + 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe}; + return (value & masks[shift]) != 0; +} - switch(ms->cParams.strategy) - { - case ZSTD_fast: - ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast); - break; +// Helper class for decompression +class SnappyDecompressor { + private: + Source* reader_; // Underlying source of bytes to decompress + const char* ip_; // Points to next buffered byte + const char* ip_limit_; // Points just past buffered bytes + uint32 peeked_; // Bytes peeked from reader (need to skip) + bool eof_; // Hit end of input without an error? + char scratch_[kMaximumTagLength]; // See RefillTag(). 
- case ZSTD_dfast: - ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast); - break; + // Ensure that all of the tag metadata for the next tag is available + // in [ip_..ip_limit_-1]. Also ensures that [ip,ip+4] is readable even + // if (ip_limit_ - ip_ < 5). + // + // Returns true on success, false on error or end of input. + bool RefillTag(); - case ZSTD_greedy: - case ZSTD_lazy: - case ZSTD_lazy2: - case ZSTD_btlazy2: - case ZSTD_btopt: - case ZSTD_btultra: - case ZSTD_btultra2: - break; - default: - assert(0); /* not possible : not a valid strategy id */ - } + public: + explicit SnappyDecompressor(Source* reader) + : reader_(reader), + ip_(NULL), + ip_limit_(NULL), + peeked_(0), + eof_(false) { + } - return 0; -} + ~SnappyDecompressor() { + // Advance past any bytes we peeked at from the reader + reader_->Skip(peeked_); + } -/** ZSTD_ldm_fillLdmHashTable() : - * - * Fills hashTable from (lastHashed + 1) to iend (non-inclusive). - * lastHash is the rolling hash that corresponds to lastHashed. - * - * Returns the rolling hash corresponding to position iend-1. */ -static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state, - U64 lastHash, const BYTE* lastHashed, - const BYTE* iend, const BYTE* base, - U32 hBits, ldmParams_t const ldmParams) -{ - U64 rollingHash = lastHash; - const BYTE* cur = lastHashed + 1; + // Returns true iff we have hit the end of the input without an error. + bool eof() const { + return eof_; + } - while (cur < iend) { - rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1], - cur[ldmParams.minMatchLength-1], - state->hashPower); - ZSTD_ldm_makeEntryAndInsertByTag(state, - rollingHash, hBits, - (U32)(cur - base), ldmParams); - ++cur; + // Read the uncompressed length stored at the start of the compressed data. + // On success, stores the length in *result and returns true. + // On failure, returns false. 
+ bool ReadUncompressedLength(uint32* result) { + assert(ip_ == NULL); // Must not have read anything yet + // Length is encoded in 1..5 bytes + *result = 0; + uint32 shift = 0; + while (true) { + if (shift >= 32) return false; + size_t n; + const char* ip = reader_->Peek(&n); + if (n == 0) return false; + const unsigned char c = *(reinterpret_cast(ip)); + reader_->Skip(1); + uint32 val = c & 0x7f; + if (LeftShiftOverflows(static_cast(val), shift)) return false; + *result |= val << shift; + if (c < 128) { + break; + } + shift += 7; } - return rollingHash; -} + return true; + } -void ZSTD_ldm_fillHashTable( - ldmState_t* state, const BYTE* ip, - const BYTE* iend, ldmParams_t const* params) -{ - DEBUGLOG(5, "ZSTD_ldm_fillHashTable"); - if ((size_t)(iend - ip) >= params->minMatchLength) { - U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength); - ZSTD_ldm_fillLdmHashTable( - state, startingHash, ip, iend - params->minMatchLength, state->window.base, - params->hashLog - params->bucketSizeLog, - *params); - } -} + // Process the next item found in the input. + // Returns true if successful, false on error or end of input. + template +#if defined(__GNUC__) && defined(__x86_64__) + __attribute__((aligned(32))) +#endif + void DecompressAllTags(Writer* writer) { + // In x86, pad the function body to start 16 bytes later. This function has + // a couple of hotspots that are highly sensitive to alignment: we have + // observed regressions by more than 20% in some metrics just by moving the + // exact same code to a different position in the benchmark binary. + // + // Putting this code on a 32-byte-aligned boundary + 16 bytes makes us hit + // the "lucky" case consistently. Unfortunately, this is a very brittle + // workaround, and future differences in code generation may reintroduce + // this regression. If you experience a big, difficult to explain, benchmark + // performance regression here, first try removing this hack. 
+#if defined(__GNUC__) && defined(__x86_64__) + // Two 8-byte "NOP DWORD ptr [EAX + EAX*1 + 00000000H]" instructions. + asm(".byte 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00"); + asm(".byte 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00"); +#endif + const char* ip = ip_; + // We could have put this refill fragment only at the beginning of the loop. + // However, duplicating it at the end of each branch gives the compiler more + // scope to optimize the expression based on the local + // context, which overall increases speed. + #define MAYBE_REFILL() \ + if (ip_limit_ - ip < kMaximumTagLength) { \ + ip_ = ip; \ + if (!RefillTag()) return; \ + ip = ip_; \ + } -/** ZSTD_ldm_limitTableUpdate() : - * - * Sets cctx->nextToUpdate to a position corresponding closer to anchor - * if it is far way - * (after a long match, only update tables a limited amount). */ -static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor) -{ - U32 const current = (U32)(anchor - ms->window.base); - if (current > ms->nextToUpdate + 1024) { - ms->nextToUpdate = - current - MIN(512, current - ms->nextToUpdate - 1024); - } -} + MAYBE_REFILL(); + for ( ;; ) { + const unsigned char c = *(reinterpret_cast(ip++)); -static size_t ZSTD_ldm_generateSequences_internal( - ldmState_t* ldmState, rawSeqStore_t* rawSeqStore, - ldmParams_t const* params, void const* src, size_t srcSize) -{ - /* LDM parameters */ - int const extDict = ZSTD_window_hasExtDict(ldmState->window); - U32 const minMatchLength = params->minMatchLength; - U64 const hashPower = ldmState->hashPower; - U32 const hBits = params->hashLog - params->bucketSizeLog; - U32 const ldmBucketSize = 1U << params->bucketSizeLog; - U32 const hashRateLog = params->hashRateLog; - U32 const ldmTagMask = (1U << params->hashRateLog) - 1; - /* Prefix and extDict parameters */ - U32 const dictLimit = ldmState->window.dictLimit; - U32 const lowestIndex = extDict ? 
ldmState->window.lowLimit : dictLimit; - BYTE const* const base = ldmState->window.base; - BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL; - BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL; - BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL; - BYTE const* const lowPrefixPtr = base + dictLimit; - /* Input bounds */ - BYTE const* const istart = (BYTE const*)src; - BYTE const* const iend = istart + srcSize; - BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE); - /* Input positions */ - BYTE const* anchor = istart; - BYTE const* ip = istart; - /* Rolling hash */ - BYTE const* lastHashed = NULL; - U64 rollingHash = 0; + // Ratio of iterations that have LITERAL vs non-LITERAL for different + // inputs. + // + // input LITERAL NON_LITERAL + // ----------------------------------- + // html|html4|cp 23% 77% + // urls 36% 64% + // jpg 47% 53% + // pdf 19% 81% + // txt[1-4] 25% 75% + // pb 24% 76% + // bin 24% 76% + if (SNAPPY_PREDICT_FALSE((c & 0x3) == LITERAL)) { + size_t literal_length = (c >> 2) + 1u; + if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) { + assert(literal_length < 61); + ip += literal_length; + // NOTE: There is no MAYBE_REFILL() here, as TryFastAppend() + // will not return true unless there's already at least five spare + // bytes in addition to the literal. + continue; + } + if (SNAPPY_PREDICT_FALSE(literal_length >= 61)) { + // Long literal. 
+ const size_t literal_length_length = literal_length - 60; + literal_length = + ExtractLowBytes(LittleEndian::Load32(ip), literal_length_length) + + 1; + ip += literal_length_length; + } - while (ip <= ilimit) { - size_t mLength; - U32 const current = (U32)(ip - base); - size_t forwardMatchLength = 0, backwardMatchLength = 0; - ldmEntry_t* bestEntry = NULL; - if (ip != istart) { - rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0], - lastHashed[minMatchLength], - hashPower); - } else { - rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength); + size_t avail = ip_limit_ - ip; + while (avail < literal_length) { + if (!writer->Append(ip, avail)) return; + literal_length -= avail; + reader_->Skip(peeked_); + size_t n; + ip = reader_->Peek(&n); + avail = n; + peeked_ = avail; + if (avail == 0) return; // Premature end of input + ip_limit_ = ip + avail; } - lastHashed = ip; + if (!writer->Append(ip, literal_length)) { + return; + } + ip += literal_length; + MAYBE_REFILL(); + } else { + const size_t entry = char_table[c]; + const size_t trailer = + ExtractLowBytes(LittleEndian::Load32(ip), entry >> 11); + const size_t length = entry & 0xff; + ip += entry >> 11; - /* Do not insert and do not look for a match */ - if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) { - ip++; - continue; + // copy_offset/256 is encoded in bits 8..10. By just fetching + // those bits, we get copy_offset (since the bit-field starts at + // bit 8). 
+ const size_t copy_offset = entry & 0x700; + if (!writer->AppendFromSelf(copy_offset + trailer, length)) { + return; } + MAYBE_REFILL(); + } + } - /* Get the best entry and compute the match lengths */ - { - ldmEntry_t* const bucket = - ZSTD_ldm_getBucket(ldmState, - ZSTD_ldm_getSmallHash(rollingHash, hBits), - *params); - ldmEntry_t* cur; - size_t bestMatchLength = 0; - U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); +#undef MAYBE_REFILL + } +}; - for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) { - size_t curForwardMatchLength, curBackwardMatchLength, - curTotalMatchLength; - if (cur->checksum != checksum || cur->offset <= lowestIndex) { - continue; - } - if (extDict) { - BYTE const* const curMatchBase = - cur->offset < dictLimit ? dictBase : base; - BYTE const* const pMatch = curMatchBase + cur->offset; - BYTE const* const matchEnd = - cur->offset < dictLimit ? dictEnd : iend; - BYTE const* const lowMatchPtr = - cur->offset < dictLimit ? dictStart : lowPrefixPtr; +bool SnappyDecompressor::RefillTag() { + const char* ip = ip_; + if (ip == ip_limit_) { + // Fetch a new fragment from the reader + reader_->Skip(peeked_); // All peeked bytes are used up + size_t n; + ip = reader_->Peek(&n); + peeked_ = n; + eof_ = (n == 0); + if (eof_) return false; + ip_limit_ = ip + n; + } - curForwardMatchLength = ZSTD_count_2segments( - ip, pMatch, iend, - matchEnd, lowPrefixPtr); - if (curForwardMatchLength < minMatchLength) { - continue; - } - curBackwardMatchLength = - ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, - lowMatchPtr); - curTotalMatchLength = curForwardMatchLength + - curBackwardMatchLength; - } else { /* !extDict */ - BYTE const* const pMatch = base + cur->offset; - curForwardMatchLength = ZSTD_count(ip, pMatch, iend); - if (curForwardMatchLength < minMatchLength) { - continue; - } - curBackwardMatchLength = - ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, - lowPrefixPtr); - curTotalMatchLength = curForwardMatchLength + - 
curBackwardMatchLength; - } + // Read the tag character + assert(ip < ip_limit_); + const unsigned char c = *(reinterpret_cast(ip)); + const uint32 entry = char_table[c]; + const uint32 needed = (entry >> 11) + 1; // +1 byte for 'c' + assert(needed <= sizeof(scratch_)); + + // Read more bytes from reader if needed + uint32 nbuf = ip_limit_ - ip; + if (nbuf < needed) { + // Stitch together bytes from ip and reader to form the word + // contents. We store the needed bytes in "scratch_". They + // will be consumed immediately by the caller since we do not + // read more than we need. + memmove(scratch_, ip, nbuf); + reader_->Skip(peeked_); // All peeked bytes are used up + peeked_ = 0; + while (nbuf < needed) { + size_t length; + const char* src = reader_->Peek(&length); + if (length == 0) return false; + uint32 to_add = std::min(needed - nbuf, length); + memcpy(scratch_ + nbuf, src, to_add); + nbuf += to_add; + reader_->Skip(to_add); + } + assert(nbuf == needed); + ip_ = scratch_; + ip_limit_ = scratch_ + needed; + } else if (nbuf < kMaximumTagLength) { + // Have enough bytes, but move into scratch_ so that we do not + // read past end of input + memmove(scratch_, ip, nbuf); + reader_->Skip(peeked_); // All peeked bytes are used up + peeked_ = 0; + ip_ = scratch_; + ip_limit_ = scratch_ + nbuf; + } else { + // Pass pointer to buffer returned by reader_. 
+ ip_ = ip; + } + return true; +} + +template +static bool InternalUncompress(Source* r, Writer* writer) { + // Read the uncompressed length from the front of the compressed input + SnappyDecompressor decompressor(r); + uint32 uncompressed_len = 0; + if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false; - if (curTotalMatchLength > bestMatchLength) { - bestMatchLength = curTotalMatchLength; - forwardMatchLength = curForwardMatchLength; - backwardMatchLength = curBackwardMatchLength; - bestEntry = cur; - } - } - } + return InternalUncompressAllTags(&decompressor, writer, r->Available(), + uncompressed_len); +} - /* No match found -- continue searching */ - if (bestEntry == NULL) { - ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, - hBits, current, - *params); - ip++; - continue; - } +template +static bool InternalUncompressAllTags(SnappyDecompressor* decompressor, + Writer* writer, + uint32 compressed_len, + uint32 uncompressed_len) { + Report("snappy_uncompress", compressed_len, uncompressed_len); - /* Match found */ - mLength = forwardMatchLength + backwardMatchLength; - ip -= backwardMatchLength; + writer->SetExpectedLength(uncompressed_len); - { - /* Store the sequence: - * ip = current - backwardMatchLength - * The match is at (bestEntry->offset - backwardMatchLength) - */ - U32 const matchIndex = bestEntry->offset; - U32 const offset = current - matchIndex; - rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; + // Process the entire input + decompressor->DecompressAllTags(writer); + writer->Flush(); + return (decompressor->eof() && writer->CheckLength()); +} - /* Out of sequence storage */ - if (rawSeqStore->size == rawSeqStore->capacity) - return ERROR(dstSize_tooSmall); - seq->litLength = (U32)(ip - anchor); - seq->matchLength = (U32)mLength; - seq->offset = offset; - rawSeqStore->size++; - } +bool GetUncompressedLength(Source* source, uint32* result) { + SnappyDecompressor decompressor(source); + return 
decompressor.ReadUncompressedLength(result); +} - /* Insert the current entry into the hash table */ - ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits, - (U32)(lastHashed - base), - *params); +size_t Compress(Source* reader, Sink* writer) { + size_t written = 0; + size_t N = reader->Available(); + const size_t uncompressed_size = N; + char ulength[Varint::kMax32]; + char* p = Varint::Encode32(ulength, N); + writer->Append(ulength, p-ulength); + written += (p - ulength); - assert(ip + backwardMatchLength == lastHashed); + internal::WorkingMemory wmem(N); - /* Fill the hash table from lastHashed+1 to ip+mLength*/ - /* Heuristic: don't need to fill the entire table at end of block */ - if (ip + mLength <= ilimit) { - rollingHash = ZSTD_ldm_fillLdmHashTable( - ldmState, rollingHash, lastHashed, - ip + mLength, base, hBits, *params); - lastHashed = ip + mLength - 1; - } - ip += mLength; - anchor = ip; - } - return iend - anchor; -} + while (N > 0) { + // Get next block to compress (without copying if possible) + size_t fragment_size; + const char* fragment = reader->Peek(&fragment_size); + assert(fragment_size != 0); // premature end of input + const size_t num_to_read = std::min(N, kBlockSize); + size_t bytes_read = fragment_size; -/*! 
ZSTD_ldm_reduceTable() : - * reduce table indexes by `reducerValue` */ -static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size, - U32 const reducerValue) -{ - U32 u; - for (u = 0; u < size; u++) { - if (table[u].offset < reducerValue) table[u].offset = 0; - else table[u].offset -= reducerValue; + size_t pending_advance = 0; + if (bytes_read >= num_to_read) { + // Buffer returned by reader is large enough + pending_advance = num_to_read; + fragment_size = num_to_read; + } else { + char* scratch = wmem.GetScratchInput(); + memcpy(scratch, fragment, bytes_read); + reader->Skip(bytes_read); + + while (bytes_read < num_to_read) { + fragment = reader->Peek(&fragment_size); + size_t n = std::min(fragment_size, num_to_read - bytes_read); + memcpy(scratch + bytes_read, fragment, n); + bytes_read += n; + reader->Skip(n); + } + assert(bytes_read == num_to_read); + fragment = scratch; + fragment_size = num_to_read; } -} + assert(fragment_size == num_to_read); -size_t ZSTD_ldm_generateSequences( - ldmState_t* ldmState, rawSeqStore_t* sequences, - ldmParams_t const* params, void const* src, size_t srcSize) -{ - U32 const maxDist = 1U << params->windowLog; - BYTE const* const istart = (BYTE const*)src; - BYTE const* const iend = istart + srcSize; - size_t const kMaxChunkSize = 1 << 20; - size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0); - size_t chunk; - size_t leftoverSize = 0; + // Get encoding table for compression + int table_size; + uint16* table = wmem.GetHashTable(num_to_read, &table_size); - assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize); - /* Check that ZSTD_window_update() has been called for this chunk prior - * to passing it to this function. - */ - assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize); - /* The input could be very large (in zstdmt), so it must be broken up into - * chunks to enforce the maximum distance and handle overflow correction. 
- */ - assert(sequences->pos <= sequences->size); - assert(sequences->size <= sequences->capacity); - for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) { - BYTE const* const chunkStart = istart + chunk * kMaxChunkSize; - size_t const remaining = (size_t)(iend - chunkStart); - BYTE const *const chunkEnd = - (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize; - size_t const chunkSize = chunkEnd - chunkStart; - size_t newLeftoverSize; - size_t const prevSize = sequences->size; + // Compress input_fragment and append to dest + const int max_output = MaxCompressedLength(num_to_read); - assert(chunkStart < iend); - /* 1. Perform overflow correction if necessary. */ - if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { - U32 const ldmHSize = 1U << params->hashLog; - U32 const correction = ZSTD_window_correctOverflow( - &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); - ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); - /* invalidate dictionaries on overflow correction */ - ldmState->loadedDictEnd = 0; - } - /* 2. We enforce the maximum offset allowed. - * - * kMaxChunkSize should be small enough that we don't lose too much of - * the window through early invalidation. - * TODO: * Test the chunk size. - * * Try invalidation after the sequence generation and test the - * the offset against maxDist directly. - * - * NOTE: Because of dictionaries + sequence splitting we MUST make sure - * that any offset used is valid at the END of the sequence, since it may - * be split into two sequences. This condition holds when using - * ZSTD_window_enforceMaxDist(), but if we move to checking offsets - * against maxDist directly, we'll have to carefully handle that case. - */ - ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL); - /* 3. Generate the sequences for the chunk, and get newLeftoverSize. 
*/ - newLeftoverSize = ZSTD_ldm_generateSequences_internal( - ldmState, sequences, params, chunkStart, chunkSize); - if (ZSTD_isError(newLeftoverSize)) - return newLeftoverSize; - /* 4. We add the leftover literals from previous iterations to the first - * newly generated sequence, or add the `newLeftoverSize` if none are - * generated. - */ - /* Prepend the leftover literals from the last call */ - if (prevSize < sequences->size) { - sequences->seq[prevSize].litLength += (U32)leftoverSize; - leftoverSize = newLeftoverSize; - } else { - assert(newLeftoverSize == chunkSize); - leftoverSize += chunkSize; - } - } - return 0; -} + // Need a scratch buffer for the output, in case the byte sink doesn't + // have room for us directly. -void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) { - while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) { - rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos; - if (srcSize <= seq->litLength) { - /* Skip past srcSize literals */ - seq->litLength -= (U32)srcSize; - return; - } - srcSize -= seq->litLength; - seq->litLength = 0; - if (srcSize < seq->matchLength) { - /* Skip past the first srcSize of the match */ - seq->matchLength -= (U32)srcSize; - if (seq->matchLength < minMatch) { - /* The match is too short, omit it */ - if (rawSeqStore->pos + 1 < rawSeqStore->size) { - seq[1].litLength += seq[0].matchLength; - } - rawSeqStore->pos++; - } - return; - } - srcSize -= seq->matchLength; - seq->matchLength = 0; - rawSeqStore->pos++; - } -} + // Since we encode kBlockSize regions followed by a region + // which is <= kBlockSize in length, a previously allocated + // scratch_output[] region is big enough for this iteration. 
+ char* dest = writer->GetAppendBuffer(max_output, wmem.GetScratchOutput()); + char* end = internal::CompressFragment(fragment, fragment_size, dest, table, + table_size); + writer->Append(dest, end - dest); + written += (end - dest); -/** - * If the sequence length is longer than remaining then the sequence is split - * between this block and the next. - * - * Returns the current sequence to handle, or if the rest of the block should - * be literals, it returns a sequence with offset == 0. - */ -static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, - U32 const remaining, U32 const minMatch) -{ - rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos]; - assert(sequence.offset > 0); - /* Likely: No partial sequence */ - if (remaining >= sequence.litLength + sequence.matchLength) { - rawSeqStore->pos++; - return sequence; - } - /* Cut the sequence short (offset == 0 ==> rest is literals). */ - if (remaining <= sequence.litLength) { - sequence.offset = 0; - } else if (remaining < sequence.litLength + sequence.matchLength) { - sequence.matchLength = remaining - sequence.litLength; - if (sequence.matchLength < minMatch) { - sequence.offset = 0; - } - } - /* Skip past `remaining` bytes for the future sequences. 
*/ - ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch); - return sequence; + N -= num_to_read; + reader->Skip(pending_advance); + } + + Report("snappy_compress", written, uncompressed_size); + + return written; } -size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - unsigned const minMatch = cParams->minMatch; - ZSTD_blockCompressor const blockCompressor = - ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms)); - /* Input bounds */ - BYTE const* const istart = (BYTE const*)src; - BYTE const* const iend = istart + srcSize; - /* Input positions */ - BYTE const* ip = istart; +// ----------------------------------------------------------------------- +// IOVec interfaces +// ----------------------------------------------------------------------- - DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize); - assert(rawSeqStore->pos <= rawSeqStore->size); - assert(rawSeqStore->size <= rawSeqStore->capacity); - /* Loop through each sequence and apply the block compressor to the lits */ - while (rawSeqStore->pos < rawSeqStore->size && ip < iend) { - /* maybeSplitSequence updates rawSeqStore->pos */ - rawSeq const sequence = maybeSplitSequence(rawSeqStore, - (U32)(iend - ip), minMatch); - int i; - /* End signal */ - if (sequence.offset == 0) - break; +// A type that writes to an iovec. +// Note that this is not a "ByteSink", but a type that matches the +// Writer template argument to SnappyDecompressor::DecompressAllTags(). +class SnappyIOVecWriter { + private: + // output_iov_end_ is set to iov + count and used to determine when + // the end of the iovs is reached. 
+ const struct iovec* output_iov_end_; - assert(ip + sequence.litLength + sequence.matchLength <= iend); +#if !defined(NDEBUG) + const struct iovec* output_iov_; +#endif // !defined(NDEBUG) - /* Fill tables for block compressor */ - ZSTD_ldm_limitTableUpdate(ms, ip); - ZSTD_ldm_fillFastTables(ms, ip); - /* Run the block compressor */ - DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength); - { - size_t const newLitLength = - blockCompressor(ms, seqStore, rep, ip, sequence.litLength); - ip += sequence.litLength; - /* Update the repcodes */ - for (i = ZSTD_REP_NUM - 1; i > 0; i--) - rep[i] = rep[i-1]; - rep[0] = sequence.offset; - /* Store the sequence */ - ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, - sequence.offset + ZSTD_REP_MOVE, - sequence.matchLength - MINMATCH); - ip += sequence.matchLength; - } - } - /* Fill the tables for the block compressor */ - ZSTD_ldm_limitTableUpdate(ms, ip); - ZSTD_ldm_fillFastTables(ms, ip); - /* Compress the last literals */ - return blockCompressor(ms, seqStore, rep, ip, iend - ip); -} + // Current iov that is being written into. + const struct iovec* curr_iov_; -} + // Pointer to current iov's write location. + char* curr_iov_output_; + // Remaining bytes to write into curr_iov_output. + size_t curr_iov_remaining_; -// LICENSE_CHANGE_END + // Total bytes decompressed into output_iov_ so far. + size_t total_written_; + // Maximum number of bytes that will be decompressed into output_iov_. + size_t output_limit_; -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list + static inline char* GetIOVecPointer(const struct iovec* iov, size_t offset) { + return reinterpret_cast(iov->iov_base) + offset; + } -/* - * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ + public: + // Does not take ownership of iov. iov must be valid during the + // entire lifetime of the SnappyIOVecWriter. + inline SnappyIOVecWriter(const struct iovec* iov, size_t iov_count) + : output_iov_end_(iov + iov_count), +#if !defined(NDEBUG) + output_iov_(iov), +#endif // !defined(NDEBUG) + curr_iov_(iov), + curr_iov_output_(iov_count ? reinterpret_cast(iov->iov_base) + : nullptr), + curr_iov_remaining_(iov_count ? iov->iov_len : 0), + total_written_(0), + output_limit_(-1) {} + + inline void SetExpectedLength(size_t len) { + output_limit_ = len; + } + inline bool CheckLength() const { + return total_written_ == output_limit_; + } + inline bool Append(const char* ip, size_t len) { + if (total_written_ + len > output_limit_) { + return false; + } + return AppendNoCheck(ip, len); + } + inline bool AppendNoCheck(const char* ip, size_t len) { + while (len > 0) { + if (curr_iov_remaining_ == 0) { + // This iovec is full. Go to the next one. 
+ if (curr_iov_ + 1 >= output_iov_end_) { + return false; + } + ++curr_iov_; + curr_iov_output_ = reinterpret_cast(curr_iov_->iov_base); + curr_iov_remaining_ = curr_iov_->iov_len; + } + const size_t to_write = std::min(len, curr_iov_remaining_); + memcpy(curr_iov_output_, ip, to_write); + curr_iov_output_ += to_write; + curr_iov_remaining_ -= to_write; + total_written_ += to_write; + ip += to_write; + len -= to_write; + } -#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ -#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */ -#define ZSTD_MAX_PRICE (1<<30) + return true; + } -#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ + inline bool TryFastAppend(const char* ip, size_t available, size_t len) { + const size_t space_left = output_limit_ - total_written_; + if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16 && + curr_iov_remaining_ >= 16) { + // Fast path, used for the majority (about 95%) of invocations. + UnalignedCopy128(ip, curr_iov_output_); + curr_iov_output_ += len; + curr_iov_remaining_ -= len; + total_written_ += len; + return true; + } + return false; + } -/*-************************************* -* Price functions for optimal parser -***************************************/ + inline bool AppendFromSelf(size_t offset, size_t len) { + // See SnappyArrayWriter::AppendFromSelf for an explanation of + // the "offset - 1u" trick. 
+ if (offset - 1u >= total_written_) { + return false; + } + const size_t space_left = output_limit_ - total_written_; + if (len > space_left) { + return false; + } -#if 0 /* approximation at bit level */ -# define BITCOST_ACCURACY 0 -# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) -# define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat)) -#elif 0 /* fractional bit accuracy */ -# define BITCOST_ACCURACY 8 -# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) -# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat)) -#else /* opt==approx, ultra==accurate */ -# define BITCOST_ACCURACY 8 -# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) -# define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat)) -#endif + // Locate the iovec from which we need to start the copy. + const iovec* from_iov = curr_iov_; + size_t from_iov_offset = curr_iov_->iov_len - curr_iov_remaining_; + while (offset > 0) { + if (from_iov_offset >= offset) { + from_iov_offset -= offset; + break; + } -namespace duckdb_zstd { + offset -= from_iov_offset; + --from_iov; +#if !defined(NDEBUG) + assert(from_iov >= output_iov_); +#endif // !defined(NDEBUG) + from_iov_offset = from_iov->iov_len; + } -MEM_STATIC U32 ZSTD_bitWeight(U32 stat) -{ - return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER); -} + // Copy bytes starting from the iovec pointed to by from_iov_index to + // the current iovec. + while (len > 0) { + assert(from_iov <= curr_iov_); + if (from_iov != curr_iov_) { + const size_t to_copy = + std::min((unsigned long)(from_iov->iov_len - from_iov_offset), (unsigned long)len); + AppendNoCheck(GetIOVecPointer(from_iov, from_iov_offset), to_copy); + len -= to_copy; + if (len > 0) { + ++from_iov; + from_iov_offset = 0; + } + } else { + size_t to_copy = curr_iov_remaining_; + if (to_copy == 0) { + // This iovec is full. Go to the next one. 
+ if (curr_iov_ + 1 >= output_iov_end_) { + return false; + } + ++curr_iov_; + curr_iov_output_ = reinterpret_cast(curr_iov_->iov_base); + curr_iov_remaining_ = curr_iov_->iov_len; + continue; + } + if (to_copy > len) { + to_copy = len; + } + + IncrementalCopy(GetIOVecPointer(from_iov, from_iov_offset), + curr_iov_output_, curr_iov_output_ + to_copy, + curr_iov_output_ + curr_iov_remaining_); + curr_iov_output_ += to_copy; + curr_iov_remaining_ -= to_copy; + from_iov_offset += to_copy; + total_written_ += to_copy; + len -= to_copy; + } + } -MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat) -{ - U32 const stat = rawStat + 1; - U32 const hb = ZSTD_highbit32(stat); - U32 const BWeight = hb * BITCOST_MULTIPLIER; - U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb; - U32 const weight = BWeight + FWeight; - assert(hb + BITCOST_ACCURACY < 31); - return weight; -} + return true; + } -#if (DEBUGLEVEL>=2) -/* debugging function, - * @return price in bytes as fractional value - * for debug messages only */ -MEM_STATIC double ZSTD_fCost(U32 price) -{ - return (double)price / (BITCOST_MULTIPLIER*8); -} -#endif + inline void Flush() {} +}; -static int ZSTD_compressedLiterals(optState_t const* const optPtr) -{ - return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed; +bool RawUncompressToIOVec(const char* compressed, size_t compressed_length, + const struct iovec* iov, size_t iov_cnt) { + ByteArraySource reader(compressed, compressed_length); + return RawUncompressToIOVec(&reader, iov, iov_cnt); } -static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) -{ - if (ZSTD_compressedLiterals(optPtr)) - optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel); - optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel); - optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel); - optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel); +bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov, + size_t iov_cnt) { + 
SnappyIOVecWriter output(iov, iov_cnt); + return InternalUncompress(compressed, &output); } +// ----------------------------------------------------------------------- +// Flat array interfaces +// ----------------------------------------------------------------------- -/* ZSTD_downscaleStat() : - * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus) - * return the resulting sum of elements */ -static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus) -{ - U32 s, sum=0; - DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1); - assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31); - for (s=0; s> (ZSTD_FREQ_DIV+malus)); - sum += table[s]; - } - return sum; -} - -/* ZSTD_rescaleFreqs() : - * if first block (detected by optPtr->litLengthSum == 0) : init statistics - * take hints from dictionary if there is one - * or init from zero, using src for literals stats, or flat 1 for match symbols - * otherwise downscale existing stats, to be used as seed for next block. - */ -static void -ZSTD_rescaleFreqs(optState_t* const optPtr, - const BYTE* const src, size_t const srcSize, - int const optLevel) -{ - int const compressedLiterals = ZSTD_compressedLiterals(optPtr); - DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); - optPtr->priceType = zop_dynamic; +// A type that writes to a flat array. +// Note that this is not a "ByteSink", but a type that matches the +// Writer template argument to SnappyDecompressor::DecompressAllTags(). 
+class SnappyArrayWriter { + private: + char* base_; + char* op_; + char* op_limit_; - if (optPtr->litLengthSum == 0) { /* first block : init */ - if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */ - DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef"); - optPtr->priceType = zop_predef; - } + public: + inline explicit SnappyArrayWriter(char* dst) + : base_(dst), + op_(dst), + op_limit_(dst) { + } - assert(optPtr->symbolCosts != NULL); - if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { - /* huffman table presumed generated by dictionary */ - optPtr->priceType = zop_dynamic; + inline void SetExpectedLength(size_t len) { + op_limit_ = op_ + len; + } - if (compressedLiterals) { - unsigned lit; - assert(optPtr->litFreq != NULL); - optPtr->litSum = 0; - for (lit=0; lit<=MaxLit; lit++) { - U32 const scaleLog = 11; /* scale to 2K */ - U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit); - assert(bitCost <= scaleLog); - optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; - optPtr->litSum += optPtr->litFreq[lit]; - } } + inline bool CheckLength() const { + return op_ == op_limit_; + } - { unsigned ll; - FSE_CState_t llstate; - FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable); - optPtr->litLengthSum = 0; - for (ll=0; ll<=MaxLL; ll++) { - U32 const scaleLog = 10; /* scale to 1K */ - U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll); - assert(bitCost < scaleLog); - optPtr->litLengthFreq[ll] = bitCost ? 
1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; - optPtr->litLengthSum += optPtr->litLengthFreq[ll]; - } } + inline bool Append(const char* ip, size_t len) { + char* op = op_; + const size_t space_left = op_limit_ - op; + if (space_left < len) { + return false; + } + memcpy(op, ip, len); + op_ = op + len; + return true; + } - { unsigned ml; - FSE_CState_t mlstate; - FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable); - optPtr->matchLengthSum = 0; - for (ml=0; ml<=MaxML; ml++) { - U32 const scaleLog = 10; - U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml); - assert(bitCost < scaleLog); - optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; - optPtr->matchLengthSum += optPtr->matchLengthFreq[ml]; - } } + inline bool TryFastAppend(const char* ip, size_t available, size_t len) { + char* op = op_; + const size_t space_left = op_limit_ - op; + if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) { + // Fast path, used for the majority (about 95%) of invocations. + UnalignedCopy128(ip, op); + op_ = op + len; + return true; + } else { + return false; + } + } - { unsigned of; - FSE_CState_t ofstate; - FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable); - optPtr->offCodeSum = 0; - for (of=0; of<=MaxOff; of++) { - U32 const scaleLog = 10; - U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of); - assert(bitCost < scaleLog); - optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; - optPtr->offCodeSum += optPtr->offCodeFreq[of]; - } } + inline bool AppendFromSelf(size_t offset, size_t len) { + char* const op_end = op_ + len; - } else { /* not a dictionary */ + // Check if we try to append from before the start of the buffer. 
+ // Normally this would just be a check for "produced < offset", + // but "produced <= offset - 1u" is equivalent for every case + // except the one where offset==0, where the right side will wrap around + // to a very big number. This is convenient, as offset==0 is another + // invalid case that we also want to catch, so that we do not go + // into an infinite loop. + if (Produced() <= offset - 1u || op_end > op_limit_) return false; + op_ = IncrementalCopy(op_ - offset, op_, op_end, op_limit_); - assert(optPtr->litFreq != NULL); - if (compressedLiterals) { - unsigned lit = MaxLit; - HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ - optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); - } + return true; + } + inline size_t Produced() const { + assert(op_ >= base_); + return op_ - base_; + } + inline void Flush() {} +}; - { unsigned ll; - for (ll=0; ll<=MaxLL; ll++) - optPtr->litLengthFreq[ll] = 1; - } - optPtr->litLengthSum = MaxLL+1; +bool RawUncompress(const char* compressed, size_t n, char* uncompressed) { + ByteArraySource reader(compressed, n); + return RawUncompress(&reader, uncompressed); +} - { unsigned ml; - for (ml=0; ml<=MaxML; ml++) - optPtr->matchLengthFreq[ml] = 1; - } - optPtr->matchLengthSum = MaxML+1; +bool RawUncompress(Source* compressed, char* uncompressed) { + SnappyArrayWriter output(uncompressed); + return InternalUncompress(compressed, &output); +} - { unsigned of; - for (of=0; of<=MaxOff; of++) - optPtr->offCodeFreq[of] = 1; - } - optPtr->offCodeSum = MaxOff+1; +bool Uncompress(const char* compressed, size_t n, string* uncompressed) { + size_t ulength; + if (!GetUncompressedLength(compressed, n, &ulength)) { + return false; + } + // On 32-bit builds: max_size() < kuint32max. Check for that instead + // of crashing (e.g., consider externally specified compressed data). 
+ if (ulength > uncompressed->max_size()) { + return false; + } + STLStringResizeUninitialized(uncompressed, ulength); + return RawUncompress(compressed, n, string_as_array(uncompressed)); +} - } +// A Writer that drops everything on the floor and just does validation +class SnappyDecompressionValidator { + private: + size_t expected_; + size_t produced_; - } else { /* new block : re-use previous statistics, scaled down */ + public: + inline SnappyDecompressionValidator() : expected_(0), produced_(0) { } + inline void SetExpectedLength(size_t len) { + expected_ = len; + } + inline bool CheckLength() const { + return expected_ == produced_; + } + inline bool Append(const char* ip, size_t len) { + produced_ += len; + return produced_ <= expected_; + } + inline bool TryFastAppend(const char* ip, size_t available, size_t length) { + return false; + } + inline bool AppendFromSelf(size_t offset, size_t len) { + // See SnappyArrayWriter::AppendFromSelf for an explanation of + // the "offset - 1u" trick. + if (produced_ <= offset - 1u) return false; + produced_ += len; + return produced_ <= expected_; + } + inline void Flush() {} +}; - if (compressedLiterals) - optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); - optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0); - optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0); - optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0); - } +bool IsValidCompressedBuffer(const char* compressed, size_t n) { + ByteArraySource reader(compressed, n); + SnappyDecompressionValidator writer; + return InternalUncompress(&reader, &writer); +} - ZSTD_setBasePrices(optPtr, optLevel); +bool IsValidCompressed(Source* compressed) { + SnappyDecompressionValidator writer; + return InternalUncompress(compressed, &writer); } -/* ZSTD_rawLiteralsCost() : - * price of literals (only) in specified segment (which length can be 0). 
- * does not include price of literalLength symbol */ -static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, - const optState_t* const optPtr, - int optLevel) -{ - if (litLength == 0) return 0; +void RawCompress(const char* input, + size_t input_length, + char* compressed, + size_t* compressed_length) { + ByteArraySource reader(input, input_length); + UncheckedByteArraySink writer(compressed); + Compress(&reader, &writer); - if (!ZSTD_compressedLiterals(optPtr)) - return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bytes per literal. */ + // Compute how many bytes were added + *compressed_length = (writer.CurrentDestination() - compressed); +} - if (optPtr->priceType == zop_predef) - return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */ +size_t Compress(const char* input, size_t input_length, string* compressed) { + // Pre-grow the buffer to the max length of the compressed output + STLStringResizeUninitialized(compressed, MaxCompressedLength(input_length)); - /* dynamic statistics */ - { U32 price = litLength * optPtr->litSumBasePrice; - U32 u; - for (u=0; u < litLength; u++) { - assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */ - price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel); - } - return price; - } + size_t compressed_length; + RawCompress(input, input_length, string_as_array(compressed), + &compressed_length); + compressed->resize(compressed_length); + return compressed_length; } -/* ZSTD_litLengthPrice() : - * cost of literalLength symbol */ -static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel) -{ - if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel); +// ----------------------------------------------------------------------- +// Sink interface +// ----------------------------------------------------------------------- - /* dynamic 
statistics */ - { U32 const llCode = ZSTD_LLcode(litLength); - return (ZSTDInternalConstants::LL_bits[llCode] * BITCOST_MULTIPLIER) - + optPtr->litLengthSumBasePrice - - WEIGHT(optPtr->litLengthFreq[llCode], optLevel); - } -} +// A type that decompresses into a Sink. The template parameter +// Allocator must export one method "char* Allocate(int size);", which +// allocates a buffer of "size" and appends that to the destination. +template +class SnappyScatteredWriter { + Allocator allocator_; -/* ZSTD_getMatchPrice() : - * Provides the cost of the match part (offset + matchLength) of a sequence - * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. - * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */ -FORCE_INLINE_TEMPLATE U32 -ZSTD_getMatchPrice(U32 const offset, - U32 const matchLength, - const optState_t* const optPtr, - int const optLevel) -{ - U32 price; - U32 const offCode = ZSTD_highbit32(offset+1); - U32 const mlBase = matchLength - MINMATCH; - assert(matchLength >= MINMATCH); + // We need random access into the data generated so far. Therefore + // we keep track of all of the generated data as an array of blocks. + // All of the blocks except the last have length kBlockSize. 
+ std::vector blocks_; + size_t expected_; - if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */ - return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER); + // Total size of all fully generated blocks so far + size_t full_size_; - /* dynamic statistics */ - price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel)); - if ((optLevel<2) /*static*/ && offCode >= 20) - price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */ + // Pointer into current output block + char* op_base_; // Base of output block + char* op_ptr_; // Pointer to next unfilled byte in block + char* op_limit_; // Pointer just past block - /* match Length */ - { U32 const mlCode = ZSTD_MLcode(mlBase); - price += (ZSTDInternalConstants::ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel)); - } + inline size_t Size() const { + return full_size_ + (op_ptr_ - op_base_); + } - price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */ + bool SlowAppend(const char* ip, size_t len); + bool SlowAppendFromSelf(size_t offset, size_t len); - DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price); - return price; -} + public: + inline explicit SnappyScatteredWriter(const Allocator& allocator) + : allocator_(allocator), + full_size_(0), + op_base_(NULL), + op_ptr_(NULL), + op_limit_(NULL) { + } -/* ZSTD_updateStats() : - * assumption : literals + litLengtn <= iend */ -static void ZSTD_updateStats(optState_t* const optPtr, - U32 litLength, const BYTE* literals, - U32 offsetCode, U32 matchLength) -{ - /* literals */ - if (ZSTD_compressedLiterals(optPtr)) { - U32 u; - for (u=0; u < litLength; u++) - optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; - optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; - 
} + inline void SetExpectedLength(size_t len) { + assert(blocks_.empty()); + expected_ = len; + } - /* literal Length */ - { U32 const llCode = ZSTD_LLcode(litLength); - optPtr->litLengthFreq[llCode]++; - optPtr->litLengthSum++; - } + inline bool CheckLength() const { + return Size() == expected_; + } - /* match offset code (0-2=>repCode; 3+=>offset+2) */ - { U32 const offCode = ZSTD_highbit32(offsetCode+1); - assert(offCode <= MaxOff); - optPtr->offCodeFreq[offCode]++; - optPtr->offCodeSum++; - } + // Return the number of bytes actually uncompressed so far + inline size_t Produced() const { + return Size(); + } - /* match Length */ - { U32 const mlBase = matchLength - MINMATCH; - U32 const mlCode = ZSTD_MLcode(mlBase); - optPtr->matchLengthFreq[mlCode]++; - optPtr->matchLengthSum++; + inline bool Append(const char* ip, size_t len) { + size_t avail = op_limit_ - op_ptr_; + if (len <= avail) { + // Fast path + memcpy(op_ptr_, ip, len); + op_ptr_ += len; + return true; + } else { + return SlowAppend(ip, len); } -} - + } -/* ZSTD_readMINMATCH() : - * function safe only for comparisons - * assumption : memPtr must be at least 4 bytes before end of buffer */ -MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) -{ - switch (length) - { - default : - case 4 : return MEM_read32(memPtr); - case 3 : if (MEM_isLittleEndian()) - return MEM_read32(memPtr)<<8; - else - return MEM_read32(memPtr)>>8; + inline bool TryFastAppend(const char* ip, size_t available, size_t length) { + char* op = op_ptr_; + const int space_left = op_limit_ - op; + if (length <= 16 && available >= 16 + kMaximumTagLength && + space_left >= 16) { + // Fast path, used for the majority (about 95%) of invocations. 
+ UnalignedCopy128(ip, op); + op_ptr_ = op + length; + return true; + } else { + return false; } -} + } + inline bool AppendFromSelf(size_t offset, size_t len) { + char* const op_end = op_ptr_ + len; + // See SnappyArrayWriter::AppendFromSelf for an explanation of + // the "offset - 1u" trick. + if (SNAPPY_PREDICT_TRUE(offset - 1u < (size_t)(op_ptr_ - op_base_) && + op_end <= op_limit_)) { + // Fast path: src and dst in current block. + op_ptr_ = IncrementalCopy(op_ptr_ - offset, op_ptr_, op_end, op_limit_); + return true; + } + return SlowAppendFromSelf(offset, len); + } -/* Update hashTable3 up to ip (excluded) - Assumption : always within prefix (i.e. not within extDict) */ -static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, - U32* nextToUpdate3, - const BYTE* const ip) -{ - U32* const hashTable3 = ms->hashTable3; - U32 const hashLog3 = ms->hashLog3; - const BYTE* const base = ms->window.base; - U32 idx = *nextToUpdate3; - U32 const target = (U32)(ip - base); - size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3); - assert(hashLog3 > 0); + // Called at the end of the decompress. We ask the allocator + // write all blocks to the sink. 
+ inline void Flush() { allocator_.Flush(Produced()); } +}; - while(idx < target) { - hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; - idx++; +template +bool SnappyScatteredWriter::SlowAppend(const char* ip, size_t len) { + size_t avail = op_limit_ - op_ptr_; + while (len > avail) { + // Completely fill this block + memcpy(op_ptr_, ip, avail); + op_ptr_ += avail; + assert(op_limit_ - op_ptr_ == 0); + full_size_ += (op_ptr_ - op_base_); + len -= avail; + ip += avail; + + // Bounds check + if (full_size_ + len > expected_) { + return false; } - *nextToUpdate3 = target; - return hashTable3[hash3]; -} + // Make new block + size_t bsize = std::min(kBlockSize, expected_ - full_size_); + op_base_ = allocator_.Allocate(bsize); + op_ptr_ = op_base_; + op_limit_ = op_base_ + bsize; + blocks_.push_back(op_base_); + avail = bsize; + } + memcpy(op_ptr_, ip, len); + op_ptr_ += len; + return true; +} -/*-************************************* -* Binary Tree search -***************************************/ -/** ZSTD_insertBt1() : add one or multiple positions to tree. - * ip : assumed <= iend-8 . 
- * @return : nb of positions added */ -static U32 ZSTD_insertBt1( - ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iend, - U32 const mls, const int extDict) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hashLog = cParams->hashLog; - size_t const h = ZSTD_hashPtr(ip, hashLog, mls); - U32* const bt = ms->chainTable; - U32 const btLog = cParams->chainLog - 1; - U32 const btMask = (1 << btLog) - 1; - U32 matchIndex = hashTable[h]; - size_t commonLengthSmaller=0, commonLengthLarger=0; - const BYTE* const base = ms->window.base; - const BYTE* const dictBase = ms->window.dictBase; - const U32 dictLimit = ms->window.dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* match; - const U32 current = (U32)(ip-base); - const U32 btLow = btMask >= current ? 0 : current - btMask; - U32* smallerPtr = bt + 2*(current&btMask); - U32* largerPtr = smallerPtr + 1; - U32 dummy32; /* to be nullified at the end */ - U32 const windowLow = ms->window.lowLimit; - U32 matchEndIdx = current+8+1; - size_t bestLength = 8; - U32 nbCompares = 1U << cParams->searchLog; -#ifdef ZSTD_C_PREDICT - U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); - U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1); - predictedSmall += (predictedSmall>0); - predictedLarge += (predictedLarge>0); -#endif /* ZSTD_C_PREDICT */ +template +bool SnappyScatteredWriter::SlowAppendFromSelf(size_t offset, + size_t len) { + // Overflow check + // See SnappyArrayWriter::AppendFromSelf for an explanation of + // the "offset - 1u" trick. + const size_t cur = Size(); + if (offset - 1u >= cur) return false; + if (expected_ - cur < len) return false; - DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current); + // Currently we shouldn't ever hit this path because Compress() chops the + // input into blocks and does not create cross-block copies. 
However, it is + // nice if we do not rely on that, since we can get better compression if we + // allow cross-block copies and thus might want to change the compressor in + // the future. + size_t src = cur - offset; + while (len-- > 0) { + char c = blocks_[src >> kBlockLog][src & (kBlockSize-1)]; + Append(&c, 1); + src++; + } + return true; +} - assert(ip <= iend-8); /* required for h calculation */ - hashTable[h] = current; /* Update Hash Table */ +class SnappySinkAllocator { + public: + explicit SnappySinkAllocator(Sink* dest): dest_(dest) {} + ~SnappySinkAllocator() {} - assert(windowLow > 0); - while (nbCompares-- && (matchIndex >= windowLow)) { - U32* const nextPtr = bt + 2*(matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - assert(matchIndex < current); + char* Allocate(int size) { + Datablock block(new char[size], size); + blocks_.push_back(block); + return block.data; + } -#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ - const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ - if (matchIndex == predictedSmall) { - /* no need to check length, result known */ - *smallerPtr = matchIndex; - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - predictedSmall = predictPtr[1] + (predictPtr[1]>0); - continue; - } - if (matchIndex == predictedLarge) { - *largerPtr = matchIndex; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - predictedLarge = predictPtr[0] + (predictPtr[0]>0); - continue; - } -#endif + // We flush only at the end, because the writer wants + // random access to the blocks and once we hand the 
+ // block over to the sink, we can't access it anymore. + // Also we don't write more than has been actually written + // to the blocks. + void Flush(size_t size) { + size_t size_written = 0; + size_t block_size; + for (size_t i = 0; i < blocks_.size(); ++i) { + block_size = std::min(blocks_[i].size, size - size_written); + dest_->AppendAndTakeOwnership(blocks_[i].data, block_size, + &SnappySinkAllocator::Deleter, NULL); + size_written += block_size; + } + blocks_.clear(); + } - if (!extDict || (matchIndex+matchLength >= dictLimit)) { - assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */ - match = base + matchIndex; - matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); - } else { - match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); - if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ - } + private: + struct Datablock { + char* data; + size_t size; + Datablock(char* p, size_t s) : data(p), size(s) {} + }; - if (matchLength > bestLength) { - bestLength = matchLength; - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - } + static void Deleter(void* arg, const char* bytes, size_t size) { + delete[] bytes; + } - if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ - break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ - } + Sink* dest_; + std::vector blocks_; - if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ - /* match is smaller than current */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ - smallerPtr = 
nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ - matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ - } else { - /* match is larger than current */ - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } } + // Note: copying this object is allowed +}; - *smallerPtr = *largerPtr = 0; - { U32 positions = 0; - if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */ - assert(matchEndIdx > current + 8); - return MAX(positions, matchEndIdx - (current + 8)); - } +size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed) { + SnappySinkAllocator allocator(uncompressed); + SnappyScatteredWriter writer(allocator); + InternalUncompress(compressed, &writer); + return writer.Produced(); } -FORCE_INLINE_TEMPLATE -void ZSTD_updateTree_internal( - ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iend, - const U32 mls, const ZSTD_dictMode_e dictMode) -{ - const BYTE* const base = ms->window.base; - U32 const target = (U32)(ip - base); - U32 idx = ms->nextToUpdate; - DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", - idx, target, dictMode); +bool Uncompress(Source* compressed, Sink* uncompressed) { + // Read the uncompressed length from the front of the compressed input + SnappyDecompressor decompressor(compressed); + uint32 uncompressed_len = 0; + if (!decompressor.ReadUncompressedLength(&uncompressed_len)) { + return false; + } - while(idx < target) { - U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict); - assert(idx < (U32)(idx + forward)); - idx += forward; - } - assert((size_t)(ip - base) <= (size_t)(U32)(-1)); - assert((size_t)(iend - base) <= (size_t)(U32)(-1)); - ms->nextToUpdate = target; -} + char c; + size_t 
allocated_size; + char* buf = uncompressed->GetAppendBufferVariable( + 1, uncompressed_len, &c, 1, &allocated_size); -void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) { - ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict); + const size_t compressed_len = compressed->Available(); + // If we can get a flat buffer, then use it, otherwise do block by block + // uncompression + if (allocated_size >= uncompressed_len) { + SnappyArrayWriter writer(buf); + bool result = InternalUncompressAllTags(&decompressor, &writer, + compressed_len, uncompressed_len); + uncompressed->Append(buf, writer.Produced()); + return result; + } else { + SnappySinkAllocator allocator(uncompressed); + SnappyScatteredWriter writer(allocator); + return InternalUncompressAllTags(&decompressor, &writer, compressed_len, + uncompressed_len); + } } -FORCE_INLINE_TEMPLATE -U32 ZSTD_insertBtAndGetAllMatches ( - ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ - ZSTD_matchState_t* ms, - U32* nextToUpdate3, - const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, - const U32 rep[ZSTD_REP_NUM], - U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ - const U32 lengthToBeat, - U32 const mls /* template */) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); - const BYTE* const base = ms->window.base; - U32 const current = (U32)(ip-base); - U32 const hashLog = cParams->hashLog; - U32 const minMatch = (mls==3) ? 
3 : 4; - U32* const hashTable = ms->hashTable; - size_t const h = ZSTD_hashPtr(ip, hashLog, mls); - U32 matchIndex = hashTable[h]; - U32* const bt = ms->chainTable; - U32 const btLog = cParams->chainLog - 1; - U32 const btMask= (1U << btLog) - 1; - size_t commonLengthSmaller=0, commonLengthLarger=0; - const BYTE* const dictBase = ms->window.dictBase; - U32 const dictLimit = ms->window.dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const prefixStart = base + dictLimit; - U32 const btLow = (btMask >= current) ? 0 : current - btMask; - U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog); - U32 const matchLow = windowLow ? windowLow : 1; - U32* smallerPtr = bt + 2*(current&btMask); - U32* largerPtr = bt + 2*(current&btMask) + 1; - U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */ - U32 dummy32; /* to be nullified at the end */ - U32 mnum = 0; - U32 nbCompares = 1U << cParams->searchLog; - - const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL; - const ZSTD_compressionParameters* const dmsCParams = - dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL; - const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL; - const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL; - U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0; - U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0; - U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0; - U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog; - U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog; - U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? 
(1U << dmsBtLog) - 1 : 0; - U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit; +} // namespace snappy - size_t bestLength = lengthToBeat-1; - DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current); - /* check repCode */ - assert(ll0 <= 1); /* necessarily 1 or 0 */ - { U32 const lastR = ZSTD_REP_NUM + ll0; - U32 repCode; - for (repCode = ll0; repCode < lastR; repCode++) { - U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; - U32 const repIndex = current - repOffset; - U32 repLen = 0; - assert(current >= dictLimit); - if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */ - /* We must validate the repcode offset because when we're using a dictionary the - * valid offset range shrinks when the dictionary goes out of bounds. - */ - if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) { - repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch; - } - } else { /* repIndex < dictLimit || repIndex >= current */ - const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ? 
- dmsBase + repIndex - dmsIndexDelta : - dictBase + repIndex; - assert(current >= windowLow); - if ( dictMode == ZSTD_extDict - && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */ - & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */) - && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { - repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch; - } - if (dictMode == ZSTD_dictMatchState - && ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `current > repIndex >= dmsLowLimit` */ - & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */ - && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { - repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch; - } } - /* save longer solution */ - if (repLen > bestLength) { - DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u", - repCode, ll0, repOffset, repLen); - bestLength = repLen; - matches[mnum].off = repCode - ll0; - matches[mnum].len = (U32)repLen; - mnum++; - if ( (repLen > sufficient_len) - | (ip+repLen == iLimit) ) { /* best possible */ - return mnum; - } } } } +// LICENSE_CHANGE_END - /* HC3 match finder */ - if ((mls == 3) /*static*/ && (bestLength < mls)) { - U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip); - if ((matchIndex3 >= matchLow) - & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { - size_t mlen; - if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) { - const BYTE* const match = base + matchIndex3; - mlen = ZSTD_count(ip, match, iLimit); 
- } else { - const BYTE* const match = dictBase + matchIndex3; - mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart); - } - /* save best solution */ - if (mlen >= mls /* == 3 > bestLength */) { - DEBUGLOG(8, "found small match with hlog3, of length %u", - (U32)mlen); - bestLength = mlen; - assert(current > matchIndex3); - assert(mnum==0); /* no prior solution */ - matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE; - matches[0].len = (U32)mlen; - mnum = 1; - if ( (mlen > sufficient_len) | - (ip+mlen == iLimit) ) { /* best possible length */ - ms->nextToUpdate = current+1; /* skip insertion */ - return 1; - } } } - /* no dictMatchState lookup: dicts don't have a populated HC3 table */ - } +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #1 +// See the end of this file for a list - hashTable[h] = current; /* Update Hash Table */ +/** + * Autogenerated by Thrift Compiler (0.11.0) + * + * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING + * @generated + */ - while (nbCompares-- && (matchIndex >= matchLow)) { - U32* const nextPtr = bt + 2*(matchIndex & btMask); - const BYTE* match; - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - assert(current > matchIndex); - if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) { - assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */ - match = base + matchIndex; - if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ - matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit); - } else { - match = dictBase + matchIndex; - assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ - matchLength += ZSTD_count_2segments(ip+matchLength, 
match+matchLength, iLimit, dictEnd, prefixStart); - if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* prepare for match[matchLength] read */ - } +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #1 +// See the end of this file for a list - if (matchLength > bestLength) { - DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)", - (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE); - assert(matchEndIdx > matchIndex); - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - bestLength = matchLength; - matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE; - matches[mnum].len = (U32)matchLength; - mnum++; - if ( (matchLength > ZSTD_OPT_NUM) - | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { - if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */ - break; /* drop, to preserve bt consistency (miss a little bit of compression) */ - } - } +/** + * Autogenerated by Thrift Compiler (0.11.0) + * + * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING + * @generated + */ +#ifndef parquet_CONSTANTS_H +#define parquet_CONSTANTS_H - if (match[matchLength] < ip[matchLength]) { - /* match smaller than current */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */ - matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */ - } else { - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - largerPtr = nextPtr; 
- matchIndex = nextPtr[0]; - } } - *smallerPtr = *largerPtr = 0; - if (dictMode == ZSTD_dictMatchState && nbCompares) { - size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls); - U32 dictMatchIndex = dms->hashTable[dmsH]; - const U32* const dmsBt = dms->chainTable; - commonLengthSmaller = commonLengthLarger = 0; - while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) { - const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - const BYTE* match = dmsBase + dictMatchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart); - if (dictMatchIndex+matchLength >= dmsHighLimit) - match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */ +namespace duckdb_parquet { namespace format { - if (matchLength > bestLength) { - matchIndex = dictMatchIndex + dmsIndexDelta; - DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)", - (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE); - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - bestLength = matchLength; - matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE; - matches[mnum].len = (U32)matchLength; - mnum++; - if ( (matchLength > ZSTD_OPT_NUM) - | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { - break; /* drop, to guarantee consistency (miss a little bit of compression) */ - } - } +class parquetConstants { + public: + parquetConstants(); - if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */ - if (match[matchLength] < ip[matchLength]) { - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - } else { - /* match is larger 
than current */ - commonLengthLarger = matchLength; - dictMatchIndex = nextPtr[0]; - } - } - } +}; - assert(matchEndIdx > current+8); - ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ - return mnum; -} +extern const parquetConstants g_parquet_constants; +}} // namespace -FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( - ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */ - ZSTD_matchState_t* ms, - U32* nextToUpdate3, - const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode, - const U32 rep[ZSTD_REP_NUM], - U32 const ll0, - U32 const lengthToBeat) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32 const matchLengthSearch = cParams->minMatch; - DEBUGLOG(8, "ZSTD_BtGetAllMatches"); - if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode); - switch(matchLengthSearch) - { - case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3); - default : - case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4); - case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5); - case 7 : - case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6); - } -} +#endif -/*-******************************* -* Optimal parser -*********************************/ +// LICENSE_CHANGE_END -static U32 ZSTD_totalLen(ZSTD_optimal_t sol) -{ - return sol.litlen + sol.mlen; -} +namespace duckdb_parquet { namespace format { -#if 0 /* debug */ +const parquetConstants g_parquet_constants; -static void -listStats(const U32* table, int lastEltID) -{ - int const nbElts = lastEltID + 1; - int enb; - for (enb=0; enb < nbElts; enb++) { - (void)table; 
- /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */ - RAWLOG(2, "%4i,", table[enb]); - } - RAWLOG(2, " \n"); +parquetConstants::parquetConstants() { } -#endif +}} // namespace -FORCE_INLINE_TEMPLATE size_t -ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, - seqStore_t* seqStore, - U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize, - const int optLevel, - const ZSTD_dictMode_e dictMode) -{ - optState_t* const optStatePtr = &ms->opt; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ms->window.base; - const BYTE* const prefixStart = base + ms->window.dictLimit; - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); - U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4; - U32 nextToUpdate3 = ms->nextToUpdate; - ZSTD_optimal_t* const opt = optStatePtr->priceTable; - ZSTD_match_t* const matches = optStatePtr->matchTable; - ZSTD_optimal_t lastSequence; +// LICENSE_CHANGE_END - /* init */ - DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u", - (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate); - assert(optLevel <= 2); - ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); - ip += (ip==prefixStart); - /* Match Loop */ - while (ip < ilimit) { - U32 cur, last_pos = 0; +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #1 +// See the end of this file for a list - /* find first match */ - { U32 const litlen = (U32)(ip - anchor); - U32 const ll0 = !litlen; - U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch); - if (!nbMatches) { ip++; continue; } +/** + * Autogenerated by Thrift Compiler (0.11.0) + * + * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW 
WHAT YOU ARE DOING + * @generated + */ - /* initialize opt[0] */ - { U32 i ; for (i=0; i immediate encoding */ - { U32 const maxML = matches[nbMatches-1].len; - U32 const maxOffset = matches[nbMatches-1].off; - DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series", - nbMatches, maxML, maxOffset, (U32)(ip-prefixStart)); +#include +#include - if (maxML > sufficient_len) { - lastSequence.litlen = litlen; - lastSequence.mlen = maxML; - lastSequence.off = maxOffset; - DEBUGLOG(6, "large match (%u>%u), immediate encoding", - maxML, sufficient_len); - cur = 0; - last_pos = ZSTD_totalLen(lastSequence); - goto _shortestPath; - } } - /* set prices for first matches starting position == 0 */ - { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel); - U32 pos; - U32 matchNb; - for (pos = 1; pos < minMatch; pos++) { - opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */ - } - for (matchNb = 0; matchNb < nbMatches; matchNb++) { - U32 const offset = matches[matchNb].off; - U32 const end = matches[matchNb].len; - for ( ; pos <= end ; pos++ ) { - U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel); - U32 const sequencePrice = literalsPrice + matchPrice; - DEBUGLOG(7, "rPos:%u => set initial price : %.2f", - pos, ZSTD_fCost(sequencePrice)); - opt[pos].mlen = pos; - opt[pos].off = offset; - opt[pos].litlen = litlen; - opt[pos].price = sequencePrice; - } } - last_pos = pos-1; - } - } - /* check further positions */ - for (cur = 1; cur <= last_pos; cur++) { - const BYTE* const inr = ip + cur; - assert(cur < ZSTD_OPT_NUM); - DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur) +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 +// See the end of this file for a list - /* Fix current position with one literal if cheaper */ - { U32 const litlen = (opt[cur-1].mlen == 0) ? 
opt[cur-1].litlen + 1 : 1; - int const price = opt[cur-1].price - + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel) - + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel) - - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel); - assert(price < 1000000000); /* overflow check */ - if (price <= opt[cur].price) { - DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", - inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, - opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); - opt[cur].mlen = 0; - opt[cur].off = 0; - opt[cur].litlen = litlen; - opt[cur].price = price; - } else { - DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)", - inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), - opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]); - } - } +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ - /* Set the repcodes of the current position. We must do it here - * because we rely on the repcodes of the 2nd to last sequence being - * correct to set the next chunks repcodes during the backward - * traversal. 
- */ - ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t)); - assert(cur >= opt[cur].mlen); - if (opt[cur].mlen != 0) { - U32 const prev = cur - opt[cur].mlen; - repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0); - memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t)); - } else { - memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t)); - } +#ifndef _THRIFT_TOSTRING_H_ +#define _THRIFT_TOSTRING_H_ 1 - /* last match must start at a minimum distance of 8 from oend */ - if (inr > ilimit) continue; +#include +#include +#include +#include +#include +#include +#include - if (cur == last_pos) break; +namespace duckdb_apache { +namespace thrift { - if ( (optLevel==0) /*static_test*/ - && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) { - DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1); - continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ - } +template +std::string to_string(const T& t) { + std::ostringstream o; + o << t; + return o.str(); +} - { U32 const ll0 = (opt[cur].mlen != 0); - U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0; - U32 const previousPrice = opt[cur].price; - U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); - U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch); - U32 matchNb; - if (!nbMatches) { - DEBUGLOG(7, "rPos:%u : no match found", cur); - continue; - } +// TODO: replace the computations below with std::numeric_limits::max_digits10 once C++11 +// is enabled. 
+inline std::string to_string(const float& t) { + std::ostringstream o; + o.precision(static_cast(std::ceil(static_cast(std::numeric_limits::digits * std::log10(2.0f) + 1)))); + o << t; + return o.str(); +} - { U32 const maxML = matches[nbMatches-1].len; - DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u", - inr-istart, cur, nbMatches, maxML); +inline std::string to_string(const double& t) { + std::ostringstream o; + o.precision(static_cast(std::ceil(static_cast(std::numeric_limits::digits * std::log10(2.0f) + 1)))); + o << t; + return o.str(); +} - if ( (maxML > sufficient_len) - || (cur + maxML >= ZSTD_OPT_NUM) ) { - lastSequence.mlen = maxML; - lastSequence.off = matches[nbMatches-1].off; - lastSequence.litlen = litlen; - cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */ - last_pos = cur + ZSTD_totalLen(lastSequence); - if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */ - goto _shortestPath; - } } +inline std::string to_string(const long double& t) { + std::ostringstream o; + o.precision(static_cast(std::ceil(static_cast(std::numeric_limits::digits * std::log10(2.0f) + 1)))); + o << t; + return o.str(); +} - /* set prices using matches found at position == cur */ - for (matchNb = 0; matchNb < nbMatches; matchNb++) { - U32 const offset = matches[matchNb].off; - U32 const lastML = matches[matchNb].len; - U32 const startML = (matchNb>0) ? 
matches[matchNb-1].len+1 : minMatch; - U32 mlen; +template +std::string to_string(const std::map& m); - DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u", - matchNb, matches[matchNb].off, lastML, litlen); +template +std::string to_string(const std::set& s); - for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ - U32 const pos = cur + mlen; - int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); +template +std::string to_string(const std::vector& t); - if ((pos > last_pos) || (price < opt[pos].price)) { - DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", - pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); - while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */ - opt[pos].mlen = mlen; - opt[pos].off = offset; - opt[pos].litlen = litlen; - opt[pos].price = price; - } else { - DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)", - pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); - if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */ - } - } } } - } /* for (cur = 1; cur <= last_pos; cur++) */ +template +std::string to_string(const typename std::pair& v) { + std::ostringstream o; + o << to_string(v.first) << ": " << to_string(v.second); + return o.str(); +} - lastSequence = opt[last_pos]; - cur = last_pos > ZSTD_totalLen(lastSequence) ? 
last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */ - assert(cur < ZSTD_OPT_NUM); /* control overflow*/ +template +std::string to_string(const T& beg, const T& end) { + std::ostringstream o; + for (T it = beg; it != end; ++it) { + if (it != beg) + o << ", "; + o << to_string(*it); + } + return o.str(); +} -_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ - assert(opt[0].mlen == 0); +template +std::string to_string(const std::vector& t) { + std::ostringstream o; + o << "[" << to_string(t.begin(), t.end()) << "]"; + return o.str(); +} - /* Set the next chunk's repcodes based on the repcodes of the beginning - * of the last match, and the last sequence. This avoids us having to - * update them while traversing the sequences. - */ - if (lastSequence.mlen != 0) { - repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0); - memcpy(rep, &reps, sizeof(reps)); - } else { - memcpy(rep, opt[cur].rep, sizeof(repcodes_t)); - } +template +std::string to_string(const std::map& m) { + std::ostringstream o; + o << "{" << to_string(m.begin(), m.end()) << "}"; + return o.str(); +} - { U32 const storeEnd = cur + 1; - U32 storeStart = storeEnd; - U32 seqPos = cur; +template +std::string to_string(const std::set& s) { + std::ostringstream o; + o << "{" << to_string(s.begin(), s.end()) << "}"; + return o.str(); +} +} +} // duckdb_apache::thrift - DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)", - last_pos, cur); (void)last_pos; - assert(storeEnd < ZSTD_OPT_NUM); - DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", - storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off); - opt[storeEnd] = lastSequence; - while (seqPos > 0) { - U32 const backDist = ZSTD_totalLen(opt[seqPos]); - storeStart--; - DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", - seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, 
opt[seqPos].off); - opt[storeStart] = opt[seqPos]; - seqPos = (seqPos > backDist) ? seqPos - backDist : 0; - } +#endif // _THRIFT_TOSTRING_H_ - /* save sequences */ - DEBUGLOG(6, "sending selected sequences into seqStore") - { U32 storePos; - for (storePos=storeStart; storePos <= storeEnd; storePos++) { - U32 const llen = opt[storePos].litlen; - U32 const mlen = opt[storePos].mlen; - U32 const offCode = opt[storePos].off; - U32 const advance = llen + mlen; - DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u", - anchor - istart, (unsigned)llen, (unsigned)mlen); - if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */ - assert(storePos == storeEnd); /* must be last sequence */ - ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */ - continue; /* will finish */ - } +// LICENSE_CHANGE_END - assert(anchor + llen <= iend); - ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); - ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH); - anchor += advance; - ip = anchor; - } } - ZSTD_setBasePrices(optStatePtr, optLevel); - } - } /* while (ip < ilimit) */ - /* Return the last literals size */ - return (size_t)(iend - anchor); +namespace duckdb_parquet { namespace format { + +int _kTypeValues[] = { + Type::BOOLEAN, + Type::INT32, + Type::INT64, + Type::INT96, + Type::FLOAT, + Type::DOUBLE, + Type::BYTE_ARRAY, + Type::FIXED_LEN_BYTE_ARRAY +}; +const char* _kTypeNames[] = { + "BOOLEAN", + "INT32", + "INT64", + "INT96", + "FLOAT", + "DOUBLE", + "BYTE_ARRAY", + "FIXED_LEN_BYTE_ARRAY" +}; +const std::map _Type_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(8, _kTypeValues, _kTypeNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); + +std::ostream& operator<<(std::ostream& out, const Type::type& val) { + std::map::const_iterator it = _Type_VALUES_TO_NAMES.find(val); + if (it != _Type_VALUES_TO_NAMES.end()) { + out << it->second; + } else { + 
out << static_cast(val); + } + return out; +} + +int _kConvertedTypeValues[] = { + ConvertedType::UTF8, + ConvertedType::MAP, + ConvertedType::MAP_KEY_VALUE, + ConvertedType::LIST, + ConvertedType::ENUM, + ConvertedType::DECIMAL, + ConvertedType::DATE, + ConvertedType::TIME_MILLIS, + ConvertedType::TIME_MICROS, + ConvertedType::TIMESTAMP_MILLIS, + ConvertedType::TIMESTAMP_MICROS, + ConvertedType::UINT_8, + ConvertedType::UINT_16, + ConvertedType::UINT_32, + ConvertedType::UINT_64, + ConvertedType::INT_8, + ConvertedType::INT_16, + ConvertedType::INT_32, + ConvertedType::INT_64, + ConvertedType::JSON, + ConvertedType::BSON, + ConvertedType::INTERVAL +}; +const char* _kConvertedTypeNames[] = { + "UTF8", + "MAP", + "MAP_KEY_VALUE", + "LIST", + "ENUM", + "DECIMAL", + "DATE", + "TIME_MILLIS", + "TIME_MICROS", + "TIMESTAMP_MILLIS", + "TIMESTAMP_MICROS", + "UINT_8", + "UINT_16", + "UINT_32", + "UINT_64", + "INT_8", + "INT_16", + "INT_32", + "INT_64", + "JSON", + "BSON", + "INTERVAL" +}; +const std::map _ConvertedType_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(22, _kConvertedTypeValues, _kConvertedTypeNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); + +std::ostream& operator<<(std::ostream& out, const ConvertedType::type& val) { + std::map::const_iterator it = _ConvertedType_VALUES_TO_NAMES.find(val); + if (it != _ConvertedType_VALUES_TO_NAMES.end()) { + out << it->second; + } else { + out << static_cast(val); + } + return out; +} + +int _kFieldRepetitionTypeValues[] = { + FieldRepetitionType::REQUIRED, + FieldRepetitionType::OPTIONAL, + FieldRepetitionType::REPEATED +}; +const char* _kFieldRepetitionTypeNames[] = { + "REQUIRED", + "OPTIONAL", + "REPEATED" +}; +const std::map _FieldRepetitionType_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(3, _kFieldRepetitionTypeValues, _kFieldRepetitionTypeNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); + +std::ostream& operator<<(std::ostream& out, const FieldRepetitionType::type& 
val) { + std::map::const_iterator it = _FieldRepetitionType_VALUES_TO_NAMES.find(val); + if (it != _FieldRepetitionType_VALUES_TO_NAMES.end()) { + out << it->second; + } else { + out << static_cast(val); + } + return out; +} + +int _kEncodingValues[] = { + Encoding::PLAIN, + Encoding::PLAIN_DICTIONARY, + Encoding::RLE, + Encoding::BIT_PACKED, + Encoding::DELTA_BINARY_PACKED, + Encoding::DELTA_LENGTH_BYTE_ARRAY, + Encoding::DELTA_BYTE_ARRAY, + Encoding::RLE_DICTIONARY +}; +const char* _kEncodingNames[] = { + "PLAIN", + "PLAIN_DICTIONARY", + "RLE", + "BIT_PACKED", + "DELTA_BINARY_PACKED", + "DELTA_LENGTH_BYTE_ARRAY", + "DELTA_BYTE_ARRAY", + "RLE_DICTIONARY" +}; +const std::map _Encoding_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(8, _kEncodingValues, _kEncodingNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); + +std::ostream& operator<<(std::ostream& out, const Encoding::type& val) { + std::map::const_iterator it = _Encoding_VALUES_TO_NAMES.find(val); + if (it != _Encoding_VALUES_TO_NAMES.end()) { + out << it->second; + } else { + out << static_cast(val); + } + return out; } +int _kCompressionCodecValues[] = { + CompressionCodec::UNCOMPRESSED, + CompressionCodec::SNAPPY, + CompressionCodec::GZIP, + CompressionCodec::LZO, + CompressionCodec::BROTLI, + CompressionCodec::LZ4, + CompressionCodec::ZSTD +}; +const char* _kCompressionCodecNames[] = { + "UNCOMPRESSED", + "SNAPPY", + "GZIP", + "LZO", + "BROTLI", + "LZ4", + "ZSTD" +}; +const std::map _CompressionCodec_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(7, _kCompressionCodecValues, _kCompressionCodecNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); -size_t ZSTD_compressBlock_btopt( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -{ - DEBUGLOG(5, "ZSTD_compressBlock_btopt"); - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict); +std::ostream& 
operator<<(std::ostream& out, const CompressionCodec::type& val) { + std::map::const_iterator it = _CompressionCodec_VALUES_TO_NAMES.find(val); + if (it != _CompressionCodec_VALUES_TO_NAMES.end()) { + out << it->second; + } else { + out << static_cast(val); + } + return out; } +int _kPageTypeValues[] = { + PageType::DATA_PAGE, + PageType::INDEX_PAGE, + PageType::DICTIONARY_PAGE, + PageType::DATA_PAGE_V2 +}; +const char* _kPageTypeNames[] = { + "DATA_PAGE", + "INDEX_PAGE", + "DICTIONARY_PAGE", + "DATA_PAGE_V2" +}; +const std::map _PageType_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(4, _kPageTypeValues, _kPageTypeNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); -/* used in 2-pass strategy */ -static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus) -{ - U32 s, sum=0; - assert(ZSTD_FREQ_DIV+bonus >= 0); - for (s=0; s::const_iterator it = _PageType_VALUES_TO_NAMES.find(val); + if (it != _PageType_VALUES_TO_NAMES.end()) { + out << it->second; + } else { + out << static_cast(val); + } + return out; } -/* used in 2-pass strategy */ -MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr) -{ - if (ZSTD_compressedLiterals(optPtr)) - optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); - optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0); - optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0); - optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0); -} +int _kBoundaryOrderValues[] = { + BoundaryOrder::UNORDERED, + BoundaryOrder::ASCENDING, + BoundaryOrder::DESCENDING +}; +const char* _kBoundaryOrderNames[] = { + "UNORDERED", + "ASCENDING", + "DESCENDING" +}; +const std::map _BoundaryOrder_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(3, _kBoundaryOrderValues, _kBoundaryOrderNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); -/* ZSTD_initStats_ultra(): - * make a first compression pass, just to seed stats with more accurate starting values. 
- * only works on first block, with no dictionary and no ldm. - * this function cannot error, hence its contract must be respected. - */ -static void -ZSTD_initStats_ultra(ZSTD_matchState_t* ms, - seqStore_t* seqStore, - U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -{ - U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */ - memcpy(tmpRep, rep, sizeof(tmpRep)); +std::ostream& operator<<(std::ostream& out, const BoundaryOrder::type& val) { + std::map::const_iterator it = _BoundaryOrder_VALUES_TO_NAMES.find(val); + if (it != _BoundaryOrder_VALUES_TO_NAMES.end()) { + out << it->second; + } else { + out << static_cast(val); + } + return out; +} - DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize); - assert(ms->opt.litLengthSum == 0); /* first block */ - assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */ - assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */ - assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */ - ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/ +Statistics::~Statistics() throw() { +} - /* invalidate first scan from history */ - ZSTD_resetSeqStore(seqStore); - ms->window.base -= srcSize; - ms->window.dictLimit += (U32)srcSize; - ms->window.lowLimit = ms->window.dictLimit; - ms->nextToUpdate = ms->window.dictLimit; - /* re-inforce weight of collected statistics */ - ZSTD_upscaleStats(&ms->opt); +void Statistics::__set_max(const std::string& val) { + this->max = val; +__isset.max = true; } -size_t ZSTD_compressBlock_btultra( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -{ - DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); +void Statistics::__set_min(const 
std::string& val) { + this->min = val; +__isset.min = true; } -size_t ZSTD_compressBlock_btultra2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -{ - U32 const current = (U32)((const BYTE*)src - ms->window.base); - DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); - - /* 2-pass strategy: - * this strategy makes a first pass over first block to collect statistics - * and seed next round's statistics with it. - * After 1st pass, function forgets everything, and starts a new block. - * Consequently, this can only work if no data has been previously loaded in tables, - * aka, no dictionary, no prefix, no ldm preprocessing. - * The compression ratio gain is generally small (~0.5% on first block), - * the cost is 2x cpu time on first block. */ - assert(srcSize <= ZSTD_BLOCKSIZE_MAX); - if ( (ms->opt.litLengthSum==0) /* first block */ - && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ - && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ - && (current == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ - && (srcSize > ZSTD_PREDEF_THRESHOLD) - ) { - ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); - } - - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); +void Statistics::__set_null_count(const int64_t val) { + this->null_count = val; +__isset.null_count = true; } -size_t ZSTD_compressBlock_btopt_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -{ - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState); +void Statistics::__set_distinct_count(const int64_t val) { + this->distinct_count = val; +__isset.distinct_count = true; } -size_t ZSTD_compressBlock_btultra_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const 
void* src, size_t srcSize) -{ - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState); +void Statistics::__set_max_value(const std::string& val) { + this->max_value = val; +__isset.max_value = true; } -size_t ZSTD_compressBlock_btopt_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -{ - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict); +void Statistics::__set_min_value(const std::string& val) { + this->min_value = val; +__isset.min_value = true; } - -size_t ZSTD_compressBlock_btultra_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) +std::ostream& operator<<(std::ostream& out, const Statistics& obj) { - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict); -} - -/* note : no btultra2 variant for extDict nor dictMatchState, - * because btultra2 is not meant to work with dictionaries - * and is only specific for the first block (no prefix) */ - + obj.printTo(out); + return out; } -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list - -/* ****************************************************************** - * Common functions of New Generation Entropy library - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - Public forum : https://groups.google.com/forum/#!forum/lz4c - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). 
- * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ - -/* ************************************* -* Dependencies -***************************************/ - - /* ERR_*, ERROR */ - - - - - -namespace duckdb_zstd { - -/*=== Version ===*/ -unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } - - -/*=== Error Management ===*/ -unsigned FSE_isError(size_t code) { return ERR_isError(code); } -const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } - -unsigned HUF_isError(size_t code) { return ERR_isError(code); } -const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } - - -/*-************************************************************** -* FSE NCount encoding-decoding -****************************************************************/ -size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, - const void* headerBuffer, size_t hbSize) -{ - const BYTE* const istart = (const BYTE*) headerBuffer; - const BYTE* const iend = istart + hbSize; - const BYTE* ip = istart; - int nbBits; - int remaining; - int threshold; - U32 bitStream; - int bitCount; - unsigned charnum = 0; - int previous0 = 0; +uint32_t Statistics::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - if (hbSize < 4) { - /* This function only works when hbSize >= 4 */ - char buffer[4]; - memset(buffer, 0, sizeof(buffer)); - memcpy(buffer, headerBuffer, hbSize); - { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, - buffer, sizeof(buffer)); - if (FSE_isError(countSize)) return countSize; - if (countSize > hbSize) return ERROR(corruption_detected); - return countSize; - } } - assert(hbSize >= 4); + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; - /* init */ - 
memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ - bitStream = MEM_readLE32(ip); - nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ - if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); - bitStream >>= 4; - bitCount = 4; - *tableLogPtr = nbBits; - remaining = (1<readStructBegin(fname); - while ((remaining>1) & (charnum<=*maxSVPtr)) { - if (previous0) { - unsigned n0 = charnum; - while ((bitStream & 0xFFFF) == 0xFFFF) { - n0 += 24; - if (ip < iend-5) { - ip += 2; - bitStream = MEM_readLE32(ip) >> bitCount; - } else { - bitStream >>= 16; - bitCount += 16; - } } - while ((bitStream & 3) == 3) { - n0 += 3; - bitStream >>= 2; - bitCount += 2; - } - n0 += bitStream & 3; - bitCount += 2; - if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); - while (charnum < n0) normalizedCounter[charnum++] = 0; - if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { - assert((bitCount >> 3) <= 3); /* For first condition to work */ - ip += bitCount>>3; - bitCount &= 7; - bitStream = MEM_readLE32(ip) >> bitCount; - } else { - bitStream >>= 2; - } } - { int const max = (2*threshold-1) - remaining; - int count; + using ::duckdb_apache::thrift::protocol::TProtocolException; - if ((bitStream & (threshold-1)) < (U32)max) { - count = bitStream & (threshold-1); - bitCount += nbBits-1; - } else { - count = bitStream & (2*threshold-1); - if (count >= threshold) count -= max; - bitCount += nbBits; - } - count--; /* extra accuracy */ - remaining -= count < 0 ? 
-count : count; /* -1 means +1 */ - normalizedCounter[charnum++] = (short)count; - previous0 = !count; - while (remaining < threshold) { - nbBits--; - threshold >>= 1; - } + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->max); + this->__isset.max = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->min); + this->__isset.min = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->null_count); + this->__isset.null_count = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->distinct_count); + this->__isset.distinct_count = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->max_value); + this->__isset.max_value = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->min_value); + this->__isset.min_value = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } - if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { - ip += bitCount>>3; - bitCount &= 7; - } else { - bitCount -= (int)(8 * (iend - 4 - ip)); - ip = iend - 4; - } - bitStream = MEM_readLE32(ip) >> (bitCount & 31); - } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ - if (remaining != 1) return ERROR(corruption_detected); - if (bitCount > 32) 
return ERROR(corruption_detected); - *maxSVPtr = charnum-1; + xfer += iprot->readStructEnd(); - ip += (bitCount+7)>>3; - return ip-istart; + return xfer; } +uint32_t Statistics::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("Statistics"); -/*! HUF_readStats() : - Read compact Huffman tree, saved by HUF_writeCTable(). - `huffWeight` is destination buffer. - `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. - @return : size read from `src` , or an error Code . - Note : Needed by HUF_readCTable() and HUF_readDTableX?() . -*/ -size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, - U32* nbSymbolsPtr, U32* tableLogPtr, - const void* src, size_t srcSize) -{ - U32 weightTotal; - const BYTE* ip = (const BYTE*) src; - size_t iSize; - size_t oSize; - - if (!srcSize) return ERROR(srcSize_wrong); - iSize = ip[0]; - /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... 
*/ - - if (iSize >= 128) { /* special header */ - oSize = iSize - 127; - iSize = ((oSize+1)/2); - if (iSize+1 > srcSize) return ERROR(srcSize_wrong); - if (oSize >= hwSize) return ERROR(corruption_detected); - ip += 1; - { U32 n; - for (n=0; n> 4; - huffWeight[n+1] = ip[n/2] & 15; - } } } - else { /* header compressed with FSE (normal case) */ - FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */ - if (iSize+1 > srcSize) return ERROR(srcSize_wrong); - oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */ - if (FSE_isError(oSize)) return oSize; - } - - /* collect weight stats */ - memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); - weightTotal = 0; - { U32 n; for (n=0; n= HUF_TABLELOG_MAX) return ERROR(corruption_detected); - rankStats[huffWeight[n]]++; - weightTotal += (1 << huffWeight[n]) >> 1; - } } - if (weightTotal == 0) return ERROR(corruption_detected); - - /* get last non-null symbol weight (implied, total must be 2^n) */ - { U32 const tableLog = BIT_highbit32(weightTotal) + 1; - if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); - *tableLogPtr = tableLog; - /* determine last weight */ - { U32 const total = 1 << tableLog; - U32 const rest = total - weightTotal; - U32 const verif = 1 << BIT_highbit32(rest); - U32 const lastWeight = BIT_highbit32(rest) + 1; - if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ - huffWeight[oSize] = (BYTE)lastWeight; - rankStats[lastWeight]++; - } } - - /* check tree construction validity */ - if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ + if (this->__isset.max) { + xfer += oprot->writeFieldBegin("max", ::duckdb_apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeBinary(this->max); + xfer 
+= oprot->writeFieldEnd(); + } + if (this->__isset.min) { + xfer += oprot->writeFieldBegin("min", ::duckdb_apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeBinary(this->min); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.null_count) { + xfer += oprot->writeFieldBegin("null_count", ::duckdb_apache::thrift::protocol::T_I64, 3); + xfer += oprot->writeI64(this->null_count); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.distinct_count) { + xfer += oprot->writeFieldBegin("distinct_count", ::duckdb_apache::thrift::protocol::T_I64, 4); + xfer += oprot->writeI64(this->distinct_count); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.max_value) { + xfer += oprot->writeFieldBegin("max_value", ::duckdb_apache::thrift::protocol::T_STRING, 5); + xfer += oprot->writeBinary(this->max_value); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.min_value) { + xfer += oprot->writeFieldBegin("min_value", ::duckdb_apache::thrift::protocol::T_STRING, 6); + xfer += oprot->writeBinary(this->min_value); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} - /* results */ - *nbSymbolsPtr = (U32)(oSize+1); - return iSize+1; +void swap(Statistics &a, Statistics &b) { + using ::std::swap; + swap(a.max, b.max); + swap(a.min, b.min); + swap(a.null_count, b.null_count); + swap(a.distinct_count, b.distinct_count); + swap(a.max_value, b.max_value); + swap(a.min_value, b.min_value); + swap(a.__isset, b.__isset); } +Statistics::Statistics(const Statistics& other0) { + max = other0.max; + min = other0.min; + null_count = other0.null_count; + distinct_count = other0.distinct_count; + max_value = other0.max_value; + min_value = other0.min_value; + __isset = other0.__isset; +} +Statistics& Statistics::operator=(const Statistics& other1) { + max = other1.max; + min = other1.min; + null_count = other1.null_count; + distinct_count = other1.distinct_count; + max_value = 
other1.max_value; + min_value = other1.min_value; + __isset = other1.__isset; + return *this; +} +void Statistics::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "Statistics("; + out << "max="; (__isset.max ? (out << to_string(max)) : (out << "")); + out << ", " << "min="; (__isset.min ? (out << to_string(min)) : (out << "")); + out << ", " << "null_count="; (__isset.null_count ? (out << to_string(null_count)) : (out << "")); + out << ", " << "distinct_count="; (__isset.distinct_count ? (out << to_string(distinct_count)) : (out << "")); + out << ", " << "max_value="; (__isset.max_value ? (out << to_string(max_value)) : (out << "")); + out << ", " << "min_value="; (__isset.min_value ? (out << to_string(min_value)) : (out << "")); + out << ")"; } -// LICENSE_CHANGE_END +StringType::~StringType() throw() { +} +std::ostream& operator<<(std::ostream& out, const StringType& obj) +{ + obj.printTo(out); + return out; +} -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
- */ +uint32_t StringType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { -/* The purpose of this file is to have a single list of error strings embedded in binary */ + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; + xfer += iprot->readStructBegin(fname); + using ::duckdb_apache::thrift::protocol::TProtocolException; -namespace duckdb_zstd { -const char* ERR_getErrorString(ERR_enum code) -{ -#ifdef ZSTD_STRIP_ERROR_STRINGS - (void)code; - return "Error strings stripped"; -#else - static const char* const notErrorCode = "Unspecified error code"; - switch( code ) - { - case PREFIX(no_error): return "No error detected"; - case PREFIX(GENERIC): return "Error (generic)"; - case PREFIX(prefix_unknown): return "Unknown frame descriptor"; - case PREFIX(version_unsupported): return "Version not supported"; - case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; - case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; - case PREFIX(corruption_detected): return "Corrupted block detected"; - case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; - case PREFIX(parameter_unsupported): return "Unsupported parameter"; - case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; - case PREFIX(init_missing): return "Context should be init first"; - case PREFIX(memory_allocation): return "Allocation error : not enough memory"; - case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough"; - case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; - case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; - case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; - case PREFIX(maxSymbolValue_tooSmall): return "Specified 
maxSymbolValue is too small"; - case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; - case PREFIX(dictionary_wrong): return "Dictionary mismatch"; - case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; - case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; - case PREFIX(srcSize_wrong): return "Src size is incorrect"; - case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer"; - /* following error codes are not stable and may be removed or changed in a future version */ - case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; - case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; - case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; - case PREFIX(maxCode): - default: return notErrorCode; + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; } -#endif + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; } +uint32_t StringType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("StringType"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; } +void swap(StringType &a, StringType &b) { + using ::std::swap; + (void) a; + (void) b; +} -// LICENSE_CHANGE_END +StringType::StringType(const StringType& other2) { + (void) other2; +} +StringType& StringType::operator=(const StringType& other3) { + (void) other3; + return *this; +} +void StringType::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "StringType("; + out << ")"; +} -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of 
this file for a list +UUIDType::~UUIDType() throw() { +} -/* ****************************************************************** - * FSE : Finite State Entropy decoder - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - Public forum : https://groups.google.com/forum/#!forum/lz4c - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ +std::ostream& operator<<(std::ostream& out, const UUIDType& obj) +{ + obj.printTo(out); + return out; +} -/* ************************************************************** -* Includes -****************************************************************/ -#include /* malloc, free, qsort */ -#include /* memcpy, memset */ +uint32_t UUIDType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; + xfer += iprot->readStructBegin(fname); + using ::duckdb_apache::thrift::protocol::TProtocolException; + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + xfer += iprot->readStructEnd(); -/* ************************************************************** -* Error Management -****************************************************************/ -// #define FSE_isError ERR_isError -#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations 
*/ + return xfer; +} +uint32_t UUIDType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("UUIDType"); -/* ************************************************************** -* Templates -****************************************************************/ -/* - designed to be included - for type-specific functions (template emulation in C) - Objective is to write these functions only once, for improved maintenance -*/ + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} -/* safety checks */ -#ifndef FSE_FUNCTION_EXTENSION -# error "FSE_FUNCTION_EXTENSION must be defined" -#endif -#ifndef FSE_FUNCTION_TYPE -# error "FSE_FUNCTION_TYPE must be defined" -#endif +void swap(UUIDType &a, UUIDType &b) { + using ::std::swap; + (void) a; + (void) b; +} -/* Function names */ -#define FSE_CAT(X,Y) X##Y -#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) -#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) +UUIDType::UUIDType(const UUIDType& other4) { + (void) other4; +} +UUIDType& UUIDType::operator=(const UUIDType& other5) { + (void) other5; + return *this; +} +void UUIDType::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "UUIDType("; + out << ")"; +} -namespace duckdb_zstd { -/* Function templates */ -FSE_DTable* FSE_createDTable (unsigned tableLog) -{ - if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; - return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); +MapType::~MapType() throw() { } -void FSE_freeDTable (FSE_DTable* dt) +std::ostream& operator<<(std::ostream& out, const MapType& obj) { - free(dt); + obj.printTo(out); + return out; } -size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) -{ - void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits 
aligned on 32-bits */ - FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); - U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1]; - U32 const maxSV1 = maxSymbolValue + 1; - U32 const tableSize = 1 << tableLog; - U32 highThreshold = tableSize-1; +uint32_t MapType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - /* Sanity Checks */ - if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); - if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; - /* Init, lay down lowprob symbols */ - { FSE_DTableHeader DTableH; - DTableH.tableLog = (U16)tableLog; - DTableH.fastMode = 1; - { S16 const largeLimit= (S16)(1 << (tableLog-1)); - U32 s; - for (s=0; s= largeLimit) DTableH.fastMode=0; - symbolNext[s] = normalizedCounter[s]; - } } } - memcpy(dt, &DTableH, sizeof(DTableH)); - } + xfer += iprot->readStructBegin(fname); - /* Spread symbols */ - { U32 const tableMask = tableSize-1; - U32 const step = FSE_TABLESTEP(tableSize); - U32 s, position = 0; - for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ - } } - if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + using ::duckdb_apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } - /* Build Decoding table */ - { U32 u; - for (u=0; ureadStructEnd(); - return 0; + return xfer; } +uint32_t MapType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += 
oprot->writeStructBegin("MapType"); -#ifndef FSE_COMMONDEFS_ONLY + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} -/*-******************************************************* -* Decompression (Byte symbols) -*********************************************************/ -size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue) -{ - void* ptr = dt; - FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; - void* dPtr = dt + 1; - FSE_decode_t* const cell = (FSE_decode_t*)dPtr; +void swap(MapType &a, MapType &b) { + using ::std::swap; + (void) a; + (void) b; +} - DTableH->tableLog = 0; - DTableH->fastMode = 0; +MapType::MapType(const MapType& other6) { + (void) other6; +} +MapType& MapType::operator=(const MapType& other7) { + (void) other7; + return *this; +} +void MapType::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "MapType("; + out << ")"; +} - cell->newState = 0; - cell->symbol = symbolValue; - cell->nbBits = 0; - return 0; +ListType::~ListType() throw() { } - -size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) +std::ostream& operator<<(std::ostream& out, const ListType& obj) { - void* ptr = dt; - FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; - void* dPtr = dt + 1; - FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr; - const unsigned tableSize = 1 << nbBits; - const unsigned tableMask = tableSize - 1; - const unsigned maxSV1 = tableMask+1; - unsigned s; - - /* Sanity checks */ - if (nbBits < 1) return ERROR(GENERIC); /* min size */ - - /* Build Decoding Table */ - DTableH->tableLog = (U16)nbBits; - DTableH->fastMode = 1; - for (s=0; sreadStructBegin(fname); -#define FSE_GETSYMBOL(statePtr) fast ? 
FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) + using ::duckdb_apache::thrift::protocol::TProtocolException; - /* 4 symbols per loop */ - for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) & (op sizeof(bitD.bitContainer)*8) /* This test must be static */ - BIT_reloadDStream(&bitD); + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } - op[1] = FSE_GETSYMBOL(&state2); + xfer += iprot->readStructEnd(); - if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ - { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } } + return xfer; +} - op[2] = FSE_GETSYMBOL(&state1); +uint32_t ListType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("ListType"); - if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ - BIT_reloadDStream(&bitD); + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} - op[3] = FSE_GETSYMBOL(&state2); - } +void swap(ListType &a, ListType &b) { + using ::std::swap; + (void) a; + (void) b; +} - /* tail */ - /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ - while (1) { - if (op>(omax-2)) return ERROR(dstSize_tooSmall); - *op++ = FSE_GETSYMBOL(&state1); - if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { - *op++ = FSE_GETSYMBOL(&state2); - break; - } +ListType::ListType(const ListType& other8) { + (void) other8; +} +ListType& ListType::operator=(const ListType& other9) { + (void) other9; + return *this; +} +void ListType::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "ListType("; + out << ")"; +} 
- if (op>(omax-2)) return ERROR(dstSize_tooSmall); - *op++ = FSE_GETSYMBOL(&state2); - if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { - *op++ = FSE_GETSYMBOL(&state1); - break; - } } - return op-ostart; +EnumType::~EnumType() throw() { +} + +std::ostream& operator<<(std::ostream& out, const EnumType& obj) +{ + obj.printTo(out); + return out; } -size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, - const void* cSrc, size_t cSrcSize, - const FSE_DTable* dt) -{ - const void* ptr = dt; - const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; - const U32 fastMode = DTableH->fastMode; +uint32_t EnumType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - /* select fast mode (static) */ - if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); - return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); -} + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; + xfer += iprot->readStructBegin(fname); -size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog) -{ - const BYTE* const istart = (const BYTE*)cSrc; - const BYTE* ip = istart; - short counting[FSE_MAX_SYMBOL_VALUE+1]; - unsigned tableLog; - unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + using ::duckdb_apache::thrift::protocol::TProtocolException; - /* normal FSE decoding mode */ - size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); - if (FSE_isError(NCountLength)) return NCountLength; - /* if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); */ /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */ - if (tableLog > maxLog) return ERROR(tableLog_tooLarge); - ip += NCountLength; - 
cSrcSize -= NCountLength; - CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) ); + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } - return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */ + xfer += iprot->readStructEnd(); + + return xfer; } +uint32_t EnumType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("EnumType"); -typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} -size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize) -{ - DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ - return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG); +void swap(EnumType &a, EnumType &b) { + using ::std::swap; + (void) a; + (void) b; } +EnumType::EnumType(const EnumType& other10) { + (void) other10; +} +EnumType& EnumType::operator=(const EnumType& other11) { + (void) other11; + return *this; +} +void EnumType::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "EnumType("; + out << ")"; } -#endif /* FSE_COMMONDEFS_ONLY */ +DateType::~DateType() throw() { +} -// LICENSE_CHANGE_END +std::ostream& operator<<(std::ostream& out, const DateType& obj) +{ + obj.printTo(out); + return out; +} -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list +uint32_t DateType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { -/* 
- * xxHash - Fast Hash algorithm - * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - xxHash homepage: http://www.xxhash.com - * - xxHash source repository : https://github.com/Cyan4973/xxHash - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -*/ + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; + xfer += iprot->readStructBegin(fname); -/* ************************************* -* Tuning parameters -***************************************/ -/*!XXH_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. - * It can generate buggy code on targets which do not support unaligned memory accesses. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://stackoverflow.com/a/32095106/646947 for details. 
- * Prefer these methods in priority order (0 > 1 > 2) - */ -#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) -# define XXH_FORCE_MEMORY_ACCESS 2 -# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ - (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \ - defined(__ICCARM__) -# define XXH_FORCE_MEMORY_ACCESS 1 -# endif -#endif + using ::duckdb_apache::thrift::protocol::TProtocolException; -/*!XXH_ACCEPT_NULL_INPUT_POINTER : - * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. - * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. - * By default, this option is disabled. To enable it, uncomment below define : - */ -/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ -/*!XXH_FORCE_NATIVE_FORMAT : - * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. - * Results are therefore identical for little-endian and big-endian CPU. - * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. - * Should endian-independence be of no importance for your application, you may set the #define below to 1, - * to improve speed for Big-endian CPU. - * This option has no impact on Little_Endian CPU. 
- */ -#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ -# define XXH_FORCE_NATIVE_FORMAT 0 -#endif + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } -/*!XXH_FORCE_ALIGN_CHECK : - * This is a minor performance trick, only useful with lots of very small keys. - * It means : check for aligned/unaligned input. - * The check costs one initial branch per hash; set to 0 when the input data - * is guaranteed to be aligned. - */ -#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ -# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) -# define XXH_FORCE_ALIGN_CHECK 0 -# else -# define XXH_FORCE_ALIGN_CHECK 1 -# endif -#endif + xfer += iprot->readStructEnd(); + return xfer; +} -/* ************************************* -* Includes & Memory related functions -***************************************/ -/* Modify the local functions below should you wish to use some other memory routines */ -/* for malloc(), free() */ -#include -#include /* size_t */ -/* for memcpy() */ -#include +uint32_t DateType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("DateType"); + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} +void swap(DateType &a, DateType &b) { + using ::std::swap; + (void) a; + (void) b; +} +DateType::DateType(const DateType& other12) { + (void) other12; +} +DateType& DateType::operator=(const DateType& other13) { + (void) other13; + return *this; +} +void DateType::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "DateType("; + out << ")"; +} -/* ************************************* -* Compiler Specific Options 
-***************************************/ -#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# define INLINE_KEYWORD inline -#else -# define INLINE_KEYWORD -#endif -#if defined(__GNUC__) || defined(__ICCARM__) -# define FORCE_INLINE_ATTR __attribute__((always_inline)) -#elif defined(_MSC_VER) -# define FORCE_INLINE_ATTR __forceinline -#else -# define FORCE_INLINE_ATTR -#endif +NullType::~NullType() throw() { +} -#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR +std::ostream& operator<<(std::ostream& out, const NullType& obj) +{ + obj.printTo(out); + return out; +} -/* ************************************* -* Basic Types -***************************************/ -#ifndef MEM_MODULE -# define MEM_MODULE -# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; -# else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. 
*/ -# endif -#endif +uint32_t NullType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { -namespace duckdb_zstd { -static void* XXH_malloc(size_t s) { return malloc(s); } -static void XXH_free (void* p) { free(p); } -static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; -#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + xfer += iprot->readStructBegin(fname); -/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ -static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } -static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } + using ::duckdb_apache::thrift::protocol::TProtocolException; -#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign; + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } -static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + xfer += iprot->readStructEnd(); -#else + return xfer; +} -/* portable and safe solution. Generally efficient. 
- * see : http://stackoverflow.com/a/32095106/646947 - */ +uint32_t NullType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("NullType"); -static U32 XXH_read32(const void* memPtr) -{ - U32 val; - memcpy(&val, memPtr, sizeof(val)); - return val; + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; } -static U64 XXH_read64(const void* memPtr) -{ - U64 val; - memcpy(&val, memPtr, sizeof(val)); - return val; +void swap(NullType &a, NullType &b) { + using ::std::swap; + (void) a; + (void) b; } -#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ +NullType::NullType(const NullType& other14) { + (void) other14; +} +NullType& NullType::operator=(const NullType& other15) { + (void) other15; + return *this; +} +void NullType::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "NullType("; + out << ")"; +} -/* **************************************** -* Compiler-specific Functions and Macros -******************************************/ -#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +DecimalType::~DecimalType() throw() { +} -/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ -#if defined(_MSC_VER) -# define XXH_rotl32(x,r) _rotl(x,r) -# define XXH_rotl64(x,r) _rotl64(x,r) -#else -#if defined(__ICCARM__) -# include -# define XXH_rotl32(x,r) __ROR(x,(32 - r)) -#else -# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) -#endif -# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) -#endif -#if defined(_MSC_VER) /* Visual Studio */ -# define XXH_swap32 _byteswap_ulong -# define XXH_swap64 _byteswap_uint64 -#elif GCC_VERSION >= 403 -# define XXH_swap32 __builtin_bswap32 -# define XXH_swap64 __builtin_bswap64 -#else -static U32 XXH_swap32 (U32 x) -{ - return ((x << 24) & 0xff000000 ) | - ((x << 8) & 0x00ff0000 ) | - ((x >> 
8) & 0x0000ff00 ) | - ((x >> 24) & 0x000000ff ); +void DecimalType::__set_scale(const int32_t val) { + this->scale = val; } -static U64 XXH_swap64 (U64 x) + +void DecimalType::__set_precision(const int32_t val) { + this->precision = val; +} +std::ostream& operator<<(std::ostream& out, const DecimalType& obj) { - return ((x << 56) & 0xff00000000000000ULL) | - ((x << 40) & 0x00ff000000000000ULL) | - ((x << 24) & 0x0000ff0000000000ULL) | - ((x << 8) & 0x000000ff00000000ULL) | - ((x >> 8) & 0x00000000ff000000ULL) | - ((x >> 24) & 0x0000000000ff0000ULL) | - ((x >> 40) & 0x000000000000ff00ULL) | - ((x >> 56) & 0x00000000000000ffULL); + obj.printTo(out); + return out; } -#endif -/* ************************************* -* Architecture Macros -***************************************/ -typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; +uint32_t DecimalType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { -/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ -#ifndef XXH_CPU_LITTLE_ENDIAN - static const int g_one = 1; -# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one)) -#endif + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; + xfer += iprot->readStructBegin(fname); -/* *************************** -* Memory reads -*****************************/ -typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + using ::duckdb_apache::thrift::protocol::TProtocolException; -FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) -{ - if (align==XXH_unaligned) - return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); - else - return endian==XXH_littleEndian ? 
*(const U32*)ptr : XXH_swap32(*(const U32*)ptr); + bool isset_scale = false; + bool isset_precision = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->scale); + isset_scale = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->precision); + isset_precision = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_scale) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_precision) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; } -FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) -{ - return XXH_readLE32_align(ptr, endian, XXH_unaligned); +uint32_t DecimalType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("DecimalType"); + + xfer += oprot->writeFieldBegin("scale", ::duckdb_apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(this->scale); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("precision", ::duckdb_apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(this->precision); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; } -static U32 XXH_readBE32(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? 
XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); +void swap(DecimalType &a, DecimalType &b) { + using ::std::swap; + swap(a.scale, b.scale); + swap(a.precision, b.precision); } -FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) -{ - if (align==XXH_unaligned) - return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); - else - return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); +DecimalType::DecimalType(const DecimalType& other16) { + scale = other16.scale; + precision = other16.precision; +} +DecimalType& DecimalType::operator=(const DecimalType& other17) { + scale = other17.scale; + precision = other17.precision; + return *this; +} +void DecimalType::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "DecimalType("; + out << "scale=" << to_string(scale); + out << ", " << "precision=" << to_string(precision); + out << ")"; } -FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) -{ - return XXH_readLE64_align(ptr, endian, XXH_unaligned); + +MilliSeconds::~MilliSeconds() throw() { } -static U64 XXH_readBE64(const void* ptr) +std::ostream& operator<<(std::ostream& out, const MilliSeconds& obj) { - return XXH_CPU_LITTLE_ENDIAN ? 
XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); + obj.printTo(out); + return out; } -/* ************************************* -* Macros -***************************************/ -#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ +uint32_t MilliSeconds::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { + + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::duckdb_apache::thrift::protocol::TProtocolException; -/* ************************************* -* Constants -***************************************/ -static const U32 PRIME32_1 = 2654435761U; -static const U32 PRIME32_2 = 2246822519U; -static const U32 PRIME32_3 = 3266489917U; -static const U32 PRIME32_4 = 668265263U; -static const U32 PRIME32_5 = 374761393U; + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } -static const U64 PRIME64_1 = 11400714785074694791ULL; -static const U64 PRIME64_2 = 14029467366897019727ULL; -static const U64 PRIME64_3 = 1609587929392839161ULL; -static const U64 PRIME64_4 = 9650029242287828579ULL; -static const U64 PRIME64_5 = 2870177450012600261ULL; + xfer += iprot->readStructEnd(); -XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } + return xfer; +} +uint32_t MilliSeconds::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("MilliSeconds"); -/* ************************** -* Utils -****************************/ -XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* __restrict dstState, const 
XXH32_state_t* __restrict srcState) -{ - memcpy(dstState, srcState, sizeof(*dstState)); + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; } -XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* __restrict dstState, const XXH64_state_t* __restrict srcState) -{ - memcpy(dstState, srcState, sizeof(*dstState)); +void swap(MilliSeconds &a, MilliSeconds &b) { + using ::std::swap; + (void) a; + (void) b; } +MilliSeconds::MilliSeconds(const MilliSeconds& other18) { + (void) other18; +} +MilliSeconds& MilliSeconds::operator=(const MilliSeconds& other19) { + (void) other19; + return *this; +} +void MilliSeconds::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "MilliSeconds("; + out << ")"; +} -/* *************************** -* Simple Hash Functions -*****************************/ -static U32 XXH32_round(U32 seed, U32 input) -{ - seed += input * PRIME32_2; - seed = XXH_rotl32(seed, 13); - seed *= PRIME32_1; - return seed; +MicroSeconds::~MicroSeconds() throw() { } -FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) +std::ostream& operator<<(std::ostream& out, const MicroSeconds& obj) { - const BYTE* p = (const BYTE*)input; - const BYTE* bEnd = p + len; - U32 h32; -#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) + obj.printTo(out); + return out; +} -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (p==NULL) { - len=0; - bEnd=p=(const BYTE*)(size_t)16; - } -#endif - if (len>=16) { - const BYTE* const limit = bEnd - 16; - U32 v1 = seed + PRIME32_1 + PRIME32_2; - U32 v2 = seed + PRIME32_2; - U32 v3 = seed + 0; - U32 v4 = seed - PRIME32_1; +uint32_t MicroSeconds::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - do { - v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; - v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; - v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; - v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; - } 
while (p<=limit); + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; - h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); - } else { - h32 = seed + PRIME32_5; - } + xfer += iprot->readStructBegin(fname); - h32 += (U32) len; + using ::duckdb_apache::thrift::protocol::TProtocolException; - while (p+4<=bEnd) { - h32 += XXH_get32bits(p) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; - p+=4; - } - while (preadFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } - h32 ^= h32 >> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; + xfer += iprot->readStructEnd(); - return h32; + return xfer; } +uint32_t MicroSeconds::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("MicroSeconds"); -XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) -{ -#if 0 - /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ - XXH32_CREATESTATE_STATIC(state); - XXH32_reset(state, seed); - XXH32_update(state, input, len); - return XXH32_digest(state); -#else - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} - if (XXH_FORCE_ALIGN_CHECK) { - if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); - else - return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); - 
} } +void swap(MicroSeconds &a, MicroSeconds &b) { + using ::std::swap; + (void) a; + (void) b; +} - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); - else - return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); -#endif +MicroSeconds::MicroSeconds(const MicroSeconds& other20) { + (void) other20; +} +MicroSeconds& MicroSeconds::operator=(const MicroSeconds& other21) { + (void) other21; + return *this; +} +void MicroSeconds::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "MicroSeconds("; + out << ")"; } -static U64 XXH64_round(U64 acc, U64 input) -{ - acc += input * PRIME64_2; - acc = XXH_rotl64(acc, 31); - acc *= PRIME64_1; - return acc; +NanoSeconds::~NanoSeconds() throw() { } -static U64 XXH64_mergeRound(U64 acc, U64 val) +std::ostream& operator<<(std::ostream& out, const NanoSeconds& obj) { - val = XXH64_round(0, val); - acc ^= val; - acc = acc * PRIME64_1 + PRIME64_4; - return acc; + obj.printTo(out); + return out; } -FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; - U64 h64; -#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) - -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (p==NULL) { - len=0; - bEnd=p=(const BYTE*)(size_t)32; - } -#endif - if (len>=32) { - const BYTE* const limit = bEnd - 32; - U64 v1 = seed + PRIME64_1 + PRIME64_2; - U64 v2 = seed + PRIME64_2; - U64 v3 = seed + 0; - U64 v4 = seed - PRIME64_1; +uint32_t NanoSeconds::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - do { - v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; - v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; - v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; - v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; - } while (p<=limit); + 
::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; - h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); - h64 = XXH64_mergeRound(h64, v1); - h64 = XXH64_mergeRound(h64, v2); - h64 = XXH64_mergeRound(h64, v3); - h64 = XXH64_mergeRound(h64, v4); + xfer += iprot->readStructBegin(fname); - } else { - h64 = seed + PRIME64_5; - } + using ::duckdb_apache::thrift::protocol::TProtocolException; - h64 += (U64) len; - while (p+8<=bEnd) { - U64 const k1 = XXH64_round(0, XXH_get64bits(p)); - h64 ^= k1; - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; - p+=8; + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } - if (p+4<=bEnd) { - h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; - h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; - p+=4; - } + xfer += iprot->readStructEnd(); - while (p> 33; - h64 *= PRIME64_2; - h64 ^= h64 >> 29; - h64 *= PRIME64_3; - h64 ^= h64 >> 32; +uint32_t NanoSeconds::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("NanoSeconds"); - return h64; + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; } +void swap(NanoSeconds &a, NanoSeconds &b) { + using ::std::swap; + (void) a; + (void) b; +} -XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) -{ -#if 0 - /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ - XXH64_CREATESTATE_STATIC(state); - XXH64_reset(state, seed); - XXH64_update(state, input, len); - return XXH64_digest(state); -#else - XXH_endianess endian_detected = 
(XXH_endianess)XXH_CPU_LITTLE_ENDIAN; +NanoSeconds::NanoSeconds(const NanoSeconds& other22) { + (void) other22; +} +NanoSeconds& NanoSeconds::operator=(const NanoSeconds& other23) { + (void) other23; + return *this; +} +void NanoSeconds::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "NanoSeconds("; + out << ")"; +} - if (XXH_FORCE_ALIGN_CHECK) { - if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); - else - return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); - } } - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); - else - return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); -#endif +TimeUnit::~TimeUnit() throw() { } -/* ************************************************** -* Advanced Hash Functions -****************************************************/ - -XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) -{ - return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); -} -XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) -{ - XXH_free(statePtr); - return XXH_OK; +void TimeUnit::__set_MILLIS(const MilliSeconds& val) { + this->MILLIS = val; +__isset.MILLIS = true; } -XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) -{ - return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); -} -XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) -{ - XXH_free(statePtr); - return XXH_OK; +void TimeUnit::__set_MICROS(const MicroSeconds& val) { + this->MICROS = val; +__isset.MICROS = true; } - -/*** Hash feed ***/ - -XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) +void TimeUnit::__set_NANOS(const NanoSeconds& val) { + this->NANOS = val; 
+__isset.NANOS = true; +} +std::ostream& operator<<(std::ostream& out, const TimeUnit& obj) { - XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ - memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */ - state.v1 = seed + PRIME32_1 + PRIME32_2; - state.v2 = seed + PRIME32_2; - state.v3 = seed + 0; - state.v4 = seed - PRIME32_1; - memcpy(statePtr, &state, sizeof(state)); - return XXH_OK; + obj.printTo(out); + return out; } -XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) -{ - XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ - memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */ - state.v1 = seed + PRIME64_1 + PRIME64_2; - state.v2 = seed + PRIME64_2; - state.v3 = seed + 0; - state.v4 = seed - PRIME64_1; - memcpy(statePtr, &state, sizeof(state)); - return XXH_OK; -} +uint32_t TimeUnit::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; -FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; + xfer += iprot->readStructBegin(fname); -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (input==NULL) return XXH_ERROR; -#endif + using ::duckdb_apache::thrift::protocol::TProtocolException; - state->total_len_32 += (unsigned)len; - state->large_len |= (len>=16) | (state->total_len_32>=16); - if (state->memsize + len < 16) { /* fill in tmp buffer */ - XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); - state->memsize += (unsigned)len; - return XXH_OK; + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, 
fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; } - - if (state->memsize) { /* some data left from previous update */ - XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); - { const U32* p32 = state->mem32; - state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; - state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; - state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; - state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++; + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->MILLIS.read(iprot); + this->__isset.MILLIS = true; + } else { + xfer += iprot->skip(ftype); } - p += 16-state->memsize; - state->memsize = 0; + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->MICROS.read(iprot); + this->__isset.MICROS = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->NANOS.read(iprot); + this->__isset.NANOS = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; } + xfer += iprot->readFieldEnd(); + } - if (p <= bEnd-16) { - const BYTE* const limit = bEnd - 16; - U32 v1 = state->v1; - U32 v2 = state->v2; - U32 v3 = state->v3; - U32 v4 = state->v4; + xfer += iprot->readStructEnd(); - do { - v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; - v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; - v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; - v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; - } while (p<=limit); + return xfer; +} - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } +uint32_t TimeUnit::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer 
+= oprot->writeStructBegin("TimeUnit"); - if (p < bEnd) { - XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); - state->memsize = (unsigned)(bEnd-p); - } + if (this->__isset.MILLIS) { + xfer += oprot->writeFieldBegin("MILLIS", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); + xfer += this->MILLIS.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.MICROS) { + xfer += oprot->writeFieldBegin("MICROS", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); + xfer += this->MICROS.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.NANOS) { + xfer += oprot->writeFieldBegin("NANOS", ::duckdb_apache::thrift::protocol::T_STRUCT, 3); + xfer += this->NANOS.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} - return XXH_OK; +void swap(TimeUnit &a, TimeUnit &b) { + using ::std::swap; + swap(a.MILLIS, b.MILLIS); + swap(a.MICROS, b.MICROS); + swap(a.NANOS, b.NANOS); + swap(a.__isset, b.__isset); +} + +TimeUnit::TimeUnit(const TimeUnit& other24) { + MILLIS = other24.MILLIS; + MICROS = other24.MICROS; + NANOS = other24.NANOS; + __isset = other24.__isset; +} +TimeUnit& TimeUnit::operator=(const TimeUnit& other25) { + MILLIS = other25.MILLIS; + MICROS = other25.MICROS; + NANOS = other25.NANOS; + __isset = other25.__isset; + return *this; +} +void TimeUnit::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "TimeUnit("; + out << "MILLIS="; (__isset.MILLIS ? (out << to_string(MILLIS)) : (out << "")); + out << ", " << "MICROS="; (__isset.MICROS ? (out << to_string(MICROS)) : (out << "")); + out << ", " << "NANOS="; (__isset.NANOS ? 
(out << to_string(NANOS)) : (out << "")); + out << ")"; } -XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_update_endian(state_in, input, len, XXH_littleEndian); - else - return XXH32_update_endian(state_in, input, len, XXH_bigEndian); +TimestampType::~TimestampType() throw() { } - -FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) -{ - const BYTE * p = (const BYTE*)state->mem32; - const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; - U32 h32; - - if (state->large_len) { - h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); - } else { - h32 = state->v3 /* == seed */ + PRIME32_5; - } - - h32 += state->total_len_32; - - while (p+4<=bEnd) { - h32 += XXH_readLE32(p, endian) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4; - p+=4; - } - - while (p> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - - return h32; +void TimestampType::__set_isAdjustedToUTC(const bool val) { + this->isAdjustedToUTC = val; } - -XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) +void TimestampType::__set_unit(const TimeUnit& val) { + this->unit = val; +} +std::ostream& operator<<(std::ostream& out, const TimestampType& obj) { - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_digest_endian(state_in, XXH_littleEndian); - else - return XXH32_digest_endian(state_in, XXH_bigEndian); + obj.printTo(out); + return out; } +uint32_t TimestampType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { -/* **** XXH64 **** */ + ::duckdb_apache::thrift::protocol::TInputRecursionTracker 
tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; -FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; + xfer += iprot->readStructBegin(fname); -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (input==NULL) return XXH_ERROR; -#endif + using ::duckdb_apache::thrift::protocol::TProtocolException; - state->total_len += len; + bool isset_isAdjustedToUTC = false; + bool isset_unit = false; - if (state->memsize + len < 32) { /* fill in tmp buffer */ - if (input != NULL) { - XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->isAdjustedToUTC); + isset_isAdjustedToUTC = true; + } else { + xfer += iprot->skip(ftype); } - state->memsize += (U32)len; - return XXH_OK; + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->unit.read(iprot); + isset_unit = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; } + xfer += iprot->readFieldEnd(); + } - if (state->memsize) { /* tmp buffer is full */ - XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); - state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); - state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); - state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); - state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); - p += 32-state->memsize; - state->memsize = 0; - } + xfer += iprot->readStructEnd(); - if (p+32 <= bEnd) { - const 
BYTE* const limit = bEnd - 32; - U64 v1 = state->v1; - U64 v2 = state->v2; - U64 v3 = state->v3; - U64 v4 = state->v4; + if (!isset_isAdjustedToUTC) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_unit) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} - do { - v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; - v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; - v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; - v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; - } while (p<=limit); +uint32_t TimestampType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("TimestampType"); - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } + xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::duckdb_apache::thrift::protocol::T_BOOL, 1); + xfer += oprot->writeBool(this->isAdjustedToUTC); + xfer += oprot->writeFieldEnd(); - if (p < bEnd) { - XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); - state->memsize = (unsigned)(bEnd-p); - } + xfer += oprot->writeFieldBegin("unit", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); + xfer += this->unit.write(oprot); + xfer += oprot->writeFieldEnd(); - return XXH_OK; + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; } -XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; +void swap(TimestampType &a, TimestampType &b) { + using ::std::swap; + swap(a.isAdjustedToUTC, b.isAdjustedToUTC); + swap(a.unit, b.unit); +} - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_update_endian(state_in, input, len, XXH_littleEndian); - else - return XXH64_update_endian(state_in, input, len, XXH_bigEndian); 
+TimestampType::TimestampType(const TimestampType& other26) { + isAdjustedToUTC = other26.isAdjustedToUTC; + unit = other26.unit; +} +TimestampType& TimestampType::operator=(const TimestampType& other27) { + isAdjustedToUTC = other27.isAdjustedToUTC; + unit = other27.unit; + return *this; +} +void TimestampType::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "TimestampType("; + out << "isAdjustedToUTC=" << to_string(isAdjustedToUTC); + out << ", " << "unit=" << to_string(unit); + out << ")"; } +TimeType::~TimeType() throw() { +} -FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) -{ - const BYTE * p = (const BYTE*)state->mem64; - const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; - U64 h64; - if (state->total_len >= 32) { - U64 const v1 = state->v1; - U64 const v2 = state->v2; - U64 const v3 = state->v3; - U64 const v4 = state->v4; +void TimeType::__set_isAdjustedToUTC(const bool val) { + this->isAdjustedToUTC = val; +} - h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); - h64 = XXH64_mergeRound(h64, v1); - h64 = XXH64_mergeRound(h64, v2); - h64 = XXH64_mergeRound(h64, v3); - h64 = XXH64_mergeRound(h64, v4); - } else { - h64 = state->v3 + PRIME64_5; - } +void TimeType::__set_unit(const TimeUnit& val) { + this->unit = val; +} +std::ostream& operator<<(std::ostream& out, const TimeType& obj) +{ + obj.printTo(out); + return out; +} - h64 += (U64) state->total_len; - while (p+8<=bEnd) { - U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian)); - h64 ^= k1; - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; - p+=8; - } +uint32_t TimeType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - if (p+4<=bEnd) { - h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; - h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; - p+=4; - } + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + 
std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; - while (preadStructBegin(fname); - h64 ^= h64 >> 33; - h64 *= PRIME64_2; - h64 ^= h64 >> 29; - h64 *= PRIME64_3; - h64 ^= h64 >> 32; + using ::duckdb_apache::thrift::protocol::TProtocolException; - return h64; -} + bool isset_isAdjustedToUTC = false; + bool isset_unit = false; + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->isAdjustedToUTC); + isset_isAdjustedToUTC = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->unit.read(iprot); + isset_unit = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } -XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + xfer += iprot->readStructEnd(); - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_digest_endian(state_in, XXH_littleEndian); - else - return XXH64_digest_endian(state_in, XXH_bigEndian); + if (!isset_isAdjustedToUTC) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_unit) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; } +uint32_t TimeType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("TimeType"); -/* ************************** -* Canonical representation -****************************/ + xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::duckdb_apache::thrift::protocol::T_BOOL, 1); + 
xfer += oprot->writeBool(this->isAdjustedToUTC); + xfer += oprot->writeFieldEnd(); -/*! Default XXH result types are basic unsigned 32 and 64 bits. -* The canonical representation follows human-readable write convention, aka big-endian (large digits first). -* These functions allow transformation of hash result into and from its canonical format. -* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs. -*/ + xfer += oprot->writeFieldBegin("unit", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); + xfer += this->unit.write(oprot); + xfer += oprot->writeFieldEnd(); -XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) -{ - XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); - if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); - memcpy(dst, &hash, sizeof(*dst)); + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; } -XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) -{ - XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); - if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); - memcpy(dst, &hash, sizeof(*dst)); +void swap(TimeType &a, TimeType &b) { + using ::std::swap; + swap(a.isAdjustedToUTC, b.isAdjustedToUTC); + swap(a.unit, b.unit); } -XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) -{ - return XXH_readBE32(src); +TimeType::TimeType(const TimeType& other28) { + isAdjustedToUTC = other28.isAdjustedToUTC; + unit = other28.unit; } - -XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) -{ - return XXH_readBE64(src); +TimeType& TimeType::operator=(const TimeType& other29) { + isAdjustedToUTC = other29.isAdjustedToUTC; + unit = other29.unit; + return *this; } - +void TimeType::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "TimeType("; + out << "isAdjustedToUTC=" 
<< to_string(isAdjustedToUTC); + out << ", " << "unit=" << to_string(unit); + out << ")"; } -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list +IntType::~IntType() throw() { +} -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ +void IntType::__set_bitWidth(const int8_t val) { + this->bitWidth = val; +} +void IntType::__set_isSigned(const bool val) { + this->isSigned = val; +} +std::ostream& operator<<(std::ostream& out, const IntType& obj) +{ + obj.printTo(out); + return out; +} -/*-************************************* -* Dependencies -***************************************/ -#include /* malloc, calloc, free */ -#include /* memset */ +uint32_t IntType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; -namespace duckdb_zstd { + xfer += iprot->readStructBegin(fname); -/*-**************************************** -* Version -******************************************/ -unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; } + using ::duckdb_apache::thrift::protocol::TProtocolException; -const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } + bool isset_bitWidth = false; + bool isset_isSigned = false; + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == 
::duckdb_apache::thrift::protocol::T_BYTE) { + xfer += iprot->readByte(this->bitWidth); + isset_bitWidth = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->isSigned); + isset_isSigned = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } -/*-**************************************** -* ZSTD Error Management -******************************************/ -#undef ZSTD_isError /* defined within zstd_internal.h */ -/*! ZSTD_isError() : - * tells if a return value is an error code - * symbol is required for external callers */ -unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } + xfer += iprot->readStructEnd(); -/*! ZSTD_getErrorName() : - * provides error code string from function result (useful for debugging) */ -const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } + if (!isset_bitWidth) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_isSigned) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} -/*! ZSTD_getError() : - * convert a `size_t` function result into a proper ZSTD_errorCode enum */ -ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } +uint32_t IntType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("IntType"); -/*! 
ZSTD_getErrorString() : - * provides error code string from enum */ -const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } + xfer += oprot->writeFieldBegin("bitWidth", ::duckdb_apache::thrift::protocol::T_BYTE, 1); + xfer += oprot->writeByte(this->bitWidth); + xfer += oprot->writeFieldEnd(); + xfer += oprot->writeFieldBegin("isSigned", ::duckdb_apache::thrift::protocol::T_BOOL, 2); + xfer += oprot->writeBool(this->isSigned); + xfer += oprot->writeFieldEnd(); + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} -/*=************************************************************** -* Custom allocator -****************************************************************/ -void* ZSTD_malloc(size_t size, ZSTD_customMem customMem) -{ - if (customMem.customAlloc) - return customMem.customAlloc(customMem.opaque, size); - return malloc(size); +void swap(IntType &a, IntType &b) { + using ::std::swap; + swap(a.bitWidth, b.bitWidth); + swap(a.isSigned, b.isSigned); } -void* ZSTD_calloc(size_t size, ZSTD_customMem customMem) -{ - if (customMem.customAlloc) { - /* calloc implemented as malloc+memset; - * not as efficient as calloc, but next best guess for custom malloc */ - void* const ptr = customMem.customAlloc(customMem.opaque, size); - memset(ptr, 0, size); - return ptr; - } - return calloc(1, size); +IntType::IntType(const IntType& other30) { + bitWidth = other30.bitWidth; + isSigned = other30.isSigned; +} +IntType& IntType::operator=(const IntType& other31) { + bitWidth = other31.bitWidth; + isSigned = other31.isSigned; + return *this; +} +void IntType::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "IntType("; + out << "bitWidth=" << to_string(bitWidth); + out << ", " << "isSigned=" << to_string(isSigned); + out << ")"; } -void ZSTD_free(void* ptr, ZSTD_customMem customMem) -{ - if (ptr!=NULL) { - if (customMem.customFree) - customMem.customFree(customMem.opaque, ptr); 
- else - free(ptr); - } + +JsonType::~JsonType() throw() { } +std::ostream& operator<<(std::ostream& out, const JsonType& obj) +{ + obj.printTo(out); + return out; } -// LICENSE_CHANGE_END +uint32_t JsonType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list + xfer += iprot->readStructBegin(fname); -/* ****************************************************************** - * huff0 huffman decoder, - * part of Finite State Entropy library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
-****************************************************************** */ + using ::duckdb_apache::thrift::protocol::TProtocolException; -/* ************************************************************** -* Dependencies -****************************************************************/ -#include /* memcpy, memset */ - /* BIT_* */ - /* to compress headers */ + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + xfer += iprot->readStructEnd(); + return xfer; +} +uint32_t JsonType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("JsonType"); -namespace duckdb_zstd { -/* ************************************************************** -* Macros -****************************************************************/ + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} -/* These two optional macros force the use one way or another of the two - * Huffman decompression implementations. You can't force in both directions - * at the same time. - */ -#if defined(HUF_FORCE_DECOMPRESS_X1) && \ - defined(HUF_FORCE_DECOMPRESS_X2) -#error "Cannot force the use of the X1 and X2 decoders at the same time!" 
-#endif +void swap(JsonType &a, JsonType &b) { + using ::std::swap; + (void) a; + (void) b; +} +JsonType::JsonType(const JsonType& other32) { + (void) other32; +} +JsonType& JsonType::operator=(const JsonType& other33) { + (void) other33; + return *this; +} +void JsonType::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "JsonType("; + out << ")"; +} -/* ************************************************************** -* Error Management -****************************************************************/ -// #define HUF_isError ERR_isError +BsonType::~BsonType() throw() { +} -/* ************************************************************** -* Byte alignment for workSpace management -****************************************************************/ -#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) -#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) +std::ostream& operator<<(std::ostream& out, const BsonType& obj) +{ + obj.printTo(out); + return out; +} -/* ************************************************************** -* BMI2 Variant Wrappers -****************************************************************/ -#if DYNAMIC_BMI2 +uint32_t BsonType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { -#define HUF_DGEN(fn) \ - \ - static size_t fn##_default( \ - void* dst, size_t dstSize, \ - const void* cSrc, size_t cSrcSize, \ - const HUF_DTable* DTable) \ - { \ - return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ - } \ - \ - static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ - void* dst, size_t dstSize, \ - const void* cSrc, size_t cSrcSize, \ - const HUF_DTable* DTable) \ - { \ - return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ - } \ - \ - static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ - size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ - { \ - if (bmi2) { \ - return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ - } \ - return fn##_default(dst, dstSize, cSrc, 
cSrcSize, DTable); \ - } + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; -#else + xfer += iprot->readStructBegin(fname); -#define HUF_DGEN(fn) \ - static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ - size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ - { \ - (void)bmi2; \ - return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ - } + using ::duckdb_apache::thrift::protocol::TProtocolException; -#endif + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } -/*-***************************/ -/* generic DTableDesc */ -/*-***************************/ -typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; + xfer += iprot->readStructEnd(); -static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) -{ - DTableDesc dtd; - memcpy(&dtd, table, sizeof(dtd)); - return dtd; + return xfer; } +uint32_t BsonType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("BsonType"); -#ifndef HUF_FORCE_DECOMPRESS_X2 - -/*-***************************/ -/* single-symbol decoding */ -/*-***************************/ -typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ - -size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) -{ - U32 tableLog = 0; - U32 nbSymbols = 0; - size_t iSize; - void* const dtPtr = DTable + 1; - HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; - - U32* rankVal; - BYTE* huffWeight; - size_t spaceUsed32 = 0; + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; 
+} - rankVal = (U32 *)workSpace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1; - huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32); - spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; +void swap(BsonType &a, BsonType &b) { + using ::std::swap; + (void) a; + (void) b; +} - if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); +BsonType::BsonType(const BsonType& other34) { + (void) other34; +} +BsonType& BsonType::operator=(const BsonType& other35) { + (void) other35; + return *this; +} +void BsonType::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "BsonType("; + out << ")"; +} - DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); - /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ - iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); - if (HUF_isError(iSize)) return iSize; +LogicalType::~LogicalType() throw() { +} - /* Table header */ - { DTableDesc dtd = HUF_getDTableDesc(DTable); - if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ - dtd.tableType = 0; - dtd.tableLog = (BYTE)tableLog; - memcpy(DTable, &dtd, sizeof(dtd)); - } - /* Calculate starting value for each rank */ - { U32 n, nextRankStart = 0; - for (n=1; nSTRING = val; +__isset.STRING = true; +} - /* fill DTable */ - { U32 n; - size_t const nEnd = nbSymbols; - for (n=0; n> 1; - size_t const uStart = rankVal[w]; - size_t const uEnd = uStart + length; - size_t u; - HUF_DEltX1 D; - D.byte = (BYTE)n; - D.nbBits = (BYTE)(tableLog + 1 - w); - rankVal[w] = (U32)uEnd; - if (length < 4) { - /* Use length in the loop bound so the compiler knows it is short. */ - for (u = 0; u < length; ++u) - dt[uStart + u] = D; - } else { - /* Unroll the loop 4 times, we know it is a power of 2. 
*/ - for (u = uStart; u < uEnd; u += 4) { - dt[u + 0] = D; - dt[u + 1] = D; - dt[u + 2] = D; - dt[u + 3] = D; - } } } } - return iSize; +void LogicalType::__set_MAP(const MapType& val) { + this->MAP = val; +__isset.MAP = true; } -size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_readDTableX1_wksp(DTable, src, srcSize, - workSpace, sizeof(workSpace)); +void LogicalType::__set_LIST(const ListType& val) { + this->LIST = val; +__isset.LIST = true; } -FORCE_INLINE_TEMPLATE BYTE -HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog) -{ - size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ - BYTE const c = dt[val].byte; - BIT_skipBits(Dstream, dt[val].nbBits); - return c; +void LogicalType::__set_ENUM(const EnumType& val) { + this->ENUM = val; +__isset.ENUM = true; } -#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \ - *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog) +void LogicalType::__set_DECIMAL(const DecimalType& val) { + this->DECIMAL = val; +__isset.DECIMAL = true; +} -#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ - if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ - HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) +void LogicalType::__set_DATE(const DateType& val) { + this->DATE = val; +__isset.DATE = true; +} -#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ - if (MEM_64bits()) \ - HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) +void LogicalType::__set_TIME(const TimeType& val) { + this->TIME = val; +__isset.TIME = true; +} -HINT_INLINE size_t -HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog) -{ - BYTE* const pStart = p; +void LogicalType::__set_TIMESTAMP(const TimestampType& val) { + this->TIMESTAMP = val; +__isset.TIMESTAMP = true; +} - /* up to 4 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { - 
HUF_DECODE_SYMBOLX1_2(p, bitDPtr); - HUF_DECODE_SYMBOLX1_1(p, bitDPtr); - HUF_DECODE_SYMBOLX1_2(p, bitDPtr); - HUF_DECODE_SYMBOLX1_0(p, bitDPtr); - } +void LogicalType::__set_INTEGER(const IntType& val) { + this->INTEGER = val; +__isset.INTEGER = true; +} - /* [0-3] symbols remaining */ - if (MEM_32bits()) - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd)) - HUF_DECODE_SYMBOLX1_0(p, bitDPtr); +void LogicalType::__set_UNKNOWN(const NullType& val) { + this->UNKNOWN = val; +__isset.UNKNOWN = true; +} - /* no more data to retrieve from bitstream, no need to reload */ - while (p < pEnd) - HUF_DECODE_SYMBOLX1_0(p, bitDPtr); +void LogicalType::__set_JSON(const JsonType& val) { + this->JSON = val; +__isset.JSON = true; +} - return pEnd-pStart; +void LogicalType::__set_BSON(const BsonType& val) { + this->BSON = val; +__isset.BSON = true; } -FORCE_INLINE_TEMPLATE size_t -HUF_decompress1X1_usingDTable_internal_body( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) +void LogicalType::__set_UUID(const UUIDType& val) { + this->UUID = val; +__isset.UUID = true; +} +std::ostream& operator<<(std::ostream& out, const LogicalType& obj) { - BYTE* op = (BYTE*)dst; - BYTE* const oend = op + dstSize; - const void* dtPtr = DTable + 1; - const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; - BIT_DStream_t bitD; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; - - CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); - - HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog); + obj.printTo(out); + return out; +} - if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); - return dstSize; -} +uint32_t LogicalType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { -FORCE_INLINE_TEMPLATE size_t -HUF_decompress4X1_usingDTable_internal_body( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - /* Check */ - if (cSrcSize < 10) return 
ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; - { const BYTE* const istart = (const BYTE*) cSrc; - BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - BYTE* const olimit = oend - 3; - const void* const dtPtr = DTable + 1; - const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; + xfer += iprot->readStructBegin(fname); - /* Init */ - BIT_DStream_t bitD1; - BIT_DStream_t bitD2; - BIT_DStream_t bitD3; - BIT_DStream_t bitD4; - size_t const length1 = MEM_readLE16(istart); - size_t const length2 = MEM_readLE16(istart+2); - size_t const length3 = MEM_readLE16(istart+4); - size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); - const BYTE* const istart1 = istart + 6; /* jumpTable */ - const BYTE* const istart2 = istart1 + length1; - const BYTE* const istart3 = istart2 + length2; - const BYTE* const istart4 = istart3 + length3; - const size_t segmentSize = (dstSize+3) / 4; - BYTE* const opStart2 = ostart + segmentSize; - BYTE* const opStart3 = opStart2 + segmentSize; - BYTE* const opStart4 = opStart3 + segmentSize; - BYTE* op1 = ostart; - BYTE* op2 = opStart2; - BYTE* op3 = opStart3; - BYTE* op4 = opStart4; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; - U32 endSignal = 1; + using ::duckdb_apache::thrift::protocol::TProtocolException; - if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ - CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); - CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); - CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); - CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); - /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ - for ( ; (endSignal) & (op4 < olimit) ; ) { - HUF_DECODE_SYMBOLX1_2(op1, &bitD1); 
- HUF_DECODE_SYMBOLX1_2(op2, &bitD2); - HUF_DECODE_SYMBOLX1_2(op3, &bitD3); - HUF_DECODE_SYMBOLX1_2(op4, &bitD4); - HUF_DECODE_SYMBOLX1_1(op1, &bitD1); - HUF_DECODE_SYMBOLX1_1(op2, &bitD2); - HUF_DECODE_SYMBOLX1_1(op3, &bitD3); - HUF_DECODE_SYMBOLX1_1(op4, &bitD4); - HUF_DECODE_SYMBOLX1_2(op1, &bitD1); - HUF_DECODE_SYMBOLX1_2(op2, &bitD2); - HUF_DECODE_SYMBOLX1_2(op3, &bitD3); - HUF_DECODE_SYMBOLX1_2(op4, &bitD4); - HUF_DECODE_SYMBOLX1_0(op1, &bitD1); - HUF_DECODE_SYMBOLX1_0(op2, &bitD2); - HUF_DECODE_SYMBOLX1_0(op3, &bitD3); - HUF_DECODE_SYMBOLX1_0(op4, &bitD4); - endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->STRING.read(iprot); + this->__isset.STRING = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->MAP.read(iprot); + this->__isset.MAP = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->LIST.read(iprot); + this->__isset.LIST = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->ENUM.read(iprot); + this->__isset.ENUM = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->DECIMAL.read(iprot); + this->__isset.DECIMAL = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == 
::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->DATE.read(iprot); + this->__isset.DATE = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->TIME.read(iprot); + this->__isset.TIME = true; + } else { + xfer += iprot->skip(ftype); } + break; + case 8: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->TIMESTAMP.read(iprot); + this->__isset.TIMESTAMP = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 10: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->INTEGER.read(iprot); + this->__isset.INTEGER = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 11: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->UNKNOWN.read(iprot); + this->__isset.UNKNOWN = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 12: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->JSON.read(iprot); + this->__isset.JSON = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 13: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->BSON.read(iprot); + this->__isset.BSON = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 14: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->UUID.read(iprot); + this->__isset.UUID = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } - /* check corruption */ - /* note : should not be necessary : op# advance in lock step, and we control op4. 
- * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ - if (op1 > opStart2) return ERROR(corruption_detected); - if (op2 > opStart3) return ERROR(corruption_detected); - if (op3 > opStart4) return ERROR(corruption_detected); - /* note : op4 supposed already verified within main loop */ - - /* finish bitStreams one by one */ - HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog); - HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog); - HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog); - HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog); - - /* check */ - { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); - if (!endCheck) return ERROR(corruption_detected); } + xfer += iprot->readStructEnd(); - /* decoded size */ - return dstSize; - } + return xfer; } +uint32_t LogicalType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("LogicalType"); -typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, - const void *cSrc, - size_t cSrcSize, - const HUF_DTable *DTable); + if (this->__isset.STRING) { + xfer += oprot->writeFieldBegin("STRING", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); + xfer += this->STRING.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.MAP) { + xfer += oprot->writeFieldBegin("MAP", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); + xfer += this->MAP.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.LIST) { + xfer += oprot->writeFieldBegin("LIST", ::duckdb_apache::thrift::protocol::T_STRUCT, 3); + xfer += this->LIST.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.ENUM) { + xfer += oprot->writeFieldBegin("ENUM", ::duckdb_apache::thrift::protocol::T_STRUCT, 4); + xfer += 
this->ENUM.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.DECIMAL) { + xfer += oprot->writeFieldBegin("DECIMAL", ::duckdb_apache::thrift::protocol::T_STRUCT, 5); + xfer += this->DECIMAL.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.DATE) { + xfer += oprot->writeFieldBegin("DATE", ::duckdb_apache::thrift::protocol::T_STRUCT, 6); + xfer += this->DATE.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.TIME) { + xfer += oprot->writeFieldBegin("TIME", ::duckdb_apache::thrift::protocol::T_STRUCT, 7); + xfer += this->TIME.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.TIMESTAMP) { + xfer += oprot->writeFieldBegin("TIMESTAMP", ::duckdb_apache::thrift::protocol::T_STRUCT, 8); + xfer += this->TIMESTAMP.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.INTEGER) { + xfer += oprot->writeFieldBegin("INTEGER", ::duckdb_apache::thrift::protocol::T_STRUCT, 10); + xfer += this->INTEGER.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.UNKNOWN) { + xfer += oprot->writeFieldBegin("UNKNOWN", ::duckdb_apache::thrift::protocol::T_STRUCT, 11); + xfer += this->UNKNOWN.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.JSON) { + xfer += oprot->writeFieldBegin("JSON", ::duckdb_apache::thrift::protocol::T_STRUCT, 12); + xfer += this->JSON.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.BSON) { + xfer += oprot->writeFieldBegin("BSON", ::duckdb_apache::thrift::protocol::T_STRUCT, 13); + xfer += this->BSON.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.UUID) { + xfer += oprot->writeFieldBegin("UUID", ::duckdb_apache::thrift::protocol::T_STRUCT, 14); + xfer += this->UUID.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} -HUF_DGEN(HUF_decompress1X1_usingDTable_internal) 
-HUF_DGEN(HUF_decompress4X1_usingDTable_internal) +void swap(LogicalType &a, LogicalType &b) { + using ::std::swap; + swap(a.STRING, b.STRING); + swap(a.MAP, b.MAP); + swap(a.LIST, b.LIST); + swap(a.ENUM, b.ENUM); + swap(a.DECIMAL, b.DECIMAL); + swap(a.DATE, b.DATE); + swap(a.TIME, b.TIME); + swap(a.TIMESTAMP, b.TIMESTAMP); + swap(a.INTEGER, b.INTEGER); + swap(a.UNKNOWN, b.UNKNOWN); + swap(a.JSON, b.JSON); + swap(a.BSON, b.BSON); + swap(a.UUID, b.UUID); + swap(a.__isset, b.__isset); +} +LogicalType::LogicalType(const LogicalType& other36) { + STRING = other36.STRING; + MAP = other36.MAP; + LIST = other36.LIST; + ENUM = other36.ENUM; + DECIMAL = other36.DECIMAL; + DATE = other36.DATE; + TIME = other36.TIME; + TIMESTAMP = other36.TIMESTAMP; + INTEGER = other36.INTEGER; + UNKNOWN = other36.UNKNOWN; + JSON = other36.JSON; + BSON = other36.BSON; + UUID = other36.UUID; + __isset = other36.__isset; +} +LogicalType& LogicalType::operator=(const LogicalType& other37) { + STRING = other37.STRING; + MAP = other37.MAP; + LIST = other37.LIST; + ENUM = other37.ENUM; + DECIMAL = other37.DECIMAL; + DATE = other37.DATE; + TIME = other37.TIME; + TIMESTAMP = other37.TIMESTAMP; + INTEGER = other37.INTEGER; + UNKNOWN = other37.UNKNOWN; + JSON = other37.JSON; + BSON = other37.BSON; + UUID = other37.UUID; + __isset = other37.__isset; + return *this; +} +void LogicalType::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "LogicalType("; + out << "STRING="; (__isset.STRING ? (out << to_string(STRING)) : (out << "")); + out << ", " << "MAP="; (__isset.MAP ? (out << to_string(MAP)) : (out << "")); + out << ", " << "LIST="; (__isset.LIST ? (out << to_string(LIST)) : (out << "")); + out << ", " << "ENUM="; (__isset.ENUM ? (out << to_string(ENUM)) : (out << "")); + out << ", " << "DECIMAL="; (__isset.DECIMAL ? (out << to_string(DECIMAL)) : (out << "")); + out << ", " << "DATE="; (__isset.DATE ? 
(out << to_string(DATE)) : (out << "")); + out << ", " << "TIME="; (__isset.TIME ? (out << to_string(TIME)) : (out << "")); + out << ", " << "TIMESTAMP="; (__isset.TIMESTAMP ? (out << to_string(TIMESTAMP)) : (out << "")); + out << ", " << "INTEGER="; (__isset.INTEGER ? (out << to_string(INTEGER)) : (out << "")); + out << ", " << "UNKNOWN="; (__isset.UNKNOWN ? (out << to_string(UNKNOWN)) : (out << "")); + out << ", " << "JSON="; (__isset.JSON ? (out << to_string(JSON)) : (out << "")); + out << ", " << "BSON="; (__isset.BSON ? (out << to_string(BSON)) : (out << "")); + out << ", " << "UUID="; (__isset.UUID ? (out << to_string(UUID)) : (out << "")); + out << ")"; +} -size_t HUF_decompress1X1_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 0) return ERROR(GENERIC); - return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +SchemaElement::~SchemaElement() throw() { } -size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - const BYTE* ip = (const BYTE*) cSrc; - - size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; - return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +void SchemaElement::__set_type(const Type::type val) { + this->type = val; +__isset.type = true; } - -size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); +void SchemaElement::__set_type_length(const int32_t val) { + 
this->type_length = val; +__isset.type_length = true; } -size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); - return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); +void SchemaElement::__set_repetition_type(const FieldRepetitionType::type val) { + this->repetition_type = val; +__isset.repetition_type = true; } -size_t HUF_decompress4X1_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 0) return ERROR(GENERIC); - return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +void SchemaElement::__set_name(const std::string& val) { + this->name = val; } -static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize, int bmi2) -{ - const BYTE* ip = (const BYTE*) cSrc; +void SchemaElement::__set_num_children(const int32_t val) { + this->num_children = val; +__isset.num_children = true; +} - size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize, - workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; +void SchemaElement::__set_converted_type(const ConvertedType::type val) { + this->converted_type = val; +__isset.converted_type = true; +} - return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +void SchemaElement::__set_scale(const int32_t val) { + this->scale = val; +__isset.scale = true; } -size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); +void 
SchemaElement::__set_precision(const int32_t val) { + this->precision = val; +__isset.precision = true; } +void SchemaElement::__set_field_id(const int32_t val) { + this->field_id = val; +__isset.field_id = true; +} -size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); +void SchemaElement::__set_logicalType(const LogicalType& val) { + this->logicalType = val; +__isset.logicalType = true; } -size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +std::ostream& operator<<(std::ostream& out, const SchemaElement& obj) { - HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); - return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); + obj.printTo(out); + return out; } -#endif /* HUF_FORCE_DECOMPRESS_X2 */ - - -#ifndef HUF_FORCE_DECOMPRESS_X1 -/* *************************/ -/* double-symbols decoding */ -/* *************************/ +uint32_t SchemaElement::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { -typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ -typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; -typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; -typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; + xfer += iprot->readStructBegin(fname); -/* HUF_fillDTableX2Level2() : - * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ -static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed, - const U32* rankValOrigin, const int minWeight, - const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, - U32 
nbBitsBaseline, U16 baseSeq) -{ - HUF_DEltX2 DElt; - U32 rankVal[HUF_TABLELOG_MAX + 1]; + using ::duckdb_apache::thrift::protocol::TProtocolException; - /* get pre-calculated rankVal */ - memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + bool isset_name = false; - /* fill skipped values */ - if (minWeight>1) { - U32 i, skipSize = rankVal[minWeight]; - MEM_writeLE16(&(DElt.sequence), baseSeq); - DElt.nbBits = (BYTE)(consumed); - DElt.length = 1; - for (i = 0; i < skipSize; i++) - DTable[i] = DElt; + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; } - - /* fill DTable */ - { U32 s; for (s=0; s= 1 */ - - rankVal[weight] += length; - } } -} - - -static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, - const sortedSymbol_t* sortedList, const U32 sortedListSize, - const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, - const U32 nbBitsBaseline) -{ - U32 rankVal[HUF_TABLELOG_MAX + 1]; - const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ - const U32 minBits = nbBitsBaseline - maxWeight; - U32 s; - - memcpy(rankVal, rankValOrigin, sizeof(rankVal)); - - /* fill DTable */ - for (s=0; s= minBits) { /* enough room for a second symbol */ - U32 sortedRank; - int minWeight = nbBits + scaleLog; - if (minWeight < 1) minWeight = 1; - sortedRank = rankStart[minWeight]; - HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits, - rankValOrigin[nbBits], minWeight, - sortedList+sortedRank, sortedListSize-sortedRank, - nbBitsBaseline, symbol); + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + int32_t ecast38; + xfer += iprot->readI32(ecast38); + this->type = (Type::type)ecast38; + this->__isset.type = true; } else { - HUF_DEltX2 DElt; - MEM_writeLE16(&(DElt.sequence), symbol); - DElt.nbBits = (BYTE)(nbBits); - DElt.length = 1; - { U32 const end = start + length; - U32 u; - 
for (u = start; u < end; u++) DTable[u] = DElt; - } } - rankVal[weight] += length; + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->type_length); + this->__isset.type_length = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + int32_t ecast39; + xfer += iprot->readI32(ecast39); + this->repetition_type = (FieldRepetitionType::type)ecast39; + this->__isset.repetition_type = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->name); + isset_name = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->num_children); + this->__isset.num_children = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + int32_t ecast40; + xfer += iprot->readI32(ecast40); + this->converted_type = (ConvertedType::type)ecast40; + this->__isset.converted_type = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->scale); + this->__isset.scale = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->precision); + this->__isset.precision = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 9: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->field_id); + this->__isset.field_id = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 10: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->logicalType.read(iprot); + 
this->__isset.logicalType = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; } -} - -size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, - const void* src, size_t srcSize, - void* workSpace, size_t wkspSize) -{ - U32 tableLog, maxW, sizeOfSort, nbSymbols; - DTableDesc dtd = HUF_getDTableDesc(DTable); - U32 const maxTableLog = dtd.maxTableLog; - size_t iSize; - void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */ - HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; - U32 *rankStart; - - rankValCol_t* rankVal; - U32* rankStats; - U32* rankStart0; - sortedSymbol_t* sortedSymbol; - BYTE* weightList; - size_t spaceUsed32 = 0; - - rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32); - spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2; - rankStats = (U32 *)workSpace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_MAX + 1; - rankStart0 = (U32 *)workSpace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_MAX + 2; - sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t); - spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2; - weightList = (BYTE *)((U32 *)workSpace + spaceUsed32); - spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; + xfer += iprot->readFieldEnd(); + } - if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); + xfer += iprot->readStructEnd(); - rankStart = rankStart0 + 1; - memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); + if (!isset_name) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} - DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ - if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); - /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... 
*/ +uint32_t SchemaElement::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("SchemaElement"); - iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); - if (HUF_isError(iSize)) return iSize; + if (this->__isset.type) { + xfer += oprot->writeFieldBegin("type", ::duckdb_apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32((int32_t)this->type); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.type_length) { + xfer += oprot->writeFieldBegin("type_length", ::duckdb_apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(this->type_length); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.repetition_type) { + xfer += oprot->writeFieldBegin("repetition_type", ::duckdb_apache::thrift::protocol::T_I32, 3); + xfer += oprot->writeI32((int32_t)this->repetition_type); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldBegin("name", ::duckdb_apache::thrift::protocol::T_STRING, 4); + xfer += oprot->writeString(this->name); + xfer += oprot->writeFieldEnd(); - /* check result */ - if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ + if (this->__isset.num_children) { + xfer += oprot->writeFieldBegin("num_children", ::duckdb_apache::thrift::protocol::T_I32, 5); + xfer += oprot->writeI32(this->num_children); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.converted_type) { + xfer += oprot->writeFieldBegin("converted_type", ::duckdb_apache::thrift::protocol::T_I32, 6); + xfer += oprot->writeI32((int32_t)this->converted_type); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.scale) { + xfer += oprot->writeFieldBegin("scale", ::duckdb_apache::thrift::protocol::T_I32, 7); + xfer += oprot->writeI32(this->scale); + xfer += oprot->writeFieldEnd(); + } + if 
(this->__isset.precision) { + xfer += oprot->writeFieldBegin("precision", ::duckdb_apache::thrift::protocol::T_I32, 8); + xfer += oprot->writeI32(this->precision); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.field_id) { + xfer += oprot->writeFieldBegin("field_id", ::duckdb_apache::thrift::protocol::T_I32, 9); + xfer += oprot->writeI32(this->field_id); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.logicalType) { + xfer += oprot->writeFieldBegin("logicalType", ::duckdb_apache::thrift::protocol::T_STRUCT, 10); + xfer += this->logicalType.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} - /* find maxWeight */ - for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ +void swap(SchemaElement &a, SchemaElement &b) { + using ::std::swap; + swap(a.type, b.type); + swap(a.type_length, b.type_length); + swap(a.repetition_type, b.repetition_type); + swap(a.name, b.name); + swap(a.num_children, b.num_children); + swap(a.converted_type, b.converted_type); + swap(a.scale, b.scale); + swap(a.precision, b.precision); + swap(a.field_id, b.field_id); + swap(a.logicalType, b.logicalType); + swap(a.__isset, b.__isset); +} - /* Get start index of each weight */ - { U32 w, nextRankStart = 0; - for (w=1; w")); + out << ", " << "type_length="; (__isset.type_length ? (out << to_string(type_length)) : (out << "")); + out << ", " << "repetition_type="; (__isset.repetition_type ? (out << to_string(repetition_type)) : (out << "")); + out << ", " << "name=" << to_string(name); + out << ", " << "num_children="; (__isset.num_children ? (out << to_string(num_children)) : (out << "")); + out << ", " << "converted_type="; (__isset.converted_type ? (out << to_string(converted_type)) : (out << "")); + out << ", " << "scale="; (__isset.scale ? (out << to_string(scale)) : (out << "")); + out << ", " << "precision="; (__isset.precision ? 
(out << to_string(precision)) : (out << "")); + out << ", " << "field_id="; (__isset.field_id ? (out << to_string(field_id)) : (out << "")); + out << ", " << "logicalType="; (__isset.logicalType ? (out << to_string(logicalType)) : (out << "")); + out << ")"; +} - /* sort symbols by weight */ - { U32 s; - for (s=0; s> consumed; - } } } } +DataPageHeader::~DataPageHeader() throw() { +} - HUF_fillDTableX2(dt, maxTableLog, - sortedSymbol, sizeOfSort, - rankStart0, rankVal, maxW, - tableLog+1); - dtd.tableLog = (BYTE)maxTableLog; - dtd.tableType = 1; - memcpy(DTable, &dtd, sizeof(dtd)); - return iSize; +void DataPageHeader::__set_num_values(const int32_t val) { + this->num_values = val; } -size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_readDTableX2_wksp(DTable, src, srcSize, - workSpace, sizeof(workSpace)); +void DataPageHeader::__set_encoding(const Encoding::type val) { + this->encoding = val; } - -FORCE_INLINE_TEMPLATE U32 -HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) -{ - size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - memcpy(op, dt+val, 2); - BIT_skipBits(DStream, dt[val].nbBits); - return dt[val].length; +void DataPageHeader::__set_definition_level_encoding(const Encoding::type val) { + this->definition_level_encoding = val; } -FORCE_INLINE_TEMPLATE U32 -HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) -{ - size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - memcpy(op, dt+val, 1); - if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); - else { - if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { - BIT_skipBits(DStream, dt[val].nbBits); - if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) - /* ugly hack; works only because it's the last symbol. 
Note : can't easily extract nbBits from just this symbol */ - DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); - } } - return 1; +void DataPageHeader::__set_repetition_level_encoding(const Encoding::type val) { + this->repetition_level_encoding = val; } -#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ - ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) - -#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ - if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ - ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) - -#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ - if (MEM_64bits()) \ - ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) - -HINT_INLINE size_t -HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, - const HUF_DEltX2* const dt, const U32 dtLog) +void DataPageHeader::__set_statistics(const Statistics& val) { + this->statistics = val; +__isset.statistics = true; +} +std::ostream& operator<<(std::ostream& out, const DataPageHeader& obj) { - BYTE* const pStart = p; - - /* up to 8 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_1(p, bitDPtr); - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); - } + obj.printTo(out); + return out; +} - /* closer to end : up to 2 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); - while (p <= pEnd-2) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ +uint32_t DataPageHeader::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - if (p < pEnd) - p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; - return p-pStart; -} + 
xfer += iprot->readStructBegin(fname); -FORCE_INLINE_TEMPLATE size_t -HUF_decompress1X2_usingDTable_internal_body( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - BIT_DStream_t bitD; + using ::duckdb_apache::thrift::protocol::TProtocolException; - /* Init */ - CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); + bool isset_num_values = false; + bool isset_encoding = false; + bool isset_definition_level_encoding = false; + bool isset_repetition_level_encoding = false; - /* decode */ - { BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ - const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog); + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->num_values); + isset_num_values = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + int32_t ecast43; + xfer += iprot->readI32(ecast43); + this->encoding = (Encoding::type)ecast43; + isset_encoding = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + int32_t ecast44; + xfer += iprot->readI32(ecast44); + this->definition_level_encoding = (Encoding::type)ecast44; + isset_definition_level_encoding = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + int32_t ecast45; + xfer += iprot->readI32(ecast45); + this->repetition_level_encoding = (Encoding::type)ecast45; + isset_repetition_level_encoding = true; + } else { + xfer 
+= iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->statistics.read(iprot); + this->__isset.statistics = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; } + xfer += iprot->readFieldEnd(); + } - /* check */ - if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + xfer += iprot->readStructEnd(); - /* decoded size */ - return dstSize; + if (!isset_num_values) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_encoding) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_definition_level_encoding) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_repetition_level_encoding) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; } -FORCE_INLINE_TEMPLATE size_t -HUF_decompress4X2_usingDTable_internal_body( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ - - { const BYTE* const istart = (const BYTE*) cSrc; - BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - BYTE* const olimit = oend - (sizeof(size_t)-1); - const void* const dtPtr = DTable+1; - const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; +uint32_t DataPageHeader::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("DataPageHeader"); - /* Init */ - BIT_DStream_t bitD1; - BIT_DStream_t bitD2; - BIT_DStream_t bitD3; - BIT_DStream_t bitD4; - size_t const length1 = MEM_readLE16(istart); - size_t const length2 = MEM_readLE16(istart+2); - size_t const length3 = MEM_readLE16(istart+4); - size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); - 
const BYTE* const istart1 = istart + 6; /* jumpTable */ - const BYTE* const istart2 = istart1 + length1; - const BYTE* const istart3 = istart2 + length2; - const BYTE* const istart4 = istart3 + length3; - size_t const segmentSize = (dstSize+3) / 4; - BYTE* const opStart2 = ostart + segmentSize; - BYTE* const opStart3 = opStart2 + segmentSize; - BYTE* const opStart4 = opStart3 + segmentSize; - BYTE* op1 = ostart; - BYTE* op2 = opStart2; - BYTE* op3 = opStart3; - BYTE* op4 = opStart4; - U32 endSignal = 1; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; + xfer += oprot->writeFieldBegin("num_values", ::duckdb_apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(this->num_values); + xfer += oprot->writeFieldEnd(); - if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ - CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); - CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); - CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); - CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); + xfer += oprot->writeFieldBegin("encoding", ::duckdb_apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32((int32_t)this->encoding); + xfer += oprot->writeFieldEnd(); - /* 16-32 symbols per loop (4-8 symbols per stream) */ - for ( ; (endSignal) & (op4 < olimit); ) { -#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_1(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_0(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_1(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_0(op2, &bitD2); - endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_1(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - 
HUF_DECODE_SYMBOLX2_0(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_1(op4, &bitD4); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_0(op4, &bitD4); - endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; -#else - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_1(op1, &bitD1); - HUF_DECODE_SYMBOLX2_1(op2, &bitD2); - HUF_DECODE_SYMBOLX2_1(op3, &bitD3); - HUF_DECODE_SYMBOLX2_1(op4, &bitD4); - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_0(op1, &bitD1); - HUF_DECODE_SYMBOLX2_0(op2, &bitD2); - HUF_DECODE_SYMBOLX2_0(op3, &bitD3); - HUF_DECODE_SYMBOLX2_0(op4, &bitD4); - endSignal = (U32)LIKELY( - (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) - & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) - & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) - & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); -#endif - } + xfer += oprot->writeFieldBegin("definition_level_encoding", ::duckdb_apache::thrift::protocol::T_I32, 3); + xfer += oprot->writeI32((int32_t)this->definition_level_encoding); + xfer += oprot->writeFieldEnd(); - /* check corruption */ - if (op1 > opStart2) return ERROR(corruption_detected); - if (op2 > opStart3) return ERROR(corruption_detected); - if (op3 > opStart4) return ERROR(corruption_detected); - /* note : op4 already verified within main loop */ + xfer += oprot->writeFieldBegin("repetition_level_encoding", ::duckdb_apache::thrift::protocol::T_I32, 4); + xfer += oprot->writeI32((int32_t)this->repetition_level_encoding); + xfer += oprot->writeFieldEnd(); - /* finish bitStreams one by one */ - HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); - 
HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); - HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); - HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); + if (this->__isset.statistics) { + xfer += oprot->writeFieldBegin("statistics", ::duckdb_apache::thrift::protocol::T_STRUCT, 5); + xfer += this->statistics.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} - /* check */ - { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); - if (!endCheck) return ERROR(corruption_detected); } +void swap(DataPageHeader &a, DataPageHeader &b) { + using ::std::swap; + swap(a.num_values, b.num_values); + swap(a.encoding, b.encoding); + swap(a.definition_level_encoding, b.definition_level_encoding); + swap(a.repetition_level_encoding, b.repetition_level_encoding); + swap(a.statistics, b.statistics); + swap(a.__isset, b.__isset); +} - /* decoded size */ - return dstSize; - } +DataPageHeader::DataPageHeader(const DataPageHeader& other46) { + num_values = other46.num_values; + encoding = other46.encoding; + definition_level_encoding = other46.definition_level_encoding; + repetition_level_encoding = other46.repetition_level_encoding; + statistics = other46.statistics; + __isset = other46.__isset; +} +DataPageHeader& DataPageHeader::operator=(const DataPageHeader& other47) { + num_values = other47.num_values; + encoding = other47.encoding; + definition_level_encoding = other47.definition_level_encoding; + repetition_level_encoding = other47.repetition_level_encoding; + statistics = other47.statistics; + __isset = other47.__isset; + return *this; +} +void DataPageHeader::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "DataPageHeader("; + out << "num_values=" << to_string(num_values); + out << ", " << "encoding=" << to_string(encoding); + out << ", " << 
"definition_level_encoding=" << to_string(definition_level_encoding); + out << ", " << "repetition_level_encoding=" << to_string(repetition_level_encoding); + out << ", " << "statistics="; (__isset.statistics ? (out << to_string(statistics)) : (out << "")); + out << ")"; } -HUF_DGEN(HUF_decompress1X2_usingDTable_internal) -HUF_DGEN(HUF_decompress4X2_usingDTable_internal) -size_t HUF_decompress1X2_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 1) return ERROR(GENERIC); - return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +IndexPageHeader::~IndexPageHeader() throw() { } -size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) +std::ostream& operator<<(std::ostream& out, const IndexPageHeader& obj) { - const BYTE* ip = (const BYTE*) cSrc; + obj.printTo(out); + return out; +} - size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, - workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; - return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); -} +uint32_t IndexPageHeader::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; -size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} + xfer += iprot->readStructBegin(fname); -size_t HUF_decompress1X2 
(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); - return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); -} + using ::duckdb_apache::thrift::protocol::TProtocolException; -size_t HUF_decompress4X2_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 1) return ERROR(GENERIC); - return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -} -static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize, int bmi2) -{ - const BYTE* ip = (const BYTE*) cSrc; + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } - size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, - workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; + xfer += iprot->readStructEnd(); - return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); + return xfer; } -size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); -} +uint32_t IndexPageHeader::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("IndexPageHeader"); + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} -size_t 
HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); +void swap(IndexPageHeader &a, IndexPageHeader &b) { + using ::std::swap; + (void) a; + (void) b; } -size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); - return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +IndexPageHeader::IndexPageHeader(const IndexPageHeader& other48) { + (void) other48; +} +IndexPageHeader& IndexPageHeader::operator=(const IndexPageHeader& other49) { + (void) other49; + return *this; +} +void IndexPageHeader::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "IndexPageHeader("; + out << ")"; } -#endif /* HUF_FORCE_DECOMPRESS_X1 */ +DictionaryPageHeader::~DictionaryPageHeader() throw() { +} -/* ***********************************/ -/* Universal decompression selectors */ -/* ***********************************/ -size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc const dtd = HUF_getDTableDesc(DTable); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); - return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); - return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#else - return dtd.tableType ? 
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : - HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#endif +void DictionaryPageHeader::__set_num_values(const int32_t val) { + this->num_values = val; } -size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc const dtd = HUF_getDTableDesc(DTable); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); - return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); - return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#else - return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : - HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#endif +void DictionaryPageHeader::__set_encoding(const Encoding::type val) { + this->encoding = val; } - -#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) -typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; -static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = -{ - /* single, double, quad */ - {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ - {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ - {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ - {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ - {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ - {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ - {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ - {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ - {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ - {{ 947,128}, 
{1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ - {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ - {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ - {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ - {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ - {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ - {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ -}; -#endif - -/** HUF_selectDecoder() : - * Tells which decoder is likely to decode faster, - * based on a set of pre-computed metrics. - * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . - * Assumption : 0 < dstSize <= 128 KB */ -U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) +void DictionaryPageHeader::__set_is_sorted(const bool val) { + this->is_sorted = val; +__isset.is_sorted = true; +} +std::ostream& operator<<(std::ostream& out, const DictionaryPageHeader& obj) { - assert(dstSize > 0); - assert(dstSize <= 128*1024); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dstSize; - (void)cSrcSize; - return 0; -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dstSize; - (void)cSrcSize; - return 1; -#else - /* decoder timing evaluation */ - { U32 const Q = (cSrcSize >= dstSize) ? 
15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ - U32 const D256 = (U32)(dstSize >> 8); - U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); - U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); - DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ - return DTime1 < DTime0; - } -#endif + obj.printTo(out); + return out; } -typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); +uint32_t DictionaryPageHeader::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { -size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ -#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) - static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; -#endif + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ - if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + xfer += iprot->readStructBegin(fname); - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); -#else - return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); -#endif + using ::duckdb_apache::thrift::protocol::TProtocolException; + + bool isset_num_values = false; + bool isset_encoding = false; + + while 
(true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->num_values); + isset_num_values = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + int32_t ecast50; + xfer += iprot->readI32(ecast50); + this->encoding = (Encoding::type)ecast50; + isset_encoding = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->is_sorted); + this->__isset.is_sorted = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_num_values) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_encoding) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; } -size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ - if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ +uint32_t DictionaryPageHeader::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("DictionaryPageHeader"); - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return 
HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); -#else - return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : - HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; -#endif - } + xfer += oprot->writeFieldBegin("num_values", ::duckdb_apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(this->num_values); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("encoding", ::duckdb_apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32((int32_t)this->encoding); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.is_sorted) { + xfer += oprot->writeFieldBegin("is_sorted", ::duckdb_apache::thrift::protocol::T_BOOL, 3); + xfer += oprot->writeBool(this->is_sorted); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; } -size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); +void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) { + using ::std::swap; + swap(a.num_values, b.num_values); + swap(a.encoding, b.encoding); + swap(a.is_sorted, b.is_sorted); + swap(a.__isset, b.__isset); } +DictionaryPageHeader::DictionaryPageHeader(const DictionaryPageHeader& other51) { + num_values = other51.num_values; + encoding = other51.encoding; + is_sorted = other51.is_sorted; + __isset = other51.__isset; +} +DictionaryPageHeader& DictionaryPageHeader::operator=(const DictionaryPageHeader& other52) { + num_values = other52.num_values; + encoding = other52.encoding; + is_sorted = other52.is_sorted; + __isset = other52.__isset; + return *this; +} +void 
DictionaryPageHeader::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "DictionaryPageHeader("; + out << "num_values=" << to_string(num_values); + out << ", " << "encoding=" << to_string(encoding); + out << ", " << "is_sorted="; (__isset.is_sorted ? (out << to_string(is_sorted)) : (out << "")); + out << ")"; +} -size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, - size_t dstSize, const void* cSrc, - size_t cSrcSize, void* workSpace, - size_t wkspSize) -{ - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize == 0) return ERROR(corruption_detected); - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); -#else - return algoNb ? 
HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize): - HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); -#endif - } +DataPageHeaderV2::~DataPageHeaderV2() throw() { } -size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ - if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize); -#else - return algoNb ? 
HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize): - HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize); -#endif - } +void DataPageHeaderV2::__set_num_values(const int32_t val) { + this->num_values = val; } -size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); +void DataPageHeaderV2::__set_num_nulls(const int32_t val) { + this->num_nulls = val; } - -size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) -{ - DTableDesc const dtd = HUF_getDTableDesc(DTable); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); - return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); - return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#else - return dtd.tableType ? 
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : - HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#endif +void DataPageHeaderV2::__set_num_rows(const int32_t val) { + this->num_rows = val; } -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) -{ - const BYTE* ip = (const BYTE*) cSrc; +void DataPageHeaderV2::__set_encoding(const Encoding::type val) { + this->encoding = val; +} - size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; +void DataPageHeaderV2::__set_definition_levels_byte_length(const int32_t val) { + this->definition_levels_byte_length = val; +} - return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +void DataPageHeaderV2::__set_repetition_levels_byte_length(const int32_t val) { + this->repetition_levels_byte_length = val; } -#endif -size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) -{ - DTableDesc const dtd = HUF_getDTableDesc(DTable); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); - return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); - return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#else - return dtd.tableType ? 
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : - HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#endif +void DataPageHeaderV2::__set_is_compressed(const bool val) { + this->is_compressed = val; +__isset.is_compressed = true; } -size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +void DataPageHeaderV2::__set_statistics(const Statistics& val) { + this->statistics = val; +__isset.statistics = true; +} +std::ostream& operator<<(std::ostream& out, const DataPageHeaderV2& obj) { - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize == 0) return ERROR(corruption_detected); - - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -#else - return algoNb ? 
HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : - HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -#endif - } + obj.printTo(out); + return out; } -} +uint32_t DataPageHeaderV2::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { -// LICENSE_CHANGE_END + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; + xfer += iprot->readStructBegin(fname); -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list + using ::duckdb_apache::thrift::protocol::TProtocolException; -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
- */ + bool isset_num_values = false; + bool isset_num_nulls = false; + bool isset_num_rows = false; + bool isset_encoding = false; + bool isset_definition_levels_byte_length = false; + bool isset_repetition_levels_byte_length = false; -/* zstd_ddict.c : - * concentrates all logic that needs to know the internals of ZSTD_DDict object */ + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->num_values); + isset_num_values = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->num_nulls); + isset_num_nulls = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->num_rows); + isset_num_rows = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + int32_t ecast53; + xfer += iprot->readI32(ecast53); + this->encoding = (Encoding::type)ecast53; + isset_encoding = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->definition_levels_byte_length); + isset_definition_levels_byte_length = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->repetition_levels_byte_length); + isset_repetition_levels_byte_length = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->is_compressed); + this->__isset.is_compressed = true; + } else { + xfer += iprot->skip(ftype); + } + break; + 
case 8: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->statistics.read(iprot); + this->__isset.statistics = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } -/*-******************************************************* -* Dependencies -*********************************************************/ -#include /* memcpy, memmove, memset */ - /* low level memory routines */ + xfer += iprot->readStructEnd(); + + if (!isset_num_values) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_num_nulls) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_num_rows) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_encoding) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_definition_levels_byte_length) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_repetition_levels_byte_length) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} +uint32_t DataPageHeaderV2::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("DataPageHeaderV2"); + xfer += oprot->writeFieldBegin("num_values", ::duckdb_apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(this->num_values); + xfer += oprot->writeFieldEnd(); + xfer += oprot->writeFieldBegin("num_nulls", ::duckdb_apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(this->num_nulls); + xfer += oprot->writeFieldEnd(); + xfer += oprot->writeFieldBegin("num_rows", ::duckdb_apache::thrift::protocol::T_I32, 3); + xfer += oprot->writeI32(this->num_rows); + xfer += oprot->writeFieldEnd(); + xfer += oprot->writeFieldBegin("encoding", ::duckdb_apache::thrift::protocol::T_I32, 4); + xfer += 
oprot->writeI32((int32_t)this->encoding); + xfer += oprot->writeFieldEnd(); -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list + xfer += oprot->writeFieldBegin("definition_levels_byte_length", ::duckdb_apache::thrift::protocol::T_I32, 5); + xfer += oprot->writeI32(this->definition_levels_byte_length); + xfer += oprot->writeFieldEnd(); -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ + xfer += oprot->writeFieldBegin("repetition_levels_byte_length", ::duckdb_apache::thrift::protocol::T_I32, 6); + xfer += oprot->writeI32(this->repetition_levels_byte_length); + xfer += oprot->writeFieldEnd(); + if (this->__isset.is_compressed) { + xfer += oprot->writeFieldBegin("is_compressed", ::duckdb_apache::thrift::protocol::T_BOOL, 7); + xfer += oprot->writeBool(this->is_compressed); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.statistics) { + xfer += oprot->writeFieldBegin("statistics", ::duckdb_apache::thrift::protocol::T_STRUCT, 8); + xfer += this->statistics.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} -/* zstd_decompress_internal: - * objects and definitions shared within lib/decompress modules */ +void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) { + using ::std::swap; + swap(a.num_values, b.num_values); + swap(a.num_nulls, b.num_nulls); + swap(a.num_rows, b.num_rows); + swap(a.encoding, b.encoding); + swap(a.definition_levels_byte_length, b.definition_levels_byte_length); + swap(a.repetition_levels_byte_length, 
b.repetition_levels_byte_length); + swap(a.is_compressed, b.is_compressed); + swap(a.statistics, b.statistics); + swap(a.__isset, b.__isset); +} - #ifndef ZSTD_DECOMPRESS_INTERNAL_H - #define ZSTD_DECOMPRESS_INTERNAL_H +DataPageHeaderV2::DataPageHeaderV2(const DataPageHeaderV2& other54) { + num_values = other54.num_values; + num_nulls = other54.num_nulls; + num_rows = other54.num_rows; + encoding = other54.encoding; + definition_levels_byte_length = other54.definition_levels_byte_length; + repetition_levels_byte_length = other54.repetition_levels_byte_length; + is_compressed = other54.is_compressed; + statistics = other54.statistics; + __isset = other54.__isset; +} +DataPageHeaderV2& DataPageHeaderV2::operator=(const DataPageHeaderV2& other55) { + num_values = other55.num_values; + num_nulls = other55.num_nulls; + num_rows = other55.num_rows; + encoding = other55.encoding; + definition_levels_byte_length = other55.definition_levels_byte_length; + repetition_levels_byte_length = other55.repetition_levels_byte_length; + is_compressed = other55.is_compressed; + statistics = other55.statistics; + __isset = other55.__isset; + return *this; +} +void DataPageHeaderV2::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "DataPageHeaderV2("; + out << "num_values=" << to_string(num_values); + out << ", " << "num_nulls=" << to_string(num_nulls); + out << ", " << "num_rows=" << to_string(num_rows); + out << ", " << "encoding=" << to_string(encoding); + out << ", " << "definition_levels_byte_length=" << to_string(definition_levels_byte_length); + out << ", " << "repetition_levels_byte_length=" << to_string(repetition_levels_byte_length); + out << ", " << "is_compressed="; (__isset.is_compressed ? (out << to_string(is_compressed)) : (out << "")); + out << ", " << "statistics="; (__isset.statistics ? 
(out << to_string(statistics)) : (out << "")); + out << ")"; +} -/*-******************************************************* - * Dependencies - *********************************************************/ - /* BYTE, U16, U32 */ - /* ZSTD_seqSymbol */ +PageHeader::~PageHeader() throw() { +} -namespace duckdb_zstd { -/*-******************************************************* - * Constants - *********************************************************/ -struct ZSTDConstants { - static const U32 LL_base[MaxLL+1]; - static const U32 OF_base[MaxOff+1]; - static const U32 OF_bits[MaxOff+1]; - static const U32 ML_base[MaxML+1]; -}; +void PageHeader::__set_type(const PageType::type val) { + this->type = val; +} +void PageHeader::__set_uncompressed_page_size(const int32_t val) { + this->uncompressed_page_size = val; +} -/*-******************************************************* - * Decompression types - *********************************************************/ - typedef struct { - U32 fastMode; - U32 tableLog; - } ZSTD_seqSymbol_header; +void PageHeader::__set_compressed_page_size(const int32_t val) { + this->compressed_page_size = val; +} - typedef struct { - U16 nextState; - BYTE nbAdditionalBits; - BYTE nbBits; - U32 baseValue; - } ZSTD_seqSymbol; +void PageHeader::__set_crc(const int32_t val) { + this->crc = val; +__isset.crc = true; +} - #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) +void PageHeader::__set_data_page_header(const DataPageHeader& val) { + this->data_page_header = val; +__isset.data_page_header = true; +} -typedef struct { - ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ - ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ - ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ - HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can 
accommodate HUF_decompress4X */ - U32 rep[ZSTD_REP_NUM]; -} ZSTD_entropyDTables_t; +void PageHeader::__set_index_page_header(const IndexPageHeader& val) { + this->index_page_header = val; +__isset.index_page_header = true; +} -typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, - ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, - ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, - ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; +void PageHeader::__set_dictionary_page_header(const DictionaryPageHeader& val) { + this->dictionary_page_header = val; +__isset.dictionary_page_header = true; +} -typedef enum { zdss_init=0, zdss_loadHeader, - zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; +void PageHeader::__set_data_page_header_v2(const DataPageHeaderV2& val) { + this->data_page_header_v2 = val; +__isset.data_page_header_v2 = true; +} +std::ostream& operator<<(std::ostream& out, const PageHeader& obj) +{ + obj.printTo(out); + return out; +} -typedef enum { - ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */ - ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */ - ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ -} ZSTD_dictUses_e; -typedef enum { - ZSTD_obm_buffered = 0, /* Buffer the output */ - ZSTD_obm_stable = 1 /* ZSTD_outBuffer is stable */ -} ZSTD_outBufferMode_e; +uint32_t PageHeader::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { -struct ZSTD_DCtx_s -{ - const ZSTD_seqSymbol* LLTptr; - const ZSTD_seqSymbol* MLTptr; - const ZSTD_seqSymbol* OFTptr; - const HUF_DTable* HUFptr; - ZSTD_entropyDTables_t entropy; - U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */ - const void* previousDstEnd; /* detect continuity */ - const void* prefixStart; /* start of current segment */ - const void* virtualStart; /* virtual start of previous segment if it was just before current one */ - const void* dictEnd; /* end of 
previous segment */ - size_t expected; - ZSTD_frameHeader fParams; - U64 decodedSize; - blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ - ZSTD_dStage stage; - U32 litEntropy; - U32 fseEntropy; - XXH64_state_t xxhState; - size_t headerSize; - ZSTD_format_e format; - const BYTE* litPtr; - ZSTD_customMem customMem; - size_t litSize; - size_t rleSize; - size_t staticSize; - int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; - /* dictionary */ - ZSTD_DDict* ddictLocal; - const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ - U32 dictID; - int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ - ZSTD_dictUses_e dictUses; + xfer += iprot->readStructBegin(fname); - /* streaming */ - ZSTD_dStreamStage streamStage; - char* inBuff; - size_t inBuffSize; - size_t inPos; - size_t maxWindowSize; - char* outBuff; - size_t outBuffSize; - size_t outStart; - size_t outEnd; - size_t lhSize; - void* legacyContext; - U32 previousLegacyVersion; - U32 legacyVersion; - U32 hostageByte; - int noForwardProgress; - ZSTD_outBufferMode_e outBufferMode; - ZSTD_outBuffer expectedOutBuffer; + using ::duckdb_apache::thrift::protocol::TProtocolException; + + bool isset_type = false; + bool isset_uncompressed_page_size = false; + bool isset_compressed_page_size = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + int32_t ecast56; + xfer += iprot->readI32(ecast56); + 
this->type = (PageType::type)ecast56; + isset_type = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->uncompressed_page_size); + isset_uncompressed_page_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->compressed_page_size); + isset_compressed_page_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->crc); + this->__isset.crc = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->data_page_header.read(iprot); + this->__isset.data_page_header = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->index_page_header.read(iprot); + this->__isset.index_page_header = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->dictionary_page_header.read(iprot); + this->__isset.dictionary_page_header = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->data_page_header_v2.read(iprot); + this->__isset.data_page_header_v2 = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } - /* workspace */ - BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; - BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; + xfer += iprot->readStructEnd(); - size_t oversizedDuration; + if (!isset_type) + throw TProtocolException(TProtocolException::INVALID_DATA); + if 
(!isset_uncompressed_page_size) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_compressed_page_size) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - void const* dictContentBeginForFuzzing; - void const* dictContentEndForFuzzing; -#endif -}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ +uint32_t PageHeader::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("PageHeader"); + xfer += oprot->writeFieldBegin("type", ::duckdb_apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32((int32_t)this->type); + xfer += oprot->writeFieldEnd(); -/*-******************************************************* - * Shared internal functions - *********************************************************/ + xfer += oprot->writeFieldBegin("uncompressed_page_size", ::duckdb_apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(this->uncompressed_page_size); + xfer += oprot->writeFieldEnd(); -/*! ZSTD_loadDEntropy() : - * dict : must point at beginning of a valid zstd dictionary. - * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */ -size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, - const void* const dict, size_t const dictSize); + xfer += oprot->writeFieldBegin("compressed_page_size", ::duckdb_apache::thrift::protocol::T_I32, 3); + xfer += oprot->writeI32(this->compressed_page_size); + xfer += oprot->writeFieldEnd(); -/*! ZSTD_checkContinuity() : - * check if next `dst` follows previous position, where decompression ended. - * If yes, do nothing (continue on current segment). - * If not, classify previous segment as "external dictionary", and start a new segment. - * This function cannot fail. 
*/ -void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst); + if (this->__isset.crc) { + xfer += oprot->writeFieldBegin("crc", ::duckdb_apache::thrift::protocol::T_I32, 4); + xfer += oprot->writeI32(this->crc); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.data_page_header) { + xfer += oprot->writeFieldBegin("data_page_header", ::duckdb_apache::thrift::protocol::T_STRUCT, 5); + xfer += this->data_page_header.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.index_page_header) { + xfer += oprot->writeFieldBegin("index_page_header", ::duckdb_apache::thrift::protocol::T_STRUCT, 6); + xfer += this->index_page_header.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.dictionary_page_header) { + xfer += oprot->writeFieldBegin("dictionary_page_header", ::duckdb_apache::thrift::protocol::T_STRUCT, 7); + xfer += this->dictionary_page_header.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.data_page_header_v2) { + xfer += oprot->writeFieldBegin("data_page_header_v2", ::duckdb_apache::thrift::protocol::T_STRUCT, 8); + xfer += this->data_page_header_v2.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} +void swap(PageHeader &a, PageHeader &b) { + using ::std::swap; + swap(a.type, b.type); + swap(a.uncompressed_page_size, b.uncompressed_page_size); + swap(a.compressed_page_size, b.compressed_page_size); + swap(a.crc, b.crc); + swap(a.data_page_header, b.data_page_header); + swap(a.index_page_header, b.index_page_header); + swap(a.dictionary_page_header, b.dictionary_page_header); + swap(a.data_page_header_v2, b.data_page_header_v2); + swap(a.__isset, b.__isset); } -#endif /* ZSTD_DECOMPRESS_INTERNAL_H */ +PageHeader::PageHeader(const PageHeader& other57) { + type = other57.type; + uncompressed_page_size = other57.uncompressed_page_size; + compressed_page_size = other57.compressed_page_size; + crc = 
other57.crc; + data_page_header = other57.data_page_header; + index_page_header = other57.index_page_header; + dictionary_page_header = other57.dictionary_page_header; + data_page_header_v2 = other57.data_page_header_v2; + __isset = other57.__isset; +} +PageHeader& PageHeader::operator=(const PageHeader& other58) { + type = other58.type; + uncompressed_page_size = other58.uncompressed_page_size; + compressed_page_size = other58.compressed_page_size; + crc = other58.crc; + data_page_header = other58.data_page_header; + index_page_header = other58.index_page_header; + dictionary_page_header = other58.dictionary_page_header; + data_page_header_v2 = other58.data_page_header_v2; + __isset = other58.__isset; + return *this; +} +void PageHeader::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "PageHeader("; + out << "type=" << to_string(type); + out << ", " << "uncompressed_page_size=" << to_string(uncompressed_page_size); + out << ", " << "compressed_page_size=" << to_string(compressed_page_size); + out << ", " << "crc="; (__isset.crc ? (out << to_string(crc)) : (out << "")); + out << ", " << "data_page_header="; (__isset.data_page_header ? (out << to_string(data_page_header)) : (out << "")); + out << ", " << "index_page_header="; (__isset.index_page_header ? (out << to_string(index_page_header)) : (out << "")); + out << ", " << "dictionary_page_header="; (__isset.dictionary_page_header ? (out << to_string(dictionary_page_header)) : (out << "")); + out << ", " << "data_page_header_v2="; (__isset.data_page_header_v2 ? 
(out << to_string(data_page_header_v2)) : (out << "")); + out << ")"; +} -// LICENSE_CHANGE_END +KeyValue::~KeyValue() throw() { +} +void KeyValue::__set_key(const std::string& val) { + this->key = val; +} -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list +void KeyValue::__set_value(const std::string& val) { + this->value = val; +__isset.value = true; +} +std::ostream& operator<<(std::ostream& out, const KeyValue& obj) +{ + obj.printTo(out); + return out; +} -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ +uint32_t KeyValue::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { -#ifndef ZSTD_DDICT_H -#define ZSTD_DDICT_H + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; -/*-******************************************************* - * Dependencies - *********************************************************/ -#include /* size_t */ - /* ZSTD_DDict, and several public functions */ + xfer += iprot->readStructBegin(fname); -namespace duckdb_zstd { -/*-******************************************************* - * Interface - *********************************************************/ + using ::duckdb_apache::thrift::protocol::TProtocolException; -/* note: several prototypes are already published in `zstd.h` : - * ZSTD_createDDict() - * ZSTD_createDDict_byReference() - * ZSTD_createDDict_advanced() - * ZSTD_freeDDict() - * ZSTD_initStaticDDict() - * ZSTD_sizeof_DDict() - * ZSTD_estimateDDictSize() - * 
ZSTD_getDictID_fromDict() - */ + bool isset_key = false; -const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict); -size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict); + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->key); + isset_key = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->value); + this->__isset.value = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } -void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + xfer += iprot->readStructEnd(); + if (!isset_key) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; } -#endif /* ZSTD_DDICT_H */ +uint32_t KeyValue::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("KeyValue"); + xfer += oprot->writeFieldBegin("key", ::duckdb_apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeString(this->key); + xfer += oprot->writeFieldEnd(); -// LICENSE_CHANGE_END + if (this->__isset.value) { + xfer += oprot->writeFieldBegin("value", ::duckdb_apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeString(this->value); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} +void swap(KeyValue &a, KeyValue &b) { + using ::std::swap; + swap(a.key, b.key); + swap(a.value, b.value); + swap(a.__isset, b.__isset); +} -// #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) -// # include "../legacy/zstd_legacy.h" 
-// #endif +KeyValue::KeyValue(const KeyValue& other59) { + key = other59.key; + value = other59.value; + __isset = other59.__isset; +} +KeyValue& KeyValue::operator=(const KeyValue& other60) { + key = other60.key; + value = other60.value; + __isset = other60.__isset; + return *this; +} +void KeyValue::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "KeyValue("; + out << "key=" << to_string(key); + out << ", " << "value="; (__isset.value ? (out << to_string(value)) : (out << "")); + out << ")"; +} -namespace duckdb_zstd { -/*-******************************************************* -* Types -*********************************************************/ -struct ZSTD_DDict_s { - void* dictBuffer; - const void* dictContent; - size_t dictSize; - ZSTD_entropyDTables_t entropy; - U32 dictID; - U32 entropyPresent; - ZSTD_customMem cMem; -}; /* typedef'd to ZSTD_DDict within "zstd.h" */ +SortingColumn::~SortingColumn() throw() { +} -const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict) -{ - assert(ddict != NULL); - return ddict->dictContent; + +void SortingColumn::__set_column_idx(const int32_t val) { + this->column_idx = val; } -size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict) -{ - assert(ddict != NULL); - return ddict->dictSize; +void SortingColumn::__set_descending(const bool val) { + this->descending = val; } -void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +void SortingColumn::__set_nulls_first(const bool val) { + this->nulls_first = val; +} +std::ostream& operator<<(std::ostream& out, const SortingColumn& obj) { - DEBUGLOG(4, "ZSTD_copyDDictParameters"); - assert(dctx != NULL); - assert(ddict != NULL); - dctx->dictID = ddict->dictID; - dctx->prefixStart = ddict->dictContent; - dctx->virtualStart = ddict->dictContent; - dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; - dctx->previousDstEnd = dctx->dictEnd; -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - 
dctx->dictContentBeginForFuzzing = dctx->prefixStart; - dctx->dictContentEndForFuzzing = dctx->previousDstEnd; -#endif - if (ddict->entropyPresent) { - dctx->litEntropy = 1; - dctx->fseEntropy = 1; - dctx->LLTptr = ddict->entropy.LLTable; - dctx->MLTptr = ddict->entropy.MLTable; - dctx->OFTptr = ddict->entropy.OFTable; - dctx->HUFptr = ddict->entropy.hufTable; - dctx->entropy.rep[0] = ddict->entropy.rep[0]; - dctx->entropy.rep[1] = ddict->entropy.rep[1]; - dctx->entropy.rep[2] = ddict->entropy.rep[2]; - } else { - dctx->litEntropy = 0; - dctx->fseEntropy = 0; - } + obj.printTo(out); + return out; } -static size_t -ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, - ZSTD_dictContentType_e dictContentType) -{ - ddict->dictID = 0; - ddict->entropyPresent = 0; - if (dictContentType == ZSTD_dct_rawContent) return 0; +uint32_t SortingColumn::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - if (ddict->dictSize < 8) { - if (dictContentType == ZSTD_dct_fullDict) - return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ - return 0; /* pure content mode */ - } - { U32 const magic = MEM_readLE32(ddict->dictContent); - if (magic != ZSTD_MAGIC_DICTIONARY) { - if (dictContentType == ZSTD_dct_fullDict) - return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ - return 0; /* pure content mode */ - } - } - ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; - /* load entropy tables */ - RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( - &ddict->entropy, ddict->dictContent, ddict->dictSize)), - dictionary_corrupted, ""); - ddict->entropyPresent = 1; - return 0; -} + xfer += iprot->readStructBegin(fname); + using ::duckdb_apache::thrift::protocol::TProtocolException; -static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, - 
const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType) -{ - if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { - ddict->dictBuffer = NULL; - ddict->dictContent = dict; - if (!dict) dictSize = 0; - } else { - void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem); - ddict->dictBuffer = internalBuffer; - ddict->dictContent = internalBuffer; - if (!internalBuffer) return ERROR(memory_allocation); - memcpy(internalBuffer, dict, dictSize); + bool isset_column_idx = false; + bool isset_descending = false; + bool isset_nulls_first = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; } - ddict->dictSize = dictSize; - ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->column_idx); + isset_column_idx = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->descending); + isset_descending = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->nulls_first); + isset_nulls_first = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } - /* parse dictionary content */ - FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); + xfer += iprot->readStructEnd(); - return 0; + if (!isset_column_idx) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_descending) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_nulls_first) + throw 
TProtocolException(TProtocolException::INVALID_DATA); + return xfer; } -ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_customMem customMem) -{ - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; +uint32_t SortingColumn::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("SortingColumn"); - { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem); - if (ddict == NULL) return NULL; - ddict->cMem = customMem; - { size_t const initResult = ZSTD_initDDict_internal(ddict, - dict, dictSize, - dictLoadMethod, dictContentType); - if (ZSTD_isError(initResult)) { - ZSTD_freeDDict(ddict); - return NULL; - } } - return ddict; - } -} + xfer += oprot->writeFieldBegin("column_idx", ::duckdb_apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(this->column_idx); + xfer += oprot->writeFieldEnd(); -/*! ZSTD_createDDict() : -* Create a digested dictionary, to start decompression without startup delay. -* `dict` content is copied inside DDict. -* Consequently, `dict` can be released after `ZSTD_DDict` creation */ -ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) -{ - ZSTD_customMem const allocator = { NULL, NULL, NULL }; - return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator); -} + xfer += oprot->writeFieldBegin("descending", ::duckdb_apache::thrift::protocol::T_BOOL, 2); + xfer += oprot->writeBool(this->descending); + xfer += oprot->writeFieldEnd(); -/*! ZSTD_createDDict_byReference() : - * Create a digested dictionary, to start decompression without startup delay. - * Dictionary content is simply referenced, it will be accessed during decompression. 
- * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ -ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) -{ - ZSTD_customMem const allocator = { NULL, NULL, NULL }; - return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator); + xfer += oprot->writeFieldBegin("nulls_first", ::duckdb_apache::thrift::protocol::T_BOOL, 3); + xfer += oprot->writeBool(this->nulls_first); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; } +void swap(SortingColumn &a, SortingColumn &b) { + using ::std::swap; + swap(a.column_idx, b.column_idx); + swap(a.descending, b.descending); + swap(a.nulls_first, b.nulls_first); +} -const ZSTD_DDict* ZSTD_initStaticDDict( - void* sBuffer, size_t sBufferSize, - const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType) -{ - size_t const neededSpace = sizeof(ZSTD_DDict) - + (dictLoadMethod == ZSTD_dlm_byRef ? 
0 : dictSize); - ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer; - assert(sBuffer != NULL); - assert(dict != NULL); - if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ - if (sBufferSize < neededSpace) return NULL; - if (dictLoadMethod == ZSTD_dlm_byCopy) { - memcpy(ddict+1, dict, dictSize); /* local copy */ - dict = ddict+1; - } - if (ZSTD_isError( ZSTD_initDDict_internal(ddict, - dict, dictSize, - ZSTD_dlm_byRef, dictContentType) )) - return NULL; - return ddict; +SortingColumn::SortingColumn(const SortingColumn& other61) { + column_idx = other61.column_idx; + descending = other61.descending; + nulls_first = other61.nulls_first; +} +SortingColumn& SortingColumn::operator=(const SortingColumn& other62) { + column_idx = other62.column_idx; + descending = other62.descending; + nulls_first = other62.nulls_first; + return *this; +} +void SortingColumn::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "SortingColumn("; + out << "column_idx=" << to_string(column_idx); + out << ", " << "descending=" << to_string(descending); + out << ", " << "nulls_first=" << to_string(nulls_first); + out << ")"; } -size_t ZSTD_freeDDict(ZSTD_DDict* ddict) -{ - if (ddict==NULL) return 0; /* support free on NULL */ - { ZSTD_customMem const cMem = ddict->cMem; - ZSTD_free(ddict->dictBuffer, cMem); - ZSTD_free(ddict, cMem); - return 0; - } +PageEncodingStats::~PageEncodingStats() throw() { } -/*! ZSTD_estimateDDictSize() : - * Estimate amount of memory that will be needed to create a dictionary for decompression. - * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */ -size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod) -{ - return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); -} -size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) -{ - if (ddict==NULL) return 0; /* support sizeof on NULL */ - return sizeof(*ddict) + (ddict->dictBuffer ? 
ddict->dictSize : 0) ; +void PageEncodingStats::__set_page_type(const PageType::type val) { + this->page_type = val; } -/*! ZSTD_getDictID_fromDDict() : - * Provides the dictID of the dictionary loaded into `ddict`. - * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. - * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ -unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) -{ - if (ddict==NULL) return 0; - return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); +void PageEncodingStats::__set_encoding(const Encoding::type val) { + this->encoding = val; } +void PageEncodingStats::__set_count(const int32_t val) { + this->count = val; +} +std::ostream& operator<<(std::ostream& out, const PageEncodingStats& obj) +{ + obj.printTo(out); + return out; } -// LICENSE_CHANGE_END +uint32_t PageEncodingStats::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list + xfer += iprot->readStructBegin(fname); -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
- */ + using ::duckdb_apache::thrift::protocol::TProtocolException; + bool isset_page_type = false; + bool isset_encoding = false; + bool isset_count = false; -/* *************************************************************** -* Tuning parameters -*****************************************************************/ -/*! - * HEAPMODE : - * Select how default decompression function ZSTD_decompress() allocates its context, - * on stack (0), or into heap (1, default; requires malloc()). - * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected. - */ -#ifndef ZSTD_HEAPMODE -# define ZSTD_HEAPMODE 1 -#endif + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + int32_t ecast63; + xfer += iprot->readI32(ecast63); + this->page_type = (PageType::type)ecast63; + isset_page_type = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + int32_t ecast64; + xfer += iprot->readI32(ecast64); + this->encoding = (Encoding::type)ecast64; + isset_encoding = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->count); + isset_count = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } -/*! -* LEGACY_SUPPORT : -* if set to 1+, ZSTD_decompress() can decode older formats (v0.1+) -*/ -#ifndef ZSTD_LEGACY_SUPPORT -# define ZSTD_LEGACY_SUPPORT 0 -#endif + xfer += iprot->readStructEnd(); -/*! - * MAXWINDOWSIZE_DEFAULT : - * maximum window size accepted by DStream __by default__. - * Frames requiring more memory will be rejected. - * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize(). 
- */ -#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT -# define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1) -#endif + if (!isset_page_type) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_encoding) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_count) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} -/*! - * NO_FORWARD_PROGRESS_MAX : - * maximum allowed nb of calls to ZSTD_decompressStream() - * without any forward progress - * (defined as: no byte read from input, and no byte flushed to output) - * before triggering an error. - */ -#ifndef ZSTD_NO_FORWARD_PROGRESS_MAX -# define ZSTD_NO_FORWARD_PROGRESS_MAX 16 -#endif +uint32_t PageEncodingStats::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("PageEncodingStats"); + xfer += oprot->writeFieldBegin("page_type", ::duckdb_apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32((int32_t)this->page_type); + xfer += oprot->writeFieldEnd(); -/*-******************************************************* -* Dependencies -*********************************************************/ -#include /* memcpy, memmove, memset */ - /* low level memory routines */ + xfer += oprot->writeFieldBegin("encoding", ::duckdb_apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32((int32_t)this->encoding); + xfer += oprot->writeFieldEnd(); + xfer += oprot->writeFieldBegin("count", ::duckdb_apache::thrift::protocol::T_I32, 3); + xfer += oprot->writeI32(this->count); + xfer += oprot->writeFieldEnd(); + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} +void swap(PageEncodingStats &a, PageEncodingStats &b) { + using ::std::swap; + swap(a.page_type, b.page_type); + swap(a.encoding, b.encoding); + swap(a.count, b.count); +} - /* blockProperties_t */ - /* 
ZSTD_DCtx */ - /* ZSTD_DDictDictContent */ +PageEncodingStats::PageEncodingStats(const PageEncodingStats& other65) { + page_type = other65.page_type; + encoding = other65.encoding; + count = other65.count; +} +PageEncodingStats& PageEncodingStats::operator=(const PageEncodingStats& other66) { + page_type = other66.page_type; + encoding = other66.encoding; + count = other66.count; + return *this; +} +void PageEncodingStats::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "PageEncodingStats("; + out << "page_type=" << to_string(page_type); + out << ", " << "encoding=" << to_string(encoding); + out << ", " << "count=" << to_string(count); + out << ")"; +} -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list +ColumnMetaData::~ColumnMetaData() throw() { +} -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
- */ +void ColumnMetaData::__set_type(const Type::type val) { + this->type = val; +} -#ifndef ZSTD_DEC_BLOCK_H -#define ZSTD_DEC_BLOCK_H +void ColumnMetaData::__set_encodings(const std::vector & val) { + this->encodings = val; +} -/*-******************************************************* - * Dependencies - *********************************************************/ -#include /* size_t */ - /* DCtx, and some public functions */ - /* blockProperties_t, and some public functions */ - /* ZSTD_seqSymbol */ +void ColumnMetaData::__set_path_in_schema(const std::vector & val) { + this->path_in_schema = val; +} -namespace duckdb_zstd { +void ColumnMetaData::__set_codec(const CompressionCodec::type val) { + this->codec = val; +} -/* === Prototypes === */ +void ColumnMetaData::__set_num_values(const int64_t val) { + this->num_values = val; +} -/* note: prototypes already published within `zstd.h` : - * ZSTD_decompressBlock() - */ +void ColumnMetaData::__set_total_uncompressed_size(const int64_t val) { + this->total_uncompressed_size = val; +} -/* note: prototypes already published within `zstd_internal.h` : - * ZSTD_getcBlockSize() - * ZSTD_decodeSeqHeaders() - */ +void ColumnMetaData::__set_total_compressed_size(const int64_t val) { + this->total_compressed_size = val; +} +void ColumnMetaData::__set_key_value_metadata(const std::vector & val) { + this->key_value_metadata = val; +__isset.key_value_metadata = true; +} -/* ZSTD_decompressBlock_internal() : - * decompress block, starting at `src`, - * into destination buffer `dst`. 
- * @return : decompressed block size, - * or an error code (which can be tested using ZSTD_isError()) - */ -size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, const int frame); +void ColumnMetaData::__set_data_page_offset(const int64_t val) { + this->data_page_offset = val; +} -/* ZSTD_buildFSETable() : - * generate FSE decoding table for one symbol (ll, ml or off) - * this function must be called with valid parameters only - * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.) - * in which case it cannot fail. - * Internal use only. - */ -void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, - const short* normalizedCounter, unsigned maxSymbolValue, - const U32* baseValue, const U32* nbAdditionalBits, - unsigned tableLog); +void ColumnMetaData::__set_index_page_offset(const int64_t val) { + this->index_page_offset = val; +__isset.index_page_offset = true; +} +void ColumnMetaData::__set_dictionary_page_offset(const int64_t val) { + this->dictionary_page_offset = val; +__isset.dictionary_page_offset = true; } -#endif /* ZSTD_DEC_BLOCK_H */ +void ColumnMetaData::__set_statistics(const Statistics& val) { + this->statistics = val; +__isset.statistics = true; +} +void ColumnMetaData::__set_encoding_stats(const std::vector & val) { + this->encoding_stats = val; +__isset.encoding_stats = true; +} +std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj) +{ + obj.printTo(out); + return out; +} -// LICENSE_CHANGE_END - /* ZSTD_decompressBlock_internal */ -// #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) -// # include "../legacy/zstd_legacy.h" -// #endif -namespace duckdb_zstd { -const U32 ZSTDConstants::LL_base[MaxLL+1] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 18, 20, 22, 24, 28, 32, 40, - 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, - 0x2000, 0x4000, 0x8000, 0x10000 }; +uint32_t 
ColumnMetaData::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { -const U32 ZSTDConstants::OF_base[MaxOff+1] = { - 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, - 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, - 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, - 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; -const U32 ZSTDConstants::OF_bits[MaxOff+1] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31 }; + xfer += iprot->readStructBegin(fname); -const U32 ZSTDConstants::ML_base[MaxML+1] = { - 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, - 27, 28, 29, 30, 31, 32, 33, 34, - 35, 37, 39, 41, 43, 47, 51, 59, - 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, - 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; + using ::duckdb_apache::thrift::protocol::TProtocolException; -const size_t ZSTDInternalConstants::ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; -const U32 ZSTDInternalConstants::LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 2, 2, 3, 3, - 4, 6, 7, 8, 9,10,11,12, - 13,14,15,16 }; -const S16 ZSTDInternalConstants::LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, - 2, 3, 2, 1, 1, 1, 1, 1, - -1,-1,-1,-1 }; -#define LL_DEFAULTNORMLOG 6 /* for static allocation */ -const U32 ZSTDInternalConstants::LL_defaultNormLog = LL_DEFAULTNORMLOG; -const U32 ZSTDInternalConstants::ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 2, 2, 3, 3, - 4, 4, 5, 7, 8, 9,10,11, - 12,13,14,15,16 }; -const S16 
ZSTDInternalConstants::ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, - 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1,-1,-1, - -1,-1,-1,-1,-1 }; -#define ML_DEFAULTNORMLOG 6 /* for static allocation */ -const U32 ZSTDInternalConstants::ML_defaultNormLog = ML_DEFAULTNORMLOG; + bool isset_type = false; + bool isset_encodings = false; + bool isset_path_in_schema = false; + bool isset_codec = false; + bool isset_num_values = false; + bool isset_total_uncompressed_size = false; + bool isset_total_compressed_size = false; + bool isset_data_page_offset = false; -const S16 ZSTDInternalConstants::OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, - 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - -1,-1,-1,-1,-1 }; -#define OF_DEFAULTNORMLOG 5 /* for static allocation */ -const U32 ZSTDInternalConstants::OF_defaultNormLog = OF_DEFAULTNORMLOG; -const U32 ZSTDInternalConstants::repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + int32_t ecast67; + xfer += iprot->readI32(ecast67); + this->type = (Type::type)ecast67; + isset_type = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { + { + this->encodings.clear(); + uint32_t _size68; + ::duckdb_apache::thrift::protocol::TType _etype71; + xfer += iprot->readListBegin(_etype71, _size68); + this->encodings.resize(_size68); + uint32_t _i72; + for (_i72 = 0; _i72 < _size68; ++_i72) + { + int32_t ecast73; + xfer += iprot->readI32(ecast73); + this->encodings[_i72] = (Encoding::type)ecast73; + } + xfer += iprot->readListEnd(); + } + isset_encodings = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == 
::duckdb_apache::thrift::protocol::T_LIST) { + { + this->path_in_schema.clear(); + uint32_t _size74; + ::duckdb_apache::thrift::protocol::TType _etype77; + xfer += iprot->readListBegin(_etype77, _size74); + this->path_in_schema.resize(_size74); + uint32_t _i78; + for (_i78 = 0; _i78 < _size74; ++_i78) + { + xfer += iprot->readString(this->path_in_schema[_i78]); + } + xfer += iprot->readListEnd(); + } + isset_path_in_schema = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + int32_t ecast79; + xfer += iprot->readI32(ecast79); + this->codec = (CompressionCodec::type)ecast79; + isset_codec = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->num_values); + isset_num_values = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->total_uncompressed_size); + isset_total_uncompressed_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->total_compressed_size); + isset_total_compressed_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { + { + this->key_value_metadata.clear(); + uint32_t _size80; + ::duckdb_apache::thrift::protocol::TType _etype83; + xfer += iprot->readListBegin(_etype83, _size80); + this->key_value_metadata.resize(_size80); + uint32_t _i84; + for (_i84 = 0; _i84 < _size80; ++_i84) + { + xfer += this->key_value_metadata[_i84].read(iprot); + } + xfer += iprot->readListEnd(); + } + this->__isset.key_value_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 9: + if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { + xfer += 
iprot->readI64(this->data_page_offset); + isset_data_page_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 10: + if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->index_page_offset); + this->__isset.index_page_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 11: + if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->dictionary_page_offset); + this->__isset.dictionary_page_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 12: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->statistics.read(iprot); + this->__isset.statistics = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 13: + if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { + { + this->encoding_stats.clear(); + uint32_t _size85; + ::duckdb_apache::thrift::protocol::TType _etype88; + xfer += iprot->readListBegin(_etype88, _size85); + this->encoding_stats.resize(_size85); + uint32_t _i89; + for (_i89 = 0; _i89 < _size85; ++_i89) + { + xfer += this->encoding_stats[_i89].read(iprot); + } + xfer += iprot->readListEnd(); + } + this->__isset.encoding_stats = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } -const ZSTD_customMem ZSTDInternalConstants::ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */ + xfer += iprot->readStructEnd(); -/*-************************************************************* -* Context management -***************************************************************/ -size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx) -{ - if (dctx==NULL) return 0; /* support sizeof NULL */ - return sizeof(*dctx) - + ZSTD_sizeof_DDict(dctx->ddictLocal) - + dctx->inBuffSize + dctx->outBuffSize; + if (!isset_type) + throw TProtocolException(TProtocolException::INVALID_DATA); + 
if (!isset_encodings) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_path_in_schema) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_codec) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_num_values) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_total_uncompressed_size) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_total_compressed_size) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_data_page_offset) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; } -size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); } +uint32_t ColumnMetaData::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("ColumnMetaData"); + xfer += oprot->writeFieldBegin("type", ::duckdb_apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32((int32_t)this->type); + xfer += oprot->writeFieldEnd(); -static size_t ZSTD_startingInputLength(ZSTD_format_e format) -{ - size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format); - /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ - assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) ); - return startingInputLength; -} + xfer += oprot->writeFieldBegin("encodings", ::duckdb_apache::thrift::protocol::T_LIST, 2); + { + xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_I32, static_cast(this->encodings.size())); + std::vector ::const_iterator _iter90; + for (_iter90 = this->encodings.begin(); _iter90 != this->encodings.end(); ++_iter90) + { + xfer += oprot->writeI32((int32_t)(*_iter90)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); -static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) -{ - dctx->format = ZSTD_f_zstd1; 
/* ZSTD_decompressBegin() invokes ZSTD_startingInputLength() with argument dctx->format */ - dctx->staticSize = 0; - dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; - dctx->ddict = NULL; - dctx->ddictLocal = NULL; - dctx->dictEnd = NULL; - dctx->ddictIsCold = 0; - dctx->dictUses = ZSTD_dont_use; - dctx->inBuff = NULL; - dctx->inBuffSize = 0; - dctx->outBuffSize = 0; - dctx->streamStage = zdss_init; - dctx->legacyContext = NULL; - dctx->previousLegacyVersion = 0; - dctx->noForwardProgress = 0; - dctx->oversizedDuration = 0; - dctx->bmi2 = 0; - dctx->outBufferMode = ZSTD_obm_buffered; -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - dctx->dictContentEndForFuzzing = NULL; -#endif -} + xfer += oprot->writeFieldBegin("path_in_schema", ::duckdb_apache::thrift::protocol::T_LIST, 3); + { + xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); + std::vector ::const_iterator _iter91; + for (_iter91 = this->path_in_schema.begin(); _iter91 != this->path_in_schema.end(); ++_iter91) + { + xfer += oprot->writeString((*_iter91)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); -ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) -{ - ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace; + xfer += oprot->writeFieldBegin("codec", ::duckdb_apache::thrift::protocol::T_I32, 4); + xfer += oprot->writeI32((int32_t)this->codec); + xfer += oprot->writeFieldEnd(); - if ((size_t)workspace & 7) return NULL; /* 8-aligned */ - if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL; /* minimum size */ + xfer += oprot->writeFieldBegin("num_values", ::duckdb_apache::thrift::protocol::T_I64, 5); + xfer += oprot->writeI64(this->num_values); + xfer += oprot->writeFieldEnd(); - ZSTD_initDCtx_internal(dctx); - dctx->staticSize = workspaceSize; - dctx->inBuff = (char*)(dctx+1); - return dctx; -} + xfer += oprot->writeFieldBegin("total_uncompressed_size", ::duckdb_apache::thrift::protocol::T_I64, 6); + 
xfer += oprot->writeI64(this->total_uncompressed_size); + xfer += oprot->writeFieldEnd(); -ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) -{ - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + xfer += oprot->writeFieldBegin("total_compressed_size", ::duckdb_apache::thrift::protocol::T_I64, 7); + xfer += oprot->writeI64(this->total_compressed_size); + xfer += oprot->writeFieldEnd(); - { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(*dctx), customMem); - if (!dctx) return NULL; - dctx->customMem = customMem; - ZSTD_initDCtx_internal(dctx); - return dctx; + if (this->__isset.key_value_metadata) { + xfer += oprot->writeFieldBegin("key_value_metadata", ::duckdb_apache::thrift::protocol::T_LIST, 8); + { + xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); + std::vector ::const_iterator _iter92; + for (_iter92 = this->key_value_metadata.begin(); _iter92 != this->key_value_metadata.end(); ++_iter92) + { + xfer += (*_iter92).write(oprot); + } + xfer += oprot->writeListEnd(); } -} - -ZSTD_DCtx* ZSTD_createDCtx(void) -{ - DEBUGLOG(3, "ZSTD_createDCtx"); - return ZSTD_createDCtx_advanced(ZSTDInternalConstants::ZSTD_defaultCMem); -} - -static void ZSTD_clearDict(ZSTD_DCtx* dctx) -{ - ZSTD_freeDDict(dctx->ddictLocal); - dctx->ddictLocal = NULL; - dctx->ddict = NULL; - dctx->dictUses = ZSTD_dont_use; -} + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldBegin("data_page_offset", ::duckdb_apache::thrift::protocol::T_I64, 9); + xfer += oprot->writeI64(this->data_page_offset); + xfer += oprot->writeFieldEnd(); -size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) -{ - if (dctx==NULL) return 0; /* support free on NULL */ - RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx"); - { ZSTD_customMem const cMem = dctx->customMem; - ZSTD_clearDict(dctx); - ZSTD_free(dctx->inBuff, cMem); - dctx->inBuff = NULL; -#if defined(ZSTD_LEGACY_SUPPORT) && 
(ZSTD_LEGACY_SUPPORT >= 1) - if (dctx->legacyContext) - ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion); -#endif - ZSTD_free(dctx, cMem); - return 0; + if (this->__isset.index_page_offset) { + xfer += oprot->writeFieldBegin("index_page_offset", ::duckdb_apache::thrift::protocol::T_I64, 10); + xfer += oprot->writeI64(this->index_page_offset); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.dictionary_page_offset) { + xfer += oprot->writeFieldBegin("dictionary_page_offset", ::duckdb_apache::thrift::protocol::T_I64, 11); + xfer += oprot->writeI64(this->dictionary_page_offset); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.statistics) { + xfer += oprot->writeFieldBegin("statistics", ::duckdb_apache::thrift::protocol::T_STRUCT, 12); + xfer += this->statistics.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.encoding_stats) { + xfer += oprot->writeFieldBegin("encoding_stats", ::duckdb_apache::thrift::protocol::T_LIST, 13); + { + xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->encoding_stats.size())); + std::vector ::const_iterator _iter93; + for (_iter93 = this->encoding_stats.begin(); _iter93 != this->encoding_stats.end(); ++_iter93) + { + xfer += (*_iter93).write(oprot); + } + xfer += oprot->writeListEnd(); } + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; } -/* no longer useful */ -void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) -{ - size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx); - memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ +void swap(ColumnMetaData &a, ColumnMetaData &b) { + using ::std::swap; + swap(a.type, b.type); + swap(a.encodings, b.encodings); + swap(a.path_in_schema, b.path_in_schema); + swap(a.codec, b.codec); + swap(a.num_values, b.num_values); + swap(a.total_uncompressed_size, 
b.total_uncompressed_size); + swap(a.total_compressed_size, b.total_compressed_size); + swap(a.key_value_metadata, b.key_value_metadata); + swap(a.data_page_offset, b.data_page_offset); + swap(a.index_page_offset, b.index_page_offset); + swap(a.dictionary_page_offset, b.dictionary_page_offset); + swap(a.statistics, b.statistics); + swap(a.encoding_stats, b.encoding_stats); + swap(a.__isset, b.__isset); } - -/*-************************************************************* - * Frame header decoding - ***************************************************************/ - -/*! ZSTD_isFrame() : - * Tells if the content of `buffer` starts with a valid Frame Identifier. - * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. - * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. - * Note 3 : Skippable Frame Identifiers are considered valid. */ -unsigned ZSTD_isFrame(const void* buffer, size_t size) -{ - if (size < ZSTD_FRAMEIDSIZE) return 0; - { U32 const magic = MEM_readLE32(buffer); - if (magic == ZSTD_MAGICNUMBER) return 1; - if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1; - } -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) - if (ZSTD_isLegacy(buffer, size)) return 1; -#endif - return 0; +ColumnMetaData::ColumnMetaData(const ColumnMetaData& other94) { + type = other94.type; + encodings = other94.encodings; + path_in_schema = other94.path_in_schema; + codec = other94.codec; + num_values = other94.num_values; + total_uncompressed_size = other94.total_uncompressed_size; + total_compressed_size = other94.total_compressed_size; + key_value_metadata = other94.key_value_metadata; + data_page_offset = other94.data_page_offset; + index_page_offset = other94.index_page_offset; + dictionary_page_offset = other94.dictionary_page_offset; + statistics = other94.statistics; + encoding_stats = other94.encoding_stats; + __isset = other94.__isset; } - -static const size_t 
ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; -static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; -/** ZSTD_frameHeaderSize_internal() : - * srcSize must be large enough to reach header size fields. - * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless. - * @return : size of the Frame Header - * or an error code, which can be tested with ZSTD_isError() */ -static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) -{ - size_t const minInputSize = ZSTD_startingInputLength(format); - RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, ""); - - { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; - U32 const dictID= fhd & 3; - U32 const singleSegment = (fhd >> 5) & 1; - U32 const fcsId = fhd >> 6; - return minInputSize + !singleSegment - + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] - + (singleSegment && !fcsId); - } +ColumnMetaData& ColumnMetaData::operator=(const ColumnMetaData& other95) { + type = other95.type; + encodings = other95.encodings; + path_in_schema = other95.path_in_schema; + codec = other95.codec; + num_values = other95.num_values; + total_uncompressed_size = other95.total_uncompressed_size; + total_compressed_size = other95.total_compressed_size; + key_value_metadata = other95.key_value_metadata; + data_page_offset = other95.data_page_offset; + index_page_offset = other95.index_page_offset; + dictionary_page_offset = other95.dictionary_page_offset; + statistics = other95.statistics; + encoding_stats = other95.encoding_stats; + __isset = other95.__isset; + return *this; } - -/** ZSTD_frameHeaderSize() : - * srcSize must be >= ZSTD_frameHeaderSize_prefix. 
- * @return : size of the Frame Header, - * or an error code (if srcSize is too small) */ -size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) -{ - return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1); +void ColumnMetaData::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "ColumnMetaData("; + out << "type=" << to_string(type); + out << ", " << "encodings=" << to_string(encodings); + out << ", " << "path_in_schema=" << to_string(path_in_schema); + out << ", " << "codec=" << to_string(codec); + out << ", " << "num_values=" << to_string(num_values); + out << ", " << "total_uncompressed_size=" << to_string(total_uncompressed_size); + out << ", " << "total_compressed_size=" << to_string(total_compressed_size); + out << ", " << "key_value_metadata="; (__isset.key_value_metadata ? (out << to_string(key_value_metadata)) : (out << "")); + out << ", " << "data_page_offset=" << to_string(data_page_offset); + out << ", " << "index_page_offset="; (__isset.index_page_offset ? (out << to_string(index_page_offset)) : (out << "")); + out << ", " << "dictionary_page_offset="; (__isset.dictionary_page_offset ? (out << to_string(dictionary_page_offset)) : (out << "")); + out << ", " << "statistics="; (__isset.statistics ? (out << to_string(statistics)) : (out << "")); + out << ", " << "encoding_stats="; (__isset.encoding_stats ? (out << to_string(encoding_stats)) : (out << "")); + out << ")"; } -/** ZSTD_getFrameHeader_advanced() : - * decode Frame Header, or require larger `srcSize`. 
- * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless - * @return : 0, `zfhPtr` is correctly filled, - * >0, `srcSize` is too small, value is wanted `srcSize` amount, - * or an error code, which can be tested using ZSTD_isError() */ -size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format) -{ - const BYTE* ip = (const BYTE*)src; - size_t const minInputSize = ZSTD_startingInputLength(format); - - memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ - if (srcSize < minInputSize) return minInputSize; - RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter"); - - if ( (format != ZSTD_f_zstd1_magicless) - && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { - if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { - /* skippable frame */ - if (srcSize < ZSTD_SKIPPABLEHEADERSIZE) - return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */ - memset(zfhPtr, 0, sizeof(*zfhPtr)); - zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE); - zfhPtr->frameType = ZSTD_skippableFrame; - return 0; - } - RETURN_ERROR(prefix_unknown, ""); - } - - /* ensure there is enough `srcSize` to fully read/decode frame header */ - { size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format); - if (srcSize < fhsize) return fhsize; - zfhPtr->headerSize = (U32)fhsize; - } - - { BYTE const fhdByte = ip[minInputSize-1]; - size_t pos = minInputSize; - U32 const dictIDSizeCode = fhdByte&3; - U32 const checksumFlag = (fhdByte>>2)&1; - U32 const singleSegment = (fhdByte>>5)&1; - U32 const fcsID = fhdByte>>6; - U64 windowSize = 0; - U32 dictID = 0; - U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN; - RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported, - "reserved bits, must be zero"); - - if 
(!singleSegment) { - BYTE const wlByte = ip[pos++]; - U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; - RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, ""); - windowSize = (1ULL << windowLog); - windowSize += (windowSize >> 3) * (wlByte&7); - } - switch(dictIDSizeCode) - { - default: assert(0); /* impossible */ - case 0 : break; - case 1 : dictID = ip[pos]; pos++; break; - case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break; - case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break; - } - switch(fcsID) - { - default: assert(0); /* impossible */ - case 0 : if (singleSegment) frameContentSize = ip[pos]; break; - case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break; - case 2 : frameContentSize = MEM_readLE32(ip+pos); break; - case 3 : frameContentSize = MEM_readLE64(ip+pos); break; - } - if (singleSegment) windowSize = frameContentSize; - - zfhPtr->frameType = ZSTD_frame; - zfhPtr->frameContentSize = frameContentSize; - zfhPtr->windowSize = windowSize; - zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); - zfhPtr->dictID = dictID; - zfhPtr->checksumFlag = checksumFlag; - } - return 0; -} - -/** ZSTD_getFrameHeader() : - * decode Frame Header, or require larger `srcSize`. - * note : this function does not consume input, it only reads it. 
- * @return : 0, `zfhPtr` is correctly filled, - * >0, `srcSize` is too small, value is wanted `srcSize` amount, - * or an error code, which can be tested using ZSTD_isError() */ -size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize) -{ - return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1); +EncryptionWithFooterKey::~EncryptionWithFooterKey() throw() { } - -/** ZSTD_getFrameContentSize() : - * compatible with legacy mode - * @return : decompressed size of the single frame pointed to be `src` if known, otherwise - * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined - * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */ -unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize) +std::ostream& operator<<(std::ostream& out, const EncryptionWithFooterKey& obj) { -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) - if (ZSTD_isLegacy(src, srcSize)) { - unsigned long long const ret = ZSTD_getDecompressedSize_legacy(src, srcSize); - return ret == 0 ? 
ZSTD_CONTENTSIZE_UNKNOWN : ret; - } -#endif - { ZSTD_frameHeader zfh; - if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0) - return ZSTD_CONTENTSIZE_ERROR; - if (zfh.frameType == ZSTD_skippableFrame) { - return 0; - } else { - return zfh.frameContentSize; - } } + obj.printTo(out); + return out; } -static size_t readSkippableFrameSize(void const* src, size_t srcSize) -{ - size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE; - U32 sizeU32; - - RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, ""); - sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); - RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, - frameParameter_unsupported, ""); - { - size_t const skippableSize = skippableHeaderSize + sizeU32; - RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, ""); - return skippableSize; - } -} +uint32_t EncryptionWithFooterKey::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { -/** ZSTD_findDecompressedSize() : - * compatible with legacy mode - * `srcSize` must be the exact length of some number of ZSTD compressed and/or - * skippable frames - * @return : decompressed size of the frames contained */ -unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) -{ - unsigned long long totalDstSize = 0; + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; - while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) { - U32 const magicNumber = MEM_readLE32(src); + xfer += iprot->readStructBegin(fname); - if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { - size_t const skippableSize = readSkippableFrameSize(src, srcSize); - if (ZSTD_isError(skippableSize)) { - return ZSTD_CONTENTSIZE_ERROR; - } - assert(skippableSize <= srcSize); + using ::duckdb_apache::thrift::protocol::TProtocolException; - src = (const BYTE *)src + skippableSize; - 
srcSize -= skippableSize; - continue; - } - { unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); - if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret; + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } - /* check for overflow */ - if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR; - totalDstSize += ret; - } - { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); - if (ZSTD_isError(frameSrcSize)) { - return ZSTD_CONTENTSIZE_ERROR; - } + xfer += iprot->readStructEnd(); - src = (const BYTE *)src + frameSrcSize; - srcSize -= frameSrcSize; - } - } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ + return xfer; +} - if (srcSize) return ZSTD_CONTENTSIZE_ERROR; +uint32_t EncryptionWithFooterKey::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("EncryptionWithFooterKey"); - return totalDstSize; + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; } -/** ZSTD_getDecompressedSize() : - * compatible with legacy mode - * @return : decompressed size if known, 0 otherwise - note : 0 can mean any of the following : - - frame content is empty - - decompressed size field is not present in frame header - - frame header unknown / not supported - - frame header not complete (`srcSize` too small) */ -unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) -{ - unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); - ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN); - return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 
0 : ret; +void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b) { + using ::std::swap; + (void) a; + (void) b; } +EncryptionWithFooterKey::EncryptionWithFooterKey(const EncryptionWithFooterKey& other96) { + (void) other96; +} +EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(const EncryptionWithFooterKey& other97) { + (void) other97; + return *this; +} +void EncryptionWithFooterKey::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "EncryptionWithFooterKey("; + out << ")"; +} -/** ZSTD_decodeFrameHeader() : - * `headerSize` must be the size provided by ZSTD_frameHeaderSize(). - * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ -static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) -{ - size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format); - if (ZSTD_isError(result)) return result; /* invalid header */ - RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small"); -#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - /* Skip the dictID check in fuzzing mode, because it makes the search - * harder. 
- */ - RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), - dictionary_wrong, ""); -#endif - if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); - return 0; + +EncryptionWithColumnKey::~EncryptionWithColumnKey() throw() { } -static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret) -{ - ZSTD_frameSizeInfo frameSizeInfo; - frameSizeInfo.compressedSize = ret; - frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; - return frameSizeInfo; + +void EncryptionWithColumnKey::__set_path_in_schema(const std::vector & val) { + this->path_in_schema = val; } -static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) +void EncryptionWithColumnKey::__set_key_metadata(const std::string& val) { + this->key_metadata = val; +__isset.key_metadata = true; +} +std::ostream& operator<<(std::ostream& out, const EncryptionWithColumnKey& obj) { - ZSTD_frameSizeInfo frameSizeInfo; - memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); - -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) - if (ZSTD_isLegacy(src, srcSize)) - return ZSTD_findFrameSizeInfoLegacy(src, srcSize); -#endif + obj.printTo(out); + return out; +} - if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE) - && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { - frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); - assert(ZSTD_isError(frameSizeInfo.compressedSize) || - frameSizeInfo.compressedSize <= srcSize); - return frameSizeInfo; - } else { - const BYTE* ip = (const BYTE*)src; - const BYTE* const ipstart = ip; - size_t remainingSize = srcSize; - size_t nbBlocks = 0; - ZSTD_frameHeader zfh; - /* Extract Frame Header */ - { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); - if (ZSTD_isError(ret)) - return ZSTD_errorFrameSizeInfo(ret); - if (ret > 0) - return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); - } +uint32_t 
EncryptionWithColumnKey::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - ip += zfh.headerSize; - remainingSize -= zfh.headerSize; + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; - /* Iterate over each block */ - while (1) { - blockProperties_t blockProperties; - size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); - if (ZSTD_isError(cBlockSize)) - return ZSTD_errorFrameSizeInfo(cBlockSize); + xfer += iprot->readStructBegin(fname); - if (ZSTDInternalConstants::ZSTD_blockHeaderSize + cBlockSize > remainingSize) - return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + using ::duckdb_apache::thrift::protocol::TProtocolException; - ip += ZSTDInternalConstants::ZSTD_blockHeaderSize + cBlockSize; - remainingSize -= ZSTDInternalConstants::ZSTD_blockHeaderSize + cBlockSize; - nbBlocks++; + bool isset_path_in_schema = false; - if (blockProperties.lastBlock) break; + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { + { + this->path_in_schema.clear(); + uint32_t _size98; + ::duckdb_apache::thrift::protocol::TType _etype101; + xfer += iprot->readListBegin(_etype101, _size98); + this->path_in_schema.resize(_size98); + uint32_t _i102; + for (_i102 = 0; _i102 < _size98; ++_i102) + { + xfer += iprot->readString(this->path_in_schema[_i102]); + } + xfer += iprot->readListEnd(); + } + isset_path_in_schema = true; + } else { + xfer += iprot->skip(ftype); } - - /* Final frame content checksum */ - if (zfh.checksumFlag) { - if (remainingSize < 4) - return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); - ip += 4; + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { + xfer += 
iprot->readBinary(this->key_metadata); + this->__isset.key_metadata = true; + } else { + xfer += iprot->skip(ftype); } - - frameSizeInfo.compressedSize = ip - ipstart; - frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) - ? zfh.frameContentSize - : nbBlocks * zfh.blockSizeMax; - return frameSizeInfo; + break; + default: + xfer += iprot->skip(ftype); + break; } -} + xfer += iprot->readFieldEnd(); + } -/** ZSTD_findFrameCompressedSize() : - * compatible with legacy mode - * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame - * `srcSize` must be at least as large as the frame contained - * @return : the compressed size of the frame starting at `src` */ -size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) -{ - ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); - return frameSizeInfo.compressedSize; + xfer += iprot->readStructEnd(); + + if (!isset_path_in_schema) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; } -/** ZSTD_decompressBound() : - * compatible with legacy mode - * `src` must point to the start of a ZSTD frame or a skippeable frame - * `srcSize` must be at least as large as the frame contained - * @return : the maximum decompressed size of the compressed source - */ -unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) -{ - unsigned long long bound = 0; - /* Iterate over each frame */ - while (srcSize > 0) { - ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); - size_t const compressedSize = frameSizeInfo.compressedSize; - unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; - if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) - return ZSTD_CONTENTSIZE_ERROR; - assert(srcSize >= compressedSize); - src = (const BYTE*)src + compressedSize; - srcSize -= compressedSize; - bound += decompressedBound; +uint32_t 
EncryptionWithColumnKey::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("EncryptionWithColumnKey"); + + xfer += oprot->writeFieldBegin("path_in_schema", ::duckdb_apache::thrift::protocol::T_LIST, 1); + { + xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); + std::vector ::const_iterator _iter103; + for (_iter103 = this->path_in_schema.begin(); _iter103 != this->path_in_schema.end(); ++_iter103) + { + xfer += oprot->writeString((*_iter103)); } - return bound; + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + if (this->__isset.key_metadata) { + xfer += oprot->writeFieldBegin("key_metadata", ::duckdb_apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeBinary(this->key_metadata); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; } +void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b) { + using ::std::swap; + swap(a.path_in_schema, b.path_in_schema); + swap(a.key_metadata, b.key_metadata); + swap(a.__isset, b.__isset); +} -/*-************************************************************* - * Frame decoding - ***************************************************************/ +EncryptionWithColumnKey::EncryptionWithColumnKey(const EncryptionWithColumnKey& other104) { + path_in_schema = other104.path_in_schema; + key_metadata = other104.key_metadata; + __isset = other104.__isset; +} +EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(const EncryptionWithColumnKey& other105) { + path_in_schema = other105.path_in_schema; + key_metadata = other105.key_metadata; + __isset = other105.__isset; + return *this; +} +void EncryptionWithColumnKey::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << 
"EncryptionWithColumnKey("; + out << "path_in_schema=" << to_string(path_in_schema); + out << ", " << "key_metadata="; (__isset.key_metadata ? (out << to_string(key_metadata)) : (out << "")); + out << ")"; +} -/** ZSTD_insertBlock() : - * insert `src` block into `dctx` history. Useful to track uncompressed blocks. */ -size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) -{ - DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize); - ZSTD_checkContinuity(dctx, blockStart); - dctx->previousDstEnd = (const char*)blockStart + blockSize; - return blockSize; + +ColumnCryptoMetaData::~ColumnCryptoMetaData() throw() { } -static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - DEBUGLOG(5, "ZSTD_copyRawBlock"); - if (dst == NULL) { - if (srcSize == 0) return 0; - RETURN_ERROR(dstBuffer_null, ""); - } - RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); - memcpy(dst, src, srcSize); - return srcSize; +void ColumnCryptoMetaData::__set_ENCRYPTION_WITH_FOOTER_KEY(const EncryptionWithFooterKey& val) { + this->ENCRYPTION_WITH_FOOTER_KEY = val; +__isset.ENCRYPTION_WITH_FOOTER_KEY = true; } -static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, - BYTE b, - size_t regenSize) +void ColumnCryptoMetaData::__set_ENCRYPTION_WITH_COLUMN_KEY(const EncryptionWithColumnKey& val) { + this->ENCRYPTION_WITH_COLUMN_KEY = val; +__isset.ENCRYPTION_WITH_COLUMN_KEY = true; +} +std::ostream& operator<<(std::ostream& out, const ColumnCryptoMetaData& obj) { - if (dst == NULL) { - if (regenSize == 0) return 0; - RETURN_ERROR(dstBuffer_null, ""); - } - RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, ""); - memset(dst, b, regenSize); - return regenSize; + obj.printTo(out); + return out; } -/*! ZSTD_decompressFrame() : - * @dctx must be properly initialized - * will update *srcPtr and *srcSizePtr, - * to make *srcPtr progress by one frame. 
*/ -static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void** srcPtr, size_t *srcSizePtr) -{ - const BYTE* ip = (const BYTE*)(*srcPtr); - BYTE* const ostart = (BYTE* const)dst; - BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart; - BYTE* op = ostart; - size_t remainingSrcSize = *srcSizePtr; - - DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr); +uint32_t ColumnCryptoMetaData::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - /* check */ - RETURN_ERROR_IF( - remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTDInternalConstants::ZSTD_blockHeaderSize, - srcSize_wrong, ""); + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; - /* Frame Header */ - { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal( - ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format); - if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; - RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTDInternalConstants::ZSTD_blockHeaderSize, - srcSize_wrong, ""); - FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , ""); - ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; - } + xfer += iprot->readStructBegin(fname); - /* Loop on each block */ - while (1) { - size_t decodedSize; - blockProperties_t blockProperties; - size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties); - if (ZSTD_isError(cBlockSize)) return cBlockSize; + using ::duckdb_apache::thrift::protocol::TProtocolException; - ip += ZSTDInternalConstants::ZSTD_blockHeaderSize; - remainingSrcSize -= ZSTDInternalConstants::ZSTD_blockHeaderSize; - RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, ""); - switch(blockProperties.blockType) - { - case bt_compressed: - decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, 
cBlockSize, /* frame */ 1); - break; - case bt_raw : - decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize); - break; - case bt_rle : - decodedSize = ZSTD_setRleBlock(op, oend-op, *ip, blockProperties.origSize); - break; - case bt_reserved : - default: - RETURN_ERROR(corruption_detected, "invalid block type"); + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->ENCRYPTION_WITH_FOOTER_KEY.read(iprot); + this->__isset.ENCRYPTION_WITH_FOOTER_KEY = true; + } else { + xfer += iprot->skip(ftype); } - - if (ZSTD_isError(decodedSize)) return decodedSize; - if (dctx->fParams.checksumFlag) - XXH64_update(&dctx->xxhState, op, decodedSize); - if (decodedSize != 0) - op += decodedSize; - assert(ip != NULL); - ip += cBlockSize; - remainingSrcSize -= cBlockSize; - if (blockProperties.lastBlock) break; + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->ENCRYPTION_WITH_COLUMN_KEY.read(iprot); + this->__isset.ENCRYPTION_WITH_COLUMN_KEY = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; } + xfer += iprot->readFieldEnd(); + } - if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { - RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize, - corruption_detected, ""); - } - if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ - U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); - U32 checkRead; - RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, ""); - checkRead = MEM_readLE32(ip); - RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, ""); - ip += 4; - remainingSrcSize -= 4; - } + xfer += iprot->readStructEnd(); - /* Allow caller to get size read */ - *srcPtr = ip; - *srcSizePtr = remainingSrcSize; - return op-ostart; + 
return xfer; } -static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict, size_t dictSize, - const ZSTD_DDict* ddict) -{ - void* const dststart = dst; - int moreThan1Frame = 0; - - DEBUGLOG(5, "ZSTD_decompressMultiFrame"); - assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */ - - if (ddict) { - dict = ZSTD_DDict_dictContent(ddict); - dictSize = ZSTD_DDict_dictSize(ddict); - } - - while (srcSize >= ZSTD_startingInputLength(dctx->format)) { - -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) - if (ZSTD_isLegacy(src, srcSize)) { - size_t decodedSize; - size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize); - if (ZSTD_isError(frameSize)) return frameSize; - RETURN_ERROR_IF(dctx->staticSize, memory_allocation, - "legacy support is not compatible with static dctx"); - - decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); - if (ZSTD_isError(decodedSize)) return decodedSize; +uint32_t ColumnCryptoMetaData::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("ColumnCryptoMetaData"); - assert(decodedSize <=- dstCapacity); - dst = (BYTE*)dst + decodedSize; - dstCapacity -= decodedSize; + if (this->__isset.ENCRYPTION_WITH_FOOTER_KEY) { + xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_FOOTER_KEY", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); + xfer += this->ENCRYPTION_WITH_FOOTER_KEY.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.ENCRYPTION_WITH_COLUMN_KEY) { + xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_COLUMN_KEY", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); + xfer += this->ENCRYPTION_WITH_COLUMN_KEY.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += 
oprot->writeStructEnd(); + return xfer; +} - src = (const BYTE*)src + frameSize; - srcSize -= frameSize; +void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b) { + using ::std::swap; + swap(a.ENCRYPTION_WITH_FOOTER_KEY, b.ENCRYPTION_WITH_FOOTER_KEY); + swap(a.ENCRYPTION_WITH_COLUMN_KEY, b.ENCRYPTION_WITH_COLUMN_KEY); + swap(a.__isset, b.__isset); +} - continue; - } -#endif +ColumnCryptoMetaData::ColumnCryptoMetaData(const ColumnCryptoMetaData& other106) { + ENCRYPTION_WITH_FOOTER_KEY = other106.ENCRYPTION_WITH_FOOTER_KEY; + ENCRYPTION_WITH_COLUMN_KEY = other106.ENCRYPTION_WITH_COLUMN_KEY; + __isset = other106.__isset; +} +ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(const ColumnCryptoMetaData& other107) { + ENCRYPTION_WITH_FOOTER_KEY = other107.ENCRYPTION_WITH_FOOTER_KEY; + ENCRYPTION_WITH_COLUMN_KEY = other107.ENCRYPTION_WITH_COLUMN_KEY; + __isset = other107.__isset; + return *this; +} +void ColumnCryptoMetaData::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "ColumnCryptoMetaData("; + out << "ENCRYPTION_WITH_FOOTER_KEY="; (__isset.ENCRYPTION_WITH_FOOTER_KEY ? (out << to_string(ENCRYPTION_WITH_FOOTER_KEY)) : (out << "")); + out << ", " << "ENCRYPTION_WITH_COLUMN_KEY="; (__isset.ENCRYPTION_WITH_COLUMN_KEY ? 
(out << to_string(ENCRYPTION_WITH_COLUMN_KEY)) : (out << "")); + out << ")"; +} - { U32 const magicNumber = MEM_readLE32(src); - DEBUGLOG(4, "reading magic number %08X (expecting %08X)", - (unsigned)magicNumber, ZSTD_MAGICNUMBER); - if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { - size_t const skippableSize = readSkippableFrameSize(src, srcSize); - FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed"); - assert(skippableSize <= srcSize); - src = (const BYTE *)src + skippableSize; - srcSize -= skippableSize; - continue; - } } +ColumnChunk::~ColumnChunk() throw() { +} - if (ddict) { - /* we were called from ZSTD_decompress_usingDDict */ - FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), ""); - } else { - /* this will initialize correctly with no dict if dict == NULL, so - * use this in all cases but ddict */ - FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), ""); - } - ZSTD_checkContinuity(dctx, dst); - { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, - &src, &srcSize); - RETURN_ERROR_IF( - (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) - && (moreThan1Frame==1), - srcSize_wrong, - "at least one frame successfully completed, but following " - "bytes are garbage: it's more likely to be a srcSize error, " - "specifying more bytes than compressed size of frame(s). This " - "error message replaces ERROR(prefix_unknown), which would be " - "confusing, as the first header is actually correct. Note that " - "one could be unlucky, it might be a corruption error instead, " - "happening right at the place where we expect zstd magic " - "bytes. 
But this is _much_ less likely than a srcSize field " - "error."); - if (ZSTD_isError(res)) return res; - assert(res <= dstCapacity); - if (res != 0) - dst = (BYTE*)dst + res; - dstCapacity -= res; - } - moreThan1Frame = 1; - } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ +void ColumnChunk::__set_file_path(const std::string& val) { + this->file_path = val; +__isset.file_path = true; +} - RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed"); +void ColumnChunk::__set_file_offset(const int64_t val) { + this->file_offset = val; +} - return (BYTE*)dst - (BYTE*)dststart; +void ColumnChunk::__set_meta_data(const ColumnMetaData& val) { + this->meta_data = val; +__isset.meta_data = true; } -size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict, size_t dictSize) -{ - return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL); +void ColumnChunk::__set_offset_index_offset(const int64_t val) { + this->offset_index_offset = val; +__isset.offset_index_offset = true; } +void ColumnChunk::__set_offset_index_length(const int32_t val) { + this->offset_index_length = val; +__isset.offset_index_length = true; +} -static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx) -{ - switch (dctx->dictUses) { - default: - assert(0 /* Impossible */); - /* fall-through */ - case ZSTD_dont_use: - ZSTD_clearDict(dctx); - return NULL; - case ZSTD_use_indefinitely: - return dctx->ddict; - case ZSTD_use_once: - dctx->dictUses = ZSTD_dont_use; - return dctx->ddict; - } +void ColumnChunk::__set_column_index_offset(const int64_t val) { + this->column_index_offset = val; +__isset.column_index_offset = true; } -size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx)); +void ColumnChunk::__set_column_index_length(const 
int32_t val) { + this->column_index_length = val; +__isset.column_index_length = true; } +void ColumnChunk::__set_crypto_metadata(const ColumnCryptoMetaData& val) { + this->crypto_metadata = val; +__isset.crypto_metadata = true; +} -size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +void ColumnChunk::__set_encrypted_column_metadata(const std::string& val) { + this->encrypted_column_metadata = val; +__isset.encrypted_column_metadata = true; +} +std::ostream& operator<<(std::ostream& out, const ColumnChunk& obj) { -#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) - size_t regenSize; - ZSTD_DCtx* const dctx = ZSTD_createDCtx(); - RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!"); - regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); - ZSTD_freeDCtx(dctx); - return regenSize; -#else /* stack mode */ - ZSTD_DCtx dctx; - ZSTD_initDCtx_internal(&dctx); - return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize); -#endif + obj.printTo(out); + return out; } -/*-************************************** -* Advanced Streaming Decompression API -* Bufferless and synchronous -****************************************/ -size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; } +uint32_t ColumnChunk::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { -/** - * Similar to ZSTD_nextSrcSizeToDecompress(), but when when a block input can be streamed, - * we allow taking a partial block as the input. Currently only raw uncompressed blocks can - * be streamed. - * - * For blocks that can be streamed, this allows us to reduce the latency until we produce - * output, and avoid copying the input. - * - * @param inputSize - The total amount of input that the caller currently has. 
- */ -static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) { - if (!(dctx->stage == ZSTDds_decompressBlock || dctx->stage == ZSTDds_decompressLastBlock)) - return dctx->expected; - if (dctx->bType != bt_raw) - return dctx->expected; - return MIN(MAX(inputSize, 1), dctx->expected); -} + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; -ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) { - switch(dctx->stage) + xfer += iprot->readStructBegin(fname); + + using ::duckdb_apache::thrift::protocol::TProtocolException; + + bool isset_file_offset = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) { - default: /* should not happen */ - assert(0); - case ZSTDds_getFrameHeaderSize: - case ZSTDds_decodeFrameHeader: - return ZSTDnit_frameHeader; - case ZSTDds_decodeBlockHeader: - return ZSTDnit_blockHeader; - case ZSTDds_decompressBlock: - return ZSTDnit_block; - case ZSTDds_decompressLastBlock: - return ZSTDnit_lastBlock; - case ZSTDds_checkChecksum: - return ZSTDnit_checksum; - case ZSTDds_decodeSkippableHeader: - case ZSTDds_skipFrame: - return ZSTDnit_skippableFrame; + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->file_path); + this->__isset.file_path = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->file_offset); + isset_file_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->meta_data.read(iprot); + this->__isset.meta_data = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == 
::duckdb_apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->offset_index_offset); + this->__isset.offset_index_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->offset_index_length); + this->__isset.offset_index_length = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->column_index_offset); + this->__isset.column_index_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->column_index_length); + this->__isset.column_index_length = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->crypto_metadata.read(iprot); + this->__isset.crypto_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 9: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->encrypted_column_metadata); + this->__isset.encrypted_column_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; } -} + xfer += iprot->readFieldEnd(); + } -static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; } + xfer += iprot->readStructEnd(); -/** ZSTD_decompressContinue() : - * srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress()) - * @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity) - * or an error code, which can be tested using ZSTD_isError() */ -size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize); - /* Sanity check */ - 
RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed"); - if (dstCapacity) ZSTD_checkContinuity(dctx, dst); + if (!isset_file_offset) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} - switch (dctx->stage) - { - case ZSTDds_getFrameHeaderSize : - assert(src != NULL); - if (dctx->format == ZSTD_f_zstd1) { /* allows header */ - assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */ - if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ - memcpy(dctx->headerBuffer, src, srcSize); - dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize; /* remaining to load to get full skippable frame header */ - dctx->stage = ZSTDds_decodeSkippableHeader; - return 0; - } } - dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format); - if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize; - memcpy(dctx->headerBuffer, src, srcSize); - dctx->expected = dctx->headerSize - srcSize; - dctx->stage = ZSTDds_decodeFrameHeader; - return 0; +uint32_t ColumnChunk::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("ColumnChunk"); - case ZSTDds_decodeFrameHeader: - assert(src != NULL); - memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); - FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), ""); - dctx->expected = ZSTDInternalConstants::ZSTD_blockHeaderSize; - dctx->stage = ZSTDds_decodeBlockHeader; - return 0; + if (this->__isset.file_path) { + xfer += oprot->writeFieldBegin("file_path", ::duckdb_apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeString(this->file_path); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldBegin("file_offset", ::duckdb_apache::thrift::protocol::T_I64, 2); 
+ xfer += oprot->writeI64(this->file_offset); + xfer += oprot->writeFieldEnd(); - case ZSTDds_decodeBlockHeader: - { blockProperties_t bp; - size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTDInternalConstants::ZSTD_blockHeaderSize, &bp); - if (ZSTD_isError(cBlockSize)) return cBlockSize; - RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum"); - dctx->expected = cBlockSize; - dctx->bType = bp.blockType; - dctx->rleSize = bp.origSize; - if (cBlockSize) { - dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock; - return 0; - } - /* empty block */ - if (bp.lastBlock) { - if (dctx->fParams.checksumFlag) { - dctx->expected = 4; - dctx->stage = ZSTDds_checkChecksum; - } else { - dctx->expected = 0; /* end of frame */ - dctx->stage = ZSTDds_getFrameHeaderSize; - } - } else { - dctx->expected = ZSTDInternalConstants::ZSTD_blockHeaderSize; /* jump to next header */ - dctx->stage = ZSTDds_decodeBlockHeader; - } - return 0; - } + if (this->__isset.meta_data) { + xfer += oprot->writeFieldBegin("meta_data", ::duckdb_apache::thrift::protocol::T_STRUCT, 3); + xfer += this->meta_data.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.offset_index_offset) { + xfer += oprot->writeFieldBegin("offset_index_offset", ::duckdb_apache::thrift::protocol::T_I64, 4); + xfer += oprot->writeI64(this->offset_index_offset); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.offset_index_length) { + xfer += oprot->writeFieldBegin("offset_index_length", ::duckdb_apache::thrift::protocol::T_I32, 5); + xfer += oprot->writeI32(this->offset_index_length); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.column_index_offset) { + xfer += oprot->writeFieldBegin("column_index_offset", ::duckdb_apache::thrift::protocol::T_I64, 6); + xfer += oprot->writeI64(this->column_index_offset); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.column_index_length) { + xfer += 
oprot->writeFieldBegin("column_index_length", ::duckdb_apache::thrift::protocol::T_I32, 7); + xfer += oprot->writeI32(this->column_index_length); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.crypto_metadata) { + xfer += oprot->writeFieldBegin("crypto_metadata", ::duckdb_apache::thrift::protocol::T_STRUCT, 8); + xfer += this->crypto_metadata.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.encrypted_column_metadata) { + xfer += oprot->writeFieldBegin("encrypted_column_metadata", ::duckdb_apache::thrift::protocol::T_STRING, 9); + xfer += oprot->writeBinary(this->encrypted_column_metadata); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} - case ZSTDds_decompressLastBlock: - case ZSTDds_decompressBlock: - DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock"); - { size_t rSize; - switch(dctx->bType) - { - case bt_compressed: - DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); - rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1); - dctx->expected = 0; /* Streaming not supported */ - break; - case bt_raw : - assert(srcSize <= dctx->expected); - rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); - FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed"); - assert(rSize == srcSize); - dctx->expected -= rSize; - break; - case bt_rle : - rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize); - dctx->expected = 0; /* Streaming not supported */ - break; - case bt_reserved : /* should never happen */ - default: - RETURN_ERROR(corruption_detected, "invalid block type"); - } - FORWARD_IF_ERROR(rSize, ""); - RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); - DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); - dctx->decodedSize += rSize; - if (dctx->fParams.checksumFlag) 
XXH64_update(&dctx->xxhState, dst, rSize); - dctx->previousDstEnd = (char*)dst + rSize; +void swap(ColumnChunk &a, ColumnChunk &b) { + using ::std::swap; + swap(a.file_path, b.file_path); + swap(a.file_offset, b.file_offset); + swap(a.meta_data, b.meta_data); + swap(a.offset_index_offset, b.offset_index_offset); + swap(a.offset_index_length, b.offset_index_length); + swap(a.column_index_offset, b.column_index_offset); + swap(a.column_index_length, b.column_index_length); + swap(a.crypto_metadata, b.crypto_metadata); + swap(a.encrypted_column_metadata, b.encrypted_column_metadata); + swap(a.__isset, b.__isset); +} - /* Stay on the same stage until we are finished streaming the block. */ - if (dctx->expected > 0) { - return rSize; - } +ColumnChunk::ColumnChunk(const ColumnChunk& other108) { + file_path = other108.file_path; + file_offset = other108.file_offset; + meta_data = other108.meta_data; + offset_index_offset = other108.offset_index_offset; + offset_index_length = other108.offset_index_length; + column_index_offset = other108.column_index_offset; + column_index_length = other108.column_index_length; + crypto_metadata = other108.crypto_metadata; + encrypted_column_metadata = other108.encrypted_column_metadata; + __isset = other108.__isset; +} +ColumnChunk& ColumnChunk::operator=(const ColumnChunk& other109) { + file_path = other109.file_path; + file_offset = other109.file_offset; + meta_data = other109.meta_data; + offset_index_offset = other109.offset_index_offset; + offset_index_length = other109.offset_index_length; + column_index_offset = other109.column_index_offset; + column_index_length = other109.column_index_length; + crypto_metadata = other109.crypto_metadata; + encrypted_column_metadata = other109.encrypted_column_metadata; + __isset = other109.__isset; + return *this; +} +void ColumnChunk::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "ColumnChunk("; + out << "file_path="; (__isset.file_path ? 
(out << to_string(file_path)) : (out << "")); + out << ", " << "file_offset=" << to_string(file_offset); + out << ", " << "meta_data="; (__isset.meta_data ? (out << to_string(meta_data)) : (out << "")); + out << ", " << "offset_index_offset="; (__isset.offset_index_offset ? (out << to_string(offset_index_offset)) : (out << "")); + out << ", " << "offset_index_length="; (__isset.offset_index_length ? (out << to_string(offset_index_length)) : (out << "")); + out << ", " << "column_index_offset="; (__isset.column_index_offset ? (out << to_string(column_index_offset)) : (out << "")); + out << ", " << "column_index_length="; (__isset.column_index_length ? (out << to_string(column_index_length)) : (out << "")); + out << ", " << "crypto_metadata="; (__isset.crypto_metadata ? (out << to_string(crypto_metadata)) : (out << "")); + out << ", " << "encrypted_column_metadata="; (__isset.encrypted_column_metadata ? (out << to_string(encrypted_column_metadata)) : (out << "")); + out << ")"; +} - if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ - DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize); - RETURN_ERROR_IF( - dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN - && dctx->decodedSize != dctx->fParams.frameContentSize, - corruption_detected, ""); - if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ - dctx->expected = 4; - dctx->stage = ZSTDds_checkChecksum; - } else { - dctx->expected = 0; /* ends here */ - dctx->stage = ZSTDds_getFrameHeaderSize; - } - } else { - dctx->stage = ZSTDds_decodeBlockHeader; - dctx->expected = ZSTDInternalConstants::ZSTD_blockHeaderSize; - } - return rSize; - } - case ZSTDds_checkChecksum: - assert(srcSize == 4); /* guaranteed by dctx->expected */ - { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); - U32 const check32 = MEM_readLE32(src); - DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, 
(unsigned)check32); - RETURN_ERROR_IF(check32 != h32, checksum_wrong, ""); - dctx->expected = 0; - dctx->stage = ZSTDds_getFrameHeaderSize; - return 0; - } +RowGroup::~RowGroup() throw() { +} + - case ZSTDds_decodeSkippableHeader: - assert(src != NULL); - assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); - memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ - dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ - dctx->stage = ZSTDds_skipFrame; - return 0; +void RowGroup::__set_columns(const std::vector & val) { + this->columns = val; +} - case ZSTDds_skipFrame: - dctx->expected = 0; - dctx->stage = ZSTDds_getFrameHeaderSize; - return 0; +void RowGroup::__set_total_byte_size(const int64_t val) { + this->total_byte_size = val; +} - default: - assert(0); /* impossible */ - RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ - } +void RowGroup::__set_num_rows(const int64_t val) { + this->num_rows = val; } +void RowGroup::__set_sorting_columns(const std::vector & val) { + this->sorting_columns = val; +__isset.sorting_columns = true; +} -static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) -{ - dctx->dictEnd = dctx->previousDstEnd; - dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); - dctx->prefixStart = dict; - dctx->previousDstEnd = (const char*)dict + dictSize; -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - dctx->dictContentBeginForFuzzing = dctx->prefixStart; - dctx->dictContentEndForFuzzing = dctx->previousDstEnd; -#endif - return 0; +void RowGroup::__set_file_offset(const int64_t val) { + this->file_offset = val; +__isset.file_offset = true; } -/*! ZSTD_loadDEntropy() : - * dict : must point at beginning of a valid zstd dictionary. 
- * @return : size of entropy tables read */ -size_t -ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, - const void* const dict, size_t const dictSize) +void RowGroup::__set_total_compressed_size(const int64_t val) { + this->total_compressed_size = val; +__isset.total_compressed_size = true; +} + +void RowGroup::__set_ordinal(const int16_t val) { + this->ordinal = val; +__isset.ordinal = true; +} +std::ostream& operator<<(std::ostream& out, const RowGroup& obj) { - const BYTE* dictPtr = (const BYTE*)dict; - const BYTE* const dictEnd = dictPtr + dictSize; + obj.printTo(out); + return out; +} - RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small"); - assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */ - dictPtr += 8; /* skip header = magic + dictID */ - ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable)); - ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable)); - ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE); - { void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */ - size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable); -#ifdef HUF_FORCE_DECOMPRESS_X1 - /* in minimal huffman, we always use X1 variants */ - size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable, - dictPtr, dictEnd - dictPtr, - workspace, workspaceSize); -#else - size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, - dictPtr, dictEnd - dictPtr, - workspace, workspaceSize); -#endif - RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, ""); - dictPtr += hSize; - } +uint32_t RowGroup::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - { short 
offcodeNCount[MaxOff+1]; - unsigned offcodeMaxValue = MaxOff, offcodeLog; - size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, ""); - RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); - ZSTD_buildFSETable( entropy->OFTable, - offcodeNCount, offcodeMaxValue, - ZSTDConstants::OF_base, ZSTDConstants::OF_bits, - offcodeLog); - dictPtr += offcodeHeaderSize; - } + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; - { short matchlengthNCount[MaxML+1]; - unsigned matchlengthMaxValue = MaxML, matchlengthLog; - size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, ""); - RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); - ZSTD_buildFSETable( entropy->MLTable, - matchlengthNCount, matchlengthMaxValue, - ZSTDConstants::ML_base, ZSTDInternalConstants::ML_bits, - matchlengthLog); - dictPtr += matchlengthHeaderSize; - } + xfer += iprot->readStructBegin(fname); - { short litlengthNCount[MaxLL+1]; - unsigned litlengthMaxValue = MaxLL, litlengthLog; - size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, ""); - RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); - ZSTD_buildFSETable( entropy->LLTable, - litlengthNCount, litlengthMaxValue, - 
ZSTDConstants::LL_base, ZSTDInternalConstants::LL_bits, - litlengthLog); - dictPtr += litlengthHeaderSize; + using ::duckdb_apache::thrift::protocol::TProtocolException; + + bool isset_columns = false; + bool isset_total_byte_size = false; + bool isset_num_rows = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { + { + this->columns.clear(); + uint32_t _size110; + ::duckdb_apache::thrift::protocol::TType _etype113; + xfer += iprot->readListBegin(_etype113, _size110); + this->columns.resize(_size110); + uint32_t _i114; + for (_i114 = 0; _i114 < _size110; ++_i114) + { + xfer += this->columns[_i114].read(iprot); + } + xfer += iprot->readListEnd(); + } + isset_columns = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->total_byte_size); + isset_total_byte_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->num_rows); + isset_num_rows = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { + { + this->sorting_columns.clear(); + uint32_t _size115; + ::duckdb_apache::thrift::protocol::TType _etype118; + xfer += iprot->readListBegin(_etype118, _size115); + this->sorting_columns.resize(_size115); + uint32_t _i119; + for (_i119 = 0; _i119 < _size115; ++_i119) + { + xfer += this->sorting_columns[_i119].read(iprot); + } + xfer += iprot->readListEnd(); + } + this->__isset.sorting_columns = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->file_offset); + this->__isset.file_offset = true; 
+ } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->total_compressed_size); + this->__isset.total_compressed_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::duckdb_apache::thrift::protocol::T_I16) { + xfer += iprot->readI16(this->ordinal); + this->__isset.ordinal = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; } + xfer += iprot->readFieldEnd(); + } - RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); - { int i; - size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); - for (i=0; i<3; i++) { - U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; - RETURN_ERROR_IF(rep==0 || rep > dictContentSize, - dictionary_corrupted, ""); - entropy->rep[i] = rep; - } } + xfer += iprot->readStructEnd(); - return dictPtr - (const BYTE*)dict; + if (!isset_columns) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_total_byte_size) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_num_rows) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; } -static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) -{ - if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize); - { U32 const magic = MEM_readLE32(dict); - if (magic != ZSTD_MAGIC_DICTIONARY) { - return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ - } } - dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); +uint32_t RowGroup::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("RowGroup"); - /* load entropy tables */ - { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize); - 
RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, ""); - dict = (const char*)dict + eSize; - dictSize -= eSize; + xfer += oprot->writeFieldBegin("columns", ::duckdb_apache::thrift::protocol::T_LIST, 1); + { + xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->columns.size())); + std::vector ::const_iterator _iter120; + for (_iter120 = this->columns.begin(); _iter120 != this->columns.end(); ++_iter120) + { + xfer += (*_iter120).write(oprot); } - dctx->litEntropy = dctx->fseEntropy = 1; + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); - /* reference dictionary content */ - return ZSTD_refDictContent(dctx, dict, dictSize); -} + xfer += oprot->writeFieldBegin("total_byte_size", ::duckdb_apache::thrift::protocol::T_I64, 2); + xfer += oprot->writeI64(this->total_byte_size); + xfer += oprot->writeFieldEnd(); -static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; + xfer += oprot->writeFieldBegin("num_rows", ::duckdb_apache::thrift::protocol::T_I64, 3); + xfer += oprot->writeI64(this->num_rows); + xfer += oprot->writeFieldEnd(); -size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) -{ - assert(dctx != NULL); - dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */ - dctx->stage = ZSTDds_getFrameHeaderSize; - dctx->decodedSize = 0; - dctx->previousDstEnd = NULL; - dctx->prefixStart = NULL; - dctx->virtualStart = NULL; - dctx->dictEnd = NULL; - dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ - dctx->litEntropy = dctx->fseEntropy = 0; - dctx->dictID = 0; - dctx->bType = bt_reserved; - ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); - memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ - dctx->LLTptr = dctx->entropy.LLTable; - dctx->MLTptr = dctx->entropy.MLTable; - dctx->OFTptr = dctx->entropy.OFTable; - dctx->HUFptr = dctx->entropy.hufTable; - return 0; 
+ if (this->__isset.sorting_columns) { + xfer += oprot->writeFieldBegin("sorting_columns", ::duckdb_apache::thrift::protocol::T_LIST, 4); + { + xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->sorting_columns.size())); + std::vector ::const_iterator _iter121; + for (_iter121 = this->sorting_columns.begin(); _iter121 != this->sorting_columns.end(); ++_iter121) + { + xfer += (*_iter121).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.file_offset) { + xfer += oprot->writeFieldBegin("file_offset", ::duckdb_apache::thrift::protocol::T_I64, 5); + xfer += oprot->writeI64(this->file_offset); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.total_compressed_size) { + xfer += oprot->writeFieldBegin("total_compressed_size", ::duckdb_apache::thrift::protocol::T_I64, 6); + xfer += oprot->writeI64(this->total_compressed_size); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.ordinal) { + xfer += oprot->writeFieldBegin("ordinal", ::duckdb_apache::thrift::protocol::T_I16, 7); + xfer += oprot->writeI16(this->ordinal); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; } -size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) -{ - FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); - if (dict && dictSize) - RETURN_ERROR_IF( - ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)), - dictionary_corrupted, ""); - return 0; +void swap(RowGroup &a, RowGroup &b) { + using ::std::swap; + swap(a.columns, b.columns); + swap(a.total_byte_size, b.total_byte_size); + swap(a.num_rows, b.num_rows); + swap(a.sorting_columns, b.sorting_columns); + swap(a.file_offset, b.file_offset); + swap(a.total_compressed_size, b.total_compressed_size); + swap(a.ordinal, b.ordinal); + swap(a.__isset, b.__isset); } - -/* ====== ZSTD_DDict ====== */ - -size_t 
ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) -{ - DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict"); - assert(dctx != NULL); - if (ddict) { - const char* const dictStart = (const char*)ZSTD_DDict_dictContent(ddict); - size_t const dictSize = ZSTD_DDict_dictSize(ddict); - const void* const dictEnd = dictStart + dictSize; - dctx->ddictIsCold = (dctx->dictEnd != dictEnd); - DEBUGLOG(4, "DDict is %s", - dctx->ddictIsCold ? "~cold~" : "hot!"); - } - FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); - if (ddict) { /* NULL ddict is equivalent to no dictionary */ - ZSTD_copyDDictParameters(dctx, ddict); - } - return 0; +RowGroup::RowGroup(const RowGroup& other122) { + columns = other122.columns; + total_byte_size = other122.total_byte_size; + num_rows = other122.num_rows; + sorting_columns = other122.sorting_columns; + file_offset = other122.file_offset; + total_compressed_size = other122.total_compressed_size; + ordinal = other122.ordinal; + __isset = other122.__isset; } - -/*! ZSTD_getDictID_fromDict() : - * Provides the dictID stored within dictionary. - * if @return == 0, the dictionary is not conformant with Zstandard specification. - * It can still be loaded, but as a content-only dictionary. */ -unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize) -{ - if (dictSize < 8) return 0; - if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0; - return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); +RowGroup& RowGroup::operator=(const RowGroup& other123) { + columns = other123.columns; + total_byte_size = other123.total_byte_size; + num_rows = other123.num_rows; + sorting_columns = other123.sorting_columns; + file_offset = other123.file_offset; + total_compressed_size = other123.total_compressed_size; + ordinal = other123.ordinal; + __isset = other123.__isset; + return *this; } - -/*! ZSTD_getDictID_fromFrame() : - * Provides the dictID required to decompress frame stored within `src`. 
- * If @return == 0, the dictID could not be decoded. - * This could for one of the following reasons : - * - The frame does not require a dictionary (most common case). - * - The frame was built with dictID intentionally removed. - * Needed dictionary is a hidden information. - * Note : this use case also happens when using a non-conformant dictionary. - * - `srcSize` is too small, and as a result, frame header could not be decoded. - * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`. - * - This is not a Zstandard frame. - * When identifying the exact failure cause, it's possible to use - * ZSTD_getFrameHeader(), which will provide a more precise error code. */ -unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize) -{ - ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 }; - size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize); - if (ZSTD_isError(hError)) return 0; - return zfp.dictID; +void RowGroup::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "RowGroup("; + out << "columns=" << to_string(columns); + out << ", " << "total_byte_size=" << to_string(total_byte_size); + out << ", " << "num_rows=" << to_string(num_rows); + out << ", " << "sorting_columns="; (__isset.sorting_columns ? (out << to_string(sorting_columns)) : (out << "")); + out << ", " << "file_offset="; (__isset.file_offset ? (out << to_string(file_offset)) : (out << "")); + out << ", " << "total_compressed_size="; (__isset.total_compressed_size ? (out << to_string(total_compressed_size)) : (out << "")); + out << ", " << "ordinal="; (__isset.ordinal ? (out << to_string(ordinal)) : (out << "")); + out << ")"; } -/*! ZSTD_decompress_usingDDict() : -* Decompression using a pre-digested Dictionary -* Use dictionary without significant overhead. 
*/ -size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_DDict* ddict) -{ - /* pass content and size in case legacy frames are encountered */ - return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, - NULL, 0, - ddict); +TypeDefinedOrder::~TypeDefinedOrder() throw() { } - -/*===================================== -* Streaming decompression -*====================================*/ - -ZSTD_DStream* ZSTD_createDStream(void) +std::ostream& operator<<(std::ostream& out, const TypeDefinedOrder& obj) { - DEBUGLOG(3, "ZSTD_createDStream"); - return ZSTD_createDStream_advanced(ZSTDInternalConstants::ZSTD_defaultCMem); + obj.printTo(out); + return out; } -ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize) -{ - return ZSTD_initStaticDCtx(workspace, workspaceSize); -} -ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem) -{ - return ZSTD_createDCtx_advanced(customMem); -} +uint32_t TypeDefinedOrder::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { -size_t ZSTD_freeDStream(ZSTD_DStream* zds) -{ - return ZSTD_freeDCtx(zds); -} + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; + xfer += iprot->readStructBegin(fname); -/* *** Initialization *** */ + using ::duckdb_apache::thrift::protocol::TProtocolException; -size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTDInternalConstants::ZSTD_blockHeaderSize; } -size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; } -size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, - const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType) -{ - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); - ZSTD_clearDict(dctx); - if (dict && dictSize != 0) { - dctx->ddictLocal = 
ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); - RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!"); - dctx->ddict = dctx->ddictLocal; - dctx->dictUses = ZSTD_use_indefinitely; + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; } - return 0; -} + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); -size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) -{ - return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); + return xfer; } -size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) -{ - return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); +uint32_t TypeDefinedOrder::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("TypeDefinedOrder"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; } -size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) -{ - FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), ""); - dctx->dictUses = ZSTD_use_once; - return 0; +void swap(TypeDefinedOrder &a, TypeDefinedOrder &b) { + using ::std::swap; + (void) a; + (void) b; } -size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize) -{ - return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent); +TypeDefinedOrder::TypeDefinedOrder(const TypeDefinedOrder& other124) { + (void) other124; +} +TypeDefinedOrder& TypeDefinedOrder::operator=(const TypeDefinedOrder& other125) { 
+ (void) other125; + return *this; +} +void TypeDefinedOrder::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "TypeDefinedOrder("; + out << ")"; } -/* ZSTD_initDStream_usingDict() : - * return : expected size, aka ZSTD_startingInputLength(). - * this function cannot fail */ -size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) -{ - DEBUGLOG(4, "ZSTD_initDStream_usingDict"); - FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , ""); - return ZSTD_startingInputLength(zds->format); +ColumnOrder::~ColumnOrder() throw() { } -/* note : this variant can't fail */ -size_t ZSTD_initDStream(ZSTD_DStream* zds) -{ - DEBUGLOG(4, "ZSTD_initDStream"); - return ZSTD_initDStream_usingDDict(zds, NULL); -} -/* ZSTD_initDStream_usingDDict() : - * ddict will just be referenced, and must outlive decompression session - * this function cannot fail */ -size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) -{ - FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , ""); - return ZSTD_startingInputLength(dctx->format); +void ColumnOrder::__set_TYPE_ORDER(const TypeDefinedOrder& val) { + this->TYPE_ORDER = val; +__isset.TYPE_ORDER = true; } - -/* ZSTD_resetDStream() : - * return : expected size, aka ZSTD_startingInputLength(). 
- * this function cannot fail */ -size_t ZSTD_resetDStream(ZSTD_DStream* dctx) +std::ostream& operator<<(std::ostream& out, const ColumnOrder& obj) { - FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), ""); - return ZSTD_startingInputLength(dctx->format); + obj.printTo(out); + return out; } -size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) -{ - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); - ZSTD_clearDict(dctx); - if (ddict) { - dctx->ddict = ddict; - dctx->dictUses = ZSTD_use_indefinitely; - } - return 0; -} +uint32_t ColumnOrder::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { -/* ZSTD_DCtx_setMaxWindowSize() : - * note : no direct equivalence in ZSTD_DCtx_setParameter, - * since this version sets windowSize, and the other sets windowLog */ -size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) -{ - ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax); - size_t const min = (size_t)1 << bounds.lowerBound; - size_t const max = (size_t)1 << bounds.upperBound; - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); - RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, ""); - RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, ""); - dctx->maxWindowSize = maxWindowSize; - return 0; -} + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; -size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) -{ - return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, format); -} + xfer += iprot->readStructBegin(fname); -ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) -{ - ZSTD_bounds bounds = { 0, 0, 0 }; - switch(dParam) { - case ZSTD_d_windowLogMax: - bounds.lowerBound = ZSTD_WINDOWLOG_ABSOLUTEMIN; - bounds.upperBound = ZSTD_WINDOWLOG_MAX; - return bounds; - case ZSTD_d_format: - bounds.lowerBound = 
(int)ZSTD_f_zstd1; - bounds.upperBound = (int)ZSTD_f_zstd1_magicless; - ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); - return bounds; - case ZSTD_d_stableOutBuffer: - bounds.lowerBound = (int)ZSTD_obm_buffered; - bounds.upperBound = (int)ZSTD_obm_stable; - return bounds; - default:; + using ::duckdb_apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; } - bounds.error = ERROR(parameter_unsupported); - return bounds; -} + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->TYPE_ORDER.read(iprot); + this->__isset.TYPE_ORDER = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } -/* ZSTD_dParam_withinBounds: - * @return 1 if value is within dParam bounds, - * 0 otherwise */ -static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value) -{ - ZSTD_bounds const bounds = ZSTD_dParam_getBounds(dParam); - if (ZSTD_isError(bounds.error)) return 0; - if (value < bounds.lowerBound) return 0; - if (value > bounds.upperBound) return 0; - return 1; + xfer += iprot->readStructEnd(); + + return xfer; } -#define CHECK_DBOUNDS(p,v) { \ - RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \ +uint32_t ColumnOrder::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("ColumnOrder"); + + if (this->__isset.TYPE_ORDER) { + xfer += oprot->writeFieldBegin("TYPE_ORDER", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); + xfer += this->TYPE_ORDER.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; } -size_t 
ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value) -{ - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); - switch(dParam) { - case ZSTD_d_windowLogMax: - if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT; - CHECK_DBOUNDS(ZSTD_d_windowLogMax, value); - dctx->maxWindowSize = ((size_t)1) << value; - return 0; - case ZSTD_d_format: - CHECK_DBOUNDS(ZSTD_d_format, value); - dctx->format = (ZSTD_format_e)value; - return 0; - case ZSTD_d_stableOutBuffer: - CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value); - dctx->outBufferMode = (ZSTD_outBufferMode_e)value; - return 0; - default:; - } - RETURN_ERROR(parameter_unsupported, ""); +void swap(ColumnOrder &a, ColumnOrder &b) { + using ::std::swap; + swap(a.TYPE_ORDER, b.TYPE_ORDER); + swap(a.__isset, b.__isset); } -size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) -{ - if ( (reset == ZSTD_reset_session_only) - || (reset == ZSTD_reset_session_and_parameters) ) { - dctx->streamStage = zdss_init; - dctx->noForwardProgress = 0; - } - if ( (reset == ZSTD_reset_parameters) - || (reset == ZSTD_reset_session_and_parameters) ) { - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); - ZSTD_clearDict(dctx); - dctx->format = ZSTD_f_zstd1; - dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; - } - return 0; +ColumnOrder::ColumnOrder(const ColumnOrder& other126) { + TYPE_ORDER = other126.TYPE_ORDER; + __isset = other126.__isset; +} +ColumnOrder& ColumnOrder::operator=(const ColumnOrder& other127) { + TYPE_ORDER = other127.TYPE_ORDER; + __isset = other127.__isset; + return *this; +} +void ColumnOrder::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "ColumnOrder("; + out << "TYPE_ORDER="; (__isset.TYPE_ORDER ? 
(out << to_string(TYPE_ORDER)) : (out << "")); + out << ")"; } -size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx) -{ - return ZSTD_sizeof_DCtx(dctx); +PageLocation::~PageLocation() throw() { } -size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) -{ - size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); - unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2); - unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); - size_t const minRBSize = (size_t) neededSize; - RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, - frameParameter_windowTooLarge, ""); - return minRBSize; + +void PageLocation::__set_offset(const int64_t val) { + this->offset = val; } -size_t ZSTD_estimateDStreamSize(size_t windowSize) -{ - size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX); - size_t const inBuffSize = blockSize; /* no block can be larger */ - size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN); - return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize; +void PageLocation::__set_compressed_page_size(const int32_t val) { + this->compressed_page_size = val; } -size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) +void PageLocation::__set_first_row_index(const int64_t val) { + this->first_row_index = val; +} +std::ostream& operator<<(std::ostream& out, const PageLocation& obj) { - U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable, but requires an additional parameter (or a dctx) */ - ZSTD_frameHeader zfh; - size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); - if (ZSTD_isError(err)) return err; - RETURN_ERROR_IF(err>0, srcSize_wrong, ""); - RETURN_ERROR_IF(zfh.windowSize > windowSizeMax, - frameParameter_windowTooLarge, ""); - return ZSTD_estimateDStreamSize((size_t)zfh.windowSize); + obj.printTo(out); + return out; } -/* ***** 
Decompression ***** */ +uint32_t PageLocation::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { -static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) -{ - return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR; -} + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; -static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) -{ - if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize)) - zds->oversizedDuration++; - else - zds->oversizedDuration = 0; -} + xfer += iprot->readStructBegin(fname); -static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds) -{ - return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION; -} + using ::duckdb_apache::thrift::protocol::TProtocolException; -/* Checks that the output buffer hasn't changed if ZSTD_obm_stable is used. */ -static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output) -{ - ZSTD_outBuffer const expect = zds->expectedOutBuffer; - /* No requirement when ZSTD_obm_stable is not enabled. */ - if (zds->outBufferMode != ZSTD_obm_stable) - return 0; - /* Any buffer is allowed in zdss_init, this must be the same for every other call until - * the context is reset. - */ - if (zds->streamStage == zdss_init) - return 0; - /* The buffer must match our expectation exactly. 
*/ - if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size) - return 0; - RETURN_ERROR(dstBuffer_wrong, "ZSTD_obm_stable enabled but output differs!"); -} + bool isset_offset = false; + bool isset_compressed_page_size = false; + bool isset_first_row_index = false; -/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream() - * and updates the stage and the output buffer state. This call is extracted so it can be - * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode. - * NOTE: You must break after calling this function since the streamStage is modified. - */ -static size_t ZSTD_decompressContinueStream( - ZSTD_DStream* zds, char** op, char* oend, - void const* src, size_t srcSize) { - int const isSkipFrame = ZSTD_isSkipFrame(zds); - if (zds->outBufferMode == ZSTD_obm_buffered) { - size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart; - size_t const decodedSize = ZSTD_decompressContinue(zds, - zds->outBuff + zds->outStart, dstSize, src, srcSize); - FORWARD_IF_ERROR(decodedSize, ""); - if (!decodedSize && !isSkipFrame) { - zds->streamStage = zdss_read; + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->offset); + isset_offset = true; } else { - zds->outEnd = zds->outStart + decodedSize; - zds->streamStage = zdss_flush; + xfer += iprot->skip(ftype); } - } else { - /* Write directly into the output buffer */ - size_t const dstSize = isSkipFrame ? 0 : oend - *op; - size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize); - FORWARD_IF_ERROR(decodedSize, ""); - *op += decodedSize; - /* Flushing is not needed. 
*/ - zds->streamStage = zdss_read; - assert(*op <= oend); - assert(zds->outBufferMode == ZSTD_obm_stable); + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->compressed_page_size); + isset_compressed_page_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->first_row_index); + isset_first_row_index = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; } - return 0; -} - -size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input) -{ - const char* const src = (const char*)input->src; - const char* const istart = input->pos != 0 ? src + input->pos : src; - const char* const iend = input->size != 0 ? src + input->size : src; - const char* ip = istart; - char* const dst = (char*)output->dst; - char* const ostart = output->pos != 0 ? dst + output->pos : dst; - char* const oend = output->size != 0 ? dst + output->size : dst; - char* op = ostart; - U32 someMoreWork = 1; + xfer += iprot->readFieldEnd(); + } - DEBUGLOG(5, "ZSTD_decompressStream"); - RETURN_ERROR_IF( - input->pos > input->size, - srcSize_wrong, - "forbidden. in: pos: %u vs size: %u", - (U32)input->pos, (U32)input->size); - RETURN_ERROR_IF( - output->pos > output->size, - dstSize_tooSmall, - "forbidden. 
out: pos: %u vs size: %u", - (U32)output->pos, (U32)output->size); - DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); - FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), ""); + xfer += iprot->readStructEnd(); - while (someMoreWork) { - switch(zds->streamStage) - { - case zdss_init : - DEBUGLOG(5, "stage zdss_init => transparent reset "); - zds->streamStage = zdss_loadHeader; - zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; - zds->legacyVersion = 0; - zds->hostageByte = 0; - zds->expectedOutBuffer = *output; - /* fall-through */ + if (!isset_offset) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_compressed_page_size) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_first_row_index) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} - case zdss_loadHeader : - DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip)); -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) - if (zds->legacyVersion) { - RETURN_ERROR_IF(zds->staticSize, memory_allocation, - "legacy support is incompatible with static dctx"); - { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); - if (hint==0) zds->streamStage = zdss_init; - return hint; - } } -#endif - { size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format); - DEBUGLOG(5, "header size : %u", (U32)hSize); - if (ZSTD_isError(hSize)) { -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) - U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart); - if (legacyVersion) { - ZSTD_DDict const* const ddict = ZSTD_getDDict(zds); - const void* const dict = ddict ? ZSTD_DDict_dictContent(ddict) : NULL; - size_t const dictSize = ddict ? 
ZSTD_DDict_dictSize(ddict) : 0; - DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion); - RETURN_ERROR_IF(zds->staticSize, memory_allocation, - "legacy support is incompatible with static dctx"); - FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext, - zds->previousLegacyVersion, legacyVersion, - dict, dictSize), ""); - zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; - { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input); - if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */ - return hint; - } } -#endif - return hSize; /* error */ - } - if (hSize != 0) { /* need more input */ - size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ - size_t const remainingInput = (size_t)(iend-ip); - assert(iend >= ip); - if (toLoad > remainingInput) { /* not enough input to load full header */ - if (remainingInput > 0) { - memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); - zds->lhSize += remainingInput; - } - input->pos = input->size; - return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTDInternalConstants::ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ - } - assert(ip != NULL); - memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; - break; - } } +uint32_t PageLocation::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("PageLocation"); - /* check for single-pass mode opportunity */ - if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN - && zds->fParams.frameType != ZSTD_skippableFrame - && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { - size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart); - if (cSize <= (size_t)(iend-istart)) { - /* 
shortcut : using single-pass mode */ - size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, ZSTD_getDDict(zds)); - if (ZSTD_isError(decompressedSize)) return decompressedSize; - DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") - ip = istart + cSize; - op += decompressedSize; - zds->expected = 0; - zds->streamStage = zdss_init; - someMoreWork = 0; - break; - } } + xfer += oprot->writeFieldBegin("offset", ::duckdb_apache::thrift::protocol::T_I64, 1); + xfer += oprot->writeI64(this->offset); + xfer += oprot->writeFieldEnd(); - /* Check output buffer is large enough for ZSTD_odm_stable. */ - if (zds->outBufferMode == ZSTD_obm_stable - && zds->fParams.frameType != ZSTD_skippableFrame - && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN - && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) { - RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small"); - } + xfer += oprot->writeFieldBegin("compressed_page_size", ::duckdb_apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(this->compressed_page_size); + xfer += oprot->writeFieldEnd(); - /* Consume header (see ZSTDds_decodeFrameHeader) */ - DEBUGLOG(4, "Consume header"); - FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); + xfer += oprot->writeFieldBegin("first_row_index", ::duckdb_apache::thrift::protocol::T_I64, 3); + xfer += oprot->writeI64(this->first_row_index); + xfer += oprot->writeFieldEnd(); - if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ - zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); - zds->stage = ZSTDds_skipFrame; - } else { - FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), ""); - zds->expected = ZSTDInternalConstants::ZSTD_blockHeaderSize; - zds->stage = ZSTDds_decodeBlockHeader; - } + xfer += oprot->writeFieldStop(); + xfer += 
oprot->writeStructEnd(); + return xfer; +} - /* control buffer memory usage */ - DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)", - (U32)(zds->fParams.windowSize >>10), - (U32)(zds->maxWindowSize >> 10) ); - zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); - RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, - frameParameter_windowTooLarge, ""); +void swap(PageLocation &a, PageLocation &b) { + using ::std::swap; + swap(a.offset, b.offset); + swap(a.compressed_page_size, b.compressed_page_size); + swap(a.first_row_index, b.first_row_index); +} - /* Adapt buffer sizes to frame header instructions */ - { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); - size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_obm_buffered - ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize) - : 0; +PageLocation::PageLocation(const PageLocation& other128) { + offset = other128.offset; + compressed_page_size = other128.compressed_page_size; + first_row_index = other128.first_row_index; +} +PageLocation& PageLocation::operator=(const PageLocation& other129) { + offset = other129.offset; + compressed_page_size = other129.compressed_page_size; + first_row_index = other129.first_row_index; + return *this; +} +void PageLocation::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "PageLocation("; + out << "offset=" << to_string(offset); + out << ", " << "compressed_page_size=" << to_string(compressed_page_size); + out << ", " << "first_row_index=" << to_string(first_row_index); + out << ")"; +} - ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize); - { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize); - int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds); +OffsetIndex::~OffsetIndex() throw() { +} - if (tooSmall || tooLarge) { - size_t const 
bufferSize = neededInBuffSize + neededOutBuffSize; - DEBUGLOG(4, "inBuff : from %u to %u", - (U32)zds->inBuffSize, (U32)neededInBuffSize); - DEBUGLOG(4, "outBuff : from %u to %u", - (U32)zds->outBuffSize, (U32)neededOutBuffSize); - if (zds->staticSize) { /* static DCtx */ - DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); - assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ - RETURN_ERROR_IF( - bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), - memory_allocation, ""); - } else { - ZSTD_free(zds->inBuff, zds->customMem); - zds->inBuffSize = 0; - zds->outBuffSize = 0; - zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem); - RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, ""); - } - zds->inBuffSize = neededInBuffSize; - zds->outBuff = zds->inBuff + zds->inBuffSize; - zds->outBuffSize = neededOutBuffSize; - } } } - zds->streamStage = zdss_read; - /* fall-through */ - case zdss_read: - DEBUGLOG(5, "stage zdss_read"); - { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip); - DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize); - if (neededInSize==0) { /* end of frame */ - zds->streamStage = zdss_init; - someMoreWork = 0; - break; - } - if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ - FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), ""); - ip += neededInSize; - /* Function modifies the stage so we must break */ - break; - } } - if (ip==iend) { someMoreWork = 0; break; } /* no more input */ - zds->streamStage = zdss_load; - /* fall-through */ +void OffsetIndex::__set_page_locations(const std::vector & val) { + this->page_locations = val; +} +std::ostream& operator<<(std::ostream& out, const OffsetIndex& obj) +{ + obj.printTo(out); + return out; +} - case zdss_load: - { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds); - size_t const toLoad = neededInSize - zds->inPos; - int const isSkipFrame = ZSTD_isSkipFrame(zds); - size_t 
loadedSize; - /* At this point we shouldn't be decompressing a block that we can stream. */ - assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip)); - if (isSkipFrame) { - loadedSize = MIN(toLoad, (size_t)(iend-ip)); - } else { - RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos, - corruption_detected, - "should never happen"); - loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip); - } - ip += loadedSize; - zds->inPos += loadedSize; - if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ - /* decode loaded input */ - zds->inPos = 0; /* input is consumed */ - FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), ""); - /* Function modifies the stage so we must break */ - break; - } - case zdss_flush: - { size_t const toFlushSize = zds->outEnd - zds->outStart; - size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize); - op += flushedSize; - zds->outStart += flushedSize; - if (flushedSize == toFlushSize) { /* flush completed */ - zds->streamStage = zdss_read; - if ( (zds->outBuffSize < zds->fParams.frameContentSize) - && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { - DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)", - (int)(zds->outBuffSize - zds->outStart), - (U32)zds->fParams.blockSizeMax); - zds->outStart = zds->outEnd = 0; - } - break; - } } - /* cannot complete flush */ - someMoreWork = 0; - break; +uint32_t OffsetIndex::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - default: - assert(0); /* impossible */ - RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ - } } + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; - /* result */ - input->pos = (size_t)(ip - 
(const char*)(input->src)); - output->pos = (size_t)(op - (char*)(output->dst)); + xfer += iprot->readStructBegin(fname); - /* Update the expected output buffer for ZSTD_obm_stable. */ - zds->expectedOutBuffer = *output; + using ::duckdb_apache::thrift::protocol::TProtocolException; - if ((ip==istart) && (op==ostart)) { /* no forward progress */ - zds->noForwardProgress ++; - if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { - RETURN_ERROR_IF(op==oend, dstSize_tooSmall, ""); - RETURN_ERROR_IF(ip==iend, srcSize_wrong, ""); - assert(0); - } - } else { - zds->noForwardProgress = 0; + bool isset_page_locations = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; } - { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds); - if (!nextSrcSizeHint) { /* frame fully decoded */ - if (zds->outEnd == zds->outStart) { /* output fully flushed */ - if (zds->hostageByte) { - if (input->pos >= input->size) { - /* can't release hostage (not present) */ - zds->streamStage = zdss_read; - return 1; - } - input->pos++; /* release hostage */ - } /* zds->hostageByte */ - return 0; - } /* zds->outEnd == zds->outStart */ - if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */ - input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */ - zds->hostageByte=1; + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { + { + this->page_locations.clear(); + uint32_t _size130; + ::duckdb_apache::thrift::protocol::TType _etype133; + xfer += iprot->readListBegin(_etype133, _size130); + this->page_locations.resize(_size130); + uint32_t _i134; + for (_i134 = 0; _i134 < _size130; ++_i134) + { + xfer += this->page_locations[_i134].read(iprot); } - return 1; - } /* nextSrcSizeHint==0 */ - nextSrcSizeHint += ZSTDInternalConstants::ZSTD_blockHeaderSize * 
(ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */ - assert(zds->inPos <= nextSrcSizeHint); - nextSrcSizeHint -= zds->inPos; /* part already loaded*/ - return nextSrcSizeHint; + xfer += iprot->readListEnd(); + } + isset_page_locations = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_page_locations) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; } -size_t ZSTD_decompressStream_simpleArgs ( - ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, size_t* dstPos, - const void* src, size_t srcSize, size_t* srcPos) -{ - ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; - ZSTD_inBuffer input = { src, srcSize, *srcPos }; - /* ZSTD_compress_generic() will check validity of dstPos and srcPos */ - size_t const cErr = ZSTD_decompressStream(dctx, &output, &input); - *dstPos = output.pos; - *srcPos = input.pos; - return cErr; +uint32_t OffsetIndex::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("OffsetIndex"); + + xfer += oprot->writeFieldBegin("page_locations", ::duckdb_apache::thrift::protocol::T_LIST, 1); + { + xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->page_locations.size())); + std::vector ::const_iterator _iter135; + for (_iter135 = this->page_locations.begin(); _iter135 != this->page_locations.end(); ++_iter135) + { + xfer += (*_iter135).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; } +void swap(OffsetIndex &a, OffsetIndex &b) { + using ::std::swap; + swap(a.page_locations, b.page_locations); } +OffsetIndex::OffsetIndex(const 
OffsetIndex& other136) { + page_locations = other136.page_locations; +} +OffsetIndex& OffsetIndex::operator=(const OffsetIndex& other137) { + page_locations = other137.page_locations; + return *this; +} +void OffsetIndex::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "OffsetIndex("; + out << "page_locations=" << to_string(page_locations); + out << ")"; +} -// LICENSE_CHANGE_END +ColumnIndex::~ColumnIndex() throw() { +} -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
- */ +void ColumnIndex::__set_null_pages(const std::vector & val) { + this->null_pages = val; +} -/* zstd_decompress_block : - * this module takes care of decompressing _compressed_ block */ +void ColumnIndex::__set_min_values(const std::vector & val) { + this->min_values = val; +} -/*-******************************************************* -* Dependencies -*********************************************************/ -#include /* memcpy, memmove, memset */ - /* prefetch */ - /* low level memory routines */ +void ColumnIndex::__set_max_values(const std::vector & val) { + this->max_values = val; +} +void ColumnIndex::__set_boundary_order(const BoundaryOrder::type val) { + this->boundary_order = val; +} +void ColumnIndex::__set_null_counts(const std::vector & val) { + this->null_counts = val; +__isset.null_counts = true; +} +std::ostream& operator<<(std::ostream& out, const ColumnIndex& obj) +{ + obj.printTo(out); + return out; +} +uint32_t ColumnIndex::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - /* ZSTD_DCtx */ - /* ZSTD_DDictDictContent */ + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; -namespace duckdb_zstd { -/*_******************************************************* -* Macros -**********************************************************/ + xfer += iprot->readStructBegin(fname); -/* These two optional macros force the use one way or another of the two - * ZSTD_decompressSequences implementations. You can't force in both directions - * at the same time. - */ -#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ - defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) -#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!" 
-#endif + using ::duckdb_apache::thrift::protocol::TProtocolException; + bool isset_null_pages = false; + bool isset_min_values = false; + bool isset_max_values = false; + bool isset_boundary_order = false; -/*_******************************************************* -* Memory operations -**********************************************************/ -static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { + { + this->null_pages.clear(); + uint32_t _size138; + ::duckdb_apache::thrift::protocol::TType _etype141; + xfer += iprot->readListBegin(_etype141, _size138); + this->null_pages.resize(_size138); + uint32_t _i142; + for (_i142 = 0; _i142 < _size138; ++_i142) + { + xfer += iprot->readBool(this->null_pages[_i142]); + } + xfer += iprot->readListEnd(); + } + isset_null_pages = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { + { + this->min_values.clear(); + uint32_t _size143; + ::duckdb_apache::thrift::protocol::TType _etype146; + xfer += iprot->readListBegin(_etype146, _size143); + this->min_values.resize(_size143); + uint32_t _i147; + for (_i147 = 0; _i147 < _size143; ++_i147) + { + xfer += iprot->readBinary(this->min_values[_i147]); + } + xfer += iprot->readListEnd(); + } + isset_min_values = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { + { + this->max_values.clear(); + uint32_t _size148; + ::duckdb_apache::thrift::protocol::TType _etype151; + xfer += iprot->readListBegin(_etype151, _size148); + this->max_values.resize(_size148); + uint32_t _i152; + for (_i152 = 0; _i152 < _size148; ++_i152) + { + xfer += iprot->readBinary(this->max_values[_i152]); + } + 
xfer += iprot->readListEnd(); + } + isset_max_values = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + int32_t ecast153; + xfer += iprot->readI32(ecast153); + this->boundary_order = (BoundaryOrder::type)ecast153; + isset_boundary_order = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { + { + this->null_counts.clear(); + uint32_t _size154; + ::duckdb_apache::thrift::protocol::TType _etype157; + xfer += iprot->readListBegin(_etype157, _size154); + this->null_counts.resize(_size154); + uint32_t _i158; + for (_i158 = 0; _i158 < _size154; ++_i158) + { + xfer += iprot->readI64(this->null_counts[_i158]); + } + xfer += iprot->readListEnd(); + } + this->__isset.null_counts = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + xfer += iprot->readStructEnd(); -/*-************************************************************* - * Block decoding - ***************************************************************/ + if (!isset_null_pages) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_min_values) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_max_values) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_boundary_order) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} -/*! 
ZSTD_getcBlockSize() : - * Provides the size of compressed block from block header `src` */ -size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, - blockProperties_t* bpPtr) -{ - RETURN_ERROR_IF(srcSize < ZSTDInternalConstants::ZSTD_blockHeaderSize, srcSize_wrong, ""); +uint32_t ColumnIndex::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("ColumnIndex"); - { U32 const cBlockHeader = MEM_readLE24(src); - U32 const cSize = cBlockHeader >> 3; - bpPtr->lastBlock = cBlockHeader & 1; - bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); - bpPtr->origSize = cSize; /* only useful for RLE */ - if (bpPtr->blockType == bt_rle) return 1; - RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, ""); - return cSize; + xfer += oprot->writeFieldBegin("null_pages", ::duckdb_apache::thrift::protocol::T_LIST, 1); + { + xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_BOOL, static_cast(this->null_pages.size())); + std::vector ::const_iterator _iter159; + for (_iter159 = this->null_pages.begin(); _iter159 != this->null_pages.end(); ++_iter159) + { + xfer += oprot->writeBool((*_iter159)); } -} + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + xfer += oprot->writeFieldBegin("min_values", ::duckdb_apache::thrift::protocol::T_LIST, 2); + { + xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRING, static_cast(this->min_values.size())); + std::vector ::const_iterator _iter160; + for (_iter160 = this->min_values.begin(); _iter160 != this->min_values.end(); ++_iter160) + { + xfer += oprot->writeBinary((*_iter160)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); -/* Hidden declaration for fullbench */ -size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - const void* src, size_t srcSize); -/*! 
ZSTD_decodeLiteralsBlock() : - * @return : nb of bytes read from src (< srcSize ) - * note : symbol not declared but exposed for fullbench */ -size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ -{ - DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); - RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); + xfer += oprot->writeFieldBegin("max_values", ::duckdb_apache::thrift::protocol::T_LIST, 3); + { + xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRING, static_cast(this->max_values.size())); + std::vector ::const_iterator _iter161; + for (_iter161 = this->max_values.begin(); _iter161 != this->max_values.end(); ++_iter161) + { + xfer += oprot->writeBinary((*_iter161)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); - { const BYTE* const istart = (const BYTE*) src; - symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); + xfer += oprot->writeFieldBegin("boundary_order", ::duckdb_apache::thrift::protocol::T_I32, 4); + xfer += oprot->writeI32((int32_t)this->boundary_order); + xfer += oprot->writeFieldEnd(); - switch(litEncType) - { - case set_repeat: - DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block"); - RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, ""); - /* fall-through */ + if (this->__isset.null_counts) { + xfer += oprot->writeFieldBegin("null_counts", ::duckdb_apache::thrift::protocol::T_LIST, 5); + { + xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_I64, static_cast(this->null_counts.size())); + std::vector ::const_iterator _iter162; + for (_iter162 = this->null_counts.begin(); _iter162 != this->null_counts.end(); ++_iter162) + { + xfer += oprot->writeI64((*_iter162)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} - case 
set_compressed: - RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); - { size_t lhSize, litSize, litCSize; - U32 singleStream=0; - U32 const lhlCode = (istart[0] >> 2) & 3; - U32 const lhc = MEM_readLE32(istart); - size_t hufSuccess; - switch(lhlCode) - { - case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ - /* 2 - 2 - 10 - 10 */ - singleStream = !lhlCode; - lhSize = 3; - litSize = (lhc >> 4) & 0x3FF; - litCSize = (lhc >> 14) & 0x3FF; - break; - case 2: - /* 2 - 2 - 14 - 14 */ - lhSize = 4; - litSize = (lhc >> 4) & 0x3FFF; - litCSize = lhc >> 18; - break; - case 3: - /* 2 - 2 - 18 - 18 */ - lhSize = 5; - litSize = (lhc >> 4) & 0x3FFFF; - litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); - break; - } - RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); - RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); +void swap(ColumnIndex &a, ColumnIndex &b) { + using ::std::swap; + swap(a.null_pages, b.null_pages); + swap(a.min_values, b.min_values); + swap(a.max_values, b.max_values); + swap(a.boundary_order, b.boundary_order); + swap(a.null_counts, b.null_counts); + swap(a.__isset, b.__isset); +} - /* prefetch huffman table if cold */ - if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { - PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable)); - } +ColumnIndex::ColumnIndex(const ColumnIndex& other163) { + null_pages = other163.null_pages; + min_values = other163.min_values; + max_values = other163.max_values; + boundary_order = other163.boundary_order; + null_counts = other163.null_counts; + __isset = other163.__isset; +} +ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other164) { + null_pages = other164.null_pages; + min_values = other164.min_values; + max_values = other164.max_values; + boundary_order = other164.boundary_order; + null_counts = other164.null_counts; + __isset = other164.__isset; + return 
*this; +} +void ColumnIndex::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "ColumnIndex("; + out << "null_pages=" << to_string(null_pages); + out << ", " << "min_values=" << to_string(min_values); + out << ", " << "max_values=" << to_string(max_values); + out << ", " << "boundary_order=" << to_string(boundary_order); + out << ", " << "null_counts="; (__isset.null_counts ? (out << to_string(null_counts)) : (out << "")); + out << ")"; +} - if (litEncType==set_repeat) { - if (singleStream) { - hufSuccess = HUF_decompress1X_usingDTable_bmi2( - dctx->litBuffer, litSize, istart+lhSize, litCSize, - dctx->HUFptr, dctx->bmi2); - } else { - hufSuccess = HUF_decompress4X_usingDTable_bmi2( - dctx->litBuffer, litSize, istart+lhSize, litCSize, - dctx->HUFptr, dctx->bmi2); - } - } else { - if (singleStream) { -#if defined(HUF_FORCE_DECOMPRESS_X2) - hufSuccess = HUF_decompress1X_DCtx_wksp( - dctx->entropy.hufTable, dctx->litBuffer, litSize, - istart+lhSize, litCSize, dctx->workspace, - sizeof(dctx->workspace)); -#else - hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2( - dctx->entropy.hufTable, dctx->litBuffer, litSize, - istart+lhSize, litCSize, dctx->workspace, - sizeof(dctx->workspace), dctx->bmi2); -#endif - } else { - hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2( - dctx->entropy.hufTable, dctx->litBuffer, litSize, - istart+lhSize, litCSize, dctx->workspace, - sizeof(dctx->workspace), dctx->bmi2); - } - } - RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); +AesGcmV1::~AesGcmV1() throw() { +} - dctx->litPtr = dctx->litBuffer; - dctx->litSize = litSize; - dctx->litEntropy = 1; - if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable; - memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); - return litCSize + lhSize; - } - case set_basic: - { size_t litSize, lhSize; - U32 const lhlCode = ((istart[0]) >> 2) & 3; - switch(lhlCode) - { - case 0: case 2: default: /* note : default is impossible, 
since lhlCode into [0..3] */ - lhSize = 1; - litSize = istart[0] >> 3; - break; - case 1: - lhSize = 2; - litSize = MEM_readLE16(istart) >> 4; - break; - case 3: - lhSize = 3; - litSize = MEM_readLE24(istart) >> 4; - break; - } +void AesGcmV1::__set_aad_prefix(const std::string& val) { + this->aad_prefix = val; +__isset.aad_prefix = true; +} - if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ - RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, ""); - memcpy(dctx->litBuffer, istart+lhSize, litSize); - dctx->litPtr = dctx->litBuffer; - dctx->litSize = litSize; - memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); - return lhSize+litSize; - } - /* direct reference into compressed stream */ - dctx->litPtr = istart+lhSize; - dctx->litSize = litSize; - return lhSize+litSize; - } +void AesGcmV1::__set_aad_file_unique(const std::string& val) { + this->aad_file_unique = val; +__isset.aad_file_unique = true; +} - case set_rle: - { U32 const lhlCode = ((istart[0]) >> 2) & 3; - size_t litSize, lhSize; - switch(lhlCode) - { - case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ - lhSize = 1; - litSize = istart[0] >> 3; - break; - case 1: - lhSize = 2; - litSize = MEM_readLE16(istart) >> 4; - break; - case 3: - lhSize = 3; - litSize = MEM_readLE24(istart) >> 4; - RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); - break; - } - RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); - memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); - dctx->litPtr = dctx->litBuffer; - dctx->litSize = litSize; - return lhSize+1; - } - default: - RETURN_ERROR(corruption_detected, "impossible"); - } - } +void AesGcmV1::__set_supply_aad_prefix(const bool val) { + this->supply_aad_prefix = val; +__isset.supply_aad_prefix = true; +} +std::ostream& operator<<(std::ostream& out, const AesGcmV1& 
obj) +{ + obj.printTo(out); + return out; } -/* Default FSE distribution tables. - * These are pre-calculated FSE decoding tables using default distributions as defined in specification : - * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions - * They were generated programmatically with following method : - * - start from default distributions, present in /lib/common/zstd_internal.h - * - generate tables normally, using ZSTD_buildFSETable() - * - printout the content of tables - * - pretify output, report below, test with fuzzer to ensure it's correct */ -/* Default FSE distribution table for Literal Lengths */ -static const ZSTD_seqSymbol LL_defaultDTable[(1<readStructBegin(fname); -/* Default FSE distribution table for Match Lengths */ -static const ZSTD_seqSymbol ML_defaultDTable[(1<readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->aad_prefix); + this->__isset.aad_prefix = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->aad_file_unique); + this->__isset.aad_file_unique = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->supply_aad_prefix); + this->__isset.supply_aad_prefix = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } - DTableH->tableLog = 0; - DTableH->fastMode = 0; + xfer += iprot->readStructEnd(); - cell->nbBits = 0; - cell->nextState = 0; - assert(nbAddBits < 255); - cell->nbAdditionalBits = (BYTE)nbAddBits; - cell->baseValue = baseValue; + return xfer; } +uint32_t 
AesGcmV1::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("AesGcmV1"); -/* ZSTD_buildFSETable() : - * generate FSE decoding table for one symbol (ll, ml or off) - * cannot fail if input is valid => - * all inputs are presumed validated at this stage */ -void -ZSTD_buildFSETable(ZSTD_seqSymbol* dt, - const short* normalizedCounter, unsigned maxSymbolValue, - const U32* baseValue, const U32* nbAdditionalBits, - unsigned tableLog) -{ - ZSTD_seqSymbol* const tableDecode = dt+1; - U16 symbolNext[MaxSeq+1]; + if (this->__isset.aad_prefix) { + xfer += oprot->writeFieldBegin("aad_prefix", ::duckdb_apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeBinary(this->aad_prefix); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.aad_file_unique) { + xfer += oprot->writeFieldBegin("aad_file_unique", ::duckdb_apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeBinary(this->aad_file_unique); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.supply_aad_prefix) { + xfer += oprot->writeFieldBegin("supply_aad_prefix", ::duckdb_apache::thrift::protocol::T_BOOL, 3); + xfer += oprot->writeBool(this->supply_aad_prefix); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} - U32 const maxSV1 = maxSymbolValue + 1; - U32 const tableSize = 1 << tableLog; - U32 highThreshold = tableSize-1; +void swap(AesGcmV1 &a, AesGcmV1 &b) { + using ::std::swap; + swap(a.aad_prefix, b.aad_prefix); + swap(a.aad_file_unique, b.aad_file_unique); + swap(a.supply_aad_prefix, b.supply_aad_prefix); + swap(a.__isset, b.__isset); +} - /* Sanity Checks */ - assert(maxSymbolValue <= MaxSeq); - assert(tableLog <= MaxFSELog); +AesGcmV1::AesGcmV1(const AesGcmV1& other165) { + aad_prefix = other165.aad_prefix; + aad_file_unique = other165.aad_file_unique; + 
supply_aad_prefix = other165.supply_aad_prefix; + __isset = other165.__isset; +} +AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other166) { + aad_prefix = other166.aad_prefix; + aad_file_unique = other166.aad_file_unique; + supply_aad_prefix = other166.supply_aad_prefix; + __isset = other166.__isset; + return *this; +} +void AesGcmV1::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "AesGcmV1("; + out << "aad_prefix="; (__isset.aad_prefix ? (out << to_string(aad_prefix)) : (out << "")); + out << ", " << "aad_file_unique="; (__isset.aad_file_unique ? (out << to_string(aad_file_unique)) : (out << "")); + out << ", " << "supply_aad_prefix="; (__isset.supply_aad_prefix ? (out << to_string(supply_aad_prefix)) : (out << "")); + out << ")"; +} - /* Init, lay down lowprob symbols */ - { ZSTD_seqSymbol_header DTableH; - DTableH.tableLog = tableLog; - DTableH.fastMode = 1; - { S16 const largeLimit= (S16)(1 << (tableLog-1)); - U32 s; - for (s=0; s= largeLimit) DTableH.fastMode=0; - assert(normalizedCounter[s]>=0); - symbolNext[s] = (U16)normalizedCounter[s]; - } } } - memcpy(dt, &DTableH, sizeof(DTableH)); - } - /* Spread symbols */ - { U32 const tableMask = tableSize-1; - U32 const step = FSE_TABLESTEP(tableSize); - U32 s, position = 0; - for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ - } } - assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ - } +AesGcmCtrV1::~AesGcmCtrV1() throw() { +} - /* Build Decoding table */ - { U32 u; - for (u=0; uaad_prefix = val; +__isset.aad_prefix = true; } +void AesGcmCtrV1::__set_aad_file_unique(const std::string& val) { + this->aad_file_unique = val; +__isset.aad_file_unique = true; +} -/*! 
ZSTD_buildSeqTable() : - * @return : nb bytes read from src, - * or an error code if it fails */ -static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr, - symbolEncodingType_e type, unsigned max, U32 maxLog, - const void* src, size_t srcSize, - const U32* baseValue, const U32* nbAdditionalBits, - const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable, - int ddictIsCold, int nbSeq) +void AesGcmCtrV1::__set_supply_aad_prefix(const bool val) { + this->supply_aad_prefix = val; +__isset.supply_aad_prefix = true; +} +std::ostream& operator<<(std::ostream& out, const AesGcmCtrV1& obj) { - switch(type) - { - case set_rle : - RETURN_ERROR_IF(!srcSize, srcSize_wrong, ""); - RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, ""); - { U32 const symbol = *(const BYTE*)src; - U32 const baseline = baseValue[symbol]; - U32 const nbBits = nbAdditionalBits[symbol]; - ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits); - } - *DTablePtr = DTableSpace; - return 1; - case set_basic : - *DTablePtr = defaultTable; - return 0; - case set_repeat: - RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, ""); - /* prefetch FSE table if used */ - if (ddictIsCold && (nbSeq > 24 /* heuristic */)) { - const void* const pStart = *DTablePtr; - size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog)); - PREFETCH_AREA(pStart, pSize); - } - return 0; - case set_compressed : - { unsigned tableLog; - S16 norm[MaxSeq+1]; - size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); - RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, ""); - RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, ""); - ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog); - *DTablePtr = DTableSpace; - return headerSize; - } - default : - assert(0); - RETURN_ERROR(GENERIC, "impossible"); - } + obj.printTo(out); + return out; } -size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, 
- const void* src, size_t srcSize) -{ - const BYTE* const istart = (const BYTE* const)src; - const BYTE* const iend = istart + srcSize; - const BYTE* ip = istart; - int nbSeq; - DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); - /* check */ - RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, ""); +uint32_t AesGcmCtrV1::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - /* SeqHead */ - nbSeq = *ip++; - if (!nbSeq) { - *nbSeqPtr=0; - RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, ""); - return 1; - } - if (nbSeq > 0x7F) { - if (nbSeq == 0xFF) { - RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); - nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; - } else { - RETURN_ERROR_IF(ip >= iend, srcSize_wrong, ""); - nbSeq = ((nbSeq-0x80)<<8) + *ip++; - } - } - *nbSeqPtr = nbSeq; + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; - /* FSE table descriptors */ - RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */ - { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); - symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); - symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); - ip++; + xfer += iprot->readStructBegin(fname); - /* Build DTables */ - { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, - LLtype, MaxLL, LLFSELog, - ip, iend-ip, - ZSTDConstants::LL_base, ZSTDInternalConstants::LL_bits, - LL_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); - RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed"); - ip += llhSize; - } + using ::duckdb_apache::thrift::protocol::TProtocolException; - { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, - OFtype, MaxOff, OffFSELog, - ip, iend-ip, - ZSTDConstants::OF_base, 
ZSTDConstants::OF_bits, - OF_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); - RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed"); - ip += ofhSize; - } - { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr, - MLtype, MaxML, MLFSELog, - ip, iend-ip, - ZSTDConstants::ML_base, ZSTDInternalConstants::ML_bits, - ML_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); - RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed"); - ip += mlhSize; + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->aad_prefix); + this->__isset.aad_prefix = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->aad_file_unique); + this->__isset.aad_file_unique = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->supply_aad_prefix); + this->__isset.supply_aad_prefix = true; + } else { + xfer += iprot->skip(ftype); } + break; + default: + xfer += iprot->skip(ftype); + break; } + xfer += iprot->readFieldEnd(); + } - return ip-istart; -} + xfer += iprot->readStructEnd(); + return xfer; +} -typedef struct { - size_t litLength; - size_t matchLength; - size_t offset; - const BYTE* match; -} seq_t; +uint32_t AesGcmCtrV1::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("AesGcmCtrV1"); -typedef struct { - size_t state; - const ZSTD_seqSymbol* table; -} ZSTD_fseState; + if (this->__isset.aad_prefix) { + 
xfer += oprot->writeFieldBegin("aad_prefix", ::duckdb_apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeBinary(this->aad_prefix); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.aad_file_unique) { + xfer += oprot->writeFieldBegin("aad_file_unique", ::duckdb_apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeBinary(this->aad_file_unique); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.supply_aad_prefix) { + xfer += oprot->writeFieldBegin("supply_aad_prefix", ::duckdb_apache::thrift::protocol::T_BOOL, 3); + xfer += oprot->writeBool(this->supply_aad_prefix); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} -typedef struct { - BIT_DStream_t DStream; - ZSTD_fseState stateLL; - ZSTD_fseState stateOffb; - ZSTD_fseState stateML; - size_t prevOffset[ZSTD_REP_NUM]; - const BYTE* prefixStart; - const BYTE* dictEnd; - size_t pos; -} seqState_t; +void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b) { + using ::std::swap; + swap(a.aad_prefix, b.aad_prefix); + swap(a.aad_file_unique, b.aad_file_unique); + swap(a.supply_aad_prefix, b.supply_aad_prefix); + swap(a.__isset, b.__isset); +} -/*! ZSTD_overlapCopy8() : - * Copies 8 bytes from ip to op and updates op and ip where ip <= op. - * If the offset is < 8 then the offset is spread to at least 8 bytes. 
- * - * Precondition: *ip <= *op - * Postcondition: *op - *op >= 8 - */ -HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { - assert(*ip <= *op); - if (offset < 8) { - /* close range match, overlap */ - static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ - static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ - int const sub2 = dec64table[offset]; - (*op)[0] = (*ip)[0]; - (*op)[1] = (*ip)[1]; - (*op)[2] = (*ip)[2]; - (*op)[3] = (*ip)[3]; - *ip += dec32table[offset]; - ZSTD_copy4(*op+4, *ip); - *ip -= sub2; - } else { - ZSTD_copy8(*op, *ip); - } - *ip += 8; - *op += 8; - assert(*op - *ip >= 8); +AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other167) { + aad_prefix = other167.aad_prefix; + aad_file_unique = other167.aad_file_unique; + supply_aad_prefix = other167.supply_aad_prefix; + __isset = other167.__isset; +} +AesGcmCtrV1& AesGcmCtrV1::operator=(const AesGcmCtrV1& other168) { + aad_prefix = other168.aad_prefix; + aad_file_unique = other168.aad_file_unique; + supply_aad_prefix = other168.supply_aad_prefix; + __isset = other168.__isset; + return *this; +} +void AesGcmCtrV1::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "AesGcmCtrV1("; + out << "aad_prefix="; (__isset.aad_prefix ? (out << to_string(aad_prefix)) : (out << "")); + out << ", " << "aad_file_unique="; (__isset.aad_file_unique ? (out << to_string(aad_file_unique)) : (out << "")); + out << ", " << "supply_aad_prefix="; (__isset.supply_aad_prefix ? (out << to_string(supply_aad_prefix)) : (out << "")); + out << ")"; } -/*! ZSTD_safecopy() : - * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer - * and write up to 16 bytes past oend_w (op >= oend_w is allowed). - * This function is only called in the uncommon case where the sequence is near the end of the block. 
It - * should be fast for a single long sequence, but can be slow for several short sequences. - * - * @param ovtype controls the overlap detection - * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. - * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart. - * The src buffer must be before the dst buffer. - */ -static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) { - ptrdiff_t const diff = op - ip; - BYTE* const oend = op + length; - assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) || - (ovtype == ZSTD_overlap_src_before_dst && diff >= 0)); +EncryptionAlgorithm::~EncryptionAlgorithm() throw() { +} - if (length < 8) { - /* Handle short lengths. */ - while (op < oend) *op++ = *ip++; - return; - } - if (ovtype == ZSTD_overlap_src_before_dst) { - /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */ - assert(length >= 8); - ZSTD_overlapCopy8(&op, &ip, diff); - assert(op - ip >= 8); - assert(op <= oend); - } - if (oend <= oend_w) { - /* No risk of overwrite. */ - ZSTD_wildcopy(op, ip, length, ovtype); - return; - } - if (op <= oend_w) { - /* Wildcopy until we get close to the end. */ - assert(oend > oend_w); - ZSTD_wildcopy(op, ip, oend_w - op, ovtype); - ip += oend_w - op; - op = oend_w; - } - /* Handle the leftovers. */ - while (op < oend) *op++ = *ip++; +void EncryptionAlgorithm::__set_AES_GCM_V1(const AesGcmV1& val) { + this->AES_GCM_V1 = val; +__isset.AES_GCM_V1 = true; } -/* ZSTD_execSequenceEnd(): - * This version handles cases that are near the end of the output buffer. It requires - * more careful checks to make sure there is no overflow. By separating out these hard - * and unlikely cases, we can speed up the common cases. 
- * - * NOTE: This function needs to be fast for a single long sequence, but doesn't need - * to be optimized for many small sequences, since those fall into ZSTD_execSequence(). - */ -FORCE_NOINLINE -size_t ZSTD_execSequenceEnd(BYTE* op, - BYTE* const oend, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, - const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +void EncryptionAlgorithm::__set_AES_GCM_CTR_V1(const AesGcmCtrV1& val) { + this->AES_GCM_CTR_V1 = val; +__isset.AES_GCM_CTR_V1 = true; +} +std::ostream& operator<<(std::ostream& out, const EncryptionAlgorithm& obj) { - BYTE* const oLitEnd = op + sequence.litLength; - size_t const sequenceLength = sequence.litLength + sequence.matchLength; - const BYTE* const iLitEnd = *litPtr + sequence.litLength; - const BYTE* match = oLitEnd - sequence.offset; - BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + obj.printTo(out); + return out; +} - /* bounds checks : careful of address space overflow in 32-bit mode */ - RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer"); - RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer"); - assert(op < op + sequenceLength); - assert(oLitEnd < op + sequenceLength); - /* copy literals */ - ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap); - op = oLitEnd; - *litPtr = iLitEnd; +uint32_t EncryptionAlgorithm::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - /* copy Match */ - if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { - /* offset beyond prefix */ - RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, ""); - match = dictEnd - (prefixStart-match); - if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); - return sequenceLength; - } - /* span extDict & currentPrefixSegment */ - { 
size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); - op = oLitEnd + length1; - sequence.matchLength -= length1; - match = prefixStart; - } } - ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst); - return sequenceLength; -} + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; -HINT_INLINE -size_t ZSTD_execSequence(BYTE* op, - BYTE* const oend, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, - const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) -{ - BYTE* const oLitEnd = op + sequence.litLength; - size_t const sequenceLength = sequence.litLength + sequence.matchLength; - BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ - BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */ - const BYTE* const iLitEnd = *litPtr + sequence.litLength; - const BYTE* match = oLitEnd - sequence.offset; + xfer += iprot->readStructBegin(fname); - assert(op != NULL /* Precondition */); - assert(oend_w < oend /* No underflow */); - /* Handle edge cases in a slow path: - * - Read beyond end of literals - * - Match end is within WILDCOPY_OVERLIMIT of oend - * - 32-bit mode and the match length overflows - */ - if (UNLIKELY( - iLitEnd > litLimit || - oMatchEnd > oend_w || - (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH))) - return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); + using ::duckdb_apache::thrift::protocol::TProtocolException; - /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ - assert(op <= oLitEnd /* No overflow */); - assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */); - assert(oMatchEnd <= oend /* No underflow */); - assert(iLitEnd <= litLimit 
/* Literal length is in bounds */); - assert(oLitEnd <= oend_w /* Can wildcopy literals */); - assert(oMatchEnd <= oend_w /* Can wildcopy matches */); - /* Copy Literals: - * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9. - * We likely don't need the full 32-byte wildcopy. - */ - assert(WILDCOPY_OVERLENGTH >= 16); - ZSTD_copy16(op, (*litPtr)); - if (UNLIKELY(sequence.litLength > 16)) { - ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap); + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; } - op = oLitEnd; - *litPtr = iLitEnd; /* update for next sequence */ - - /* Copy Match */ - if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { - /* offset beyond prefix -> go into extDict */ - RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); - match = dictEnd + (match - prefixStart); - if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); - return sequenceLength; + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->AES_GCM_V1.read(iprot); + this->__isset.AES_GCM_V1 = true; + } else { + xfer += iprot->skip(ftype); } - /* span extDict & currentPrefixSegment */ - { size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); - op = oLitEnd + length1; - sequence.matchLength -= length1; - match = prefixStart; - } } - /* Match within prefix of 1 or more bytes */ - assert(op <= oMatchEnd); - assert(oMatchEnd <= oend_w); - assert(match >= prefixStart); - assert(sequence.matchLength >= 1); - - /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy - * without overlap checking. - */ - if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) { - /* We bet on a full wildcopy for matches, since we expect matches to be - * longer than literals (in general). 
In silesia, ~10% of matches are longer - * than 16 bytes. - */ - ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap); - return sequenceLength; - } - assert(sequence.offset < WILDCOPY_VECLEN); - - /* Copy 8 bytes and spread the offset to be >= 8. */ - ZSTD_overlapCopy8(&op, &match, sequence.offset); - - /* If the match length is > 8 bytes, then continue with the wildcopy. */ - if (sequence.matchLength > 8) { - assert(op < oMatchEnd); - ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->AES_GCM_CTR_V1.read(iprot); + this->__isset.AES_GCM_CTR_V1 = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; } - return sequenceLength; -} + xfer += iprot->readFieldEnd(); + } -static void -ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt) -{ - const void* ptr = dt; - const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr; - DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); - DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits", - (U32)DStatePtr->state, DTableH->tableLog); - BIT_reloadDStream(bitD); - DStatePtr->table = dt + 1; -} + xfer += iprot->readStructEnd(); -FORCE_INLINE_TEMPLATE void -ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) -{ - ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state]; - U32 const nbBits = DInfo.nbBits; - size_t const lowBits = BIT_readBits(bitD, nbBits); - DStatePtr->state = DInfo.nextState + lowBits; + return xfer; } -FORCE_INLINE_TEMPLATE void -ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo) -{ - U32 const nbBits = DInfo.nbBits; - size_t const lowBits = BIT_readBits(bitD, nbBits); - DStatePtr->state = DInfo.nextState + lowBits; +uint32_t 
EncryptionAlgorithm::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("EncryptionAlgorithm"); + + if (this->__isset.AES_GCM_V1) { + xfer += oprot->writeFieldBegin("AES_GCM_V1", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); + xfer += this->AES_GCM_V1.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.AES_GCM_CTR_V1) { + xfer += oprot->writeFieldBegin("AES_GCM_CTR_V1", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); + xfer += this->AES_GCM_CTR_V1.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; } -/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum - * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) - * bits before reloading. This value is the maximum number of bytes we read - * after reloading when we are decoding long offsets. - */ -#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ - (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ - ? 
ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \ - : 0) +void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b) { + using ::std::swap; + swap(a.AES_GCM_V1, b.AES_GCM_V1); + swap(a.AES_GCM_CTR_V1, b.AES_GCM_CTR_V1); + swap(a.__isset, b.__isset); +} -typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; -typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e; +EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other169) { + AES_GCM_V1 = other169.AES_GCM_V1; + AES_GCM_CTR_V1 = other169.AES_GCM_CTR_V1; + __isset = other169.__isset; +} +EncryptionAlgorithm& EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other170) { + AES_GCM_V1 = other170.AES_GCM_V1; + AES_GCM_CTR_V1 = other170.AES_GCM_CTR_V1; + __isset = other170.__isset; + return *this; +} +void EncryptionAlgorithm::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "EncryptionAlgorithm("; + out << "AES_GCM_V1="; (__isset.AES_GCM_V1 ? (out << to_string(AES_GCM_V1)) : (out << "")); + out << ", " << "AES_GCM_CTR_V1="; (__isset.AES_GCM_CTR_V1 ? 
(out << to_string(AES_GCM_CTR_V1)) : (out << "")); + out << ")"; +} -FORCE_INLINE_TEMPLATE seq_t -ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch) -{ - seq_t seq; - ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state]; - ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state]; - ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state]; - U32 const llBase = llDInfo.baseValue; - U32 const mlBase = mlDInfo.baseValue; - U32 const ofBase = ofDInfo.baseValue; - BYTE const llBits = llDInfo.nbAdditionalBits; - BYTE const mlBits = mlDInfo.nbAdditionalBits; - BYTE const ofBits = ofDInfo.nbAdditionalBits; - BYTE const totalBits = llBits+mlBits+ofBits; - /* sequence */ - { size_t offset; - if (ofBits > 1) { - ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); - ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); - assert(ofBits <= MaxOff); - if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { - U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); - offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); - BIT_reloadDStream(&seqState->DStream); - if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); - assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ - } else { - offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); - } - seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; - } else { - U32 const ll0 = (llBase == 0); - if (LIKELY((ofBits == 0))) { - if (LIKELY(!ll0)) - offset = seqState->prevOffset[0]; - else { - offset = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - 
seqState->prevOffset[0] = offset; - } - } else { - offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); - { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; - temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ - if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset = temp; - } } } - seq.offset = offset; - } +FileMetaData::~FileMetaData() throw() { +} - seq.matchLength = mlBase; - if (mlBits > 0) - seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); - if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) - BIT_reloadDStream(&seqState->DStream); - if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) - BIT_reloadDStream(&seqState->DStream); - /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ - ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); +void FileMetaData::__set_version(const int32_t val) { + this->version = val; +} - seq.litLength = llBase; - if (llBits > 0) - seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); +void FileMetaData::__set_schema(const std::vector & val) { + this->schema = val; +} - if (MEM_32bits()) - BIT_reloadDStream(&seqState->DStream); +void FileMetaData::__set_num_rows(const int64_t val) { + this->num_rows = val; +} - DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", - (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); +void FileMetaData::__set_row_groups(const std::vector & val) { + this->row_groups = val; +} - if (prefetch == ZSTD_p_prefetch) { - size_t const pos = seqState->pos + seq.litLength; - const BYTE* const matchBase = (seq.offset > pos) ? 
seqState->dictEnd : seqState->prefixStart; - seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. - * No consequence though : no memory access will occur, offset is only used for prefetching */ - seqState->pos = pos + seq.matchLength; - } +void FileMetaData::__set_key_value_metadata(const std::vector & val) { + this->key_value_metadata = val; +__isset.key_value_metadata = true; +} - /* ANS state update - * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo(). - * clang-9.2.0 does 7% worse with ZSTD_updateFseState(). - * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the - * better option, so it is the default for other compilers. But, if you - * measure that it is worse, please put up a pull request. - */ - { -#if defined(__GNUC__) && !defined(__clang__) - const int kUseUpdateFseState = 1; -#else - const int kUseUpdateFseState = 0; -#endif - if (kUseUpdateFseState) { - ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ - ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ - } else { - ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */ - ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */ - } - } +void FileMetaData::__set_created_by(const std::string& val) { + this->created_by = val; +__isset.created_by = true; +} - return seq; +void FileMetaData::__set_column_orders(const std::vector & val) { + this->column_orders = val; +__isset.column_orders = true; } -#ifdef 
FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) -{ - size_t const windowSize = dctx->fParams.windowSize; - /* No dictionary used. */ - if (dctx->dictContentEndForFuzzing == NULL) return 0; - /* Dictionary is our prefix. */ - if (prefixStart == dctx->dictContentBeginForFuzzing) return 1; - /* Dictionary is not our ext-dict. */ - if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0; - /* Dictionary is not within our window size. */ - if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0; - /* Dictionary is active. */ - return 1; +void FileMetaData::__set_encryption_algorithm(const EncryptionAlgorithm& val) { + this->encryption_algorithm = val; +__isset.encryption_algorithm = true; } -MEM_STATIC void ZSTD_assertValidSequence( - ZSTD_DCtx const* dctx, - BYTE const* op, BYTE const* oend, - seq_t const seq, - BYTE const* prefixStart, BYTE const* virtualStart) +void FileMetaData::__set_footer_signing_key_metadata(const std::string& val) { + this->footer_signing_key_metadata = val; +__isset.footer_signing_key_metadata = true; +} +std::ostream& operator<<(std::ostream& out, const FileMetaData& obj) { - size_t const windowSize = dctx->fParams.windowSize; - size_t const sequenceSize = seq.litLength + seq.matchLength; - BYTE const* const oLitEnd = op + seq.litLength; - DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", - (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); - assert(op <= oend); - assert((size_t)(oend - op) >= sequenceSize); - assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX); - if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { - size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); - /* Offset must be within the dictionary. 
*/ - assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); - assert(seq.offset <= windowSize + dictSize); - } else { - /* Offset must be within our window. */ - assert(seq.offset <= windowSize); - } + obj.printTo(out); + return out; } -#endif -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG -FORCE_INLINE_TEMPLATE size_t -DONT_VECTORIZE -ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) -{ - const BYTE* ip = (const BYTE*)seqStart; - const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE* const)dst; - BYTE* const oend = ostart + maxDstSize; - BYTE* op = ostart; - const BYTE* litPtr = dctx->litPtr; - const BYTE* const litEnd = litPtr + dctx->litSize; - const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); - const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); - const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); - DEBUGLOG(5, "ZSTD_decompressSequences_body"); - (void)frame; - /* Regen sequences */ - if (nbSeq) { - seqState_t seqState; - size_t error = 0; - dctx->fseEntropy = 1; - { U32 i; for (i=0; ientropy.rep[i]; } - RETURN_ERROR_IF( - ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), - corruption_detected, ""); - ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); - ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); - ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); - assert(dst != NULL); +uint32_t FileMetaData::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - ZSTD_STATIC_ASSERT( - BIT_DStream_unfinished < BIT_DStream_completed && - BIT_DStream_endOfBuffer < BIT_DStream_completed && - BIT_DStream_completed < BIT_DStream_overflow); + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + 
int16_t fid; -#if defined(__GNUC__) && defined(__x86_64__) - /* Align the decompression loop to 32 + 16 bytes. - * - * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression - * speed swings based on the alignment of the decompression loop. This - * performance swing is caused by parts of the decompression loop falling - * out of the DSB. The entire decompression loop should fit in the DSB, - * when it can't we get much worse performance. You can measure if you've - * hit the good case or the bad case with this perf command for some - * compressed file test.zst: - * - * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \ - * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst - * - * If you see most cycles served out of the MITE you've hit the bad case. - * If you see most cycles served out of the DSB you've hit the good case. - * If it is pretty even then you may be in an okay case. - * - * I've been able to reproduce this issue on the following CPUs: - * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9 - * Use Instruments->Counters to get DSB/MITE cycles. - * I never got performance swings, but I was able to - * go from the good case of mostly DSB to half of the - * cycles served from MITE. - * - Coffeelake: Intel i9-9900k - * - * I haven't been able to reproduce the instability or DSB misses on any - * of the following CPUS: - * - Haswell - * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH - * - Skylake - * - * If you are seeing performance stability this script can help test. - * It tests on 4 commits in zstd where I saw performance change. 
- * - * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4 - */ - __asm__(".p2align 5"); - __asm__("nop"); - __asm__(".p2align 4"); -#endif - for ( ; ; ) { - seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch); - size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); -#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) - assert(!ZSTD_isError(oneSeqSize)); - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); -#endif - DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); - BIT_reloadDStream(&(seqState.DStream)); - /* gcc and clang both don't like early returns in this loop. - * gcc doesn't like early breaks either. - * Instead save an error and report it at the end. - * When there is an error, don't increment op, so we don't - * overwrite. - */ - if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize; - else op += oneSeqSize; - if (UNLIKELY(!--nbSeq)) break; - } + xfer += iprot->readStructBegin(fname); - /* check if reached exact end */ - DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); - if (ZSTD_isError(error)) return error; - RETURN_ERROR_IF(nbSeq, corruption_detected, ""); - RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); - /* save reps for next block */ - { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } - } + using ::duckdb_apache::thrift::protocol::TProtocolException; - /* last literal segment */ - { size_t const lastLLSize = litEnd - litPtr; - RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); - if (op != NULL) { - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; + bool isset_version = false; + bool isset_schema = false; + bool isset_num_rows = false; + bool isset_row_groups = false; + + while (true) + { + xfer += 
iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->version); + isset_version = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { + { + this->schema.clear(); + uint32_t _size171; + ::duckdb_apache::thrift::protocol::TType _etype174; + xfer += iprot->readListBegin(_etype174, _size171); + this->schema.resize(_size171); + uint32_t _i175; + for (_i175 = 0; _i175 < _size171; ++_i175) + { + xfer += this->schema[_i175].read(iprot); + } + xfer += iprot->readListEnd(); + } + isset_schema = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->num_rows); + isset_num_rows = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { + { + this->row_groups.clear(); + uint32_t _size176; + ::duckdb_apache::thrift::protocol::TType _etype179; + xfer += iprot->readListBegin(_etype179, _size176); + this->row_groups.resize(_size176); + uint32_t _i180; + for (_i180 = 0; _i180 < _size176; ++_i180) + { + xfer += this->row_groups[_i180].read(iprot); + } + xfer += iprot->readListEnd(); + } + isset_row_groups = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { + { + this->key_value_metadata.clear(); + uint32_t _size181; + ::duckdb_apache::thrift::protocol::TType _etype184; + xfer += iprot->readListBegin(_etype184, _size181); + this->key_value_metadata.resize(_size181); + uint32_t _i185; + for (_i185 = 0; _i185 < _size181; ++_i185) + { + xfer += this->key_value_metadata[_i185].read(iprot); + } + xfer += iprot->readListEnd(); + } + this->__isset.key_value_metadata = true; + } else { 
+ xfer += iprot->skip(ftype); } + break; + case 6: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->created_by); + this->__isset.created_by = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { + { + this->column_orders.clear(); + uint32_t _size186; + ::duckdb_apache::thrift::protocol::TType _etype189; + xfer += iprot->readListBegin(_etype189, _size186); + this->column_orders.resize(_size186); + uint32_t _i190; + for (_i190 = 0; _i190 < _size186; ++_i190) + { + xfer += this->column_orders[_i190].read(iprot); + } + xfer += iprot->readListEnd(); + } + this->__isset.column_orders = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->encryption_algorithm.read(iprot); + this->__isset.encryption_algorithm = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 9: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->footer_signing_key_metadata); + this->__isset.footer_signing_key_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; } + xfer += iprot->readFieldEnd(); + } - return op-ostart; -} - -static size_t -ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) -{ - return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -} -#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ - -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT -FORCE_INLINE_TEMPLATE size_t -ZSTD_decompressSequencesLong_body( - ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) 
-{ - const BYTE* ip = (const BYTE*)seqStart; - const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE* const)dst; - BYTE* const oend = ostart + maxDstSize; - BYTE* op = ostart; - const BYTE* litPtr = dctx->litPtr; - const BYTE* const litEnd = litPtr + dctx->litSize; - const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); - const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); - const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); - (void)frame; + xfer += iprot->readStructEnd(); - /* Regen sequences */ - if (nbSeq) { -#define STORED_SEQS 4 -#define STORED_SEQS_MASK (STORED_SEQS-1) -#define ADVANCED_SEQS 4 - seq_t sequences[STORED_SEQS]; - int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); - seqState_t seqState; - int seqNb; - dctx->fseEntropy = 1; - { int i; for (i=0; ientropy.rep[i]; } - seqState.prefixStart = prefixStart; - seqState.pos = (size_t)(op-prefixStart); - seqState.dictEnd = dictEnd; - assert(dst != NULL); - assert(iend >= ip); - RETURN_ERROR_IF( - ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), - corruption_detected, ""); - ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); - ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); - ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + if (!isset_version) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_schema) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_num_rows) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_row_groups) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} - /* prepare in advance */ - for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNbwriteStructBegin("FileMetaData"); - /* decode and decompress */ - for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNbwriteFieldBegin("version", 
::duckdb_apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(this->version); + xfer += oprot->writeFieldEnd(); - /* finish queue */ - seqNb -= seqAdvance; - for ( ; seqNbwriteFieldBegin("schema", ::duckdb_apache::thrift::protocol::T_LIST, 2); + { + xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->schema.size())); + std::vector ::const_iterator _iter191; + for (_iter191 = this->schema.begin(); _iter191 != this->schema.end(); ++_iter191) + { + xfer += (*_iter191).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); - /* save reps for next block */ - { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } + xfer += oprot->writeFieldBegin("num_rows", ::duckdb_apache::thrift::protocol::T_I64, 3); + xfer += oprot->writeI64(this->num_rows); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("row_groups", ::duckdb_apache::thrift::protocol::T_LIST, 4); + { + xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->row_groups.size())); + std::vector ::const_iterator _iter192; + for (_iter192 = this->row_groups.begin(); _iter192 != this->row_groups.end(); ++_iter192) + { + xfer += (*_iter192).write(oprot); } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); - /* last literal segment */ - { size_t const lastLLSize = litEnd - litPtr; - RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); - if (op != NULL) { - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; - } + if (this->__isset.key_value_metadata) { + xfer += oprot->writeFieldBegin("key_value_metadata", ::duckdb_apache::thrift::protocol::T_LIST, 5); + { + xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); + std::vector ::const_iterator _iter193; + for (_iter193 = this->key_value_metadata.begin(); _iter193 != this->key_value_metadata.end(); ++_iter193) + { + 
xfer += (*_iter193).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.created_by) { + xfer += oprot->writeFieldBegin("created_by", ::duckdb_apache::thrift::protocol::T_STRING, 6); + xfer += oprot->writeString(this->created_by); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.column_orders) { + xfer += oprot->writeFieldBegin("column_orders", ::duckdb_apache::thrift::protocol::T_LIST, 7); + { + xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->column_orders.size())); + std::vector ::const_iterator _iter194; + for (_iter194 = this->column_orders.begin(); _iter194 != this->column_orders.end(); ++_iter194) + { + xfer += (*_iter194).write(oprot); + } + xfer += oprot->writeListEnd(); } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.encryption_algorithm) { + xfer += oprot->writeFieldBegin("encryption_algorithm", ::duckdb_apache::thrift::protocol::T_STRUCT, 8); + xfer += this->encryption_algorithm.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.footer_signing_key_metadata) { + xfer += oprot->writeFieldBegin("footer_signing_key_metadata", ::duckdb_apache::thrift::protocol::T_STRING, 9); + xfer += oprot->writeBinary(this->footer_signing_key_metadata); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} - return op-ostart; +void swap(FileMetaData &a, FileMetaData &b) { + using ::std::swap; + swap(a.version, b.version); + swap(a.schema, b.schema); + swap(a.num_rows, b.num_rows); + swap(a.row_groups, b.row_groups); + swap(a.key_value_metadata, b.key_value_metadata); + swap(a.created_by, b.created_by); + swap(a.column_orders, b.column_orders); + swap(a.encryption_algorithm, b.encryption_algorithm); + swap(a.footer_signing_key_metadata, b.footer_signing_key_metadata); + swap(a.__isset, b.__isset); } -static size_t 
-ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) -{ - return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +FileMetaData::FileMetaData(const FileMetaData& other195) { + version = other195.version; + schema = other195.schema; + num_rows = other195.num_rows; + row_groups = other195.row_groups; + key_value_metadata = other195.key_value_metadata; + created_by = other195.created_by; + column_orders = other195.column_orders; + encryption_algorithm = other195.encryption_algorithm; + footer_signing_key_metadata = other195.footer_signing_key_metadata; + __isset = other195.__isset; +} +FileMetaData& FileMetaData::operator=(const FileMetaData& other196) { + version = other196.version; + schema = other196.schema; + num_rows = other196.num_rows; + row_groups = other196.row_groups; + key_value_metadata = other196.key_value_metadata; + created_by = other196.created_by; + column_orders = other196.column_orders; + encryption_algorithm = other196.encryption_algorithm; + footer_signing_key_metadata = other196.footer_signing_key_metadata; + __isset = other196.__isset; + return *this; +} + +void FileMetaData::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "FileMetaData("; + out << "version=" << to_string(version); + out << ", " << "schema=" << to_string(schema); + out << ", " << "num_rows=" << to_string(num_rows); + out << ", " << "row_groups=" << to_string(row_groups); + out << ", " << "key_value_metadata="; (__isset.key_value_metadata ? (out << to_string(key_value_metadata)) : (out << "")); + out << ", " << "created_by="; (__isset.created_by ? (out << to_string(created_by)) : (out << "")); + out << ", " << "column_orders="; (__isset.column_orders ? 
(out << to_string(column_orders)) : (out << "")); + out << ", " << "encryption_algorithm="; (__isset.encryption_algorithm ? (out << to_string(encryption_algorithm)) : (out << "")); + out << ", " << "footer_signing_key_metadata="; (__isset.footer_signing_key_metadata ? (out << to_string(footer_signing_key_metadata)) : (out << "")); + out << ")"; } -#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ +FileCryptoMetaData::~FileCryptoMetaData() throw() { +} -#if DYNAMIC_BMI2 -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG -#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ +void FileCryptoMetaData::__set_encryption_algorithm(const EncryptionAlgorithm& val) { + this->encryption_algorithm = val; +} -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT -static TARGET_ATTRIBUTE("bmi2") size_t -ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) +void FileCryptoMetaData::__set_key_metadata(const std::string& val) { + this->key_metadata = val; +__isset.key_metadata = true; +} +std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj) { - return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + obj.printTo(out); + return out; } -#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ -#endif /* DYNAMIC_BMI2 */ -typedef size_t (*ZSTD_decompressSequences_t)( - ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame); +uint32_t FileCryptoMetaData::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG -static size_t -ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) -{ - DEBUGLOG(5, 
"ZSTD_decompressSequences"); - return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -} -#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::duckdb_apache::thrift::protocol::TType ftype; + int16_t fid; + xfer += iprot->readStructBegin(fname); -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT -/* ZSTD_decompressSequencesLong() : - * decompression function triggered when a minimum share of offsets is considered "long", - * aka out of cache. - * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance". - * This function will try to mitigate main memory latency through the use of prefetching */ -static size_t -ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) -{ - DEBUGLOG(5, "ZSTD_decompressSequencesLong"); -#if DYNAMIC_BMI2 - if (dctx->bmi2) { - return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + using ::duckdb_apache::thrift::protocol::TProtocolException; + + bool isset_encryption_algorithm = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { + break; } -#endif - return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -} -#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + switch (fid) + { + case 1: + if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { + xfer += this->encryption_algorithm.read(iprot); + isset_encryption_algorithm = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == 
::duckdb_apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->key_metadata); + this->__isset.key_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + xfer += iprot->readStructEnd(); + if (!isset_encryption_algorithm) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} -#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ - !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) -/* ZSTD_getLongOffsetsShare() : - * condition : offTable must be valid - * @return : "share" of long offsets (arbitrarily defined as > (1<<23)) - * compared to maximum possible of (1<writeStructBegin("FileCryptoMetaData"); - assert(max <= (1 << OffFSELog)); /* max not too large */ - for (u=0; u 22) total += 1; - } + xfer += oprot->writeFieldBegin("encryption_algorithm", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); + xfer += this->encryption_algorithm.write(oprot); + xfer += oprot->writeFieldEnd(); - assert(tableLog <= OffFSELog); - total <<= (OffFSELog - tableLog); /* scale to OffFSELog */ + if (this->__isset.key_metadata) { + xfer += oprot->writeFieldBegin("key_metadata", ::duckdb_apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeBinary(this->key_metadata); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} - return total; +void swap(FileCryptoMetaData &a, FileCryptoMetaData &b) { + using ::std::swap; + swap(a.encryption_algorithm, b.encryption_algorithm); + swap(a.key_metadata, b.key_metadata); + swap(a.__isset, b.__isset); } -#endif -size_t -ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, const int frame) -{ /* blockType == blockCompressed */ - const BYTE* ip = (const BYTE*)src; - /* isLongOffset must be true if there are long offsets. 
- * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN. - * We don't expect that to be the case in 64-bit mode. - * In block mode, window size is not known, so we have to be conservative. - * (note: but it could be evaluated from current-lowLimit) - */ - ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); - DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); +FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other197) { + encryption_algorithm = other197.encryption_algorithm; + key_metadata = other197.key_metadata; + __isset = other197.__isset; +} +FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other198) { + encryption_algorithm = other198.encryption_algorithm; + key_metadata = other198.key_metadata; + __isset = other198.__isset; + return *this; +} +void FileCryptoMetaData::printTo(std::ostream& out) const { + using ::duckdb_apache::thrift::to_string; + out << "FileCryptoMetaData("; + out << "encryption_algorithm=" << to_string(encryption_algorithm); + out << ", " << "key_metadata="; (__isset.key_metadata ? (out << to_string(key_metadata)) : (out << "")); + out << ")"; +} - RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); +}} // namespace - /* Decode literals section */ - { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); - DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize); - if (ZSTD_isError(litCSize)) return litCSize; - ip += litCSize; - srcSize -= litCSize; - } - /* Build Decoding Tables */ - { - /* These macros control at build-time which decompressor implementation - * we use. If neither is defined, we do some inspection and dispatch at - * runtime. 
- */ -#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ - !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) - int usePrefetchDecoder = dctx->ddictIsCold; -#endif - int nbSeq; - size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); - if (ZSTD_isError(seqHSize)) return seqHSize; - ip += seqHSize; - srcSize -= seqHSize; +// LICENSE_CHANGE_END - RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); -#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ - !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) - if ( !usePrefetchDecoder - && (!frame || (dctx->fParams.windowSize > (1<<24))) - && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */ - U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr); - U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ - usePrefetchDecoder = (shareLongOffsets >= minShare); - } -#endif +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 +// See the end of this file for a list - dctx->ddictIsCold = 0; +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ -#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ - !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) - if (usePrefetchDecoder) -#endif -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT - return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); -#endif -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG - /* else */ - return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); -#endif - } -} +namespace duckdb_apache { +namespace thrift { +namespace protocol { -void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) -{ - if (dst != dctx->previousDstEnd) { /* not contiguous */ - dctx->dictEnd = dctx->previousDstEnd; - dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); - dctx->prefixStart = dst; - dctx->previousDstEnd = dst; - } +TProtocol::~TProtocol() = default; +uint32_t TProtocol::skip_virt(TType type) { + return ::duckdb_apache::thrift::protocol::skip(*this, type); } +TProtocolFactory::~TProtocolFactory() = default; -size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - size_t dSize; - ZSTD_checkContinuity(dctx, dst); - dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0); - dctx->previousDstEnd = (char*)dst + dSize; - return dSize; -} +}}} // duckdb_apache::thrift::protocol -} // LICENSE_CHANGE_END diff --git a/velox/external/duckdb/parquet-amalgamation.hpp b/velox/external/duckdb/parquet-amalgamation.hpp index 7336d2dec20f..f5b1a29c072a 100644 --- a/velox/external/duckdb/parquet-amalgamation.hpp +++ b/velox/external/duckdb/parquet-amalgamation.hpp @@ -1,5 +1,5 @@ /* -Copyright 2018 DuckDB Contributors (see https://github.com/cwida/duckdb/graphs/contributors) +Copyright 2018 DuckDB Contributors (see https://github.com/duckdb/duckdb/graphs/contributors) Permission is hereby granted, free of charge, to any person 
obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: @@ -36,6 +36,9 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI #include "duckdb.hpp" +#ifndef DUCKDB_AMALGAMATION +#include "duckdb/common/allocator.hpp" +#endif #include @@ -89,27 +92,36 @@ class ResizeableBuffer : public ByteBuffer { public: ResizeableBuffer() { } - - ResizeableBuffer(uint64_t new_size) { - resize(new_size); + ResizeableBuffer(Allocator &allocator, uint64_t new_size) { + resize(allocator, new_size); } - void resize(uint64_t new_size) { + void resize(Allocator &allocator, uint64_t new_size) { + len = new_size; + if (new_size == 0) { + return; + } if (new_size > alloc_len) { alloc_len = new_size; - auto new_holder = std::unique_ptr(new char[alloc_len]); - holder = move(new_holder); + allocated_data = allocator.Allocate(alloc_len); + ptr = (char *)allocated_data->get(); } - len = new_size; - ptr = holder.get(); } private: - std::unique_ptr holder = nullptr; + unique_ptr allocated_data; idx_t alloc_len = 0; }; } // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// column_reader.hpp +// +// +//===----------------------------------------------------------------------===// + @@ -1137,7 +1149,7 @@ class TTransport { /** * Remove len bytes from the transport. This should always follow a borrow * of at least len bytes, and should always succeed. - * TODO: Is there any transport that could borrow but fail to + * TODO(dreiss): Is there any transport that could borrow but fail to * consume, or that would require a buffer to dump the consumed data? 
* * @param len How many bytes to consume @@ -6588,7 +6600,7 @@ class TBufferBase : public TVirtualTransport { const uint8_t* borrow(uint8_t* buf, uint32_t* len) { if (TDB_LIKELY(static_cast(*len) <= rBound_ - rBase_)) { // With strict aliasing, writing to len shouldn't force us to - // refetch rBase_ from memory. TODO: Verify this. + // refetch rBase_ from memory. TODO(dreiss): Verify this. *len = static_cast(rBound_ - rBase_); return rBase_; } @@ -6684,7 +6696,7 @@ class TMemoryBuffer : public TVirtualTransport { rBase_ = buffer_; rBound_ = buffer_ + wPos; - // TODO: Investigate NULL-ing this if !owner. + // TODO(dreiss): Investigate NULL-ing this if !owner. wBase_ = buffer_ + wPos; wBound_ = buffer_ + bufferSize_; @@ -6778,7 +6790,7 @@ class TMemoryBuffer : public TVirtualTransport { void close() override {} - // TODO: Make bufPtr const. + // TODO(dreiss): Make bufPtr const. void getBuffer(uint8_t** bufPtr, uint32_t* sz) { *bufPtr = rBase_; *sz = static_cast(wBase_ - rBase_); @@ -7174,9 +7186,11 @@ unique_ptr ParquetTransformColumnStatistics(const SchemaElement #include "duckdb/common/types/string_type.hpp" #include "duckdb/common/types/chunk_collection.hpp" #include "duckdb/common/operator/cast_operators.hpp" +#include "duckdb/common/types/vector_cache.hpp" #endif namespace duckdb { +class ParquetReader; using duckdb_apache::thrift::protocol::TProtocol; @@ -7189,24 +7203,15 @@ using duckdb_parquet::format::Type; typedef std::bitset parquet_filter_t; class ColumnReader { - public: - static unique_ptr CreateReader(const LogicalType &type_p, const SchemaElement &schema_p, - idx_t schema_idx_p, idx_t max_define, idx_t max_repeat); - - ColumnReader(LogicalType type_p, const SchemaElement &schema_p, idx_t file_idx_p, idx_t max_define_p, - idx_t max_repeat_p) - : schema(schema_p), file_idx(file_idx_p), max_define(max_define_p), max_repeat(max_repeat_p), type(type_p), - page_rows_available(0) { - - // dummies for Skip() - dummy_result.Initialize(Type()); - 
none_filter.none(); - dummy_define.resize(STANDARD_VECTOR_SIZE); - dummy_repeat.resize(STANDARD_VECTOR_SIZE); - }; - - virtual void IntializeRead(const std::vector &columns, TProtocol &protocol_p) { + static unique_ptr CreateReader(ParquetReader &reader, const LogicalType &type_p, + const SchemaElement &schema_p, idx_t schema_idx_p, idx_t max_define, + idx_t max_repeat); + + ColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t file_idx_p, + idx_t max_define_p, idx_t max_repeat_p); + + virtual void InitializeRead(const std::vector &columns, TProtocol &protocol_p) { D_ASSERT(file_idx < columns.size()); chunk = &columns[file_idx]; protocol = &protocol_p; @@ -7225,7 +7230,6 @@ class ColumnReader { } group_rows_available = chunk->meta_data.num_values; } - virtual ~ColumnReader(); virtual idx_t Read(uint64_t num_values, parquet_filter_t &filter, uint8_t *define_out, uint8_t *repeat_out, @@ -7246,7 +7250,8 @@ class ColumnReader { } unique_ptr Stats(const std::vector &columns) { - if (Type().id() == LogicalTypeId::LIST || Type().id() == LogicalTypeId::STRUCT) { + if (Type().id() == LogicalTypeId::LIST || Type().id() == LogicalTypeId::STRUCT || + Type().id() == LogicalTypeId::MAP) { return nullptr; } return ParquetTransformColumnStatistics(Schema(), Type(), columns[file_idx]); @@ -7282,18 +7287,21 @@ class ColumnReader { return max_repeat > 0; } +protected: const SchemaElement &schema; idx_t file_idx; idx_t max_define; idx_t max_repeat; + ParquetReader &reader; + LogicalType type; + private: void PrepareRead(parquet_filter_t &filter); void PreparePage(idx_t compressed_page_size, idx_t uncompressed_page_size); void PrepareDataPage(PageHeader &page_hdr); - LogicalType type; const duckdb_parquet::format::ColumnChunk *chunk; duckdb_apache::thrift::protocol::TProtocol *protocol; @@ -7316,349 +7324,6 @@ class ColumnReader { ResizeableBuffer dummy_repeat; }; -template -struct TemplatedParquetValueConversion { - static VALUE_TYPE 
DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { - D_ASSERT(offset < dict.len / sizeof(VALUE_TYPE)); - return ((VALUE_TYPE *)dict.ptr)[offset]; - } - - static VALUE_TYPE PlainRead(ByteBuffer &plain_data, ColumnReader &reader) { - return plain_data.read(); - } - - static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { - plain_data.inc(sizeof(VALUE_TYPE)); - } -}; - -template -class TemplatedColumnReader : public ColumnReader { - -public: - TemplatedColumnReader(LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p, idx_t max_define_p, - idx_t max_repeat_p) - : ColumnReader(type_p, schema_p, schema_idx_p, max_define_p, max_repeat_p) {}; - - void Dictionary(shared_ptr data, idx_t num_entries) override { - dict = move(data); - } - - void Offsets(uint32_t *offsets, uint8_t *defines, uint64_t num_values, parquet_filter_t &filter, - idx_t result_offset, Vector &result) override { - auto result_ptr = FlatVector::GetData(result); - - idx_t offset_idx = 0; - for (idx_t row_idx = 0; row_idx < num_values; row_idx++) { - if (HasDefines() && defines[row_idx + result_offset] != max_define) { - FlatVector::SetNull(result, row_idx + result_offset, true); - continue; - } - if (filter[row_idx + result_offset]) { - VALUE_TYPE val = VALUE_CONVERSION::DictRead(*dict, offsets[offset_idx++], *this); - if (!Value::IsValid(val)) { - FlatVector::SetNull(result, row_idx + result_offset, true); - continue; - } - result_ptr[row_idx + result_offset] = val; - } else { - offset_idx++; - } - } - } - - void Plain(shared_ptr plain_data, uint8_t *defines, uint64_t num_values, parquet_filter_t &filter, - idx_t result_offset, Vector &result) override { - auto result_ptr = FlatVector::GetData(result); - for (idx_t row_idx = 0; row_idx < num_values; row_idx++) { - if (HasDefines() && defines[row_idx + result_offset] != max_define) { - FlatVector::SetNull(result, row_idx + result_offset, true); - continue; - } - if (filter[row_idx + result_offset]) { - 
VALUE_TYPE val = VALUE_CONVERSION::PlainRead(*plain_data, *this); - if (!Value::IsValid(val)) { - FlatVector::SetNull(result, row_idx + result_offset, true); - continue; - } - result_ptr[row_idx + result_offset] = val; - } else { // there is still some data there that we have to skip over - VALUE_CONVERSION::PlainSkip(*plain_data, *this); - } - } - } - - shared_ptr dict; -}; - -struct StringParquetValueConversion { - static string_t DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader); - - static string_t PlainRead(ByteBuffer &plain_data, ColumnReader &reader); - - static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader); -}; - -class StringColumnReader : public TemplatedColumnReader { - -public: - StringColumnReader(LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p, idx_t max_define_p, - idx_t max_repeat_p) - : TemplatedColumnReader(type_p, schema_p, schema_idx_p, max_define_p, - max_repeat_p) { - fixed_width_string_length = 0; - if (schema_p.type == Type::FIXED_LEN_BYTE_ARRAY) { - D_ASSERT(schema_p.__isset.type_length); - fixed_width_string_length = schema_p.type_length; - } - }; - - void Dictionary(shared_ptr dictionary_data, idx_t num_entries) override; - - unique_ptr dict_strings; - void VerifyString(const char *str_data, idx_t str_len); - idx_t fixed_width_string_length; - -protected: - void DictReference(Vector &result) override; - void PlainReference(shared_ptr plain_data, Vector &result) override; -}; - -template -struct DecimalParquetValueConversion { - - static DUCKDB_PHYSICAL_TYPE DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { - auto dict_ptr = (DUCKDB_PHYSICAL_TYPE *)dict.ptr; - return dict_ptr[offset]; - } - - static DUCKDB_PHYSICAL_TYPE PlainRead(ByteBuffer &plain_data, ColumnReader &reader) { - DUCKDB_PHYSICAL_TYPE res = 0; - auto byte_len = (idx_t)reader.Schema().type_length; /* sure, type length needs to be a signed int */ - D_ASSERT(byte_len <= sizeof(DUCKDB_PHYSICAL_TYPE)); - 
plain_data.available(byte_len); - auto res_ptr = (uint8_t *)&res; - - // numbers are stored as two's complement so some muckery is required - bool positive = (*plain_data.ptr & 0x80) == 0; - - for (idx_t i = 0; i < byte_len; i++) { - auto byte = *(plain_data.ptr + (byte_len - i - 1)); - res_ptr[i] = positive ? byte : byte ^ 0xFF; - } - plain_data.inc(byte_len); - if (!positive) { - res += 1; - return -res; - } - return res; - } - - static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { - plain_data.inc(reader.Schema().type_length); - } -}; - -template -class DecimalColumnReader - : public TemplatedColumnReader> { - -public: - DecimalColumnReader(LogicalType type_p, const SchemaElement &schema_p, idx_t file_idx_p, idx_t max_define_p, - idx_t max_repeat_p) - : TemplatedColumnReader>( - type_p, schema_p, file_idx_p, max_define_p, max_repeat_p) {}; - -protected: - void Dictionary(shared_ptr dictionary_data, idx_t num_entries) { - this->dict = make_shared(num_entries * sizeof(DUCKDB_PHYSICAL_TYPE)); - auto dict_ptr = (DUCKDB_PHYSICAL_TYPE *)this->dict->ptr; - for (idx_t i = 0; i < num_entries; i++) { - dict_ptr[i] = DecimalParquetValueConversion::PlainRead(*dictionary_data, *this); - } - } -}; - -template -struct CallbackParquetValueConversion { - static DUCKDB_PHYSICAL_TYPE DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { - return TemplatedParquetValueConversion::DictRead(dict, offset, reader); - } - - static DUCKDB_PHYSICAL_TYPE PlainRead(ByteBuffer &plain_data, ColumnReader &reader) { - return FUNC(plain_data.read()); - } - - static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { - plain_data.inc(sizeof(PARQUET_PHYSICAL_TYPE)); - } -}; - -template -class CallbackColumnReader - : public TemplatedColumnReader> { - -public: - CallbackColumnReader(LogicalType type_p, const SchemaElement &schema_p, idx_t file_idx_p, idx_t max_define_p, - idx_t max_repeat_p) - : TemplatedColumnReader>( - type_p, schema_p, file_idx_p, 
max_define_p, max_repeat_p) {}; - -protected: - void Dictionary(shared_ptr dictionary_data, idx_t num_entries) { - this->dict = make_shared(num_entries * sizeof(DUCKDB_PHYSICAL_TYPE)); - auto dict_ptr = (DUCKDB_PHYSICAL_TYPE *)this->dict->ptr; - for (idx_t i = 0; i < num_entries; i++) { - dict_ptr[i] = FUNC(dictionary_data->read()); - } - } -}; - -struct BooleanParquetValueConversion; - -class BooleanColumnReader : public TemplatedColumnReader { -public: - BooleanColumnReader(LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p, idx_t max_define_p, - idx_t max_repeat_p) - : TemplatedColumnReader(type_p, schema_p, schema_idx_p, max_define_p, - max_repeat_p), - byte_pos(0) {}; - - uint8_t byte_pos; - - void IntializeRead(const std::vector &columns, TProtocol &protocol_p) override { - byte_pos = 0; - TemplatedColumnReader::IntializeRead(columns, protocol_p); - } -}; - -struct BooleanParquetValueConversion { - static bool DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { - throw std::runtime_error("Dicts for booleans make no sense"); - } - - static bool PlainRead(ByteBuffer &plain_data, ColumnReader &reader) { - plain_data.available(1); - auto &byte_pos = ((BooleanColumnReader &)reader).byte_pos; - bool ret = (*plain_data.ptr >> byte_pos) & 1; - byte_pos++; - if (byte_pos == 8) { - byte_pos = 0; - plain_data.inc(1); - } - return ret; - } - - static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { - PlainRead(plain_data, reader); - } -}; - -class StructColumnReader : public ColumnReader { - -public: - StructColumnReader(LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p, idx_t max_define_p, - idx_t max_repeat_p, vector> child_readers_p) - : ColumnReader(type_p, schema_p, schema_idx_p, max_define_p, max_repeat_p), - child_readers(move(child_readers_p)) { - D_ASSERT(type_p.id() == LogicalTypeId::STRUCT); - D_ASSERT(!type_p.child_types().empty()); - }; - - ColumnReader *GetChildReader(idx_t child_idx) { - 
return child_readers[child_idx].get(); - } - - void IntializeRead(const std::vector &columns, TProtocol &protocol_p) override { - for (auto &child : child_readers) { - child->IntializeRead(columns, protocol_p); - } - } - - idx_t Read(uint64_t num_values, parquet_filter_t &filter, uint8_t *define_out, uint8_t *repeat_out, - Vector &result) override { - result.Initialize(Type()); - - for (idx_t i = 0; i < Type().child_types().size(); i++) { - auto child_read = make_unique(); - child_read->Initialize(Type().child_types()[i].second); - auto child_num_values = child_readers[i]->Read(num_values, filter, define_out, repeat_out, *child_read); - if (child_num_values != num_values) { - throw std::runtime_error("Struct child row count mismatch"); - } - StructVector::AddEntry(result, Type().child_types()[i].first, move(child_read)); - } - - return num_values; - } - - virtual void Skip(idx_t num_values) override { - D_ASSERT(0); - } - - idx_t GroupRowsAvailable() override { - return child_readers[0]->GroupRowsAvailable(); - } - - vector> child_readers; -}; - -class ListColumnReader : public ColumnReader { -public: - ListColumnReader(LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p, idx_t max_define_p, - idx_t max_repeat_p, unique_ptr child_column_reader_p) - : ColumnReader(type_p, schema_p, schema_idx_p, max_define_p, max_repeat_p), - child_column_reader(move(child_column_reader_p)), overflow_child_count(0) { - - child_defines.resize(STANDARD_VECTOR_SIZE); - child_repeats.resize(STANDARD_VECTOR_SIZE); - child_defines_ptr = (uint8_t *)child_defines.ptr; - child_repeats_ptr = (uint8_t *)child_repeats.ptr; - - auto child_type = Type().child_types()[0].second; - child_result.Initialize(child_type); - - vector append_chunk_types; - append_chunk_types.push_back(child_type); - append_chunk.Initialize(append_chunk_types); - - child_filter.set(); - }; - - idx_t Read(uint64_t num_values, parquet_filter_t &filter, uint8_t *define_out, uint8_t *repeat_out, - Vector 
&result_out) override; - - virtual void Skip(idx_t num_values) override { - D_ASSERT(0); - } - - void IntializeRead(const std::vector &columns, TProtocol &protocol_p) override { - child_column_reader->IntializeRead(columns, protocol_p); - } - - idx_t GroupRowsAvailable() override { - return child_column_reader->GroupRowsAvailable(); - } - -private: - unique_ptr child_column_reader; - ResizeableBuffer child_defines; - ResizeableBuffer child_repeats; - uint8_t *child_defines_ptr; - uint8_t *child_repeats_ptr; - - Vector child_result; - parquet_filter_t child_filter; - DataChunk append_chunk; - - Vector overflow_child_vector; - idx_t overflow_child_count; -}; - } // namespace duckdb @@ -7710,10 +7375,11 @@ class FileMetaData; } // namespace duckdb_parquet namespace duckdb { +class Allocator; class ClientContext; class ChunkCollection; class BaseStatistics; -struct TableFilterSet; +class TableFilterSet; struct ParquetReaderScanState { vector group_idx_list; @@ -7734,10 +7400,10 @@ struct ParquetReaderScanState { class ParquetReader { public: - ParquetReader(unique_ptr file_handle_p, const vector &expected_types_p, - const string &initial_filename_p = string()); - ParquetReader(unique_ptr file_handle_p) - : ParquetReader(move(file_handle_p), vector(), string()) { + ParquetReader(Allocator &allocator, unique_ptr file_handle_p, + const vector &expected_types_p, const string &initial_filename_p = string()); + ParquetReader(Allocator &allocator, unique_ptr file_handle_p) + : ParquetReader(allocator, move(file_handle_p), vector(), string()) { } ParquetReader(ClientContext &context, string file_name, const vector &expected_types_p, @@ -7747,6 +7413,7 @@ class ParquetReader { } ~ParquetReader(); + Allocator &allocator; string file_name; vector return_types; vector names; @@ -7762,7 +7429,7 @@ class ParquetReader { const duckdb_parquet::format::FileMetaData *GetFileMetadata(); - static unique_ptr ReadStatistics(LogicalType &type, column_t column_index, + static unique_ptr 
ReadStatistics(ParquetReader &reader, LogicalType &type, column_t column_index, const duckdb_parquet::format::FileMetaData *file_meta_data); private: diff --git a/velox/external/duckdb/tpch/dbgen/dbgen.cpp b/velox/external/duckdb/tpch/dbgen/dbgen.cpp index 9aa37673847c..f629aac83742 100644 --- a/velox/external/duckdb/tpch/dbgen/dbgen.cpp +++ b/velox/external/duckdb/tpch/dbgen/dbgen.cpp @@ -162,7 +162,7 @@ void append_decimal(tpch_append_information &info, int64_t value) { } void append_date(tpch_append_information &info, string value) { - info.appender->AppendValue(Date::FromString(value)); + info.appender->AppendValue(Date::FromString(value)); } void append_char(tpch_append_information &info, char value) { @@ -462,7 +462,7 @@ const char *SupplierInfo::Columns[] = {"s_suppkey", "s_name", "s_address", "s const LogicalType SupplierInfo::Types[] = { LogicalType(LogicalTypeId::INTEGER), LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::INTEGER), - LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::DECIMAL, 15, 2), + LogicalType(LogicalTypeId::VARCHAR), LogicalType::DECIMAL(15, 2), LogicalType(LogicalTypeId::VARCHAR)}; struct CustomerInfo { @@ -476,7 +476,7 @@ const char *CustomerInfo::Columns[] = {"c_custkey", "c_name", "c_address", const LogicalType CustomerInfo::Types[] = { LogicalType(LogicalTypeId::INTEGER), LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::INTEGER), - LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::DECIMAL, 15, 2), + LogicalType(LogicalTypeId::VARCHAR), LogicalType::DECIMAL(15, 2), LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::VARCHAR)}; struct PartInfo { @@ -490,7 +490,7 @@ const char *PartInfo::Columns[] = {"p_partkey", "p_name", "p_mfgr", const LogicalType PartInfo::Types[] = {LogicalType(LogicalTypeId::INTEGER), LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::VARCHAR), 
LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::INTEGER), - LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::DECIMAL, 15, 2), + LogicalType(LogicalTypeId::VARCHAR), LogicalType::DECIMAL(15, 2), LogicalType(LogicalTypeId::VARCHAR)}; struct PartsuppInfo { @@ -502,7 +502,7 @@ struct PartsuppInfo { const char *PartsuppInfo::Columns[] = {"ps_partkey", "ps_suppkey", "ps_availqty", "ps_supplycost", "ps_comment"}; const LogicalType PartsuppInfo::Types[] = { LogicalType(LogicalTypeId::INTEGER), LogicalType(LogicalTypeId::INTEGER), LogicalType(LogicalTypeId::INTEGER), - LogicalType(LogicalTypeId::DECIMAL, 15, 2), LogicalType(LogicalTypeId::VARCHAR)}; + LogicalType::DECIMAL(15, 2), LogicalType(LogicalTypeId::VARCHAR)}; struct OrdersInfo { static constexpr char *Name = "orders"; @@ -514,7 +514,7 @@ const char *OrdersInfo::Columns[] = {"o_orderkey", "o_custkey", "o_ordersta "o_orderpriority", "o_clerk", "o_shippriority", "o_comment"}; const LogicalType OrdersInfo::Types[] = { LogicalType(LogicalTypeId::INTEGER), LogicalType(LogicalTypeId::INTEGER), - LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::DECIMAL, 15, 2), + LogicalType(LogicalTypeId::VARCHAR), LogicalType::DECIMAL(15, 2), LogicalType(LogicalTypeId::DATE), LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::INTEGER), LogicalType(LogicalTypeId::VARCHAR)}; @@ -532,8 +532,8 @@ const char *LineitemInfo::Columns[] = {"l_orderkey", "l_partkey", "l_su const LogicalType LineitemInfo::Types[] = { LogicalType(LogicalTypeId::INTEGER), LogicalType(LogicalTypeId::INTEGER), LogicalType(LogicalTypeId::INTEGER), LogicalType(LogicalTypeId::INTEGER), - LogicalType(LogicalTypeId::INTEGER), LogicalType(LogicalTypeId::DECIMAL, 15, 2), - LogicalType(LogicalTypeId::DECIMAL, 15, 2), LogicalType(LogicalTypeId::DECIMAL, 15, 2), + LogicalType(LogicalTypeId::INTEGER), LogicalType::DECIMAL(15, 2), + 
LogicalType::DECIMAL(15, 2), LogicalType::DECIMAL(15, 2), LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::DATE), LogicalType(LogicalTypeId::DATE), LogicalType(LogicalTypeId::DATE), LogicalType(LogicalTypeId::VARCHAR), diff --git a/velox/external/duckdb/tpch/dbgen/include/dbgen/config.h b/velox/external/duckdb/tpch/dbgen/include/dbgen/config.h index d12d39147280..d705b38438ca 100644 --- a/velox/external/duckdb/tpch/dbgen/include/dbgen/config.h +++ b/velox/external/duckdb/tpch/dbgen/include/dbgen/config.h @@ -139,7 +139,7 @@ #define RNG_C 1uI64 #define HUGE_FORMAT "%I64d" #define HUGE_DATE_FORMAT "%02I64d" -/* required by move to Visual Studio 2005 */ +/* requried by move to Visual Studio 2005 */ #define strdup(x) _strdup(x) #endif /* WIN32 */ diff --git a/velox/external/duckdb/tpch/dbgen/rng64.cpp b/velox/external/duckdb/tpch/dbgen/rng64.cpp index 71ecd5db5374..66cdde8322e3 100644 --- a/velox/external/duckdb/tpch/dbgen/rng64.cpp +++ b/velox/external/duckdb/tpch/dbgen/rng64.cpp @@ -117,7 +117,7 @@ DSS_HUGE AdvanceRand64(DSS_HUGE nSeed, DSS_HUGE nCount) { /* Recursively compute X(n) = A * X(n-1) + C */ /* */ - /* explicitly: */ + /* explicitely: */ /* X(n) = A^n * X(0) + { A^(n-1) + A^(n-2) + ... 
A + 1 } * C */ /* */ /* we write this as: */ diff --git a/velox/external/duckdb/tpch/tpch-extension.cpp b/velox/external/duckdb/tpch/tpch-extension.cpp index 10309c0b9d61..bba7a5b7ca95 100644 --- a/velox/external/duckdb/tpch/tpch-extension.cpp +++ b/velox/external/duckdb/tpch/tpch-extension.cpp @@ -24,10 +24,10 @@ struct DBGenFunctionData : public TableFunctionData { bool overwrite = false; }; -static unique_ptr dbgen_bind(ClientContext &context, vector &inputs, - unordered_map &named_parameters, - vector &input_table_types, vector &input_table_names, - vector &return_types, vector &names) { +static unique_ptr DbgenBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, vector &input_table_names, + vector &return_types, vector &names) { auto result = make_unique(); for (auto &kv : named_parameters) { if (kv.first == "sf") { @@ -41,12 +41,12 @@ static unique_ptr dbgen_bind(ClientContext &context, vector } } return_types.push_back(LogicalType::BOOLEAN); - names.push_back("Success"); + names.emplace_back("Success"); return move(result); } -static void dbgen_function(ClientContext &context, const FunctionData *bind_data, FunctionOperatorData *operator_state, - DataChunk *input, DataChunk &output) { +static void DbgenFunction(ClientContext &context, const FunctionData *bind_data, FunctionOperatorData *operator_state, + DataChunk *input, DataChunk &output) { auto &data = (DBGenFunctionData &)*bind_data; if (data.finished) { return; @@ -56,7 +56,98 @@ static void dbgen_function(ClientContext &context, const FunctionData *bind_data data.finished = true; } -static string pragma_tpch_query(ClientContext &context, const FunctionParameters &parameters) { + +struct TPCHData : public FunctionOperatorData { + TPCHData() : offset(0) { + } + idx_t offset; +}; + +unique_ptr TPCHInit(ClientContext &context, const FunctionData *bind_data, + const vector &column_ids, TableFilterCollection *filters) { + auto result = make_unique(); + 
return move(result); +} + +static unique_ptr TPCHQueryBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, vector &input_table_names, + vector &return_types, vector &names) { + names.emplace_back("query_nr"); + return_types.push_back(LogicalType::INTEGER); + + names.emplace_back("query"); + return_types.push_back(LogicalType::VARCHAR); + + return nullptr; +} + +static void TPCHQueryFunction(ClientContext &context, const FunctionData *bind_data, + FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { + auto &data = (TPCHData &)*operator_state; + idx_t tpch_queries = 22; + if (data.offset >= tpch_queries) { + // finished returning values + return; + } + idx_t chunk_count = 0; + while (data.offset < tpch_queries && chunk_count < STANDARD_VECTOR_SIZE) { + auto query = tpch::DBGenWrapper::GetQuery(data.offset + 1); + // "query_nr", PhysicalType::INT32 + output.SetValue(0, chunk_count, Value::INTEGER((int32_t)data.offset + 1)); + // "query", PhysicalType::VARCHAR + output.SetValue(1, chunk_count, Value(query)); + data.offset++; + chunk_count++; + } + output.SetCardinality(chunk_count); +} + +static unique_ptr TPCHQueryAnswerBind(ClientContext &context, vector &inputs, + unordered_map &named_parameters, + vector &input_table_types, + vector &input_table_names, + vector &return_types, vector &names) { + names.emplace_back("query_nr"); + return_types.push_back(LogicalType::INTEGER); + + names.emplace_back("scale_factor"); + return_types.push_back(LogicalType::INTEGER); + + names.emplace_back("answer"); + return_types.push_back(LogicalType::VARCHAR); + + return nullptr; +} + +static void TPCHQueryAnswerFunction(ClientContext &context, const FunctionData *bind_data, + FunctionOperatorData *operator_state, DataChunk *input, DataChunk &output) { + auto &data = (TPCHData &)*operator_state; + idx_t tpch_queries = 22; + vector scale_factors {0.01, 0.1, 1}; + idx_t total_answers = tpch_queries * 
scale_factors.size(); + if (data.offset >= total_answers) { + // finished returning values + return; + } + idx_t chunk_count = 0; + while (data.offset < total_answers && chunk_count < STANDARD_VECTOR_SIZE) { + idx_t cur_query = data.offset % tpch_queries; + idx_t cur_sf = data.offset / tpch_queries; + auto answer = tpch::DBGenWrapper::GetAnswer(scale_factors[cur_sf], cur_query + 1); + // "query_nr", PhysicalType::INT32 + output.SetValue(0, chunk_count, Value::INTEGER((int32_t)cur_query + 1)); + // "scale_factor", PhysicalType::INT32 + output.SetValue(1, chunk_count, Value::DOUBLE(scale_factors[cur_sf])); + // "query", PhysicalType::VARCHAR + output.SetValue(2, chunk_count, Value(answer)); + data.offset++; + chunk_count++; + } + output.SetCardinality(chunk_count); +} + +static string PragmaTpchQuery(ClientContext &context, const FunctionParameters &parameters) { auto index = parameters.values[0].GetValue(); return tpch::DBGenWrapper::GetQuery(index); } @@ -65,7 +156,7 @@ void TPCHExtension::Load(DuckDB &db) { Connection con(db); con.BeginTransaction(); - TableFunction dbgen_func("dbgen", {}, dbgen_function, dbgen_bind); + TableFunction dbgen_func("dbgen", {}, DbgenFunction, DbgenBind); dbgen_func.named_parameters["sf"] = LogicalType::DOUBLE; dbgen_func.named_parameters["overwrite"] = LogicalType::BOOLEAN; dbgen_func.named_parameters["schema"] = LogicalType::VARCHAR; @@ -77,11 +168,20 @@ void TPCHExtension::Load(DuckDB &db) { catalog.CreateTableFunction(*con.context, &dbgen_info); // create the TPCH pragma that allows us to run the query - auto tpch_func = PragmaFunction::PragmaCall("tpch", pragma_tpch_query, {LogicalType::BIGINT}); - + auto tpch_func = PragmaFunction::PragmaCall("tpch", PragmaTpchQuery, {LogicalType::BIGINT}); CreatePragmaFunctionInfo info(tpch_func); catalog.CreatePragmaFunction(*con.context, &info); + // create the TPCH_QUERIES function that returns the query + TableFunction tpch_query_func("tpch_queries", {}, TPCHQueryFunction, TPCHQueryBind, 
TPCHInit); + CreateTableFunctionInfo tpch_query_info(tpch_query_func); + catalog.CreateTableFunction(*con.context, &tpch_query_info); + + // create the TPCH_ANSWERS that returns the query result + TableFunction tpch_query_answer_func("tpch_answers", {}, TPCHQueryAnswerFunction, TPCHQueryAnswerBind, TPCHInit); + CreateTableFunctionInfo tpch_query_asnwer_info(tpch_query_answer_func); + catalog.CreateTableFunction(*con.context, &tpch_query_asnwer_info); + con.Commit(); }